mirror of
https://github.com/Noratrieb/intringen.git
synced 2026-01-16 06:35:03 +01:00
more intrinsics
This commit is contained in:
parent
9e4ce7913a
commit
10b9cefe24
4 changed files with 288 additions and 42 deletions
|
|
@ -154,21 +154,24 @@ fn generate_body_test(intr: &Intrinsic, rng: &mut SmallRng) -> Result<syn::Block
|
||||||
}
|
}
|
||||||
|
|
||||||
fn random_value(ty: &str, rng: &mut SmallRng) -> Result<syn::Expr> {
|
fn random_value(ty: &str, rng: &mut SmallRng) -> Result<syn::Expr> {
|
||||||
let quotei16 = |n| {
|
fn quote(n: impl quote::ToTokens) -> syn::Expr {
|
||||||
syn::parse_quote! { #n }
|
syn::parse_quote! { #n }
|
||||||
};
|
}
|
||||||
Ok(match ty {
|
Ok(match ty {
|
||||||
"i16" => quotei16(rng.gen::<i16>()),
|
"i8" => quote(rng.gen::<i8>()),
|
||||||
|
"i16" => quote(rng.gen::<i16>()),
|
||||||
|
"i32" => quote(rng.gen::<i32>()),
|
||||||
|
"i64" => quote(rng.gen::<i64>()),
|
||||||
"__m128i" => {
|
"__m128i" => {
|
||||||
let args = [
|
let args = [
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
quotei16(rng.gen::<i16>()),
|
quote(rng.gen::<i16>()),
|
||||||
];
|
];
|
||||||
|
|
||||||
syn::parse_quote! {
|
syn::parse_quote! {
|
||||||
|
|
@ -192,7 +195,10 @@ impl VariableType {
|
||||||
fn of(etype: &str, ty: &str) -> Result<Self> {
|
fn of(etype: &str, ty: &str) -> Result<Self> {
|
||||||
let (rawtype_signed, full_width) = match map_type_to_rust(ty) {
|
let (rawtype_signed, full_width) = match map_type_to_rust(ty) {
|
||||||
"__m128i" => (false, 128),
|
"__m128i" => (false, 128),
|
||||||
|
"i8" => (true, 8),
|
||||||
"i16" => (true, 16),
|
"i16" => (true, 16),
|
||||||
|
"i32" => (true, 32),
|
||||||
|
"i64" => (true, 64),
|
||||||
_ => bail!("unknown type: {ty}"),
|
_ => bail!("unknown type: {ty}"),
|
||||||
};
|
};
|
||||||
let (is_signed, elem_width) = match etype {
|
let (is_signed, elem_width) = match etype {
|
||||||
|
|
@ -201,6 +207,8 @@ impl VariableType {
|
||||||
"SI32" => (true, 32),
|
"SI32" => (true, 32),
|
||||||
"UI8" => (false, 8),
|
"UI8" => (false, 8),
|
||||||
"UI16" => (false, 16),
|
"UI16" => (false, 16),
|
||||||
|
"UI32" => (false, 32),
|
||||||
|
"UI64" => (false, 64),
|
||||||
_ => bail!("unknown element type: {etype}"),
|
_ => bail!("unknown element type: {etype}"),
|
||||||
};
|
};
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
|
@ -432,7 +440,11 @@ fn signature_soft_arch(intr: &Intrinsic, body: syn::Block) -> Result<syn::ItemFn
|
||||||
|
|
||||||
fn map_type_to_rust(ty: &str) -> &str {
|
fn map_type_to_rust(ty: &str) -> &str {
|
||||||
match ty {
|
match ty {
|
||||||
|
"__m128i" => ty,
|
||||||
|
"char" => "i8",
|
||||||
"short" => "i16",
|
"short" => "i16",
|
||||||
ty => ty,
|
"int" => "i32",
|
||||||
|
"__int64" => "i64",
|
||||||
|
ty => panic!("unknown type: {ty}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -89,9 +89,13 @@ fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
const INTRINSICS_GENERATE: &[&str] = &[
|
const INTRINSICS_GENERATE: &[&str] = &[
|
||||||
|
"_mm_setr_epi8",
|
||||||
|
"_mm_setr_epi16",
|
||||||
|
"_mm_setr_epi32",
|
||||||
|
"_mm_set_epi64x",
|
||||||
|
// packing instructions
|
||||||
"_mm_packus_epi16",
|
"_mm_packus_epi16",
|
||||||
"_mm_packs_epi16",
|
"_mm_packs_epi16",
|
||||||
"_mm_packus_epi32",
|
"_mm_packus_epi32",
|
||||||
"_mm_packs_epi32",
|
"_mm_packs_epi32",
|
||||||
"_mm_setr_epi16",
|
|
||||||
];
|
];
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,28 @@
|
||||||
impl<C: super::Core> Intrinsics for C {}
|
impl<C: super::Core> Intrinsics for C {}
|
||||||
pub trait Intrinsics: super::Core {
|
pub trait Intrinsics: super::Core {
|
||||||
|
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) {
|
||||||
|
let __tmp = self.cast_sign_i64_u64(e0);
|
||||||
|
self.set_lane___m128i_u64(dst, 0u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i64_u64(e1);
|
||||||
|
self.set_lane___m128i_u64(dst, 1u64, __tmp);
|
||||||
|
}
|
||||||
|
fn _mm_setr_epi32(
|
||||||
|
&mut self,
|
||||||
|
dst: &mut Self::__m128i,
|
||||||
|
e3: Self::i32,
|
||||||
|
e2: Self::i32,
|
||||||
|
e1: Self::i32,
|
||||||
|
e0: Self::i32,
|
||||||
|
) {
|
||||||
|
let __tmp = self.cast_sign_i32_u32(e3);
|
||||||
|
self.set_lane___m128i_u32(dst, 0u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i32_u32(e2);
|
||||||
|
self.set_lane___m128i_u32(dst, 1u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i32_u32(e1);
|
||||||
|
self.set_lane___m128i_u32(dst, 2u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i32_u32(e0);
|
||||||
|
self.set_lane___m128i_u32(dst, 3u64, __tmp);
|
||||||
|
}
|
||||||
fn _mm_setr_epi16(
|
fn _mm_setr_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
|
|
@ -29,6 +52,59 @@ pub trait Intrinsics: super::Core {
|
||||||
let __tmp = self.cast_sign_i16_u16(e0);
|
let __tmp = self.cast_sign_i16_u16(e0);
|
||||||
self.set_lane___m128i_u16(dst, 7u64, __tmp);
|
self.set_lane___m128i_u16(dst, 7u64, __tmp);
|
||||||
}
|
}
|
||||||
|
fn _mm_setr_epi8(
|
||||||
|
&mut self,
|
||||||
|
dst: &mut Self::__m128i,
|
||||||
|
e15: Self::i8,
|
||||||
|
e14: Self::i8,
|
||||||
|
e13: Self::i8,
|
||||||
|
e12: Self::i8,
|
||||||
|
e11: Self::i8,
|
||||||
|
e10: Self::i8,
|
||||||
|
e9: Self::i8,
|
||||||
|
e8: Self::i8,
|
||||||
|
e7: Self::i8,
|
||||||
|
e6: Self::i8,
|
||||||
|
e5: Self::i8,
|
||||||
|
e4: Self::i8,
|
||||||
|
e3: Self::i8,
|
||||||
|
e2: Self::i8,
|
||||||
|
e1: Self::i8,
|
||||||
|
e0: Self::i8,
|
||||||
|
) {
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e15);
|
||||||
|
self.set_lane___m128i_u8(dst, 0u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e14);
|
||||||
|
self.set_lane___m128i_u8(dst, 1u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e13);
|
||||||
|
self.set_lane___m128i_u8(dst, 2u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e12);
|
||||||
|
self.set_lane___m128i_u8(dst, 3u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e11);
|
||||||
|
self.set_lane___m128i_u8(dst, 4u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e10);
|
||||||
|
self.set_lane___m128i_u8(dst, 5u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e9);
|
||||||
|
self.set_lane___m128i_u8(dst, 6u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e8);
|
||||||
|
self.set_lane___m128i_u8(dst, 7u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e7);
|
||||||
|
self.set_lane___m128i_u8(dst, 8u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e6);
|
||||||
|
self.set_lane___m128i_u8(dst, 9u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e5);
|
||||||
|
self.set_lane___m128i_u8(dst, 10u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e4);
|
||||||
|
self.set_lane___m128i_u8(dst, 11u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e3);
|
||||||
|
self.set_lane___m128i_u8(dst, 12u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e2);
|
||||||
|
self.set_lane___m128i_u8(dst, 13u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e1);
|
||||||
|
self.set_lane___m128i_u8(dst, 14u64, __tmp);
|
||||||
|
let __tmp = self.cast_sign_i8_u8(e0);
|
||||||
|
self.set_lane___m128i_u8(dst, 15u64, __tmp);
|
||||||
|
}
|
||||||
fn _mm_packs_epi16(
|
fn _mm_packs_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
|
|
@ -205,6 +281,16 @@ pub trait Intrinsics: super::Core {
|
||||||
pub mod soft_arch {
|
pub mod soft_arch {
|
||||||
pub use super::super::soft_arch_types::*;
|
pub use super::super::soft_arch_types::*;
|
||||||
use super::Intrinsics;
|
use super::Intrinsics;
|
||||||
|
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
|
||||||
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0);
|
||||||
|
output
|
||||||
|
}
|
||||||
|
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
|
||||||
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore._mm_setr_epi32(&mut output, e3, e2, e1, e0);
|
||||||
|
output
|
||||||
|
}
|
||||||
pub fn _mm_setr_epi16(
|
pub fn _mm_setr_epi16(
|
||||||
e7: i16,
|
e7: i16,
|
||||||
e6: i16,
|
e6: i16,
|
||||||
|
|
@ -220,6 +306,47 @@ pub mod soft_arch {
|
||||||
._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0);
|
._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
pub fn _mm_setr_epi8(
|
||||||
|
e15: i8,
|
||||||
|
e14: i8,
|
||||||
|
e13: i8,
|
||||||
|
e12: i8,
|
||||||
|
e11: i8,
|
||||||
|
e10: i8,
|
||||||
|
e9: i8,
|
||||||
|
e8: i8,
|
||||||
|
e7: i8,
|
||||||
|
e6: i8,
|
||||||
|
e5: i8,
|
||||||
|
e4: i8,
|
||||||
|
e3: i8,
|
||||||
|
e2: i8,
|
||||||
|
e1: i8,
|
||||||
|
e0: i8,
|
||||||
|
) -> __m128i {
|
||||||
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore
|
||||||
|
._mm_setr_epi8(
|
||||||
|
&mut output,
|
||||||
|
e15,
|
||||||
|
e14,
|
||||||
|
e13,
|
||||||
|
e12,
|
||||||
|
e11,
|
||||||
|
e10,
|
||||||
|
e9,
|
||||||
|
e8,
|
||||||
|
e7,
|
||||||
|
e6,
|
||||||
|
e5,
|
||||||
|
e4,
|
||||||
|
e3,
|
||||||
|
e2,
|
||||||
|
e1,
|
||||||
|
e0,
|
||||||
|
);
|
||||||
|
output
|
||||||
|
}
|
||||||
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
|
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
|
||||||
|
|
@ -245,46 +372,70 @@ pub mod soft_arch {
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
use super::super::compare_test_helper::hard_soft_same_128;
|
use super::super::compare_test_helper::hard_soft_same_128;
|
||||||
#[test]
|
#[test]
|
||||||
|
fn _mm_set_epi64x() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let e1 = 1041352657357235268i64; let e0 = 1955209120357942897i64;
|
||||||
|
_mm_set_epi64x(e1, e0) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn _mm_setr_epi32() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let e3 = 1455669123i32; let e2 = 247864885i32; let e1 = 1390920924i32; let
|
||||||
|
e0 = 1068333055i32; _mm_setr_epi32(e3, e2, e1, e0) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
fn _mm_setr_epi16() {
|
fn _mm_setr_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e7 = - 24391i16; let e6 = 19541i16; let e5 = - 16509i16; let e4 =
|
{ let e7 = 16513i16; let e6 = 22878i16; let e5 = 23986i16; let e4 = 27900i16;
|
||||||
7733i16; let e3 = - 15140i16; let e2 = 30719i16; let e1 = 16513i16; let e0 =
|
let e3 = - 8343i16; let e2 = - 10648i16; let e1 = 4841i16; let e0 = 14610i16;
|
||||||
22878i16; _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn _mm_setr_epi8() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let e15 = - 99i8; let e14 = 125i8; let e13 = 118i8; let e12 = 5i8; let e11
|
||||||
|
= 41i8; let e10 = - 40i8; let e9 = 124i8; let e8 = - 6i8; let e7 = 114i8; let
|
||||||
|
e6 = 24i8; let e5 = - 99i8; let e4 = 65i8; let e3 = 11i8; let e2 = - 15i8;
|
||||||
|
let e1 = 20i8; let e0 = - 107i8; _mm_setr_epi8(e15, e14, e13, e12, e11, e10,
|
||||||
|
e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi16() {
|
fn _mm_packs_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(23986i16, 27900i16, - 8343i16, - 10648i16, 4841i16,
|
{ let a = _mm_setr_epi16(23623i16, - 22080i16, - 1436i16, - 30227i16,
|
||||||
14610i16, - 17251i16, - 3971i16); let b = _mm_setr_epi16(22390i16, -
|
8629i16, 10922i16, - 16731i16, - 1013i16); let b = _mm_setr_epi16(- 14310i16,
|
||||||
23547i16, 15401i16, 15832i16, - 14212i16, - 1286i16, - 18062i16, 22296i16);
|
2892i16, - 28568i16, 12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16);
|
||||||
_mm_packs_epi16(a, b) }
|
_mm_packs_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi32() {
|
fn _mm_packs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(18077i16, 23617i16, - 9205i16, 21233i16, - 4332i16,
|
{ let a = _mm_setr_epi16(4197i16, 1829i16, 9149i16, 18759i16, 30885i16, -
|
||||||
- 31339i16, 23623i16, - 22080i16); let b = _mm_setr_epi16(- 1436i16, -
|
3879i16, 21600i16, 24454i16); let b = _mm_setr_epi16(23524i16, 10765i16,
|
||||||
30227i16, 8629i16, 10922i16, - 16731i16, - 1013i16, - 14310i16, 2892i16);
|
32539i16, 26890i16, - 3892i16, 4386i16, 18704i16, 8253i16);
|
||||||
_mm_packs_epi32(a, b) }
|
_mm_packs_epi32(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi16() {
|
fn _mm_packus_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 28568i16, 12614i16, 20103i16, 32412i16, -
|
{ let a = _mm_setr_epi16(- 29217i16, 32013i16, 7448i16, 2172i16, - 14764i16,
|
||||||
28704i16, - 27930i16, 4197i16, 1829i16); let b = _mm_setr_epi16(9149i16,
|
- 1068i16, - 25463i16, 21215i16); let b = _mm_setr_epi16(- 31392i16, -
|
||||||
18759i16, 30885i16, - 3879i16, 21600i16, 24454i16, 23524i16, 10765i16);
|
14015i16, - 32565i16, - 11312i16, - 4934i16, - 19283i16, - 27533i16, -
|
||||||
_mm_packus_epi16(a, b) }
|
9939i16); _mm_packus_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi32() {
|
fn _mm_packus_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(32539i16, 26890i16, - 3892i16, 4386i16, 18704i16,
|
{ let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16,
|
||||||
8253i16, - 29217i16, 32013i16); let b = _mm_setr_epi16(7448i16, 2172i16, -
|
- 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16,
|
||||||
14764i16, - 1068i16, - 25463i16, 21215i16, - 31392i16, - 14015i16);
|
2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16);
|
||||||
_mm_packus_epi32(a, b) }
|
_mm_packus_epi32(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,17 +17,28 @@ pub trait Core {
|
||||||
|
|
||||||
type __m128i: Copy;
|
type __m128i: Copy;
|
||||||
|
|
||||||
|
fn cast_sign_i8_u8(&mut self, value: Self::i8) -> Self::u8;
|
||||||
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16;
|
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16;
|
||||||
|
fn cast_sign_i32_u32(&mut self, value: Self::i32) -> Self::u32;
|
||||||
|
fn cast_sign_i64_u64(&mut self, value: Self::i64) -> Self::u64;
|
||||||
|
|
||||||
|
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8;
|
||||||
|
fn get_lane___m128i_i8(&mut self, value: Self::__m128i, idx: u64) -> Self::i8;
|
||||||
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
|
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
|
||||||
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
|
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
|
||||||
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32;
|
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32;
|
||||||
|
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64;
|
||||||
|
fn get_lane___m128i_i64(&mut self, value: Self::__m128i, idx: u64) -> Self::i64;
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
||||||
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
|
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
||||||
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16);
|
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16);
|
||||||
|
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32);
|
||||||
|
fn set_lane___m128i_i32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i32);
|
||||||
|
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64);
|
||||||
|
fn set_lane___m128i_i64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i64);
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
|
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
|
||||||
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
||||||
|
|
@ -50,15 +61,42 @@ impl Core for ValueCore {
|
||||||
|
|
||||||
type __m128i = [u8; 16];
|
type __m128i = [u8; 16];
|
||||||
|
|
||||||
|
////// CAST
|
||||||
|
|
||||||
|
/// Converts a signed 8-bit value to the unsigned 8-bit type with an `as` cast.
///
/// NOTE(review): presumably `Self::i8`/`Self::u8` are the primitive `i8`/`u8`
/// for `ValueCore`, making this a bit-preserving reinterpretation — confirm
/// against the associated type definitions.
fn cast_sign_i8_u8(&mut self, value: Self::i8) -> Self::u8 {
    let unsigned: Self::u8 = value as _;
    unsigned
}
|
||||||
|
|
||||||
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16 {
|
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16 {
|
||||||
value as _
|
value as _
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 {
|
fn cast_sign_i32_u32(&mut self, value: Self::i32) -> Self::u32 {
|
||||||
let first = value[(idx * 2 + 1) as usize];
|
value as _
|
||||||
let second = value[(idx * 2) as usize];
|
}
|
||||||
|
|
||||||
((first as u16) << 8) | (second as u16)
|
fn cast_sign_i64_u64(&mut self, value: Self::i64) -> Self::u64 {
|
||||||
|
value as _
|
||||||
|
}
|
||||||
|
|
||||||
|
////// GET LANE
|
||||||
|
|
||||||
|
/// Reads the byte at position `idx` of the vector.
///
/// Panics if `idx` is outside the bounds of the underlying byte array.
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8 {
    let lane = idx as usize;
    value[lane]
}
|
||||||
|
|
||||||
|
/// Reads 8-bit lane `idx` via the unsigned getter and casts the result to `i8`.
fn get_lane___m128i_i8(&mut self, value: Self::__m128i, idx: u64) -> Self::i8 {
    let raw = self.get_lane___m128i_u8(value, idx);
    raw as i8
}
|
||||||
|
|
||||||
|
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 {
|
||||||
|
let mut acc = 0;
|
||||||
|
for i in 0..2 {
|
||||||
|
let v = value[(idx * 2 + i) as usize];
|
||||||
|
acc |= (v as u16) << (8 * i);
|
||||||
|
}
|
||||||
|
|
||||||
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
|
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
|
||||||
|
|
@ -66,18 +104,35 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
|
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
|
||||||
let first = value[(idx * 4 + 3) as usize];
|
let mut acc = 0;
|
||||||
let second = value[(idx * 4 + 2) as usize];
|
for i in 0..4 {
|
||||||
let third = value[(idx * 4 + 1) as usize];
|
let v = value[(idx * 4 + i) as usize];
|
||||||
let fourth = value[(idx * 4) as usize];
|
acc |= (v as u32) << (8 * i);
|
||||||
|
}
|
||||||
|
|
||||||
((first as u32) << 24) | ((second as u32) << 16) | ((third as u32) << 8) | (fourth as u32)
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32 {
|
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32 {
|
||||||
self.get_lane___m128i_u32(value, idx) as i32
|
self.get_lane___m128i_u32(value, idx) as i32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assembles 64-bit lane `idx` from the eight bytes starting at `idx * 8`.
///
/// The byte at offset `i` within the lane contributes bits `8 * i ..= 8 * i + 7`
/// of the result, i.e. the lowest-addressed byte is the least significant.
/// Panics if the lane reaches past the end of the underlying byte array.
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64 {
    (0..8).fold(0, |lane, byte_idx| {
        let byte = value[(idx * 8 + byte_idx) as usize];
        lane | ((byte as u64) << (8 * byte_idx))
    })
}
|
||||||
|
|
||||||
|
/// Reads 64-bit lane `idx` via the unsigned getter and casts the result to `i64`.
fn get_lane___m128i_i64(&mut self, value: Self::__m128i, idx: u64) -> Self::i64 {
    let raw = self.get_lane___m128i_u64(value, idx);
    raw as i64
}
|
||||||
|
|
||||||
|
////// SET LANE
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
||||||
place[idx as usize] = value;
|
place[idx as usize] = value;
|
||||||
}
|
}
|
||||||
|
|
@ -87,16 +142,40 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
||||||
let first = (value & 0xFF) as u8;
|
for i in 0..2 {
|
||||||
let second = (value >> 8) as u8;
|
let value = ((value >> 8 * i) & 0xFF) as u8;
|
||||||
place[(idx * 2) as usize] = first;
|
place[(idx * 2 + i) as usize] = value;
|
||||||
place[(idx * 2 + 1) as usize] = second;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16) {
|
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16) {
|
||||||
self.set_lane___m128i_u16(place, idx, value as u16);
|
self.set_lane___m128i_u16(place, idx, value as u16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Writes `value` into 32-bit lane `idx`, i.e. the four bytes starting at `idx * 4`.
///
/// Bits `8 * i ..= 8 * i + 7` of `value` go to byte offset `i` within the lane,
/// so the least significant byte is stored first.
/// Panics if the lane reaches past the end of the underlying byte array.
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32) {
    for byte_idx in 0..4 {
        // The narrowing cast keeps only the low eight bits of the shifted value.
        let byte = (value >> (8 * byte_idx)) as u8;
        place[(idx * 4 + byte_idx) as usize] = byte;
    }
}
|
||||||
|
|
||||||
|
/// Writes a signed value into 32-bit lane `idx` by casting it to `u32` and
/// delegating to the unsigned setter.
fn set_lane___m128i_i32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i32) {
    let bits = value as u32;
    self.set_lane___m128i_u32(place, idx, bits);
}
|
||||||
|
|
||||||
|
/// Writes `value` into 64-bit lane `idx`, i.e. the eight bytes starting at `idx * 8`.
///
/// Bits `8 * i ..= 8 * i + 7` of `value` go to byte offset `i` within the lane,
/// so the least significant byte is stored first.
/// Panics if the lane reaches past the end of the underlying byte array.
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64) {
    for byte_idx in 0..8 {
        // The narrowing cast keeps only the low eight bits of the shifted value.
        let byte = (value >> (8 * byte_idx)) as u8;
        place[(idx * 8 + byte_idx) as usize] = byte;
    }
}
|
||||||
|
|
||||||
|
/// Writes a signed value into 64-bit lane `idx` by casting it to `u64` and
/// delegating to the unsigned 64-bit setter.
///
/// Panics if the lane reaches past the end of the underlying byte array.
fn set_lane___m128i_i64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i64) {
    // Must use the 64-bit setter: routing through the u32 one would drop the
    // upper 32 bits of `value` and address the lane at a 4-byte stride
    // (bytes `idx * 4 ..`) instead of the 8-byte stride this lane width needs.
    self.set_lane___m128i_u64(place, idx, value as u64);
}
|
||||||
|
|
||||||
|
////// HELPERS
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
|
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
|
||||||
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
|
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
|
||||||
clamp as i8
|
clamp as i8
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue