mirror of
https://github.com/Noratrieb/intringen.git
synced 2026-01-14 13:55:02 +01:00
fix
This commit is contained in:
parent
ef1aaa50ad
commit
f55e29c7c4
4 changed files with 298 additions and 518 deletions
|
|
@ -130,7 +130,7 @@ fn generate_body_soft_arch(intr: &Intrinsic) -> Result<syn::Block> {
|
||||||
|
|
||||||
let args = intr.parameter.iter().map(|param| -> syn::Expr {
|
let args = intr.parameter.iter().map(|param| -> syn::Expr {
|
||||||
let name = ident_opt_s(&param.varname).unwrap();
|
let name = ident_opt_s(&param.varname).unwrap();
|
||||||
syn::parse_quote! { #name }
|
syn::parse_quote! { #name as _ }
|
||||||
});
|
});
|
||||||
|
|
||||||
block.stmts.push(syn::parse_quote! {
|
block.stmts.push(syn::parse_quote! {
|
||||||
|
|
@ -200,54 +200,49 @@ fn random_value(ty: &str, rng: &mut SmallRng) -> Result<syn::Expr> {
|
||||||
_mm_setr_epi16(#(#args),*)
|
_mm_setr_epi16(#(#args),*)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => bail!("unknown type: {ty}"),
|
_ => bail!("unknown type for random value: {ty}"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
enum Type {
|
enum Type {
|
||||||
Vector(VectorType),
|
Vector(VectorType),
|
||||||
Scalar {
|
Scalar { elemty: ElementType },
|
||||||
/// Some parameters have C types that are signed, while their `etype` is not.
|
|
||||||
c_is_signed: bool,
|
|
||||||
elemty: ElementType,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A SIMD vector type like `16xi8 (__m128i)`
|
||||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
struct VectorType {
|
struct VectorType {
|
||||||
|
/// The amount of lanes, `16` in `16xi8 (__m128i)`.
|
||||||
lanes: u64,
|
lanes: u64,
|
||||||
|
/// The type of a single lane, `i8` in `16xi8 (__m128i)`.
|
||||||
elem: ElementType,
|
elem: ElementType,
|
||||||
|
/// The raw Rust/C type, `__m128i` in `16xi8 (__m128i)`.
|
||||||
raw_type: &'static str,
|
raw_type: &'static str,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A single element in a vector.
|
||||||
|
/// For example in `16xi8 (__m128i)`, it would be `i8` (we do not care about signedness).
|
||||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
struct ElementType {
|
struct ElementType {
|
||||||
is_signed: bool,
|
|
||||||
width: u64,
|
width: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Type {
|
impl Type {
|
||||||
fn of(etype: &str, ty: &str) -> Result<Self> {
|
fn of(etype: &str, ty: &str) -> Result<Self> {
|
||||||
let (etype_signed, etype_width) = match etype {
|
let etype_width = match etype {
|
||||||
"SI8" => (true, 8),
|
"SI8" => 8,
|
||||||
"SI16" => (true, 16),
|
"SI16" => 16,
|
||||||
"SI32" => (true, 32),
|
"SI32" => 32,
|
||||||
"UI8" => (false, 8),
|
"UI8" => 8,
|
||||||
"UI16" => (false, 16),
|
"UI16" => 16,
|
||||||
"UI32" => (false, 32),
|
"UI32" => 32,
|
||||||
"UI64" => (false, 64),
|
"UI64" => 64,
|
||||||
_ => bail!("unknown element type: {etype}"),
|
_ => bail!("unknown element type: {etype}"),
|
||||||
};
|
};
|
||||||
let elem = ElementType {
|
let elem = ElementType { width: etype_width };
|
||||||
is_signed: etype_signed,
|
|
||||||
width: etype_width,
|
|
||||||
};
|
|
||||||
|
|
||||||
let scalar = |sign| Type::Scalar {
|
let scalar = Type::Scalar { elemty: elem };
|
||||||
c_is_signed: sign,
|
|
||||||
elemty: elem,
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(match ty {
|
Ok(match ty {
|
||||||
"__m128i" => Type::Vector(VectorType {
|
"__m128i" => Type::Vector(VectorType {
|
||||||
|
|
@ -255,10 +250,10 @@ impl Type {
|
||||||
elem,
|
elem,
|
||||||
raw_type: "__m128i",
|
raw_type: "__m128i",
|
||||||
}),
|
}),
|
||||||
"char" => scalar(true),
|
"char" => scalar,
|
||||||
"short" => scalar(true),
|
"short" => scalar,
|
||||||
"int" => scalar(true),
|
"int" => scalar,
|
||||||
"__int64" => scalar(true),
|
"__int64" => scalar,
|
||||||
_ => bail!("unknown type: {ty}"),
|
_ => bail!("unknown type: {ty}"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -276,18 +271,11 @@ impl Type {
|
||||||
};
|
};
|
||||||
ty
|
ty
|
||||||
}
|
}
|
||||||
fn expect_scalar(&self) -> ElementType {
|
|
||||||
let Self::Scalar { elemty, .. } = *self else {
|
|
||||||
panic!("expected scalar, found vector");
|
|
||||||
};
|
|
||||||
elemty
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ElementType {
|
impl ElementType {
|
||||||
fn rust_type(&self) -> String {
|
fn rust_type(&self) -> String {
|
||||||
let pre = if self.is_signed { 'i' } else { 'u' };
|
format!("u{}", self.width)
|
||||||
format!("{pre}{}", self.width)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -461,30 +449,8 @@ fn gen_expr_tmp(
|
||||||
let (result, ty): (syn::Expr, _) = match expr {
|
let (result, ty): (syn::Expr, _) = match expr {
|
||||||
Expr::Int(int) => (syn::parse_quote! { #int }, None),
|
Expr::Int(int) => (syn::parse_quote! { #int }, None),
|
||||||
Expr::Ident(identifier) => {
|
Expr::Ident(identifier) => {
|
||||||
let ty = type_of_ident(&identifier);
|
|
||||||
let identifier = ident(&identifier);
|
let identifier = ident(&identifier);
|
||||||
match ty {
|
(syn::parse_quote! { #identifier }, None)
|
||||||
Ok(Type::Scalar {
|
|
||||||
c_is_signed,
|
|
||||||
elemty,
|
|
||||||
}) if elemty.is_signed != c_is_signed => {
|
|
||||||
// intel intrinsics types kinda lie sometimes.
|
|
||||||
// _mm_setr_epi16 says the etype of the argument is UI16 (unsigned),
|
|
||||||
// while the actual type is short (signed). Do a cast to the etype, since we used that.
|
|
||||||
let from = ElementType {
|
|
||||||
is_signed: c_is_signed,
|
|
||||||
width: elemty.width,
|
|
||||||
}
|
|
||||||
.rust_type();
|
|
||||||
let to = elemty.rust_type();
|
|
||||||
let method = ident(&format!("cast_sign_{from}_{to}"));
|
|
||||||
(
|
|
||||||
tmp(block, syn::parse_quote! { self.#method(#identifier) }),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
_ => (syn::parse_quote! { #identifier }, None),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Expr::Index { lhs, idx } => {
|
Expr::Index { lhs, idx } => {
|
||||||
let (identifier, method, lane_idx, ty) = gen_idx("get", *lhs, *idx, type_of_ident)?;
|
let (identifier, method, lane_idx, ty) = gen_idx("get", *lhs, *idx, type_of_ident)?;
|
||||||
|
|
@ -492,13 +458,7 @@ fn gen_expr_tmp(
|
||||||
block,
|
block,
|
||||||
syn::parse_quote! { self.#method(#identifier, #lane_idx) },
|
syn::parse_quote! { self.#method(#identifier, #lane_idx) },
|
||||||
);
|
);
|
||||||
(
|
(expr, Some(Type::Scalar { elemty: ty.elem }))
|
||||||
expr,
|
|
||||||
Some(Type::Scalar {
|
|
||||||
c_is_signed: ty.elem.is_signed,
|
|
||||||
elemty: ty.elem,
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
Expr::Range { .. } => todo!(),
|
Expr::Range { .. } => todo!(),
|
||||||
Expr::Call { function, args } => {
|
Expr::Call { function, args } => {
|
||||||
|
|
@ -543,24 +503,25 @@ fn gen_expr_tmp(
|
||||||
};
|
};
|
||||||
syn::parse_quote! { ( #lhs #token #rhs ) }
|
syn::parse_quote! { ( #lhs #token #rhs ) }
|
||||||
}
|
}
|
||||||
Some(ty) => {
|
Some(_ty) => {
|
||||||
let prefix = match op {
|
let prefix = match op {
|
||||||
BinaryOpKind::Add => "add",
|
BinaryOpKind::Add => "add",
|
||||||
BinaryOpKind::Mul => "mul",
|
BinaryOpKind::Mul => "mul",
|
||||||
};
|
};
|
||||||
|
|
||||||
let ty = ty.expect_scalar();
|
// TODO: EXTEND somehow possibly??? ugh.
|
||||||
let method = ident(&format!(
|
|
||||||
"ext_{}_{}64",
|
//let ty = ty.expect_scalar();
|
||||||
ty.rust_type(),
|
//let method = ident(&format!(
|
||||||
if ty.is_signed { "s" } else { "u" }
|
// "ext_{}_u64",
|
||||||
));
|
// ty.rust_type(),
|
||||||
let lhs_ext = tmp(block, syn::parse_quote! { self.#method(#lhs) });
|
// if ty.is_signed { "s" } else { "u" }
|
||||||
let rhs_ext = tmp(block, syn::parse_quote! { self.#method(#rhs) });
|
//));
|
||||||
|
//let lhs_ext = tmp(block, syn::parse_quote! { self.#method(#lhs) });
|
||||||
|
//let rhs_ext = tmp(block, syn::parse_quote! { self.#method(#rhs) });
|
||||||
|
|
||||||
// TODO: EXTEND
|
|
||||||
let method = ident(&format!("{prefix}_64"));
|
let method = ident(&format!("{prefix}_64"));
|
||||||
tmp(block, syn::parse_quote! { self.#method(#lhs_ext, #rhs_ext) })
|
tmp(block, syn::parse_quote! { self.#method(#lhs, #rhs) })
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -590,7 +551,7 @@ fn signature(intr: &Intrinsic, body: syn::Block) -> Result<syn::TraitItem> {
|
||||||
let name = ident(&intr.name);
|
let name = ident(&intr.name);
|
||||||
|
|
||||||
let ret_name = ident_opt_s(&intr.ret.varname)?;
|
let ret_name = ident_opt_s(&intr.ret.varname)?;
|
||||||
let ret_ty = ident(map_type_to_rust(intr.ret.r#type.as_ref().unwrap()));
|
let ret_ty = ident(map_type_to_rust_unsigned(intr.ret.r#type.as_ref().unwrap()));
|
||||||
|
|
||||||
let args = [
|
let args = [
|
||||||
syn::parse_quote! { &mut self },
|
syn::parse_quote! { &mut self },
|
||||||
|
|
@ -599,7 +560,7 @@ fn signature(intr: &Intrinsic, body: syn::Block) -> Result<syn::TraitItem> {
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.chain(intr.parameter.iter().map(|param| -> syn::FnArg {
|
.chain(intr.parameter.iter().map(|param| -> syn::FnArg {
|
||||||
let varname = ident_opt_s(&param.varname).unwrap();
|
let varname = ident_opt_s(&param.varname).unwrap();
|
||||||
let ty = ident(map_type_to_rust(param.r#type.as_ref().unwrap()));
|
let ty = ident(map_type_to_rust_unsigned(param.r#type.as_ref().unwrap()));
|
||||||
|
|
||||||
syn::parse_quote! { #varname: Self::#ty }
|
syn::parse_quote! { #varname: Self::#ty }
|
||||||
}));
|
}));
|
||||||
|
|
@ -632,3 +593,14 @@ fn map_type_to_rust(ty: &str) -> &str {
|
||||||
ty => panic!("unknown type: {ty}"),
|
ty => panic!("unknown type: {ty}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn map_type_to_rust_unsigned(ty: &str) -> &str {
|
||||||
|
match ty {
|
||||||
|
"__m128i" => ty,
|
||||||
|
"char" => "u8",
|
||||||
|
"short" => "u16",
|
||||||
|
"int" => "u32",
|
||||||
|
"__int64" => "u64",
|
||||||
|
ty => panic!("unknown type: {ty}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,11 +92,11 @@ const INTRINSICS_GENERATE: &[&str] = &[
|
||||||
/////
|
/////
|
||||||
///// Arithmetic
|
///// Arithmetic
|
||||||
/////
|
/////
|
||||||
"_mm_add_epi16",
|
// "_mm_add_epi16",
|
||||||
"_mm_add_epi32",
|
// "_mm_add_epi32",
|
||||||
"_mm_add_epi64",
|
// "_mm_add_epi64",
|
||||||
// todo: float and __m64 stuff
|
// todo: float and __m64 stuff
|
||||||
"_mm_adds_epi16",
|
// "_mm_adds_epi16",
|
||||||
//"_mm_adds_epi8",
|
//"_mm_adds_epi8",
|
||||||
//"_mm_adds_epu16",
|
//"_mm_adds_epu16",
|
||||||
//"_mm_adds_epu8",
|
//"_mm_adds_epu8",
|
||||||
|
|
|
||||||
|
|
@ -1,175 +1,80 @@
|
||||||
#![allow(unused_parens)]
|
#![allow(unused_parens)]
|
||||||
impl<C: super::Core> Intrinsics for C {}
|
impl<C: super::Core> Intrinsics for C {}
|
||||||
pub trait Intrinsics: super::Core {
|
pub trait Intrinsics: super::Core {
|
||||||
fn _mm_add_epi16(
|
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::u64, e0: Self::u64) {
|
||||||
&mut self,
|
self.set_lane___m128i_u64(dst, 0u64, e0);
|
||||||
dst: &mut Self::__m128i,
|
self.set_lane___m128i_u64(dst, 1u64, e1);
|
||||||
a: Self::__m128i,
|
|
||||||
b: Self::__m128i,
|
|
||||||
) {
|
|
||||||
for j in 0u64..=7u64 {
|
|
||||||
let i = (j * 16u64);
|
|
||||||
let __tmp0 = self.get_lane___m128i_u16(a, (i / 16u64));
|
|
||||||
let __tmp1 = self.get_lane___m128i_u16(b, (i / 16u64));
|
|
||||||
let __tmp2 = self.ext_u16_u64(__tmp0);
|
|
||||||
let __tmp3 = self.ext_u16_u64(__tmp1);
|
|
||||||
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
|
||||||
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn _mm_add_epi32(
|
|
||||||
&mut self,
|
|
||||||
dst: &mut Self::__m128i,
|
|
||||||
a: Self::__m128i,
|
|
||||||
b: Self::__m128i,
|
|
||||||
) {
|
|
||||||
for j in 0u64..=3u64 {
|
|
||||||
let i = (j * 32u64);
|
|
||||||
let __tmp0 = self.get_lane___m128i_u32(a, (i / 32u64));
|
|
||||||
let __tmp1 = self.get_lane___m128i_u32(b, (i / 32u64));
|
|
||||||
let __tmp2 = self.ext_u32_u64(__tmp0);
|
|
||||||
let __tmp3 = self.ext_u32_u64(__tmp1);
|
|
||||||
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
|
||||||
self.set_lane___m128i_u32(dst, (i / 32u64), __tmp4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn _mm_add_epi64(
|
|
||||||
&mut self,
|
|
||||||
dst: &mut Self::__m128i,
|
|
||||||
a: Self::__m128i,
|
|
||||||
b: Self::__m128i,
|
|
||||||
) {
|
|
||||||
for j in 0u64..=1u64 {
|
|
||||||
let i = (j * 64u64);
|
|
||||||
let __tmp0 = self.get_lane___m128i_u64(a, (i / 64u64));
|
|
||||||
let __tmp1 = self.get_lane___m128i_u64(b, (i / 64u64));
|
|
||||||
let __tmp2 = self.ext_u64_u64(__tmp0);
|
|
||||||
let __tmp3 = self.ext_u64_u64(__tmp1);
|
|
||||||
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
|
||||||
self.set_lane___m128i_u64(dst, (i / 64u64), __tmp4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn _mm_adds_epi16(
|
|
||||||
&mut self,
|
|
||||||
dst: &mut Self::__m128i,
|
|
||||||
a: Self::__m128i,
|
|
||||||
b: Self::__m128i,
|
|
||||||
) {
|
|
||||||
for j in 0u64..=7u64 {
|
|
||||||
let i = (j * 16u64);
|
|
||||||
let __tmp0 = self.get_lane___m128i_i16(a, (i / 16u64));
|
|
||||||
let __tmp1 = self.get_lane___m128i_i16(b, (i / 16u64));
|
|
||||||
let __tmp2 = self.ext_i16_s64(__tmp0);
|
|
||||||
let __tmp3 = self.ext_i16_s64(__tmp1);
|
|
||||||
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
|
||||||
let __tmp5 = self.saturate16(__tmp4);
|
|
||||||
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp5);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) {
|
|
||||||
let __tmp0 = self.cast_sign_i64_u64(e0);
|
|
||||||
self.set_lane___m128i_u64(dst, 0u64, __tmp0);
|
|
||||||
let __tmp1 = self.cast_sign_i64_u64(e1);
|
|
||||||
self.set_lane___m128i_u64(dst, 1u64, __tmp1);
|
|
||||||
}
|
}
|
||||||
fn _mm_setr_epi32(
|
fn _mm_setr_epi32(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
e3: Self::i32,
|
e3: Self::u32,
|
||||||
e2: Self::i32,
|
e2: Self::u32,
|
||||||
e1: Self::i32,
|
e1: Self::u32,
|
||||||
e0: Self::i32,
|
e0: Self::u32,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.cast_sign_i32_u32(e3);
|
self.set_lane___m128i_u32(dst, 0u64, e3);
|
||||||
self.set_lane___m128i_u32(dst, 0u64, __tmp0);
|
self.set_lane___m128i_u32(dst, 1u64, e2);
|
||||||
let __tmp1 = self.cast_sign_i32_u32(e2);
|
self.set_lane___m128i_u32(dst, 2u64, e1);
|
||||||
self.set_lane___m128i_u32(dst, 1u64, __tmp1);
|
self.set_lane___m128i_u32(dst, 3u64, e0);
|
||||||
let __tmp2 = self.cast_sign_i32_u32(e1);
|
|
||||||
self.set_lane___m128i_u32(dst, 2u64, __tmp2);
|
|
||||||
let __tmp3 = self.cast_sign_i32_u32(e0);
|
|
||||||
self.set_lane___m128i_u32(dst, 3u64, __tmp3);
|
|
||||||
}
|
}
|
||||||
fn _mm_setr_epi16(
|
fn _mm_setr_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
e7: Self::i16,
|
e7: Self::u16,
|
||||||
e6: Self::i16,
|
e6: Self::u16,
|
||||||
e5: Self::i16,
|
e5: Self::u16,
|
||||||
e4: Self::i16,
|
e4: Self::u16,
|
||||||
e3: Self::i16,
|
e3: Self::u16,
|
||||||
e2: Self::i16,
|
e2: Self::u16,
|
||||||
e1: Self::i16,
|
e1: Self::u16,
|
||||||
e0: Self::i16,
|
e0: Self::u16,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.cast_sign_i16_u16(e7);
|
self.set_lane___m128i_u16(dst, 0u64, e7);
|
||||||
self.set_lane___m128i_u16(dst, 0u64, __tmp0);
|
self.set_lane___m128i_u16(dst, 1u64, e6);
|
||||||
let __tmp1 = self.cast_sign_i16_u16(e6);
|
self.set_lane___m128i_u16(dst, 2u64, e5);
|
||||||
self.set_lane___m128i_u16(dst, 1u64, __tmp1);
|
self.set_lane___m128i_u16(dst, 3u64, e4);
|
||||||
let __tmp2 = self.cast_sign_i16_u16(e5);
|
self.set_lane___m128i_u16(dst, 4u64, e3);
|
||||||
self.set_lane___m128i_u16(dst, 2u64, __tmp2);
|
self.set_lane___m128i_u16(dst, 5u64, e2);
|
||||||
let __tmp3 = self.cast_sign_i16_u16(e4);
|
self.set_lane___m128i_u16(dst, 6u64, e1);
|
||||||
self.set_lane___m128i_u16(dst, 3u64, __tmp3);
|
self.set_lane___m128i_u16(dst, 7u64, e0);
|
||||||
let __tmp4 = self.cast_sign_i16_u16(e3);
|
|
||||||
self.set_lane___m128i_u16(dst, 4u64, __tmp4);
|
|
||||||
let __tmp5 = self.cast_sign_i16_u16(e2);
|
|
||||||
self.set_lane___m128i_u16(dst, 5u64, __tmp5);
|
|
||||||
let __tmp6 = self.cast_sign_i16_u16(e1);
|
|
||||||
self.set_lane___m128i_u16(dst, 6u64, __tmp6);
|
|
||||||
let __tmp7 = self.cast_sign_i16_u16(e0);
|
|
||||||
self.set_lane___m128i_u16(dst, 7u64, __tmp7);
|
|
||||||
}
|
}
|
||||||
fn _mm_setr_epi8(
|
fn _mm_setr_epi8(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
e15: Self::i8,
|
e15: Self::u8,
|
||||||
e14: Self::i8,
|
e14: Self::u8,
|
||||||
e13: Self::i8,
|
e13: Self::u8,
|
||||||
e12: Self::i8,
|
e12: Self::u8,
|
||||||
e11: Self::i8,
|
e11: Self::u8,
|
||||||
e10: Self::i8,
|
e10: Self::u8,
|
||||||
e9: Self::i8,
|
e9: Self::u8,
|
||||||
e8: Self::i8,
|
e8: Self::u8,
|
||||||
e7: Self::i8,
|
e7: Self::u8,
|
||||||
e6: Self::i8,
|
e6: Self::u8,
|
||||||
e5: Self::i8,
|
e5: Self::u8,
|
||||||
e4: Self::i8,
|
e4: Self::u8,
|
||||||
e3: Self::i8,
|
e3: Self::u8,
|
||||||
e2: Self::i8,
|
e2: Self::u8,
|
||||||
e1: Self::i8,
|
e1: Self::u8,
|
||||||
e0: Self::i8,
|
e0: Self::u8,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.cast_sign_i8_u8(e15);
|
self.set_lane___m128i_u8(dst, 0u64, e15);
|
||||||
self.set_lane___m128i_u8(dst, 0u64, __tmp0);
|
self.set_lane___m128i_u8(dst, 1u64, e14);
|
||||||
let __tmp1 = self.cast_sign_i8_u8(e14);
|
self.set_lane___m128i_u8(dst, 2u64, e13);
|
||||||
self.set_lane___m128i_u8(dst, 1u64, __tmp1);
|
self.set_lane___m128i_u8(dst, 3u64, e12);
|
||||||
let __tmp2 = self.cast_sign_i8_u8(e13);
|
self.set_lane___m128i_u8(dst, 4u64, e11);
|
||||||
self.set_lane___m128i_u8(dst, 2u64, __tmp2);
|
self.set_lane___m128i_u8(dst, 5u64, e10);
|
||||||
let __tmp3 = self.cast_sign_i8_u8(e12);
|
self.set_lane___m128i_u8(dst, 6u64, e9);
|
||||||
self.set_lane___m128i_u8(dst, 3u64, __tmp3);
|
self.set_lane___m128i_u8(dst, 7u64, e8);
|
||||||
let __tmp4 = self.cast_sign_i8_u8(e11);
|
self.set_lane___m128i_u8(dst, 8u64, e7);
|
||||||
self.set_lane___m128i_u8(dst, 4u64, __tmp4);
|
self.set_lane___m128i_u8(dst, 9u64, e6);
|
||||||
let __tmp5 = self.cast_sign_i8_u8(e10);
|
self.set_lane___m128i_u8(dst, 10u64, e5);
|
||||||
self.set_lane___m128i_u8(dst, 5u64, __tmp5);
|
self.set_lane___m128i_u8(dst, 11u64, e4);
|
||||||
let __tmp6 = self.cast_sign_i8_u8(e9);
|
self.set_lane___m128i_u8(dst, 12u64, e3);
|
||||||
self.set_lane___m128i_u8(dst, 6u64, __tmp6);
|
self.set_lane___m128i_u8(dst, 13u64, e2);
|
||||||
let __tmp7 = self.cast_sign_i8_u8(e8);
|
self.set_lane___m128i_u8(dst, 14u64, e1);
|
||||||
self.set_lane___m128i_u8(dst, 7u64, __tmp7);
|
self.set_lane___m128i_u8(dst, 15u64, e0);
|
||||||
let __tmp8 = self.cast_sign_i8_u8(e7);
|
|
||||||
self.set_lane___m128i_u8(dst, 8u64, __tmp8);
|
|
||||||
let __tmp9 = self.cast_sign_i8_u8(e6);
|
|
||||||
self.set_lane___m128i_u8(dst, 9u64, __tmp9);
|
|
||||||
let __tmp10 = self.cast_sign_i8_u8(e5);
|
|
||||||
self.set_lane___m128i_u8(dst, 10u64, __tmp10);
|
|
||||||
let __tmp11 = self.cast_sign_i8_u8(e4);
|
|
||||||
self.set_lane___m128i_u8(dst, 11u64, __tmp11);
|
|
||||||
let __tmp12 = self.cast_sign_i8_u8(e3);
|
|
||||||
self.set_lane___m128i_u8(dst, 12u64, __tmp12);
|
|
||||||
let __tmp13 = self.cast_sign_i8_u8(e2);
|
|
||||||
self.set_lane___m128i_u8(dst, 13u64, __tmp13);
|
|
||||||
let __tmp14 = self.cast_sign_i8_u8(e1);
|
|
||||||
self.set_lane___m128i_u8(dst, 14u64, __tmp14);
|
|
||||||
let __tmp15 = self.cast_sign_i8_u8(e0);
|
|
||||||
self.set_lane___m128i_u8(dst, 15u64, __tmp15);
|
|
||||||
}
|
}
|
||||||
fn _mm_packs_epi16(
|
fn _mm_packs_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
|
@ -177,54 +82,54 @@ pub trait Intrinsics: super::Core {
|
||||||
a: Self::__m128i,
|
a: Self::__m128i,
|
||||||
b: Self::__m128i,
|
b: Self::__m128i,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.get_lane___m128i_i16(a, 0u64);
|
let __tmp0 = self.get_lane___m128i_u16(a, 0u64);
|
||||||
let __tmp1 = self.saturate8(__tmp0);
|
let __tmp1 = self.saturate8(__tmp0);
|
||||||
self.set_lane___m128i_i8(dst, 0u64, __tmp1);
|
self.set_lane___m128i_u8(dst, 0u64, __tmp1);
|
||||||
let __tmp2 = self.get_lane___m128i_i16(a, 1u64);
|
let __tmp2 = self.get_lane___m128i_u16(a, 1u64);
|
||||||
let __tmp3 = self.saturate8(__tmp2);
|
let __tmp3 = self.saturate8(__tmp2);
|
||||||
self.set_lane___m128i_i8(dst, 1u64, __tmp3);
|
self.set_lane___m128i_u8(dst, 1u64, __tmp3);
|
||||||
let __tmp4 = self.get_lane___m128i_i16(a, 2u64);
|
let __tmp4 = self.get_lane___m128i_u16(a, 2u64);
|
||||||
let __tmp5 = self.saturate8(__tmp4);
|
let __tmp5 = self.saturate8(__tmp4);
|
||||||
self.set_lane___m128i_i8(dst, 2u64, __tmp5);
|
self.set_lane___m128i_u8(dst, 2u64, __tmp5);
|
||||||
let __tmp6 = self.get_lane___m128i_i16(a, 3u64);
|
let __tmp6 = self.get_lane___m128i_u16(a, 3u64);
|
||||||
let __tmp7 = self.saturate8(__tmp6);
|
let __tmp7 = self.saturate8(__tmp6);
|
||||||
self.set_lane___m128i_i8(dst, 3u64, __tmp7);
|
self.set_lane___m128i_u8(dst, 3u64, __tmp7);
|
||||||
let __tmp8 = self.get_lane___m128i_i16(a, 4u64);
|
let __tmp8 = self.get_lane___m128i_u16(a, 4u64);
|
||||||
let __tmp9 = self.saturate8(__tmp8);
|
let __tmp9 = self.saturate8(__tmp8);
|
||||||
self.set_lane___m128i_i8(dst, 4u64, __tmp9);
|
self.set_lane___m128i_u8(dst, 4u64, __tmp9);
|
||||||
let __tmp10 = self.get_lane___m128i_i16(a, 5u64);
|
let __tmp10 = self.get_lane___m128i_u16(a, 5u64);
|
||||||
let __tmp11 = self.saturate8(__tmp10);
|
let __tmp11 = self.saturate8(__tmp10);
|
||||||
self.set_lane___m128i_i8(dst, 5u64, __tmp11);
|
self.set_lane___m128i_u8(dst, 5u64, __tmp11);
|
||||||
let __tmp12 = self.get_lane___m128i_i16(a, 6u64);
|
let __tmp12 = self.get_lane___m128i_u16(a, 6u64);
|
||||||
let __tmp13 = self.saturate8(__tmp12);
|
let __tmp13 = self.saturate8(__tmp12);
|
||||||
self.set_lane___m128i_i8(dst, 6u64, __tmp13);
|
self.set_lane___m128i_u8(dst, 6u64, __tmp13);
|
||||||
let __tmp14 = self.get_lane___m128i_i16(a, 7u64);
|
let __tmp14 = self.get_lane___m128i_u16(a, 7u64);
|
||||||
let __tmp15 = self.saturate8(__tmp14);
|
let __tmp15 = self.saturate8(__tmp14);
|
||||||
self.set_lane___m128i_i8(dst, 7u64, __tmp15);
|
self.set_lane___m128i_u8(dst, 7u64, __tmp15);
|
||||||
let __tmp16 = self.get_lane___m128i_i16(b, 0u64);
|
let __tmp16 = self.get_lane___m128i_u16(b, 0u64);
|
||||||
let __tmp17 = self.saturate8(__tmp16);
|
let __tmp17 = self.saturate8(__tmp16);
|
||||||
self.set_lane___m128i_i8(dst, 8u64, __tmp17);
|
self.set_lane___m128i_u8(dst, 8u64, __tmp17);
|
||||||
let __tmp18 = self.get_lane___m128i_i16(b, 1u64);
|
let __tmp18 = self.get_lane___m128i_u16(b, 1u64);
|
||||||
let __tmp19 = self.saturate8(__tmp18);
|
let __tmp19 = self.saturate8(__tmp18);
|
||||||
self.set_lane___m128i_i8(dst, 9u64, __tmp19);
|
self.set_lane___m128i_u8(dst, 9u64, __tmp19);
|
||||||
let __tmp20 = self.get_lane___m128i_i16(b, 2u64);
|
let __tmp20 = self.get_lane___m128i_u16(b, 2u64);
|
||||||
let __tmp21 = self.saturate8(__tmp20);
|
let __tmp21 = self.saturate8(__tmp20);
|
||||||
self.set_lane___m128i_i8(dst, 10u64, __tmp21);
|
self.set_lane___m128i_u8(dst, 10u64, __tmp21);
|
||||||
let __tmp22 = self.get_lane___m128i_i16(b, 3u64);
|
let __tmp22 = self.get_lane___m128i_u16(b, 3u64);
|
||||||
let __tmp23 = self.saturate8(__tmp22);
|
let __tmp23 = self.saturate8(__tmp22);
|
||||||
self.set_lane___m128i_i8(dst, 11u64, __tmp23);
|
self.set_lane___m128i_u8(dst, 11u64, __tmp23);
|
||||||
let __tmp24 = self.get_lane___m128i_i16(b, 4u64);
|
let __tmp24 = self.get_lane___m128i_u16(b, 4u64);
|
||||||
let __tmp25 = self.saturate8(__tmp24);
|
let __tmp25 = self.saturate8(__tmp24);
|
||||||
self.set_lane___m128i_i8(dst, 12u64, __tmp25);
|
self.set_lane___m128i_u8(dst, 12u64, __tmp25);
|
||||||
let __tmp26 = self.get_lane___m128i_i16(b, 5u64);
|
let __tmp26 = self.get_lane___m128i_u16(b, 5u64);
|
||||||
let __tmp27 = self.saturate8(__tmp26);
|
let __tmp27 = self.saturate8(__tmp26);
|
||||||
self.set_lane___m128i_i8(dst, 13u64, __tmp27);
|
self.set_lane___m128i_u8(dst, 13u64, __tmp27);
|
||||||
let __tmp28 = self.get_lane___m128i_i16(b, 6u64);
|
let __tmp28 = self.get_lane___m128i_u16(b, 6u64);
|
||||||
let __tmp29 = self.saturate8(__tmp28);
|
let __tmp29 = self.saturate8(__tmp28);
|
||||||
self.set_lane___m128i_i8(dst, 14u64, __tmp29);
|
self.set_lane___m128i_u8(dst, 14u64, __tmp29);
|
||||||
let __tmp30 = self.get_lane___m128i_i16(b, 7u64);
|
let __tmp30 = self.get_lane___m128i_u16(b, 7u64);
|
||||||
let __tmp31 = self.saturate8(__tmp30);
|
let __tmp31 = self.saturate8(__tmp30);
|
||||||
self.set_lane___m128i_i8(dst, 15u64, __tmp31);
|
self.set_lane___m128i_u8(dst, 15u64, __tmp31);
|
||||||
}
|
}
|
||||||
fn _mm_packs_epi32(
|
fn _mm_packs_epi32(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
|
@ -232,30 +137,30 @@ pub trait Intrinsics: super::Core {
|
||||||
a: Self::__m128i,
|
a: Self::__m128i,
|
||||||
b: Self::__m128i,
|
b: Self::__m128i,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.get_lane___m128i_i32(a, 0u64);
|
let __tmp0 = self.get_lane___m128i_u32(a, 0u64);
|
||||||
let __tmp1 = self.saturate16(__tmp0);
|
let __tmp1 = self.saturate16(__tmp0);
|
||||||
self.set_lane___m128i_i16(dst, 0u64, __tmp1);
|
self.set_lane___m128i_u16(dst, 0u64, __tmp1);
|
||||||
let __tmp2 = self.get_lane___m128i_i32(a, 1u64);
|
let __tmp2 = self.get_lane___m128i_u32(a, 1u64);
|
||||||
let __tmp3 = self.saturate16(__tmp2);
|
let __tmp3 = self.saturate16(__tmp2);
|
||||||
self.set_lane___m128i_i16(dst, 1u64, __tmp3);
|
self.set_lane___m128i_u16(dst, 1u64, __tmp3);
|
||||||
let __tmp4 = self.get_lane___m128i_i32(a, 2u64);
|
let __tmp4 = self.get_lane___m128i_u32(a, 2u64);
|
||||||
let __tmp5 = self.saturate16(__tmp4);
|
let __tmp5 = self.saturate16(__tmp4);
|
||||||
self.set_lane___m128i_i16(dst, 2u64, __tmp5);
|
self.set_lane___m128i_u16(dst, 2u64, __tmp5);
|
||||||
let __tmp6 = self.get_lane___m128i_i32(a, 3u64);
|
let __tmp6 = self.get_lane___m128i_u32(a, 3u64);
|
||||||
let __tmp7 = self.saturate16(__tmp6);
|
let __tmp7 = self.saturate16(__tmp6);
|
||||||
self.set_lane___m128i_i16(dst, 3u64, __tmp7);
|
self.set_lane___m128i_u16(dst, 3u64, __tmp7);
|
||||||
let __tmp8 = self.get_lane___m128i_i32(b, 0u64);
|
let __tmp8 = self.get_lane___m128i_u32(b, 0u64);
|
||||||
let __tmp9 = self.saturate16(__tmp8);
|
let __tmp9 = self.saturate16(__tmp8);
|
||||||
self.set_lane___m128i_i16(dst, 4u64, __tmp9);
|
self.set_lane___m128i_u16(dst, 4u64, __tmp9);
|
||||||
let __tmp10 = self.get_lane___m128i_i32(b, 1u64);
|
let __tmp10 = self.get_lane___m128i_u32(b, 1u64);
|
||||||
let __tmp11 = self.saturate16(__tmp10);
|
let __tmp11 = self.saturate16(__tmp10);
|
||||||
self.set_lane___m128i_i16(dst, 5u64, __tmp11);
|
self.set_lane___m128i_u16(dst, 5u64, __tmp11);
|
||||||
let __tmp12 = self.get_lane___m128i_i32(b, 2u64);
|
let __tmp12 = self.get_lane___m128i_u32(b, 2u64);
|
||||||
let __tmp13 = self.saturate16(__tmp12);
|
let __tmp13 = self.saturate16(__tmp12);
|
||||||
self.set_lane___m128i_i16(dst, 6u64, __tmp13);
|
self.set_lane___m128i_u16(dst, 6u64, __tmp13);
|
||||||
let __tmp14 = self.get_lane___m128i_i32(b, 3u64);
|
let __tmp14 = self.get_lane___m128i_u32(b, 3u64);
|
||||||
let __tmp15 = self.saturate16(__tmp14);
|
let __tmp15 = self.saturate16(__tmp14);
|
||||||
self.set_lane___m128i_i16(dst, 7u64, __tmp15);
|
self.set_lane___m128i_u16(dst, 7u64, __tmp15);
|
||||||
}
|
}
|
||||||
fn _mm_packus_epi16(
|
fn _mm_packus_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
|
@ -263,52 +168,52 @@ pub trait Intrinsics: super::Core {
|
||||||
a: Self::__m128i,
|
a: Self::__m128i,
|
||||||
b: Self::__m128i,
|
b: Self::__m128i,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.get_lane___m128i_i16(a, 0u64);
|
let __tmp0 = self.get_lane___m128i_u16(a, 0u64);
|
||||||
let __tmp1 = self.saturate_u8(__tmp0);
|
let __tmp1 = self.saturate_u8(__tmp0);
|
||||||
self.set_lane___m128i_u8(dst, 0u64, __tmp1);
|
self.set_lane___m128i_u8(dst, 0u64, __tmp1);
|
||||||
let __tmp2 = self.get_lane___m128i_i16(a, 1u64);
|
let __tmp2 = self.get_lane___m128i_u16(a, 1u64);
|
||||||
let __tmp3 = self.saturate_u8(__tmp2);
|
let __tmp3 = self.saturate_u8(__tmp2);
|
||||||
self.set_lane___m128i_u8(dst, 1u64, __tmp3);
|
self.set_lane___m128i_u8(dst, 1u64, __tmp3);
|
||||||
let __tmp4 = self.get_lane___m128i_i16(a, 2u64);
|
let __tmp4 = self.get_lane___m128i_u16(a, 2u64);
|
||||||
let __tmp5 = self.saturate_u8(__tmp4);
|
let __tmp5 = self.saturate_u8(__tmp4);
|
||||||
self.set_lane___m128i_u8(dst, 2u64, __tmp5);
|
self.set_lane___m128i_u8(dst, 2u64, __tmp5);
|
||||||
let __tmp6 = self.get_lane___m128i_i16(a, 3u64);
|
let __tmp6 = self.get_lane___m128i_u16(a, 3u64);
|
||||||
let __tmp7 = self.saturate_u8(__tmp6);
|
let __tmp7 = self.saturate_u8(__tmp6);
|
||||||
self.set_lane___m128i_u8(dst, 3u64, __tmp7);
|
self.set_lane___m128i_u8(dst, 3u64, __tmp7);
|
||||||
let __tmp8 = self.get_lane___m128i_i16(a, 4u64);
|
let __tmp8 = self.get_lane___m128i_u16(a, 4u64);
|
||||||
let __tmp9 = self.saturate_u8(__tmp8);
|
let __tmp9 = self.saturate_u8(__tmp8);
|
||||||
self.set_lane___m128i_u8(dst, 4u64, __tmp9);
|
self.set_lane___m128i_u8(dst, 4u64, __tmp9);
|
||||||
let __tmp10 = self.get_lane___m128i_i16(a, 5u64);
|
let __tmp10 = self.get_lane___m128i_u16(a, 5u64);
|
||||||
let __tmp11 = self.saturate_u8(__tmp10);
|
let __tmp11 = self.saturate_u8(__tmp10);
|
||||||
self.set_lane___m128i_u8(dst, 5u64, __tmp11);
|
self.set_lane___m128i_u8(dst, 5u64, __tmp11);
|
||||||
let __tmp12 = self.get_lane___m128i_i16(a, 6u64);
|
let __tmp12 = self.get_lane___m128i_u16(a, 6u64);
|
||||||
let __tmp13 = self.saturate_u8(__tmp12);
|
let __tmp13 = self.saturate_u8(__tmp12);
|
||||||
self.set_lane___m128i_u8(dst, 6u64, __tmp13);
|
self.set_lane___m128i_u8(dst, 6u64, __tmp13);
|
||||||
let __tmp14 = self.get_lane___m128i_i16(a, 7u64);
|
let __tmp14 = self.get_lane___m128i_u16(a, 7u64);
|
||||||
let __tmp15 = self.saturate_u8(__tmp14);
|
let __tmp15 = self.saturate_u8(__tmp14);
|
||||||
self.set_lane___m128i_u8(dst, 7u64, __tmp15);
|
self.set_lane___m128i_u8(dst, 7u64, __tmp15);
|
||||||
let __tmp16 = self.get_lane___m128i_i16(b, 0u64);
|
let __tmp16 = self.get_lane___m128i_u16(b, 0u64);
|
||||||
let __tmp17 = self.saturate_u8(__tmp16);
|
let __tmp17 = self.saturate_u8(__tmp16);
|
||||||
self.set_lane___m128i_u8(dst, 8u64, __tmp17);
|
self.set_lane___m128i_u8(dst, 8u64, __tmp17);
|
||||||
let __tmp18 = self.get_lane___m128i_i16(b, 1u64);
|
let __tmp18 = self.get_lane___m128i_u16(b, 1u64);
|
||||||
let __tmp19 = self.saturate_u8(__tmp18);
|
let __tmp19 = self.saturate_u8(__tmp18);
|
||||||
self.set_lane___m128i_u8(dst, 9u64, __tmp19);
|
self.set_lane___m128i_u8(dst, 9u64, __tmp19);
|
||||||
let __tmp20 = self.get_lane___m128i_i16(b, 2u64);
|
let __tmp20 = self.get_lane___m128i_u16(b, 2u64);
|
||||||
let __tmp21 = self.saturate_u8(__tmp20);
|
let __tmp21 = self.saturate_u8(__tmp20);
|
||||||
self.set_lane___m128i_u8(dst, 10u64, __tmp21);
|
self.set_lane___m128i_u8(dst, 10u64, __tmp21);
|
||||||
let __tmp22 = self.get_lane___m128i_i16(b, 3u64);
|
let __tmp22 = self.get_lane___m128i_u16(b, 3u64);
|
||||||
let __tmp23 = self.saturate_u8(__tmp22);
|
let __tmp23 = self.saturate_u8(__tmp22);
|
||||||
self.set_lane___m128i_u8(dst, 11u64, __tmp23);
|
self.set_lane___m128i_u8(dst, 11u64, __tmp23);
|
||||||
let __tmp24 = self.get_lane___m128i_i16(b, 4u64);
|
let __tmp24 = self.get_lane___m128i_u16(b, 4u64);
|
||||||
let __tmp25 = self.saturate_u8(__tmp24);
|
let __tmp25 = self.saturate_u8(__tmp24);
|
||||||
self.set_lane___m128i_u8(dst, 12u64, __tmp25);
|
self.set_lane___m128i_u8(dst, 12u64, __tmp25);
|
||||||
let __tmp26 = self.get_lane___m128i_i16(b, 5u64);
|
let __tmp26 = self.get_lane___m128i_u16(b, 5u64);
|
||||||
let __tmp27 = self.saturate_u8(__tmp26);
|
let __tmp27 = self.saturate_u8(__tmp26);
|
||||||
self.set_lane___m128i_u8(dst, 13u64, __tmp27);
|
self.set_lane___m128i_u8(dst, 13u64, __tmp27);
|
||||||
let __tmp28 = self.get_lane___m128i_i16(b, 6u64);
|
let __tmp28 = self.get_lane___m128i_u16(b, 6u64);
|
||||||
let __tmp29 = self.saturate_u8(__tmp28);
|
let __tmp29 = self.saturate_u8(__tmp28);
|
||||||
self.set_lane___m128i_u8(dst, 14u64, __tmp29);
|
self.set_lane___m128i_u8(dst, 14u64, __tmp29);
|
||||||
let __tmp30 = self.get_lane___m128i_i16(b, 7u64);
|
let __tmp30 = self.get_lane___m128i_u16(b, 7u64);
|
||||||
let __tmp31 = self.saturate_u8(__tmp30);
|
let __tmp31 = self.saturate_u8(__tmp30);
|
||||||
self.set_lane___m128i_u8(dst, 15u64, __tmp31);
|
self.set_lane___m128i_u8(dst, 15u64, __tmp31);
|
||||||
}
|
}
|
||||||
|
|
@ -318,52 +223,52 @@ pub trait Intrinsics: super::Core {
|
||||||
a: Self::__m128i,
|
a: Self::__m128i,
|
||||||
b: Self::__m128i,
|
b: Self::__m128i,
|
||||||
) {
|
) {
|
||||||
let __tmp0 = self.get_lane___m128i_i32(a, 0u64);
|
let __tmp0 = self.get_lane___m128i_u32(a, 0u64);
|
||||||
let __tmp1 = self.saturate_u16(__tmp0);
|
let __tmp1 = self.saturate_u16(__tmp0);
|
||||||
self.set_lane___m128i_u16(dst, 0u64, __tmp1);
|
self.set_lane___m128i_u16(dst, 0u64, __tmp1);
|
||||||
let __tmp2 = self.get_lane___m128i_i32(a, 1u64);
|
let __tmp2 = self.get_lane___m128i_u32(a, 1u64);
|
||||||
let __tmp3 = self.saturate_u16(__tmp2);
|
let __tmp3 = self.saturate_u16(__tmp2);
|
||||||
self.set_lane___m128i_u16(dst, 1u64, __tmp3);
|
self.set_lane___m128i_u16(dst, 1u64, __tmp3);
|
||||||
let __tmp4 = self.get_lane___m128i_i32(a, 2u64);
|
let __tmp4 = self.get_lane___m128i_u32(a, 2u64);
|
||||||
let __tmp5 = self.saturate_u16(__tmp4);
|
let __tmp5 = self.saturate_u16(__tmp4);
|
||||||
self.set_lane___m128i_u16(dst, 2u64, __tmp5);
|
self.set_lane___m128i_u16(dst, 2u64, __tmp5);
|
||||||
let __tmp6 = self.get_lane___m128i_i32(a, 3u64);
|
let __tmp6 = self.get_lane___m128i_u32(a, 3u64);
|
||||||
let __tmp7 = self.saturate_u16(__tmp6);
|
let __tmp7 = self.saturate_u16(__tmp6);
|
||||||
self.set_lane___m128i_u16(dst, 3u64, __tmp7);
|
self.set_lane___m128i_u16(dst, 3u64, __tmp7);
|
||||||
let __tmp8 = self.get_lane___m128i_i32(b, 0u64);
|
let __tmp8 = self.get_lane___m128i_u32(b, 0u64);
|
||||||
let __tmp9 = self.saturate_u16(__tmp8);
|
let __tmp9 = self.saturate_u16(__tmp8);
|
||||||
self.set_lane___m128i_u16(dst, 4u64, __tmp9);
|
self.set_lane___m128i_u16(dst, 4u64, __tmp9);
|
||||||
let __tmp10 = self.get_lane___m128i_i32(b, 1u64);
|
let __tmp10 = self.get_lane___m128i_u32(b, 1u64);
|
||||||
let __tmp11 = self.saturate_u16(__tmp10);
|
let __tmp11 = self.saturate_u16(__tmp10);
|
||||||
self.set_lane___m128i_u16(dst, 5u64, __tmp11);
|
self.set_lane___m128i_u16(dst, 5u64, __tmp11);
|
||||||
let __tmp12 = self.get_lane___m128i_i32(b, 2u64);
|
let __tmp12 = self.get_lane___m128i_u32(b, 2u64);
|
||||||
let __tmp13 = self.saturate_u16(__tmp12);
|
let __tmp13 = self.saturate_u16(__tmp12);
|
||||||
self.set_lane___m128i_u16(dst, 6u64, __tmp13);
|
self.set_lane___m128i_u16(dst, 6u64, __tmp13);
|
||||||
let __tmp14 = self.get_lane___m128i_i32(b, 3u64);
|
let __tmp14 = self.get_lane___m128i_u32(b, 3u64);
|
||||||
let __tmp15 = self.saturate_u16(__tmp14);
|
let __tmp15 = self.saturate_u16(__tmp14);
|
||||||
self.set_lane___m128i_u16(dst, 7u64, __tmp15);
|
self.set_lane___m128i_u16(dst, 7u64, __tmp15);
|
||||||
}
|
}
|
||||||
fn _mm_abs_epi8(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
fn _mm_abs_epi8(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
||||||
for j in 0u64..=15u64 {
|
for j in 0u64..=15u64 {
|
||||||
let i = (j * 8u64);
|
let i = (j * 8u64);
|
||||||
let __tmp0 = self.get_lane___m128i_i8(a, (i / 8u64));
|
let __tmp0 = self.get_lane___m128i_u8(a, (i / 8u64));
|
||||||
let __tmp1 = self.abs_i8(__tmp0);
|
let __tmp1 = self.abs_u8(__tmp0);
|
||||||
self.set_lane___m128i_u8(dst, (i / 8u64), __tmp1);
|
self.set_lane___m128i_u8(dst, (i / 8u64), __tmp1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn _mm_abs_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
fn _mm_abs_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
||||||
for j in 0u64..=7u64 {
|
for j in 0u64..=7u64 {
|
||||||
let i = (j * 16u64);
|
let i = (j * 16u64);
|
||||||
let __tmp0 = self.get_lane___m128i_i16(a, (i / 16u64));
|
let __tmp0 = self.get_lane___m128i_u16(a, (i / 16u64));
|
||||||
let __tmp1 = self.abs_i16(__tmp0);
|
let __tmp1 = self.abs_u16(__tmp0);
|
||||||
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp1);
|
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn _mm_abs_epi32(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
fn _mm_abs_epi32(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
|
||||||
for j in 0u64..=3u64 {
|
for j in 0u64..=3u64 {
|
||||||
let i = (j * 32u64);
|
let i = (j * 32u64);
|
||||||
let __tmp0 = self.get_lane___m128i_i32(a, (i / 32u64));
|
let __tmp0 = self.get_lane___m128i_u32(a, (i / 32u64));
|
||||||
let __tmp1 = self.abs_i32(__tmp0);
|
let __tmp1 = self.abs_u32(__tmp0);
|
||||||
self.set_lane___m128i_u32(dst, (i / 32u64), __tmp1);
|
self.set_lane___m128i_u32(dst, (i / 32u64), __tmp1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -371,34 +276,15 @@ pub trait Intrinsics: super::Core {
|
||||||
pub mod soft_arch {
|
pub mod soft_arch {
|
||||||
pub use super::super::soft_arch_types::*;
|
pub use super::super::soft_arch_types::*;
|
||||||
use super::Intrinsics;
|
use super::Intrinsics;
|
||||||
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
|
||||||
super::super::ValueCore._mm_add_epi16(&mut output, a, b);
|
|
||||||
output
|
|
||||||
}
|
|
||||||
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
|
||||||
super::super::ValueCore._mm_add_epi32(&mut output, a, b);
|
|
||||||
output
|
|
||||||
}
|
|
||||||
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
|
||||||
super::super::ValueCore._mm_add_epi64(&mut output, a, b);
|
|
||||||
output
|
|
||||||
}
|
|
||||||
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
|
||||||
super::super::ValueCore._mm_adds_epi16(&mut output, a, b);
|
|
||||||
output
|
|
||||||
}
|
|
||||||
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
|
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0);
|
super::super::ValueCore._mm_set_epi64x(&mut output, e1 as _, e0 as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
|
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_setr_epi32(&mut output, e3, e2, e1, e0);
|
super::super::ValueCore
|
||||||
|
._mm_setr_epi32(&mut output, e3 as _, e2 as _, e1 as _, e0 as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_setr_epi16(
|
pub fn _mm_setr_epi16(
|
||||||
|
|
@ -413,7 +299,17 @@ pub mod soft_arch {
|
||||||
) -> __m128i {
|
) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore
|
super::super::ValueCore
|
||||||
._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0);
|
._mm_setr_epi16(
|
||||||
|
&mut output,
|
||||||
|
e7 as _,
|
||||||
|
e6 as _,
|
||||||
|
e5 as _,
|
||||||
|
e4 as _,
|
||||||
|
e3 as _,
|
||||||
|
e2 as _,
|
||||||
|
e1 as _,
|
||||||
|
e0 as _,
|
||||||
|
);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_setr_epi8(
|
pub fn _mm_setr_epi8(
|
||||||
|
|
@ -438,58 +334,58 @@ pub mod soft_arch {
|
||||||
super::super::ValueCore
|
super::super::ValueCore
|
||||||
._mm_setr_epi8(
|
._mm_setr_epi8(
|
||||||
&mut output,
|
&mut output,
|
||||||
e15,
|
e15 as _,
|
||||||
e14,
|
e14 as _,
|
||||||
e13,
|
e13 as _,
|
||||||
e12,
|
e12 as _,
|
||||||
e11,
|
e11 as _,
|
||||||
e10,
|
e10 as _,
|
||||||
e9,
|
e9 as _,
|
||||||
e8,
|
e8 as _,
|
||||||
e7,
|
e7 as _,
|
||||||
e6,
|
e6 as _,
|
||||||
e5,
|
e5 as _,
|
||||||
e4,
|
e4 as _,
|
||||||
e3,
|
e3 as _,
|
||||||
e2,
|
e2 as _,
|
||||||
e1,
|
e1 as _,
|
||||||
e0,
|
e0 as _,
|
||||||
);
|
);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
|
super::super::ValueCore._mm_packs_epi16(&mut output, a as _, b as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packs_epi32(&mut output, a, b);
|
super::super::ValueCore._mm_packs_epi32(&mut output, a as _, b as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packus_epi16(&mut output, a, b);
|
super::super::ValueCore._mm_packus_epi16(&mut output, a as _, b as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packus_epi32(&mut output, a, b);
|
super::super::ValueCore._mm_packus_epi32(&mut output, a as _, b as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_abs_epi8(&mut output, a);
|
super::super::ValueCore._mm_abs_epi8(&mut output, a as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_abs_epi16(&mut output, a);
|
super::super::ValueCore._mm_abs_epi16(&mut output, a as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_abs_epi32(&mut output, a);
|
super::super::ValueCore._mm_abs_epi32(&mut output, a as _);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -497,128 +393,92 @@ pub mod soft_arch {
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
use super::super::compare_test_helper::hard_soft_same_128;
|
use super::super::compare_test_helper::hard_soft_same_128;
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_add_epi16() {
|
|
||||||
hard_soft_same_128! {
|
|
||||||
{ let a = _mm_setr_epi16(- 24391i16, 19541i16, - 16509i16, 7733i16, -
|
|
||||||
15140i16, 30719i16, 16513i16, 22878i16); let b = _mm_setr_epi16(23986i16,
|
|
||||||
27900i16, - 8343i16, - 10648i16, 4841i16, 14610i16, - 17251i16, - 3971i16);
|
|
||||||
_mm_add_epi16(a, b) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn _mm_add_epi32() {
|
|
||||||
hard_soft_same_128! {
|
|
||||||
{ let a = _mm_setr_epi16(22390i16, - 23547i16, 15401i16, 15832i16, -
|
|
||||||
14212i16, - 1286i16, - 18062i16, 22296i16); let b = _mm_setr_epi16(18077i16,
|
|
||||||
23617i16, - 9205i16, 21233i16, - 4332i16, - 31339i16, 23623i16, - 22080i16);
|
|
||||||
_mm_add_epi32(a, b) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn _mm_add_epi64() {
|
|
||||||
hard_soft_same_128! {
|
|
||||||
{ let a = _mm_setr_epi16(- 1436i16, - 30227i16, 8629i16, 10922i16, -
|
|
||||||
16731i16, - 1013i16, - 14310i16, 2892i16); let b = _mm_setr_epi16(- 28568i16,
|
|
||||||
12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16, 4197i16, 1829i16);
|
|
||||||
_mm_add_epi64(a, b) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn _mm_adds_epi16() {
|
|
||||||
hard_soft_same_128! {
|
|
||||||
{ let a = _mm_setr_epi16(9149i16, 18759i16, 30885i16, - 3879i16, 21600i16,
|
|
||||||
24454i16, 23524i16, 10765i16); let b = _mm_setr_epi16(32539i16, 26890i16, -
|
|
||||||
3892i16, 4386i16, 18704i16, 8253i16, - 29217i16, 32013i16); _mm_adds_epi16(a,
|
|
||||||
b) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn _mm_set_epi64x() {
|
fn _mm_set_epi64x() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e1 = - 589376611403916251i64; let e0 = 3902096933100612535i64;
|
{ let e1 = 1041352657357235268i64; let e0 = 1955209120357942897i64;
|
||||||
_mm_set_epi64x(e1, e0) }
|
_mm_set_epi64x(e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi32() {
|
fn _mm_setr_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e3 = 1973077588i32; let e2 = 650443732i32; let e1 = - 2133091191i32;
|
{ let e3 = 1455669123i32; let e2 = 247864885i32; let e1 = 1390920924i32; let
|
||||||
let e0 = - 352824609i32; _mm_setr_epi32(e3, e2, e1, e0) }
|
e0 = 1068333055i32; _mm_setr_epi32(e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi16() {
|
fn _mm_setr_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e7 = - 31392i16; let e6 = - 14015i16; let e5 = - 32565i16; let e4 = -
|
{ let e7 = 16513i16; let e6 = 22878i16; let e5 = 23986i16; let e4 = 27900i16;
|
||||||
11312i16; let e3 = - 4934i16; let e2 = - 19283i16; let e1 = - 27533i16; let
|
let e3 = - 8343i16; let e2 = - 10648i16; let e1 = 4841i16; let e0 = 14610i16;
|
||||||
e0 = - 9939i16; _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi8() {
|
fn _mm_setr_epi8() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e15 = - 46i8; let e14 = - 46i8; let e13 = - 125i8; let e12 = 81i8; let
|
{ let e15 = - 99i8; let e14 = 125i8; let e13 = 118i8; let e12 = 5i8; let e11
|
||||||
e11 = - 56i8; let e10 = - 75i8; let e9 = 54i8; let e8 = 109i8; let e7 = 29i8;
|
= 41i8; let e10 = - 40i8; let e9 = 124i8; let e8 = - 6i8; let e7 = 114i8; let
|
||||||
let e6 = 41i8; let e5 = - 21i8; let e4 = 39i8; let e3 = 89i8; let e2 = -
|
e6 = 24i8; let e5 = - 99i8; let e4 = 65i8; let e3 = 11i8; let e2 = - 15i8;
|
||||||
36i8; let e1 = - 88i8; let e0 = 11i8; _mm_setr_epi8(e15, e14, e13, e12, e11,
|
let e1 = 20i8; let e0 = - 107i8; _mm_setr_epi8(e15, e14, e13, e12, e11, e10,
|
||||||
e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
|
e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi16() {
|
fn _mm_packs_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16,
|
{ let a = _mm_setr_epi16(23623i16, - 22080i16, - 1436i16, - 30227i16,
|
||||||
17046i16, - 12772i16, - 28109i16); let b = _mm_setr_epi16(7409i16, -
|
8629i16, 10922i16, - 16731i16, - 1013i16); let b = _mm_setr_epi16(- 14310i16,
|
||||||
30136i16, - 28607i16, - 1975i16, 23451i16, - 32657i16, - 28920i16, -
|
2892i16, - 28568i16, 12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16);
|
||||||
2519i16); _mm_packs_epi16(a, b) }
|
_mm_packs_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi32() {
|
fn _mm_packs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 7284i16, 7023i16, - 31688i16, 4770i16, 28846i16, -
|
{ let a = _mm_setr_epi16(4197i16, 1829i16, 9149i16, 18759i16, 30885i16, -
|
||||||
13549i16, 13781i16, - 10474i16); let b = _mm_setr_epi16(12050i16, - 782i16,
|
3879i16, 21600i16, 24454i16); let b = _mm_setr_epi16(23524i16, 10765i16,
|
||||||
8840i16, 8344i16, 9169i16, 303i16, - 6879i16, - 28778i16); _mm_packs_epi32(a,
|
32539i16, 26890i16, - 3892i16, 4386i16, 18704i16, 8253i16);
|
||||||
b) }
|
_mm_packs_epi32(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi16() {
|
fn _mm_packus_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 11301i16, 10802i16, 18689i16, 12867i16, 18892i16,
|
{ let a = _mm_setr_epi16(- 29217i16, 32013i16, 7448i16, 2172i16, - 14764i16,
|
||||||
20484i16, - 4754i16, - 28358i16); let b = _mm_setr_epi16(27422i16, -
|
- 1068i16, - 25463i16, 21215i16); let b = _mm_setr_epi16(- 31392i16, -
|
||||||
14791i16, - 32685i16, - 4504i16, - 19709i16, 1090i16, 1898i16, 11224i16);
|
14015i16, - 32565i16, - 11312i16, - 4934i16, - 19283i16, - 27533i16, -
|
||||||
_mm_packus_epi16(a, b) }
|
9939i16); _mm_packus_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi32() {
|
fn _mm_packus_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(27569i16, 26879i16, 11743i16, 1055i16, 5327i16, -
|
{ let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16,
|
||||||
1490i16, - 6436i16, 1056i16); let b = _mm_setr_epi16(- 16744i16, 28829i16,
|
- 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16,
|
||||||
23772i16, - 31202i16, 9764i16, 16146i16, 29119i16, 1909i16);
|
2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16);
|
||||||
_mm_packus_epi32(a, b) }
|
_mm_packus_epi32(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi8() {
|
fn _mm_abs_epi8() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 4803i16, - 23533i16, - 22862i16, - 25389i16, -
|
{ let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16,
|
||||||
16117i16, - 21476i16, 30010i16, - 15743i16); _mm_abs_epi8(a) }
|
17046i16, - 12772i16, - 28109i16); _mm_abs_epi8(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi16() {
|
fn _mm_abs_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 20689i16, - 11653i16, 22142i16, - 16597i16,
|
{ let a = _mm_setr_epi16(7409i16, - 30136i16, - 28607i16, - 1975i16,
|
||||||
28514i16, - 15735i16, - 6977i16, - 5493i16); _mm_abs_epi16(a) }
|
23451i16, - 32657i16, - 28920i16, - 2519i16); _mm_abs_epi16(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi32() {
|
fn _mm_abs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(17059i16, 15712i16, 32305i16, - 23877i16, 29411i16,
|
{ let a = _mm_setr_epi16(- 7284i16, 7023i16, - 31688i16, 4770i16, 28846i16, -
|
||||||
- 3868i16, - 10128i16, 25298i16); _mm_abs_epi32(a) }
|
13549i16, 13781i16, - 10474i16); _mm_abs_epi32(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,41 +10,28 @@ pub trait Core {
|
||||||
type u32: Copy;
|
type u32: Copy;
|
||||||
type u64: Copy;
|
type u64: Copy;
|
||||||
|
|
||||||
type i8: Copy;
|
|
||||||
type i16: Copy;
|
|
||||||
type i32: Copy;
|
|
||||||
type i64: Copy;
|
|
||||||
|
|
||||||
type __m128i: Copy;
|
type __m128i: Copy;
|
||||||
|
|
||||||
fn cast_sign_i8_u8(&mut self, value: Self::i8) -> Self::u8;
|
|
||||||
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16;
|
|
||||||
fn cast_sign_i32_u32(&mut self, value: Self::i32) -> Self::u32;
|
|
||||||
fn cast_sign_i64_u64(&mut self, value: Self::i64) -> Self::u64;
|
|
||||||
|
|
||||||
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8;
|
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8;
|
||||||
fn get_lane___m128i_i8(&mut self, value: Self::__m128i, idx: u64) -> Self::i8;
|
|
||||||
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
|
|
||||||
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
|
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
|
||||||
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32;
|
|
||||||
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64;
|
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64;
|
||||||
fn get_lane___m128i_i64(&mut self, value: Self::__m128i, idx: u64) -> Self::i64;
|
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
||||||
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
|
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
||||||
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16);
|
|
||||||
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32);
|
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32);
|
||||||
fn set_lane___m128i_i32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i32);
|
|
||||||
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64);
|
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64);
|
||||||
fn set_lane___m128i_i64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i64);
|
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
|
fn saturate8(&mut self, elem: Self::u16) -> Self::u8;
|
||||||
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
fn saturate_u8(&mut self, elem: Self::u16) -> Self::u8;
|
||||||
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
|
fn saturate16(&mut self, elem: Self::u32) -> Self::u16;
|
||||||
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
|
fn saturate_u16(&mut self, elem: Self::u32) -> Self::u16;
|
||||||
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64;
|
fn add_64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64;
|
||||||
|
|
||||||
|
fn abs_u8(&mut self, x: Self::u8) -> Self::u8;
|
||||||
|
fn abs_u16(&mut self, x: Self::u16) -> Self::u16;
|
||||||
|
fn abs_u32(&mut self, x: Self::u32) -> Self::u32;
|
||||||
|
fn abs_u64(&mut self, x: Self::u64) -> Self::u64;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ValueCore;
|
pub struct ValueCore;
|
||||||
|
|
@ -55,40 +42,13 @@ impl Core for ValueCore {
|
||||||
type u32 = u32;
|
type u32 = u32;
|
||||||
type u64 = u64;
|
type u64 = u64;
|
||||||
|
|
||||||
type i8 = i8;
|
|
||||||
type i16 = i16;
|
|
||||||
type i32 = i32;
|
|
||||||
type i64 = i64;
|
|
||||||
|
|
||||||
type __m128i = [u8; 16];
|
type __m128i = [u8; 16];
|
||||||
|
|
||||||
////// CAST
|
|
||||||
|
|
||||||
fn cast_sign_i8_u8(&mut self, value: Self::i8) -> Self::u8 {
|
|
||||||
value as _
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16 {
|
|
||||||
value as _
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cast_sign_i32_u32(&mut self, value: Self::i32) -> Self::u32 {
|
|
||||||
value as _
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cast_sign_i64_u64(&mut self, value: Self::i64) -> Self::u64 {
|
|
||||||
value as _
|
|
||||||
}
|
|
||||||
|
|
||||||
////// GET LANE
|
////// GET LANE
|
||||||
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8 {
|
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8 {
|
||||||
value[idx as usize]
|
value[idx as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i8(&mut self, value: Self::__m128i, idx: u64) -> Self::i8 {
|
|
||||||
self.get_lane___m128i_u8(value, idx) as i8
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 {
|
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 {
|
||||||
let mut acc = 0;
|
let mut acc = 0;
|
||||||
for i in 0..2 {
|
for i in 0..2 {
|
||||||
|
|
@ -99,10 +59,6 @@ impl Core for ValueCore {
|
||||||
acc
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
|
|
||||||
self.get_lane___m128i_u16(value, idx) as i16
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
|
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
|
||||||
let mut acc = 0;
|
let mut acc = 0;
|
||||||
for i in 0..4 {
|
for i in 0..4 {
|
||||||
|
|
@ -113,10 +69,6 @@ impl Core for ValueCore {
|
||||||
acc
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32 {
|
|
||||||
self.get_lane___m128i_u32(value, idx) as i32
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64 {
|
fn get_lane___m128i_u64(&mut self, value: Self::__m128i, idx: u64) -> Self::u64 {
|
||||||
let mut acc = 0;
|
let mut acc = 0;
|
||||||
for i in 0..8 {
|
for i in 0..8 {
|
||||||
|
|
@ -127,20 +79,12 @@ impl Core for ValueCore {
|
||||||
acc
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i64(&mut self, value: Self::__m128i, idx: u64) -> Self::i64 {
|
|
||||||
self.get_lane___m128i_u64(value, idx) as i64
|
|
||||||
}
|
|
||||||
|
|
||||||
////// SET LANE
|
////// SET LANE
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
||||||
place[idx as usize] = value;
|
place[idx as usize] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) {
|
|
||||||
self.set_lane___m128i_u8(place, idx, value as u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
||||||
for i in 0..2 {
|
for i in 0..2 {
|
||||||
let value = ((value >> 8 * i) & 0xFF) as u8;
|
let value = ((value >> 8 * i) & 0xFF) as u8;
|
||||||
|
|
@ -148,10 +92,6 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16) {
|
|
||||||
self.set_lane___m128i_u16(place, idx, value as u16);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32) {
|
fn set_lane___m128i_u32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u32) {
|
||||||
for i in 0..4 {
|
for i in 0..4 {
|
||||||
let value = ((value >> 8 * i) & 0xFF) as u8;
|
let value = ((value >> 8 * i) & 0xFF) as u8;
|
||||||
|
|
@ -159,10 +99,6 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i32(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i32) {
|
|
||||||
self.set_lane___m128i_u32(place, idx, value as u32);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64) {
|
fn set_lane___m128i_u64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u64) {
|
||||||
for i in 0..8 {
|
for i in 0..8 {
|
||||||
let value = ((value >> 8 * i) & 0xFF) as u8;
|
let value = ((value >> 8 * i) & 0xFF) as u8;
|
||||||
|
|
@ -170,33 +106,45 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i64(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i64) {
|
|
||||||
self.set_lane___m128i_u32(place, idx, value as u32);
|
|
||||||
}
|
|
||||||
|
|
||||||
////// HELPERS
|
////// HELPERS
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
|
fn saturate8(&mut self, elem: Self::u16) -> Self::u8 {
|
||||||
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
|
let clamp = (elem as i16).clamp(i8::MIN as i16, i8::MAX as i16);
|
||||||
clamp as i8
|
clamp as i8 as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8 {
|
fn saturate_u8(&mut self, elem: Self::u16) -> Self::u8 {
|
||||||
let clamp = elem.clamp(0, u8::MAX as i16);
|
let clamp = (elem as i16).clamp(0, u8::MAX as i16);
|
||||||
clamp as u8
|
clamp as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
fn saturate16(&mut self, elem: Self::i32) -> Self::i16 {
|
fn saturate16(&mut self, elem: Self::u32) -> Self::u16 {
|
||||||
let clamp = elem.clamp(i16::MIN as i32, i16::MAX as i32);
|
let clamp = (elem as i32).clamp(i16::MIN as i32, i16::MAX as i32);
|
||||||
clamp as i16
|
clamp as i16 as u16
|
||||||
}
|
}
|
||||||
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16 {
|
fn saturate_u16(&mut self, elem: Self::u32) -> Self::u16 {
|
||||||
let clamp = elem.clamp(0, u16::MAX as i32);
|
let clamp = (elem as i32).clamp(0, u16::MAX as i32);
|
||||||
clamp as u16
|
clamp as u16
|
||||||
}
|
}
|
||||||
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64 {
|
fn add_64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64 {
|
||||||
lhs.wrapping_add(rhs)
|
lhs.wrapping_add(rhs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn abs_u8(&mut self, x: Self::u8) -> Self::u8 {
|
||||||
|
(x as i8).abs() as u8
|
||||||
|
}
|
||||||
|
|
||||||
|
fn abs_u16(&mut self, x: Self::u16) -> Self::u16 {
|
||||||
|
(x as i16).abs() as u16
|
||||||
|
}
|
||||||
|
|
||||||
|
fn abs_u32(&mut self, x: Self::u32) -> Self::u32 {
|
||||||
|
(x as i32).abs() as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
fn abs_u64(&mut self, x: Self::u64) -> Self::u64 {
|
||||||
|
(x as i64).abs() as u64
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod soft_arch_types {
|
mod soft_arch_types {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue