finish abs

This commit is contained in:
nora 2024-01-06 19:49:40 +01:00
parent a0b75b348b
commit f2c631106a
4 changed files with 547 additions and 120 deletions

View file

@ -6,6 +6,9 @@ use eyre::{bail, Context, OptionExt, Result};
use rand::{rngs::SmallRng, Rng, SeedableRng};
use syn::Block;
// If a function is polymorphic, the argument type will be "mangled" into the name.
const POLYMORPHIC_FNS: &[&str] = &["ABS"];
pub fn generate(intrinsics: &[Intrinsic]) -> Result<syn::File> {
let blanket: syn::ItemImpl = syn::parse_quote! {
impl<C: super::Core> Intrinsics for C {}
@ -24,7 +27,7 @@ pub fn generate(intrinsics: &[Intrinsic]) -> Result<syn::File> {
let test = generate_test_module(intrinsics).wrap_err("generating test module")?;
let mut file: syn::File = syn::parse_quote! {};
let mut file: syn::File = syn::parse_quote! { #![allow(unused_parens)] };
file.items = vec![
blanket.into(),
trait_def.into(),
@ -254,10 +257,11 @@ fn generate_body(instr: &Intrinsic) -> Result<syn::Block> {
}
fn gen_idx(
method_prefix: &str,
lhs: Expr,
idx: Expr,
type_of_ident: &impl Fn(&str) -> Result<VariableType>,
) -> Result<()> {
) -> Result<(syn::Ident, syn::Ident, syn::Expr, VariableType)> {
let Expr::Ident(identifier) = lhs else {
bail!("lhs of indexing must be identifier");
};
@ -286,14 +290,14 @@ fn gen_idx(
},
Expr::Ident(low),
) => {
let Expr::Ident(high_ident) = *rhs else {
bail!("rhs of lhs of + indexing must be ident");
let Expr::Ident(ref high_ident) = *lhs else {
bail!("lhs of lhs of + indexing must be ident, was {rhs:?}");
};
let Expr::Int(high_offset) = *lhs else {
bail!("lhs of lhs of + indexing must be ident");
let Expr::Int(ref high_offset) = *rhs else {
bail!("rhs of lhs of + indexing must be ident, was {lhs:?}");
};
if high_ident != low {
if *high_ident != low {
bail!("{high_ident} != {low}");
}
let size = high_offset + 1;
@ -316,8 +320,8 @@ fn gen_idx(
let rust_type = ty.rust_type();
let identifier = ident(&identifier);
let method = ident(&format!("get_lane_{raw}_{rust_type}"));
Ok(())
let method = ident(&format!("{method_prefix}_lane_{raw}_{rust_type}"));
Ok((identifier, method, lane_idx, ty))
}
fn gen_block(
@ -332,41 +336,9 @@ fn gen_block(
lhs: Expr::Index { lhs, idx },
rhs,
} => {
let Expr::Ident(identifier) = *lhs else {
bail!("lhs of indexing must be identifier");
};
let Expr::Range { left, right } = *idx else {
bail!("idx argument must be range");
};
let Expr::Int(high) = *left else {
bail!("lhs of range must be int");
};
let Expr::Int(low) = *right else {
bail!("rhs of range must be int");
};
if high < low {
bail!("range must be HIGH:LOW, but was {high}:{low}");
}
let (identifier, method, lane_idx, _) = gen_idx("set", *lhs, *idx, type_of_ident)?;
let expr = gen_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?.0;
let size = high - low + 1; // (inclusive)
if !size.is_power_of_two() {
bail!("indexing size must be power of two");
}
let ty = type_of_ident(&identifier)?;
if size != ty.elem_width {
bail!(
"unsupported not-direct element indexing, size={size}, element size={}",
ty.elem_width
);
}
let expr = gen_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?;
let raw = &ty.raw_type;
let rust_type = ty.rust_type();
let lane_idx = low / ty.elem_width;
let method = ident(&format!("set_lane_{raw}_{rust_type}"));
let identifier = ident(&identifier);
rust_stmts.push(syn::parse_quote! {
self.#method(#identifier, #lane_idx, #expr);
});
@ -375,7 +347,7 @@ fn gen_block(
lhs: Expr::Ident(lhs),
rhs,
} => {
let rhs = gen_expr_tmp(&mut rust_stmts, rhs, type_of_ident)?;
let rhs = gen_expr_tmp(&mut rust_stmts, rhs, type_of_ident)?.0;
let exists = type_of_ident(&lhs).is_ok();
@ -411,19 +383,21 @@ fn gen_block(
})
}
type RustType = String;
fn gen_expr_tmp(
rust_stmts: &mut Vec<syn::Stmt>,
expr: Expr,
type_of_ident: &impl Fn(&str) -> Result<VariableType>,
) -> Result<syn::Expr> {
) -> Result<(syn::Expr, Option<RustType>)> {
let tmp = |rust_stmts: &mut Vec<syn::Stmt>, inner: syn::Expr| {
let stmt = syn::parse_quote! { let __tmp = #inner; };
rust_stmts.push(stmt);
syn::parse_quote! { __tmp }
};
let result: syn::Expr = match expr {
Expr::Int(int) => syn::parse_quote! { #int },
let (result, ty): (syn::Expr, _) = match expr {
Expr::Int(int) => (syn::parse_quote! { #int }, None),
Expr::Ident(identifier) => {
let ty = type_of_ident(&identifier);
let identifier = ident(&identifier);
@ -435,103 +409,63 @@ fn gen_expr_tmp(
let from = &ty.raw_type;
let to = ty.rust_type();
let method = ident(&format!("cast_sign_{from}_{to}"));
tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) })
(
tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) }),
None,
)
}
_ => syn::parse_quote! { #identifier },
_ => (syn::parse_quote! { #identifier }, None),
}
}
Expr::Index { lhs, idx } => {
let Expr::Ident(identifier) = *lhs else {
bail!("lhs of indexing must be identifier");
};
let Expr::Range { left, right } = *idx else {
bail!("idx argument must be range");
};
let ty = type_of_ident(&identifier)?;
let (lane_idx, size): (syn::Expr, _) = match (*left, *right) {
(Expr::Int(high), Expr::Int(low)) => {
if high < low {
bail!("range must be HIGH:LOW, but was {high}:{low}");
}
let size = high - low + 1; // (inclusive)
let lane_idx = low / ty.elem_width;
(syn::parse_quote! { #lane_idx }, size)
}
(
Expr::BinaryOp {
op: BinaryOpKind::Add,
lhs,
rhs,
},
Expr::Ident(low),
) => {
let Expr::Ident(high_ident) = *rhs else {
bail!("rhs of lhs of + indexing must be ident");
};
let Expr::Int(high_offset) = *lhs else {
bail!("lhs of lhs of + indexing must be ident");
};
if high_ident != low {
bail!("{high_ident} != {low}");
}
let size = high_offset + 1;
let identifier = ident(&low);
(syn::parse_quote! { ( #identifier / #size ) }, size)
}
_ => bail!("unknown range indexing arguments"),
};
if !size.is_power_of_two() {
bail!("indexing size must be power of two");
}
if size != ty.elem_width {
bail!(
"unsupported not-direct element indexing, size={size}, element size={}",
ty.elem_width
);
}
let raw = &ty.raw_type;
let rust_type = ty.rust_type();
let identifier = ident(&identifier);
let method = ident(&format!("get_lane_{raw}_{rust_type}"));
tmp(
let (identifier, method, lane_idx, ty) = gen_idx("get", *lhs, *idx, type_of_ident)?;
let expr = tmp(
rust_stmts,
syn::parse_quote! { self.#method(#identifier, #lane_idx) },
)
);
(expr, Some(ty.rust_type()))
}
Expr::Range { .. } => todo!(),
Expr::Call { function, args } => {
let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str()));
let args = args
let (args, arg_tys): (Vec<_>, Vec<_>) = args
.into_iter()
.map(|arg| gen_expr_tmp(rust_stmts, arg, type_of_ident))
.collect::<Result<Vec<syn::Expr>>>()?;
.collect::<Result<Vec<(syn::Expr, Option<RustType>)>>>()?
.into_iter()
.unzip();
tmp(
let argtype = arg_tys
.into_iter()
.map(|argty| argty.expect("argument type unknown for polymorphic function"))
.collect::<Vec<_>>()
.join("_");
let function = if POLYMORPHIC_FNS.contains(&function.as_str()) {
format!("{function}_{argtype}")
} else {
function
};
let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str()));
let expr = tmp(
rust_stmts,
syn::parse_quote! { self.#function( #(#args),* ) },
)
);
(expr, None)
}
Expr::BinaryOp { op, lhs, rhs } => {
let lhs = gen_expr_tmp(rust_stmts, *lhs, type_of_ident)?;
let rhs = gen_expr_tmp(rust_stmts, *rhs, type_of_ident)?;
let lhs = gen_expr_tmp(rust_stmts, *lhs, type_of_ident)?.0;
let rhs = gen_expr_tmp(rust_stmts, *rhs, type_of_ident)?.0;
let token = match op {
BinaryOpKind::Add => quote::quote! { + },
BinaryOpKind::Mul => quote::quote! { * },
};
syn::parse_quote! { ( #lhs #token #rhs ) }
(syn::parse_quote! { ( #lhs #token #rhs ) }, None)
}
};
Ok(result)
Ok((result, ty))
}
fn parse_op(intr: &Intrinsic) -> Result<Vec<Stmt>> {

View file

@ -90,6 +90,7 @@ fn main() -> Result<()> {
const INTRINSICS_GENERATE: &[&str] = &[
"_mm_abs_epi16",
"_mm_abs_epi8",
"_mm_setr_epi8",
"_mm_setr_epi16",
"_mm_setr_epi32",

View file

@ -0,0 +1,484 @@
#![allow(unused_parens)]
// Blanket implementation: every type that implements `super::Core` receives all
// `Intrinsics` methods through the trait's provided method bodies, so the impl body is empty.
impl<C: super::Core> Intrinsics for C {}
pub trait Intrinsics: super::Core {
/// Stores `e0` in 64-bit lane 0 and `e1` in 64-bit lane 1 of `dst`.
///
/// Each signed argument is first passed through `cast_sign_i64_u64`
/// (presumably a bit-preserving signed-to-unsigned reinterpretation; the helper
/// lives on `Core` and is not visible here).
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) {
    let lane0_bits = self.cast_sign_i64_u64(e0);
    self.set_lane___m128i_u64(dst, 0u64, lane0_bits);
    let lane1_bits = self.cast_sign_i64_u64(e1);
    self.set_lane___m128i_u64(dst, 1u64, lane1_bits);
}
/// Fills the four 32-bit lanes of `dst` in argument order:
/// `e3` goes to lane 0, `e2` to lane 1, `e1` to lane 2 and `e0` to lane 3.
///
/// Every element is converted with `cast_sign_i32_u32` before being stored.
fn _mm_setr_epi32(
    &mut self,
    dst: &mut Self::__m128i,
    e3: Self::i32,
    e2: Self::i32,
    e1: Self::i32,
    e0: Self::i32,
) {
    // Pair each argument with its destination lane; cast-then-store order per
    // lane matches the unrolled form.
    for (lane, elem) in (0u64..).zip([e3, e2, e1, e0]) {
        let bits = self.cast_sign_i32_u32(elem);
        self.set_lane___m128i_u32(dst, lane, bits);
    }
}
/// Fills the eight 16-bit lanes of `dst` in argument order:
/// `e7` goes to lane 0, `e6` to lane 1, ..., `e0` to lane 7.
///
/// Every element is converted with `cast_sign_i16_u16` before being stored.
fn _mm_setr_epi16(
    &mut self,
    dst: &mut Self::__m128i,
    e7: Self::i16,
    e6: Self::i16,
    e5: Self::i16,
    e4: Self::i16,
    e3: Self::i16,
    e2: Self::i16,
    e1: Self::i16,
    e0: Self::i16,
) {
    // Pair each argument with its destination lane; cast-then-store order per
    // lane matches the unrolled form.
    for (lane, elem) in (0u64..).zip([e7, e6, e5, e4, e3, e2, e1, e0]) {
        let bits = self.cast_sign_i16_u16(elem);
        self.set_lane___m128i_u16(dst, lane, bits);
    }
}
/// Fills the sixteen 8-bit lanes of `dst` in argument order:
/// `e15` goes to lane 0, `e14` to lane 1, ..., `e0` to lane 15.
///
/// Every element is converted with `cast_sign_i8_u8` before being stored.
fn _mm_setr_epi8(
    &mut self,
    dst: &mut Self::__m128i,
    e15: Self::i8,
    e14: Self::i8,
    e13: Self::i8,
    e12: Self::i8,
    e11: Self::i8,
    e10: Self::i8,
    e9: Self::i8,
    e8: Self::i8,
    e7: Self::i8,
    e6: Self::i8,
    e5: Self::i8,
    e4: Self::i8,
    e3: Self::i8,
    e2: Self::i8,
    e1: Self::i8,
    e0: Self::i8,
) {
    let in_lane_order = [
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    ];
    // Pair each argument with its destination lane; cast-then-store order per
    // lane matches the unrolled form.
    for (lane, elem) in (0u64..).zip(in_lane_order) {
        let bits = self.cast_sign_i8_u8(elem);
        self.set_lane___m128i_u8(dst, lane, bits);
    }
}
/// Narrows the 16-bit lanes of `a` and `b` into the 8-bit lanes of `dst`.
///
/// Lanes 0..=7 of `a` are passed through `saturate8` and stored in `dst` lanes
/// 0..=7; lanes 0..=7 of `b` are handled the same way and stored in lanes 8..=15.
fn _mm_packs_epi16(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // (source vector, first destination lane): `a` is fully processed before `b`.
    for (src, base) in [(a, 0u64), (b, 8u64)] {
        for lane in 0u64..8u64 {
            let wide = self.get_lane___m128i_i16(src, lane);
            let narrow = self.saturate8(wide);
            self.set_lane___m128i_i8(dst, base + lane, narrow);
        }
    }
}
/// Narrows the 32-bit lanes of `a` and `b` into the 16-bit lanes of `dst`.
///
/// Lanes 0..=3 of `a` are passed through `saturate16` and stored in `dst` lanes
/// 0..=3; lanes 0..=3 of `b` are handled the same way and stored in lanes 4..=7.
fn _mm_packs_epi32(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // (source vector, first destination lane): `a` is fully processed before `b`.
    for (src, base) in [(a, 0u64), (b, 4u64)] {
        for lane in 0u64..4u64 {
            let wide = self.get_lane___m128i_i32(src, lane);
            let narrow = self.saturate16(wide);
            self.set_lane___m128i_i16(dst, base + lane, narrow);
        }
    }
}
/// Narrows the signed 16-bit lanes of `a` and `b` into the unsigned 8-bit lanes of `dst`.
///
/// Lanes 0..=7 of `a` are passed through `saturate_u8` and stored in `dst` lanes
/// 0..=7; lanes 0..=7 of `b` are handled the same way and stored in lanes 8..=15.
fn _mm_packus_epi16(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // (source vector, first destination lane): `a` is fully processed before `b`.
    for (src, base) in [(a, 0u64), (b, 8u64)] {
        for lane in 0u64..8u64 {
            let wide = self.get_lane___m128i_i16(src, lane);
            let narrow = self.saturate_u8(wide);
            self.set_lane___m128i_u8(dst, base + lane, narrow);
        }
    }
}
/// Narrows the signed 32-bit lanes of `a` and `b` into the unsigned 16-bit lanes of `dst`.
///
/// Lanes 0..=3 of `a` are passed through `saturate_u16` and stored in `dst` lanes
/// 0..=3; lanes 0..=3 of `b` are handled the same way and stored in lanes 4..=7.
fn _mm_packus_epi32(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // (source vector, first destination lane): `a` is fully processed before `b`.
    for (src, base) in [(a, 0u64), (b, 4u64)] {
        for lane in 0u64..4u64 {
            let wide = self.get_lane___m128i_i32(src, lane);
            let narrow = self.saturate_u16(wide);
            self.set_lane___m128i_u16(dst, base + lane, narrow);
        }
    }
}
fn _mm_abs_epi8(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
for j in 0u64..=15u64 {
let i = (j * 8u64);
let __tmp = self.get_lane___m128i_i8(a, (i / 8u64));
let __tmp = self.abs_i8(__tmp);
self.set_lane___m128i_u8(dst, (i / 8u64), __tmp);
}
}
fn _mm_abs_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) {
for j in 0u64..=7u64 {
let i = (j * 16u64);
let __tmp = self.get_lane___m128i_i16(a, (i / 16u64));
let __tmp = self.abs_i16(__tmp);
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp);
}
}
}
/// Free-function wrappers around the `Intrinsics` methods, evaluated on `ValueCore`.
///
/// Each wrapper creates a zero-initialised output vector, lets the corresponding
/// trait method fill it in place, and returns it by value.
pub mod soft_arch {
pub use super::super::soft_arch_types::*;
use super::Intrinsics;
/// Runs `Intrinsics::_mm_set_epi64x` on `ValueCore` and returns the filled vector.
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
// SAFETY: NOTE(review): assumes the all-zero bit pattern is a valid `__m128i`;
// the type comes from `soft_arch_types` and is not visible here -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0);
output
}
/// Runs `Intrinsics::_mm_setr_epi32` on `ValueCore` and returns the filled vector.
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_setr_epi32(&mut output, e3, e2, e1, e0);
output
}
/// Runs `Intrinsics::_mm_setr_epi16` on `ValueCore` and returns the filled vector.
pub fn _mm_setr_epi16(
e7: i16,
e6: i16,
e5: i16,
e4: i16,
e3: i16,
e2: i16,
e1: i16,
e0: i16,
) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore
._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0);
output
}
/// Runs `Intrinsics::_mm_setr_epi8` on `ValueCore` and returns the filled vector.
pub fn _mm_setr_epi8(
e15: i8,
e14: i8,
e13: i8,
e12: i8,
e11: i8,
e10: i8,
e9: i8,
e8: i8,
e7: i8,
e6: i8,
e5: i8,
e4: i8,
e3: i8,
e2: i8,
e1: i8,
e0: i8,
) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore
._mm_setr_epi8(
&mut output,
e15,
e14,
e13,
e12,
e11,
e10,
e9,
e8,
e7,
e6,
e5,
e4,
e3,
e2,
e1,
e0,
);
output
}
/// Runs `Intrinsics::_mm_packs_epi16` on `ValueCore` and returns the filled vector.
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
output
}
/// Runs `Intrinsics::_mm_packs_epi32` on `ValueCore` and returns the filled vector.
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_packs_epi32(&mut output, a, b);
output
}
/// Runs `Intrinsics::_mm_packus_epi16` on `ValueCore` and returns the filled vector.
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_packus_epi16(&mut output, a, b);
output
}
/// Runs `Intrinsics::_mm_packus_epi32` on `ValueCore` and returns the filled vector.
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_packus_epi32(&mut output, a, b);
output
}
/// Runs `Intrinsics::_mm_abs_epi8` on `ValueCore` and returns the filled vector.
pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_abs_epi8(&mut output, a);
output
}
/// Runs `Intrinsics::_mm_abs_epi16` on `ValueCore` and returns the filled vector.
pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
// SAFETY: same all-zero-is-valid assumption as in `_mm_set_epi64x` -- confirm.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_abs_epi16(&mut output, a);
output
}
}
// One test per generated intrinsic, compiled only for test builds on x86_64.
//
// Every test hands a block with fixed literal inputs to `hard_soft_same_128!`.
// NOTE(review): the macro lives in `compare_test_helper` and is not visible here;
// presumably it evaluates the block against both the hardware intrinsics and the
// `soft_arch` wrappers and asserts the 128-bit results match -- confirm.
// Vector inputs are always built with `_mm_setr_epi16`, including for the 8-bit
// and 32-bit intrinsics.
#[cfg(all(test, target_arch = "x86_64"))]
pub mod tests {
use super::super::compare_test_helper::hard_soft_same_128;
#[test]
fn _mm_set_epi64x() {
hard_soft_same_128! {
{ let e1 = 1041352657357235268i64; let e0 = 1955209120357942897i64;
_mm_set_epi64x(e1, e0) }
}
}
#[test]
fn _mm_setr_epi32() {
hard_soft_same_128! {
{ let e3 = 1455669123i32; let e2 = 247864885i32; let e1 = 1390920924i32; let
e0 = 1068333055i32; _mm_setr_epi32(e3, e2, e1, e0) }
}
}
#[test]
fn _mm_setr_epi16() {
hard_soft_same_128! {
{ let e7 = 16513i16; let e6 = 22878i16; let e5 = 23986i16; let e4 = 27900i16;
let e3 = - 8343i16; let e2 = - 10648i16; let e1 = 4841i16; let e0 = 14610i16;
_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
}
}
#[test]
fn _mm_setr_epi8() {
hard_soft_same_128! {
{ let e15 = - 99i8; let e14 = 125i8; let e13 = 118i8; let e12 = 5i8; let e11
= 41i8; let e10 = - 40i8; let e9 = 124i8; let e8 = - 6i8; let e7 = 114i8; let
e6 = 24i8; let e5 = - 99i8; let e4 = 65i8; let e3 = 11i8; let e2 = - 15i8;
let e1 = 20i8; let e0 = - 107i8; _mm_setr_epi8(e15, e14, e13, e12, e11, e10,
e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
}
}
#[test]
fn _mm_packs_epi16() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(23623i16, - 22080i16, - 1436i16, - 30227i16,
8629i16, 10922i16, - 16731i16, - 1013i16); let b = _mm_setr_epi16(- 14310i16,
2892i16, - 28568i16, 12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16);
_mm_packs_epi16(a, b) }
}
}
#[test]
fn _mm_packs_epi32() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(4197i16, 1829i16, 9149i16, 18759i16, 30885i16, -
3879i16, 21600i16, 24454i16); let b = _mm_setr_epi16(23524i16, 10765i16,
32539i16, 26890i16, - 3892i16, 4386i16, 18704i16, 8253i16);
_mm_packs_epi32(a, b) }
}
}
#[test]
fn _mm_packus_epi16() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(- 29217i16, 32013i16, 7448i16, 2172i16, - 14764i16,
- 1068i16, - 25463i16, 21215i16); let b = _mm_setr_epi16(- 31392i16, -
14015i16, - 32565i16, - 11312i16, - 4934i16, - 19283i16, - 27533i16, -
9939i16); _mm_packus_epi16(a, b) }
}
}
#[test]
fn _mm_packus_epi32() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16,
- 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16,
2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16);
_mm_packus_epi32(a, b) }
}
}
#[test]
fn _mm_abs_epi8() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16,
17046i16, - 12772i16, - 28109i16); _mm_abs_epi8(a) }
}
}
#[test]
fn _mm_abs_epi16() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(7409i16, - 30136i16, - 28607i16, - 1975i16,
23451i16, - 32657i16, - 28920i16, - 2519i16); _mm_abs_epi16(a) }
}
}
}

View file

@ -44,6 +44,8 @@ pub trait Core {
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
/// Absolute value of a signed 8-bit element, returned in the unsigned 8-bit type.
fn abs_i8(&mut self, elem: Self::i8) -> Self::u8;
/// Absolute value of a signed 16-bit element, returned in the unsigned 16-bit type.
fn abs_i16(&mut self, elem: Self::i16) -> Self::u16;
}
pub struct ValueCore;
@ -194,6 +196,12 @@ impl Core for ValueCore {
let clamp = elem.clamp(0, u16::MAX as i32);
clamp as u16
}
/// Returns the magnitude of `elem` as an unsigned 8-bit value.
///
/// `unsigned_abs` is used instead of `abs() as u8` because `i8::MIN.abs()`
/// overflows (a panic in debug builds), whereas the magnitude of `i8::MIN`
/// (128) is representable in `u8`. All other inputs give the same result as before.
fn abs_i8(&mut self, elem: Self::i8) -> Self::u8 {
    elem.unsigned_abs()
}
/// Returns the magnitude of `elem` as an unsigned 16-bit value.
///
/// `unsigned_abs` is used instead of `abs() as u16` because `i16::MIN.abs()`
/// overflows (a panic in debug builds), whereas the magnitude of `i16::MIN`
/// (32768) is representable in `u16`. All other inputs give the same result as before.
fn abs_i16(&mut self, elem: Self::i16) -> Self::u16 {
    elem.unsigned_abs()
}
}
mod soft_arch_types {