diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index f84a227..7524dcf 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -6,6 +6,9 @@ use eyre::{bail, Context, OptionExt, Result}; use rand::{rngs::SmallRng, Rng, SeedableRng}; use syn::Block; +// If a function is polymorphic, its argument types are "mangled" into the name (`ABS` on `i8` -> `abs_i8`). +const POLYMORPHIC_FNS: &[&str] = &["ABS"]; + pub fn generate(intrinsics: &[Intrinsic]) -> Result { let blanket: syn::ItemImpl = syn::parse_quote! { impl Intrinsics for C {} @@ -24,7 +27,7 @@ pub fn generate(intrinsics: &[Intrinsic]) -> Result { let test = generate_test_module(intrinsics).wrap_err("generating test module")?; - let mut file: syn::File = syn::parse_quote! {}; + let mut file: syn::File = syn::parse_quote! { #![allow(unused_parens)] }; file.items = vec![ blanket.into(), trait_def.into(), @@ -254,10 +257,11 @@ fn generate_body(instr: &Intrinsic) -> Result { } fn gen_idx( + method_prefix: &str, lhs: Expr, idx: Expr, type_of_ident: &impl Fn(&str) -> Result, -) -> Result<()> { +) -> Result<(syn::Ident, syn::Ident, syn::Expr, VariableType)> { let Expr::Ident(identifier) = lhs else { bail!("lhs of indexing must be identifier"); }; @@ -286,14 +290,14 @@ fn gen_idx( }, Expr::Ident(low), ) => { - let Expr::Ident(high_ident) = *rhs else { - bail!("rhs of lhs of + indexing must be ident"); + let Expr::Ident(ref high_ident) = *lhs else { + bail!("lhs of lhs of + indexing must be ident, was {lhs:?}"); }; - let Expr::Int(high_offset) = *lhs else { - bail!("lhs of lhs of + indexing must be ident"); + let Expr::Int(ref high_offset) = *rhs else { + bail!("rhs of lhs of + indexing must be int, was {rhs:?}"); }; - if high_ident != low { + if *high_ident != low { bail!("{high_ident} != {low}"); } let size = high_offset + 1; @@ -316,8 +320,8 @@ fn gen_idx( let rust_type = ty.rust_type(); let identifier = ident(&identifier); - let method = ident(&format!("get_lane_{raw}_{rust_type}")); 
- Ok(()) + let method = ident(&format!("{method_prefix}_lane_{raw}_{rust_type}")); + Ok((identifier, method, lane_idx, ty)) } fn gen_block( @@ -332,41 +336,9 @@ fn gen_block( lhs: Expr::Index { lhs, idx }, rhs, } => { - let Expr::Ident(identifier) = *lhs else { - bail!("lhs of indexing must be identifier"); - }; - let Expr::Range { left, right } = *idx else { - bail!("idx argument must be range"); - }; - let Expr::Int(high) = *left else { - bail!("lhs of range must be int"); - }; - let Expr::Int(low) = *right else { - bail!("rhs of range must be int"); - }; - if high < low { - bail!("range must be HIGH:LOW, but was {high}:{low}"); - } + let (identifier, method, lane_idx, _) = gen_idx("set", *lhs, *idx, type_of_ident)?; + let expr = gen_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?.0; - let size = high - low + 1; // (inclusive) - if !size.is_power_of_two() { - bail!("indexing size must be power of two"); - } - - let ty = type_of_ident(&identifier)?; - if size != ty.elem_width { - bail!( - "unsupported not-direct element indexing, size={size}, element size={}", - ty.elem_width - ); - } - let expr = gen_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?; - let raw = &ty.raw_type; - let rust_type = ty.rust_type(); - let lane_idx = low / ty.elem_width; - - let method = ident(&format!("set_lane_{raw}_{rust_type}")); - let identifier = ident(&identifier); rust_stmts.push(syn::parse_quote! { self.#method(#identifier, #lane_idx, #expr); }); @@ -375,7 +347,7 @@ fn gen_block( lhs: Expr::Ident(lhs), rhs, } => { - let rhs = gen_expr_tmp(&mut rust_stmts, rhs, type_of_ident)?; + let rhs = gen_expr_tmp(&mut rust_stmts, rhs, type_of_ident)?.0; let exists = type_of_ident(&lhs).is_ok(); @@ -411,19 +383,21 @@ fn gen_block( }) } +type RustType = String; + fn gen_expr_tmp( rust_stmts: &mut Vec, expr: Expr, type_of_ident: &impl Fn(&str) -> Result, -) -> Result { +) -> Result<(syn::Expr, Option)> { let tmp = |rust_stmts: &mut Vec, inner: syn::Expr| { let stmt = syn::parse_quote! 
{ let __tmp = #inner; }; rust_stmts.push(stmt); syn::parse_quote! { __tmp } }; - let result: syn::Expr = match expr { - Expr::Int(int) => syn::parse_quote! { #int }, + let (result, ty): (syn::Expr, _) = match expr { + Expr::Int(int) => (syn::parse_quote! { #int }, None), Expr::Ident(identifier) => { let ty = type_of_ident(&identifier); let identifier = ident(&identifier); @@ -435,103 +409,63 @@ fn gen_expr_tmp( let from = &ty.raw_type; let to = ty.rust_type(); let method = ident(&format!("cast_sign_{from}_{to}")); - tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) }) + ( + tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) }), + None, + ) } - _ => syn::parse_quote! { #identifier }, + _ => (syn::parse_quote! { #identifier }, None), } } Expr::Index { lhs, idx } => { - let Expr::Ident(identifier) = *lhs else { - bail!("lhs of indexing must be identifier"); - }; - let Expr::Range { left, right } = *idx else { - bail!("idx argument must be range"); - }; - - let ty = type_of_ident(&identifier)?; - - let (lane_idx, size): (syn::Expr, _) = match (*left, *right) { - (Expr::Int(high), Expr::Int(low)) => { - if high < low { - bail!("range must be HIGH:LOW, but was {high}:{low}"); - } - let size = high - low + 1; // (inclusive) - - let lane_idx = low / ty.elem_width; - - (syn::parse_quote! { #lane_idx }, size) - } - ( - Expr::BinaryOp { - op: BinaryOpKind::Add, - lhs, - rhs, - }, - Expr::Ident(low), - ) => { - let Expr::Ident(high_ident) = *rhs else { - bail!("rhs of lhs of + indexing must be ident"); - }; - let Expr::Int(high_offset) = *lhs else { - bail!("lhs of lhs of + indexing must be ident"); - }; - - if high_ident != low { - bail!("{high_ident} != {low}"); - } - let size = high_offset + 1; - let identifier = ident(&low); - (syn::parse_quote! 
{ ( #identifier / #size ) }, size) - } - _ => bail!("unknown range indexing arguments"), - }; - - if !size.is_power_of_two() { - bail!("indexing size must be power of two"); - } - if size != ty.elem_width { - bail!( - "unsupported not-direct element indexing, size={size}, element size={}", - ty.elem_width - ); - } - let raw = &ty.raw_type; - let rust_type = ty.rust_type(); - - let identifier = ident(&identifier); - let method = ident(&format!("get_lane_{raw}_{rust_type}")); - - tmp( + let (identifier, method, lane_idx, ty) = gen_idx("get", *lhs, *idx, type_of_ident)?; + let expr = tmp( rust_stmts, syn::parse_quote! { self.#method(#identifier, #lane_idx) }, - ) + ); + (expr, Some(ty.rust_type())) } Expr::Range { .. } => todo!(), Expr::Call { function, args } => { - let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str())); - let args = args + let (args, arg_tys): (Vec<_>, Vec<_>) = args .into_iter() .map(|arg| gen_expr_tmp(rust_stmts, arg, type_of_ident)) - .collect::>>()?; + .collect::)>>>()? + .into_iter() + .unzip(); - tmp( + let function = if POLYMORPHIC_FNS.contains(&function.as_str()) { /* argument types are only required when they are mangled into the name */ + let argtype = arg_tys + .into_iter() + .map(|argty| argty.expect("argument type unknown for polymorphic function")) + .collect::>() + .join("_"); + + format!("{function}_{argtype}") + } else { + function + }; + + let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str())); + let expr = tmp( rust_stmts, syn::parse_quote! { self.#function( #(#args),* ) }, - ) + ); + (expr, None) } Expr::BinaryOp { op, lhs, rhs } => { - let lhs = gen_expr_tmp(rust_stmts, *lhs, type_of_ident)?; - let rhs = gen_expr_tmp(rust_stmts, *rhs, type_of_ident)?; + let lhs = gen_expr_tmp(rust_stmts, *lhs, type_of_ident)?.0; + let rhs = gen_expr_tmp(rust_stmts, *rhs, type_of_ident)?.0; let token = match op { BinaryOpKind::Add => quote::quote! { + }, BinaryOpKind::Mul => quote::quote! { * }, }; - syn::parse_quote! { ( #lhs #token #rhs ) } + (syn::parse_quote! 
{ ( #lhs #token #rhs ) }, None) } }; - Ok(result) + Ok((result, ty)) } fn parse_op(intr: &Intrinsic) -> Result> { diff --git a/crates/generate/src/main.rs b/crates/generate/src/main.rs index 9b4ec37..5bec738 100644 --- a/crates/generate/src/main.rs +++ b/crates/generate/src/main.rs @@ -90,6 +90,7 @@ fn main() -> Result<()> { const INTRINSICS_GENERATE: &[&str] = &[ "_mm_abs_epi16", + "_mm_abs_epi8", "_mm_setr_epi8", "_mm_setr_epi16", "_mm_setr_epi32", diff --git a/crates/intringen/src/x86/generated.rs b/crates/intringen/src/x86/generated.rs index e69de29..3af63de 100644 --- a/crates/intringen/src/x86/generated.rs +++ b/crates/intringen/src/x86/generated.rs @@ -0,0 +1,484 @@ +#![allow(unused_parens)] +impl Intrinsics for C {} +pub trait Intrinsics: super::Core { + fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) { + let __tmp = self.cast_sign_i64_u64(e0); + self.set_lane___m128i_u64(dst, 0u64, __tmp); + let __tmp = self.cast_sign_i64_u64(e1); + self.set_lane___m128i_u64(dst, 1u64, __tmp); + } + fn _mm_setr_epi32( + &mut self, + dst: &mut Self::__m128i, + e3: Self::i32, + e2: Self::i32, + e1: Self::i32, + e0: Self::i32, + ) { + let __tmp = self.cast_sign_i32_u32(e3); + self.set_lane___m128i_u32(dst, 0u64, __tmp); + let __tmp = self.cast_sign_i32_u32(e2); + self.set_lane___m128i_u32(dst, 1u64, __tmp); + let __tmp = self.cast_sign_i32_u32(e1); + self.set_lane___m128i_u32(dst, 2u64, __tmp); + let __tmp = self.cast_sign_i32_u32(e0); + self.set_lane___m128i_u32(dst, 3u64, __tmp); + } + fn _mm_setr_epi16( + &mut self, + dst: &mut Self::__m128i, + e7: Self::i16, + e6: Self::i16, + e5: Self::i16, + e4: Self::i16, + e3: Self::i16, + e2: Self::i16, + e1: Self::i16, + e0: Self::i16, + ) { + let __tmp = self.cast_sign_i16_u16(e7); + self.set_lane___m128i_u16(dst, 0u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e6); + self.set_lane___m128i_u16(dst, 1u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e5); + self.set_lane___m128i_u16(dst, 2u64, 
__tmp); + let __tmp = self.cast_sign_i16_u16(e4); + self.set_lane___m128i_u16(dst, 3u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e3); + self.set_lane___m128i_u16(dst, 4u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e2); + self.set_lane___m128i_u16(dst, 5u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e1); + self.set_lane___m128i_u16(dst, 6u64, __tmp); + let __tmp = self.cast_sign_i16_u16(e0); + self.set_lane___m128i_u16(dst, 7u64, __tmp); + } + fn _mm_setr_epi8( + &mut self, + dst: &mut Self::__m128i, + e15: Self::i8, + e14: Self::i8, + e13: Self::i8, + e12: Self::i8, + e11: Self::i8, + e10: Self::i8, + e9: Self::i8, + e8: Self::i8, + e7: Self::i8, + e6: Self::i8, + e5: Self::i8, + e4: Self::i8, + e3: Self::i8, + e2: Self::i8, + e1: Self::i8, + e0: Self::i8, + ) { + let __tmp = self.cast_sign_i8_u8(e15); + self.set_lane___m128i_u8(dst, 0u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e14); + self.set_lane___m128i_u8(dst, 1u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e13); + self.set_lane___m128i_u8(dst, 2u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e12); + self.set_lane___m128i_u8(dst, 3u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e11); + self.set_lane___m128i_u8(dst, 4u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e10); + self.set_lane___m128i_u8(dst, 5u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e9); + self.set_lane___m128i_u8(dst, 6u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e8); + self.set_lane___m128i_u8(dst, 7u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e7); + self.set_lane___m128i_u8(dst, 8u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e6); + self.set_lane___m128i_u8(dst, 9u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e5); + self.set_lane___m128i_u8(dst, 10u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e4); + self.set_lane___m128i_u8(dst, 11u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e3); + self.set_lane___m128i_u8(dst, 12u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e2); + self.set_lane___m128i_u8(dst, 13u64, __tmp); 
+ let __tmp = self.cast_sign_i8_u8(e1); + self.set_lane___m128i_u8(dst, 14u64, __tmp); + let __tmp = self.cast_sign_i8_u8(e0); + self.set_lane___m128i_u8(dst, 15u64, __tmp); + } + fn _mm_packs_epi16( + &mut self, + dst: &mut Self::__m128i, + a: Self::__m128i, + b: Self::__m128i, + ) { + let __tmp = self.get_lane___m128i_i16(a, 0u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 0u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 1u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 1u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 2u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 2u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 3u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 3u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 4u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 4u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 5u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 5u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 6u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 6u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 7u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 7u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 0u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 8u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 1u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 9u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 2u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 10u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 3u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 11u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 4u64); + let __tmp = self.saturate8(__tmp); + 
self.set_lane___m128i_i8(dst, 12u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 5u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 13u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 6u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 14u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 7u64); + let __tmp = self.saturate8(__tmp); + self.set_lane___m128i_i8(dst, 15u64, __tmp); + } + fn _mm_packs_epi32( + &mut self, + dst: &mut Self::__m128i, + a: Self::__m128i, + b: Self::__m128i, + ) { + let __tmp = self.get_lane___m128i_i32(a, 0u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 0u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 1u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 1u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 2u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 2u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 3u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 3u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 0u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 4u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 1u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 5u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 2u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 6u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 3u64); + let __tmp = self.saturate16(__tmp); + self.set_lane___m128i_i16(dst, 7u64, __tmp); + } + fn _mm_packus_epi16( + &mut self, + dst: &mut Self::__m128i, + a: Self::__m128i, + b: Self::__m128i, + ) { + let __tmp = self.get_lane___m128i_i16(a, 0u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 0u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 1u64); + let __tmp = self.saturate_u8(__tmp); + 
self.set_lane___m128i_u8(dst, 1u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 2u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 2u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 3u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 3u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 4u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 4u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 5u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 5u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 6u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 6u64, __tmp); + let __tmp = self.get_lane___m128i_i16(a, 7u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 7u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 0u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 8u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 1u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 9u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 2u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 10u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 3u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 11u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 4u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 12u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 5u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 13u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 6u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 14u64, __tmp); + let __tmp = self.get_lane___m128i_i16(b, 7u64); + let __tmp = self.saturate_u8(__tmp); + self.set_lane___m128i_u8(dst, 15u64, __tmp); + } + fn _mm_packus_epi32( + &mut self, + dst: &mut 
Self::__m128i, + a: Self::__m128i, + b: Self::__m128i, + ) { + let __tmp = self.get_lane___m128i_i32(a, 0u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 0u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 1u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 1u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 2u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 2u64, __tmp); + let __tmp = self.get_lane___m128i_i32(a, 3u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 3u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 0u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 4u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 1u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 5u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 2u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 6u64, __tmp); + let __tmp = self.get_lane___m128i_i32(b, 3u64); + let __tmp = self.saturate_u16(__tmp); + self.set_lane___m128i_u16(dst, 7u64, __tmp); + } + fn _mm_abs_epi8(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) { + for j in 0u64..=15u64 { + let i = (j * 8u64); + let __tmp = self.get_lane___m128i_i8(a, (i / 8u64)); + let __tmp = self.abs_i8(__tmp); + self.set_lane___m128i_u8(dst, (i / 8u64), __tmp); + } + } + fn _mm_abs_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i) { + for j in 0u64..=7u64 { + let i = (j * 16u64); + let __tmp = self.get_lane___m128i_i16(a, (i / 16u64)); + let __tmp = self.abs_i16(__tmp); + self.set_lane___m128i_u16(dst, (i / 16u64), __tmp); + } + } +} +pub mod soft_arch { + pub use super::super::soft_arch_types::*; + use super::Intrinsics; + pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0); + output + } + pub fn 
_mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_setr_epi32(&mut output, e3, e2, e1, e0); + output + } + pub fn _mm_setr_epi16( + e7: i16, + e6: i16, + e5: i16, + e4: i16, + e3: i16, + e2: i16, + e1: i16, + e0: i16, + ) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore + ._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0); + output + } + pub fn _mm_setr_epi8( + e15: i8, + e14: i8, + e13: i8, + e12: i8, + e11: i8, + e10: i8, + e9: i8, + e8: i8, + e7: i8, + e6: i8, + e5: i8, + e4: i8, + e3: i8, + e2: i8, + e1: i8, + e0: i8, + ) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore + ._mm_setr_epi8( + &mut output, + e15, + e14, + e13, + e12, + e11, + e10, + e9, + e8, + e7, + e6, + e5, + e4, + e3, + e2, + e1, + e0, + ); + output + } + pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_packs_epi16(&mut output, a, b); + output + } + pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_packs_epi32(&mut output, a, b); + output + } + pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_packus_epi16(&mut output, a, b); + output + } + pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_packus_epi32(&mut output, a, b); + output + } + pub fn _mm_abs_epi8(a: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_abs_epi8(&mut output, a); + output + } + pub fn _mm_abs_epi16(a: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_abs_epi16(&mut output, a); + output 
+ } +} +#[cfg(all(test, target_arch = "x86_64"))] +pub mod tests { + use super::super::compare_test_helper::hard_soft_same_128; + #[test] + fn _mm_set_epi64x() { + hard_soft_same_128! { + { let e1 = 1041352657357235268i64; let e0 = 1955209120357942897i64; + _mm_set_epi64x(e1, e0) } + } + } + #[test] + fn _mm_setr_epi32() { + hard_soft_same_128! { + { let e3 = 1455669123i32; let e2 = 247864885i32; let e1 = 1390920924i32; let + e0 = 1068333055i32; _mm_setr_epi32(e3, e2, e1, e0) } + } + } + #[test] + fn _mm_setr_epi16() { + hard_soft_same_128! { + { let e7 = 16513i16; let e6 = 22878i16; let e5 = 23986i16; let e4 = 27900i16; + let e3 = - 8343i16; let e2 = - 10648i16; let e1 = 4841i16; let e0 = 14610i16; + _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) } + } + } + #[test] + fn _mm_setr_epi8() { + hard_soft_same_128! { + { let e15 = - 99i8; let e14 = 125i8; let e13 = 118i8; let e12 = 5i8; let e11 + = 41i8; let e10 = - 40i8; let e9 = 124i8; let e8 = - 6i8; let e7 = 114i8; let + e6 = 24i8; let e5 = - 99i8; let e4 = 65i8; let e3 = 11i8; let e2 = - 15i8; + let e1 = 20i8; let e0 = - 107i8; _mm_setr_epi8(e15, e14, e13, e12, e11, e10, + e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) } + } + } + #[test] + fn _mm_packs_epi16() { + hard_soft_same_128! { + { let a = _mm_setr_epi16(23623i16, - 22080i16, - 1436i16, - 30227i16, + 8629i16, 10922i16, - 16731i16, - 1013i16); let b = _mm_setr_epi16(- 14310i16, + 2892i16, - 28568i16, 12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16); + _mm_packs_epi16(a, b) } + } + } + #[test] + fn _mm_packs_epi32() { + hard_soft_same_128! { + { let a = _mm_setr_epi16(4197i16, 1829i16, 9149i16, 18759i16, 30885i16, - + 3879i16, 21600i16, 24454i16); let b = _mm_setr_epi16(23524i16, 10765i16, + 32539i16, 26890i16, - 3892i16, 4386i16, 18704i16, 8253i16); + _mm_packs_epi32(a, b) } + } + } + #[test] + fn _mm_packus_epi16() { + hard_soft_same_128! 
{ + { let a = _mm_setr_epi16(- 29217i16, 32013i16, 7448i16, 2172i16, - 14764i16, + - 1068i16, - 25463i16, 21215i16); let b = _mm_setr_epi16(- 31392i16, - + 14015i16, - 32565i16, - 11312i16, - 4934i16, - 19283i16, - 27533i16, - + 9939i16); _mm_packus_epi16(a, b) } + } + } + #[test] + fn _mm_packus_epi32() { + hard_soft_same_128! { + { let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16, + - 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16, + 2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16); + _mm_packus_epi32(a, b) } + } + } + #[test] + fn _mm_abs_epi8() { + hard_soft_same_128! { + { let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16, + 17046i16, - 12772i16, - 28109i16); _mm_abs_epi8(a) } + } + } + #[test] + fn _mm_abs_epi16() { + hard_soft_same_128! { + { let a = _mm_setr_epi16(7409i16, - 30136i16, - 28607i16, - 1975i16, + 23451i16, - 32657i16, - 28920i16, - 2519i16); _mm_abs_epi16(a) } + } + } +} + diff --git a/crates/intringen/src/x86/mod.rs b/crates/intringen/src/x86/mod.rs index ff95545..4dbf08f 100644 --- a/crates/intringen/src/x86/mod.rs +++ b/crates/intringen/src/x86/mod.rs @@ -44,6 +44,8 @@ pub trait Core { fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8; fn saturate16(&mut self, elem: Self::i32) -> Self::i16; fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16; + fn abs_i8(&mut self, elem: Self::i8) -> Self::u8; + fn abs_i16(&mut self, elem: Self::i16) -> Self::u16; } pub struct ValueCore; @@ -194,6 +196,12 @@ impl Core for ValueCore { let clamp = elem.clamp(0, u16::MAX as i32); clamp as u16 } + fn abs_i8(&mut self, elem: Self::i8) -> Self::u8 { + elem.unsigned_abs() /* abs() overflows for i8::MIN; unsigned_abs() yields 128 */ + } + fn abs_i16(&mut self, elem: Self::i16) -> Self::u16 { + elem.unsigned_abs() /* abs() overflows for i16::MIN; unsigned_abs() yields 32768 */ + } } mod soft_arch_types {