diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index a9c0e97..f84a227 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -1,9 +1,10 @@ use crate::{ - parse::{Expr, Stmt}, + parse::{BinaryOpKind, Expr, Stmt}, Intrinsic, }; use eyre::{bail, Context, OptionExt, Result}; use rand::{rngs::SmallRng, Rng, SeedableRng}; +use syn::Block; pub fn generate(intrinsics: &[Intrinsic]) -> Result { let blanket: syn::ItemImpl = syn::parse_quote! { @@ -228,7 +229,6 @@ impl VariableType { fn generate_body(instr: &Intrinsic) -> Result { let opstmts = parse_op(instr)?; - let mut rust_stmts = Vec::::new(); let type_of_ident = |ident: &str| -> Result { for param in &instr.parameter { @@ -250,12 +250,88 @@ fn generate_body(instr: &Intrinsic) -> Result { bail!("variable {ident} not found in pseudocode"); }; + gen_block(opstmts, &type_of_ident) +} + +fn gen_idx( + lhs: Expr, + idx: Expr, + type_of_ident: &impl Fn(&str) -> Result, +) -> Result<()> { + let Expr::Ident(identifier) = lhs else { + bail!("lhs of indexing must be identifier"); + }; + let Expr::Range { left, right } = idx else { + bail!("idx argument must be range"); + }; + + let ty = type_of_ident(&identifier)?; + + let (lane_idx, size): (syn::Expr, _) = match (*left, *right) { + (Expr::Int(high), Expr::Int(low)) => { + if high < low { + bail!("range must be HIGH:LOW, but was {high}:{low}"); + } + let size = high - low + 1; // (inclusive) + + let lane_idx = low / ty.elem_width; + + (syn::parse_quote! { #lane_idx }, size) + } + ( + Expr::BinaryOp { + op: BinaryOpKind::Add, + lhs, + rhs, + }, + Expr::Ident(low), + ) => { + let Expr::Ident(high_ident) = *rhs else { + bail!("rhs of lhs of + indexing must be ident"); + }; + let Expr::Int(high_offset) = *lhs else { + bail!("lhs of lhs of + indexing must be ident"); + }; + + if high_ident != low { + bail!("{high_ident} != {low}"); + } + let size = high_offset + 1; + let identifier = ident(&low); + (syn::parse_quote! { ( #identifier / #size ) }, size) + } + _ => bail!("unknown range indexing arguments"), + }; + + if !size.is_power_of_two() { + bail!("indexing size must be power of two"); + } + if size != ty.elem_width { + bail!( + "unsupported not-direct element indexing, size={size}, element size={}", + ty.elem_width + ); + } + let raw = &ty.raw_type; + let rust_type = ty.rust_type(); + + let identifier = ident(&identifier); + let method = ident(&format!("get_lane_{raw}_{rust_type}")); + Ok(()) +} + +fn gen_block( + opstmts: Vec, + type_of_ident: &impl Fn(&str) -> Result, +) -> Result { + let mut rust_stmts = Vec::::new(); + for stmt in opstmts { match stmt { - Stmt::Assign { lhs, rhs } => { - let Expr::Index { lhs, idx } = lhs else { - bail!("lhs of assign must be indexing"); - }; + Stmt::Assign { + lhs: Expr::Index { lhs, idx }, + rhs, + } => { let Expr::Ident(identifier) = *lhs else { bail!("lhs of indexing must be identifier"); }; @@ -284,7 +360,7 @@ fn generate_body(instr: &Intrinsic) -> Result { ty.elem_width ); } - let expr = generate_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?; + let expr = gen_expr_tmp(&mut rust_stmts, rhs, &type_of_ident)?; let raw = &ty.raw_type; let rust_type = ty.rust_type(); let lane_idx = low / ty.elem_width; @@ -295,6 +371,38 @@ fn generate_body(instr: &Intrinsic) -> Result { self.#method(#identifier, #lane_idx, #expr); }); } + Stmt::Assign { + lhs: Expr::Ident(lhs), + rhs, + } => { + let rhs = gen_expr_tmp(&mut rust_stmts, rhs, type_of_ident)?; + + let exists = type_of_ident(&lhs).is_ok(); + + let lhs = ident(&lhs); + let stmt = if exists { + syn::parse_quote! { #lhs = #rhs; } + } else { + syn::parse_quote! { let #lhs = #rhs; } + }; + + rust_stmts.push(stmt); + } + Stmt::For { + counter, + from, + to, + body, + } => { + let counter = ident(&counter); + let mut for_: syn::ExprForLoop = + syn::parse_quote! { for #counter in #from..=#to {} }; + + let body = gen_block(body, type_of_ident)?; + for_.body = body; + + rust_stmts.push(syn::Stmt::Expr(syn::Expr::ForLoop(for_), None)); + } _ => todo!(), } } @@ -303,7 +411,7 @@ fn generate_body(instr: &Intrinsic) -> Result { }) } -fn generate_expr_tmp( +fn gen_expr_tmp( rust_stmts: &mut Vec, expr: Expr, type_of_ident: &impl Fn(&str) -> Result, @@ -317,15 +425,19 @@ fn generate_expr_tmp( let result: syn::Expr = match expr { Expr::Int(int) => syn::parse_quote! { #int }, Expr::Ident(identifier) => { - let ty = type_of_ident(&identifier)?; + let ty = type_of_ident(&identifier); let identifier = ident(&identifier); - if ty.is_signed != ty.rawtype_signed { - let from = &ty.raw_type; - let to = ty.rust_type(); - let method = ident(&format!("cast_sign_{from}_{to}")); - tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) }) - } else { - syn::parse_quote! { #identifier } + match ty { + Ok(ty) if ty.is_signed != ty.rawtype_signed => { + // intel intrinsics types kinda lie sometimes. + // _mm_setr_epi16 says the etype of the argument is UI16 (unsigned), + // while the actual type is short (signed). Do a cast to the etype, since we used that. + let from = &ty.raw_type; + let to = ty.rust_type(); + let method = ident(&format!("cast_sign_{from}_{to}")); + tmp(rust_stmts, syn::parse_quote! { self.#method(#identifier) }) + } + _ => syn::parse_quote! { #identifier }, } } Expr::Index { lhs, idx } => { @@ -335,21 +447,48 @@ fn generate_expr_tmp( let Expr::Range { left, right } = *idx else { bail!("idx argument must be range"); }; - let Expr::Int(high) = *left else { - bail!("lhs of range must be int"); + + let ty = type_of_ident(&identifier)?; + + let (lane_idx, size): (syn::Expr, _) = match (*left, *right) { + (Expr::Int(high), Expr::Int(low)) => { + if high < low { + bail!("range must be HIGH:LOW, but was {high}:{low}"); + } + let size = high - low + 1; // (inclusive) + + let lane_idx = low / ty.elem_width; + + (syn::parse_quote! { #lane_idx }, size) + } + ( + Expr::BinaryOp { + op: BinaryOpKind::Add, + lhs, + rhs, + }, + Expr::Ident(low), + ) => { + let Expr::Ident(high_ident) = *rhs else { + bail!("rhs of lhs of + indexing must be ident"); + }; + let Expr::Int(high_offset) = *lhs else { + bail!("lhs of lhs of + indexing must be ident"); + }; + + if high_ident != low { + bail!("{high_ident} != {low}"); + } + let size = high_offset + 1; + let identifier = ident(&low); + (syn::parse_quote! { ( #identifier / #size ) }, size) + } + _ => bail!("unknown range indexing arguments"), }; - let Expr::Int(low) = *right else { - bail!("rhs of range must be int"); - }; - if high < low { - bail!("range must be HIGH:LOW, but was {high}:{low}"); - } - let size = high - low + 1; // (inclusive) + if !size.is_power_of_two() { bail!("indexing size must be power of two"); } - - let ty = type_of_ident(&identifier)?; if size != ty.elem_width { bail!( "unsupported not-direct element indexing, size={size}, element size={}", @@ -358,7 +497,6 @@ fn generate_expr_tmp( } let raw = &ty.raw_type; let rust_type = ty.rust_type(); - let lane_idx = low / ty.elem_width; let identifier = ident(&identifier); let method = ident(&format!("get_lane_{raw}_{rust_type}")); @@ -373,7 +511,7 @@ fn generate_expr_tmp( let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str())); let args = args .into_iter() - .map(|arg| generate_expr_tmp(rust_stmts, arg, type_of_ident)) + .map(|arg| gen_expr_tmp(rust_stmts, arg, type_of_ident)) .collect::>>()?; tmp( @@ -381,7 +519,17 @@ fn generate_expr_tmp( syn::parse_quote! { self.#function( #(#args),* ) }, ) } - Expr::BinaryOp { .. } => todo!(), + Expr::BinaryOp { op, lhs, rhs } => { + let lhs = gen_expr_tmp(rust_stmts, *lhs, type_of_ident)?; + let rhs = gen_expr_tmp(rust_stmts, *rhs, type_of_ident)?; + + let token = match op { + BinaryOpKind::Add => quote::quote! { + }, + BinaryOpKind::Mul => quote::quote! { * }, + }; + + syn::parse_quote! { ( #lhs #token #rhs ) } + } }; Ok(result) } diff --git a/crates/generate/src/main.rs b/crates/generate/src/main.rs index ffe78be..9b4ec37 100644 --- a/crates/generate/src/main.rs +++ b/crates/generate/src/main.rs @@ -89,6 +89,7 @@ fn main() -> Result<()> { } const INTRINSICS_GENERATE: &[&str] = &[ + "_mm_abs_epi16", "_mm_setr_epi8", "_mm_setr_epi16", "_mm_setr_epi32", diff --git a/crates/generate/src/parse.rs b/crates/generate/src/parse.rs index 1718246..f1bb775 100644 --- a/crates/generate/src/parse.rs +++ b/crates/generate/src/parse.rs @@ -9,6 +9,12 @@ pub(crate) enum Stmt { lhs: Expr, rhs: Expr, }, + For { + counter: String, + from: u64, + to: u64, + body: Vec, + }, If { cond: Expr, then: Expr, @@ -41,7 +47,10 @@ pub(crate) enum Expr { } #[derive(Debug)] -pub(crate) enum BinaryOpKind {} +pub(crate) enum BinaryOpKind { + Add, + Mul, +} pub(crate) fn parse_operation(op: &str) -> Result> { let tokens = Token::lexer(op.trim()) @@ -95,7 +104,33 @@ fn parse(tokens: Vec) -> Result> { } fn parse_stmt(parser: &mut Parser) -> Result { - let stmt = match parser.peek() { + let stmt = match parser.peek()? { + Token::For => { + parser.next()?; + let Token::Ident(ident) = parser.next()? else { + bail!("expected ident after for"); + }; + parser.expect(Token::Assign)?; + let Token::Integer(from) = parser.next()? else { + bail!("expected integer in for"); + }; + parser.expect(Token::To)?; + let Token::Integer(to) = parser.next()? else { + bail!("expected integer after to"); + }; + parser.expect(Token::Newline)?; + let mut stmts = Vec::new(); + while !parser.peek().is_ok_and(|t| *t == Token::Endfor) { + stmts.push(parse_stmt(parser)?); + } + parser.expect(Token::Endfor)?; + Stmt::For { + counter: ident, + from, + to, + body: stmts, + } + } _ => { let expr = parse_expr(parser)?; @@ -117,9 +152,9 @@ fn parse_expr(parser: &mut Parser) -> Result { } fn parse_expr_range(parser: &mut Parser) -> Result { - let expr = parse_expr_call(parser)?; + let expr = parse_expr_addsub(parser)?; if parser.eat(Token::Colon) { - let rhs = parse_expr_call(parser)?; + let rhs = parse_expr_addsub(parser)?; Ok(Expr::Range { left: Box::new(expr), right: Box::new(rhs), @@ -129,6 +164,24 @@ fn parse_expr_range(parser: &mut Parser) -> Result { } } +fn parse_expr_addsub(parser: &mut Parser) -> Result { + let lhs = parse_expr_muldiv(parser)?; + if parser.eat(Token::Plus) { + let rhs = parse_expr_addsub(parser)?; + return Ok(Expr::BinaryOp { op: BinaryOpKind::Add, lhs: Box::new(lhs), rhs: Box::new(rhs) }); + } + Ok(lhs) +} + +fn parse_expr_muldiv(parser: &mut Parser) -> Result { + let lhs = parse_expr_call(parser)?; + if parser.eat(Token::Star) { + let rhs = parse_expr_muldiv(parser)?; + return Ok(Expr::BinaryOp { op: BinaryOpKind::Mul, lhs: Box::new(lhs), rhs: Box::new(rhs) }); + } + Ok(lhs) +} + fn parse_expr_call(parser: &mut Parser) -> Result { let mut lhs = parse_expr_atom(parser)?; @@ -198,6 +251,12 @@ enum Token { Return, #[token("OF")] Of, + #[token("to")] + To, + #[token("FOR")] + For, + #[token("ENDFOR")] + Endfor, #[token("\n")] Newline, @@ -208,6 +267,10 @@ enum Token { Assign, #[token(":")] Colon, + #[token("*")] + Star, + #[token("+")] + Plus, #[regex(r"[a-zA-Z_]\w*", |lex| lex.slice().to_owned())] Ident(String), diff --git a/crates/intringen/src/x86/generated.rs b/crates/intringen/src/x86/generated.rs index 89138e4..e69de29 100644 --- a/crates/intringen/src/x86/generated.rs +++ b/crates/intringen/src/x86/generated.rs @@ -1,443 +0,0 @@ -impl Intrinsics for C {} -pub trait Intrinsics: super::Core { - fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) { - let __tmp = self.cast_sign_i64_u64(e0); - self.set_lane___m128i_u64(dst, 0u64, __tmp); - let __tmp = self.cast_sign_i64_u64(e1); - self.set_lane___m128i_u64(dst, 1u64, __tmp); - } - fn _mm_setr_epi32( - &mut self, - dst: &mut Self::__m128i, - e3: Self::i32, - e2: Self::i32, - e1: Self::i32, - e0: Self::i32, - ) { - let __tmp = self.cast_sign_i32_u32(e3); - self.set_lane___m128i_u32(dst, 0u64, __tmp); - let __tmp = self.cast_sign_i32_u32(e2); - self.set_lane___m128i_u32(dst, 1u64, __tmp); - let __tmp = self.cast_sign_i32_u32(e1); - self.set_lane___m128i_u32(dst, 2u64, __tmp); - let __tmp = self.cast_sign_i32_u32(e0); - self.set_lane___m128i_u32(dst, 3u64, __tmp); - } - fn _mm_setr_epi16( - &mut self, - dst: &mut Self::__m128i, - e7: Self::i16, - e6: Self::i16, - e5: Self::i16, - e4: Self::i16, - e3: Self::i16, - e2: Self::i16, - e1: Self::i16, - e0: Self::i16, - ) { - let __tmp = self.cast_sign_i16_u16(e7); - self.set_lane___m128i_u16(dst, 0u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e6); - self.set_lane___m128i_u16(dst, 1u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e5); - self.set_lane___m128i_u16(dst, 2u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e4); - self.set_lane___m128i_u16(dst, 3u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e3); - self.set_lane___m128i_u16(dst, 4u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e2); - self.set_lane___m128i_u16(dst, 5u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e1); - self.set_lane___m128i_u16(dst, 6u64, __tmp); - let __tmp = self.cast_sign_i16_u16(e0); - self.set_lane___m128i_u16(dst, 7u64, __tmp); - } - fn _mm_setr_epi8( - &mut self, - dst: &mut Self::__m128i, - e15: Self::i8, - e14: Self::i8, - e13: Self::i8, - e12: Self::i8, - e11: Self::i8, - e10: Self::i8, - e9: Self::i8, - e8: Self::i8, - e7: Self::i8, - e6: Self::i8, - e5: Self::i8, - e4: Self::i8, - e3: Self::i8, - e2: Self::i8, - e1: Self::i8, - e0: Self::i8, - ) { - let __tmp = self.cast_sign_i8_u8(e15); - self.set_lane___m128i_u8(dst, 0u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e14); - self.set_lane___m128i_u8(dst, 1u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e13); - self.set_lane___m128i_u8(dst, 2u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e12); - self.set_lane___m128i_u8(dst, 3u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e11); - self.set_lane___m128i_u8(dst, 4u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e10); - self.set_lane___m128i_u8(dst, 5u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e9); - self.set_lane___m128i_u8(dst, 6u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e8); - self.set_lane___m128i_u8(dst, 7u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e7); - self.set_lane___m128i_u8(dst, 8u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e6); - self.set_lane___m128i_u8(dst, 9u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e5); - self.set_lane___m128i_u8(dst, 10u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e4); - self.set_lane___m128i_u8(dst, 11u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e3); - self.set_lane___m128i_u8(dst, 12u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e2); - self.set_lane___m128i_u8(dst, 13u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e1); - self.set_lane___m128i_u8(dst, 14u64, __tmp); - let __tmp = self.cast_sign_i8_u8(e0); - self.set_lane___m128i_u8(dst, 15u64, __tmp); - } - fn _mm_packs_epi16( - &mut self, - dst: &mut Self::__m128i, - a: Self::__m128i, - b: Self::__m128i, - ) { - let __tmp = self.get_lane___m128i_i16(a, 0u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 0u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 1u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 1u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 2u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 2u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 3u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 3u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 4u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 4u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 5u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 5u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 6u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 6u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 7u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 7u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 0u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 8u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 1u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 9u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 2u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 10u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 3u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 11u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 4u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 12u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 5u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 13u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 6u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 14u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 7u64); - let __tmp = self.saturate8(__tmp); - self.set_lane___m128i_i8(dst, 15u64, __tmp); - } - fn _mm_packs_epi32( - &mut self, - dst: &mut Self::__m128i, - a: Self::__m128i, - b: Self::__m128i, - ) { - let __tmp = self.get_lane___m128i_i32(a, 0u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 0u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 1u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 1u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 2u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 2u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 3u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 3u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 0u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 4u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 1u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 5u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 2u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 6u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 3u64); - let __tmp = self.saturate16(__tmp); - self.set_lane___m128i_i16(dst, 7u64, __tmp); - } - fn _mm_packus_epi16( - &mut self, - dst: &mut Self::__m128i, - a: Self::__m128i, - b: Self::__m128i, - ) { - let __tmp = self.get_lane___m128i_i16(a, 0u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 0u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 1u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 1u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 2u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 2u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 3u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 3u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 4u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 4u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 5u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 5u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 6u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 6u64, __tmp); - let __tmp = self.get_lane___m128i_i16(a, 7u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 7u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 0u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 8u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 1u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 9u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 2u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 10u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 3u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 11u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 4u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 12u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 5u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 13u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 6u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 14u64, __tmp); - let __tmp = self.get_lane___m128i_i16(b, 7u64); - let __tmp = self.saturate_u8(__tmp); - self.set_lane___m128i_u8(dst, 15u64, __tmp); - } - fn _mm_packus_epi32( - &mut self, - dst: &mut Self::__m128i, - a: Self::__m128i, - b: Self::__m128i, - ) { - let __tmp = self.get_lane___m128i_i32(a, 0u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 0u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 1u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 1u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 2u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 2u64, __tmp); - let __tmp = self.get_lane___m128i_i32(a, 3u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 3u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 0u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 4u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 1u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 5u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 2u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 6u64, __tmp); - let __tmp = self.get_lane___m128i_i32(b, 3u64); - let __tmp = self.saturate_u16(__tmp); - self.set_lane___m128i_u16(dst, 7u64, __tmp); - } -} -pub mod soft_arch { - pub use super::super::soft_arch_types::*; - use super::Intrinsics; - pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0); - output - } - pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_setr_epi32(&mut output, e3, e2, e1, e0); - output - } - pub fn _mm_setr_epi16( - e7: i16, - e6: i16, - e5: i16, - e4: i16, - e3: i16, - e2: i16, - e1: i16, - e0: i16, - ) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore - ._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0); - output - } - pub fn _mm_setr_epi8( - e15: i8, - e14: i8, - e13: i8, - e12: i8, - e11: i8, - e10: i8, - e9: i8, - e8: i8, - e7: i8, - e6: i8, - e5: i8, - e4: i8, - e3: i8, - e2: i8, - e1: i8, - e0: i8, - ) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore - ._mm_setr_epi8( - &mut output, - e15, - e14, - e13, - e12, - e11, - e10, - e9, - e8, - e7, - e6, - e5, - e4, - e3, - e2, - e1, - e0, - ); - output - } - pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_packs_epi16(&mut output, a, b); - output - } - pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_packs_epi32(&mut output, a, b); - output - } - pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_packus_epi16(&mut output, a, b); - output - } - pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { - let mut output = unsafe { std::mem::zeroed() }; - super::super::ValueCore._mm_packus_epi32(&mut output, a, b); - output - } -} -#[cfg(all(test, target_arch = "x86_64"))] -pub mod tests { - use super::super::compare_test_helper::hard_soft_same_128; - #[test] - fn _mm_set_epi64x() { - hard_soft_same_128! { - { let e1 = 1041352657357235268i64; let e0 = 1955209120357942897i64; - _mm_set_epi64x(e1, e0) } - } - } - #[test] - fn _mm_setr_epi32() { - hard_soft_same_128! { - { let e3 = 1455669123i32; let e2 = 247864885i32; let e1 = 1390920924i32; let - e0 = 1068333055i32; _mm_setr_epi32(e3, e2, e1, e0) } - } - } - #[test] - fn _mm_setr_epi16() { - hard_soft_same_128! { - { let e7 = 16513i16; let e6 = 22878i16; let e5 = 23986i16; let e4 = 27900i16; - let e3 = - 8343i16; let e2 = - 10648i16; let e1 = 4841i16; let e0 = 14610i16; - _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) } - } - } - #[test] - fn _mm_setr_epi8() { - hard_soft_same_128! { - { let e15 = - 99i8; let e14 = 125i8; let e13 = 118i8; let e12 = 5i8; let e11 - = 41i8; let e10 = - 40i8; let e9 = 124i8; let e8 = - 6i8; let e7 = 114i8; let - e6 = 24i8; let e5 = - 99i8; let e4 = 65i8; let e3 = 11i8; let e2 = - 15i8; - let e1 = 20i8; let e0 = - 107i8; _mm_setr_epi8(e15, e14, e13, e12, e11, e10, - e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) } - } - } - #[test] - fn _mm_packs_epi16() { - hard_soft_same_128! { - { let a = _mm_setr_epi16(23623i16, - 22080i16, - 1436i16, - 30227i16, - 8629i16, 10922i16, - 16731i16, - 1013i16); let b = _mm_setr_epi16(- 14310i16, - 2892i16, - 28568i16, 12614i16, 20103i16, 32412i16, - 28704i16, - 27930i16); - _mm_packs_epi16(a, b) } - } - } - #[test] - fn _mm_packs_epi32() { - hard_soft_same_128! { - { let a = _mm_setr_epi16(4197i16, 1829i16, 9149i16, 18759i16, 30885i16, - - 3879i16, 21600i16, 24454i16); let b = _mm_setr_epi16(23524i16, 10765i16, - 32539i16, 26890i16, - 3892i16, 4386i16, 18704i16, 8253i16); - _mm_packs_epi32(a, b) } - } - } - #[test] - fn _mm_packus_epi16() { - hard_soft_same_128! { - { let a = _mm_setr_epi16(- 29217i16, 32013i16, 7448i16, 2172i16, - 14764i16, - - 1068i16, - 25463i16, 21215i16); let b = _mm_setr_epi16(- 31392i16, - - 14015i16, - 32565i16, - 11312i16, - 4934i16, - 19283i16, - 27533i16, - - 9939i16); _mm_packus_epi16(a, b) } - } - } - #[test] - fn _mm_packus_epi32() { - hard_soft_same_128! { - { let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16, - - 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16, - 2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16); - _mm_packus_epi32(a, b) } - } - } -} -