From 8f49c199cc63b978d73bed5f1e4d6f4c1cf190ff Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Fri, 5 Jan 2024 22:30:39 +0100 Subject: [PATCH] compare hard and soft --- crates/generate/src/generate.rs | 106 ++++++++++++++++++++++++-- crates/generate/src/main.rs | 4 +- crates/intringen/src/x86/generated.rs | 35 ++++++++- crates/intringen/src/x86/mod.rs | 80 ++++++++++++++----- crates/intringen/src/x86/tests.rs | 34 +++++++++ 5 files changed, 231 insertions(+), 28 deletions(-) create mode 100644 crates/intringen/src/x86/tests.rs diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index 2811265..29a9be2 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -6,17 +6,35 @@ use eyre::{bail, Context, OptionExt, Result}; pub fn generate(intrinsics: &[Intrinsic]) -> Result<()> { println!("impl Intrinsics for C {{}}"); - println!("trait Intrinsics: super::Core {{"); + println!("pub trait Intrinsics: super::Core {{"); for intr in intrinsics { generate_intr(intr).wrap_err_with(|| format!("generating `{}`", intr.name))?; } println!("}}"); + println!(); + generate_soft_arch_module(intrinsics).context("generating soft_arch module")?; + + Ok(()) +} + +fn generate_soft_arch_module(intrinsics: &[Intrinsic]) -> Result<()> { + println!("pub mod soft_arch {{"); + println!(" pub use super::super::soft_arch_types::*;"); + println!(" use super::Intrinsics;"); + + for intr in intrinsics { + generate_intr_soft_arch_wrap(intr) + .wrap_err_with(|| format!("generating soft_arch `{}`", intr.name))?; + } + + println!("}}"); Ok(()) } fn generate_intr(intr: &Intrinsic) -> Result<(), eyre::Error> { + eprintln!("generating {}...", intr.name); let signature = signature(intr)?; println!(" {signature} {{"); let body = generate_body(intr).wrap_err("generating body")?; @@ -25,6 +43,37 @@ fn generate_intr(intr: &Intrinsic) -> Result<(), eyre::Error> { Ok(()) } +fn generate_intr_soft_arch_wrap(intr: &Intrinsic) -> Result<(), eyre::Error> { + eprintln!("generating soft_arch wrapper {}...", intr.name); + let signature = signature_soft_arch(intr)?; + println!(" {signature} {{"); + let body = generate_body_soft_arch(intr).wrap_err("generating body")?; + println!("{}", indent(&body, 8)); + println!(" }}"); + Ok(()) +} + +fn generate_body_soft_arch(intr: &Intrinsic) -> Result { + let mut rust_stmts = Vec::::new(); + + rust_stmts.push("let mut output = unsafe { std::mem::zeroed() };".into()); + + let name = &intr.name; + let args = intr + .parameter + .iter() + .map(|param| param.varname.as_deref().ok_or_eyre("parameter has no name")) + .collect::>>()? + .join(", "); + rust_stmts.push(format!( + "super::super::ValueCore.{name}(&mut output, {args});" + )); + + rust_stmts.push("output".into()); + + Ok(rust_stmts.join("\n")) +} + fn indent(input: &str, indent: usize) -> String { let replace = format!("\n{}", " ".repeat(indent)); let mut s = " ".repeat(indent); @@ -34,6 +83,7 @@ fn indent(input: &str, indent: usize) -> String { struct VariableType { is_signed: bool, + rawtype_signed: bool, elem_width: u64, #[allow(dead_code)] full_width: u64, @@ -42,20 +92,23 @@ struct VariableType { impl VariableType { fn of(etype: &str, ty: &str) -> Result { - let full_width = match ty { - "__m128i" => 128, + let (rawtype_signed, full_width) = match map_type_to_rust(ty) { + "__m128i" => (false, 128), + "i16" => (true, 16), _ => bail!("unknown type: {ty}"), }; let (is_signed, elem_width) = match etype { "SI16" => (true, 16), "UI8" => (false, 8), + "UI16" => (false, 16), _ => bail!("unknown element type: {etype}"), }; Ok(Self { is_signed, + rawtype_signed, full_width, elem_width, - raw_type: ty.to_owned(), + raw_type: map_type_to_rust(ty).to_owned(), }) } @@ -144,7 +197,18 @@ fn generate_expr_tmp( ) -> Result { let result = match expr { Expr::Int(int) => int.to_string(), - Expr::Ident(_) => todo!(), + Expr::Ident(ident) => { + let ty = type_of_ident(&ident)?; + if ty.is_signed != ty.rawtype_signed { + let from = &ty.raw_type; + let to = ty.rust_type(); + let stmt = format!("let __tmp = self.cast_sign_{from}_{to}({ident});"); + rust_stmts.push(stmt); + "__tmp".into() + } else { + ident + } + } Expr::Index { lhs, idx } => { let Expr::Ident(ident) = *lhs else { bail!("lhs of indexing must be identifier"); @@ -215,16 +279,44 @@ fn signature(intr: &Intrinsic) -> Result { format!( "{}: Self::{}", param.varname.as_ref().unwrap(), - param.r#type.as_ref().unwrap() + map_type_to_rust(param.r#type.as_ref().unwrap()) ) }) .collect::>() .join(", "); let ret_name = intr.ret.varname.as_ref().unwrap(); - let ret_ty = intr.ret.r#type.as_ref().unwrap(); + let ret_ty = map_type_to_rust(intr.ret.r#type.as_ref().unwrap()); Ok(format!( "fn {name}(&mut self, {ret_name}: &mut Self::{ret_ty}, {args})" )) } + +fn signature_soft_arch(intr: &Intrinsic) -> Result { + let name = &intr.name; + + let args = intr + .parameter + .iter() + .map(|param| { + format!( + "{}: {}", + param.varname.as_ref().unwrap(), + map_type_to_rust(param.r#type.as_ref().unwrap()) + ) + }) + .collect::>() + .join(", "); + + let ret_ty = map_type_to_rust(intr.ret.r#type.as_ref().unwrap()); + + Ok(format!("pub fn {name}({args}) -> {ret_ty}")) +} + +fn map_type_to_rust(ty: &str) -> &str { + match ty { + "short" => "i16", + ty => ty, + } +} diff --git a/crates/generate/src/main.rs b/crates/generate/src/main.rs index f539037..5924df2 100644 --- a/crates/generate/src/main.rs +++ b/crates/generate/src/main.rs @@ -76,7 +76,7 @@ fn main() -> Result<()> { let list = list .into_iter() .filter(|intr| intr.cpuid.iter().any(|cpu| !cpu.value.contains("AVX512"))) - .filter(|intr| intr.name == "_mm_packus_epi16") + .filter(|intr| INTRINSICS_GENERATE.contains(&intr.name.as_str())) .collect::>(); eprintln!("filtered: {}", list.len()); @@ -85,3 +85,5 @@ fn main() -> Result<()> { Ok(()) } + +const INTRINSICS_GENERATE: &[&str] = &["_mm_packus_epi16", "_mm_setr_epi16"]; diff --git a/crates/intringen/src/x86/generated.rs b/crates/intringen/src/x86/generated.rs index 37b49c9..aa058fc 100644 --- a/crates/intringen/src/x86/generated.rs +++ b/crates/intringen/src/x86/generated.rs @@ -1,5 +1,23 @@ impl Intrinsics for C {} -trait Intrinsics: super::Core { +pub trait Intrinsics: super::Core { + fn _mm_setr_epi16(&mut self, dst: &mut Self::__m128i, e7: Self::i16, e6: Self::i16, e5: Self::i16, e4: Self::i16, e3: Self::i16, e2: Self::i16, e1: Self::i16, e0: Self::i16) { + let __tmp = self.cast_sign_i16_u16(e7); + self.set_lane___m128i_u16(dst, 0, __tmp); + let __tmp = self.cast_sign_i16_u16(e6); + self.set_lane___m128i_u16(dst, 1, __tmp); + let __tmp = self.cast_sign_i16_u16(e5); + self.set_lane___m128i_u16(dst, 2, __tmp); + let __tmp = self.cast_sign_i16_u16(e4); + self.set_lane___m128i_u16(dst, 3, __tmp); + let __tmp = self.cast_sign_i16_u16(e3); + self.set_lane___m128i_u16(dst, 4, __tmp); + let __tmp = self.cast_sign_i16_u16(e2); + self.set_lane___m128i_u16(dst, 5, __tmp); + let __tmp = self.cast_sign_i16_u16(e1); + self.set_lane___m128i_u16(dst, 6, __tmp); + let __tmp = self.cast_sign_i16_u16(e0); + self.set_lane___m128i_u16(dst, 7, __tmp); + } fn _mm_packus_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i, b: Self::__m128i) { let __tmp = self.get_lane___m128i_i16(a, 0); let __tmp = self.saturate_u8(__tmp); @@ -51,3 +69,18 @@ trait Intrinsics: super::Core { self.set_lane___m128i_u8(dst, 15, __tmp); } } + +pub mod soft_arch { + pub use super::super::soft_arch_types::*; + use super::Intrinsics; + pub fn _mm_setr_epi16(e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0); + output + } + pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + let mut output = unsafe { std::mem::zeroed() }; + super::super::ValueCore._mm_packus_epi16(&mut output, a, b); + output + } +} diff --git a/crates/intringen/src/x86/mod.rs b/crates/intringen/src/x86/mod.rs index 088d44d..15d0a0c 100644 --- a/crates/intringen/src/x86/mod.rs +++ b/crates/intringen/src/x86/mod.rs @@ -2,48 +2,90 @@ mod generated; +pub use generated::soft_arch; + pub trait Core { + type u8: Copy; + type u16: Copy; + type u32: Copy; + type u64: Copy; + + type i8: Copy; + type i16: Copy; + type i32: Copy; + type i64: Copy; + type __m128i: Copy; - fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> u16; - fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> i16; + fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16; - fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: u8); - fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: i8); + fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16; + fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16; - fn saturate_u8(&mut self, elem: i16) -> u8; + fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8); + fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8); + fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16); + + fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8; } pub struct ValueCore; impl Core for ValueCore { + type u8 = u8; + type u16 = u16; + type u32 = u32; + type u64 = u64; + + type i8 = i8; + type i16 = i16; + type i32 = i32; + type i64 = i64; + type __m128i = [u8; 16]; - fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> u16 { - let first = value[(idx * 2) as usize]; - let second = value[(idx * 2 + 1) as usize]; - // todo: le? be? - ((first << 8) as u16) | (second as u16) + fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16 { + value as _ } - fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> i16 { - let first = value[(idx * 2) as usize]; - let second = value[(idx * 2 + 1) as usize]; - // todo: le? be? - (((first << 8) as u16) | (second as u16)) as i16 + fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 { + let first = value[(idx * 2 + 1) as usize]; + let second = value[(idx * 2) as usize]; + + ((first as u16) << 8) | (second as u16) } - fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: u8) { + fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 { + let first = value[(idx * 2 + 1) as usize]; + let second = value[(idx * 2) as usize]; + + ((((first as u16) << 8) as u16) | (second as u16)) as i16 + } + + fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) { place[idx as usize] = value; } - fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: i8) { + fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) { place[idx as usize] = value as u8; } - fn saturate_u8(&mut self, elem: i16) -> u8 { + fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) { + let first = (value & 0xFF) as u8; + let second = (value >> 8) as u8; + place[(idx * 2) as usize] = first; + place[(idx * 2 + 1) as usize] = second; + } + + fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8 { let clamp = elem.clamp(0, u8::MAX as i16); clamp as u8 } } -pub trait Lanes {} + +mod soft_arch_types { + pub type __m128i = [u8; 16]; +} + +#[cfg(test)] +mod tests; \ No newline at end of file diff --git a/crates/intringen/src/x86/tests.rs b/crates/intringen/src/x86/tests.rs new file mode 100644 index 0000000..fc016b3 --- /dev/null +++ b/crates/intringen/src/x86/tests.rs @@ -0,0 +1,34 @@ +#[cfg(target_arch = "x86_64")] +mod x86_compare { + macro_rules! hard_soft_same_128 { + ($($stmt:tt)*) => { + let soft = { + use crate::x86::soft_arch::*; + $($stmt)* + }; + let hard = unsafe { + std::mem::transmute::<_, [u8; 16]>({ + use core::arch::x86_64::*; + $($stmt)* + }) + }; + assert_eq!(soft, hard); + }; + } + + #[test] + fn _mm_setr_epi16() { + hard_soft_same_128! { + _mm_setr_epi16(0, -1, 100, 3535, 35, 2, i16::MIN, i16::MAX) + }; + } + + #[test] + fn _mm_packus_epi16() { + hard_soft_same_128! { + let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); + _mm_packus_epi16(a, b) + } + } +}