compare hard and soft

This commit is contained in:
nora 2024-01-05 22:30:39 +01:00
parent 429d9b826c
commit 8f49c199cc
5 changed files with 231 additions and 28 deletions

View file

@ -6,17 +6,35 @@ use eyre::{bail, Context, OptionExt, Result};
/// Prints the generated Rust source to stdout: a blanket `Intrinsics` impl for every
/// `super::Core` implementor, the `Intrinsics` trait with one method per intrinsic,
/// a blank line, and finally the `soft_arch` module.
///
/// Returns an error (with the failing intrinsic's name attached as context) as soon as
/// one intrinsic cannot be generated; output printed up to that point is not rolled back.
pub fn generate(intrinsics: &[Intrinsic]) -> Result<()> { pub fn generate(intrinsics: &[Intrinsic]) -> Result<()> {
println!("impl<C: super::Core> Intrinsics for C {{}}"); println!("impl<C: super::Core> Intrinsics for C {{}}");
println!("trait Intrinsics: super::Core {{"); println!("pub trait Intrinsics: super::Core {{");
for intr in intrinsics { for intr in intrinsics {
generate_intr(intr).wrap_err_with(|| format!("generating `{}`", intr.name))?; generate_intr(intr).wrap_err_with(|| format!("generating `{}`", intr.name))?;
} }
println!("}}"); println!("}}");
println!();
generate_soft_arch_module(intrinsics).context("generating soft_arch module")?;
Ok(())
}
/// Prints the `soft_arch` module to stdout: a re-export of `soft_arch_types` followed by
/// one free-function wrapper per intrinsic.
///
/// Returns an error naming the intrinsic whose wrapper could not be generated.
fn generate_soft_arch_module(intrinsics: &[Intrinsic]) -> Result<()> {
println!("pub mod soft_arch {{");
println!(" pub use super::super::soft_arch_types::*;");
// The wrappers call the intrinsics as trait methods on `ValueCore`, so the generated
// module has to bring the `Intrinsics` trait into scope.
println!(" use super::Intrinsics;");
for intr in intrinsics {
generate_intr_soft_arch_wrap(intr)
.wrap_err_with(|| format!("generating soft_arch `{}`", intr.name))?;
}
println!("}}");
Ok(()) Ok(())
} }
fn generate_intr(intr: &Intrinsic) -> Result<(), eyre::Error> { fn generate_intr(intr: &Intrinsic) -> Result<(), eyre::Error> {
eprintln!("generating {}...", intr.name);
let signature = signature(intr)?; let signature = signature(intr)?;
println!(" {signature} {{"); println!(" {signature} {{");
let body = generate_body(intr).wrap_err("generating body")?; let body = generate_body(intr).wrap_err("generating body")?;
@ -25,6 +43,37 @@ fn generate_intr(intr: &Intrinsic) -> Result<(), eyre::Error> {
Ok(()) Ok(())
} }
/// Emits the `soft_arch` free-function wrapper for a single intrinsic on stdout.
///
/// Progress is reported on stderr. The signature line is written before the body is
/// generated, so a body failure leaves an opened function on stdout and returns the
/// error with "generating body" as context.
fn generate_intr_soft_arch_wrap(intr: &Intrinsic) -> Result<(), eyre::Error> {
    eprintln!("generating soft_arch wrapper {}...", intr.name);

    // Opening line of the wrapper function.
    let signature = signature_soft_arch(intr)?;
    println!(" {signature} {{");

    // Body statements, indented to sit inside the function.
    let indented_body = generate_body_soft_arch(intr)
        .wrap_err("generating body")
        .map(|stmts| indent(&stmts, 8))?;
    println!("{}", indented_body);

    println!(" }}");
    Ok(())
}
/// Builds the body of a `soft_arch` wrapper as newline-separated Rust statements.
///
/// The body creates a zeroed `output`, calls the intrinsic of the same name on
/// `super::super::ValueCore` with `&mut output` followed by the parameter names,
/// and ends with `output` as the tail expression.
///
/// Fails if any parameter of the intrinsic has no name.
fn generate_body_soft_arch(intr: &Intrinsic) -> Result<String> {
    // Gather the argument names first; the first unnamed parameter aborts generation.
    let mut arg_names = Vec::new();
    for param in intr.parameter.iter() {
        let arg = param.varname.as_deref().ok_or_eyre("parameter has no name")?;
        arg_names.push(arg);
    }
    let args = arg_names.join(", ");
    let name = &intr.name;

    let lines = [
        String::from("let mut output = unsafe { std::mem::zeroed() };"),
        format!("super::super::ValueCore.{name}(&mut output, {args});"),
        String::from("output"),
    ];
    Ok(lines.join("\n"))
}
fn indent(input: &str, indent: usize) -> String { fn indent(input: &str, indent: usize) -> String {
let replace = format!("\n{}", " ".repeat(indent)); let replace = format!("\n{}", " ".repeat(indent));
let mut s = " ".repeat(indent); let mut s = " ".repeat(indent);
@ -34,6 +83,7 @@ fn indent(input: &str, indent: usize) -> String {
struct VariableType { struct VariableType {
is_signed: bool, is_signed: bool,
rawtype_signed: bool,
elem_width: u64, elem_width: u64,
#[allow(dead_code)] #[allow(dead_code)]
full_width: u64, full_width: u64,
@ -42,20 +92,23 @@ struct VariableType {
impl VariableType { impl VariableType {
/// Builds a `VariableType` from an element-type tag (`etype`, e.g. `"SI16"`) and a
/// declared type name (`ty`).
///
/// The declared type is looked up in a fixed table that yields the signedness of the
/// raw type and its full width; the element tag is looked up in a second table that
/// yields the element signedness and element width. Anything not listed in either
/// table is rejected with an "unknown type" / "unknown element type" error.
fn of(etype: &str, ty: &str) -> Result<Self> { fn of(etype: &str, ty: &str) -> Result<Self> {
let full_width = match ty { let (rawtype_signed, full_width) = match map_type_to_rust(ty) {
"__m128i" => 128, "__m128i" => (false, 128),
"i16" => (true, 16),
_ => bail!("unknown type: {ty}"), _ => bail!("unknown type: {ty}"),
}; };
let (is_signed, elem_width) = match etype { let (is_signed, elem_width) = match etype {
"SI16" => (true, 16), "SI16" => (true, 16),
"UI8" => (false, 8), "UI8" => (false, 8),
"UI16" => (false, 16),
_ => bail!("unknown element type: {etype}"), _ => bail!("unknown element type: {etype}"),
}; };
Ok(Self { Ok(Self {
is_signed, is_signed,
rawtype_signed,
full_width, full_width,
elem_width, elem_width,
raw_type: ty.to_owned(), raw_type: map_type_to_rust(ty).to_owned(),
}) })
} }
@ -144,7 +197,18 @@ fn generate_expr_tmp(
) -> Result<String> { ) -> Result<String> {
let result = match expr { let result = match expr {
Expr::Int(int) => int.to_string(), Expr::Int(int) => int.to_string(),
Expr::Ident(_) => todo!(), Expr::Ident(ident) => {
let ty = type_of_ident(&ident)?;
if ty.is_signed != ty.rawtype_signed {
let from = &ty.raw_type;
let to = ty.rust_type();
let stmt = format!("let __tmp = self.cast_sign_{from}_{to}({ident});");
rust_stmts.push(stmt);
"__tmp".into()
} else {
ident
}
}
Expr::Index { lhs, idx } => { Expr::Index { lhs, idx } => {
let Expr::Ident(ident) = *lhs else { let Expr::Ident(ident) = *lhs else {
bail!("lhs of indexing must be identifier"); bail!("lhs of indexing must be identifier");
@ -215,16 +279,44 @@ fn signature(intr: &Intrinsic) -> Result<String> {
format!( format!(
"{}: Self::{}", "{}: Self::{}",
param.varname.as_ref().unwrap(), param.varname.as_ref().unwrap(),
param.r#type.as_ref().unwrap() map_type_to_rust(param.r#type.as_ref().unwrap())
) )
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", "); .join(", ");
let ret_name = intr.ret.varname.as_ref().unwrap(); let ret_name = intr.ret.varname.as_ref().unwrap();
let ret_ty = intr.ret.r#type.as_ref().unwrap(); let ret_ty = map_type_to_rust(intr.ret.r#type.as_ref().unwrap());
Ok(format!( Ok(format!(
"fn {name}(&mut self, {ret_name}: &mut Self::{ret_ty}, {args})" "fn {name}(&mut self, {ret_name}: &mut Self::{ret_ty}, {args})"
)) ))
} }
/// Builds the signature line of the `soft_arch` free-function wrapper for `intr`,
/// e.g. `pub fn name(a: __m128i, b: __m128i) -> __m128i`.
///
/// Parameter and return types are passed through `map_type_to_rust`.
///
/// # Errors
///
/// Returns an error instead of panicking when a parameter lacks a name or a type, or
/// when the return value lacks a type.
fn signature_soft_arch(intr: &Intrinsic) -> Result<String> {
    let name = &intr.name;
    let args = intr
        .parameter
        .iter()
        .map(|param| -> Result<String> {
            let varname = param
                .varname
                .as_deref()
                .ok_or_eyre("parameter has no name")?;
            let ty = param
                .r#type
                .as_deref()
                .ok_or_eyre("parameter has no type")?;
            Ok(format!("{varname}: {}", map_type_to_rust(ty)))
        })
        .collect::<Result<Vec<_>>>()?
        .join(", ");
    let ret_ty = map_type_to_rust(
        intr.ret
            .r#type
            .as_deref()
            .ok_or_eyre("return value has no type")?,
    );
    Ok(format!("pub fn {name}({args}) -> {ret_ty}"))
}
/// Translates a type name from the intrinsics description into the name used in the
/// generated Rust code.
///
/// `"short"` becomes `"i16"`; every other name is returned unchanged.
fn map_type_to_rust(ty: &str) -> &str {
    if ty == "short" {
        "i16"
    } else {
        ty
    }
}

View file

@ -76,7 +76,7 @@ fn main() -> Result<()> {
let list = list let list = list
.into_iter() .into_iter()
.filter(|intr| intr.cpuid.iter().any(|cpu| !cpu.value.contains("AVX512"))) .filter(|intr| intr.cpuid.iter().any(|cpu| !cpu.value.contains("AVX512")))
.filter(|intr| intr.name == "_mm_packus_epi16") .filter(|intr| INTRINSICS_GENERATE.contains(&intr.name.as_str()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
eprintln!("filtered: {}", list.len()); eprintln!("filtered: {}", list.len());
@ -85,3 +85,5 @@ fn main() -> Result<()> {
Ok(()) Ok(())
} }
/// Allow-list of intrinsic names: `main` keeps only the intrinsics whose name appears here.
const INTRINSICS_GENERATE: &[&str] = &["_mm_packus_epi16", "_mm_setr_epi16"];

View file

@ -1,5 +1,23 @@
impl<C: super::Core> Intrinsics for C {} impl<C: super::Core> Intrinsics for C {}
trait Intrinsics: super::Core { pub trait Intrinsics: super::Core {
fn _mm_setr_epi16(&mut self, dst: &mut Self::__m128i, e7: Self::i16, e6: Self::i16, e5: Self::i16, e4: Self::i16, e3: Self::i16, e2: Self::i16, e1: Self::i16, e0: Self::i16) {
let __tmp = self.cast_sign_i16_u16(e7);
self.set_lane___m128i_u16(dst, 0, __tmp);
let __tmp = self.cast_sign_i16_u16(e6);
self.set_lane___m128i_u16(dst, 1, __tmp);
let __tmp = self.cast_sign_i16_u16(e5);
self.set_lane___m128i_u16(dst, 2, __tmp);
let __tmp = self.cast_sign_i16_u16(e4);
self.set_lane___m128i_u16(dst, 3, __tmp);
let __tmp = self.cast_sign_i16_u16(e3);
self.set_lane___m128i_u16(dst, 4, __tmp);
let __tmp = self.cast_sign_i16_u16(e2);
self.set_lane___m128i_u16(dst, 5, __tmp);
let __tmp = self.cast_sign_i16_u16(e1);
self.set_lane___m128i_u16(dst, 6, __tmp);
let __tmp = self.cast_sign_i16_u16(e0);
self.set_lane___m128i_u16(dst, 7, __tmp);
}
fn _mm_packus_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i, b: Self::__m128i) { fn _mm_packus_epi16(&mut self, dst: &mut Self::__m128i, a: Self::__m128i, b: Self::__m128i) {
let __tmp = self.get_lane___m128i_i16(a, 0); let __tmp = self.get_lane___m128i_i16(a, 0);
let __tmp = self.saturate_u8(__tmp); let __tmp = self.saturate_u8(__tmp);
@ -51,3 +69,18 @@ trait Intrinsics: super::Core {
self.set_lane___m128i_u8(dst, 15, __tmp); self.set_lane___m128i_u8(dst, 15, __tmp);
} }
} }
// Generator output: free-function wrappers that evaluate each intrinsic on `ValueCore`.
// Each wrapper creates a zeroed value, passes it to the trait method as the
// out-parameter, and returns it.
pub mod soft_arch {
pub use super::super::soft_arch_types::*;
// Needed so the `Intrinsics` trait methods can be called on `ValueCore` below.
use super::Intrinsics;
pub fn _mm_setr_epi16(e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> __m128i {
// Initial value only; it is handed to the intrinsic as `dst` on the next line.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_setr_epi16(&mut output, e7, e6, e5, e4, e3, e2, e1, e0);
output
}
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
// Initial value only; it is handed to the intrinsic as `dst` on the next line.
let mut output = unsafe { std::mem::zeroed() };
super::super::ValueCore._mm_packus_epi16(&mut output, a, b);
output
}
}

View file

@ -2,48 +2,90 @@
mod generated; mod generated;
pub use generated::soft_arch;
/// Backend that the generated `Intrinsics` methods are written against.
///
/// It supplies the scalar and vector types as associated types (all `Copy`) and the
/// primitive operations the generated bodies call on `self`: sign casts, reading and
/// writing a single lane of a vector by index, and saturation.
pub trait Core { pub trait Core {
type u8: Copy;
type u16: Copy;
type u32: Copy;
type u64: Copy;
type i8: Copy;
type i16: Copy;
type i32: Copy;
type i64: Copy;
type __m128i: Copy; type __m128i: Copy;
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> u16; fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16;
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> i16;
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: u8); fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: i8); fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
fn saturate_u8(&mut self, elem: i16) -> u8; fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
} }
/// Stateless `Core` backend that works on concrete values: its associated types are the
/// plain Rust integers and `[u8; 16]` for `__m128i`.
pub struct ValueCore;
// `Core` implemented on concrete values. A `__m128i` is a 16-byte array; the 16-bit
// lane `idx` occupies bytes `idx * 2` and `idx * 2 + 1`, and the 8-bit lane `idx`
// is byte `idx`. An out-of-range `idx` panics on the array index.
impl Core for ValueCore { impl Core for ValueCore {
type u8 = u8;
type u16 = u16;
type u32 = u32;
type u64 = u64;
type i8 = i8;
type i16 = i16;
type i32 = i32;
type i64 = i64;
type __m128i = [u8; 16]; type __m128i = [u8; 16];
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> u16 { fn cast_sign_i16_u16(&mut self, value: Self::i16) -> Self::u16 {
let first = value[(idx * 2) as usize]; value as _
let second = value[(idx * 2 + 1) as usize];
// todo: le? be?
((first << 8) as u16) | (second as u16)
} }
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> i16 { fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16 {
let first = value[(idx * 2) as usize]; let first = value[(idx * 2 + 1) as usize];
let second = value[(idx * 2 + 1) as usize]; let second = value[(idx * 2) as usize];
// todo: le? be?
(((first << 8) as u16) | (second as u16)) as i16 ((first as u16) << 8) | (second as u16)
} }
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: u8) { fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
let first = value[(idx * 2 + 1) as usize];
let second = value[(idx * 2) as usize];
((((first as u16) << 8) as u16) | (second as u16)) as i16
}
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
place[idx as usize] = value; place[idx as usize] = value;
} }
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: i8) { fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) {
place[idx as usize] = value as u8; place[idx as usize] = value as u8;
} }
fn saturate_u8(&mut self, elem: i16) -> u8 { fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
// The low byte is stored at the lower index and the high byte at the higher one,
// matching the byte order the 16-bit lane getters read back.
let first = (value & 0xFF) as u8;
let second = (value >> 8) as u8;
place[(idx * 2) as usize] = first;
place[(idx * 2 + 1) as usize] = second;
}
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8 {
let clamp = elem.clamp(0, u8::MAX as i16); let clamp = elem.clamp(0, u8::MAX as i16);
clamp as u8 clamp as u8
} }
} }
/// Method-less trait parameterised by a lane count `N` and an element type `Elem`.
// NOTE(review): nothing in the visible code implements or uses this yet — confirm intent.
pub trait Lanes<const N: usize, Elem> {}
/// Concrete vector types re-exported by the generated `soft_arch` module.
mod soft_arch_types {
/// Same representation as `ValueCore`'s `__m128i` associated type.
pub type __m128i = [u8; 16];
}
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,34 @@
// Compares the software implementation against the real x86_64 intrinsics.
// Only compiled on x86_64, where `core::arch::x86_64` is available.
#[cfg(target_arch = "x86_64")]
mod x86_compare {
// Expands the given statements twice: once with the `soft_arch` functions in scope
// and once with the hardware intrinsics in scope (inside `unsafe`). The hardware
// result is transmuted to `[u8; 16]` and the two results are asserted equal.
macro_rules! hard_soft_same_128 {
($($stmt:tt)*) => {
let soft = {
use crate::x86::soft_arch::*;
$($stmt)*
};
let hard = unsafe {
std::mem::transmute::<_, [u8; 16]>({
use core::arch::x86_64::*;
$($stmt)*
})
};
assert_eq!(soft, hard);
};
}
#[test]
fn _mm_setr_epi16() {
hard_soft_same_128! {
_mm_setr_epi16(0, -1, 100, 3535, 35, 2, i16::MIN, i16::MAX)
};
}
#[test]
fn _mm_packus_epi16() {
hard_soft_same_128! {
// 0x100 is above `u8::MAX` and -1 is below zero, so both ends of the
// unsigned saturation are exercised.
let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
_mm_packus_epi16(a, b)
}
}
}