mirror of
https://github.com/Noratrieb/intringen.git
synced 2026-01-14 13:55:02 +01:00
commit
This commit is contained in:
parent
f70bb0b3ab
commit
ef1aaa50ad
4 changed files with 201 additions and 117 deletions
|
|
@ -122,7 +122,9 @@ impl BlockBuilder {
|
||||||
fn generate_body_soft_arch(intr: &Intrinsic) -> Result<syn::Block> {
|
fn generate_body_soft_arch(intr: &Intrinsic) -> Result<syn::Block> {
|
||||||
let mut block = BlockBuilder::default();
|
let mut block = BlockBuilder::default();
|
||||||
|
|
||||||
block.stmts.push(syn::parse_quote! { let mut output = unsafe { std::mem::zeroed() }; });
|
block
|
||||||
|
.stmts
|
||||||
|
.push(syn::parse_quote! { let mut output = unsafe { std::mem::zeroed() }; });
|
||||||
|
|
||||||
let name = ident(&intr.name);
|
let name = ident(&intr.name);
|
||||||
|
|
||||||
|
|
@ -135,7 +137,9 @@ fn generate_body_soft_arch(intr: &Intrinsic) -> Result<syn::Block> {
|
||||||
super::super::ValueCore.#name(&mut output, #(#args),*);
|
super::super::ValueCore.#name(&mut output, #(#args),*);
|
||||||
});
|
});
|
||||||
|
|
||||||
block.stmts.push(syn::Stmt::Expr(syn::parse_quote! { output }, None));
|
block
|
||||||
|
.stmts
|
||||||
|
.push(syn::Stmt::Expr(syn::parse_quote! { output }, None));
|
||||||
let block = block.stmts;
|
let block = block.stmts;
|
||||||
Ok(syn::parse_quote! {
|
Ok(syn::parse_quote! {
|
||||||
{ #(#block)* }
|
{ #(#block)* }
|
||||||
|
|
@ -200,26 +204,32 @@ fn random_value(ty: &str, rng: &mut SmallRng) -> Result<syn::Expr> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
struct VariableType {
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
is_signed: bool,
|
enum Type {
|
||||||
rawtype_signed: bool,
|
Vector(VectorType),
|
||||||
elem_width: u64,
|
Scalar {
|
||||||
#[allow(dead_code)]
|
/// Some parameters have C types that are signed, while their `etype` is not.
|
||||||
full_width: u64,
|
c_is_signed: bool,
|
||||||
raw_type: String,
|
elemty: ElementType,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VariableType {
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
|
struct VectorType {
|
||||||
|
lanes: u64,
|
||||||
|
elem: ElementType,
|
||||||
|
raw_type: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||||
|
struct ElementType {
|
||||||
|
is_signed: bool,
|
||||||
|
width: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Type {
|
||||||
fn of(etype: &str, ty: &str) -> Result<Self> {
|
fn of(etype: &str, ty: &str) -> Result<Self> {
|
||||||
let (rawtype_signed, full_width) = match map_type_to_rust(ty) {
|
let (etype_signed, etype_width) = match etype {
|
||||||
"__m128i" => (false, 128),
|
|
||||||
"i8" => (true, 8),
|
|
||||||
"i16" => (true, 16),
|
|
||||||
"i32" => (true, 32),
|
|
||||||
"i64" => (true, 64),
|
|
||||||
_ => bail!("unknown type: {ty}"),
|
|
||||||
};
|
|
||||||
let (is_signed, elem_width) = match etype {
|
|
||||||
"SI8" => (true, 8),
|
"SI8" => (true, 8),
|
||||||
"SI16" => (true, 16),
|
"SI16" => (true, 16),
|
||||||
"SI32" => (true, 32),
|
"SI32" => (true, 32),
|
||||||
|
|
@ -229,28 +239,65 @@ impl VariableType {
|
||||||
"UI64" => (false, 64),
|
"UI64" => (false, 64),
|
||||||
_ => bail!("unknown element type: {etype}"),
|
_ => bail!("unknown element type: {etype}"),
|
||||||
};
|
};
|
||||||
Ok(Self {
|
let elem = ElementType {
|
||||||
is_signed,
|
is_signed: etype_signed,
|
||||||
rawtype_signed,
|
width: etype_width,
|
||||||
full_width,
|
};
|
||||||
elem_width,
|
|
||||||
raw_type: map_type_to_rust(ty).to_owned(),
|
let scalar = |sign| Type::Scalar {
|
||||||
|
c_is_signed: sign,
|
||||||
|
elemty: elem,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(match ty {
|
||||||
|
"__m128i" => Type::Vector(VectorType {
|
||||||
|
lanes: 128 / etype_width,
|
||||||
|
elem,
|
||||||
|
raw_type: "__m128i",
|
||||||
|
}),
|
||||||
|
"char" => scalar(true),
|
||||||
|
"short" => scalar(true),
|
||||||
|
"int" => scalar(true),
|
||||||
|
"__int64" => scalar(true),
|
||||||
|
_ => bail!("unknown type: {ty}"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rust_type(&self) -> String {
|
||||||
|
match self {
|
||||||
|
Type::Vector(v) => v.raw_type.to_owned(),
|
||||||
|
Type::Scalar { elemty, .. } => elemty.rust_type(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expect_vector(&self) -> VectorType {
|
||||||
|
let Self::Vector(ty) = *self else {
|
||||||
|
panic!("expected vector, found scalar");
|
||||||
|
};
|
||||||
|
ty
|
||||||
|
}
|
||||||
|
fn expect_scalar(&self) -> ElementType {
|
||||||
|
let Self::Scalar { elemty, .. } = *self else {
|
||||||
|
panic!("expected scalar, found vector");
|
||||||
|
};
|
||||||
|
elemty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ElementType {
|
||||||
fn rust_type(&self) -> String {
|
fn rust_type(&self) -> String {
|
||||||
let pre = if self.is_signed { 'i' } else { 'u' };
|
let pre = if self.is_signed { 'i' } else { 'u' };
|
||||||
format!("{pre}{}", self.elem_width)
|
format!("{pre}{}", self.width)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn generate_body(instr: &Intrinsic) -> Result<syn::Block> {
|
fn generate_body(instr: &Intrinsic) -> Result<syn::Block> {
|
||||||
let opstmts = parse_op(instr)?;
|
let opstmts = parse_op(instr)?;
|
||||||
|
|
||||||
let type_of_ident = |ident: &str| -> Result<VariableType> {
|
let type_of_ident = |ident: &str| -> Result<Type> {
|
||||||
for param in &instr.parameter {
|
for param in &instr.parameter {
|
||||||
if param.varname.as_deref() == Some(ident) {
|
if param.varname.as_deref() == Some(ident) {
|
||||||
return VariableType::of(
|
return Type::of(
|
||||||
param.etype.as_deref().ok_or_eyre("no param etype")?,
|
param.etype.as_deref().ok_or_eyre("no param etype")?,
|
||||||
param.r#type.as_deref().ok_or_eyre("no param type")?,
|
param.r#type.as_deref().ok_or_eyre("no param type")?,
|
||||||
);
|
);
|
||||||
|
|
@ -258,7 +305,7 @@ fn generate_body(instr: &Intrinsic) -> Result<syn::Block> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if instr.ret.varname.as_deref() == Some(ident) {
|
if instr.ret.varname.as_deref() == Some(ident) {
|
||||||
return VariableType::of(
|
return Type::of(
|
||||||
instr.ret.etype.as_deref().ok_or_eyre("no param etype")?,
|
instr.ret.etype.as_deref().ok_or_eyre("no param etype")?,
|
||||||
instr.ret.r#type.as_deref().ok_or_eyre("no param type")?,
|
instr.ret.r#type.as_deref().ok_or_eyre("no param type")?,
|
||||||
);
|
);
|
||||||
|
|
@ -274,8 +321,8 @@ fn gen_idx(
|
||||||
method_prefix: &str,
|
method_prefix: &str,
|
||||||
lhs: Expr,
|
lhs: Expr,
|
||||||
idx: Expr,
|
idx: Expr,
|
||||||
type_of_ident: &impl Fn(&str) -> Result<VariableType>,
|
type_of_ident: &impl Fn(&str) -> Result<Type>,
|
||||||
) -> Result<(syn::Ident, syn::Ident, syn::Expr, VariableType)> {
|
) -> Result<(syn::Ident, syn::Ident, syn::Expr, VectorType)> {
|
||||||
let Expr::Ident(identifier) = lhs else {
|
let Expr::Ident(identifier) = lhs else {
|
||||||
bail!("lhs of indexing must be identifier");
|
bail!("lhs of indexing must be identifier");
|
||||||
};
|
};
|
||||||
|
|
@ -283,7 +330,7 @@ fn gen_idx(
|
||||||
bail!("idx argument must be range");
|
bail!("idx argument must be range");
|
||||||
};
|
};
|
||||||
|
|
||||||
let ty = type_of_ident(&identifier)?;
|
let ty = type_of_ident(&identifier)?.expect_vector();
|
||||||
|
|
||||||
let (lane_idx, size): (syn::Expr, _) = match (*left, *right) {
|
let (lane_idx, size): (syn::Expr, _) = match (*left, *right) {
|
||||||
(Expr::Int(high), Expr::Int(low)) => {
|
(Expr::Int(high), Expr::Int(low)) => {
|
||||||
|
|
@ -292,7 +339,7 @@ fn gen_idx(
|
||||||
}
|
}
|
||||||
let size = high - low + 1; // (inclusive)
|
let size = high - low + 1; // (inclusive)
|
||||||
|
|
||||||
let lane_idx = low / ty.elem_width;
|
let lane_idx = low / ty.elem.width;
|
||||||
|
|
||||||
(syn::parse_quote! { #lane_idx }, size)
|
(syn::parse_quote! { #lane_idx }, size)
|
||||||
}
|
}
|
||||||
|
|
@ -324,24 +371,23 @@ fn gen_idx(
|
||||||
if !size.is_power_of_two() {
|
if !size.is_power_of_two() {
|
||||||
bail!("indexing size must be power of two");
|
bail!("indexing size must be power of two");
|
||||||
}
|
}
|
||||||
if size != ty.elem_width {
|
if size != ty.elem.width {
|
||||||
bail!(
|
bail!(
|
||||||
"unsupported not-direct element indexing, size={size}, element size={}",
|
"unsupported not-direct element indexing, size={size}, element size={}",
|
||||||
ty.elem_width
|
ty.elem.width
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let raw = &ty.raw_type;
|
|
||||||
let rust_type = ty.rust_type();
|
|
||||||
|
|
||||||
let identifier = ident(&identifier);
|
let identifier = ident(&identifier);
|
||||||
let method = ident(&format!("{method_prefix}_lane_{raw}_{rust_type}"));
|
let method = ident(&format!(
|
||||||
|
"{method_prefix}_lane_{}_{}",
|
||||||
|
ty.raw_type,
|
||||||
|
ty.elem.rust_type()
|
||||||
|
));
|
||||||
Ok((identifier, method, lane_idx, ty))
|
Ok((identifier, method, lane_idx, ty))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_block(
|
fn gen_block(opstmts: Vec<Stmt>, type_of_ident: &impl Fn(&str) -> Result<Type>) -> Result<Block> {
|
||||||
opstmts: Vec<Stmt>,
|
|
||||||
type_of_ident: &impl Fn(&str) -> Result<VariableType>,
|
|
||||||
) -> Result<Block> {
|
|
||||||
let mut block = BlockBuilder::default();
|
let mut block = BlockBuilder::default();
|
||||||
|
|
||||||
for stmt in opstmts {
|
for stmt in opstmts {
|
||||||
|
|
@ -387,7 +433,9 @@ fn gen_block(
|
||||||
let body = gen_block(body, type_of_ident)?;
|
let body = gen_block(body, type_of_ident)?;
|
||||||
for_.body = body;
|
for_.body = body;
|
||||||
|
|
||||||
block.stmts.push(syn::Stmt::Expr(syn::Expr::ForLoop(for_), None));
|
block
|
||||||
|
.stmts
|
||||||
|
.push(syn::Stmt::Expr(syn::Expr::ForLoop(for_), None));
|
||||||
}
|
}
|
||||||
_ => todo!(),
|
_ => todo!(),
|
||||||
}
|
}
|
||||||
|
|
@ -398,13 +446,11 @@ fn gen_block(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
type RustType = String;
|
|
||||||
|
|
||||||
fn gen_expr_tmp(
|
fn gen_expr_tmp(
|
||||||
block: &mut BlockBuilder,
|
block: &mut BlockBuilder,
|
||||||
expr: Expr,
|
expr: Expr,
|
||||||
type_of_ident: &impl Fn(&str) -> Result<VariableType>,
|
type_of_ident: &impl Fn(&str) -> Result<Type>,
|
||||||
) -> Result<(syn::Expr, Option<RustType>)> {
|
) -> Result<(syn::Expr, Option<Type>)> {
|
||||||
let tmp = |block: &mut BlockBuilder, inner: syn::Expr| {
|
let tmp = |block: &mut BlockBuilder, inner: syn::Expr| {
|
||||||
let var = block.tmp();
|
let var = block.tmp();
|
||||||
let stmt = syn::parse_quote! { let #var = #inner; };
|
let stmt = syn::parse_quote! { let #var = #inner; };
|
||||||
|
|
@ -418,12 +464,19 @@ fn gen_expr_tmp(
|
||||||
let ty = type_of_ident(&identifier);
|
let ty = type_of_ident(&identifier);
|
||||||
let identifier = ident(&identifier);
|
let identifier = ident(&identifier);
|
||||||
match ty {
|
match ty {
|
||||||
Ok(ty) if ty.is_signed != ty.rawtype_signed => {
|
Ok(Type::Scalar {
|
||||||
|
c_is_signed,
|
||||||
|
elemty,
|
||||||
|
}) if elemty.is_signed != c_is_signed => {
|
||||||
// intel intrinsics types kinda lie sometimes.
|
// intel intrinsics types kinda lie sometimes.
|
||||||
// _mm_setr_epi16 says the etype of the argument is UI16 (unsigned),
|
// _mm_setr_epi16 says the etype of the argument is UI16 (unsigned),
|
||||||
// while the actual type is short (signed). Do a cast to the etype, since we used that.
|
// while the actual type is short (signed). Do a cast to the etype, since we used that.
|
||||||
let from = &ty.raw_type;
|
let from = ElementType {
|
||||||
let to = ty.rust_type();
|
is_signed: c_is_signed,
|
||||||
|
width: elemty.width,
|
||||||
|
}
|
||||||
|
.rust_type();
|
||||||
|
let to = elemty.rust_type();
|
||||||
let method = ident(&format!("cast_sign_{from}_{to}"));
|
let method = ident(&format!("cast_sign_{from}_{to}"));
|
||||||
(
|
(
|
||||||
tmp(block, syn::parse_quote! { self.#method(#identifier) }),
|
tmp(block, syn::parse_quote! { self.#method(#identifier) }),
|
||||||
|
|
@ -439,20 +492,26 @@ fn gen_expr_tmp(
|
||||||
block,
|
block,
|
||||||
syn::parse_quote! { self.#method(#identifier, #lane_idx) },
|
syn::parse_quote! { self.#method(#identifier, #lane_idx) },
|
||||||
);
|
);
|
||||||
(expr, Some(ty.rust_type()))
|
(
|
||||||
|
expr,
|
||||||
|
Some(Type::Scalar {
|
||||||
|
c_is_signed: ty.elem.is_signed,
|
||||||
|
elemty: ty.elem,
|
||||||
|
}),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
Expr::Range { .. } => todo!(),
|
Expr::Range { .. } => todo!(),
|
||||||
Expr::Call { function, args } => {
|
Expr::Call { function, args } => {
|
||||||
let (args, arg_tys): (Vec<_>, Vec<_>) = args
|
let (args, arg_tys): (Vec<_>, Vec<_>) = args
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|arg| gen_expr_tmp(block, arg, type_of_ident))
|
.map(|arg| gen_expr_tmp(block, arg, type_of_ident))
|
||||||
.collect::<Result<Vec<(syn::Expr, Option<RustType>)>>>()?
|
.collect::<Result<Vec<(syn::Expr, _)>>>()?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.unzip();
|
.unzip();
|
||||||
|
|
||||||
let argtype = arg_tys
|
let argtype = arg_tys
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|argty| argty.expect("argument type unknown for polymorphic function"))
|
.map(|argty| argty.unwrap().rust_type())
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join("_");
|
.join("_");
|
||||||
|
|
||||||
|
|
@ -463,10 +522,7 @@ fn gen_expr_tmp(
|
||||||
};
|
};
|
||||||
|
|
||||||
let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str()));
|
let function = ident(&heck::ToSnekCase::to_snek_case(function.as_str()));
|
||||||
let expr = tmp(
|
let expr = tmp(block, syn::parse_quote! { self.#function( #(#args),* ) });
|
||||||
block,
|
|
||||||
syn::parse_quote! { self.#function( #(#args),* ) },
|
|
||||||
);
|
|
||||||
(expr, None)
|
(expr, None)
|
||||||
}
|
}
|
||||||
Expr::BinaryOp { op, lhs, rhs } => {
|
Expr::BinaryOp { op, lhs, rhs } => {
|
||||||
|
|
@ -479,6 +535,7 @@ fn gen_expr_tmp(
|
||||||
|
|
||||||
let expr = match &lhs_ty {
|
let expr = match &lhs_ty {
|
||||||
// probably a rust primitive operation
|
// probably a rust primitive operation
|
||||||
|
// this is extremely wonky....., but rustc typeck will complain if we get this wrong
|
||||||
None => {
|
None => {
|
||||||
let token = match op {
|
let token = match op {
|
||||||
BinaryOpKind::Add => quote::quote! { + },
|
BinaryOpKind::Add => quote::quote! { + },
|
||||||
|
|
@ -491,8 +548,19 @@ fn gen_expr_tmp(
|
||||||
BinaryOpKind::Add => "add",
|
BinaryOpKind::Add => "add",
|
||||||
BinaryOpKind::Mul => "mul",
|
BinaryOpKind::Mul => "mul",
|
||||||
};
|
};
|
||||||
let method = ident(&format!("{prefix}_{ty}"));
|
|
||||||
tmp(block, syn::parse_quote! { self.#method(#lhs, #rhs) })
|
let ty = ty.expect_scalar();
|
||||||
|
let method = ident(&format!(
|
||||||
|
"ext_{}_{}64",
|
||||||
|
ty.rust_type(),
|
||||||
|
if ty.is_signed { "s" } else { "u" }
|
||||||
|
));
|
||||||
|
let lhs_ext = tmp(block, syn::parse_quote! { self.#method(#lhs) });
|
||||||
|
let rhs_ext = tmp(block, syn::parse_quote! { self.#method(#rhs) });
|
||||||
|
|
||||||
|
// TODO: EXTEND
|
||||||
|
let method = ident(&format!("{prefix}_64"));
|
||||||
|
tmp(block, syn::parse_quote! { self.#method(#lhs_ext, #rhs_ext) })
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ const INTRINSICS_GENERATE: &[&str] = &[
|
||||||
"_mm_add_epi32",
|
"_mm_add_epi32",
|
||||||
"_mm_add_epi64",
|
"_mm_add_epi64",
|
||||||
// todo: float and __m64 stuff
|
// todo: float and __m64 stuff
|
||||||
//"_mm_adds_epi16",
|
"_mm_adds_epi16",
|
||||||
//"_mm_adds_epi8",
|
//"_mm_adds_epi8",
|
||||||
//"_mm_adds_epu16",
|
//"_mm_adds_epu16",
|
||||||
//"_mm_adds_epu8",
|
//"_mm_adds_epu8",
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,10 @@ pub trait Intrinsics: super::Core {
|
||||||
let i = (j * 16u64);
|
let i = (j * 16u64);
|
||||||
let __tmp0 = self.get_lane___m128i_u16(a, (i / 16u64));
|
let __tmp0 = self.get_lane___m128i_u16(a, (i / 16u64));
|
||||||
let __tmp1 = self.get_lane___m128i_u16(b, (i / 16u64));
|
let __tmp1 = self.get_lane___m128i_u16(b, (i / 16u64));
|
||||||
let __tmp2 = self.add_u16(__tmp0, __tmp1);
|
let __tmp2 = self.ext_u16_u64(__tmp0);
|
||||||
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp2);
|
let __tmp3 = self.ext_u16_u64(__tmp1);
|
||||||
|
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
||||||
|
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn _mm_add_epi32(
|
fn _mm_add_epi32(
|
||||||
|
|
@ -25,8 +27,10 @@ pub trait Intrinsics: super::Core {
|
||||||
let i = (j * 32u64);
|
let i = (j * 32u64);
|
||||||
let __tmp0 = self.get_lane___m128i_u32(a, (i / 32u64));
|
let __tmp0 = self.get_lane___m128i_u32(a, (i / 32u64));
|
||||||
let __tmp1 = self.get_lane___m128i_u32(b, (i / 32u64));
|
let __tmp1 = self.get_lane___m128i_u32(b, (i / 32u64));
|
||||||
let __tmp2 = self.add_u32(__tmp0, __tmp1);
|
let __tmp2 = self.ext_u32_u64(__tmp0);
|
||||||
self.set_lane___m128i_u32(dst, (i / 32u64), __tmp2);
|
let __tmp3 = self.ext_u32_u64(__tmp1);
|
||||||
|
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
||||||
|
self.set_lane___m128i_u32(dst, (i / 32u64), __tmp4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn _mm_add_epi64(
|
fn _mm_add_epi64(
|
||||||
|
|
@ -39,8 +43,27 @@ pub trait Intrinsics: super::Core {
|
||||||
let i = (j * 64u64);
|
let i = (j * 64u64);
|
||||||
let __tmp0 = self.get_lane___m128i_u64(a, (i / 64u64));
|
let __tmp0 = self.get_lane___m128i_u64(a, (i / 64u64));
|
||||||
let __tmp1 = self.get_lane___m128i_u64(b, (i / 64u64));
|
let __tmp1 = self.get_lane___m128i_u64(b, (i / 64u64));
|
||||||
let __tmp2 = self.add_u64(__tmp0, __tmp1);
|
let __tmp2 = self.ext_u64_u64(__tmp0);
|
||||||
self.set_lane___m128i_u64(dst, (i / 64u64), __tmp2);
|
let __tmp3 = self.ext_u64_u64(__tmp1);
|
||||||
|
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
||||||
|
self.set_lane___m128i_u64(dst, (i / 64u64), __tmp4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn _mm_adds_epi16(
|
||||||
|
&mut self,
|
||||||
|
dst: &mut Self::__m128i,
|
||||||
|
a: Self::__m128i,
|
||||||
|
b: Self::__m128i,
|
||||||
|
) {
|
||||||
|
for j in 0u64..=7u64 {
|
||||||
|
let i = (j * 16u64);
|
||||||
|
let __tmp0 = self.get_lane___m128i_i16(a, (i / 16u64));
|
||||||
|
let __tmp1 = self.get_lane___m128i_i16(b, (i / 16u64));
|
||||||
|
let __tmp2 = self.ext_i16_s64(__tmp0);
|
||||||
|
let __tmp3 = self.ext_i16_s64(__tmp1);
|
||||||
|
let __tmp4 = self.add_64(__tmp2, __tmp3);
|
||||||
|
let __tmp5 = self.saturate16(__tmp4);
|
||||||
|
self.set_lane___m128i_u16(dst, (i / 16u64), __tmp5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) {
|
fn _mm_set_epi64x(&mut self, dst: &mut Self::__m128i, e1: Self::i64, e0: Self::i64) {
|
||||||
|
|
@ -363,6 +386,11 @@ pub mod soft_arch {
|
||||||
super::super::ValueCore._mm_add_epi64(&mut output, a, b);
|
super::super::ValueCore._mm_add_epi64(&mut output, a, b);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||||
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore._mm_adds_epi16(&mut output, a, b);
|
||||||
|
output
|
||||||
|
}
|
||||||
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
|
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0);
|
super::super::ValueCore._mm_set_epi64x(&mut output, e1, e0);
|
||||||
|
|
@ -496,92 +524,101 @@ pub mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
|
fn _mm_adds_epi16() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let a = _mm_setr_epi16(9149i16, 18759i16, 30885i16, - 3879i16, 21600i16,
|
||||||
|
24454i16, 23524i16, 10765i16); let b = _mm_setr_epi16(32539i16, 26890i16, -
|
||||||
|
3892i16, 4386i16, 18704i16, 8253i16, - 29217i16, 32013i16); _mm_adds_epi16(a,
|
||||||
|
b) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
fn _mm_set_epi64x() {
|
fn _mm_set_epi64x() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e1 = - 1407335585757566417i64; let e0 = 6810649108177377822i64;
|
{ let e1 = - 589376611403916251i64; let e0 = 3902096933100612535i64;
|
||||||
_mm_set_epi64x(e1, e0) }
|
_mm_set_epi64x(e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi32() {
|
fn _mm_setr_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e3 = 1012103333i32; let e2 = - 1086525223i32; let e1 = - 1399630752i32;
|
{ let e3 = 1973077588i32; let e2 = 650443732i32; let e1 = - 2133091191i32;
|
||||||
let e0 = - 395616378i32; _mm_setr_epi32(e3, e2, e1, e0) }
|
let e0 = - 352824609i32; _mm_setr_epi32(e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi16() {
|
fn _mm_setr_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e7 = 23524i16; let e6 = 10765i16; let e5 = 32539i16; let e4 = 26890i16;
|
{ let e7 = - 31392i16; let e6 = - 14015i16; let e5 = - 32565i16; let e4 = -
|
||||||
let e3 = - 3892i16; let e2 = 4386i16; let e1 = 18704i16; let e0 = 8253i16;
|
11312i16; let e3 = - 4934i16; let e2 = - 19283i16; let e1 = - 27533i16; let
|
||||||
_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
e0 = - 9939i16; _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_setr_epi8() {
|
fn _mm_setr_epi8() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let e15 = - 33i8; let e14 = 13i8; let e13 = 24i8; let e12 = 124i8; let e11
|
{ let e15 = - 46i8; let e14 = - 46i8; let e13 = - 125i8; let e12 = 81i8; let
|
||||||
= 84i8; let e10 = - 44i8; let e9 = - 119i8; let e8 = - 33i8; let e7 = 96i8;
|
e11 = - 56i8; let e10 = - 75i8; let e9 = 54i8; let e8 = 109i8; let e7 = 29i8;
|
||||||
let e6 = 65i8; let e5 = - 53i8; let e4 = - 48i8; let e3 = - 70i8; let e2 = -
|
let e6 = 41i8; let e5 = - 21i8; let e4 = 39i8; let e3 = 89i8; let e2 = -
|
||||||
83i8; let e1 = 115i8; let e0 = 45i8; _mm_setr_epi8(e15, e14, e13, e12, e11,
|
36i8; let e1 = - 88i8; let e0 = 11i8; _mm_setr_epi8(e15, e14, e13, e12, e11,
|
||||||
e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
|
e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi16() {
|
fn _mm_packs_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 9518i16, - 29742i16, 10115i16, 1617i16, 13256i16,
|
{ let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16,
|
||||||
- 2379i16, 19254i16, 7533i16); let b = _mm_setr_epi16(- 17891i16, 30761i16,
|
17046i16, - 12772i16, - 28109i16); let b = _mm_setr_epi16(7409i16, -
|
||||||
2539i16, 4135i16, 26713i16, 16348i16, - 21336i16, 3595i16);
|
30136i16, - 28607i16, - 1975i16, 23451i16, - 32657i16, - 28920i16, -
|
||||||
_mm_packs_epi16(a, b) }
|
2519i16); _mm_packs_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packs_epi32() {
|
fn _mm_packs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(6572i16, - 54i16, 10431i16, - 4614i16, - 1911i16,
|
{ let a = _mm_setr_epi16(- 7284i16, 7023i16, - 31688i16, 4770i16, 28846i16, -
|
||||||
17046i16, - 12772i16, - 28109i16); let b = _mm_setr_epi16(7409i16, -
|
13549i16, 13781i16, - 10474i16); let b = _mm_setr_epi16(12050i16, - 782i16,
|
||||||
30136i16, - 28607i16, - 1975i16, 23451i16, - 32657i16, - 28920i16, -
|
8840i16, 8344i16, 9169i16, 303i16, - 6879i16, - 28778i16); _mm_packs_epi32(a,
|
||||||
2519i16); _mm_packs_epi32(a, b) }
|
b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi16() {
|
fn _mm_packus_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 7284i16, 7023i16, - 31688i16, 4770i16, 28846i16, -
|
{ let a = _mm_setr_epi16(- 11301i16, 10802i16, 18689i16, 12867i16, 18892i16,
|
||||||
13549i16, 13781i16, - 10474i16); let b = _mm_setr_epi16(12050i16, - 782i16,
|
20484i16, - 4754i16, - 28358i16); let b = _mm_setr_epi16(27422i16, -
|
||||||
8840i16, 8344i16, 9169i16, 303i16, - 6879i16, - 28778i16);
|
14791i16, - 32685i16, - 4504i16, - 19709i16, 1090i16, 1898i16, 11224i16);
|
||||||
_mm_packus_epi16(a, b) }
|
_mm_packus_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi32() {
|
fn _mm_packus_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 11301i16, 10802i16, 18689i16, 12867i16, 18892i16,
|
{ let a = _mm_setr_epi16(27569i16, 26879i16, 11743i16, 1055i16, 5327i16, -
|
||||||
20484i16, - 4754i16, - 28358i16); let b = _mm_setr_epi16(27422i16, -
|
1490i16, - 6436i16, 1056i16); let b = _mm_setr_epi16(- 16744i16, 28829i16,
|
||||||
14791i16, - 32685i16, - 4504i16, - 19709i16, 1090i16, 1898i16, 11224i16);
|
23772i16, - 31202i16, 9764i16, 16146i16, 29119i16, 1909i16);
|
||||||
_mm_packus_epi32(a, b) }
|
_mm_packus_epi32(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi8() {
|
fn _mm_abs_epi8() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(27569i16, 26879i16, 11743i16, 1055i16, 5327i16, -
|
{ let a = _mm_setr_epi16(- 4803i16, - 23533i16, - 22862i16, - 25389i16, -
|
||||||
1490i16, - 6436i16, 1056i16); _mm_abs_epi8(a) }
|
16117i16, - 21476i16, 30010i16, - 15743i16); _mm_abs_epi8(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi16() {
|
fn _mm_abs_epi16() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 16744i16, 28829i16, 23772i16, - 31202i16, 9764i16,
|
{ let a = _mm_setr_epi16(- 20689i16, - 11653i16, 22142i16, - 16597i16,
|
||||||
16146i16, 29119i16, 1909i16); _mm_abs_epi16(a) }
|
28514i16, - 15735i16, - 6977i16, - 5493i16); _mm_abs_epi16(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_abs_epi32() {
|
fn _mm_abs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(- 4803i16, - 23533i16, - 22862i16, - 25389i16, -
|
{ let a = _mm_setr_epi16(17059i16, 15712i16, 32305i16, - 23877i16, 29411i16,
|
||||||
16117i16, - 21476i16, 30010i16, - 15743i16); _mm_abs_epi32(a) }
|
- 3868i16, - 10128i16, 25298i16); _mm_abs_epi32(a) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,11 +44,6 @@ pub trait Core {
|
||||||
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
||||||
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
|
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
|
||||||
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
|
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
|
||||||
fn abs_i8(&mut self, elem: Self::i8) -> Self::u8;
|
|
||||||
fn abs_i16(&mut self, elem: Self::i16) -> Self::u16;
|
|
||||||
fn abs_i32(&mut self, elem: Self::i32) -> Self::u32;
|
|
||||||
fn add_u16(&mut self, lhs: Self::u16, rhs: Self::u16) -> Self::u16;
|
|
||||||
fn add_u32(&mut self, lhs: Self::u32, rhs: Self::u32) -> Self::u32;
|
|
||||||
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64;
|
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,7 +81,6 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
|
|
||||||
////// GET LANE
|
////// GET LANE
|
||||||
|
|
||||||
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8 {
|
fn get_lane___m128i_u8(&mut self, value: Self::__m128i, idx: u64) -> Self::u8 {
|
||||||
value[idx as usize]
|
value[idx as usize]
|
||||||
}
|
}
|
||||||
|
|
@ -200,21 +194,6 @@ impl Core for ValueCore {
|
||||||
let clamp = elem.clamp(0, u16::MAX as i32);
|
let clamp = elem.clamp(0, u16::MAX as i32);
|
||||||
clamp as u16
|
clamp as u16
|
||||||
}
|
}
|
||||||
fn abs_i8(&mut self, elem: Self::i8) -> Self::u8 {
|
|
||||||
elem.abs() as u8
|
|
||||||
}
|
|
||||||
fn abs_i16(&mut self, elem: Self::i16) -> Self::u16 {
|
|
||||||
elem.abs() as u16
|
|
||||||
}
|
|
||||||
fn abs_i32(&mut self, elem: Self::i32) -> Self::u32 {
|
|
||||||
elem.abs() as u32
|
|
||||||
}
|
|
||||||
fn add_u16(&mut self, lhs: Self::u16, rhs: Self::u16) -> Self::u16 {
|
|
||||||
lhs.wrapping_add(rhs)
|
|
||||||
}
|
|
||||||
fn add_u32(&mut self, lhs: Self::u32, rhs: Self::u32) -> Self::u32 {
|
|
||||||
lhs.wrapping_add(rhs)
|
|
||||||
}
|
|
||||||
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64 {
|
fn add_u64(&mut self, lhs: Self::u64, rhs: Self::u64) -> Self::u64 {
|
||||||
lhs.wrapping_add(rhs)
|
lhs.wrapping_add(rhs)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue