more pack

This commit is contained in:
nora 2024-01-06 00:03:23 +01:00
parent 2f60340a3b
commit 9e4ce7913a
4 changed files with 131 additions and 6 deletions

View file

@ -198,6 +198,7 @@ impl VariableType {
let (is_signed, elem_width) = match etype {
"SI8" => (true, 8),
"SI16" => (true, 16),
"SI32" => (true, 32),
"UI8" => (false, 8),
"UI16" => (false, 16),
_ => bail!("unknown element type: {etype}"),

View file

@ -88,4 +88,10 @@ fn main() -> Result<()> {
Ok(())
}
const INTRINSICS_GENERATE: &[&str] = &["_mm_packus_epi16", "_mm_packs_epi16", "_mm_setr_epi16"];
// Names of the x86 intrinsics this tool works on. Presumably this is the set for which
// implementations and tests are generated (inferred from the constant's name; the code
// that consumes it is not visible here — confirm before relying on ordering).
const INTRINSICS_GENERATE: &[&str] = &[
"_mm_packus_epi16",
"_mm_packs_epi16",
"_mm_packus_epi32",
"_mm_packs_epi32",
"_mm_setr_epi16",
];

View file

@ -84,6 +84,37 @@ pub trait Intrinsics: super::Core {
let __tmp = self.saturate8(__tmp);
self.set_lane___m128i_i8(dst, 15u64, __tmp);
}
/// Packs the signed 32-bit lanes of `a` and `b` into the signed 16-bit lanes of `dst`.
///
/// Lanes 0..=3 of `dst` are produced from lanes 0..=3 of `a`, lanes 4..=7 of `dst`
/// from lanes 0..=3 of `b`. Every value passes through `saturate16` before it is
/// stored.
fn _mm_packs_epi32(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // Low half of `dst`: one 16-bit lane per 32-bit lane of `a`.
    for lane in 0u64..4 {
        let wide = self.get_lane___m128i_i32(a, lane);
        let narrow = self.saturate16(wide);
        self.set_lane___m128i_i16(dst, lane, narrow);
    }
    // High half of `dst`: same treatment for `b`, shifted up by four lanes.
    for lane in 0u64..4 {
        let wide = self.get_lane___m128i_i32(b, lane);
        let narrow = self.saturate16(wide);
        self.set_lane___m128i_i16(dst, lane + 4, narrow);
    }
}
fn _mm_packus_epi16(
&mut self,
dst: &mut Self::__m128i,
@ -139,6 +170,37 @@ pub trait Intrinsics: super::Core {
let __tmp = self.saturate_u8(__tmp);
self.set_lane___m128i_u8(dst, 15u64, __tmp);
}
/// Packs the signed 32-bit lanes of `a` and `b` into the unsigned 16-bit lanes of `dst`.
///
/// Lanes 0..=3 of `dst` are produced from lanes 0..=3 of `a`, lanes 4..=7 of `dst`
/// from lanes 0..=3 of `b`. Every value passes through `saturate_u16` before it is
/// stored.
fn _mm_packus_epi32(
    &mut self,
    dst: &mut Self::__m128i,
    a: Self::__m128i,
    b: Self::__m128i,
) {
    // Low half of `dst`: one 16-bit lane per 32-bit lane of `a`.
    for lane in 0u64..4 {
        let wide = self.get_lane___m128i_i32(a, lane);
        let narrow = self.saturate_u16(wide);
        self.set_lane___m128i_u16(dst, lane, narrow);
    }
    // High half of `dst`: same treatment for `b`, shifted up by four lanes.
    for lane in 0u64..4 {
        let wide = self.get_lane___m128i_i32(b, lane);
        let narrow = self.saturate_u16(wide);
        self.set_lane___m128i_u16(dst, lane + 4, narrow);
    }
}
}
pub mod soft_arch {
pub use super::super::soft_arch_types::*;
@ -163,11 +225,21 @@ pub mod soft_arch {
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
output
}
/// Software version of `_mm_packs_epi32`: runs the `ValueCore` implementation on `a`
/// and `b` and returns the vector it fills in.
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: NOTE(review) — relies on the all-zero bit pattern being a valid `__m128i`;
    // presumably a plain byte array, confirm against `soft_arch_types`.
    let mut packed: __m128i = unsafe { std::mem::zeroed() };
    super::super::ValueCore._mm_packs_epi32(&mut packed, a, b);
    packed
}
/// Software version of `_mm_packus_epi16`: runs the `ValueCore` implementation on `a`
/// and `b` and returns the vector it fills in.
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: NOTE(review) — relies on the all-zero bit pattern being a valid `__m128i`;
    // presumably a plain byte array, confirm against `soft_arch_types`.
    let mut packed: __m128i = unsafe { std::mem::zeroed() };
    super::super::ValueCore._mm_packus_epi16(&mut packed, a, b);
    packed
}
/// Software version of `_mm_packus_epi32`: runs the `ValueCore` implementation on `a`
/// and `b` and returns the vector it fills in.
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: NOTE(review) — relies on the all-zero bit pattern being a valid `__m128i`;
    // presumably a plain byte array, confirm against `soft_arch_types`.
    let mut packed: __m128i = unsafe { std::mem::zeroed() };
    super::super::ValueCore._mm_packus_epi32(&mut packed, a, b);
    packed
}
}
#[cfg(all(test, target_arch = "x86_64"))]
pub mod tests {
@ -190,13 +262,31 @@ pub mod tests {
}
}
#[test]
fn _mm_packus_epi16() {
// Builds two vectors from fixed 16-bit values (read back by the intrinsic as four 32-bit
// lanes each) and runs `_mm_packs_epi32` on them inside `hard_soft_same_128!`. Presumably
// the macro evaluates the block against both the hardware and the software intrinsics and
// compares the results — the macro definition is not visible here.
// NOTE(review): every 32-bit lane formed from these values has a high half other than
// 0 / -1, so all eight lanes hit the saturating path; the in-range path is not exercised.
fn _mm_packs_epi32() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(18077i16, 23617i16, - 9205i16, 21233i16, - 4332i16,
- 31339i16, 23623i16, - 22080i16); let b = _mm_setr_epi16(- 1436i16, -
30227i16, 8629i16, 10922i16, - 16731i16, - 1013i16, - 14310i16, 2892i16);
_mm_packs_epi32(a, b) }
}
}
// Builds two vectors of fixed 16-bit values and runs `_mm_packus_epi16` on them inside
// `hard_soft_same_128!`. Presumably the macro evaluates the block against both the hardware
// and the software intrinsics and compares the results — the macro definition is not
// visible here.
// NOTE(review): none of the sixteen inputs lies in 0..=255, so only the saturating path of
// the unsigned 16 -> 8 bit narrowing is exercised.
#[test]
fn _mm_packus_epi16() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(- 28568i16, 12614i16, 20103i16, 32412i16, -
28704i16, - 27930i16, 4197i16, 1829i16); let b = _mm_setr_epi16(9149i16,
18759i16, 30885i16, - 3879i16, 21600i16, 24454i16, 23524i16, 10765i16);
_mm_packus_epi16(a, b) }
}
}
// Builds two vectors from fixed 16-bit values (read back by the intrinsic as four 32-bit
// lanes each) and runs `_mm_packus_epi32` on them inside `hard_soft_same_128!`. Presumably
// the macro evaluates the block against both the hardware and the software intrinsics and
// compares the results — the macro definition is not visible here.
// NOTE(review): every 32-bit lane formed from these values has a non-zero high half, so all
// eight lanes saturate (to 0 or 65535); the in-range path is not exercised.
// NOTE(review): the hardware `_mm_packus_epi32` is an SSE4.1 instruction, while this test
// module is only gated on `target_arch = "x86_64"` — confirm the harness checks the CPU
// feature before calling it.
#[test]
fn _mm_packus_epi32() {
hard_soft_same_128! {
{ let a = _mm_setr_epi16(32539i16, 26890i16, - 3892i16, 4386i16, 18704i16,
8253i16, - 29217i16, 32013i16); let b = _mm_setr_epi16(7448i16, 2172i16, -
14764i16, - 1068i16, - 25463i16, 21215i16, - 31392i16, - 14015i16);
_mm_packus_epi32(a, b) }
}
}
}

View file

@ -21,13 +21,18 @@ pub trait Core {
// Lane readers: return element `idx` of a 128-bit vector at the width and signedness named
// in the method. In `ValueCore`, the 32-bit reader assembles a lane from bytes
// `idx * 4 ..= idx * 4 + 3` with the lowest-indexed byte least significant, and the signed
// 32-bit reader is a bit-for-bit cast of the unsigned one.
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32;
// Lane writers: store `value` into element `idx` of `place`. In `ValueCore`, the signed
// 16-bit writer forwards to the unsigned one after an `as` cast.
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16);
// Saturating narrowing from a signed element to the next smaller width. `ValueCore`
// implements each as a clamp to the target type's range (the `_u` variants clamp to
// `0..=MAX` of the unsigned target) followed by a cast.
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
}
pub struct ValueCore;
@ -57,10 +62,20 @@ impl Core for ValueCore {
}
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
let first = value[(idx * 2 + 1) as usize];
let second = value[(idx * 2) as usize];
self.get_lane___m128i_u16(value, idx) as i16
}
((((first as u16) << 8) as u16) | (second as u16)) as i16
/// Reads 32-bit lane `idx` of `value`: the four bytes at `idx * 4 ..= idx * 4 + 3`,
/// combined with the lowest-indexed byte as the least significant one.
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
    // Walk from the most significant byte (offset 3) down to offset 0, shifting the
    // accumulator up by one byte before OR-ing in the next.
    (0..4u64)
        .rev()
        .fold(0u32, |acc, offset| (acc << 8) | (value[(idx * 4 + offset) as usize] as u32))
}
/// Reads 32-bit lane `idx` of `value` as a signed integer by reinterpreting the bits
/// returned by the unsigned reader.
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32 {
    let bits = self.get_lane___m128i_u32(value, idx);
    bits as i32
}
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
@ -68,7 +83,7 @@ impl Core for ValueCore {
}
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) {
place[idx as usize] = value as u8;
self.set_lane___m128i_u8(place, idx, value as u8);
}
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
@ -78,6 +93,10 @@ impl Core for ValueCore {
place[(idx * 2 + 1) as usize] = second;
}
/// Stores the signed 16-bit `value` into lane `idx` of `place` by handing its bit
/// pattern to the unsigned 16-bit writer.
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16) {
    let bits = value as u16;
    self.set_lane___m128i_u16(place, idx, bits);
}
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
clamp as i8
@ -87,6 +106,15 @@ impl Core for ValueCore {
let clamp = elem.clamp(0, u8::MAX as i16);
clamp as u8
}
/// Narrows a signed 32-bit value to `i16`, pinning out-of-range inputs to `i16::MIN`
/// or `i16::MAX`.
fn saturate16(&mut self, elem: Self::i32) -> Self::i16 {
    let lowest = i32::from(i16::MIN);
    let highest = i32::from(i16::MAX);
    // Once bounded, the value fits in 16 bits, so the cast cannot truncate.
    elem.max(lowest).min(highest) as i16
}
/// Narrows a signed 32-bit value to `u16`, pinning negative inputs to 0 and inputs
/// above `u16::MAX` to `u16::MAX`.
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16 {
    let highest = i32::from(u16::MAX);
    // Once bounded to 0..=65535, the value fits in 16 unsigned bits, so the cast cannot
    // truncate.
    elem.max(0).min(highest) as u16
}
}
mod soft_arch_types {