mirror of
https://github.com/Noratrieb/intringen.git
synced 2026-01-16 06:35:03 +01:00
more pack
This commit is contained in:
parent
2f60340a3b
commit
9e4ce7913a
4 changed files with 131 additions and 6 deletions
|
|
@ -198,6 +198,7 @@ impl VariableType {
|
||||||
let (is_signed, elem_width) = match etype {
|
let (is_signed, elem_width) = match etype {
|
||||||
"SI8" => (true, 8),
|
"SI8" => (true, 8),
|
||||||
"SI16" => (true, 16),
|
"SI16" => (true, 16),
|
||||||
|
"SI32" => (true, 32),
|
||||||
"UI8" => (false, 8),
|
"UI8" => (false, 8),
|
||||||
"UI16" => (false, 16),
|
"UI16" => (false, 16),
|
||||||
_ => bail!("unknown element type: {etype}"),
|
_ => bail!("unknown element type: {etype}"),
|
||||||
|
|
|
||||||
|
|
@ -88,4 +88,10 @@ fn main() -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
const INTRINSICS_GENERATE: &[&str] = &["_mm_packus_epi16", "_mm_packs_epi16", "_mm_setr_epi16"];
|
/// Names of the x86 intrinsics this tool works on.
///
/// NOTE(review): judging by the name, this is presumably the list the
/// generator emits implementations and tests for; the consumer of the list is
/// not visible here, so confirm before relying on that.
const INTRINSICS_GENERATE: &[&str] = &[
|
||||||
|
"_mm_packus_epi16",
|
||||||
|
"_mm_packs_epi16",
|
||||||
|
"_mm_packus_epi32",
|
||||||
|
"_mm_packs_epi32",
|
||||||
|
"_mm_setr_epi16",
|
||||||
|
];
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,37 @@ pub trait Intrinsics: super::Core {
|
||||||
let __tmp = self.saturate8(__tmp);
|
let __tmp = self.saturate8(__tmp);
|
||||||
self.set_lane___m128i_i8(dst, 15u64, __tmp);
|
self.set_lane___m128i_i8(dst, 15u64, __tmp);
|
||||||
}
|
}
|
||||||
|
/// Packs the four `i32` lanes of `a` followed by the four `i32` lanes of `b`
/// into the eight `i16` lanes of `dst`.
///
/// Every source lane is read with `get_lane___m128i_i32`, narrowed with
/// `saturate16`, and stored with `set_lane___m128i_i16`. Lanes 0..=3 of `dst`
/// come from `a`, lanes 4..=7 from `b`.
fn _mm_packs_epi32(
|
||||||
|
&mut self,
|
||||||
|
dst: &mut Self::__m128i,
|
||||||
|
a: Self::__m128i,
|
||||||
|
b: Self::__m128i,
|
||||||
|
) {
|
||||||
|
// Lanes 0..=3 of `dst`: the four lanes of `a`, in order.
let __tmp = self.get_lane___m128i_i32(a, 0u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 0u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 1u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 1u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 2u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 2u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 3u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 3u64, __tmp);
|
||||||
|
// Lanes 4..=7 of `dst`: the four lanes of `b`, in order.
let __tmp = self.get_lane___m128i_i32(b, 0u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 4u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 1u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 5u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 2u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 6u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 3u64);
|
||||||
|
let __tmp = self.saturate16(__tmp);
|
||||||
|
self.set_lane___m128i_i16(dst, 7u64, __tmp);
|
||||||
|
}
|
||||||
fn _mm_packus_epi16(
|
fn _mm_packus_epi16(
|
||||||
&mut self,
|
&mut self,
|
||||||
dst: &mut Self::__m128i,
|
dst: &mut Self::__m128i,
|
||||||
|
|
@ -139,6 +170,37 @@ pub trait Intrinsics: super::Core {
|
||||||
let __tmp = self.saturate_u8(__tmp);
|
let __tmp = self.saturate_u8(__tmp);
|
||||||
self.set_lane___m128i_u8(dst, 15u64, __tmp);
|
self.set_lane___m128i_u8(dst, 15u64, __tmp);
|
||||||
}
|
}
|
||||||
|
/// Packs the four `i32` lanes of `a` followed by the four `i32` lanes of `b`
/// into the eight `u16` lanes of `dst`.
///
/// The source lanes are read as *signed* 32-bit values
/// (`get_lane___m128i_i32`), narrowed with `saturate_u16`, and stored with
/// `set_lane___m128i_u16`. Lanes 0..=3 of `dst` come from `a`, lanes 4..=7
/// from `b`.
fn _mm_packus_epi32(
|
||||||
|
&mut self,
|
||||||
|
dst: &mut Self::__m128i,
|
||||||
|
a: Self::__m128i,
|
||||||
|
b: Self::__m128i,
|
||||||
|
) {
|
||||||
|
// Lanes 0..=3 of `dst`: the four lanes of `a`, in order.
let __tmp = self.get_lane___m128i_i32(a, 0u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 0u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 1u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 1u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 2u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 2u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(a, 3u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 3u64, __tmp);
|
||||||
|
// Lanes 4..=7 of `dst`: the four lanes of `b`, in order.
let __tmp = self.get_lane___m128i_i32(b, 0u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 4u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 1u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 5u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 2u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 6u64, __tmp);
|
||||||
|
let __tmp = self.get_lane___m128i_i32(b, 3u64);
|
||||||
|
let __tmp = self.saturate_u16(__tmp);
|
||||||
|
self.set_lane___m128i_u16(dst, 7u64, __tmp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pub mod soft_arch {
|
pub mod soft_arch {
|
||||||
pub use super::super::soft_arch_types::*;
|
pub use super::super::soft_arch_types::*;
|
||||||
|
|
@ -163,11 +225,21 @@ pub mod soft_arch {
|
||||||
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
|
super::super::ValueCore._mm_packs_epi16(&mut output, a, b);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
/// Value-returning form of `_mm_packs_epi32`: creates a zeroed `__m128i`,
/// has `ValueCore` fill it from `a` and `b`, and returns the result.
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||||
|
// NOTE(review): `mem::zeroed` is only sound if the all-zero bit pattern is
// a valid `__m128i`; the type's definition is not visible here — confirm
// and record that as a `// SAFETY:` justification.
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore._mm_packs_epi32(&mut output, a, b);
|
||||||
|
output
|
||||||
|
}
|
||||||
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
|
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||||
let mut output = unsafe { std::mem::zeroed() };
|
let mut output = unsafe { std::mem::zeroed() };
|
||||||
super::super::ValueCore._mm_packus_epi16(&mut output, a, b);
|
super::super::ValueCore._mm_packus_epi16(&mut output, a, b);
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
/// Value-returning form of `_mm_packus_epi32`: creates a zeroed `__m128i`,
/// has `ValueCore` fill it from `a` and `b`, and returns the result.
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||||
|
// NOTE(review): `mem::zeroed` is only sound if the all-zero bit pattern is
// a valid `__m128i`; the type's definition is not visible here — confirm
// and record that as a `// SAFETY:` justification.
let mut output = unsafe { std::mem::zeroed() };
|
||||||
|
super::super::ValueCore._mm_packus_epi32(&mut output, a, b);
|
||||||
|
output
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#[cfg(all(test, target_arch = "x86_64"))]
|
#[cfg(all(test, target_arch = "x86_64"))]
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
|
|
@ -190,13 +262,31 @@ pub mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn _mm_packus_epi16() {
|
fn _mm_packs_epi32() {
|
||||||
hard_soft_same_128! {
|
hard_soft_same_128! {
|
||||||
{ let a = _mm_setr_epi16(18077i16, 23617i16, - 9205i16, 21233i16, - 4332i16,
|
{ let a = _mm_setr_epi16(18077i16, 23617i16, - 9205i16, 21233i16, - 4332i16,
|
||||||
- 31339i16, 23623i16, - 22080i16); let b = _mm_setr_epi16(- 1436i16, -
|
- 31339i16, 23623i16, - 22080i16); let b = _mm_setr_epi16(- 1436i16, -
|
||||||
30227i16, 8629i16, 10922i16, - 16731i16, - 1013i16, - 14310i16, 2892i16);
|
30227i16, 8629i16, 10922i16, - 16731i16, - 1013i16, - 14310i16, 2892i16);
|
||||||
|
_mm_packs_epi32(a, b) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn _mm_packus_epi16() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let a = _mm_setr_epi16(- 28568i16, 12614i16, 20103i16, 32412i16, -
|
||||||
|
28704i16, - 27930i16, 4197i16, 1829i16); let b = _mm_setr_epi16(9149i16,
|
||||||
|
18759i16, 30885i16, - 3879i16, 21600i16, 24454i16, 23524i16, 10765i16);
|
||||||
_mm_packus_epi16(a, b) }
|
_mm_packus_epi16(a, b) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Evaluates `_mm_packus_epi32(a, b)` inside `hard_soft_same_128!` for one
// fixed pair of inputs built with `_mm_setr_epi16`.
// NOTE(review): the macro is not visible here; presumably it runs the
// expression against both the hardware intrinsic and `soft_arch` and compares
// the 128-bit results — confirm. `_mm_packus_epi32` is an SSE4.1 instruction,
// so check that the hardware side is only run where SSE4.1 is available.
#[test]
|
||||||
|
fn _mm_packus_epi32() {
|
||||||
|
hard_soft_same_128! {
|
||||||
|
{ let a = _mm_setr_epi16(32539i16, 26890i16, - 3892i16, 4386i16, 18704i16,
|
||||||
|
8253i16, - 29217i16, 32013i16); let b = _mm_setr_epi16(7448i16, 2172i16, -
|
||||||
|
14764i16, - 1068i16, - 25463i16, 21215i16, - 31392i16, - 14015i16);
|
||||||
|
_mm_packus_epi32(a, b) }
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,13 +21,18 @@ pub trait Core {
|
||||||
|
|
||||||
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
fn get_lane___m128i_u16(&mut self, value: Self::__m128i, idx: u64) -> Self::u16;
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
|
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16;
|
||||||
|
/// Reads lane `idx` of `value` as an unsigned 32-bit element.
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32;
|
||||||
|
/// Reads lane `idx` of `value` as a signed 32-bit element.
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32;
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8);
|
||||||
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
|
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8);
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16);
|
||||||
|
/// Stores `value` into lane `idx` of `place` as a signed 16-bit element.
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16);
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
|
fn saturate8(&mut self, elem: Self::i16) -> Self::i8;
|
||||||
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
fn saturate_u8(&mut self, elem: Self::i16) -> Self::u8;
|
||||||
|
/// Narrows a signed 32-bit element to a signed 16-bit element; the
/// `ValueCore` implementation clamps to the `i16` range before casting.
fn saturate16(&mut self, elem: Self::i32) -> Self::i16;
|
||||||
|
/// Narrows a signed 32-bit element to an unsigned 16-bit element; the
/// `ValueCore` implementation clamps to `0..=u16::MAX` before casting.
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ValueCore;
|
pub struct ValueCore;
|
||||||
|
|
@ -57,10 +62,20 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
|
fn get_lane___m128i_i16(&mut self, value: Self::__m128i, idx: u64) -> Self::i16 {
|
||||||
let first = value[(idx * 2 + 1) as usize];
|
self.get_lane___m128i_u16(value, idx) as i16
|
||||||
let second = value[(idx * 2) as usize];
|
}
|
||||||
|
|
||||||
((((first as u16) << 8) as u16) | (second as u16)) as i16
|
/// Assembles 32-bit lane `idx` from the four consecutive elements of `value`
/// starting at index `idx * 4`.
///
/// The element at the lowest index becomes the least-significant byte of the
/// result and the one at `idx * 4 + 3` the most-significant, i.e. the lane is
/// stored little-endian.
fn get_lane___m128i_u32(&mut self, value: Self::__m128i, idx: u64) -> Self::u32 {
|
||||||
|
// `first` is the most-significant byte, `fourth` the least-significant.
let first = value[(idx * 4 + 3) as usize];
|
||||||
|
let second = value[(idx * 4 + 2) as usize];
|
||||||
|
let third = value[(idx * 4 + 1) as usize];
|
||||||
|
let fourth = value[(idx * 4) as usize];
|
||||||
|
|
||||||
|
((first as u32) << 24) | ((second as u32) << 16) | ((third as u32) << 8) | (fourth as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads lane `idx` as a signed 32-bit value by fetching it through
/// `get_lane___m128i_u32` and reinterpreting the same bits as `i32`.
fn get_lane___m128i_i32(&mut self, value: Self::__m128i, idx: u64) -> Self::i32 {
|
||||||
|
self.get_lane___m128i_u32(value, idx) as i32
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
fn set_lane___m128i_u8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u8) {
|
||||||
|
|
@ -68,7 +83,7 @@ impl Core for ValueCore {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) {
|
fn set_lane___m128i_i8(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i8) {
|
||||||
place[idx as usize] = value as u8;
|
self.set_lane___m128i_u8(place, idx, value as u8);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
fn set_lane___m128i_u16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::u16) {
|
||||||
|
|
@ -78,6 +93,10 @@ impl Core for ValueCore {
|
||||||
place[(idx * 2 + 1) as usize] = second;
|
place[(idx * 2 + 1) as usize] = second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores a signed 16-bit lane by reinterpreting `value` as `u16` (same bit
/// pattern) and delegating to `set_lane___m128i_u16`.
fn set_lane___m128i_i16(&mut self, place: &mut Self::__m128i, idx: u64, value: Self::i16) {
|
||||||
|
self.set_lane___m128i_u16(place, idx, value as u16);
|
||||||
|
}
|
||||||
|
|
||||||
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
|
fn saturate8(&mut self, elem: Self::i16) -> Self::i8 {
|
||||||
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
|
let clamp = elem.clamp(i8::MIN as i16, i8::MAX as i16);
|
||||||
clamp as i8
|
clamp as i8
|
||||||
|
|
@ -87,6 +106,15 @@ impl Core for ValueCore {
|
||||||
let clamp = elem.clamp(0, u8::MAX as i16);
|
let clamp = elem.clamp(0, u8::MAX as i16);
|
||||||
clamp as u8
|
clamp as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Signed saturation from 32 to 16 bits: values below `i16::MIN` become
/// `i16::MIN`, values above `i16::MAX` become `i16::MAX`, everything else is
/// returned unchanged.
fn saturate16(&mut self, elem: Self::i32) -> Self::i16 {
|
||||||
|
let clamp = elem.clamp(i16::MIN as i32, i16::MAX as i32);
|
||||||
|
// After the clamp the value fits in `i16`, so the cast cannot truncate.
clamp as i16
|
||||||
|
}
|
||||||
|
/// Unsigned saturation from a signed 32-bit value to 16 bits: negative values
/// become `0`, values above `u16::MAX` become `u16::MAX`, everything else is
/// returned unchanged.
fn saturate_u16(&mut self, elem: Self::i32) -> Self::u16 {
|
||||||
|
let clamp = elem.clamp(0, u16::MAX as i32);
|
||||||
|
// After the clamp the value lies in `0..=u16::MAX`, so the cast cannot truncate.
clamp as u16
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod soft_arch_types {
|
mod soft_arch_types {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue