Skip to content

Commit 064d06a

Browse files
tvladyslav authored and alexcrichton committed
[x86][sse4.1] Add pmin* instructions (#186)
1 parent 6d4af47 commit 064d06a

File tree

1 file changed

+123
-1
lines changed

1 file changed

+123
-1
lines changed

src/x86/sse41.rs

+123-1
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
208208
a.replace((imm8 & 0b1) as u32, i)
209209
}
210210

211-
/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum
211+
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
212212
/// values in dst.
213213
#[inline(always)]
214214
#[target_feature = "+sse4.1"]
@@ -244,6 +244,42 @@ pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
244244
pmaxud(a, b)
245245
}
246246

247+
/// Compare packed 8-bit integers in `a` and `b` and return packed minimum
248+
/// values in dst.
249+
#[inline(always)]
250+
#[target_feature = "+sse4.1"]
251+
#[cfg_attr(test, assert_instr(pminsb))]
252+
pub unsafe fn _mm_min_epi8(a: i8x16, b: i8x16) -> i8x16 {
253+
pminsb(a, b)
254+
}
255+
256+
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
257+
/// minimum.
258+
#[inline(always)]
259+
#[target_feature = "+sse4.1"]
260+
#[cfg_attr(test, assert_instr(pminuw))]
261+
pub unsafe fn _mm_min_epu16(a: u16x8, b: u16x8) -> u16x8 {
262+
pminuw(a, b)
263+
}
264+
265+
/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
266+
/// values.
267+
#[inline(always)]
268+
#[target_feature = "+sse4.1"]
269+
#[cfg_attr(test, assert_instr(pminsd))]
270+
pub unsafe fn _mm_min_epi32(a: i32x4, b: i32x4) -> i32x4 {
271+
pminsd(a, b)
272+
}
273+
274+
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
275+
/// minimum values.
276+
#[inline(always)]
277+
#[target_feature = "+sse4.1"]
278+
#[cfg_attr(test, assert_instr(pminud))]
279+
pub unsafe fn _mm_min_epu32(a: u32x4, b: u32x4) -> u32x4 {
280+
pminud(a, b)
281+
}
282+
247283
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
248284
/// using unsigned saturation
249285
#[inline(always)]
@@ -569,6 +605,14 @@ extern "C" {
569605
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
570606
#[link_name = "llvm.x86.sse41.pmaxud"]
571607
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
608+
#[link_name = "llvm.x86.sse41.pminsb"]
609+
fn pminsb(a: i8x16, b: i8x16) -> i8x16;
610+
#[link_name = "llvm.x86.sse41.pminuw"]
611+
fn pminuw(a: u16x8, b: u16x8) -> u16x8;
612+
#[link_name = "llvm.x86.sse41.pminsd"]
613+
fn pminsd(a: i32x4, b: i32x4) -> i32x4;
614+
#[link_name = "llvm.x86.sse41.pminud"]
615+
fn pminud(a: u32x4, b: u32x4) -> u32x4;
572616
#[link_name = "llvm.x86.sse41.packusdw"]
573617
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
574618
#[link_name = "llvm.x86.sse41.dppd"]
@@ -784,6 +828,84 @@ mod tests {
784828
assert_eq!(r, e);
785829
}
786830

831+
#[simd_test = "sse4.1"]
832+
unsafe fn _mm_min_epi8_1() {
833+
#[cfg_attr(rustfmt, rustfmt_skip)]
834+
let a = i8x16::new(
835+
1, 4, 5, 8, 9, 12, 13, 16,
836+
17, 20, 21, 24, 25, 28, 29, 32,
837+
);
838+
#[cfg_attr(rustfmt, rustfmt_skip)]
839+
let b = i8x16::new(
840+
2, 3, 6, 7, 10, 11, 14, 15,
841+
18, 19, 22, 23, 26, 27, 30, 31,
842+
);
843+
let r = sse41::_mm_min_epi8(a, b);
844+
#[cfg_attr(rustfmt, rustfmt_skip)]
845+
let e = i8x16::new(
846+
1, 3, 5, 7, 9, 11, 13, 15,
847+
17, 19, 21, 23, 25, 27, 29, 31,
848+
);
849+
assert_eq!(r, e);
850+
}
851+
852+
#[simd_test = "sse4.1"]
853+
unsafe fn _mm_min_epi8_2() {
854+
#[cfg_attr(rustfmt, rustfmt_skip)]
855+
let a = i8x16::new(
856+
1, -4, -5, 8, -9, -12, 13, -16,
857+
17, 20, 21, 24, 25, 28, 29, 32,
858+
);
859+
#[cfg_attr(rustfmt, rustfmt_skip)]
860+
let b = i8x16::new(
861+
2, -3, -6, 7, -10, -11, 14, -15,
862+
18, 19, 22, 23, 26, 27, 30, 31,
863+
);
864+
let r = sse41::_mm_min_epi8(a, b);
865+
#[cfg_attr(rustfmt, rustfmt_skip)]
866+
let e = i8x16::new(
867+
1, -4, -6, 7, -10, -12, 13, -16,
868+
17, 19, 21, 23, 25, 27, 29, 31,
869+
);
870+
assert_eq!(r, e);
871+
}
872+
873+
#[simd_test = "sse4.1"]
874+
unsafe fn _mm_min_epu16() {
875+
let a = u16x8::new(1, 4, 5, 8, 9, 12, 13, 16);
876+
let b = u16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
877+
let r = sse41::_mm_min_epu16(a, b);
878+
let e = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
879+
assert_eq!(r, e);
880+
}
881+
882+
#[simd_test = "sse4.1"]
883+
unsafe fn _mm_min_epi32_1() {
884+
let a = i32x4::new(1, 4, 5, 8);
885+
let b = i32x4::new(2, 3, 6, 7);
886+
let r = sse41::_mm_min_epi32(a, b);
887+
let e = i32x4::new(1, 3, 5, 7);
888+
assert_eq!(r, e);
889+
}
890+
891+
#[simd_test = "sse4.1"]
892+
unsafe fn _mm_min_epi32_2() {
893+
let a = i32x4::new(-1, 4, 5, -7);
894+
let b = i32x4::new(-2, 3, -6, 8);
895+
let r = sse41::_mm_min_epi32(a, b);
896+
let e = i32x4::new(-2, 3, -6, -7);
897+
assert_eq!(r, e);
898+
}
899+
900+
#[simd_test = "sse4.1"]
901+
unsafe fn _mm_min_epu32() {
902+
let a = u32x4::new(1, 4, 5, 8);
903+
let b = u32x4::new(2, 3, 6, 7);
904+
let r = sse41::_mm_min_epu32(a, b);
905+
let e = u32x4::new(1, 3, 5, 7);
906+
assert_eq!(r, e);
907+
}
908+
787909
#[simd_test = "sse4.1"]
788910
unsafe fn _mm_packus_epi32() {
789911
let a = i32x4::new(1, 2, 3, 4);

0 commit comments

Comments
 (0)