Skip to content

Commit 6b96d90

Browse files
authored
Merge pull request #467 from Demindiro/memcmp-x86_64
2 parents b5065a0 + 22c06e4 commit 6b96d90

File tree

6 files changed

+182
-20
lines changed

6 files changed

+182
-20
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#![feature(compiler_builtins)]
77
#![feature(core_ffi_c)]
88
#![feature(core_intrinsics)]
9+
#![feature(inline_const)]
910
#![feature(lang_items)]
1011
#![feature(linkage)]
1112
#![feature(naked_functions)]

src/mem/impls.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
265265
}
266266
set_bytes_bytes(s, c, n);
267267
}
268+
269+
/// Compares the first `n` bytes at `s1` and `s2`, one byte at a time.
///
/// Returns `0` when all `n` byte pairs are equal. Otherwise returns the
/// difference between the first pair of differing bytes (the byte from `s1`
/// minus the byte from `s2`, computed as `i32`), so the sign of the result
/// tells which buffer holds the larger byte.
///
/// # Safety
///
/// Both `s1` and `s2` must be valid for reads of `n` bytes.
#[inline(always)]
pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
    let mut i = 0;
    while i < n {
        let a = *s1.add(i);
        let b = *s2.add(i);
        if a != b {
            // Widen both bytes before subtracting so the difference cannot wrap.
            return i32::from(a) - i32::from(b);
        }
        i += 1;
    }
    0
}

src/mem/mod.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,7 @@ intrinsics! {
5151
#[mem_builtin]
5252
#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
5353
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
54-
let mut i = 0;
55-
while i < n {
56-
let a = *s1.add(i);
57-
let b = *s2.add(i);
58-
if a != b {
59-
return a as i32 - b as i32;
60-
}
61-
i += 1;
62-
}
63-
0
54+
impls::compare_bytes(s1, s2, n)
6455
}
6556

6657
#[mem_builtin]

src/mem/x86_64.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
// feature is present at compile-time. We don't bother detecting other features.
1717
// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb".
1818

19+
use core::intrinsics;
20+
use core::mem;
21+
1922
#[inline(always)]
2023
#[cfg(target_feature = "ermsb")]
2124
pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
@@ -98,3 +101,47 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
98101
options(att_syntax, nostack, preserves_flags)
99102
);
100103
}
104+
105+
#[inline(always)]
106+
pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
107+
#[inline(always)]
108+
unsafe fn cmp<T, U, F>(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32
109+
where
110+
T: Clone + Copy + Eq,
111+
U: Clone + Copy + Eq,
112+
F: FnOnce(*const U, *const U, usize) -> i32,
113+
{
114+
// Ensure T is not a ZST.
115+
const { assert!(mem::size_of::<T>() != 0) };
116+
117+
let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>()));
118+
while a != end {
119+
if a.read_unaligned() != b.read_unaligned() {
120+
return f(a.cast(), b.cast(), mem::size_of::<T>());
121+
}
122+
a = a.add(1);
123+
b = b.add(1);
124+
}
125+
f(
126+
a.cast(),
127+
b.cast(),
128+
intrinsics::unchecked_rem(n, mem::size_of::<T>()),
129+
)
130+
}
131+
let c1 = |mut a: *const u8, mut b: *const u8, n| {
132+
for _ in 0..n {
133+
if a.read() != b.read() {
134+
return i32::from(a.read()) - i32::from(b.read());
135+
}
136+
a = a.add(1);
137+
b = b.add(1);
138+
}
139+
0
140+
};
141+
let c2 = |a: *const u16, b, n| cmp(a, b, n, c1);
142+
let c4 = |a: *const u32, b, n| cmp(a, b, n, c2);
143+
let c8 = |a: *const u64, b, n| cmp(a, b, n, c4);
144+
let c16 = |a: *const u128, b, n| cmp(a, b, n, c8);
145+
let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16);
146+
c32(a.cast(), b.cast(), n)
147+
}

testcrate/benches/mem.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) {
9696
})
9797
}
9898

99+
fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) {
100+
let v1 = AlignedVec::new(0, n);
101+
let mut v2 = AlignedVec::new(0, n);
102+
v2[n - 1] = 1;
103+
b.bytes = n as u64;
104+
b.iter(|| {
105+
let s1: &[u8] = black_box(&v1[0..]);
106+
let s2: &[u8] = black_box(&v2[1..]);
107+
s1.cmp(s2)
108+
})
109+
}
110+
99111
fn memcmp_rust(b: &mut Bencher, n: usize) {
100112
let v1 = AlignedVec::new(0, n);
101113
let mut v2 = AlignedVec::new(0, n);
@@ -108,6 +120,18 @@ fn memcmp_rust(b: &mut Bencher, n: usize) {
108120
})
109121
}
110122

123+
fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) {
124+
let v1 = AlignedVec::new(0, n);
125+
let mut v2 = AlignedVec::new(0, n);
126+
v2[n - 1] = 1;
127+
b.bytes = n as u64;
128+
b.iter(|| {
129+
let s1: &[u8] = black_box(&v1[0..]);
130+
let s2: &[u8] = black_box(&v2[1..]);
131+
unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) }
132+
})
133+
}
134+
111135
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
112136
let mut v = AlignedVec::new(0, n + n / 2 + offset);
113137
b.bytes = n as u64;
@@ -209,6 +233,38 @@ fn memset_rust_1048576_offset(b: &mut Bencher) {
209233
memset_rust(b, 1048576, 65)
210234
}
211235

236+
#[bench]
237+
fn memcmp_builtin_8(b: &mut Bencher) {
238+
memcmp_builtin(b, 8)
239+
}
240+
#[bench]
241+
fn memcmp_rust_8(b: &mut Bencher) {
242+
memcmp_rust(b, 8)
243+
}
244+
#[bench]
245+
fn memcmp_builtin_16(b: &mut Bencher) {
246+
memcmp_builtin(b, 16)
247+
}
248+
#[bench]
249+
fn memcmp_rust_16(b: &mut Bencher) {
250+
memcmp_rust(b, 16)
251+
}
252+
#[bench]
253+
fn memcmp_builtin_32(b: &mut Bencher) {
254+
memcmp_builtin(b, 32)
255+
}
256+
#[bench]
257+
fn memcmp_rust_32(b: &mut Bencher) {
258+
memcmp_rust(b, 32)
259+
}
260+
#[bench]
261+
fn memcmp_builtin_64(b: &mut Bencher) {
262+
memcmp_builtin(b, 64)
263+
}
264+
#[bench]
265+
fn memcmp_rust_64(b: &mut Bencher) {
266+
memcmp_rust(b, 64)
267+
}
212268
#[bench]
213269
fn memcmp_builtin_4096(b: &mut Bencher) {
214270
memcmp_builtin(b, 4096)
@@ -225,6 +281,54 @@ fn memcmp_builtin_1048576(b: &mut Bencher) {
225281
fn memcmp_rust_1048576(b: &mut Bencher) {
226282
memcmp_rust(b, 1048576)
227283
}
284+
#[bench]
285+
fn memcmp_builtin_unaligned_7(b: &mut Bencher) {
286+
memcmp_builtin_unaligned(b, 8)
287+
}
288+
#[bench]
289+
fn memcmp_rust_unaligned_7(b: &mut Bencher) {
290+
memcmp_rust_unaligned(b, 8)
291+
}
292+
#[bench]
293+
fn memcmp_builtin_unaligned_15(b: &mut Bencher) {
294+
memcmp_builtin_unaligned(b, 16)
295+
}
296+
#[bench]
297+
fn memcmp_rust_unaligned_15(b: &mut Bencher) {
298+
memcmp_rust_unaligned(b, 16)
299+
}
300+
#[bench]
301+
fn memcmp_builtin_unaligned_31(b: &mut Bencher) {
302+
memcmp_builtin_unaligned(b, 32)
303+
}
304+
#[bench]
305+
fn memcmp_rust_unaligned_31(b: &mut Bencher) {
306+
memcmp_rust_unaligned(b, 32)
307+
}
308+
#[bench]
309+
fn memcmp_builtin_unaligned_63(b: &mut Bencher) {
310+
memcmp_builtin_unaligned(b, 64)
311+
}
312+
#[bench]
313+
fn memcmp_rust_unaligned_63(b: &mut Bencher) {
314+
memcmp_rust_unaligned(b, 64)
315+
}
316+
#[bench]
317+
fn memcmp_builtin_unaligned_4095(b: &mut Bencher) {
318+
memcmp_builtin_unaligned(b, 4096)
319+
}
320+
#[bench]
321+
fn memcmp_rust_unaligned_4095(b: &mut Bencher) {
322+
memcmp_rust_unaligned(b, 4096)
323+
}
324+
#[bench]
325+
fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) {
326+
memcmp_builtin_unaligned(b, 1048576)
327+
}
328+
#[bench]
329+
fn memcmp_rust_unaligned_1048575(b: &mut Bencher) {
330+
memcmp_rust_unaligned(b, 1048576)
331+
}
228332

229333
#[bench]
230334
fn memmove_builtin_4096(b: &mut Bencher) {

testcrate/tests/mem.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,21 +116,26 @@ fn memset_nonzero() {
116116

117117
#[test]
118118
fn memcmp_eq() {
119-
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
120-
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
121-
unsafe {
122-
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0);
123-
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0);
119+
let arr1 @ arr2 = gen_arr::<256>();
120+
for i in 0..256 {
121+
unsafe {
122+
assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0);
123+
assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0);
124+
}
124125
}
125126
}
126127

127128
#[test]
128129
fn memcmp_ne() {
129-
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
130-
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7];
131-
unsafe {
132-
assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0);
133-
assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0);
130+
let arr1 @ arr2 = gen_arr::<256>();
131+
for i in 0..256 {
132+
let mut diff_arr = arr1;
133+
diff_arr.0[i] = 127;
134+
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
135+
for k in i + 1..256 {
136+
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
137+
assert_eq!(expect, result.cmp(&0));
138+
}
134139
}
135140
}
136141

0 commit comments

Comments
 (0)