Skip to content

Commit 0df4359

Browse files
newpavlov authored and alexcrichton committed
CLMUL instruction set (#320)
* added pclmul
* added docs
* pclmul -> pclmulqdq
* imm8: u8 -> imm8: i32
* return changes to stdsimd/arch/detect/x86.rs
* error fixes
* added rustc_args_required_const
* fixed assert_instr for _mm_clmulepi64_si128
* fixed pclmul assert_instr tests
1 parent 80a524d commit 0df4359

File tree

3 files changed

+81
-0
lines changed

3 files changed

+81
-0
lines changed

coresimd/x86/i686/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ pub use self::aes::*;
66
mod mmx;
77
pub use self::mmx::*;
88

9+
mod pclmulqdq;
10+
pub use self::pclmulqdq::*;
11+
912
mod sse;
1013
pub use self::sse::*;
1114

coresimd/x86/i686/pclmulqdq.rs

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//! Carry-less Multiplication (CLMUL)
2+
//!
3+
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
4+
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241).
5+
//!
6+
//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
7+
8+
use coresimd::x86::__m128i;
9+
10+
#[cfg(test)]
11+
use stdsimd_test::assert_instr;
12+
13+
#[allow(improper_ctypes)]
14+
extern "C" {
15+
#[link_name = "llvm.x86.pclmulqdq"]
16+
fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i;
17+
}
18+
19+
/// Perform a carry-less multiplication of two 64-bit polynomials over the
20+
/// finite field GF(2^k).
21+
///
22+
/// The immediate byte is used for determining which halves of `a` and `b`
23+
/// should be used. Immediate bits other than 0 and 4 are ignored.
24+
#[inline]
25+
#[target_feature(enable = "pclmulqdq")]
26+
#[cfg_attr(all(test, not(target_os="linux")), assert_instr(pclmulqdq, imm8 = 0))]
27+
#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqlqdq, imm8 = 0))]
28+
#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqlqdq, imm8 = 1))]
29+
#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqhqdq, imm8 = 16))]
30+
#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqhqdq, imm8 = 17))]
31+
#[rustc_args_required_const(2)]
32+
pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
33+
macro_rules! call {
34+
($imm8:expr) => (pclmulqdq(a, b, $imm8))
35+
}
36+
constify_imm8!(imm8, call)
37+
}
38+
39+
40+
#[cfg(test)]
mod tests {
    // Every constant below is a raw bit pattern, not a number. `__m128i` is
    // built from signed integers, so some literals overflow `i64`; the lint
    // is silenced because signedness carries no meaning here.
    #![allow(overflowing_literals)]

    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    /// Checks all four half-selector combinations against reference vectors,
    /// plus one hand-computable single-bit product.
    #[simd_test = "pclmulqdq"]
    unsafe fn test_mm_clmulepi64_si128() {
        // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
        let lhs = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
        let rhs = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);

        // Expected products, named after the immediate they are checked with.
        let want_imm00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
        let want_imm10 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
        let want_imm01 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
        let want_imm11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);

        assert_eq_m128i(_mm_clmulepi64_si128(lhs, rhs, 0x00), want_imm00);
        assert_eq_m128i(_mm_clmulepi64_si128(lhs, rhs, 0x10), want_imm10);
        assert_eq_m128i(_mm_clmulepi64_si128(lhs, rhs, 0x01), want_imm01);
        assert_eq_m128i(_mm_clmulepi64_si128(lhs, rhs, 0x11), want_imm11);

        // Low half holds only bit 63 (x^63); squaring it without carries
        // gives x^126, which is bit 62 of the high half.
        let x63 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
        let x126 = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
        assert_eq_m128i(_mm_clmulepi64_si128(x63, x63, 0x00), x126);
    }
}

stdsimd/arch/detect/x86.rs

+8
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ macro_rules! is_target_feature_detected {
3030
("aes") => {
3131
$crate::arch::detect::check_for(
3232
$crate::arch::detect::Feature::aes) };
33+
("pclmulqdq") => {
34+
$crate::arch::detect::check_for(
35+
$crate::arch::detect::Feature::pclmulqdq) };
3336
("tsc") => {
3437
$crate::arch::detect::check_for(
3538
$crate::arch::detect::Feature::tsc) };
@@ -174,6 +177,8 @@ macro_rules! is_target_feature_detected {
174177
pub enum Feature {
175178
/// AES (Advanced Encryption Standard New Instructions AES-NI)
176179
aes,
180+
/// CLMUL (Carry-less Multiplication)
181+
pclmulqdq,
177182
/// TSC (Time Stamp Counter)
178183
tsc,
179184
/// MMX
@@ -345,6 +350,7 @@ pub fn detect_features() -> cache::Initializer {
345350
enable(proc_info_ecx, 20, Feature::sse4_2);
346351
enable(proc_info_ecx, 23, Feature::popcnt);
347352
enable(proc_info_ecx, 25, Feature::aes);
353+
enable(proc_info_ecx, 1, Feature::pclmulqdq);
348354
enable(proc_info_edx, 4, Feature::tsc);
349355
enable(proc_info_edx, 23, Feature::mmx);
350356
enable(proc_info_edx, 24, Feature::fxsr);
@@ -457,6 +463,7 @@ mod tests {
457463
#[test]
458464
fn dump() {
459465
println!("aes: {:?}", is_target_feature_detected!("aes"));
466+
println!("pclmulqdq: {:?}", is_target_feature_detected!("pclmulqdq"));
460467
println!("tsc: {:?}", is_target_feature_detected!("tsc"));
461468
println!("sse: {:?}", is_target_feature_detected!("sse"));
462469
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
@@ -498,6 +505,7 @@ mod tests {
498505
fn compare_with_cupid() {
499506
let information = cupid::master().unwrap();
500507
assert_eq!(is_target_feature_detected!("aes"), information.aesni());
508+
assert_eq!(is_target_feature_detected!("pclmulqdq"), information.pclmulqdq());
501509
assert_eq!(is_target_feature_detected!("tsc"), information.tsc());
502510
assert_eq!(is_target_feature_detected!("sse"), information.sse());
503511
assert_eq!(is_target_feature_detected!("sse2"), information.sse2());

0 commit comments

Comments
 (0)