CLMUL instruction set (#320)

newpavlov · alexcrichton · commit 0df4359c1777 · 2018-02-18T15:55:57.000+09:00
* added pclmul

* added docs

* pclmul -> pclmulqdq

* imm8: u8 -> imm8: i32

* return changes to stdsimd/arch/detect/x86.rs

* error fixes

* added rustc_args_required_const

* fixed assert_instr for _mm_clmulepi64_si128

* fixed pclmul assert_instr tests
diff --git a/coresimd/x86/i686/mod.rs b/coresimd/x86/i686/mod.rs
@@ -6,6 +6,9 @@ pub use self::aes::*;
 mod mmx;
 pub use self::mmx::*;
 
+mod pclmulqdq;
+pub use self::pclmulqdq::*;
+
 mod sse;
 pub use self::sse::*;
 
diff --git a/coresimd/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs
@@ -0,0 +1,70 @@
+//! Carry-less Multiplication (CLMUL)
+//!
+//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241).
+//!
+//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+
+use coresimd::x86::__m128i;
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+#[allow(improper_ctypes)] // NOTE(review): presumably `__m128i` trips the FFI-safety lint; confirm
+extern "C" {
+    #[link_name = "llvm.x86.pclmulqdq"] // resolved to the LLVM intrinsic of this name
+    fn pclmulqdq(a: __m128i, b: __m128i, imm8: u8) -> __m128i;
+}
+
+/// Performs a carry-less multiplication of two 64-bit polynomials over the
+/// finite field GF(2), yielding their (up to 127-bit) polynomial product.
+///
+/// Bit 0 of `imm8` selects the low (0) or high (1) 64-bit half of `a`, and
+/// bit 4 does the same for `b`. Immediate bits other than 0 and 4 are ignored.
+#[inline]
+#[target_feature(enable = "pclmulqdq")]
+#[cfg_attr(all(test, not(target_os="linux")), assert_instr(pclmulqdq, imm8 = 0))]
+#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqlqdq, imm8 = 0))]
+#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqlqdq, imm8 = 1))]
+#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqhqdq, imm8 = 16))]
+#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqhqdq, imm8 = 17))]
+#[rustc_args_required_const(2)] // argument index 2 (`imm8`) must be a compile-time constant
+pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
+    macro_rules! call {
+        ($imm8:expr) => (pclmulqdq(a, b, $imm8))
+    }
+    constify_imm8!(imm8, call) // NOTE(review): presumably invokes `call!` with a literal immediate; confirm
+}
+
+
+#[cfg(test)]
+mod tests {
+    // The constants in the tests below are just bit patterns. They should not
+    // be interpreted as integers: signedness is meaningless for them, but
+    // __m128i happens to be defined in terms of signed integers.
+    #![allow(overflowing_literals)]
+
+    use stdsimd_test::simd_test;
+
+    use coresimd::x86::*;
+
+    #[simd_test = "pclmulqdq"]
+    unsafe fn test_mm_clmulepi64_si128() {
+        // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
+        let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
+        let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);
+        let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
+        let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
+        let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
+        let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);
+        // `rXY`: half X of `a` times half Y of `b` (0 = low, 1 = high); imm8 is 0xYX.
+        assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x00), r00);
+        assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x10), r01);
+        assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x01), r10);
+        assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x11), r11);
+        // x^63 * x^63 = x^126: only bit 126 (bit 62 of the high half) is set.
+        let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
+        let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
+        assert_eq_m128i(_mm_clmulepi64_si128(a0, a0, 0x00), r);
+    }
+}
diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs
@@ -30,6 +30,9 @@ macro_rules! is_target_feature_detected {
     ("aes") => {
         $crate::arch::detect::check_for(
             $crate::arch::detect::Feature::aes)  };
+    ("pclmulqdq") => {
+        $crate::arch::detect::check_for(
+            $crate::arch::detect::Feature::pclmulqdq)  };
     ("tsc") => {
         $crate::arch::detect::check_for(
             $crate::arch::detect::Feature::tsc)  };
@@ -174,6 +177,8 @@ macro_rules! is_target_feature_detected {
 pub enum Feature {
     /// AES (Advanced Encryption Standard New Instructions AES-NI)
     aes,
+    /// CLMUL (Carry-less Multiplication)
+    pclmulqdq,
     /// TSC (Time Stamp Counter)
     tsc,
     /// MMX
@@ -345,6 +350,7 @@ pub fn detect_features() -> cache::Initializer {
         enable(proc_info_ecx, 20, Feature::sse4_2);
         enable(proc_info_ecx, 23, Feature::popcnt);
         enable(proc_info_ecx, 25, Feature::aes);
+        enable(proc_info_ecx, 1, Feature::pclmulqdq);
         enable(proc_info_edx, 4, Feature::tsc);
         enable(proc_info_edx, 23, Feature::mmx);
         enable(proc_info_edx, 24, Feature::fxsr);
@@ -457,6 +463,7 @@ mod tests {
     #[test]
     fn dump() {
         println!("aes: {:?}", is_target_feature_detected!("aes"));
+        println!("pclmulqdq: {:?}", is_target_feature_detected!("pclmulqdq"));
         println!("tsc: {:?}", is_target_feature_detected!("tsc"));
         println!("sse: {:?}", is_target_feature_detected!("sse"));
         println!("sse2: {:?}", is_target_feature_detected!("sse2"));
@@ -498,6 +505,7 @@ mod tests {
     fn compare_with_cupid() {
         let information = cupid::master().unwrap();
         assert_eq!(is_target_feature_detected!("aes"), information.aesni());
+        assert_eq!(is_target_feature_detected!("pclmulqdq"), information.pclmulqdq());
         assert_eq!(is_target_feature_detected!("tsc"), information.tsc());
         assert_eq!(is_target_feature_detected!("sse"), information.sse());
         assert_eq!(is_target_feature_detected!("sse2"), information.sse2());