[doc] document all missing items

gnzlbg · gnzlbg · commit 47b4a87ca287 · 2017-10-12T14:38:58.000+02:00
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -19,7 +19,8 @@ install:
 build: false
 
 test_script:
-  - C:\msys64\usr\bin\sh ci\run.sh
+  - cargo test --target %TARGET%
+  - cargo test --target %TARGET% --release
 
 branches:
   only:
diff --git a/src/arm/mod.rs b/src/arm/mod.rs
@@ -1,4 +1,5 @@
 //! ARM intrinsics.
+
 pub use self::v6::*;
 pub use self::v7::*;
 #[cfg(target_arch = "aarch64")]
diff --git a/src/lib.rs b/src/lib.rs
@@ -118,7 +118,7 @@
 #![cfg_attr(test, feature(proc_macro, test))]
 
 #![cfg_attr(feature = "cargo-clippy",
-            allow(inline_always, too_many_arguments, missing_docs_in_private_items,
+            allow(inline_always, too_many_arguments,
                   cast_sign_loss, cast_lossless, cast_possible_wrap,
                   cast_possible_truncation, cast_precision_loss, shadow_reuse,
                   cyclomatic_complexity, similar_names
diff --git a/src/macros.rs b/src/macros.rs
@@ -1,3 +1,5 @@
+//! Utility macros
+
 macro_rules! define_ty {
     ($name:ident, $($elty:ident),+) => {
         #[repr(simd)]
diff --git a/src/simd_llvm.rs b/src/simd_llvm.rs
@@ -1,3 +1,7 @@
+//! LLVM's simd platform intrinsics
+//!
+//! TODO: should use `link_llvm_intrinsic` instead: issue #112
+
 extern "platform-intrinsic" {
     pub fn simd_eq<T, U>(x: T, y: T) -> U;
     pub fn simd_ne<T, U>(x: T, y: T) -> U;
diff --git a/src/v128.rs b/src/v128.rs
@@ -1,3 +1,5 @@
+//! 128-bit wide vector types
+
 use simd_llvm::*;
 
 define_ty! { f64x2, f64, f64 }
diff --git a/src/v256.rs b/src/v256.rs
@@ -1,3 +1,5 @@
+//! 256-bit wide vector types
+
 use simd_llvm::*;
 
 define_ty! { f64x4, f64, f64, f64, f64 }
diff --git a/src/v512.rs b/src/v512.rs
@@ -1,3 +1,5 @@
+//! 512-bit wide vector types
+
 use simd_llvm::*;
 
 define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
diff --git a/src/v64.rs b/src/v64.rs
@@ -1,3 +1,5 @@
+//! 64-bit wide vector types
+
 use simd_llvm::*;
 
 define_ty_doc! {
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
@@ -1,3 +1,12 @@
+//! Advanced Vector Extensions (AVX)
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
+//!
+//! [Wikipedia](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) provides a quick overview of the instructions available.
+
 use std::mem;
 
 #[cfg(test)]
@@ -484,69 +493,69 @@ pub unsafe fn _mm256_xor_ps(a: f32x8, b: f32x8) -> f32x8 {
     mem::transmute(a ^ b)
 }
 
-// Equal (ordered, non-signaling)
+/// Equal (ordered, non-signaling)
 pub const _CMP_EQ_OQ: u8 = 0x00;
-// Less-than (ordered, signaling)
+/// Less-than (ordered, signaling)
 pub const _CMP_LT_OS: u8 = 0x01;
-// Less-than-or-equal (ordered, signaling)
+/// Less-than-or-equal (ordered, signaling)
 pub const _CMP_LE_OS: u8 = 0x02;
-// Unordered (non-signaling)
+/// Unordered (non-signaling)
 pub const _CMP_UNORD_Q: u8 = 0x03;
-// Not-equal (unordered, non-signaling)
+/// Not-equal (unordered, non-signaling)
 pub const _CMP_NEQ_UQ: u8 = 0x04;
-// Not-less-than (unordered, signaling)
+/// Not-less-than (unordered, signaling)
 pub const _CMP_NLT_US: u8 = 0x05;
-// Not-less-than-or-equal (unordered, signaling)
+/// Not-less-than-or-equal (unordered, signaling)
 pub const _CMP_NLE_US: u8 = 0x06;
-// Ordered (non-signaling)
+/// Ordered (non-signaling)
 pub const _CMP_ORD_Q: u8 = 0x07;
-// Equal (unordered, non-signaling)
+/// Equal (unordered, non-signaling)
 pub const _CMP_EQ_UQ: u8 = 0x08;
-// Not-greater-than-or-equal (unordered, signaling)
+/// Not-greater-than-or-equal (unordered, signaling)
 pub const _CMP_NGE_US: u8 = 0x09;
-// Not-greater-than (unordered, signaling)
+/// Not-greater-than (unordered, signaling)
 pub const _CMP_NGT_US: u8 = 0x0a;
-// False (ordered, non-signaling)
+/// False (ordered, non-signaling)
 pub const _CMP_FALSE_OQ: u8 = 0x0b;
-// Not-equal (ordered, non-signaling)
+/// Not-equal (ordered, non-signaling)
 pub const _CMP_NEQ_OQ: u8 = 0x0c;
-// Greater-than-or-equal (ordered, signaling)
+/// Greater-than-or-equal (ordered, signaling)
 pub const _CMP_GE_OS: u8 = 0x0d;
-// Greater-than (ordered, signaling)
+/// Greater-than (ordered, signaling)
 pub const _CMP_GT_OS: u8 = 0x0e;
-// True (unordered, non-signaling)
+/// True (unordered, non-signaling)
 pub const _CMP_TRUE_UQ: u8 = 0x0f;
-// Equal (ordered, signaling)
+/// Equal (ordered, signaling)
 pub const _CMP_EQ_OS: u8 = 0x10;
-// Less-than (ordered, non-signaling)
+/// Less-than (ordered, non-signaling)
 pub const _CMP_LT_OQ: u8 = 0x11;
-// Less-than-or-equal (ordered, non-signaling)
+/// Less-than-or-equal (ordered, non-signaling)
 pub const _CMP_LE_OQ: u8 = 0x12;
-// Unordered (signaling)
+/// Unordered (signaling)
 pub const _CMP_UNORD_S: u8 = 0x13;
-// Not-equal (unordered, signaling)
+/// Not-equal (unordered, signaling)
 pub const _CMP_NEQ_US: u8 = 0x14;
-// Not-less-than (unordered, non-signaling)
+/// Not-less-than (unordered, non-signaling)
 pub const _CMP_NLT_UQ: u8 = 0x15;
-// Not-less-than-or-equal (unordered, non-signaling)
+/// Not-less-than-or-equal (unordered, non-signaling)
 pub const _CMP_NLE_UQ: u8 = 0x16;
-// Ordered (signaling)
+/// Ordered (signaling)
 pub const _CMP_ORD_S: u8 = 0x17;
-// Equal (unordered, signaling)
+/// Equal (unordered, signaling)
 pub const _CMP_EQ_US: u8 = 0x18;
-// Not-greater-than-or-equal (unordered, non-signaling)
+/// Not-greater-than-or-equal (unordered, non-signaling)
 pub const _CMP_NGE_UQ: u8 = 0x19;
-// Not-greater-than (unordered, non-signaling)
+/// Not-greater-than (unordered, non-signaling)
 pub const _CMP_NGT_UQ: u8 = 0x1a;
-// False (ordered, signaling)
+/// False (ordered, signaling)
 pub const _CMP_FALSE_OS: u8 = 0x1b;
-// Not-equal (ordered, signaling)
+/// Not-equal (ordered, signaling)
 pub const _CMP_NEQ_OS: u8 = 0x1c;
-// Greater-than-or-equal (ordered, non-signaling)
+/// Greater-than-or-equal (ordered, non-signaling)
 pub const _CMP_GE_OQ: u8 = 0x1d;
-// Greater-than (ordered, non-signaling)
+/// Greater-than (ordered, non-signaling)
 pub const _CMP_GT_OQ: u8 = 0x1e;
-// True (unordered, signaling)
+/// True (unordered, signaling)
 pub const _CMP_TRUE_US: u8 = 0x1f;
 
 /// Compare packed double-precision (64-bit) floating-point
@@ -806,12 +815,11 @@ pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
 pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
-    const fn add4(x: u32) -> u32 { x + 4 }
     let imm8 = (imm8 & 0xFF) as u8;
     macro_rules! shuffle4 {
         ($a:expr, $b:expr, $c:expr, $d:expr) => {
             simd_shuffle8(a, _mm256_undefined_ps(), [
-                $a, $b, $c, $d, add4($a), add4($b), add4($c), add4($d)
+                $a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4
             ])
         }
     }
@@ -907,6 +915,8 @@ pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
     }
 }
 
+/// Shuffle double-precision (64-bit) floating-point elements in `a`
+/// within 128-bit lanes using the control in `b`.
 #[inline(always)]
 #[target_feature = "+avx"]
 #[cfg_attr(test, assert_instr(vpermilpd))]
diff --git a/src/x86/avx2.rs b/src/x86/avx2.rs
@@ -1,3 +1,17 @@
+//! Advanced Vector Extensions 2 (AVX2)
+//!
+//! AVX2 expands most integer commands to 256-bit wide vector registers and
+//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
+//!
+//! Wikipedia's [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
+//! and [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate) pages
+//! provide a quick overview of the instructions available.
+
 use simd_llvm::simd_shuffle32;
 use v256::*;
 use v128::*;
@@ -100,7 +114,6 @@ pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
 #[target_feature = "+avx2"]
 #[cfg_attr(test, assert_instr(vpalignr, n = 15))]
 pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
-    const fn add(a: u32, b: u32) -> u32 { a + b }
     let n = n as u32;
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
@@ -118,22 +131,22 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
     macro_rules! shuffle {
         ($shift:expr) => {
             simd_shuffle32(b, a, [
-                add(0, $shift), add(1, $shift),
-                add(2, $shift), add(3, $shift),
-                add(4, $shift), add(5, $shift),
-                add(6, $shift), add(7, $shift),
-                add(8, $shift), add(9, $shift),
-                add(10, $shift), add(11, $shift),
-                add(12, $shift), add(13, $shift),
-                add(14, $shift), add(15, $shift),
-                add(16, $shift), add(17, $shift),
-                add(18, $shift), add(19, $shift),
-                add(20, $shift), add(21, $shift),
-                add(22, $shift), add(23, $shift),
-                add(24, $shift), add(25, $shift),
-                add(26, $shift), add(27, $shift),
-                add(28, $shift), add(29, $shift),
-                add(30, $shift), add(31, $shift),
+                0 + $shift, 1 + $shift,
+                2 + $shift, 3 + $shift,
+                4 + $shift, 5 + $shift,
+                6 + $shift, 7 + $shift,
+                8 + $shift, 9 + $shift,
+                10 + $shift, 11 + $shift,
+                12 + $shift, 13 + $shift,
+                14 + $shift, 15 + $shift,
+                16 + $shift, 17 + $shift,
+                18 + $shift, 19 + $shift,
+                20 + $shift, 21 + $shift,
+                22 + $shift, 23 + $shift,
+                24 + $shift, 25 + $shift,
+                26 + $shift, 27 + $shift,
+                28 + $shift, 29 + $shift,
+                30 + $shift, 31 + $shift,
             ])
         }
     }
diff --git a/src/x86/macros.rs b/src/x86/macros.rs
@@ -1,3 +1,5 @@
+//! Utility macros.
+
 macro_rules! constify_imm8 {
     ($imm8:expr, $expand:ident) => {
         #[allow(overflowing_literals)]
diff --git a/src/x86/mod.rs b/src/x86/mod.rs
@@ -1,3 +1,5 @@
+//! x86 intrinsics
+
 pub use self::sse::*;
 pub use self::sse2::*;
 pub use self::sse3::*;
@@ -14,8 +16,10 @@ pub use self::tbm::*;
 
 pub use self::runtime::{__Feature, __unstable_detect_feature};
 
+/// 128-bit wide signed integer vector type
 #[allow(non_camel_case_types)]
 pub type __m128i = ::v128::i8x16;
+/// 256-bit wide signed integer vector type
 #[allow(non_camel_case_types)]
 pub type __m256i = ::v256::i8x32;
 
diff --git a/src/x86/sse.rs b/src/x86/sse.rs
@@ -1,3 +1,5 @@
+//! Streaming SIMD Extensions (SSE)
+
 use simd_llvm::simd_shuffle4;
 use v128::*;
 use v64::f32x2;
@@ -705,6 +707,7 @@ pub const _MM_EXCEPT_OVERFLOW: u32   = 0x0008;
 pub const _MM_EXCEPT_UNDERFLOW: u32  = 0x0010;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_EXCEPT_INEXACT: u32    = 0x0020;
+/// See [`_MM_GET_EXCEPTION_STATE`](fn._MM_GET_EXCEPTION_STATE.html)
 pub const _MM_EXCEPT_MASK: u32       = 0x003f;
 
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
@@ -719,6 +722,7 @@ pub const _MM_MASK_OVERFLOW: u32     = 0x0400;
 pub const _MM_MASK_UNDERFLOW: u32    = 0x0800;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_MASK_INEXACT: u32      = 0x1000;
+/// See [`_MM_GET_EXCEPTION_MASK`](fn._MM_GET_EXCEPTION_MASK.html)
 pub const _MM_MASK_MASK: u32         = 0x1f80;
 
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
@@ -729,56 +733,65 @@ pub const _MM_ROUND_DOWN: u32        = 0x2000;
 pub const _MM_ROUND_UP: u32          = 0x4000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
+/// See [`_MM_GET_ROUNDING_MODE`](fn._MM_GET_ROUNDING_MODE.html)
 pub const _MM_ROUND_MASK: u32        = 0x6000;
 
+/// See [`_MM_GET_FLUSH_ZERO_MODE`](fn._MM_GET_FLUSH_ZERO_MODE.html)
 pub const _MM_FLUSH_ZERO_MASK: u32   = 0x8000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_FLUSH_ZERO_ON: u32     = 0x8000;
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 pub const _MM_FLUSH_ZERO_OFF: u32    = 0x0000;
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
     _mm_getcsr() & _MM_MASK_MASK
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
     _mm_getcsr() & _MM_EXCEPT_MASK
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
     _mm_getcsr() & _MM_FLUSH_ZERO_MASK
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
     _mm_getcsr() & _MM_ROUND_MASK
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
     _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
 pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
     _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
@@ -788,6 +801,7 @@ pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
     _mm_setcsr(val)
 }
 
+/// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
 #[target_feature = "+sse"]
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
diff --git a/src/x86/sse3.rs b/src/x86/sse3.rs
diff --git a/src/x86/sse41.rs b/src/x86/sse41.rs
diff --git a/src/x86/sse42.rs b/src/x86/sse42.rs
diff --git a/src/x86/ssse3.rs b/src/x86/ssse3.rs

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`//! ARM intrinsics.`
	`2`	`+`
`2`	`3`	`pub use self::v6::*;`
`3`	`4`	`pub use self::v7::*;`
`4`	`5`	`#[cfg(target_arch = "aarch64")]`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! Utility macros`
	`2`	`+`
`1`	`3`	`macro_rules! define_ty {`
`2`	`4`	`($name:ident, $($elty:ident),+) => {`
`3`	`5`	`#[repr(simd)]`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! 128-bit wide vector types`
	`2`	`+`
`1`	`3`	`use simd_llvm::*;`
`2`	`4`
`3`	`5`	`define_ty! { f64x2, f64, f64 }`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! 256-bit wide vector types`
	`2`	`+`
`1`	`3`	`use simd_llvm::*;`
`2`	`4`
`3`	`5`	`define_ty! { f64x4, f64, f64, f64, f64 }`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! 512-bit wide vector types`
	`2`	`+`
`1`	`3`	`use simd_llvm::*;`
`2`	`4`
`3`	`5`	`define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! 64-bit wide vector types`
	`2`	`+`
`1`	`3`	`use simd_llvm::*;`
`2`	`4`
`3`	`5`	`define_ty_doc! {`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+//! Utility macros.`
	`2`	`+`
`1`	`3`	`macro_rules! constify_imm8 {`
`2`	`4`	`($imm8:expr, $expand:ident) => {`
`3`	`5`	`#[allow(overflowing_literals)]`