Skip to content

Commit 47b4a87

Browse files
committed
[doc] document all missing items
1 parent 7b83e03 commit 47b4a87

19 files changed

+159
-88
lines changed

.appveyor.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ install:
1919
build: false
2020

2121
test_script:
22-
- C:\msys64\usr\bin\sh ci\run.sh
22+
- cargo test --target %TARGET%
23+
- cargo test --target %TARGET% --release
2324

2425
branches:
2526
only:

src/arm/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//! ARM intrinsics.
2+
23
pub use self::v6::*;
34
pub use self::v7::*;
45
#[cfg(target_arch = "aarch64")]

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
#![cfg_attr(test, feature(proc_macro, test))]
119119

120120
#![cfg_attr(feature = "cargo-clippy",
121-
allow(inline_always, too_many_arguments, missing_docs_in_private_items,
121+
allow(inline_always, too_many_arguments,
122122
cast_sign_loss, cast_lossless, cast_possible_wrap,
123123
cast_possible_truncation, cast_precision_loss, shadow_reuse,
124124
cyclomatic_complexity, similar_names

src/macros.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Utility macros
2+
13
macro_rules! define_ty {
24
($name:ident, $($elty:ident),+) => {
35
#[repr(simd)]

src/simd_llvm.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
//! LLVM's simd platform intrinsics
2+
//!
3+
//! TODO: should use `link_llvm_intrinsic` instead: issue #112
4+
15
extern "platform-intrinsic" {
26
pub fn simd_eq<T, U>(x: T, y: T) -> U;
37
pub fn simd_ne<T, U>(x: T, y: T) -> U;

src/v128.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! 128-bit wide vector types
2+
13
use simd_llvm::*;
24

35
define_ty! { f64x2, f64, f64 }

src/v256.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! 256-bit wide vector types
2+
13
use simd_llvm::*;
24

35
define_ty! { f64x4, f64, f64, f64, f64 }

src/v512.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! 512-bit wide vector types
2+
13
use simd_llvm::*;
24

35
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }

src/v64.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! 64-bit wide vector types
2+
13
use simd_llvm::*;
24

35
define_ty_doc! {

src/x86/avx.rs

+44-34
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
//! Advanced Vector Extensions (AVX)
2+
//!
3+
//! The references are:
4+
//!
5+
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
6+
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
7+
//!
8+
//! [Wikipedia](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) provides a quick overview of the instructions available.
9+
110
use std::mem;
211

312
#[cfg(test)]
@@ -484,69 +493,69 @@ pub unsafe fn _mm256_xor_ps(a: f32x8, b: f32x8) -> f32x8 {
484493
mem::transmute(a ^ b)
485494
}
486495

487-
// Equal (ordered, non-signaling)
496+
/// Equal (ordered, non-signaling)
488497
pub const _CMP_EQ_OQ: u8 = 0x00;
489-
// Less-than (ordered, signaling)
498+
/// Less-than (ordered, signaling)
490499
pub const _CMP_LT_OS: u8 = 0x01;
491-
// Less-than-or-equal (ordered, signaling)
500+
/// Less-than-or-equal (ordered, signaling)
492501
pub const _CMP_LE_OS: u8 = 0x02;
493-
// Unordered (non-signaling)
502+
/// Unordered (non-signaling)
494503
pub const _CMP_UNORD_Q: u8 = 0x03;
495-
// Not-equal (unordered, non-signaling)
504+
/// Not-equal (unordered, non-signaling)
496505
pub const _CMP_NEQ_UQ: u8 = 0x04;
497-
// Not-less-than (unordered, signaling)
506+
/// Not-less-than (unordered, signaling)
498507
pub const _CMP_NLT_US: u8 = 0x05;
499-
// Not-less-than-or-equal (unordered, signaling)
508+
/// Not-less-than-or-equal (unordered, signaling)
500509
pub const _CMP_NLE_US: u8 = 0x06;
501-
// Ordered (non-signaling)
510+
/// Ordered (non-signaling)
502511
pub const _CMP_ORD_Q: u8 = 0x07;
503-
// Equal (unordered, non-signaling)
512+
/// Equal (unordered, non-signaling)
504513
pub const _CMP_EQ_UQ: u8 = 0x08;
505-
// Not-greater-than-or-equal (unordered, signaling)
514+
/// Not-greater-than-or-equal (unordered, signaling)
506515
pub const _CMP_NGE_US: u8 = 0x09;
507-
// Not-greater-than (unordered, signaling)
516+
/// Not-greater-than (unordered, signaling)
508517
pub const _CMP_NGT_US: u8 = 0x0a;
509-
// False (ordered, non-signaling)
518+
/// False (ordered, non-signaling)
510519
pub const _CMP_FALSE_OQ: u8 = 0x0b;
511-
// Not-equal (ordered, non-signaling)
520+
/// Not-equal (ordered, non-signaling)
512521
pub const _CMP_NEQ_OQ: u8 = 0x0c;
513-
// Greater-than-or-equal (ordered, signaling)
522+
/// Greater-than-or-equal (ordered, signaling)
514523
pub const _CMP_GE_OS: u8 = 0x0d;
515-
// Greater-than (ordered, signaling)
524+
/// Greater-than (ordered, signaling)
516525
pub const _CMP_GT_OS: u8 = 0x0e;
517-
// True (unordered, non-signaling)
526+
/// True (unordered, non-signaling)
518527
pub const _CMP_TRUE_UQ: u8 = 0x0f;
519-
// Equal (ordered, signaling)
528+
/// Equal (ordered, signaling)
520529
pub const _CMP_EQ_OS: u8 = 0x10;
521-
// Less-than (ordered, non-signaling)
530+
/// Less-than (ordered, non-signaling)
522531
pub const _CMP_LT_OQ: u8 = 0x11;
523-
// Less-than-or-equal (ordered, non-signaling)
532+
/// Less-than-or-equal (ordered, non-signaling)
524533
pub const _CMP_LE_OQ: u8 = 0x12;
525-
// Unordered (signaling)
534+
/// Unordered (signaling)
526535
pub const _CMP_UNORD_S: u8 = 0x13;
527-
// Not-equal (unordered, signaling)
536+
/// Not-equal (unordered, signaling)
528537
pub const _CMP_NEQ_US: u8 = 0x14;
529-
// Not-less-than (unordered, non-signaling)
538+
/// Not-less-than (unordered, non-signaling)
530539
pub const _CMP_NLT_UQ: u8 = 0x15;
531-
// Not-less-than-or-equal (unordered, non-signaling)
540+
/// Not-less-than-or-equal (unordered, non-signaling)
532541
pub const _CMP_NLE_UQ: u8 = 0x16;
533-
// Ordered (signaling)
542+
/// Ordered (signaling)
534543
pub const _CMP_ORD_S: u8 = 0x17;
535-
// Equal (unordered, signaling)
544+
/// Equal (unordered, signaling)
536545
pub const _CMP_EQ_US: u8 = 0x18;
537-
// Not-greater-than-or-equal (unordered, non-signaling)
546+
/// Not-greater-than-or-equal (unordered, non-signaling)
538547
pub const _CMP_NGE_UQ: u8 = 0x19;
539-
// Not-greater-than (unordered, non-signaling)
548+
/// Not-greater-than (unordered, non-signaling)
540549
pub const _CMP_NGT_UQ: u8 = 0x1a;
541-
// False (ordered, signaling)
550+
/// False (ordered, signaling)
542551
pub const _CMP_FALSE_OS: u8 = 0x1b;
543-
// Not-equal (ordered, signaling)
552+
/// Not-equal (ordered, signaling)
544553
pub const _CMP_NEQ_OS: u8 = 0x1c;
545-
// Greater-than-or-equal (ordered, non-signaling)
554+
/// Greater-than-or-equal (ordered, non-signaling)
546555
pub const _CMP_GE_OQ: u8 = 0x1d;
547-
// Greater-than (ordered, non-signaling)
556+
/// Greater-than (ordered, non-signaling)
548557
pub const _CMP_GT_OQ: u8 = 0x1e;
549-
// True (unordered, signaling)
558+
/// True (unordered, signaling)
550559
pub const _CMP_TRUE_US: u8 = 0x1f;
551560

552561
/// Compare packed double-precision (64-bit) floating-point
@@ -806,12 +815,11 @@ pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
806815
#[target_feature = "+avx"]
807816
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
808817
pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
809-
const fn add4(x: u32) -> u32 { x + 4 }
810818
let imm8 = (imm8 & 0xFF) as u8;
811819
macro_rules! shuffle4 {
812820
($a:expr, $b:expr, $c:expr, $d:expr) => {
813821
simd_shuffle8(a, _mm256_undefined_ps(), [
814-
$a, $b, $c, $d, add4($a), add4($b), add4($c), add4($d)
822+
$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4
815823
])
816824
}
817825
}
@@ -907,6 +915,8 @@ pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
907915
}
908916
}
909917

918+
/// Shuffle double-precision (64-bit) floating-point elements in `a`
919+
/// within 256-bit lanes using the control in `b`.
910920
#[inline(always)]
911921
#[target_feature = "+avx"]
912922
#[cfg_attr(test, assert_instr(vpermilpd))]

src/x86/avx2.rs

+30-17
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
//! Advanced Vector Extensions 2 (AVX)
2+
//!
3+
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
4+
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
5+
//!
6+
//! The references are:
7+
//!
8+
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
9+
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
10+
//!
11+
//! Wikipedia's [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)
12+
//! and [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate) pages
13+
//! provide a quick overview of the instructions available.
14+
115
use simd_llvm::simd_shuffle32;
216
use v256::*;
317
use v128::*;
@@ -100,7 +114,6 @@ pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
100114
#[target_feature = "+avx2"]
101115
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
102116
pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
103-
const fn add(a: u32, b: u32) -> u32 { a + b }
104117
let n = n as u32;
105118
// If palignr is shifting the pair of vectors more than the size of two
106119
// lanes, emit zero.
@@ -118,22 +131,22 @@ pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
118131
macro_rules! shuffle {
119132
($shift:expr) => {
120133
simd_shuffle32(b, a, [
121-
add(0, $shift), add(1, $shift),
122-
add(2, $shift), add(3, $shift),
123-
add(4, $shift), add(5, $shift),
124-
add(6, $shift), add(7, $shift),
125-
add(8, $shift), add(9, $shift),
126-
add(10, $shift), add(11, $shift),
127-
add(12, $shift), add(13, $shift),
128-
add(14, $shift), add(15, $shift),
129-
add(16, $shift), add(17, $shift),
130-
add(18, $shift), add(19, $shift),
131-
add(20, $shift), add(21, $shift),
132-
add(22, $shift), add(23, $shift),
133-
add(24, $shift), add(25, $shift),
134-
add(26, $shift), add(27, $shift),
135-
add(28, $shift), add(29, $shift),
136-
add(30, $shift), add(31, $shift),
134+
0 + $shift, 1 + $shift,
135+
2 + $shift, 3 + $shift,
136+
4 + $shift, 5 + $shift,
137+
6 + $shift, 7 + $shift,
138+
8 + $shift, 9 + $shift,
139+
10 + $shift, 11 + $shift,
140+
12 + $shift, 13 + $shift,
141+
14 + $shift, 15 + $shift,
142+
16 + $shift, 17 + $shift,
143+
18 + $shift, 19 + $shift,
144+
20 + $shift, 21 + $shift,
145+
22 + $shift, 23 + $shift,
146+
24 + $shift, 25 + $shift,
147+
26 + $shift, 27 + $shift,
148+
28 + $shift, 29 + $shift,
149+
30 + $shift, 31 + $shift,
137150
])
138151
}
139152
}

src/x86/macros.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Utility macros.
2+
13
macro_rules! constify_imm8 {
24
($imm8:expr, $expand:ident) => {
35
#[allow(overflowing_literals)]

src/x86/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! x86 intrinsics
2+
13
pub use self::sse::*;
24
pub use self::sse2::*;
35
pub use self::sse3::*;
@@ -14,8 +16,10 @@ pub use self::tbm::*;
1416

1517
pub use self::runtime::{__Feature, __unstable_detect_feature};
1618

19+
/// 128-bit wide signed integer vector type
1720
#[allow(non_camel_case_types)]
1821
pub type __m128i = ::v128::i8x16;
22+
/// 256-bit wide signed integer vector type
1923
#[allow(non_camel_case_types)]
2024
pub type __m256i = ::v256::i8x32;
2125

src/x86/sse.rs

+14
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Streaming SIMD Extensions (SSE)
2+
13
use simd_llvm::simd_shuffle4;
24
use v128::*;
35
use v64::f32x2;
@@ -705,6 +707,7 @@ pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
705707
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
706708
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
707709
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
710+
/// See [`_MM_GET_EXCEPTION_STATE`](fn._MM_GET_EXCEPTION_STATE.html)
708711
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
709712

710713
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
@@ -719,6 +722,7 @@ pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
719722
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
720723
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
721724
pub const _MM_MASK_INEXACT: u32 = 0x1000;
725+
/// See [`_MM_GET_EXCEPTION_MASK`](fn._MM_GET_EXCEPTION_MASK.html)
722726
pub const _MM_MASK_MASK: u32 = 0x1f80;
723727

724728
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
@@ -729,56 +733,65 @@ pub const _MM_ROUND_DOWN: u32 = 0x2000;
729733
pub const _MM_ROUND_UP: u32 = 0x4000;
730734
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
731735
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;
736+
/// See [`_MM_GET_ROUNDING_MODE`](fn._MM_GET_ROUNDING_MODE.html)
732737
pub const _MM_ROUND_MASK: u32 = 0x6000;
733738

739+
/// See [`_MM_GET_FLUSH_ZERO_MODE`](fn._MM_GET_FLUSH_ZERO_MODE.html)
734740
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
735741
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
736742
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
737743
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
738744
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
739745

746+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
740747
#[inline(always)]
741748
#[allow(non_snake_case)]
742749
#[target_feature = "+sse"]
743750
pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
744751
_mm_getcsr() & _MM_MASK_MASK
745752
}
746753

754+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
747755
#[inline(always)]
748756
#[allow(non_snake_case)]
749757
#[target_feature = "+sse"]
750758
pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
751759
_mm_getcsr() & _MM_EXCEPT_MASK
752760
}
753761

762+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
754763
#[inline(always)]
755764
#[allow(non_snake_case)]
756765
#[target_feature = "+sse"]
757766
pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
758767
_mm_getcsr() & _MM_FLUSH_ZERO_MASK
759768
}
760769

770+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
761771
#[inline(always)]
762772
#[allow(non_snake_case)]
763773
#[target_feature = "+sse"]
764774
pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
765775
_mm_getcsr() & _MM_ROUND_MASK
766776
}
767777

778+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
768779
#[inline(always)]
769780
#[allow(non_snake_case)]
770781
#[target_feature = "+sse"]
771782
pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
772783
_mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
773784
}
774785

786+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
775787
#[inline(always)]
776788
#[allow(non_snake_case)]
777789
#[target_feature = "+sse"]
778790
pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
779791
_mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
780792
}
781793

794+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
782795
#[inline(always)]
783796
#[allow(non_snake_case)]
784797
#[target_feature = "+sse"]
@@ -788,6 +801,7 @@ pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
788801
_mm_setcsr(val)
789802
}
790803

804+
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
791805
#[inline(always)]
792806
#[allow(non_snake_case)]
793807
#[target_feature = "+sse"]

0 commit comments

Comments
 (0)