22//! crate.
33//!
44//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
5+ //!
6+ //! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
7+ //! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
8+ //! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
9+ //! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
10+ //! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
11+ //! this means that division by poison or undef is like division by zero, which means it inflicts...
12+ //! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
13+ //! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
14+ //! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
15+ //! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
16+ //!
17+ //! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
18+ //!
19+ //! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
520
621/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
7- /// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
8- /// is documented alongside each intrinsic.
22+ /// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
23+ /// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
924extern "platform-intrinsic" {
1025 /// add/fadd
1126 pub ( crate ) fn simd_add < T > ( x : T , y : T ) -> T ;
1227
1328 /// sub/fsub
14- pub ( crate ) fn simd_sub < T > ( x : T , y : T ) -> T ;
29+ pub ( crate ) fn simd_sub < T > ( lhs : T , rhs : T ) -> T ;
1530
1631 /// mul/fmul
1732 pub ( crate ) fn simd_mul < T > ( x : T , y : T ) -> T ;
@@ -20,19 +35,22 @@ extern "platform-intrinsic" {
2035 /// ints and uints: {s,u}div incur UB if division by zero occurs.
2136 /// ints: sdiv is UB for int::MIN / -1.
2237 /// floats: fdiv is never UB, but may create NaNs or infinities.
23- pub ( crate ) fn simd_div < T > ( x : T , y : T ) -> T ;
38+ pub ( crate ) fn simd_div < T > ( lhs : T , rhs : T ) -> T ;
2439
2540 /// urem/srem/frem
2641 /// ints and uints: {s,u}rem incur UB if division by zero occurs.
2742 /// ints: srem is UB for int::MIN % -1.
2843 /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
29- pub ( crate ) fn simd_rem < T > ( x : T , y : T ) -> T ;
44+ pub ( crate ) fn simd_rem < T > ( lhs : T , rhs : T ) -> T ;
3045
3146 /// shl
32- pub ( crate ) fn simd_shl < T > ( x : T , y : T ) -> T ;
47+ /// for (u)ints. poison if rhs >= lhs::BITS
48+ pub ( crate ) fn simd_shl < T > ( lhs : T , rhs : T ) -> T ;
3349
34- /// lshr/ashr
35- pub ( crate ) fn simd_shr < T > ( x : T , y : T ) -> T ;
50+ /// ints: ashr
51+ /// uints: lshr
52+ /// poison if rhs >= lhs::BITS
53+ pub ( crate ) fn simd_shr < T > ( lhs : T , rhs : T ) -> T ;
3654
3755 /// and
3856 pub ( crate ) fn simd_and < T > ( x : T , y : T ) -> T ;
@@ -44,6 +62,9 @@ extern "platform-intrinsic" {
4462 pub ( crate ) fn simd_xor < T > ( x : T , y : T ) -> T ;
4563
4664 /// fptoui/fptosi/uitofp/sitofp
65+ /// casting floats to integers is truncating, so it is safe to convert values like 1.5,
66+ /// but the truncated value must fit in the target type or the result is poison.
67+ /// use `simd_as` instead for a cast that performs a saturating conversion.
4768 pub ( crate ) fn simd_cast < T , U > ( x : T ) -> U ;
4869 /// follows Rust's `T as U` semantics, including saturating float casts
4970 /// which amounts to the same as `simd_cast` for many cases
@@ -63,6 +84,7 @@ extern "platform-intrinsic" {
6384 pub ( crate ) fn simd_fmin < T > ( x : T , y : T ) -> T ;
6485 pub ( crate ) fn simd_fmax < T > ( x : T , y : T ) -> T ;
6586
87+ // these return Simd<int, N> with the same BITS size as the inputs
6688 pub ( crate ) fn simd_eq < T , U > ( x : T , y : T ) -> U ;
6789 pub ( crate ) fn simd_ne < T , U > ( x : T , y : T ) -> U ;
6890 pub ( crate ) fn simd_lt < T , U > ( x : T , y : T ) -> U ;
@@ -71,19 +93,31 @@ extern "platform-intrinsic" {
7193 pub ( crate ) fn simd_ge < T , U > ( x : T , y : T ) -> U ;
7294
7395 // shufflevector
96+ // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
7497 pub ( crate ) fn simd_shuffle < T , U , V > ( x : T , y : T , idx : U ) -> V ;
7598
99+ /// llvm.masked.gather
100+ /// like a loop of pointer reads
101+ /// val: vector of fallback values, selected for each lane whose mask is false
102+ /// ptr: vector of pointers to read from
103+ /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
104+ /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
105+ /// FIXME: review this if/when we fix up our mask story in general?
76106 pub ( crate ) fn simd_gather < T , U , V > ( val : T , ptr : U , mask : V ) -> T ;
107+ /// llvm.masked.scatter
108+ /// like gather, but more spicy, as it writes instead of reads
77109 pub ( crate ) fn simd_scatter < T , U , V > ( val : T , ptr : U , mask : V ) ;
78110
79111 // {s,u}add.sat
80112 pub ( crate ) fn simd_saturating_add < T > ( x : T , y : T ) -> T ;
81113
82114 // {s,u}sub.sat
83- pub ( crate ) fn simd_saturating_sub < T > ( x : T , y : T ) -> T ;
115+ pub ( crate ) fn simd_saturating_sub < T > ( lhs : T , rhs : T ) -> T ;
84116
85117 // reductions
118+ // llvm.vector.reduce.{add,fadd}
86119 pub ( crate ) fn simd_reduce_add_ordered < T , U > ( x : T , y : U ) -> U ;
120+ // llvm.vector.reduce.{mul,fmul}
87121 pub ( crate ) fn simd_reduce_mul_ordered < T , U > ( x : T , y : U ) -> U ;
88122 #[ allow( unused) ]
89123 pub ( crate ) fn simd_reduce_all < T > ( x : T ) -> bool ;
@@ -100,7 +134,10 @@ extern "platform-intrinsic" {
100134 pub ( crate ) fn simd_bitmask < T , U > ( x : T ) -> U ;
101135
102136 // select
103- pub ( crate ) fn simd_select < M , T > ( m : M , a : T , b : T ) -> T ;
137+ // first argument is a vector of integers, -1 (all bits 1) is "true"
138+ // logically equivalent to (yes & m) | (no & (m^-1)),
139+ // but you can use it on floats.
140+ pub ( crate ) fn simd_select < M , T > ( m : M , yes : T , no : T ) -> T ;
104141 #[ allow( unused) ]
105- pub ( crate ) fn simd_select_bitmask < M , T > ( m : M , a : T , b : T ) -> T ;
142+ pub ( crate ) fn simd_select_bitmask < M , T > ( m : M , yes : T , no : T ) -> T ;
106143}
0 commit comments