@@ -1,8 +1,7 @@
1 | 1 | //! Masks that take up full SIMD vector registers. |
2 | 2 |
3 | | -use super::{to_bitmask::ToBitMaskArray, MaskElement}; |
4 | 3 | use crate::simd::intrinsics; |
5 | | -use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask}; |
| 4 | +use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; |
6 | 5 |
7 | 6 | #[repr(transparent)] |
8 | 7 | pub struct Mask<T, const N: usize>(Simd<T, N>) |
@@ -143,95 +142,105 @@ where |
143 | 142 | } |
144 | 143 |
145 | 144 | #[inline] |
146 | | - #[must_use = "method returns a new array and does not mutate the original value"] |
147 | | - pub fn to_bitmask_array<const M: usize>(self) -> [u8; M] |
148 | | - where |
149 | | - super::Mask<T, N>: ToBitMaskArray, |
150 | | - { |
| 145 | + #[must_use = "method returns a new vector and does not mutate the original value"] |
| 146 | + pub fn to_bitmask_vector(self) -> Simd<T, N> { |
| 147 | + let mut bitmask = Self::splat(false).to_int(); |
| 148 | + |
151 | 149 | // Safety: Bytes is the right size array |
152 | 150 | unsafe { |
153 | 151 | // Compute the bitmask |
154 | | - let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray = |
| 152 | + let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask = |
155 | 153 | intrinsics::simd_bitmask(self.0); |
156 | 154 |
157 | | - // Transmute to the return type |
158 | | - let mut bitmask: [u8; M] = core::mem::transmute_copy(&bitmask); |
159 | | - |
160 | 155 | // LLVM assumes bit order should match endianness |
161 | 156 | if cfg!(target_endian = "big") { |
162 | | - for x in bitmask.as_mut() { |
163 | | - *x = x.reverse_bits(); |
| 157 | + for x in bytes.as_mut() { |
| 158 | + *x = x.reverse_bits() |
164 | 159 | } |
165 | | - }; |
| 160 | + } |
166 | 161 |
167 | | - bitmask |
| 162 | + assert!( |
| 163 | + core::mem::size_of::<Simd<T, N>>() |
| 164 | + >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>() |
| 165 | + ); |
| 166 | + core::ptr::copy_nonoverlapping( |
| 167 | + bytes.as_ref().as_ptr(), |
| 168 | + bitmask.as_mut_array().as_mut_ptr() as _, |
| 169 | + bytes.as_ref().len(), |
| 170 | + ); |
168 | 171 | } |
| 172 | + |
| 173 | + bitmask |
169 | 174 | } |
170 | 175 |
171 | 176 | #[inline] |
172 | 177 | #[must_use = "method returns a new mask and does not mutate the original value"] |
173 | | - pub fn from_bitmask_array<const M: usize>(mut bitmask: [u8; M]) -> Self |
174 | | - where |
175 | | - super::Mask<T, N>: ToBitMaskArray, |
176 | | - { |
| 178 | + pub fn from_bitmask_vector(bitmask: Simd<T, N>) -> Self { |
| 179 | + let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default(); |
| 180 | + |
177 | 181 | // Safety: Bytes is the right size array |
178 | 182 | unsafe { |
| 183 | + assert!( |
| 184 | + core::mem::size_of::<Simd<T, N>>() |
| 185 | + >= core::mem::size_of::<<LaneCount<N> as SupportedLaneCount>::BitMask>() |
| 186 | + ); |
| 187 | + core::ptr::copy_nonoverlapping( |
| 188 | + bitmask.as_array().as_ptr() as _, |
| 189 | + bytes.as_mut().as_mut_ptr(), |
| 190 | + bytes.as_mut().len(), |
| 191 | + ); |
| 192 | + |
179 | 193 | // LLVM assumes bit order should match endianness |
180 | 194 | if cfg!(target_endian = "big") { |
181 | | - for x in bitmask.as_mut() { |
| 195 | + for x in bytes.as_mut() { |
182 | 196 | *x = x.reverse_bits(); |
183 | 197 | } |
184 | 198 | } |
185 | 199 |
186 | | - // Transmute to the bitmask |
187 | | - let bitmask: <super::Mask<T, N> as ToBitMaskArray>::BitMaskArray = |
188 | | - core::mem::transmute_copy(&bitmask); |
189 | | - |
190 | 200 | // Compute the regular mask |
191 | 201 | Self::from_int_unchecked(intrinsics::simd_select_bitmask( |
192 | | - bitmask, |
| 202 | + bytes, |
193 | 203 | Self::splat(true).to_int(), |
194 | 204 | Self::splat(false).to_int(), |
195 | 205 | )) |
196 | 206 | } |
197 | 207 | } |
198 | 208 |
199 | 209 | #[inline] |
200 | | - pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U |
201 | | - where |
202 | | - super::Mask<T, N>: ToBitMask<BitMask = U>, |
203 | | - { |
204 | | - // Safety: U is required to be the appropriate bitmask type |
205 | | - let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) }; |
| 210 | + pub(crate) fn to_bitmask_integer(self) -> u64 { |
| 211 | + let resized = self.to_int().extend::<64>(T::FALSE); |
| 212 | + |
| 213 | + // SAFETY: `resized` is an integer vector with length 64 |
| 214 | + let bitmask: u64 = unsafe { intrinsics::simd_bitmask(resized) }; |
206 | 215 |
207 | 216 | // LLVM assumes bit order should match endianness |
208 | 217 | if cfg!(target_endian = "big") { |
209 | | - bitmask.reverse_bits(N) |
| 218 | + bitmask.reverse_bits() |
210 | 219 | } else { |
211 | 220 | bitmask |
212 | 221 | } |
213 | 222 | } |
214 | 223 |
215 | 224 | #[inline] |
216 | | - pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self |
217 | | - where |
218 | | - super::Mask<T, N>: ToBitMask<BitMask = U>, |
219 | | - { |
| 225 | + pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { |
220 | 226 | // LLVM assumes bit order should match endianness |
221 | 227 | let bitmask = if cfg!(target_endian = "big") { |
222 | | - bitmask.reverse_bits(N) |
| 228 | + bitmask.reverse_bits() |
223 | 229 | } else { |
224 | 230 | bitmask |
225 | 231 | }; |
226 | 232 |
227 | | - // Safety: U is required to be the appropriate bitmask type |
228 | | - unsafe { |
229 | | - Self::from_int_unchecked(intrinsics::simd_select_bitmask( |
| 233 | + // SAFETY: `mask` is the correct bitmask type for a u64 bitmask |
| 234 | + let mask: Simd<T, 64> = unsafe { |
| 235 | + intrinsics::simd_select_bitmask( |
230 | 236 | bitmask, |
231 | | - Self::splat(true).to_int(), |
232 | | - Self::splat(false).to_int(), |
233 | | - )) |
234 | | - } |
| 237 | + Simd::<T, 64>::splat(T::TRUE), |
| 238 | + Simd::<T, 64>::splat(T::FALSE), |
| 239 | + ) |
| 240 | + }; |
| 241 | + |
| 242 | + // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE` |
| 243 | + unsafe { Self::from_int_unchecked(mask.extend::<N>(T::FALSE)) } |
235 | 244 | } |
236 | 245 |
237 | 246 | #[inline] |
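
A minimal usage sketch of the new integer bitmask path, assuming the public `Mask::to_bitmask`/`Mask::from_bitmask` wrappers in `core::simd` forward to the `to_bitmask_integer`/`from_bitmask_integer` helpers changed above (nightly, `#![feature(portable_simd)]`):

```rust
#![feature(portable_simd)]
use std::simd::Mask;

fn main() {
    let mask = Mask::<i32, 4>::from_array([true, false, true, true]);

    // Lane 0 maps to the least significant bit, so this packs to 0b1101.
    let bits: u64 = mask.to_bitmask();
    assert_eq!(bits, 0b1101);

    // Round-tripping through the u64 recovers the original mask; bits at or
    // above lane N are ignored, matching the truncation back to N lanes above.
    assert_eq!(Mask::<i32, 4>::from_bitmask(bits), mask);
}
```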
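
As a hedged scalar sketch of the lane-to-bit layout these conversions maintain (the `pack_lanes` helper below is purely illustrative, not part of the crate): lane `i` lands in bit `i % 8` of byte `i / 8`, and the big-endian branch in `to_bitmask_vector` restores that order by reversing the bits of each bitmask byte.

```rust
fn pack_lanes(lanes: &[bool]) -> Vec<u8> {
    // Pack lane i into bit (i % 8) of byte (i / 8), with lane 0 in the least
    // significant bit of byte 0.
    let mut bytes = vec![0u8; (lanes.len() + 7) / 8];
    for (i, &lane) in lanes.iter().enumerate() {
        if lane {
            bytes[i / 8] |= 1 << (i % 8);
        }
    }
    bytes
}

fn main() {
    // Lanes 0, 2, 3 and 8 set -> bytes [0b0000_1101, 0b0000_0001].
    let lanes = [true, false, true, true, false, false, false, false, true];
    assert_eq!(pack_lanes(&lanes), [0b0000_1101, 0b0000_0001]);

    // The big-endian fixup in `to_bitmask_vector` is a per-byte bit reversal.
    assert_eq!(0b0000_1101u8.reverse_bits(), 0b1011_0000);
}
```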