@@ -259,6 +259,9 @@ unsafe fn test_simd() {
259259 test_mm_insert_epi16 ( ) ;
260260 test_mm_shuffle_epi8 ( ) ;
261261
262+ #[ cfg( not( jit) ) ]
263+ test_mm_cmpestri ( ) ;
264+
262265 test_mm256_shuffle_epi8 ( ) ;
263266 test_mm256_permute2x128_si256 ( ) ;
264267 test_mm256_permutevar8x32_epi32 ( ) ;
@@ -430,6 +433,31 @@ unsafe fn test_mm_shuffle_epi8() {
430433 assert_eq_m128i ( r, expected) ;
431434}
432435
/// Loads up to 16 bytes of `s` into an `__m128i`, zero-padding the remainder.
///
/// `_mm_loadu_si128` always reads a full 16 bytes, so a slice shorter than that
/// cannot be loaded directly. Instead of loading and then patching the vector,
/// the input is copied into a zero-initialised 16-byte buffer and that buffer
/// is loaded.
///
/// # Panics
///
/// Panics if `s` is longer than 16 bytes.
#[cfg(not(jit))]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
    assert!(
        s.len() <= 16,
        "input must fit in a 128-bit vector, got {} bytes",
        s.len()
    );

    // Safe, bounds-checked copy into the zeroed staging buffer; bytes past
    // `s.len()` stay zero.
    let mut padded = [0u8; 16];
    padded[..s.len()].copy_from_slice(s);

    // SAFETY: `padded` is exactly 16 readable bytes, and `_mm_loadu_si128`
    // has no alignment requirement on its source pointer.
    _mm_loadu_si128(padded.as_ptr() as *const __m128i)
}
450+
451+ #[ cfg( not( jit) ) ]
452+ #[ cfg( target_arch = "x86_64" ) ]
453+ #[ target_feature( enable = "sse4.2" ) ]
454+ unsafe fn test_mm_cmpestri ( ) {
455+ let a = str_to_m128i ( b"bar - garbage" ) ;
456+ let b = str_to_m128i ( b"foobar" ) ;
457+ let i = _mm_cmpestri :: < _SIDD_CMP_EQUAL_ORDERED > ( a, 3 , b, 6 ) ;
458+ assert_eq ! ( 3 , i) ;
459+ }
460+
433461#[ cfg( target_arch = "x86_64" ) ]
434462#[ target_feature( enable = "avx2" ) ]
435463unsafe fn test_mm256_shuffle_epi8 ( ) {
0 commit comments