@@ -21,7 +21,7 @@ enum Step {
2121 Done
2222}
2323
24- use Step :: * ;
24+ use self :: Step :: * ;
2525
2626impl From < SearchStep > for Step {
2727 fn from ( x : SearchStep ) -> Self {
@@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
4242 }
4343}
4444
45+ // XXXManishearth these tests focus on single-character searching (CharSearcher)
46+ // and on next()/next_match(), not next_reject(). This is because
47+ // the memchr changes make next_match() for single chars complex, but next_reject()
48+ // continues to use next() under the hood. We should add more test cases for all
49+ // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
50+
4551#[ test]
4652fn test_simple_iteration ( ) {
4753 search_asserts ! ( "abcdeabcd" , 'a' , "forward iteration for ASCII string" ,
@@ -98,3 +104,149 @@ fn test_simple_search() {
98104 ) ;
99105}
100106
107+ // Á, 각, ก, 😁 all end in 0x81
108+ // 🁀, ᘀ do not end in 0x81 but contain the byte
109+ // ꁁ has 0x81 as its second and third bytes.
110+ //
111+ // The memchr-using implementations of next_match
112+ // and next_match_back temporarily violate
113+ // the property that the search is always on a unicode boundary,
114+ // which is fine as long as this never reaches next() or next_back().
115+ // So we test if next() is correct after each next_match() as well.
116+ const STRESS : & str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a" ;
117+
/// Walks `STRESS` with a needle that never occurs ('x'), so every `next()`
/// call rejects exactly one character. The expected table therefore doubles
/// as a reference for the byte range of each character in `STRESS`; the
/// other stress tests take their offsets from it.
#[test]
fn test_stress_indices() {
    search_asserts!(
        STRESS,
        'x',
        "Indices of characters in stress test",
        // One `next` per character (20), plus one final call that yields `Done`.
        [
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next,
            next, next, next, next, next, next, next
        ],
        [
            Rejects(0, 2),   // Á  (2 bytes)
            Rejects(2, 3),   // a
            Rejects(3, 7),   // 🁀 (4 bytes)
            Rejects(7, 8),   // b
            Rejects(8, 10),  // Á
            Rejects(10, 13), // ꁁ  (3 bytes)
            Rejects(13, 14), // f
            Rejects(14, 15), // g
            Rejects(15, 19), // 😁 (4 bytes)
            Rejects(19, 22), // 각 (3 bytes)
            Rejects(22, 25), // ก  (3 bytes)
            Rejects(25, 28), // ᘀ  (3 bytes)
            Rejects(28, 31), // 각
            Rejects(31, 32), // a
            Rejects(32, 34), // Á
            Rejects(34, 37), // 각
            Rejects(37, 40), // ꁁ
            Rejects(40, 43), // ก
            Rejects(43, 47), // 😁
            Rejects(47, 48), // a
            Done
        ]
    );
}
147+
/// Forward searches over `STRESS` for needles whose UTF-8 encodings share
/// bytes with other characters in the haystack.
///
/// Each needle is checked twice:
///   * a "plain" case that calls only `next_match()`, so consecutive
///     matches are found without any intervening `next()` call;
///   * a "check if next() still works" case that alternates `next_match()`
///     with `next()`, verifying that the searcher is left on a character
///     boundary after every match.
///
/// Byte offsets are the ones tabulated in `test_stress_indices`.
#[test]
fn test_forward_search_shared_bytes() {
    // 'Á' occupies 0..2, 8..10 and 32..34.
    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character",
        [next_match, next_match, next_match, next_match],
        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Forward search for two-byte Latin character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(0, 2),
            Rejects(2, 3),
            InRange(8, 10),
            Rejects(10, 13),
            InRange(32, 34),
            Rejects(34, 37),
            Done
        ]
    );

    // '각' occupies 19..22, 28..31 and 34..37.
    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character",
        [next_match, next_match, next_match, next_match],
        [InRange(19, 22), InRange(28, 31), InRange(34, 37), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Forward search for three-byte Hangul character; check if next() still works",
        [next_match, next, next_match, next, next_match, next, next_match],
        [
            InRange(19, 22),
            Rejects(22, 25),
            InRange(28, 31),
            Rejects(31, 32),
            InRange(34, 37),
            Rejects(37, 40),
            Done
        ]
    );

    // 'ก' occupies 22..25 and 40..43.
    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character",
        [next_match, next_match, next_match],
        [InRange(22, 25), InRange(40, 43), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Forward search for three-byte Thai character; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
    );

    // '😁' occupies 15..19 and 43..47.
    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji",
        [next_match, next_match, next_match],
        [InRange(15, 19), InRange(43, 47), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Forward search for four-byte emoji; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
    );

    // 'ꁁ' occupies 10..13 and 37..40.
    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes",
        [next_match, next_match, next_match],
        [InRange(10, 13), InRange(37, 40), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
        [next_match, next, next_match, next, next_match],
        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
    );
}
200+
/// Reverse searches over `STRESS` for needles whose UTF-8 encodings share
/// bytes with other characters in the haystack.
///
/// Each needle is checked twice:
///   * a "plain" case that calls only `next_match_back()`, so consecutive
///     matches are found without any intervening `next_back()` call;
///   * a "check if next_back() still works" case that alternates
///     `next_match_back()` with `next_back()`, verifying that the searcher
///     is left on a character boundary after every match.
///
/// Byte offsets are the ones tabulated in `test_stress_indices`.
#[test]
fn test_reverse_search_shared_bytes() {
    // 'Á' occupies 0..2, 8..10 and 32..34.
    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
    );

    search_asserts!(
        STRESS,
        'Á',
        "Reverse search for two-byte Latin character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
        [
            InRange(32, 34),
            Rejects(31, 32),
            InRange(8, 10),
            Rejects(7, 8),
            InRange(0, 2),
            Done
        ]
    );

    // '각' occupies 19..22, 28..31 and 34..37.
    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character",
        [next_match_back, next_match_back, next_match_back, next_match_back],
        [InRange(34, 37), InRange(28, 31), InRange(19, 22), Done]
    );

    search_asserts!(
        STRESS,
        '각',
        "Reverse search for three-byte Hangul character; check if next_back() still works",
        [
            next_match_back,
            next_back,
            next_match_back,
            next_back,
            next_match_back,
            next_back,
            next_match_back
        ],
        [
            InRange(34, 37),
            Rejects(32, 34),
            InRange(28, 31),
            Rejects(25, 28),
            InRange(19, 22),
            Rejects(15, 19),
            Done
        ]
    );

    // 'ก' occupies 22..25 and 40..43.
    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character",
        [next_match_back, next_match_back, next_match_back],
        [InRange(40, 43), InRange(22, 25), Done]
    );

    search_asserts!(
        STRESS,
        'ก',
        "Reverse search for three-byte Thai character; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
    );

    // '😁' occupies 15..19 and 43..47.
    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji",
        [next_match_back, next_match_back, next_match_back],
        [InRange(43, 47), InRange(15, 19), Done]
    );

    search_asserts!(
        STRESS,
        '😁',
        "Reverse search for four-byte emoji; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
    );

    // 'ꁁ' occupies 10..13 and 37..40.
    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes",
        [next_match_back, next_match_back, next_match_back],
        [InRange(37, 40), InRange(10, 13), Done]
    );

    search_asserts!(
        STRESS,
        'ꁁ',
        "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
        [next_match_back, next_back, next_match_back, next_back, next_match_back],
        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
    );
}
0 commit comments