@@ -144,13 +144,212 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
144144 NULL
145145};
146146
/* The perfect hash table below was adapted from gperf output; the hashing code itself was generated by gperf.
 * The table was changed to hold indices into the mbfl_encoding_ptr_list table above, which makes it lighter on the data cache.
 * Command used: gperf encodings.txt --readonly-tables --null-strings --ignore-case
 * encodings.txt contains the contents of the name fields of all entries of the mbfl_encoding_ptr_list table. */
151+
/* Slot table of the perfect hash over encoding names.
 * The slot number is the key returned by mbfl_name2encoding_perfect_hash();
 * the stored value is an index into mbfl_encoding_ptr_list, or -1 when no
 * name hashes to that slot. The trailing comment on each row gives the
 * slot range that the row covers. */
static const int8_t mbfl_encoding_ptr_list_after_hashing[187] = {
	-1, -1, -1, 65, 23,  9, -1, 60, 36, -1, /*   0 -   9 */
	-1, 58, 42, -1, -1, 18, 27, 77, 26, 40, /*  10 -  19 */
	72, 12, 10,  2, 31, -1, -1, 75, 74, 33, /*  20 -  29 */
	45, -1, 67, 13, -1, 51, 53, 11,  1, -1, /*  30 -  39 */
	48, 56, -1, 38, 20, 46, 54, -1, 14, 24, /*  40 -  49 */
	44, 39, 43, -1, 30, 49, 57, 76, -1, -1, /*  50 -  59 */
	68, 73,  7, 16, -1, 35, 66, -1, -1, -1, /*  60 -  69 */
	47, 55, -1, -1, -1, 63, 15,  8, 17, -1, /*  70 -  79 */
	21, 70, -1, 29,  5,  6, 61, -1, -1, 71, /*  80 -  89 */
	52,  3, 37, -1, -1, 28, -1, -1, -1, 32, /*  90 -  99 */
	50, 34, -1, -1, -1, 62, -1, -1, -1, -1, /* 100 - 109 */
	-1, -1, -1, -1, -1, 59,  0, -1, -1, -1, /* 110 - 119 */
	-1, 22, -1, -1, -1, -1, -1, -1, -1, -1, /* 120 - 129 */
	-1, 25, 41, -1, -1, -1, -1, -1, -1, -1, /* 130 - 139 */
	-1, -1, -1, -1, -1, 19, -1, -1, -1,  4, /* 140 - 149 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 150 - 159 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 - 169 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 170 - 179 */
	-1, 69, -1, -1, -1, -1, 64              /* 180 - 186 */
};
260+
/* Perfect hash over encoding names (hashing scheme generated by gperf).
 *
 * str: name to hash; at least len bytes must be readable.
 * len: number of bytes in str.
 *
 * Returns len plus the weights of a few sampled bytes: byte 0 and the last
 * byte always, byte 2 when len >= 3, byte 4 when len >= 5, byte 5 when
 * len >= 6 and byte 6 when len >= 7. Upper- and lower-case ASCII letters
 * carry the same weight, so the result does not depend on letter case.
 * The result may exceed 186 (the largest slot the caller accepts); callers
 * must range-check it before using it as a table index.
 * An empty name returns 187 without reading str. */
static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
{
	/* Weight of each byte value. 187 marks the bytes that carry the
	 * "out of range" weight; a single such byte already pushes the
	 * result past 186. */
	static const unsigned char asso_values[256] = {
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /*   0 -   9 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /*  10 -  19 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /*  20 -  29 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /*  30 -  39 */
		187, 187, 187, 187, 187,   0, 187, 187,   5,  20, /*  40 -  49 */
		  0,  15,  40,  10,  25,  70,   5,  60, 187, 187, /*  50 -  59 */
		187, 187, 187, 187, 187,  75,   5,   0,  20,   5, /*  60 -  69 */
		  0,  75,   5,   0,  40,  75,  20,   0,   0,   0, /*  70 -  79 */
		 35,  45,  50,   0,  75,   0, 187,   0, 187, 187, /*  80 -  89 */
		  0, 187, 187, 187, 187, 187, 187,  75,   5,   0, /*  90 -  99 */
		 20,   5,   0,  75,   5,   0,  40,  75,  20,   0, /* 100 - 109 */
		  0,   0,  35,  45,  50,   0,  75,   0, 187,   0, /* 110 - 119 */
		187, 187,   0, 187, 187, 187, 187, 187, 187, 187, /* 120 - 129 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 130 - 139 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 140 - 149 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 150 - 159 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 160 - 169 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 170 - 179 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 180 - 189 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 190 - 199 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 200 - 209 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 210 - 219 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 220 - 229 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 230 - 239 */
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187, /* 240 - 249 */
		187, 187, 187, 187, 187, 187                      /* 250 - 255 */
	};

	/* An empty name has no byte to sample: reading str[6] or str[len - 1]
	 * would be out of bounds. Report a key one past the largest slot so
	 * that a range-checking caller treats it as "not found". */
	if (len == 0) {
		return 187;
	}

	unsigned int hval = (unsigned int) len;

	/* How many of the sampled positions exist. Lengths of 7 and above all
	 * sample the same positions. hval can only be 0 here if len wrapped
	 * during the conversion to unsigned int, in which case the string is
	 * long enough for the deepest sampling as well. */
	const unsigned int depth = (hval == 0 || hval > 6) ? 7 : hval;

	if (depth >= 7) {
		hval += asso_values[(unsigned char) str[6]];
	}
	if (depth >= 6) {
		hval += asso_values[(unsigned char) str[5]];
	}
	if (depth >= 5) {
		hval += asso_values[(unsigned char) str[4]];
	}
	if (depth >= 3) {
		hval += asso_values[(unsigned char) str[2]];
	}
	hval += asso_values[(unsigned char) str[0]];

	return hval + asso_values[(unsigned char) str[len - 1]];
}
316+
/* Inclusive bounds on the name length (in bytes, as reported by strlen)
 * for which the perfect hash lookup is attempted. */
#define NAME_HASH_MIN_NAME_LENGTH 2
#define NAME_HASH_MAX_NAME_LENGTH 23
319+
147320const mbfl_encoding * mbfl_name2encoding (const char * name )
148321{
149- const mbfl_encoding * * encoding ;
322+ const mbfl_encoding * const * encoding ;
150323
324+ /* Sanity check perfect hash for name.
325+ * Never enable this in production, this is only a development-time sanity check! */
326+ #if ZEND_DEBUG && 0
151327 for (encoding = mbfl_encoding_ptr_list ; * encoding ; encoding ++ ) {
152- if (strcasecmp ((* encoding )-> name , name ) == 0 ) {
153- return * encoding ;
328+ size_t name_length = strlen ((* encoding )-> name );
329+ if (!(name_length <= NAME_HASH_MAX_NAME_LENGTH && name_length >= NAME_HASH_MIN_NAME_LENGTH )) {
330+ fprintf (stderr , "name length is not satisfying bound check: %zu %s\n" , name_length , (* encoding )-> name );
331+ abort ();
332+ }
333+ unsigned int key = mbfl_name2encoding_perfect_hash ((* encoding )-> name , name_length );
334+ if (mbfl_encoding_ptr_list [mbfl_encoding_ptr_list_after_hashing [key ]] != * encoding ) {
335+ fprintf (stderr , "mbfl_name2encoding_perfect_hash: key %u %s mismatch\n" , key , (* encoding )-> name );
336+ abort ();
337+ }
338+ }
339+ #endif
340+
341+ /* Use perfect hash lookup for name */
342+ size_t name_len = strlen (name );
343+ if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH ) {
344+ unsigned int key = mbfl_name2encoding_perfect_hash (name , name_len );
345+ if (key <= 186 ) {
346+ int8_t offset = mbfl_encoding_ptr_list_after_hashing [key ];
347+ if (offset >= 0 ) {
348+ encoding = mbfl_encoding_ptr_list + offset ;
349+ if (strcasecmp ((* encoding )-> name , name ) == 0 ) {
350+ return * encoding ;
351+ }
352+ }
154353 }
155354 }
156355
0 commit comments