@@ -14,6 +14,7 @@ Written by Philip Hazel, October 2016
1414#include <stdlib.h>
1515#include <string.h>
1616
17+ #include "config.h"
1718#define PCRE2_CODE_UNIT_WIDTH 8
1819#include "pcre2.h"
1920
@@ -36,6 +37,148 @@ Written by Philip Hazel, October 2016
3637 PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
3738 PCRE2_PARTIAL_SOFT)
3839
40+ static void print_compile_options (FILE * stream , uint32_t compile_options )
41+ {
42+ fprintf (stream , "Compile options %.8x never_backslash_c" , compile_options );
43+ fprintf (stream , "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n" ,
44+ ((compile_options & PCRE2_ALT_BSUX ) != 0 )? ",alt_bsux" : "" ,
45+ ((compile_options & PCRE2_ALT_CIRCUMFLEX ) != 0 )? ",alt_circumflex" : "" ,
46+ ((compile_options & PCRE2_ALT_VERBNAMES ) != 0 )? ",alt_verbnames" : "" ,
47+ ((compile_options & PCRE2_ALLOW_EMPTY_CLASS ) != 0 )? ",allow_empty_class" : "" ,
48+ ((compile_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
49+ ((compile_options & PCRE2_AUTO_CALLOUT ) != 0 )? ",auto_callout" : "" ,
50+ ((compile_options & PCRE2_CASELESS ) != 0 )? ",caseless" : "" ,
51+ ((compile_options & PCRE2_DOLLAR_ENDONLY ) != 0 )? ",dollar_endonly" : "" ,
52+ ((compile_options & PCRE2_DOTALL ) != 0 )? ",dotall" : "" ,
53+ ((compile_options & PCRE2_DUPNAMES ) != 0 )? ",dupnames" : "" ,
54+ ((compile_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
55+ ((compile_options & PCRE2_EXTENDED ) != 0 )? ",extended" : "" ,
56+ ((compile_options & PCRE2_FIRSTLINE ) != 0 )? ",firstline" : "" ,
57+ ((compile_options & PCRE2_MATCH_UNSET_BACKREF ) != 0 )? ",match_unset_backref" : "" ,
58+ ((compile_options & PCRE2_MULTILINE ) != 0 )? ",multiline" : "" ,
59+ ((compile_options & PCRE2_NEVER_UCP ) != 0 )? ",never_ucp" : "" ,
60+ ((compile_options & PCRE2_NEVER_UTF ) != 0 )? ",never_utf" : "" ,
61+ ((compile_options & PCRE2_NO_AUTO_CAPTURE ) != 0 )? ",no_auto_capture" : "" ,
62+ ((compile_options & PCRE2_NO_AUTO_POSSESS ) != 0 )? ",no_auto_possess" : "" ,
63+ ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR ) != 0 )? ",no_dotstar_anchor" : "" ,
64+ ((compile_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
65+ ((compile_options & PCRE2_NO_START_OPTIMIZE ) != 0 )? ",no_start_optimize" : "" ,
66+ ((compile_options & PCRE2_UCP ) != 0 )? ",ucp" : "" ,
67+ ((compile_options & PCRE2_UNGREEDY ) != 0 )? ",ungreedy" : "" ,
68+ ((compile_options & PCRE2_USE_OFFSET_LIMIT ) != 0 )? ",use_offset_limit" : "" ,
69+ ((compile_options & PCRE2_UTF ) != 0 )? ",utf" : "" );
70+ }
71+
72+ static void print_match_options (FILE * stream , uint32_t match_options )
73+ {
74+ fprintf (stream , "Match options %.8x" , match_options );
75+ fprintf (stream , "%s%s%s%s%s%s%s%s%s\n" ,
76+ ((match_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
77+ ((match_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
78+ ((match_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
79+ ((match_options & PCRE2_NOTBOL ) != 0 )? ",notbol" : "" ,
80+ ((match_options & PCRE2_NOTEMPTY ) != 0 )? ",notempty" : "" ,
81+ ((match_options & PCRE2_NOTEMPTY_ATSTART ) != 0 )? ",notempty_atstart" : "" ,
82+ ((match_options & PCRE2_NOTEOL ) != 0 )? ",noteol" : "" ,
83+ ((match_options & PCRE2_PARTIAL_HARD ) != 0 )? ",partial_hard" : "" ,
84+ ((match_options & PCRE2_PARTIAL_SOFT ) != 0 )? ",partial_soft" : "" );
85+ }
86+
87+ static void dump_matches (FILE * stream , pcre2_match_data * match_data , pcre2_match_context * match_context )
88+ {
89+ PCRE2_UCHAR error_buf [256 ];
90+ int errorcode ;
91+ uint32_t ovector_count = pcre2_get_ovector_count (match_data );
92+
93+ for (uint32_t ovector = ovector_count ; ovector < ovector_count ; ovector ++ )
94+ {
95+ PCRE2_UCHAR * bufferptr = NULL ;
96+ PCRE2_SIZE bufflen = 0 ;
97+
98+ errorcode = pcre2_substring_get_bynumber (match_data , ovector , & bufferptr , & bufflen );
99+
100+ if (errorcode >= 0 )
101+ {
102+ fprintf (stream , "Match %d (hex encoded): " , ovector );
103+ for (PCRE2_SIZE i = 0 ; i < bufflen ; i ++ )
104+ {
105+ fprintf (stderr , "%02x" , bufferptr [i ]);
106+ }
107+ fprintf (stderr , "\n" );
108+ }
109+ else
110+ {
111+ pcre2_get_error_message (errorcode , error_buf , 256 );
112+ fprintf (stream , "Match %d failed: %s\n" , ovector , error_buf );
113+ }
114+ }
115+ }
116+
#ifdef SUPPORT_JIT

/* Report on stderr how one of the two runs being compared ended. A negative
errorcode is reported with its PCRE2 error message. Otherwise the run is
reported as error free and, when match_data is not NULL, its ovector count and
entries are dumped as well.

Arguments:
  op_label        run name used in the "... operation ..." lines
  regex_label     run name used in the "... matches discovered by ..." line
  errorcode       result code of the run
  match_data      match data block of the run, or NULL
  match_context   passed through to dump_matches()
*/

static void report_run_outcome(
  const char *op_label,
  const char *regex_label,
  int errorcode,
  pcre2_match_data *match_data,
  pcre2_match_context *match_context
) {
  PCRE2_UCHAR message[256];

  if (errorcode < 0)
  {
    pcre2_get_error_message(errorcode, message, sizeof message / sizeof message[0]);
    fprintf(stderr, "%s operation emitted an error: %s\n", op_label, message);
    return;
  }

  fprintf(stderr, "%s operation did not emit an error.\n", op_label);
  if (match_data != NULL)
  {
    /* pcre2_get_ovector_count() returns an unsigned 32-bit value. */
    fprintf(stderr, "%u matches discovered by %s regex:\n",
      (unsigned int)pcre2_get_ovector_count(match_data), regex_label);
    dump_matches(stderr, match_data, match_context);
    fprintf(stderr, "\n");
  }
}

/* This function describes the current test case being evaluated on stderr and
then aborts. It never returns.

Arguments:
  task              name of the check that failed, quoted in the first line
  data              input bytes, printed hex encoded
  size              number of bytes in data
  compile_options   compile option bits, printed by print_compile_options()
  match_options     match option bits, printed by print_match_options()
  errorcode         result code of the non-JIT run
  match_data        match data block of the non-JIT run, or NULL
  errorcode_jit     result code of the JIT run
  match_data_jit    match data block of the JIT run, or NULL
  match_context     passed through to dump_matches()
*/

static void describe_failure(
  const char *task,
  const unsigned char *data,
  size_t size,
  uint32_t compile_options,
  uint32_t match_options,
  int errorcode,
  pcre2_match_data *match_data,
  int errorcode_jit,
  pcre2_match_data *match_data_jit,
  pcre2_match_context *match_context
) {
  fprintf(stderr, "Encountered failure while performing %s; context:\n", task);

  fprintf(stderr, "Pattern/sample string (hex encoded): ");
  for (size_t i = 0; i < size; i++)
  {
    fprintf(stderr, "%02x", data[i]);
  }
  fprintf(stderr, "\n");

  print_compile_options(stderr, compile_options);
  print_match_options(stderr, match_options);

  /* The two labels differ only in capitalisation for the non-JIT run, which
  keeps the emitted text the same as before the two halves were merged. */
  report_run_outcome("Non-JIT'd", "non-JIT'd", errorcode, match_data, match_context);
  report_run_outcome("JIT'd", "JIT'd", errorcode_jit, match_data_jit, match_context);

  abort();
}
#endif
181+
39182/* This is the callout function. Its only purpose is to halt matching if there
40183are more than 100 callouts, as one way of stopping too much time being spent on
41184fruitless matches. The callout data is a pointer to the counter. */
@@ -110,34 +253,7 @@ for (i = 0; i < 2; i++)
110253 pcre2_code * code ;
111254
112255#ifdef STANDALONE
113- printf ("Compile options %.8x never_backslash_c" , compile_options );
114- printf ("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n" ,
115- ((compile_options & PCRE2_ALT_BSUX ) != 0 )? ",alt_bsux" : "" ,
116- ((compile_options & PCRE2_ALT_CIRCUMFLEX ) != 0 )? ",alt_circumflex" : "" ,
117- ((compile_options & PCRE2_ALT_VERBNAMES ) != 0 )? ",alt_verbnames" : "" ,
118- ((compile_options & PCRE2_ALLOW_EMPTY_CLASS ) != 0 )? ",allow_empty_class" : "" ,
119- ((compile_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
120- ((compile_options & PCRE2_AUTO_CALLOUT ) != 0 )? ",auto_callout" : "" ,
121- ((compile_options & PCRE2_CASELESS ) != 0 )? ",caseless" : "" ,
122- ((compile_options & PCRE2_DOLLAR_ENDONLY ) != 0 )? ",dollar_endonly" : "" ,
123- ((compile_options & PCRE2_DOTALL ) != 0 )? ",dotall" : "" ,
124- ((compile_options & PCRE2_DUPNAMES ) != 0 )? ",dupnames" : "" ,
125- ((compile_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
126- ((compile_options & PCRE2_EXTENDED ) != 0 )? ",extended" : "" ,
127- ((compile_options & PCRE2_FIRSTLINE ) != 0 )? ",firstline" : "" ,
128- ((compile_options & PCRE2_MATCH_UNSET_BACKREF ) != 0 )? ",match_unset_backref" : "" ,
129- ((compile_options & PCRE2_MULTILINE ) != 0 )? ",multiline" : "" ,
130- ((compile_options & PCRE2_NEVER_UCP ) != 0 )? ",never_ucp" : "" ,
131- ((compile_options & PCRE2_NEVER_UTF ) != 0 )? ",never_utf" : "" ,
132- ((compile_options & PCRE2_NO_AUTO_CAPTURE ) != 0 )? ",no_auto_capture" : "" ,
133- ((compile_options & PCRE2_NO_AUTO_POSSESS ) != 0 )? ",no_auto_possess" : "" ,
134- ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR ) != 0 )? ",no_dotstar_anchor" : "" ,
135- ((compile_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
136- ((compile_options & PCRE2_NO_START_OPTIMIZE ) != 0 )? ",no_start_optimize" : "" ,
137- ((compile_options & PCRE2_UCP ) != 0 )? ",ucp" : "" ,
138- ((compile_options & PCRE2_UNGREEDY ) != 0 )? ",ungreedy" : "" ,
139- ((compile_options & PCRE2_USE_OFFSET_LIMIT ) != 0 )? ",use_offset_limit" : "" ,
140- ((compile_options & PCRE2_UTF ) != 0 )? ",utf" : "" );
256+ print_compile_options (stdout , compile_options );
141257#endif
142258
143259 code = pcre2_compile ((PCRE2_SPTR )data , (PCRE2_SIZE )size , compile_options ,
@@ -169,7 +285,7 @@ for (i = 0; i < 2; i++)
169285#endif
170286 {
171287#ifdef STANDALONE
172- printf ( "** Failed to create match data block\n" );
288+ fprintf ( stderr , "** Failed to create match data block\n" );
173289#endif
174290 abort ();
175291 }
@@ -181,7 +297,7 @@ for (i = 0; i < 2; i++)
181297 if (match_context == NULL )
182298 {
183299#ifdef STANDALONE
184- printf ( "** Failed to create match context block\n" );
300+ fprintf ( stderr , "** Failed to create match context block\n" );
185301#endif
186302 abort ();
187303 }
@@ -195,18 +311,7 @@ for (i = 0; i < 2; i++)
195311 for (j = 0 ; j < 2 ; j ++ )
196312 {
197313#ifdef STANDALONE
198- printf ("Match options %.8x" , match_options );
199- printf ("%s%s%s%s%s%s%s%s%s%s\n" ,
200- ((match_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
201- ((match_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
202- ((match_options & PCRE2_NO_JIT ) != 0 )? ",no_jit" : "" ,
203- ((match_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
204- ((match_options & PCRE2_NOTBOL ) != 0 )? ",notbol" : "" ,
205- ((match_options & PCRE2_NOTEMPTY ) != 0 )? ",notempty" : "" ,
206- ((match_options & PCRE2_NOTEMPTY_ATSTART ) != 0 )? ",notempty_atstart" : "" ,
207- ((match_options & PCRE2_NOTEOL ) != 0 )? ",noteol" : "" ,
208- ((match_options & PCRE2_PARTIAL_HARD ) != 0 )? ",partial_hard" : "" ,
209- ((match_options & PCRE2_PARTIAL_SOFT ) != 0 )? ",partial_soft" : "" );
314+ print_match_options (stdout , match_options );
210315#endif
211316
212317 callout_count = 0 ;
@@ -231,16 +336,14 @@ for (i = 0; i < 2; i++)
231336
232337 if (errorcode_jit != errorcode )
233338 {
234- printf ("JIT errorcode %d did not match original errorcode %d\n" , errorcode_jit , errorcode );
235- abort ();
339+ describe_failure ("match errorcode comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
236340 }
237341
238342 ovector_count = pcre2_get_ovector_count (match_data );
239343
240344 if (ovector_count != pcre2_get_ovector_count (match_data_jit ))
241345 {
242- puts ("JIT ovector count did not match original" );
243- abort ();
346+ describe_failure ("ovector count comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
244347 }
245348
246349 for (uint32_t ovector = 0 ; ovector < ovector_count ; ovector ++ )
@@ -256,22 +359,19 @@ for (i = 0; i < 2; i++)
256359
257360 if (errorcode != errorcode_jit )
258361 {
259- printf ("when extracting substring, JIT errorcode %d did not match original %d\n" , errorcode_jit , errorcode );
260- abort ();
362+ describe_failure ("ovector entry errorcode comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
261363 }
262364
263365 if (errorcode >= 0 )
264366 {
265367 if (bufflen != bufflen_jit )
266368 {
267- printf ("when extracting substring, JIT buffer length %zu did not match original %zu\n" , bufflen_jit , bufflen );
268- abort ();
369+ describe_failure ("ovector entry length comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
269370 }
270371
271372 if (memcmp (bufferptr , bufferptr_jit , bufflen ) != 0 )
272373 {
273- puts ("when extracting substring, JIT buffer contents did not match original" );
274- abort ();
374+ describe_failure ("ovector entry content comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
275375 }
276376 }
277377
0 commit comments