@@ -249,11 +249,9 @@ typedef struct TParser
249249 /* string and position information */
250250 char * str ; /* multibyte string */
251251 int lenstr ; /* length of mbstring */
252- #ifdef USE_WIDE_UPPER_LOWER
253252 wchar_t * wstr ; /* wide character string */
254253 pg_wchar * pgwstr ; /* wide character string for C-locale */
255254 bool usewide ;
256- #endif
257255
258256 /* State of parse */
259257 int charmaxlen ;
@@ -302,8 +300,6 @@ TParserInit(char *str, int len)
302300 prs -> str = str ;
303301 prs -> lenstr = len ;
304302
305- #ifdef USE_WIDE_UPPER_LOWER
306-
307303 /*
308304 * Use wide char code only when max encoding length > 1.
309305 */
@@ -331,7 +327,6 @@ TParserInit(char *str, int len)
331327 }
332328 else
333329 prs -> usewide = false;
334- #endif
335330
336331 prs -> state = newTParserPosition (NULL );
337332 prs -> state -> state = TPS_Base ;
@@ -368,15 +363,12 @@ TParserCopyInit(const TParser *orig)
368363 prs -> charmaxlen = orig -> charmaxlen ;
369364 prs -> str = orig -> str + orig -> state -> posbyte ;
370365 prs -> lenstr = orig -> lenstr - orig -> state -> posbyte ;
371-
372- #ifdef USE_WIDE_UPPER_LOWER
373366 prs -> usewide = orig -> usewide ;
374367
375368 if (orig -> pgwstr )
376369 prs -> pgwstr = orig -> pgwstr + orig -> state -> poschar ;
377370 if (orig -> wstr )
378371 prs -> wstr = orig -> wstr + orig -> state -> poschar ;
379- #endif
380372
381373 prs -> state = newTParserPosition (NULL );
382374 prs -> state -> state = TPS_Base ;
@@ -401,12 +393,10 @@ TParserClose(TParser *prs)
401393 prs -> state = ptr ;
402394 }
403395
404- #ifdef USE_WIDE_UPPER_LOWER
405396 if (prs -> wstr )
406397 pfree (prs -> wstr );
407398 if (prs -> pgwstr )
408399 pfree (prs -> pgwstr );
409- #endif
410400
411401#ifdef WPARSER_TRACE
412402 fprintf (stderr , "closing parser\n" );
@@ -445,96 +435,45 @@ TParserCopyClose(TParser *prs)
445435 * - if locale is C then we use pgwstr instead of wstr.
446436 */
447437
448- #ifdef USE_WIDE_UPPER_LOWER
449-
450- #define p_iswhat (type ) \
438+ #define p_iswhat (type , nonascii ) \
439+ \
451440static int \
452- p_is##type(TParser *prs) { \
453- Assert( prs->state ); \
454- if ( prs->usewide ) \
441+ p_is##type(TParser *prs) \
442+ { \
443+ Assert(prs->state); \
444+ if (prs->usewide) \
455445 { \
456- if ( prs->pgwstr ) \
446+ if (prs->pgwstr) \
457447 { \
458448 unsigned int c = *(prs->pgwstr + prs->state->poschar); \
459- if ( c > 0x7f ) \
460- return 0; \
461- return is##type( c ); \
449+ if (c > 0x7f) \
450+ return nonascii; \
451+ return is##type(c); \
462452 } \
463- return isw##type( *( prs->wstr + prs->state->poschar ) ); \
453+ return isw##type(*( prs->wstr + prs->state->poschar)); \
464454 } \
465- \
466- return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
467- } \
455+ return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
456+ } \
468457 \
469458static int \
470- p_isnot##type(TParser *prs) { \
459+ p_isnot##type(TParser *prs) \
460+ { \
471461 return !p_is##type(prs); \
472462}
473463
474- static int
475- p_isalnum (TParser * prs )
476- {
477- Assert (prs -> state );
478-
479- if (prs -> usewide )
480- {
481- if (prs -> pgwstr )
482- {
483- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
484-
485- /*
486- * any non-ascii symbol with multibyte encoding with C-locale is
487- * an alpha character
488- */
489- if (c > 0x7f )
490- return 1 ;
491-
492- return isalnum (c );
493- }
494-
495- return iswalnum (* (prs -> wstr + prs -> state -> poschar ));
496- }
497-
498- return isalnum (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
499- }
500- static int
501- p_isnotalnum (TParser * prs )
502- {
503- return !p_isalnum (prs );
504- }
505-
506- static int
507- p_isalpha (TParser * prs )
508- {
509- Assert (prs -> state );
510-
511- if (prs -> usewide )
512- {
513- if (prs -> pgwstr )
514- {
515- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
516-
517- /*
518- * any non-ascii symbol with multibyte encoding with C-locale is
519- * an alpha character
520- */
521- if (c > 0x7f )
522- return 1 ;
523-
524- return isalpha (c );
525- }
526-
527- return iswalpha (* (prs -> wstr + prs -> state -> poschar ));
528- }
529-
530- return isalpha (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
531- }
532-
533- static int
534- p_isnotalpha (TParser * prs )
535- {
536- return !p_isalpha (prs );
537- }
464+ /*
465+ * In C locale with a multibyte encoding, any non-ASCII symbol is considered
466+ * an alpha character, but not a member of other char classes.
467+ */
468+ p_iswhat (alnum , 1 )
469+ p_iswhat (alpha , 1 )
470+ p_iswhat (digit , 0 )
471+ p_iswhat (lower , 0 )
472+ p_iswhat (print , 0 )
473+ p_iswhat (punct , 0 )
474+ p_iswhat (space , 0 )
475+ p_iswhat (upper , 0 )
476+ p_iswhat (xdigit , 0 )
538477
539478/* p_iseq should be used only for ASCII symbols */
540479
@@ -544,39 +483,6 @@ p_iseq(TParser *prs, char c)
544483 Assert (prs -> state );
545484 return ((prs -> state -> charlen == 1 && * (prs -> str + prs -> state -> posbyte ) == c )) ? 1 : 0 ;
546485}
547- #else /* USE_WIDE_UPPER_LOWER */
548-
549- #define p_iswhat (type ) \
550- static int \
551- p_is##type(TParser *prs) { \
552- Assert( prs->state ); \
553- return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
554- } \
555- \
556- static int \
557- p_isnot##type(TParser *prs) { \
558- return !p_is##type(prs); \
559- }
560-
561-
562- static int
563- p_iseq (TParser * prs , char c )
564- {
565- Assert (prs -> state );
566- return (* (prs -> str + prs -> state -> posbyte ) == c ) ? 1 : 0 ;
567- }
568-
569- p_iswhat (alnum )
570- p_iswhat (alpha )
571- #endif /* USE_WIDE_UPPER_LOWER */
572-
573- p_iswhat (digit )
574- p_iswhat (lower )
575- p_iswhat (print )
576- p_iswhat (punct )
577- p_iswhat (space )
578- p_iswhat (upper )
579- p_iswhat (xdigit )
580486
581487static int
582488p_isEOF (TParser * prs )
@@ -793,8 +699,6 @@ p_isspecial(TParser *prs)
793699 if (pg_dsplen (prs -> str + prs -> state -> posbyte ) == 0 )
794700 return 1 ;
795701
796- #ifdef USE_WIDE_UPPER_LOWER
797-
798702 /*
799703 * Unicode characters in the 'Mark, Spacing Combining' category. These
800704 * characters are not alpha, although they do not break words either.
@@ -1058,7 +962,6 @@ p_isspecial(TParser *prs)
1058962 StopHigh = StopMiddle ;
1059963 }
1060964 }
1061- #endif
1062965
1063966 return 0 ;
1064967}
0 commit comments