@@ -6691,6 +6691,15 @@ else
66916691JUMPTO (SLJIT_JUMP , mainloop );
66926692}
66936693
#ifdef SUPPORT_UNICODE
/* Bit-mask helpers for Unicode general category values (the ucp_* constants).

Each category number is mapped to a single bit, so that a set of categories
can be tested with one AND against a value of the form (1 << category).
The masks are built from an unsigned 1 so that bit 31 can be used without
shifting into the sign bit of an int, which would be undefined behaviour. */

/* Mask with only the bit for category "bit" set. */
#define UCPCAT(bit) (1u << (bit))

/* Mask covering two / three individual categories. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))

/* Mask covering every category from "start" to "end", both inclusive.
Requires start <= end and end <= 30 (the shift count end + 1 must stay below
the width of unsigned int). */
#define UCPCAT_RANGE(start, end) (((1u << ((end) + 1)) - 1u) - ((1u << (start)) - 1u))

/* All letter categories / all number categories. These assume the ucp_L* and
ucp_N* values are numbered contiguously, with ucp_Ll..ucp_Lu and
ucp_Nd..ucp_No as the first and last members - confirm against the ucp_*
enumeration if it is ever reordered. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
#endif
6702+
66946703static void check_wordboundary (compiler_common * common , BOOL ucp )
66956704{
66966705DEFINE_COMPILER ;
@@ -6748,17 +6757,9 @@ else
67486757if (ucp )
67496758 {
67506759 add_jump (compiler , & common -> getucdtype , JUMP (SLJIT_FAST_CALL ));
6751- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Mn );
6752- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
6753- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Pc );
6754- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
6755- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Ll );
6756- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
6757- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6758- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Nd - ucp_Ll );
6759- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
6760- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6761- OP1 (SLJIT_MOV , TMP3 , 0 , TMP2 , 0 );
6760+ OP2 (SLJIT_SHL , TMP2 , 0 , SLJIT_IMM , 1 , TMP1 , 0 );
6761+ OP2U (SLJIT_AND | SLJIT_SET_Z , TMP2 , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
6762+ OP_FLAGS (SLJIT_MOV , TMP3 , 0 , SLJIT_NOT_ZERO );
67626763 }
67636764else
67646765#endif /* SUPPORT_UNICODE */
@@ -6795,16 +6796,9 @@ valid_utf = LABEL();
67956796if (ucp )
67966797 {
67976798 add_jump (compiler , & common -> getucdtype , JUMP (SLJIT_FAST_CALL ));
6798- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Mn );
6799- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
6800- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Pc );
6801- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
6802- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Ll );
6803- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
6804- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6805- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Nd - ucp_Ll );
6806- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
6807- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6799+ OP2 (SLJIT_SHL , TMP2 , 0 , SLJIT_IMM , 1 , TMP1 , 0 );
6800+ OP2U (SLJIT_AND | SLJIT_SET_Z , TMP2 , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
6801+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
68086802 }
68096803else
68106804#endif /* SUPPORT_UNICODE */
@@ -7543,16 +7537,6 @@ return cc;
75437537
75447538#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
75457539
7546- #define SET_TYPE_OFFSET (value ) \
7547- if ((value) != typeoffset) \
7548- { \
7549- if ((value) < typeoffset) \
7550- OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
7551- else \
7552- OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
7553- } \
7554- typeoffset = (value);
7555-
75567540#define SET_CHAR_OFFSET (value ) \
75577541 if ((value) != charoffset) \
75587542 { \
@@ -7577,7 +7561,6 @@ static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHA
75777561#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
75787562#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
75797563#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7580-
75817564#endif /* SUPPORT_UNICODE */
75827565
75837566static void compile_xclass_matchingpath (compiler_common * common , PCRE2_SPTR cc , jump_list * * backtracks )
@@ -7597,7 +7580,6 @@ BOOL utf = common->utf;
75977580sljit_u32 unicode_status = 0 ;
75987581int typereg = TMP1 ;
75997582const sljit_u32 * other_cases ;
7600- sljit_uw typeoffset ;
76017583#endif /* SUPPORT_UNICODE */
76027584
76037585/* Scanning the necessary info. */
@@ -7672,6 +7654,7 @@ while (*cc != XCL_END)
76727654 case PT_LAMP :
76737655 case PT_GC :
76747656 case PT_PC :
7657+ case PT_WORD :
76757658 case PT_ALNUM :
76767659 unicode_status |= XCLASS_HAS_TYPE ;
76777660 break ;
@@ -7692,7 +7675,6 @@ while (*cc != XCL_END)
76927675
76937676 case PT_SPACE :
76947677 case PT_PXSPACE :
7695- case PT_WORD :
76967678 case PT_PXGRAPH :
76977679 case PT_PXPRINT :
76987680 case PT_PXPUNCT :
@@ -8027,16 +8009,14 @@ if (unicode_status & XCLASS_NEEDS_UCD)
80278009 typereg = RETURN_ADDR ;
80288010
80298011 OP1 (SLJIT_MOV_U8 , typereg , 0 , SLJIT_MEM1 (TMP2 ), (sljit_sw )PRIV (ucd_records ) + SLJIT_OFFSETOF (ucd_record , chartype ));
8012+ OP2 (SLJIT_SHL , typereg , 0 , SLJIT_IMM , 1 , typereg , 0 );
80308013 }
80318014 }
80328015#endif /* SUPPORT_UNICODE */
80338016
80348017/* Generating code. */
80358018charoffset = 0 ;
80368019numberofcmps = 0 ;
8037- #ifdef SUPPORT_UNICODE
8038- typeoffset = 0 ;
8039- #endif /* SUPPORT_UNICODE */
80408020
80418021while (* cc != XCL_END )
80428022 {
@@ -8109,23 +8089,18 @@ while (*cc != XCL_END)
81098089 break ;
81108090
81118091 case PT_LAMP :
8112- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Lu - typeoffset );
8113- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
8114- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Ll - typeoffset );
8115- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
8116- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Lt - typeoffset );
8117- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_EQUAL );
8092+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT3 (ucp_Lu , ucp_Ll , ucp_Lt ));
81188093 jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81198094 break ;
81208095
81218096 case PT_GC :
8122- c = PRIV (ucp_typerange )[(int )cc [1 ] * 2 ];
8123- SET_TYPE_OFFSET (c );
8124- jump = CMP (SLJIT_LESS_EQUAL ^ invertcmp , typereg , 0 , SLJIT_IMM , PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ] - c );
8097+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (PRIV (ucp_typerange )[(int )cc [1 ] * 2 ], PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ]));
8098+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81258099 break ;
81268100
81278101 case PT_PC :
8128- jump = CMP (SLJIT_EQUAL ^ invertcmp , typereg , 0 , SLJIT_IMM , (int )cc [1 ] - typeoffset );
8102+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (cc [1 ]));
8103+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81298104 break ;
81308105
81318106 case PT_SC :
@@ -8148,26 +8123,18 @@ while (*cc != XCL_END)
81488123 OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , 0x180e - 0x9 );
81498124 OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
81508125
8151- SET_TYPE_OFFSET (ucp_Zl );
8152- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Zs - ucp_Zl );
8153- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8126+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Zl , ucp_Zs ));
8127+ OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_NOT_ZERO );
81548128 jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81558129 break ;
81568130
81578131 case PT_WORD :
8158- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Mn - typeoffset );
8159- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
8160- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Pc - typeoffset );
8161- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
8162- /* Fall through. */
8132+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
8133+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8134+ break ;
81638135
81648136 case PT_ALNUM :
8165- SET_TYPE_OFFSET (ucp_Ll );
8166- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
8167- OP_FLAGS ((* cc == PT_ALNUM ) ? SLJIT_MOV : SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
8168- SET_TYPE_OFFSET (ucp_Nd );
8169- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
8170- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8137+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_L | UCPCAT_N );
81718138 jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81728139 break ;
81738140
@@ -8242,12 +8209,11 @@ while (*cc != XCL_END)
82428209 break ;
82438210
82448211 case PT_PXGRAPH :
8245- /* C and Z groups are the farthest two groups. */
8246- SET_TYPE_OFFSET (ucp_Ll );
8247- OP2U (SLJIT_SUB | SLJIT_SET_GREATER , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Ll );
8248- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_GREATER );
8212+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Cc , ucp_Cs ) | UCPCAT_RANGE (ucp_Zl , ucp_Zs ));
8213+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
82498214
8250- jump = CMP (SLJIT_NOT_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Cf - ucp_Ll );
8215+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (ucp_Cf ));
8216+ jump = JUMP (SLJIT_ZERO );
82518217
82528218 /* In case of ucp_Cf, we overwrite the result. */
82538219 SET_CHAR_OFFSET (0x2066 );
@@ -8265,15 +8231,11 @@ while (*cc != XCL_END)
82658231 break ;
82668232
82678233 case PT_PXPRINT :
8268- /* C and Z groups are the farthest two groups. */
8269- SET_TYPE_OFFSET (ucp_Ll );
8270- OP2U (SLJIT_SUB | SLJIT_SET_GREATER , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Ll );
8271- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_GREATER );
8234+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Cc , ucp_Cs ) | UCPCAT2 (ucp_Zl , ucp_Zp ));
8235+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
82728236
8273- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Zs - ucp_Ll );
8274- OP_FLAGS (SLJIT_AND , TMP2 , 0 , SLJIT_NOT_EQUAL );
8275-
8276- jump = CMP (SLJIT_NOT_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Cf - ucp_Ll );
8237+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (ucp_Cf ));
8238+ jump = JUMP (SLJIT_ZERO );
82778239
82788240 /* In case of ucp_Cf, we overwrite the result. */
82798241 SET_CHAR_OFFSET (0x2066 );
@@ -8288,17 +8250,15 @@ while (*cc != XCL_END)
82888250 break ;
82898251
82908252 case PT_PXPUNCT :
8291- SET_TYPE_OFFSET (ucp_Sc );
8292- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Sc );
8293- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_LESS_EQUAL );
8253+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Sc , ucp_So ));
8254+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
82948255
82958256 SET_CHAR_OFFSET (0 );
82968257 OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , 0x7f );
82978258 OP_FLAGS (SLJIT_AND , TMP2 , 0 , SLJIT_LESS_EQUAL );
82988259
8299- SET_TYPE_OFFSET (ucp_Pc );
8300- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Ps - ucp_Pc );
8301- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8260+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Pc , ucp_Ps ));
8261+ OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_NOT_ZERO );
83028262 jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
83038263 break ;
83048264
0 commit comments