Skip to content

Commit c36b2fb

Browse files
authored
Move L& conversion (#645)
1 parent 266325f commit c36b2fb

File tree

7 files changed

+62
-28
lines changed

7 files changed

+62
-28
lines changed

src/pcre2_compile.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4338,6 +4338,18 @@ while (ptr < ptrend)
43384338
uint16_t ptype = 0, pdata = 0;
43394339
if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb))
43404340
goto FAILED;
4341+
4342+
/* In caseless matching, particular characteristics Lu, Ll, and Lt
4343+
get converted to the general characteristic L&. That is, upper,
4344+
lower, and title case letters are all conflated. */
4345+
4346+
if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC &&
4347+
(pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt))
4348+
{
4349+
ptype = PT_LAMP;
4350+
pdata = 0;
4351+
}
4352+
43414353
if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
43424354
*parsed_pattern++ = META_ESCAPE + escape;
43434355
*parsed_pattern++ = (ptype << 16) | pdata;

src/pcre2_compile_class.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,17 +1378,6 @@ while (TRUE)
13781378
continue;
13791379
}
13801380

1381-
/* In caseless matching, particular characteristics Lu, Ll, and Lt
1382-
get converted to the general characteristic L&. That is, upper,
1383-
lower, and title case letters are all conflated. */
1384-
1385-
if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC &&
1386-
(pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt))
1387-
{
1388-
ptype = PT_LAMP;
1389-
pdata = 0;
1390-
}
1391-
13921381
PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);
13931382

13941383
if ((xclass_props & XCLASS_HIGH_ANY) == 0)

src/pcre2_jit_char_inc.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -626,13 +626,17 @@ if (category_list == UCPCAT_ALL)
626626
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
627627
return;
628628
}
629+
630+
if (category_list != 0)
631+
compares++;
629632
#endif
630633

631634
if (*cc != XCL_END)
632635
{
633636
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
634637
if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
635638
{
639+
SLJIT_ASSERT(category_list == 0);
636640
max = 0;
637641
min = (ccbegin[-1] & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
638642
xclass_update_min_max(common, cc, &min, &max);
@@ -701,9 +705,6 @@ if (status & XCLASS_NEEDS_UCD)
701705

702706
ccbegin = cc;
703707

704-
if (category_list != 0)
705-
compares++;
706-
707708
if (status & XCLASS_HAS_BIDICL)
708709
{
709710
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));

testdata/testinput4

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2918,25 +2918,35 @@
29182918
/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf
29192919
>AbbD<
29202920
>Abb\x{01c5}<
2921-
\= Expect no match
2921+
\= Expect no match
29222922
>aBBd<
2923-
>aB!!<
2923+
>aB!!<
29242924

29252925
/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf
2926-
>aB!!<
2927-
>\x{01c5}B!!<
2928-
\= Expect no match
2926+
>aB!!<
2927+
>\x{01c5}B!!<
2928+
\= Expect no match
29292929
>AbbD<
29302930
>aBBd<
29312931
>Abb\x{01c5}<
29322932

29332933
/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf
2934-
>aB!!<
2935-
\= Expect no match
2934+
>aB!!<
2935+
\= Expect no match
29362936
>AbbD<
29372937
>aBBd<
29382938
>Abb\x{01c5}<
29392939

2940+
/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf
2941+
>A!<
2942+
>\x{3c9}\x{58d}<
2943+
>\x{413}\x{940}<
2944+
\= Expect no match
2945+
\x{3c9}\x{3c9}
2946+
\x{58d}\x{58d}
2947+
\x{413}\x{413}
2948+
\x{940}\x{940}
2949+
29402950
/^\p{Lt}+/i,utf
29412951
\x{1c5}AB
29422952

testdata/testinput5

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,8 @@
339339

340340
/[[:a\x{100}b:]]/utf
341341

342+
/[\p{InvalidOrBadProperty}]/
343+
342344
/a[^]b/utf,allow_empty_class,match_unset_backref
343345
a\x{1234}b
344346
a\nb

testdata/testoutput4

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4681,18 +4681,18 @@ No match
46814681
0: AbbD
46824682
>Abb\x{01c5}<
46834683
0: Abb\x{1c5}
4684-
\= Expect no match
4684+
\= Expect no match
46854685
>aBBd<
46864686
No match
4687-
>aB!!<
4687+
>aB!!<
46884688
No match
46894689

46904690
/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf
4691-
>aB!!<
4691+
>aB!!<
46924692
0: aB!!
4693-
>\x{01c5}B!!<
4693+
>\x{01c5}B!!<
46944694
0: \x{1c5}B!!
4695-
\= Expect no match
4695+
\= Expect no match
46964696
>AbbD<
46974697
No match
46984698
>aBBd<
@@ -4701,16 +4701,33 @@ No match
47014701
No match
47024702

47034703
/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf
4704-
>aB!!<
4704+
>aB!!<
47054705
0: aB!!
4706-
\= Expect no match
4706+
\= Expect no match
47074707
>AbbD<
47084708
No match
47094709
>aBBd<
47104710
No match
47114711
>Abb\x{01c5}<
47124712
No match
47134713

4714+
/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf
4715+
>A!<
4716+
0: A!
4717+
>\x{3c9}\x{58d}<
4718+
0: \x{3c9}\x{58d}
4719+
>\x{413}\x{940}<
4720+
0: \x{413}\x{940}
4721+
\= Expect no match
4722+
\x{3c9}\x{3c9}
4723+
No match
4724+
\x{58d}\x{58d}
4725+
No match
4726+
\x{413}\x{413}
4727+
No match
4728+
\x{940}\x{940}
4729+
No match
4730+
47144731
/^\p{Lt}+/i,utf
47154732
\x{1c5}AB
47164733
0: \x{1c5}AB

testdata/testoutput5

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,9 @@ No match
822822
/[[:a\x{100}b:]]/utf
823823
Failed: error 130 at offset 14: unknown POSIX class name
824824

825+
/[\p{InvalidOrBadProperty}]/
826+
Failed: error 147 at offset 25: unknown property after \P or \p
827+
825828
/a[^]b/utf,allow_empty_class,match_unset_backref
826829
a\x{1234}b
827830
0: a\x{1234}b

0 commit comments

Comments
 (0)