Skip to content

Commit 735e7cc

Browse files
committed
toke.c: Change parse_ident to take any string
Prior to this commit, the string passed to this function had to be pointing to somewhere in PL_bufptr. But this is only because it assumed that the initial position is less than PL_bufend. By passing the upper bound in, that assumption is automatically removed.
1 parent e4be402 commit 735e7cc

File tree

4 files changed

+42
-34
lines changed

4 files changed

+42
-34
lines changed

embed.fnc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6176,7 +6176,8 @@ So |SV * |new_constant |NULLOK const char *s \
61766176
|NULLOK const char *type \
61776177
|STRLEN typelen \
61786178
|NULLOK const char **error_msg
6179-
S |void |parse_ident |NN char **s \
6179+
S |char * |parse_ident |SPTR char *s \
6180+
|EPTRQ char * const s_end \
61806181
|SPTR char **d \
61816182
|EPTR char * const e \
61826183
|bool is_utf8 \

embed.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1688,7 +1688,7 @@
16881688
# define is_existing_identifier(a,b,c,d) S_is_existing_identifier(aTHX_ a,b,c,d)
16891689
# define lop(a,b,c,d) S_lop(aTHX_ a,b,c,d)
16901690
# define missingterm(a,b) S_missingterm(aTHX_ a,b)
1691-
# define parse_ident(a,b,c,d,e) S_parse_ident(aTHX_ a,b,c,d,e)
1691+
# define parse_ident(a,b,c,d,e,f) S_parse_ident(aTHX_ a,b,c,d,e,f)
16921692
# define pending_ident() S_pending_ident(aTHX)
16931693
# define scan_const(a) S_scan_const(aTHX_ a)
16941694
# define scan_formline(a) S_scan_formline(aTHX_ a)

proto.h

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

toke.c

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5504,7 +5504,8 @@ yyl_sigvar(pTHX_ char *s)
55045504
char *dest = PL_tokenbuf + 1;
55055505
/* read var name, including sigil, into PL_tokenbuf */
55065506
PL_tokenbuf[0] = sigil;
5507-
parse_ident(&s, &dest, C_ARRAY_END(PL_tokenbuf), cBOOL(UTF), 0);
5507+
s = parse_ident(s, PL_bufend, &dest, C_ARRAY_END(PL_tokenbuf),
5508+
cBOOL(UTF), 0);
55085509
*dest = '\0';
55095510
assert(PL_tokenbuf[1]); /* we have a variable name */
55105511
}
@@ -10537,15 +10538,16 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
1053710538
return SvREFCNT_inc_simple_NN(sv);
1053810539
}
1053910540

10540-
STATIC void
10541-
S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
10542-
U32 flags)
10541+
STATIC char *
10542+
S_parse_ident(pTHX_ char *s, char * const s_end,
10543+
char **d, char * const e,
10544+
bool is_utf8, U32 flags)
1054310545
{
1054410546
PERL_ARGS_ASSERT_PARSE_IDENT;
1054510547
assert(*s <= PL_bufend);
1054610548

1054710549
/* This function parses the string pointed to by '*s' (whose upper bound
10548-
* is 'send') looking for an identifier. It stops at the first character
10550+
* is 's_end') looking for an identifier. It stops at the first character
1054910551
* that isn't in one of the types of identifiers looked for, which are:
1055010552
*
1055110553
* 1) A normal identifier whose first character matches IDFIRST followed
@@ -10558,6 +10560,10 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1055810560
* end of the identifier. The reason it needs to copy is that it may
1055910561
* convert apostrophe package separators into double colons.
1056010562
*
10563+
* Upon success, it returns the position in s just beyond where the
10564+
* identifier ends in the input. If no identifier was found, the return
10565+
* will be the the input 's' unchanged.
10566+
*
1056110567
* The function croaks if there is not enough room for the entire source
1056210568
* identifier to be copied.
1056310569
*
@@ -10572,7 +10578,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1057210578
* in things like Foo::$bar */
1057310579
const bool check_dollar = flags & CHECK_DOLLAR;
1057410580

10575-
while (*s < PL_bufend) {
10581+
while (s < s_end) {
1057610582
if (*d >= e)
1057710583
croak("%s", ident_too_long);
1057810584

@@ -10581,58 +10587,58 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1058110587
* Unicode definition only when UTF-8 is in effect. We have to check
1058210588
* for the subset before checking for the superset. */
1058310589
Size_t advance;
10584-
if (is_utf8 && (advance = isIDFIRST_utf8_safe(*s, PL_bufend))) {
10590+
if (is_utf8 && (advance = isIDFIRST_utf8_safe(s, s_end))) {
1058510591

1058610592
/* Find the end of the identifier by accumulating characters until
1058710593
* find a non-identifier character */
10588-
char *t = *s + advance;
10594+
char *t = s + advance;
1058910595
while ((advance = isIDCONT_utf8_safe((const U8*) t,
10590-
(const U8*) PL_bufend)))
10596+
(const U8*) s_end)))
1059110597
{
1059210598
t += advance;
1059310599
}
1059410600

1059510601
/* Here we have found the end of the identifier */
10596-
if (*d + (t - *s) > e)
10602+
if (*d + (t - s) > e)
1059710603
croak("%s", ident_too_long);
1059810604

1059910605
/* And copy the whole thing in one operation */
10600-
Copy(*s, *d, t - *s, char);
10601-
*d += t - *s;
10602-
*s = t;
10606+
Copy(s, *d, t - s, char);
10607+
*d += t - s;
10608+
s = t;
1060310609
}
10604-
else if ( isWORDCHAR_A(**s) ) {
10610+
else if ( isWORDCHAR_A(*s) ) {
1060510611

1060610612
/* This is the superset; it accepts \w+, including an initial
1060710613
* digit */
1060810614
do {
10609-
*(*d)++ = *(*s)++;
10610-
} while (isWORDCHAR_A(**s) && *d < e);
10615+
*(*d)++ = *s++;
10616+
} while (isWORDCHAR_A(*s) && *d < e);
1061110617
}
1061210618
else if ( allow_package
10613-
&& **s == '\''
10619+
&& *s == '\''
1061410620
&& FEATURE_APOS_AS_NAME_SEP_IS_ENABLED
10615-
&& isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10621+
&& isIDFIRST_lazy_if_safe(s + 1, s_end, is_utf8))
1061610622
{ /* Convert the apostrophe to "::" */
1061710623
*(*d)++ = ':';
1061810624
*(*d)++ = ':';
10619-
(*s)++;
10625+
s++;
1062010626
}
10621-
else if (allow_package && **s == ':' && (*s)[1] == ':'
10627+
else if (allow_package && *s == ':' && s[1] == ':'
1062210628
/* Disallow things like Foo::$bar. For the curious, this is
1062310629
* the code path that triggers the "Bad name after" warning
1062410630
* when looking for barewords.
1062510631
*/
10626-
&& !(check_dollar && (*s)[2] == '$'))
10632+
&& !(check_dollar && s[2] == '$'))
1062710633
{
10628-
*(*d)++ = *(*s)++;
10629-
*(*d)++ = *(*s)++;
10634+
*(*d)++ = *s++;
10635+
*(*d)++ = *s++;
1063010636
}
1063110637
else /* None of the above means have come to the end of any
1063210638
identifier*/
1063310639
break;
1063410640
}
10635-
return;
10641+
return s;
1063610642
}
1063710643

1063810644
char *
@@ -10644,8 +10650,8 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
1064410650
char * const e = d + destlen - 3; /* two-character token, ending NUL */
1064510651
bool is_utf8 = cBOOL(UTF);
1064610652

10647-
parse_ident(&s, &d, e, is_utf8,
10648-
(CHECK_DOLLAR | ((allow_package) ? ALLOW_PACKAGE : 0)));
10653+
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10654+
(CHECK_DOLLAR | ((allow_package) ? ALLOW_PACKAGE : 0)));
1064910655
*d = '\0';
1065010656
*slp = d - dest;
1065110657
return s;
@@ -10686,7 +10692,7 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1068610692
croak(ident_var_zero_multi_digit);
1068710693
}
1068810694
else { /* See if it is a "normal" identifier */
10689-
parse_ident(&s, &d, e, is_utf8, ALLOW_PACKAGE);
10695+
s = parse_ident(s, PL_bufend, &d, e, is_utf8, ALLOW_PACKAGE);
1069010696
}
1069110697
*d = '\0';
1069210698
d = dest;
@@ -10807,8 +10813,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1080710813
(the later check for } being at the expected point will trap
1080810814
cases where this doesn't pan out.) */
1080910815
d += advance;
10810-
parse_ident(&s, &d, e, is_utf8, ( ALLOW_PACKAGE
10811-
|CHECK_DOLLAR));
10816+
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10817+
(ALLOW_PACKAGE | CHECK_DOLLAR));
1081210818
*d = '\0';
1081310819
}
1081410820
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */

0 commit comments

Comments
 (0)