@@ -5504,7 +5504,8 @@ yyl_sigvar(pTHX_ char *s)
55045504 char * dest = PL_tokenbuf + 1 ;
55055505 /* read var name, including sigil, into PL_tokenbuf */
55065506 PL_tokenbuf [0 ] = sigil ;
5507- parse_ident (& s , & dest , C_ARRAY_END (PL_tokenbuf ), cBOOL (UTF ), 0 );
5507+ s = parse_ident (s , PL_bufend , & dest , C_ARRAY_END (PL_tokenbuf ),
5508+ cBOOL (UTF ), 0 );
55085509 * dest = '\0' ;
55095510 assert (PL_tokenbuf [1 ]); /* we have a variable name */
55105511 }
@@ -10537,15 +10538,16 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
1053710538 return SvREFCNT_inc_simple_NN (sv );
1053810539}
1053910540
10540- STATIC void
10541- S_parse_ident (pTHX_ char * * s , char * * d , char * const e , bool is_utf8 ,
10542- U32 flags )
10541+ STATIC char *
10542+ S_parse_ident (pTHX_ char * s , char * const s_end ,
10543+ char * * d , char * const e ,
10544+ bool is_utf8 , U32 flags )
1054310545{
1054410546 PERL_ARGS_ASSERT_PARSE_IDENT ;
1054510547 assert (* s <= PL_bufend );
1054610548
1054710549 /* This function parses the string pointed to by '*s' (whose upper bound
10548- * is 'send ') looking for an identifier. It stops at the first character
10550+ * is 's_end ') looking for an identifier. It stops at the first character
1054910551 * that isn't in one of the types of identifiers looked for, which are:
1055010552 *
1055110553 * 1) A normal identifier whose first character matches IDFIRST followed
@@ -10558,6 +10560,10 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1055810560 * end of the identifier. The reason it needs to copy is that it may
1055910561 * convert apostrophe package separators into double colons.
1056010562 *
10563+ * Upon success, it returns the position in s just beyond where the
10564+ * identifier ends in the input. If no identifier was found, the return
10565+ * will be the the input 's' unchanged.
10566+ *
1056110567 * The function croaks if there is not enough room for the entire source
1056210568 * identifier to be copied.
1056310569 *
@@ -10572,7 +10578,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1057210578 * in things like Foo::$bar */
1057310579 const bool check_dollar = flags & CHECK_DOLLAR ;
1057410580
10575- while (* s < PL_bufend ) {
10581+ while (s < s_end ) {
1057610582 if (* d >= e )
1057710583 croak ("%s" , ident_too_long );
1057810584
@@ -10581,58 +10587,58 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, bool is_utf8,
1058110587 * Unicode definition only when UTF-8 is in effect. We have to check
1058210588 * for the subset before checking for the superset. */
1058310589 Size_t advance ;
10584- if (is_utf8 && (advance = isIDFIRST_utf8_safe (* s , PL_bufend ))) {
10590+ if (is_utf8 && (advance = isIDFIRST_utf8_safe (s , s_end ))) {
1058510591
1058610592 /* Find the end of the identifier by accumulating characters until
1058710593 * find a non-identifier character */
10588- char * t = * s + advance ;
10594+ char * t = s + advance ;
1058910595 while ((advance = isIDCONT_utf8_safe ((const U8 * ) t ,
10590- (const U8 * ) PL_bufend )))
10596+ (const U8 * ) s_end )))
1059110597 {
1059210598 t += advance ;
1059310599 }
1059410600
1059510601 /* Here we have found the end of the identifier */
10596- if (* d + (t - * s ) > e )
10602+ if (* d + (t - s ) > e )
1059710603 croak ("%s" , ident_too_long );
1059810604
1059910605 /* And copy the whole thing in one operation */
10600- Copy (* s , * d , t - * s , char );
10601- * d += t - * s ;
10602- * s = t ;
10606+ Copy (s , * d , t - s , char );
10607+ * d += t - s ;
10608+ s = t ;
1060310609 }
10604- else if ( isWORDCHAR_A (* * s ) ) {
10610+ else if ( isWORDCHAR_A (* s ) ) {
1060510611
1060610612 /* This is the superset; it accepts \w+, including an initial
1060710613 * digit */
1060810614 do {
10609- * (* d )++ = * ( * s ) ++ ;
10610- } while (isWORDCHAR_A (* * s ) && * d < e );
10615+ * (* d )++ = * s ++ ;
10616+ } while (isWORDCHAR_A (* s ) && * d < e );
1061110617 }
1061210618 else if ( allow_package
10613- && * * s == '\''
10619+ && * s == '\''
1061410620 && FEATURE_APOS_AS_NAME_SEP_IS_ENABLED
10615- && isIDFIRST_lazy_if_safe (( * s ) + 1 , PL_bufend , is_utf8 ))
10621+ && isIDFIRST_lazy_if_safe (s + 1 , s_end , is_utf8 ))
1061610622 { /* Convert the apostrophe to "::" */
1061710623 * (* d )++ = ':' ;
1061810624 * (* d )++ = ':' ;
10619- ( * s ) ++ ;
10625+ s ++ ;
1062010626 }
10621- else if (allow_package && * * s == ':' && ( * s ) [1 ] == ':'
10627+ else if (allow_package && * s == ':' && s [1 ] == ':'
1062210628 /* Disallow things like Foo::$bar. For the curious, this is
1062310629 * the code path that triggers the "Bad name after" warning
1062410630 * when looking for barewords.
1062510631 */
10626- && !(check_dollar && ( * s ) [2 ] == '$' ))
10632+ && !(check_dollar && s [2 ] == '$' ))
1062710633 {
10628- * (* d )++ = * ( * s ) ++ ;
10629- * (* d )++ = * ( * s ) ++ ;
10634+ * (* d )++ = * s ++ ;
10635+ * (* d )++ = * s ++ ;
1063010636 }
1063110637 else /* None of the above means have come to the end of any
1063210638 identifier*/
1063310639 break ;
1063410640 }
10635- return ;
10641+ return s ;
1063610642}
1063710643
1063810644char *
@@ -10644,8 +10650,8 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
1064410650 char * const e = d + destlen - 3 ; /* two-character token, ending NUL */
1064510651 bool is_utf8 = cBOOL (UTF );
1064610652
10647- parse_ident (& s , & d , e , is_utf8 ,
10648- (CHECK_DOLLAR | ((allow_package ) ? ALLOW_PACKAGE : 0 )));
10653+ s = parse_ident (s , PL_bufend , & d , e , is_utf8 ,
10654+ (CHECK_DOLLAR | ((allow_package ) ? ALLOW_PACKAGE : 0 )));
1064910655 * d = '\0' ;
1065010656 * slp = d - dest ;
1065110657 return s ;
@@ -10686,7 +10692,7 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1068610692 croak (ident_var_zero_multi_digit );
1068710693 }
1068810694 else { /* See if it is a "normal" identifier */
10689- parse_ident (& s , & d , e , is_utf8 , ALLOW_PACKAGE );
10695+ s = parse_ident (s , PL_bufend , & d , e , is_utf8 , ALLOW_PACKAGE );
1069010696 }
1069110697 * d = '\0' ;
1069210698 d = dest ;
@@ -10807,8 +10813,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1080710813 (the later check for } being at the expected point will trap
1080810814 cases where this doesn't pan out.) */
1080910815 d += advance ;
10810- parse_ident (& s , & d , e , is_utf8 , ( ALLOW_PACKAGE
10811- | CHECK_DOLLAR ));
10816+ s = parse_ident (s , PL_bufend , & d , e , is_utf8 ,
10817+ ( ALLOW_PACKAGE | CHECK_DOLLAR ));
1081210818 * d = '\0' ;
1081310819 }
1081410820 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
0 commit comments