@@ -838,17 +838,15 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
838838 assert_accept_frame = F ;
839839 RRETURN (MATCH_ACCEPT );
840840
841- /* If recursing, we have to find the most recent recursion. */
841+ /* For ACCEPT within a recursion, we have to find the most recent
842+ recursion. If not in a recursion, fall through to code that is common with
843+ OP_END. */
842844
843845 case OP_ACCEPT :
844- case OP_END :
845-
846- /* Handle end of a recursion. */
847-
848846 if (Fcurrent_recurse != RECURSE_UNSET )
849847 {
850848#ifdef DEBUG_SHOW_OPS
851- fprintf (stderr , "++ End within recursion\n" );
849+ fprintf (stderr , "++ Accept within recursion\n" );
852850#endif
853851 offset = Flast_group_offset ;
854852 for (;;)
@@ -857,7 +855,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
857855 N = (heapframe * )((char * )match_data -> heapframes + offset );
858856 P = (heapframe * )((char * )N - frame_size );
859857 if (GF_IDMASK (N -> group_frame_type ) == GF_RECURSE ) break ;
860-
861858 offset = P -> last_group_offset ;
862859 }
863860
@@ -873,11 +870,17 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
873870 Fecode += 1 + LINK_SIZE ;
874871 continue ;
875872 }
873+ /* Fall through */
876874
877- /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
878- is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
879- start of the subject. In both cases, backtracking will then try other
880- alternatives, if any. */
875+ /* OP_END can never be reached within a recursion, because the end of a
876+ whole-pattern recursion is handled at the OP_KET that always precedes it. */
877+
878+ case OP_END :
879+
880+ /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
881+ PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
882+ subject. In both cases, backtracking will then try other alternatives, if
883+ any. */
881884
882885 if (Feptr == Fstart_match &&
883886 ((mb -> moptions & PCRE2_NOTEMPTY ) != 0 ||
@@ -5856,7 +5859,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
58565859 /* ===================================================================== */
58575860 /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
58585861 starting frame was added to the chained frames in order to remember the
5859- starting subject position for the group. */
5862+ starting subject position for the group. (Not true for OP_BRA when it's a
5863+ whole-pattern recursion, but that is handled separately below.) */
58605864
58615865 case OP_KET :
58625866 case OP_KETRMIN :
@@ -5908,8 +5912,37 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
59085912
59095913 switch (* bracode )
59105914 {
5911- case OP_BRA : /* No need to do anything for these */
5912- case OP_COND :
5915+ /* Whole pattern recursion is handled as a recursion into group 0, but
5916+ the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
5917+ group - a design mistake: it should perhaps have been capture group 0.
5918+ Anyway, that means the end of such recursion must be handled here. It is
5919+ detected by checking for an immediately following OP_END when we are
5920+ recursing in group 0. If this is not the end of a whole-pattern
5921+ recursion, there is nothing to be done. */
5922+
5923+ case OP_BRA :
5924+ if (Fcurrent_recurse != 0 || Fecode [1 + LINK_SIZE ] != OP_END ) break ;
5925+
5926+ /* It is the end of whole-pattern recursion. */
5927+
5928+ offset = Flast_group_offset ;
5929+ if (offset == PCRE2_UNSET ) return PCRE2_ERROR_INTERNAL ;
5930+ N = (heapframe * )((char * )match_data -> heapframes + offset );
5931+ P = (heapframe * )((char * )N - frame_size );
5932+ Flast_group_offset = P -> last_group_offset ;
5933+
5934+ /* Reinstate the previous set of captures and then carry on after the
5935+ recursion call. */
5936+
5937+ memcpy ((char * )F + offsetof(heapframe , ovector ), P -> ovector ,
5938+ Foffset_top * sizeof (PCRE2_SIZE ));
5939+ Foffset_top = P -> offset_top ;
5940+ Fcapture_last = P -> capture_last ;
5941+ Fcurrent_recurse = P -> current_recurse ;
5942+ Fecode = P -> ecode + 1 + LINK_SIZE ;
5943+ continue ; /* With next opcode */
5944+
5945+ case OP_COND : /* No need to do anything for these */
59135946 case OP_SCOND :
59145947 break ;
59155948
@@ -5976,9 +6009,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
59766009 if (!PRIV (script_run )(P -> eptr , Feptr , utf )) RRETURN (MATCH_NOMATCH );
59776010 break ;
59786011
5979- /* Whole-pattern recursion is coded as a recurse into group 0, so it
5980- won't be picked up here. Instead, we catch it when the OP_END is reached.
5981- Other recursion is handled here. */
6012+ /* Whole-pattern recursion is coded as a recurse into group 0, and is
6013+ handled with OP_BRA above. Other recursion is handled here. */
59826014
59836015 case OP_CBRA :
59846016 case OP_CBRAPOS :
0 commit comments