@@ -7,16 +7,29 @@ private import semmle.python.regex
77 * An element containing a regular expression term, that is, either
88 * a string literal (parsed as a regular expression)
99 * or another regular expression term.
10+ *
11+ * For sequences and alternations, we require more than one child.
12+ * A sequence or alternation with a single child is represented as that child instead.
13+ * This avoids multiple representations of the same term.
1014 */
1115newtype TRegExpParent =
1216 /** A string literal used as a regular expression */
1317 TRegExpLiteral ( Regex re ) or
1418 /** A quantified term */
1519 TRegExpQuantifier ( Regex re , int start , int end ) { re .qualifiedItem ( start , end , _, _) } or
1620 /** A sequence term */
17- TRegExpSequence ( Regex re , int start , int end ) { re .sequence ( start , end ) } or
18- /** An alternatio term */
19- TRegExpAlt ( Regex re , int start , int end ) { re .alternation ( start , end ) } or
21+ TRegExpSequence ( Regex re , int start , int end ) {
22+ re .sequence ( start , end ) and
23+ exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
24+ } or
25+ /** An alternation term */
26+ TRegExpAlt ( Regex re , int start , int end ) {
27+ re .alternation ( start , end ) and
28+ exists ( int part_end |
29+ re .alternationOption ( start , end , start , part_end ) and
30+ part_end < end
31+ ) // if an alternation does not have more than one element, it should be treated as that element instead.
32+ } or
2033 /** A character class term */
2134 TRegExpCharacterClass ( Regex re , int start , int end ) { re .charSet ( start , end ) } or
2235 /** A character range term */
@@ -93,8 +106,7 @@ class RegExpTerm extends RegExpParent {
93106 or
94107 this = TRegExpQuantifier ( re , start , end )
95108 or
96- this = TRegExpSequence ( re , start , end ) and
97- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
109+ this = TRegExpSequence ( re , start , end )
98110 or
99111 this = TRegExpSpecialChar ( re , start , end )
100112 }
@@ -341,10 +353,7 @@ class RegExpRange extends RegExpQuantifier {
341353 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
342354 */
343355class RegExpSequence extends RegExpTerm , TRegExpSequence {
344- RegExpSequence ( ) {
345- this = TRegExpSequence ( re , start , end ) and
346- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
347- }
356+ RegExpSequence ( ) { this = TRegExpSequence ( re , start , end ) }
348357
349358 override RegExpTerm getChild ( int i ) { result = seqChild ( re , start , end , i ) }
350359
0 commit comments