@@ -7,16 +7,29 @@ private import semmle.python.regex
77 * An element containing a regular expression term, that is, either
88 * a string literal (parsed as a regular expression)
99 * or another regular expression term.
10+ *
11+ * For sequences and alternations, we require at least two children.
12+ * Otherwise, we wish to represent the term differently.
13+ * This avoids multiple representations of the same term.
1014 */
1115newtype TRegExpParent =
1216 /** A string literal used as a regular expression */
1317 TRegExpLiteral ( Regex re ) or
1418 /** A quantified term */
1519 TRegExpQuantifier ( Regex re , int start , int end ) { re .qualifiedItem ( start , end , _, _) } or
1620 /** A sequence term */
17- TRegExpSequence ( Regex re , int start , int end ) { re .sequence ( start , end ) } or
21+ TRegExpSequence ( Regex re , int start , int end ) {
22+ re .sequence ( start , end ) and
23+ exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
24+ } or
1825 /** An alternation term */
19- TRegExpAlt ( Regex re , int start , int end ) { re .alternation ( start , end ) } or
26+ TRegExpAlt ( Regex re , int start , int end ) {
27+ re .alternation ( start , end ) and
28+ exists ( int part_end |
29+ re .alternationOption ( start , end , start , part_end ) and
30+ part_end < end
31+ ) // if an alternation does not have more than one element, it should be treated as that element instead.
32+ } or
2033 /** A character class term */
2134 TRegExpCharacterClass ( Regex re , int start , int end ) { re .charSet ( start , end ) } or
2235 /** A character range term */
@@ -75,11 +88,7 @@ class RegExpTerm extends RegExpParent {
7588 int end ;
7689
7790 RegExpTerm ( ) {
78- this = TRegExpAlt ( re , start , end ) and
79- exists ( int part_end |
80- re .alternationOption ( start , end , start , part_end ) and
81- part_end < end
82- ) // if an alternation does not have more than one element, it should be treated as that element instead.
91+ this = TRegExpAlt ( re , start , end )
8392 or
8493 this = TRegExpBackRef ( re , start , end )
8594 or
@@ -93,8 +102,7 @@ class RegExpTerm extends RegExpParent {
93102 or
94103 this = TRegExpQuantifier ( re , start , end )
95104 or
96- this = TRegExpSequence ( re , start , end ) and
97- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
105+ this = TRegExpSequence ( re , start , end )
98106 or
99107 this = TRegExpSpecialChar ( re , start , end )
100108 }
@@ -341,10 +349,7 @@ class RegExpRange extends RegExpQuantifier {
341349 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
342350 */
343351class RegExpSequence extends RegExpTerm , TRegExpSequence {
344- RegExpSequence ( ) {
345- this = TRegExpSequence ( re , start , end ) and
346- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
347- }
352+ RegExpSequence ( ) { this = TRegExpSequence ( re , start , end ) }
348353
349354 override RegExpTerm getChild ( int i ) { result = seqChild ( re , start , end , i ) }
350355
0 commit comments