1+ /**
2+ * Provides predicates that track strings and regular expressions to where they are used.
3+ * This is implemented using TypeTracking in two phases:
4+ *
5+ * 1: An exploratory analysis that just imprecisely tracks all strings and regular expressions
6+ * to all places where regular expressions (as string or as regular expression objects) can be used.
7+ * The exploratory phase then ends with a backwards analysis from the uses that were reached.
8+ * This is similar to the exploratory phase of the JavaScript global DataFlow library.
9+ *
10+ * 2: A precise type tracking analysis that tracks
11+ * strings and regular expressions to the places where they are used.
12+ * This phase keeps track of which strings and regular expressions end up in which places.
13+ */
14+
115private import codeql.ruby.Regexp as RE
216private import codeql.ruby.AST as Ast
317private import codeql.ruby.CFG
@@ -11,41 +25,114 @@ private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
1125private import codeql.ruby.TaintTracking
1226private import codeql.ruby.frameworks.core.String
1327
14- /**
15- * Gets a node that has been tracked from the string constant `start` to some node.
16- * This is used to figure out where `start` is evaluated as a regular expression against an input string,
17- * or where `start` is compiled into a regular expression.
18- */
19- private DataFlow:: LocalSourceNode strToReg ( DataFlow:: Node start , TypeTracker t ) {
20- t .start ( ) and
21- start = result and
28+ /** Gets a node for an expression with a constant string value that may be used as a regular expression. Variable and constant read accesses are excluded. */
29+ DataFlow:: LocalSourceNode strStart ( ) {
2230 result .asExpr ( ) =
2331 any ( ExprCfgNode e |
2432 e .getConstantValue ( ) .isString ( _) and
2533 not e instanceof ExprNodes:: VariableReadAccessCfgNode and
2634 not e instanceof ExprNodes:: ConstantReadAccessCfgNode
2735 )
36+ }
37+
38+ /** Gets a data-flow node whose underlying expression is a regular expression literal (`Ast::RegExpLiteral`). */
39+ DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
40+
41+ /**
42+ * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
43+ * `nodeFrom` and `nodeTo` have type `fromType` and `toType` respectively.
44+ * The types are either "string" or "reg": string-to-string steps keep the type "string", and passing a string as the first argument of `Regexp.new` or `Regexp.compile` goes from "string" to "reg".
45+ */
46+ predicate step (
47+ DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
48+ ) {
49+ fromType = toType and
50+ fromType = "string" and
51+ (
52+ // include taint flow through `String` summaries
53+ TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
54+ nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
55+ String:: SummarizedCallable
56+ or
57+ // string concatenations with the `+` operator, and
58+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
59+ op = nodeTo .asExpr ( ) and
60+ op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
61+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
62+ )
63+ or
64+ // string interpolations
65+ nodeFrom .asExpr ( ) =
66+ nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
67+ )
68+ or
69+ fromType = "string" and
70+ toType = "reg" and
71+ exists ( DataFlow:: CallNode call |
72+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
73+ nodeFrom = call .getArgument ( 0 ) and
74+ nodeTo = call
75+ )
76+ }
77+
78+ /** Gets a node where string values that flow to the node are interpreted as regular expressions. An operand of a `match`/`match?` call is excluded when the other operand (first argument or receiver) is a tracked regular expression. */
79+ DataFlow:: Node stringSink ( ) {
80+ result instanceof RE:: RegExpInterpretation:: Range and
81+ not exists ( DataFlow:: CallNode mce | mce .getMethodName ( ) = [ "match" , "match?" ] |
82+ // receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
83+ result = mce .getReceiver ( ) and
84+ mce .getArgument ( 0 ) = trackRegexpType ( )
85+ or
86+ // first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
87+ result = mce .getArgument ( 0 ) and
88+ mce .getReceiver ( ) = trackRegexpType ( )
89+ )
90+ }
91+
92+ /** Gets a node where regular expressions that flow to the node are used, that is, the regex of some `RegexExecution`. */
93+ DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
94+
95+ /** Gets a node that is reachable by type-tracking from any string or regular expression, also following the additional `step` edges without regard to their "string"/"reg" types. */
96+ DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
97+ t .start ( ) and
98+ result = [ strStart ( ) , regStart ( ) ]
99+ or
100+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
101+ or
102+ exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
103+ }
104+
105+ /**
106+ * Gets a node that is backwards reachable from any regular expression use (a `stringSink` or `regSink`),
107+ * where that use is reachable by type-tracking from any string or regular expression. The additional `step` edges are followed in reverse, without regard to their types.
108+ */
109+ DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
110+ t .start ( ) and
111+ result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
112+ result = forward ( TypeTracker:: end ( ) )
28113 or
29- exists ( TypeTracker t2 | result = strToReg ( start , t2 ) .track ( t2 , t ) )
114+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
30115 or
31- exists ( TypeTracker t2 , DataFlow:: Node nodeFrom | t2 = t .continue ( ) |
32- strToReg ( start , t2 ) .flowsTo ( nodeFrom ) and
33- (
34- // include taint flow through `String` summaries
35- TaintTracking:: localTaintStep ( nodeFrom , result ) and
36- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
37- String:: SummarizedCallable
38- or
39- // string concatenations, and
40- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
41- op = result .asExpr ( ) and
42- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
43- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
44- )
45- or
46- // string interpolations
47- nodeFrom .asExpr ( ) =
48- result .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
116+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
117+ }
118+
119+ /**
120+ * Gets a node that has been tracked from the string constant `start` to some node.
121+ * This is used to figure out where `start` is evaluated as a regular expression against an input string,
122+ * or where `start` is compiled into a regular expression.
123+ */
124+ private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
125+ result = backwards ( _) and // only consider nodes that `backwards` found to reach a use
126+ (
127+ t .start ( ) and
128+ start = result and
129+ result = strStart ( )
130+ or
131+ exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
132+ or
133+ // an additional step from string to string (the "string"-typed cases of `step`)
134+ exists ( TypeTracker t2 | t2 = t .continue ( ) |
135+ step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
49136 )
50137 )
51138}
@@ -54,19 +141,18 @@ private DataFlow::LocalSourceNode strToReg(DataFlow::Node start, TypeTracker t)
54141 * Gets a node that has been tracked from the regular expression `start` to some node.
55142 * This is used to figure out where `start` is executed against an input string.
56143 */
57- private DataFlow:: LocalSourceNode regToReg ( DataFlow:: Node start , TypeTracker t ) {
58- t .start ( ) and
59- start = result and
60- result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral
61- or
62- exists ( TypeTracker t2 | result = regToReg ( start , t2 ) .track ( t2 , t ) )
63- or
64- exists ( TypeTracker t2 |
65- t2 = t .continue ( ) and
66- exists ( DataFlow:: CallNode call |
67- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
68- strToReg ( start , t2 ) .flowsTo ( call .getArgument ( 0 ) ) and
69- result = call
144+ private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
145+ result = backwards ( _) and // only consider nodes that `backwards` found to reach a use
146+ (
147+ t .start ( ) and
148+ start = result and
149+ result = regStart ( )
150+ or
151+ exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
152+ or
153+ // an additional step where a string tracked by `trackStrings` is converted to a regular expression (the "string"-to-"reg" case of `step`)
154+ exists ( TypeTracker t2 | t2 = t .continue ( ) |
155+ step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
70156 )
71157 )
72158}
@@ -75,7 +161,7 @@ private DataFlow::LocalSourceNode regToReg(DataFlow::Node start, TypeTracker t)
75161private DataFlow:: LocalSourceNode trackRegexpType ( TypeTracker t ) {
76162 t .start ( ) and
77163 (
78- result . asExpr ( ) . getExpr ( ) instanceof Ast :: RegExpLiteral or
164+ result = regStart ( ) or
79165 result = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] )
80166 )
81167 or
@@ -85,25 +171,14 @@ private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {
85171/** Gets a node that references a regular expression. */
86172DataFlow:: Node trackRegexpType ( ) { trackRegexpType ( TypeTracker:: end ( ) ) .flowsTo ( result ) }
87173
88- /** Gets a the value for the regular expression that is evaluated at `re`. */
174+ /** Gets a node holding a value for the regular expression that is evaluated at `re`. `re` is either a `stringSink` that is a local use of a node tracked from a string, or a `regSink` that is a local use of a node tracked from a regular expression. */
89175cached
90176DataFlow:: Node regExpSource ( DataFlow:: Node re ) {
91- exists ( DataFlow:: LocalSourceNode end | end = strToReg ( result , TypeTracker:: end ( ) ) |
92- end .flowsTo ( re ) and
93- re instanceof RE:: RegExpInterpretation:: Range and
94- not exists ( DataFlow:: CallNode mce | mce .getMethodName ( ) = [ "match" , "match?" ] |
95- // receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
96- re = mce .getReceiver ( ) and
97- mce .getArgument ( 0 ) = trackRegexpType ( )
98- or
99- // first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
100- re = mce .getArgument ( 0 ) and
101- mce .getReceiver ( ) = trackRegexpType ( )
102- )
177+ exists ( DataFlow:: LocalSourceNode end | end = trackStrings ( result , TypeTracker:: end ( ) ) |
178+ end .getALocalUse ( ) = re and re = stringSink ( )
103179 )
104180 or
105- exists ( DataFlow:: LocalSourceNode end | end = regToReg ( result , TypeTracker:: end ( ) ) |
106- end .flowsTo ( re ) and
107- re = any ( RegexExecution exec ) .getRegex ( )
181+ exists ( DataFlow:: LocalSourceNode end | end = trackRegs ( result , TypeTracker:: end ( ) ) |
182+ end .getALocalUse ( ) = re and re = regSink ( )
108183 )
109184}
0 commit comments