|
16 | 16 |
|
17 | 17 | import javascript |
18 | 18 | import semmle.javascript.security.performance.ReDoSUtil |
19 | | - |
20 | | -/* |
21 | | - * This query implements the analysis described in the following two papers: |
22 | | - * |
23 | | - * James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for |
24 | | - * Regular Expression Denial-of-Service Attacks. NSS 2013. |
25 | | - * (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf) |
26 | | - * Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression |
27 | | - * Exponential Runtime via Substructural Logics. 2014. |
28 | | - * (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf) |
29 | | - * |
30 | | - * The basic idea is to search for overlapping cycles in the NFA, that is, |
31 | | - * states `q` such that there are two distinct paths from `q` to itself |
32 | | - * that consume the same word `w`. |
33 | | - * |
34 | | - * For any such state `q`, an attack string can be constructed as follows: |
35 | | - * concatenate a prefix `v` that takes the NFA to `q` with `n` copies of |
36 | | - * the word `w` that leads back to `q` along two different paths, followed |
37 | | - * by a suffix `x` that is _not_ accepted in state `q`. A backtracking |
38 | | - * implementation will need to explore at least 2^n different ways of going |
39 | | - * from `q` back to itself while trying to match the `n` copies of `w` |
40 | | - * before finally giving up. |
41 | | - * |
42 | | - * Now in order to identify overlapping cycles, all we have to do is find |
43 | | - * pumpable forks, that is, states `q` that can transition to two different |
44 | | - * states `r1` and `r2` on the same input symbol `c`, such that there are |
45 | | - * paths from both `r1` and `r2` to `q` that consume the same word. The latter |
46 | | - * condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)` |
47 | | - * in the product NFA. |
48 | | - * |
49 | | - * This is what the query does. It makes a simple attempt to construct a |
50 | | - * prefix `v` leading into `q`, but only to improve the alert message. |
51 | | - * And the query tries to prove the existence of a suffix that ensures |
52 | | - * rejection. This check might fail, which can cause false positives. |
53 | | - * |
54 | | - * Finally, sometimes it depends on the translation whether the NFA generated |
55 | | - * for a regular expression has a pumpable fork or not. We implement one |
56 | | - * particular translation, which may result in false positives or negatives |
57 | | - * relative to some particular JavaScript engine. |
58 | | - * |
59 | | - * More precisely, the query constructs an NFA from a regular expression `r` |
60 | | - * as follows: |
61 | | - * |
62 | | - * * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing |
63 | | - * the state of the automaton before attempting to match the `i`th character in `t`. |
64 | | - * * There is one accepting state `Accept(r)`. |
65 | | - * * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string |
66 | | - * by using an epsilon transition to `Accept(r)` and an any transition to itself. |
67 | | - * * Transitions between states may be labelled with epsilon, or an abstract |
68 | | - * input symbol. |
69 | | - * * Each abstract input symbol represents a set of concrete input characters: |
70 | | - * either a single character, a set of characters represented by a |
71 | | - * character class, or the set of all characters. |
72 | | - * * The product automaton is constructed lazily, starting with pair states |
73 | | - * `(q, q)` where `q` is a fork, and proceding along an over-approximate |
74 | | - * step relation. |
75 | | - * * The over-approximate step relation allows transitions along pairs of |
76 | | - * abstract input symbols where the symbols have overlap in the characters they accept. |
77 | | - * * Once a trace of pairs of abstract input symbols that leads from a fork |
78 | | - * back to itself has been identified, we attempt to construct a concrete |
79 | | - * string corresponding to it, which may fail. |
80 | | - * * Lastly we ensure that any state reached by repeating `n` copies of `w` has |
81 | | - * a suffix `x` (possible empty) that is most likely __not__ accepted. |
82 | | - */ |
83 | | - |
84 | | -/** |
85 | | - * Holds if state `s` might be inside a backtracking repetition. |
86 | | - */ |
87 | | -pragma[noinline] |
88 | | -predicate stateInsideBacktracking(State s) { |
89 | | - s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition |
90 | | -} |
91 | | - |
92 | | -/** |
93 | | - * A infinitely repeating quantifier that might backtrack. |
94 | | - */ |
95 | | -class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier { |
96 | | - MaybeBacktrackingRepetition() { |
97 | | - exists(RegExpTerm child | |
98 | | - child instanceof RegExpAlt or |
99 | | - child instanceof RegExpQuantifier |
100 | | - | |
101 | | - child.getParent+() = this |
102 | | - ) |
103 | | - } |
104 | | -} |
105 | | - |
106 | | -/** |
107 | | - * A state in the product automaton. |
108 | | - * |
109 | | - * We lazily only construct those states that we are actually |
110 | | - * going to need: `(q, q)` for every fork state `q`, and any |
111 | | - * pair of states that can be reached from a pair that we have |
112 | | - * already constructed. To cut down on the number of states, |
113 | | - * we only represent states `(q1, q2)` where `q1` is lexicographically |
114 | | - * no bigger than `q2`. |
115 | | - * |
116 | | - * States are only constructed if both states in the pair are |
117 | | - * inside a repetition that might backtrack. |
118 | | - */ |
119 | | -newtype TStatePair = |
120 | | - MkStatePair(State q1, State q2) { |
121 | | - isFork(q1, _, _, _, _) and q2 = q1 |
122 | | - or |
123 | | - (step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and |
124 | | - rankState(q1) <= rankState(q2) |
125 | | - } |
126 | | - |
127 | | -/** |
128 | | - * Gets a unique number for a `state`. |
129 | | - * Is used to create an ordering of states, where states with the same `toString()` will be ordered differently. |
130 | | - */ |
131 | | -int rankState(State state) { |
132 | | - state = |
133 | | - rank[result](State s, Location l | |
134 | | - l = s.getRepr().getLocation() |
135 | | - | |
136 | | - s order by l.getStartLine(), l.getStartColumn(), s.toString() |
137 | | - ) |
138 | | -} |
139 | | - |
140 | | -class StatePair extends TStatePair { |
141 | | - State q1; |
142 | | - State q2; |
143 | | - |
144 | | - StatePair() { this = MkStatePair(q1, q2) } |
145 | | - |
146 | | - string toString() { result = "(" + q1 + ", " + q2 + ")" } |
147 | | - |
148 | | - State getLeft() { result = q1 } |
149 | | - |
150 | | - State getRight() { result = q2 } |
151 | | -} |
152 | | - |
153 | | -predicate isStatePair(StatePair p) { any() } |
154 | | - |
155 | | -predicate delta2(StatePair q, StatePair r) { step(q, _, _, r) } |
156 | | - |
157 | | -/** |
158 | | - * Gets the minimum length of a path from `q` to `r` in the |
159 | | - * product automaton. |
160 | | - */ |
161 | | -int statePairDist(StatePair q, StatePair r) = |
162 | | - shortestDistances(isStatePair/1, delta2/2)(q, r, result) |
163 | | - |
164 | | -/** |
165 | | - * Holds if there are transitions from `q` to `r1` and from `q` to `r2` |
166 | | - * labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not |
167 | | - * trivially have an empty intersection. |
168 | | - * |
169 | | - * This predicate only holds for states associated with regular expressions |
170 | | - * that have at least one repetition quantifier in them (otherwise the |
171 | | - * expression cannot be vulnerable to ReDoS attacks anyway). |
172 | | - */ |
173 | | -pragma[noopt] |
174 | | -predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) { |
175 | | - stateInsideBacktracking(q) and |
176 | | - exists(State q1, State q2 | |
177 | | - q1 = epsilonSucc*(q) and |
178 | | - delta(q1, s1, r1) and |
179 | | - q2 = epsilonSucc*(q) and |
180 | | - delta(q2, s2, r2) and |
181 | | - // Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join. |
182 | | - // From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals, |
183 | | - // and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions. |
184 | | - exists(intersect(s1, s2)) |
185 | | - | |
186 | | - s1 != s2 |
187 | | - or |
188 | | - r1 != r2 |
189 | | - or |
190 | | - r1 = r2 and q1 != q2 |
191 | | - or |
192 | | - // If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state: |
193 | | - // one that uses the loop and one that doesn't. The engine will separately attempt to match with each path, |
194 | | - // despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not. |
195 | | - // To avoid every state in the loop becoming a fork state, |
196 | | - // we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop |
197 | | - // (every epsilon-loop must contain such a state). |
198 | | - // |
199 | | - // We additionally require that the there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself. |
200 | | - // This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`. |
201 | | - // The below code is therefore a heuritic, that only flags regular expressions such as `/(a*)*b/`, |
202 | | - // and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently. |
203 | | - r1 = r2 and |
204 | | - q1 = q2 and |
205 | | - epsilonSucc+(q) = q and |
206 | | - exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and |
207 | | - // One of the mid states is an infinite quantifier itself |
208 | | - exists(State mid, RegExpTerm term | |
209 | | - mid = epsilonSucc+(q) and |
210 | | - term = mid.getRepr() and |
211 | | - term instanceof InfiniteRepetitionQuantifier and |
212 | | - q = epsilonSucc+(mid) and |
213 | | - not mid = q |
214 | | - ) |
215 | | - ) and |
216 | | - stateInsideBacktracking(r1) and |
217 | | - stateInsideBacktracking(r2) |
218 | | -} |
219 | | - |
220 | | -/** |
221 | | - * Gets the state pair `(q1, q2)` or `(q2, q1)`; note that only |
222 | | - * one or the other is defined. |
223 | | - */ |
224 | | -StatePair mkStatePair(State q1, State q2) { |
225 | | - result = MkStatePair(q1, q2) or result = MkStatePair(q2, q1) |
226 | | -} |
227 | | - |
228 | | -/** |
229 | | - * Holds if there are transitions from the components of `q` to the corresponding |
230 | | - * components of `r` labelled with `s1` and `s2`, respectively. |
231 | | - */ |
232 | | -predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) { |
233 | | - exists(State r1, State r2 | step(q, s1, s2, r1, r2) and r = mkStatePair(r1, r2)) |
234 | | -} |
235 | | - |
236 | | -/** |
237 | | - * Holds if there are transitions from the components of `q` to `r1` and `r2` |
238 | | - * labelled with `s1` and `s2`, respectively. |
239 | | - * |
240 | | - * We only consider transitions where the resulting states `(r1, r2)` are both |
241 | | - * inside a repetition that might backtrack. |
242 | | - */ |
243 | | -pragma[noopt] |
244 | | -predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) { |
245 | | - exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 | |
246 | | - deltaClosed(q1, s1, r1) and |
247 | | - deltaClosed(q2, s2, r2) and |
248 | | - // use noopt to force the join on `intersect` to happen last. |
249 | | - exists(intersect(s1, s2)) |
250 | | - ) and |
251 | | - stateInsideBacktracking(r1) and |
252 | | - stateInsideBacktracking(r2) |
253 | | -} |
254 | | - |
255 | | -private newtype TTrace = |
256 | | - Nil() or |
257 | | - Step(InputSymbol s1, InputSymbol s2, TTrace t) { |
258 | | - exists(StatePair p | |
259 | | - isReachableFromFork(_, p, t, _) and |
260 | | - step(p, s1, s2, _) |
261 | | - ) |
262 | | - or |
263 | | - t = Nil() and isFork(_, s1, s2, _, _) |
264 | | - } |
265 | | - |
266 | | -/** |
267 | | - * A list of pairs of input symbols that describe a path in the product automaton |
268 | | - * starting from some fork state. |
269 | | - */ |
270 | | -class Trace extends TTrace { |
271 | | - string toString() { |
272 | | - this = Nil() and result = "Nil()" |
273 | | - or |
274 | | - exists(InputSymbol s1, InputSymbol s2, Trace t | this = Step(s1, s2, t) | |
275 | | - result = "Step(" + s1 + ", " + s2 + ", " + t + ")" |
276 | | - ) |
277 | | - } |
278 | | -} |
279 | | - |
280 | | -/** |
281 | | - * Gets a string corresponding to the trace `t`. |
282 | | - */ |
283 | | -string concretise(Trace t) { |
284 | | - t = Nil() and result = "" |
285 | | - or |
286 | | - exists(InputSymbol s1, InputSymbol s2, Trace rest | t = Step(s1, s2, rest) | |
287 | | - result = concretise(rest) + intersect(s1, s2) |
288 | | - ) |
289 | | -} |
290 | | - |
291 | | -/** |
292 | | - * Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is |
293 | | - * a path from `r` back to `(fork, fork)` with `rem` steps. |
294 | | - */ |
295 | | -predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) { |
296 | | - // base case |
297 | | - exists(InputSymbol s1, InputSymbol s2, State q1, State q2 | |
298 | | - isFork(fork, s1, s2, q1, q2) and |
299 | | - r = MkStatePair(q1, q2) and |
300 | | - w = Step(s1, s2, Nil()) and |
301 | | - rem = statePairDist(r, MkStatePair(fork, fork)) |
302 | | - ) |
303 | | - or |
304 | | - // recursive case |
305 | | - exists(StatePair p, Trace v, InputSymbol s1, InputSymbol s2 | |
306 | | - isReachableFromFork(fork, p, v, rem + 1) and |
307 | | - step(p, s1, s2, r) and |
308 | | - w = Step(s1, s2, v) and |
309 | | - rem >= statePairDist(r, MkStatePair(fork, fork)) |
310 | | - ) |
311 | | -} |
312 | | - |
313 | | -/** |
314 | | - * Gets a state in the product automaton from which `(fork, fork)` is |
315 | | - * reachable in zero or more epsilon transitions. |
316 | | - */ |
317 | | -StatePair getAForkPair(State fork) { |
318 | | - isFork(fork, _, _, _, _) and |
319 | | - result = MkStatePair(epsilonPred*(fork), epsilonPred*(fork)) |
320 | | -} |
321 | | - |
322 | | -/** |
323 | | - * Holds if `fork` is a pumpable fork with word `w`. |
324 | | - */ |
325 | | -predicate isPumpable(State fork, string w) { |
326 | | - exists(StatePair q, Trace t | |
327 | | - isReachableFromFork(fork, q, t, _) and |
328 | | - q = getAForkPair(fork) and |
329 | | - w = concretise(t) |
330 | | - ) |
331 | | -} |
332 | | - |
333 | | -/** |
334 | | - * An instantiation of `ReDoSConfiguration` for exponential backtracking. |
335 | | - */ |
336 | | -class ExponentialReDoSConfiguration extends ReDoSConfiguration { |
337 | | - ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" } |
338 | | - |
339 | | - override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) } |
340 | | -} |
| 19 | +import semmle.javascript.security.performance.ExponentialBackTracking |
341 | 20 |
|
342 | 21 | from RegExpTerm t, string pump, State s, string prefixMsg |
343 | 22 | where hasReDoSResult(t, pump, s, prefixMsg) |
|
0 commit comments