|
2 | 2 | * Provides predicates for reasoning about bad tag filter vulnerabilities. |
3 | 3 | */ |
4 | 4 |
|
5 | | -import regexp.RegexpMatching |
6 | | - |
7 | | -/** |
8 | | - * Holds if the regexp `root` should be tested against `str`. |
9 | | - * Implements the `isRegexpMatchingCandidateSig` signature from `RegexpMatching`. |
10 | | - * `ignorePrefix` toggles whether the regular expression should be treated as accepting any prefix if it's unanchored. |
11 | | - * `testWithGroups` toggles whether it's tested which groups are filled by a given input string. |
12 | | - */ |
13 | | -private predicate isBadTagFilterCandidate( |
14 | | - RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups |
15 | | -) { |
16 | | - // the regexp must mention "<" and ">" explicitly. |
17 | | - forall(string angleBracket | angleBracket = ["<", ">"] | |
18 | | - any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() = |
19 | | - root |
20 | | - ) and |
21 | | - ignorePrefix = true and |
22 | | - ( |
23 | | - str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and |
24 | | - testWithGroups = true |
25 | | - or |
26 | | - str = |
27 | | - [ |
28 | | - "<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>", |
29 | | - "<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>", |
30 | | - "<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>", |
31 | | - "<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>", |
32 | | - "<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >", |
33 | | - "<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>" |
34 | | - ] and |
35 | | - testWithGroups = false |
36 | | - ) |
37 | | -} |
38 | | - |
39 | | -/** |
40 | | - * A regexp that matches some string from the `isBadTagFilterCandidate` predicate. |
41 | | - */ |
42 | | -class HtmlMatchingRegExp extends RootTerm { |
43 | | - HtmlMatchingRegExp() { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, _) } |
44 | | - |
45 | | - /** Holds if this regexp matched `str`, where `str` is one of the strings from `isBadTagFilterCandidate`. */ |
46 | | - predicate matches(string str) { RegexpMatching<isBadTagFilterCandidate/4>::matches(this, str) } |
47 | | - |
48 | | - /** Holds if this regexp fills capture group `g` when matching `str`, where `str` is one of the strings from `isBadTagFilterCandidate`. */ |
49 | | - predicate fillsCaptureGroup(string str, int g) { |
50 | | - RegexpMatching<isBadTagFilterCandidate/4>::fillsCaptureGroup(this, str, g) |
51 | | - } |
52 | | -} |
53 | | - |
54 | | -/** DEPRECATED: Alias for HtmlMatchingRegExp */ |
55 | | -deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp; |
56 | | - |
57 | | -/** |
58 | | - * Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match. |
59 | | - * |
60 | | - * When adding a new case to this predicate, make sure the test strings used in the `matches(..)` / `fillsCaptureGroup(..)` calls are present in the string lists of `isBadTagFilterCandidate`. |
61 | | - */ |
62 | | -predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) { |
63 | | - // CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups |
64 | | - regexp.matches("<!-- foo -->") and |
65 | | - regexp.matches("<!-- foo --!>") and |
66 | | - exists(int a, int b | a != b | |
67 | | - regexp.fillsCaptureGroup("<!-- foo -->", a) and |
68 | | - // <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here. |
69 | | - regexp.fillsCaptureGroup("<!-- foo --!>", b) and |
70 | | - not regexp.fillsCaptureGroup("<!-- foo --!>", a) and |
71 | | - msg = |
72 | | - "Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group " |
73 | | - + a + " and comments ending with --!> are matched with capture group " + |
74 | | - strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") + |
75 | | - "." |
76 | | - ) |
77 | | - or |
78 | | - // CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>". |
79 | | - exists(int group, int other | |
80 | | - group != other and |
81 | | - regexp.fillsCaptureGroup("<!-- foo -->", group) and |
82 | | - regexp.fillsCaptureGroup("<foo>", other) and |
83 | | - not regexp.matches("<!-- foo --!>") and |
84 | | - not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and |
85 | | - not regexp.fillsCaptureGroup("<!- foo ->", group) and |
86 | | - not regexp.fillsCaptureGroup("<foo>", group) and |
87 | | - not regexp.fillsCaptureGroup("<script>", group) and |
88 | | - msg = |
89 | | - "This regular expression only parses --> (capture group " + group + |
90 | | - ") and not --!> as an HTML comment end tag." |
91 | | - ) |
92 | | - or |
93 | | - regexp.matches("<!-- foo -->") and |
94 | | - not regexp.matches("<!-- foo\n -->") and |
95 | | - not regexp.matches("<!- foo ->") and |
96 | | - not regexp.matches("<foo>") and |
97 | | - not regexp.matches("<script>") and |
98 | | - msg = "This regular expression does not match comments containing newlines." |
99 | | - or |
100 | | - regexp.matches("<script>foo</script>") and |
101 | | - regexp.matches("<script src=\"foo\"></script>") and |
102 | | - not regexp.matches("<foo ></foo>") and |
103 | | - ( |
104 | | - not regexp.matches("<script \n>foo</script>") and |
105 | | - msg = "This regular expression matches <script></script>, but not <script \\n></script>" |
106 | | - or |
107 | | - not regexp.matches("<script >foo\n</script>") and |
108 | | - msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>" |
109 | | - ) |
110 | | - or |
111 | | - regexp.matches("<script>foo</script>") and |
112 | | - regexp.matches("<script src=\"foo\"></script>") and |
113 | | - not regexp.matches("<script src='foo'></script>") and |
114 | | - not regexp.matches("<foo>") and |
115 | | - msg = "This regular expression does not match script tags where the attribute uses single-quotes." |
116 | | - or |
117 | | - regexp.matches("<script>foo</script>") and |
118 | | - regexp.matches("<script src='foo'></script>") and |
119 | | - not regexp.matches("<script src=\"foo\"></script>") and |
120 | | - not regexp.matches("<foo>") and |
121 | | - msg = "This regular expression does not match script tags where the attribute uses double-quotes." |
122 | | - or |
123 | | - regexp.matches("<script>foo</script>") and |
124 | | - regexp.matches("<script src='foo'></script>") and |
125 | | - not regexp.matches("<script\tsrc='foo'></script>") and |
126 | | - not regexp.matches("<foo>") and |
127 | | - not regexp.matches("<foo src=\"foo\"></foo>") and |
128 | | - msg = "This regular expression does not match script tags where tabs are used between attributes." |
129 | | - or |
130 | | - regexp.matches("<script>foo</script>") and |
131 | | - not RegExpFlags::isIgnoreCase(regexp) and |
132 | | - not regexp.matches("<foo>") and |
133 | | - not regexp.matches("<foo ></foo>") and |
134 | | - ( |
135 | | - not regexp.matches("<SCRIPT>foo</SCRIPT>") and |
136 | | - msg = "This regular expression does not match upper case <SCRIPT> tags." |
137 | | - or |
138 | | - not regexp.matches("<sCrIpT>foo</ScRiPt>") and |
139 | | - regexp.matches("<SCRIPT>foo</SCRIPT>") and |
140 | | - msg = "This regular expression does not match mixed case <sCrIpT> tags." |
141 | | - ) |
142 | | - or |
143 | | - regexp.matches("<script src=\"foo\"></script>") and |
144 | | - not regexp.matches("<foo>") and |
145 | | - not regexp.matches("<foo ></foo>") and |
146 | | - ( |
147 | | - not regexp.matches("<script src=\"foo\">foo</script >") and |
148 | | - msg = "This regular expression does not match script end tags like </script >." |
149 | | - or |
150 | | - not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and |
151 | | - msg = "This regular expression does not match script end tags like </script foo=\"bar\">." |
152 | | - or |
153 | | - not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and |
154 | | - msg = "This regular expression does not match script end tags like </script\\t\\n bar>." |
155 | | - ) |
156 | | -} |
| 5 | +private import regexp.RegExpTreeView::RegExpTreeView as TreeView |
| 6 | +// BadTagFilterQuery should be used directly from the shared pack, and not from this file. |
| 7 | +deprecated private import codeql.regex.nfa.BadTagFilterQuery::Make<TreeView> as Dep |
| 8 | +import Dep |
0 commit comments