@@ -8,35 +8,64 @@ class BashShellScript extends ShellScript {
88 )
99 }
1010
11- private string lineProducer ( int i ) {
12- result = this .getRawScript ( ) .regexpReplaceAll ( "\\\\\\s*\n" , "" ) .splitAt ( "\n" , i )
13- }
14-
15- private predicate cmdSubstitutionReplacement ( string cmdSubs , string id , int k ) {
16- exists ( string line | line = this .lineProducer ( k ) |
17- exists ( int i , int j |
18- cmdSubs =
19- // $() cmd substitution
20- line .regexpFind ( "\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)" , i , j )
21- .regexpReplaceAll ( "^\\$\\(" , "" )
22- .regexpReplaceAll ( "\\)$" , "" ) and
23- id = "cmdsubs:" + k + ":" + i + ":" + j
24- )
25- or
26- exists ( int i , int j |
27- // `...` cmd substitution
28- cmdSubs =
29- line .regexpFind ( "\\`[^\\`]+\\`" , i , j )
30- .regexpReplaceAll ( "^\\`" , "" )
31- .regexpReplaceAll ( "\\`$" , "" ) and
32- id = "cmd:" + k + ":" + i + ":" + j
33- )
11+ /**
12+ * Gets the line at 0-based index `lineIndex` within this shell script,
13+ * assuming newlines as separators.
14+ */
15+ private string lineProducer ( int lineIndex ) {
16+ result = this .getRawScript ( ) .regexpReplaceAll ( "\\\\\\s*\n" , "" ) .splitAt ( "\n" , lineIndex )
17+ }
18+
19+ private predicate cmdSubstitutionReplacement ( string command , string id , int lineIndex ) {
20+ this .commandInSubstitution ( lineIndex , command , id )
21+ or
22+ this .commandInBackticks ( lineIndex , command , id )
23+ }
24+
25+ /**
26+ * Holds if there is a command substitution `$(command)` in
27+ * the line at `lineIndex` in the shell script,
28+ * and `id` is a unique identifier for this command.
29+ */
30+ private predicate commandInSubstitution ( int lineIndex , string command , string id ) {
31+ exists ( int occurrenceIndex , int occurrenceOffset |
32+ command =
33+ // Look for the command inside a $(...) command substitution
34+ this .lineProducer ( lineIndex )
35+ .regexpFind ( "\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)" , occurrenceIndex ,
36+ occurrenceOffset )
37+ // trim starting $( - TODO do this in first regex
38+ .regexpReplaceAll ( "^\\$\\(" , "" )
39+ // trim ending ) - TODO do this in first regex
40+ .regexpReplaceAll ( "\\)$" , "" ) and
41+ id = "cmdsubs:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
3442 )
3543 }
3644
37- private predicate rankedCmdSubstitutionReplacements ( int i , string old , string new ) {
38- old = rank [ i ] ( string old2 | this .cmdSubstitutionReplacement ( old2 , _, _) | old2 ) and
39- this .cmdSubstitutionReplacement ( old , new , _)
45+ /**
46+ * Holds if `command` is a command in backticks `` `...` `` in
47+ * the line at `lineIndex` in the shell script,
48+ * and `id` is a unique identifier for this command.
49+ */
50+ private predicate commandInBackticks ( int lineIndex , string command , string id ) {
51+ exists ( int occurrenceIndex , int occurrenceOffset |
52+ command =
53+ this .lineProducer ( lineIndex )
54+ .regexpFind ( "\\`[^\\`]+\\`" , occurrenceIndex , occurrenceOffset )
55+ // trim leading backtick - TODO do this in first regex
56+ .regexpReplaceAll ( "^\\`" , "" )
57+ // trim trailing backtick - TODO do this in first regex
58+ .regexpReplaceAll ( "\\`$" , "" ) and
59+ id = "cmd:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
60+ )
61+ }
62+
63+ private predicate rankedCmdSubstitutionReplacements ( int i , string command , string commandId ) {
64+ // rank commands by their unique IDs
65+ commandId = rank [ i ] ( string c , string id | this .cmdSubstitutionReplacement ( c , id , _) | id ) and
66+ // since we cannot output (command, ID) tuples from the rank operation,
67+ // we need to work out the specific command associated with the resulting ID
68+ this .cmdSubstitutionReplacement ( command , commandId , _)
4069 }
4170
4271 private predicate doReplaceCmdSubstitutions ( int line , int round , string old , string new ) {
@@ -64,31 +93,56 @@ class BashShellScript extends ShellScript {
6493 this .cmdSubstitutionReplacement ( result , _, i )
6594 }
6695
96+ /**
97+ * Holds if `quotedStr` is a string in double quotes in
98+ * the line at `lineIndex` in the shell script,
99+ * and `id` is a unique identifier for this quoted string.
100+ */
101+ private predicate doubleQuotedString ( int lineIndex , string quotedStr , string id ) {
102+ exists ( int occurrenceIndex , int occurrenceOffset |
103+ // double quoted string
104+ quotedStr =
105+ this .cmdSubstitutedLineProducer ( lineIndex )
106+ .regexpFind ( "\"((?:[^\"\\\\]|\\\\.)*)\"" , occurrenceIndex , occurrenceOffset ) and
107+ id =
108+ "qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
109+ quotedStr .length ( ) + ":" + quotedStr .regexpReplaceAll ( "[^a-zA-Z0-9]" , "" )
110+ )
111+ }
112+
113+ /**
114+ * Holds if `quotedStr` is a string in single quotes in
115+ * the line at `lineIndex` in the shell script,
116+ * and `id` is a unique identifier for this quoted string.
117+ */
118+ private predicate singleQuotedString ( int lineIndex , string quotedStr , string id ) {
119+ exists ( int occurrenceIndex , int occurrenceOffset |
120+ // single quoted string
121+ quotedStr =
122+ this .cmdSubstitutedLineProducer ( lineIndex )
123+ .regexpFind ( "'((?:\\\\.|[^'\\\\])*)'" , occurrenceIndex , occurrenceOffset ) and
124+ id =
125+ "qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
126+ quotedStr .length ( ) + ":" + quotedStr .regexpReplaceAll ( "[^a-zA-Z0-9]" , "" )
127+ )
128+ }
129+
67130 private predicate quotedStringReplacement ( string quotedStr , string id ) {
68- exists ( string line , int k | line = this .cmdSubstitutedLineProducer ( k ) |
69- exists ( int i , int j |
70- // double quoted string
71- quotedStr = line .regexpFind ( "\"((?:[^\"\\\\]|\\\\.)*)\"" , i , j ) and
72- id =
73- "qstr:" + k + ":" + i + ":" + j + ":" + quotedStr .length ( ) + ":" +
74- quotedStr .regexpReplaceAll ( "[^a-zA-Z0-9]" , "" )
75- )
131+ exists ( int lineIndex |
132+ this .doubleQuotedString ( lineIndex , quotedStr , id )
76133 or
77- exists ( int i , int j |
78- // single quoted string
79- quotedStr = line .regexpFind ( "'((?:\\\\.|[^'\\\\])*)'" , i , j ) and
80- id =
81- "qstr:" + k + ":" + i + ":" + j + ":" + quotedStr .length ( ) + ":" +
82- quotedStr .regexpReplaceAll ( "[^a-zA-Z0-9]" , "" )
83- )
134+ this .singleQuotedString ( lineIndex , quotedStr , id )
84135 ) and
85136 // Only do this for strings that might otherwise disrupt subsequent parsing
86137 quotedStr .regexpMatch ( "[\"'].*[$\n\r'\"" + Bash:: separator ( ) + "].*[\"']" )
87138 }
88139
89- private predicate rankedQuotedStringReplacements ( int i , string old , string new ) {
90- old = rank [ i ] ( string old2 | this .quotedStringReplacement ( old2 , _) | old2 ) and
91- this .quotedStringReplacement ( old , new )
140+ private predicate rankedQuotedStringReplacements ( int i , string quotedString , string quotedStringId ) {
141+ // rank quoted strings by their nearly-unique IDs
142+ quotedStringId = rank [ i ] ( string s , string id | this .quotedStringReplacement ( s , id ) | id ) and
143+ // since we cannot output (string, ID) tuples from the rank operation,
144+ // we need to work out the specific string associated with the resulting ID
145+ this .quotedStringReplacement ( quotedString , quotedStringId )
92146 }
93147
94148 private predicate doReplaceQuotedStrings ( int line , int round , string old , string new ) {
0 commit comments