2424# in order to avoid misinterpreting the ")" in constructs such as "x=$(...)"
2525# and "case $x in *)" as ending the subshell.
2626#
27- # Lines missing a final "&&" are flagged with "?!AMP?!", and lines which chain
28- # commands with ";" internally rather than "&&" are flagged "?!SEMI?!". A line
29- # may be flagged for both violations.
27+ # Lines missing a final "&&" are flagged with "?!AMP?!", as are lines which
28+ # chain commands with ";" internally rather than "&&". A line may be flagged
29+ # for both violations.
3030#
3131# Detection of a missing &&-link in a multi-line subshell is complicated by the
3232# fact that the last statement before the closing ")" must not end with "&&".
4747# "?!AMP?!" violation is removed from the "bar" line (retrieved from the "hold"
4848# area) since the final statement of a subshell must not end with "&&". The
4949# final line of a subshell may still break the &&-chain by using ";" internally
50- # to chain commands together rather than "&&", so "?!SEMI?!" is never removed
51- # from a line (even though "?!AMP?!" might be).
50+ # to chain commands together rather than "&&", but an internal "?!AMP?!" is
51+ # never removed from a line even though a line-ending "?!AMP?!" might be.
5252#
5353# Care is taken to recognize the last _statement_ of a multi-line subshell, not
5454# necessarily the last textual _line_ within the subshell, since &&-chaining
6262# receives similar treatment.
6363#
6464# Swallowing here-docs with arbitrary tags requires a bit of finesse. When a
65- # line such as "cat <<EOF >out" is seen, the here-doc tag is moved to the front
66- # of the line enclosed in angle brackets as a sentinel, giving "<EOF>cat >out".
65+ # line such as "cat <<EOF" is seen, the here-doc tag is copied to the front of
66+ # the line enclosed in angle brackets as a sentinel, giving "<EOF>cat <<EOF".
6767# As each subsequent line is read, it is appended to the target line and a
6868# (whitespace-loose) back-reference match /^<(.*)>\n\1$/ is attempted to see if
6969# the content inside "<...>" matches the entirety of the newly-read line. For
7070# instance, if the next line read is "some data", when concatenated with the
71- # target line, it becomes "<EOF>cat >out\nsome data", and a match is attempted
71+ # target line, it becomes "<EOF>cat <<EOF\nsome data", and a match is attempted
7272# to see if "EOF" matches "some data". Since it doesn't, the next line is
7373# attempted. When a line consisting of only "EOF" (and possible whitespace) is
74- # encountered, it is appended to the target line giving "<EOF>cat >out\nEOF",
74+ # encountered, it is appended to the target line giving "<EOF>cat <<EOF\nEOF",
7575# in which case the "EOF" inside "<...>" does match the text following the
7676# newline, thus the closing here-doc tag has been found. The closing tag line
7777# and the "<...>" prefix on the target line are then discarded, leaving just
78- # the target line "cat >out".
79- #
80- # To facilitate regression testing (and manual debugging), a ">" annotation is
81- # applied to the line containing ")" which closes a subshell, ">>" to a line
82- # closing a nested subshell, and ">>>" to a line closing both at once. This
83- # makes it easy to detect whether the heuristics correctly identify
84- # end-of-subshell.
78+ # the target line "cat <<EOF".
8579# ------------------------------------------------------------------------------
8680
8781# incomplete line -- slurp up next line
9488
9589# here-doc -- swallow it to avoid false hits within its body (but keep the
9690# command to which it was attached)
97- /<<[ ] * [- \\ '"] * [A-Za-z0-9_] / {
98- s / ^ \( . * \) <<[ ] * [- \\ ' "]* \( [A-Za-z0-9_][A-Za-z0-9_] * \) ['"] * / < \2 > \1 << /
99- s /[ ] * << / /
91+ /<<- * [ ] * [\\ '"] * [A-Za-z0-9_] / {
92+ / " [ ^ "] * <<[^ "] * " / b notdoc
93+ s /^ \( . * <<- * [ ] * \) [ \\ '"] * \( [A-Za-z0-9_][A-Za-z0-9_] * \) ['"] * / < \2 > \1\2 /
10094 : hered
10195 N
10296 /^ <\( [^ >] * \) >. * \n [ ] * \1 [ ] * $ /! {
106100 s /^ <[^ >] * > //
107101 s /\n . * $ //
108102}
103+ : notdoc
109104
110105# one-liner "(...) &&"
111106/^ [ ] * !* [ ] * ( .. * ) [ ] * &&[ ] * $ /b oneline
126121# "&&" (but not ";" in a string)
127122: oneline
128123/; /{
129- /"[^ "] * ;[^ "] * " /! s /^ / ?!SEMI ?! /
124+ /"[^ "] * ;[^ "] * " /! s /; / ; ?!AMP ?! /
130125}
131126b
132127
136131 h
137132 b nextln
138133}
139- # "(..." line -- split off and stash "(", then process "..." as its own line
134+ # "(..." line -- "(" opening subshell cuddled with command; temporarily replace
135+ # "(" with sentinel "^" and process the line as if "(" had been seen solo on
136+ # the preceding line; this temporary replacement prevents several rules from
137+ # accidentally thinking "(" introduces a nested subshell; "^" is changed back
138+ # to "(" at output time
140139x
141- s /. * /( /
140+ s /. * //
142141x
143- s /( //
142+ s /( /^ /
144143b slurp
145144
146145: nextln
@@ -157,8 +156,10 @@ s/.*\n//
157156 /"[^ '"] * '[^ '"] * " /! b sqstr
158157}
159158: folded
160- # here-doc -- swallow it
161- /<<[ ] * [-\\ '"] * [A-Za-z0-9_] /b heredoc
159+ # here-doc -- swallow it (but not "<<" in a string)
160+ /<<-* [ ] * [\\ '"] * [A-Za-z0-9_] /{
161+ /"[^ "] * <<[^ "] * " /! b heredoc
162+ }
162163# comment or empty line -- discard since final non-comment, non-empty line
163164# before closing ")", "done", "elif", "else", or "fi" will need to be
164165# re-visited to drop "suspect" marking since final line of those constructs
@@ -171,12 +172,12 @@ s/.*\n//
171172 /"[^ "] * #[^ "] * " /! s /[ ] #. * $ //
172173}
173174# one-liner "case ... esac"
174- /^ [ ] * case[ ] * .. * esac /b chkchn
175+ /^ [ ^ ] * case[ ] * .. * esac /b chkchn
175176# multi-line "case ... esac"
176- /^ [ ] * case[ ] .. * [ ] in /b case
177+ /^ [ ^ ] * case[ ] .. * [ ] in /b case
177178# multi-line "for ... done" or "while ... done"
178- /^ [ ] * for[ ] .. * [ ] in /b cont
179- /^ [ ] * while[ ] /b cont
179+ /^ [ ^ ] * for[ ] .. * [ ] in /b cont
180+ /^ [ ^ ] * while[ ] /b cont
180181/^ [ ] * do[ ] /b cont
181182/^ [ ] * do[ ] * $ /b cont
182183/;[ ] * do /b cont
@@ -187,7 +188,7 @@ s/.*\n//
187188/|| [ ] * exit[ ] /b cont
188189/|| [ ] * exit[ ] * $ /b cont
189190# multi-line "if...elif...else...fi"
190- /^ [ ] * if[ ] /b cont
191+ /^ [ ^ ] * if[ ] /b cont
191192/^ [ ] * then[ ] /b cont
192193/^ [ ] * then[ ] * $ /b cont
193194/;[ ] * then /b cont
@@ -200,15 +201,15 @@ s/.*\n//
200201/^ [ ] * fi[ ] * [<>|] /b done
201202/^ [ ] * fi[ ] * ) /b done
202203# nested one-liner "(...) &&"
203- /^ [ ] * ( . * ) [ ] * &&[ ] * $ /b chkchn
204+ /^ [ ^ ] * ( . * ) [ ] * &&[ ] * $ /b chkchn
204205# nested one-liner "(...)"
205- /^ [ ] * ( . * ) [ ] * $ /b chkchn
206+ /^ [ ^ ] * ( . * ) [ ] * $ /b chkchn
206207# nested one-liner "(...) >x" (or "2>x" or "<x" or "|x")
207- /^ [ ] * ( . * ) [ ] * [0-9] * [<>|] /b chkchn
208+ /^ [ ^ ] * ( . * ) [ ] * [0-9] * [<>|] /b chkchn
208209# nested multi-line "(...\n...)"
209- /^ [ ] * ( /b nest
210+ /^ [ ^ ] * ( /b nest
210211# multi-line "{...\n...}"
211- /^ [ ] * { /b block
212+ /^ [ ^ ] * { /b block
212213# closing ")" on own line -- exit subshell
213214/^ [ ] * ) /b clssolo
214215# "$((...))" -- arithmetic expansion; not closing ")"
@@ -230,16 +231,18 @@ s/.*\n//
230231# string and not ";;" in one-liner "case...esac")
231232/; /{
232233 /;; /! {
233- /"[^ "] * ;[^ "] * " /! s /^ / ?!SEMI ?! /
234+ /"[^ "] * ;[^ "] * " /! s /; / ; ?!AMP ?! /
234235 }
235236}
236237# line ends with pipe "...|" -- valid; not missing "&&"
237238/| [ ] * $ /b cont
238239# missing end-of-line "&&" -- mark suspect
239- /&&[ ] * $ /! s /^ / ?!AMP?! /
240+ /&&[ ] * $ /! s /$ / ?!AMP?! /
240241: cont
241242# retrieve and print previous line
242243x
244+ s /^ \( [ ] * \) ^ /\1 ( /
245+ s /? !HERE? ! /<< /g
243246n
244247b slurp
245248
@@ -280,8 +283,7 @@ bfolded
280283# found here-doc -- swallow it to avoid false hits within its body (but keep
281284# the command to which it was attached)
282285: heredoc
283- s /^ \( . * \) <<[ ] * [-\\ '"] * \( [A-Za-z0-9_][A-Za-z0-9_] * \) ['"] * /<\2 >\1 << /
284- s /[ ] * << //
286+ s /^ \( . * \) <<\( -* [ ] * \) [\\ '"] * \( [A-Za-z0-9_][A-Za-z0-9_] * \) ['"] * /<\3 >\1 ?!HERE?!\2\3 /
285287: hdocsub
286288N
287289/^ <\( [^ >] * \) >. * \n [ ] * \1 [ ] * $ /! {
@@ -295,23 +297,31 @@ bfolded
295297# found "case ... in" -- pass through untouched
296298: case
297299x
300+ s /^ \( [ ] * \) ^ /\1 ( /
301+ s /? !HERE? ! /<< /g
298302n
303+ : cascom
304+ /^ [ ] * # /{
305+ N
306+ s /. * \n //
307+ b cascom
308+ }
299309/^ [ ] * esac /b slurp
300310b case
301311
302312# found "else" or "elif" -- drop "suspect" from final line before "else" since
303313# that line legitimately lacks "&&"
304314: else
305315x
306- s /? !AMP? ! //
316+ s /\( ? !AMP? !\) * ? !AMP ? ! $ //
307317x
308318b cont
309319
310320# found "done" closing for-loop or while-loop, or "fi" closing if-then -- drop
311321# "suspect" from final contained line since that line legitimately lacks "&&"
312322: done
313323x
314- s /? !AMP? ! //
324+ s /\( ? !AMP? !\) * ? !AMP ? ! $ //
315325x
316326# is 'done' or 'fi' cuddled with ")" to close subshell?
317327/done. * ) /b close
@@ -322,11 +332,18 @@ bchkchn
322332: nest
323333x
324334: nstslrp
335+ s /^ \( [ ] * \) ^ /\1 ( /
336+ s /? !HERE? ! /<< /g
325337n
338+ : nstcom
339+ # comment -- not closing ")" if in comment
340+ /^ [ ] * # /{
341+ N
342+ s /. * \n //
343+ b nstcom
344+ }
326345# closing ")" on own line -- stop nested slurp
327346/^ [ ] * ) /b nstcl
328- # comment -- not closing ")" if in comment
329- /^ [ ] * # /b nstcnt
330347# "$((...))" -- arithmetic expansion; not closing ")"
331348/\$ (( [^ )][^ )] * )) [^ )] * $ /b nstcnt
332349# "$(...)" -- command substitution; not closing ")"
337354x
338355b nstslrp
339356: nstcl
340- s /^ />> /
341357# is it "))" which closes nested and parent subshells?
342358/) [ ] * ) /b slurp
343359b chkchn
344360
345361# found multi-line "{...\n...}" block -- pass through untouched
346362: block
347363x
364+ s /^ \( [ ] * \) ^ /\1 ( /
365+ s /? !HERE? ! /<< /g
348366n
367+ : blkcom
368+ /^ [ ] * # /{
369+ N
370+ s /. * \n //
371+ b blkcom
372+ }
349373# closing "}" -- stop block slurp
350374/} /b chkchn
351375b block
@@ -354,16 +378,22 @@ bblock
354378# since that line legitimately lacks "&&" and exit subshell loop
355379: clssolo
356380x
357- s /? !AMP? ! //
381+ s /\( ? !AMP? !\) * ? !AMP? !$ //
382+ s /^ \( [ ] * \) ^ /\1 ( /
383+ s /? !HERE? ! /<< /g
358384p
359385x
360- s /^ /> /
386+ s /^ \( [ ] * \) ^ /\1 ( /
387+ s /? !HERE? ! /<< /g
361388b
362389
363390# found closing "...)" -- exit subshell loop
364391: close
365392x
393+ s /^ \( [ ] * \) ^ /\1 ( /
394+ s /? !HERE? ! /<< /g
366395p
367396x
368- s /^ /> /
397+ s /^ \( [ ] * \) ^ /\1 ( /
398+ s /? !HERE? ! /<< /g
369399b
0 commit comments