Skip to content

Commit 404e226

Browse files
authored
Merge pull request #384 from pre-commit/line_endings
Handle crlf endings in fix-encoding-pragma
2 parents 45fc394 + 79a1b26 commit 404e226

File tree

2 files changed

+29
-18
lines changed

2 files changed

+29
-18
lines changed

pre_commit_hooks/fix_encoding_pragma.py

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -9,14 +9,14 @@
99
from typing import Sequence
1010
from typing import Union
1111

12-
DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-\n'
12+
DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-'
1313

1414

1515
def has_coding(line): # type: (bytes) -> bool
1616
if not line.strip():
1717
return False
1818
return (
19-
line.lstrip()[0:1] == b'#' and (
19+
line.lstrip()[:1] == b'#' and (
2020
b'unicode' in line or
2121
b'encoding' in line or
2222
b'coding:' in line or
@@ -26,7 +26,7 @@ def has_coding(line): # type: (bytes) -> bool
2626

2727

2828
class ExpectedContents(collections.namedtuple(
29-
'ExpectedContents', ('shebang', 'rest', 'pragma_status'),
29+
'ExpectedContents', ('shebang', 'rest', 'pragma_status', 'ending'),
3030
)):
3131
"""
3232
pragma_status:
@@ -47,6 +47,8 @@ def is_expected_pragma(self, remove): # type: (bool) -> bool
4747

4848
def _get_expected_contents(first_line, second_line, rest, expected_pragma):
4949
# type: (bytes, bytes, bytes, bytes) -> ExpectedContents
50+
ending = b'\r\n' if first_line.endswith(b'\r\n') else b'\n'
51+
5052
if first_line.startswith(b'#!'):
5153
shebang = first_line
5254
potential_coding = second_line
@@ -55,7 +57,7 @@ def _get_expected_contents(first_line, second_line, rest, expected_pragma):
5557
potential_coding = first_line
5658
rest = second_line + rest
5759

58-
if potential_coding == expected_pragma:
60+
if potential_coding.rstrip(b'\r\n') == expected_pragma:
5961
pragma_status = True # type: Optional[bool]
6062
elif has_coding(potential_coding):
6163
pragma_status = None
@@ -64,7 +66,7 @@ def _get_expected_contents(first_line, second_line, rest, expected_pragma):
6466
rest = potential_coding + rest
6567

6668
return ExpectedContents(
67-
shebang=shebang, rest=rest, pragma_status=pragma_status,
69+
shebang=shebang, rest=rest, pragma_status=pragma_status, ending=ending,
6870
)
6971

7072

@@ -93,7 +95,7 @@ def fix_encoding_pragma(f, remove=False, expected_pragma=DEFAULT_PRAGMA):
9395
f.truncate()
9496
f.write(expected.shebang)
9597
if not remove:
96-
f.write(expected_pragma)
98+
f.write(expected_pragma + expected.ending)
9799
f.write(expected.rest)
98100

99101
return 1
@@ -102,11 +104,7 @@ def fix_encoding_pragma(f, remove=False, expected_pragma=DEFAULT_PRAGMA):
102104
def _normalize_pragma(pragma): # type: (Union[bytes, str]) -> bytes
103105
if not isinstance(pragma, bytes):
104106
pragma = pragma.encode('UTF-8')
105-
return pragma.rstrip() + b'\n'
106-
107-
108-
def _to_disp(pragma): # type: (bytes) -> str
109-
return pragma.decode().rstrip()
107+
return pragma.rstrip()
110108

111109

112110
def main(argv=None): # type: (Optional[Sequence[str]]) -> int
@@ -117,7 +115,7 @@ def main(argv=None): # type: (Optional[Sequence[str]]) -> int
117115
parser.add_argument(
118116
'--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma,
119117
help='The encoding pragma to use. Default: {}'.format(
120-
_to_disp(DEFAULT_PRAGMA),
118+
DEFAULT_PRAGMA.decode(),
121119
),
122120
)
123121
parser.add_argument(
@@ -141,7 +139,7 @@ def main(argv=None): # type: (Optional[Sequence[str]]) -> int
141139
retv |= file_ret
142140
if file_ret:
143141
print(fmt.format(
144-
pragma=_to_disp(args.pragma), filename=filename,
142+
pragma=args.pragma.decode(), filename=filename,
145143
))
146144

147145
return retv

tests/fix_encoding_pragma_test.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -112,15 +112,15 @@ def test_not_ok_inputs(input_str, output):
112112
def test_ok_input_alternate_pragma():
113113
input_s = b'# coding: utf-8\nx = 1\n'
114114
bytesio = io.BytesIO(input_s)
115-
ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8\n')
115+
ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8')
116116
assert ret == 0
117117
bytesio.seek(0)
118118
assert bytesio.read() == input_s
119119

120120

121121
def test_not_ok_input_alternate_pragma():
122122
bytesio = io.BytesIO(b'x = 1\n')
123-
ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8\n')
123+
ret = fix_encoding_pragma(bytesio, expected_pragma=b'# coding: utf-8')
124124
assert ret == 1
125125
bytesio.seek(0)
126126
assert bytesio.read() == b'# coding: utf-8\nx = 1\n'
@@ -130,11 +130,11 @@ def test_not_ok_input_alternate_pragma():
130130
('input_s', 'expected'),
131131
(
132132
# Python 2 cli parameters are bytes
133-
(b'# coding: utf-8', b'# coding: utf-8\n'),
133+
(b'# coding: utf-8', b'# coding: utf-8'),
134134
# Python 3 cli parameters are text
135-
('# coding: utf-8', b'# coding: utf-8\n'),
135+
('# coding: utf-8', b'# coding: utf-8'),
136136
# trailing whitespace
137-
('# coding: utf-8\n', b'# coding: utf-8\n'),
137+
('# coding: utf-8\n', b'# coding: utf-8'),
138138
),
139139
)
140140
def test_normalize_pragma(input_s, expected):
@@ -150,3 +150,16 @@ def test_integration_alternate_pragma(tmpdir, capsys):
150150
assert f.read() == '# coding: utf-8\nx = 1\n'
151151
out, _ = capsys.readouterr()
152152
assert out == 'Added `# coding: utf-8` to {}\n'.format(f.strpath)
153+
154+
155+
def test_crlf_ok(tmpdir):
156+
f = tmpdir.join('f.py')
157+
f.write_binary(b'# -*- coding: utf-8 -*-\r\nx = 1\r\n')
158+
assert not main((f.strpath,))
159+
160+
161+
def test_crfl_adds(tmpdir):
162+
f = tmpdir.join('f.py')
163+
f.write_binary(b'x = 1\r\n')
164+
assert main((f.strpath,))
165+
assert f.read_binary() == b'# -*- coding: utf-8 -*-\r\nx = 1\r\n'

0 commit comments

Comments
 (0)