Skip to content

Commit 5ba0a1a

Browse files
gh-136702: Deprecate passing non-ascii *encoding* (str) to encodings.normalize_encoding (#140030)
Closes #136702
1 parent 7ae440f commit 5ba0a1a

File tree

8 files changed

+42
-6
lines changed

8 files changed

+42
-6
lines changed

Doc/deprecations/pending-removal-in-3.17.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
2323
(Contributed by Shantanu Jain in :gh:`91896`.)
2424

2525

26+
* :mod:`encodings`:
27+
28+
- Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
29+
is deprecated and scheduled for removal in Python 3.17.
30+
(Contributed by Stan Ulbrych in :gh:`136702`.)
31+
2632
* :mod:`typing`:
2733

2834
- Before Python 3.14, old-style unions were implemented using the private class

Lib/email/_header_value_parser.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,10 @@ def params(self):
796796
value = urllib.parse.unquote(value, encoding='latin-1')
797797
else:
798798
try:
799+
# Explicitly look up the codec for warning generation, see gh-140030
800+
# Can be removed in 3.17
801+
import codecs
802+
codecs.lookup(charset)
799803
value = value.decode(charset, 'surrogateescape')
800804
except (LookupError, UnicodeEncodeError):
801805
# XXX: there should really be a custom defect for

Lib/email/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace',
460460
charset = fallback_charset
461461
rawbytes = bytes(text, 'raw-unicode-escape')
462462
try:
463+
# Explicitly look up the codec for warning generation, see gh-140030
464+
# Can be removed in 3.17
465+
import codecs
466+
codecs.lookup(charset)
463467
return str(rawbytes, charset, errors)
464468
except LookupError:
465469
# charset is not a known codec.

Lib/encodings/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
2727
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
2828
29-
"""#"
29+
"""
3030

3131
import codecs
3232
import sys
@@ -56,6 +56,12 @@ def normalize_encoding(encoding):
5656
if isinstance(encoding, bytes):
5757
encoding = str(encoding, "ascii")
5858

59+
if not encoding.isascii():
60+
import warnings
61+
warnings.warn(
62+
"Support for non-ascii encoding names will be removed in 3.17",
63+
DeprecationWarning, stacklevel=2)
64+
5965
return _normalize_encoding(encoding)
6066

6167
def search_function(encoding):

Lib/test/test_codecs.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3886,22 +3886,26 @@ def search_function(encoding):
38863886
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
38873887
self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
38883888
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa---8', 2, 3, 4))
3889-
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
38903889
self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
38913890
self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
3891+
with self.assertWarns(DeprecationWarning):
3892+
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
38923893

38933894
def test_encodings_normalize_encoding(self):
3894-
# encodings.normalize_encoding() ignores non-ASCII characters.
38953895
normalize = encodings.normalize_encoding
38963896
self.assertEqual(normalize('utf_8'), 'utf_8')
3897-
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
38983897
self.assertEqual(normalize('utf 8'), 'utf_8')
38993898
# encodings.normalize_encoding() doesn't convert
39003899
# characters to lower case.
39013900
self.assertEqual(normalize('UTF 8'), 'UTF_8')
39023901
self.assertEqual(normalize('utf.8'), 'utf.8')
39033902
self.assertEqual(normalize('utf...8'), 'utf...8')
39043903

3904+
# Non-ASCII *encoding* is deprecated.
3905+
with self.assertWarnsRegex(DeprecationWarning,
3906+
"Support for non-ascii encoding names will be removed in 3.17"):
3907+
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
3908+
39053909

39063910
if __name__ == "__main__":
39073911
unittest.main()

Lib/test/test_email/test_email.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5738,7 +5738,8 @@ def test_rfc2231_bad_character_in_encoding(self):
57385738
57395739
"""
57405740
msg = email.message_from_string(m)
5741-
self.assertEqual(msg.get_filename(), 'myfile.txt')
5741+
with self.assertWarns(DeprecationWarning):
5742+
self.assertEqual(msg.get_filename(), 'myfile.txt')
57425743

57435744
def test_rfc2231_single_tick_in_filename_extended(self):
57445745
eq = self.assertEqual

Lib/test/test_email/test_headerregistry.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,15 @@ def content_type_as_value(self,
247247
decoded = args[2] if l>2 and args[2] is not DITTO else source
248248
header = 'Content-Type:' + ' ' if source else ''
249249
folded = args[3] if l>3 else header + decoded + '\n'
250-
h = self.make_header('Content-Type', source)
250+
# Both rfc2231 test cases with utf-8%E2%80%9D raise warnings,
251+
# clear encoding cache to ensure test isolation.
252+
if 'utf-8%E2%80%9D' in source and 'ascii' not in source:
253+
import encodings
254+
encodings._cache.clear()
255+
with self.assertWarns(DeprecationWarning):
256+
h = self.make_header('Content-Type', source)
257+
else:
258+
h = self.make_header('Content-Type', source)
251259
self.assertEqual(h.content_type, content_type)
252260
self.assertEqual(h.maintype, maintype)
253261
self.assertEqual(h.subtype, subtype)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
2+
:func:`encodings.normalize_encoding` and schedule removal of support for
3+
Python 3.17.

0 commit comments

Comments
 (0)