From fe6dd199f46dc23e29cb9e4d6e1261d2656cf13d Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 06:31:05 +0500 Subject: [PATCH 1/6] gh-141314: Fix TextIOWrapper.tell() assertion failure with standalone carriage return When TextIOWrapper.tell() is called after reading a line that ends with a standalone carriage return (\r), the tell optimization algorithm incorrectly assumes there is buffered data to search through. This causes an assertion failure when skip_back=1 exceeds the empty buffer size. The fix detects when next_input is empty and skips the optimization phase, falling back to the byte-by-byte decoding method which always works correctly. This properly handles the architectural constraint that buffer optimization cannot function without buffered data. --- Lib/test/test_io/test_textio.py | 18 ++++++++++++++++++ Modules/_io/textio.c | 15 +++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index d8d0928b4ba69b..2ffc370be99b94 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -686,6 +686,24 @@ def test_multibyte_seek_and_tell(self): self.assertEqual(f.tell(), p1) f.close() + def test_tell_after_readline_with_cr(self): + # Test for gh-141314: TextIOWrapper.tell() assertion failure + # when dealing with standalone carriage returns + data = b'line1=1\r' + with self.open(os_helper.TESTFN, "wb") as f: + f.write(data) + + with self.open(os_helper.TESTFN, "r") as f: + # Read line that ends with \r + line = f.readline() + self.assertEqual(line, "line1=1\n") + # This should not cause an assertion failure + pos = f.tell() + # Verify we can seek back to this position + f.seek(pos) + remaining = f.read() + self.assertEqual(remaining, "") + def test_seek_with_encoder_state(self): f = self.open(os_helper.TESTFN, "w", encoding="euc_jis_2004") f.write("\u00e6\u0300") diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 
c462bd2ac578fe..401eb7724f8f9f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2844,10 +2844,16 @@ _io_TextIOWrapper_tell_impl(textio *self) /* Fast search for an acceptable start point, close to our current pos */ skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); - skip_back = 1; - assert(skip_back <= PyBytes_GET_SIZE(next_input)); - input = PyBytes_AS_STRING(next_input); - while (skip_bytes > 0) { + + /* Skip the optimization if next_input is empty */ + if (PyBytes_GET_SIZE(next_input) == 0) { + skip_bytes = 0; + } + else { + skip_back = 1; + assert(skip_back <= PyBytes_GET_SIZE(next_input)); + input = PyBytes_AS_STRING(next_input); + while (skip_bytes > 0) { /* Decode up to temptative start point */ if (_textiowrapper_decoder_setstate(self, &cookie) < 0) goto fail; @@ -2870,6 +2876,7 @@ _io_TextIOWrapper_tell_impl(textio *self) skip_back *= 2; } } + } if (skip_bytes <= 0) { skip_bytes = 0; if (_textiowrapper_decoder_setstate(self, &cookie) < 0) From 69960dc1755b44ee06904d58c648dd6d570b86cf Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 06:49:49 +0500 Subject: [PATCH 2/6] gh-141314: Fix TextIOWrapper.tell() assertion failure with standalone carriage return Add test case and fix assertion failure in TextIOWrapper.tell() when reading files that end with a standalone carriage return (\r). The optimization algorithm incorrectly assumed buffered data would always be available, causing an assertion failure when next_input is empty. 
--- .../next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst diff --git a/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst b/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst new file mode 100644 index 00000000000000..ef9046d84767a3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst @@ -0,0 +1 @@ +Fix assertion failure in :meth:`io.TextIOWrapper.tell` when reading files with standalone carriage return (``\r``) line endings. \ No newline at end of file From cdb92836e1bc53d871ff064822141a1d1c5ee579 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 06:54:45 +0500 Subject: [PATCH 3/6] Fix linting issues - Remove trailing whitespace - Add missing newline at end of NEWS file --- .../next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst | 2 +- Modules/_io/textio.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst b/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst index ef9046d84767a3..37acaabfa3eada 100644 --- a/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst +++ b/Misc/NEWS.d/next/Library/2025-11-10-01-47-18.gh-issue-141314.baaa28.rst @@ -1 +1 @@ -Fix assertion failure in :meth:`io.TextIOWrapper.tell` when reading files with standalone carriage return (``\r``) line endings. \ No newline at end of file +Fix assertion failure in :meth:`io.TextIOWrapper.tell` when reading files with standalone carriage return (``\r``) line endings. 
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 401eb7724f8f9f..2c974fe44090e6 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2844,7 +2844,6 @@ _io_TextIOWrapper_tell_impl(textio *self) /* Fast search for an acceptable start point, close to our current pos */ skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); - /* Skip the optimization if next_input is empty */ if (PyBytes_GET_SIZE(next_input) == 0) { skip_bytes = 0; From b1e18857a02226fc5291a6e45b25943b4dc3f014 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 14:30:21 +0500 Subject: [PATCH 4/6] Refactor code structure and add test for multiple CR cases --- Lib/test/test_io/test_textio.py | 40 +++++++++++++++++++++++++++++++++ Modules/_io/textio.c | 7 +++--- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index 2ffc370be99b94..2611a148262bff 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -704,6 +704,46 @@ def test_tell_after_readline_with_cr(self): remaining = f.read() self.assertEqual(remaining, "") + def test_tell_after_readline_with_multiple_cr(self): + # Test for gh-141314: TextIOWrapper.tell() assertion failure + # when dealing with multiple standalone carriage returns + test_cases = [ + (b'line1\r\rline2\r', ['line1\n', '\n', 'line2\n']), + (b'line1\r\r\rline2\r', ['line1\n', '\n', '\n', 'line2\n']), + (b'line1\rline2\rline3\r', ['line1\n', 'line2\n', 'line3\n']), + (b'\r\rdata\r', ['\n', '\n', 'data\n']), + ] + + for data, expected_lines in test_cases: + with self.subTest(data=data): + with self.open(os_helper.TESTFN, "wb") as f: + f.write(data) + + with self.open(os_helper.TESTFN, "r") as f: + # Read all lines and call tell() after each + lines_read = [] + positions = [] + while True: + pos_before = f.tell() + line = f.readline() + if not line: + break + lines_read.append(line) + # This should not cause an assertion failure + 
pos_after = f.tell() + positions.append((pos_before, pos_after)) + + # Verify lines read correctly + self.assertEqual(lines_read, expected_lines) + + # Verify we can seek back to each position + f.seek(0) + for i, (pos_before, pos_after) in enumerate(positions): + f.seek(pos_before) + line = f.readline() + self.assertEqual(line, expected_lines[i]) + self.assertEqual(f.tell(), pos_after) + def test_seek_with_encoder_state(self): f = self.open(os_helper.TESTFN, "w", encoding="euc_jis_2004") f.write("\u00e6\u0300") diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 2c974fe44090e6..812dc05727409d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2847,12 +2847,12 @@ _io_TextIOWrapper_tell_impl(textio *self) /* Skip the optimization if next_input is empty */ if (PyBytes_GET_SIZE(next_input) == 0) { skip_bytes = 0; - } - else { + } else { skip_back = 1; assert(skip_back <= PyBytes_GET_SIZE(next_input)); input = PyBytes_AS_STRING(next_input); - while (skip_bytes > 0) { + } + while (skip_bytes > 0) { /* Decode up to temptative start point */ if (_textiowrapper_decoder_setstate(self, &cookie) < 0) goto fail; @@ -2875,7 +2875,6 @@ _io_TextIOWrapper_tell_impl(textio *self) skip_back *= 2; } } - } if (skip_bytes <= 0) { skip_bytes = 0; if (_textiowrapper_decoder_setstate(self, &cookie) < 0) From 1d9f77fdbe70659bc06d6e950c9c1b9b6955ffd6 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 14:38:17 +0500 Subject: [PATCH 5/6] Fix trailing whitespace in test file --- Lib/test/test_io/test_textio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index 2611a148262bff..e1873af666b8f7 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -713,12 +713,12 @@ def test_tell_after_readline_with_multiple_cr(self): (b'line1\rline2\rline3\r', ['line1\n', 'line2\n', 'line3\n']), (b'\r\rdata\r', ['\n', '\n', 'data\n']), ] - + for 
data, expected_lines in test_cases: with self.subTest(data=data): with self.open(os_helper.TESTFN, "wb") as f: f.write(data) - + with self.open(os_helper.TESTFN, "r") as f: # Read all lines and call tell() after each lines_read = [] @@ -732,10 +732,10 @@ def test_tell_after_readline_with_multiple_cr(self): # This should not cause an assertion failure pos_after = f.tell() positions.append((pos_before, pos_after)) - + # Verify lines read correctly self.assertEqual(lines_read, expected_lines) - + # Verify we can seek back to each position f.seek(0) for i, (pos_before, pos_after) in enumerate(positions): From e84d686fd0b07eebd9a0b6e499095e4dcf0af2c8 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Mon, 10 Nov 2025 22:19:53 +0500 Subject: [PATCH 6/6] Fix TextIOWrapper assertion typo and simplify test - Fix assertion to check skip_bytes instead of skip_back - Use simpler test data b'line1\r' instead of b'line1=1\r' - Remove unnecessary multiple CR test case - Clean up workaround code The assertion was checking the wrong variable (skip_back instead of skip_bytes): skip_back is the search step size, whereas skip_bytes is the buffer offset that needs to be validated against the size of next_input. 
--- Lib/test/test_io/test_textio.py | 43 ++------------------------------- Modules/_io/textio.c | 11 +++------ 2 files changed, 5 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_io/test_textio.py b/Lib/test/test_io/test_textio.py index e1873af666b8f7..6331ed2b958552 100644 --- a/Lib/test/test_io/test_textio.py +++ b/Lib/test/test_io/test_textio.py @@ -689,14 +689,14 @@ def test_multibyte_seek_and_tell(self): def test_tell_after_readline_with_cr(self): # Test for gh-141314: TextIOWrapper.tell() assertion failure # when dealing with standalone carriage returns - data = b'line1=1\r' + data = b'line1\r' with self.open(os_helper.TESTFN, "wb") as f: f.write(data) with self.open(os_helper.TESTFN, "r") as f: # Read line that ends with \r line = f.readline() - self.assertEqual(line, "line1=1\n") + self.assertEqual(line, "line1\n") # This should not cause an assertion failure pos = f.tell() # Verify we can seek back to this position @@ -704,45 +704,6 @@ def test_tell_after_readline_with_cr(self): remaining = f.read() self.assertEqual(remaining, "") - def test_tell_after_readline_with_multiple_cr(self): - # Test for gh-141314: TextIOWrapper.tell() assertion failure - # when dealing with multiple standalone carriage returns - test_cases = [ - (b'line1\r\rline2\r', ['line1\n', '\n', 'line2\n']), - (b'line1\r\r\rline2\r', ['line1\n', '\n', '\n', 'line2\n']), - (b'line1\rline2\rline3\r', ['line1\n', 'line2\n', 'line3\n']), - (b'\r\rdata\r', ['\n', '\n', 'data\n']), - ] - - for data, expected_lines in test_cases: - with self.subTest(data=data): - with self.open(os_helper.TESTFN, "wb") as f: - f.write(data) - - with self.open(os_helper.TESTFN, "r") as f: - # Read all lines and call tell() after each - lines_read = [] - positions = [] - while True: - pos_before = f.tell() - line = f.readline() - if not line: - break - lines_read.append(line) - # This should not cause an assertion failure - pos_after = f.tell() - positions.append((pos_before, pos_after)) - - # Verify lines 
read correctly - self.assertEqual(lines_read, expected_lines) - - # Verify we can seek back to each position - f.seek(0) - for i, (pos_before, pos_after) in enumerate(positions): - f.seek(pos_before) - line = f.readline() - self.assertEqual(line, expected_lines[i]) - self.assertEqual(f.tell(), pos_after) def test_seek_with_encoder_state(self): f = self.open(os_helper.TESTFN, "w", encoding="euc_jis_2004") diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 812dc05727409d..404729f30abf4d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2844,14 +2844,9 @@ _io_TextIOWrapper_tell_impl(textio *self) /* Fast search for an acceptable start point, close to our current pos */ skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); - /* Skip the optimization if next_input is empty */ - if (PyBytes_GET_SIZE(next_input) == 0) { - skip_bytes = 0; - } else { - skip_back = 1; - assert(skip_back <= PyBytes_GET_SIZE(next_input)); - input = PyBytes_AS_STRING(next_input); - } + skip_back = 1; + assert(skip_bytes <= PyBytes_GET_SIZE(next_input)); + input = PyBytes_AS_STRING(next_input); while (skip_bytes > 0) { /* Decode up to temptative start point */ if (_textiowrapper_decoder_setstate(self, &cookie) < 0)