⚡️ Speed up method ParseError.column by 44%
#48
+8
−5
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 44% (0.44x) speedup for
`ParseError.column` in `python/ccxt/static_dependencies/parsimonious/exceptions.py`. ⏱️ Runtime:
59.6 microseconds → 41.4 microseconds (best of 250 runs). 📝 Explanation and details
The optimization replaces exception-based control flow with explicit conditional checks, delivering a 44% speedup by eliminating the overhead of Python's exception handling mechanism.
Key Changes:
Replaced `rindex()` with `rfind()`: `rindex()` throws a `ValueError` when no newline is found, while `rfind()` returns -1. This avoids expensive exception creation and handling. The `try`/`except` is replaced by an explicit check (`if last_newline == -1`).
Why This is Faster:
Exception handling in Python is notoriously expensive - it involves stack unwinding, exception object creation, and control flow changes. The line profiler shows that in the original code, 62.7% of time was spent in the
`rindex()` call and 8.7% handling the `ValueError`. The optimized version eliminates this overhead entirely.
Performance Profile:
This optimization is particularly valuable since
`ParseError.column()` is likely called frequently during parsing operations where errors occur, making the cumulative time savings significant across many parsing attempts.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import pytest # used for our unit tests
from ccxt.static_dependencies.parsimonious.exceptions import ParseError
unit tests
Basic Test Cases
def test_column_basic_start_of_text():
    """Error at position 0 of text that contains no newline."""
    error = ParseError("abcdef", 0)
    codeflash_output = error.column()  # 1.09μs -> 365ns (198% faster)
def test_column_basic_middle_of_text():
    """Error at position 3 of text that contains no newline."""
    error = ParseError("abcdef", 3)
    codeflash_output = error.column()  # 1.12μs -> 860ns (30.0% faster)
def test_column_basic_end_of_text():
    """Error at the last character (position 5) of newline-free text."""
    error = ParseError("abcdef", 5)
    codeflash_output = error.column()  # 1.15μs -> 771ns (49.2% faster)
def test_column_basic_single_newline():
    """Error on the second line of a two-line text."""
    # Position 5 is 'e'; text[:5] == "abc\nd", so the last newline is at 3
    # and the expected column is 5 - 3 = 2.
    error = ParseError("abc\ndef", 5)
    codeflash_output = error.column()  # 743ns -> 732ns (1.50% faster)
def test_column_basic_multiple_newlines():
    """Error on the third line of a three-line text."""
    source = "abc\ndef\nghi"
    # Position 9 is 'h' (index 8 is 'g', index 10 is 'i'); the last newline
    # before it is at 7, so the expected column is 9 - 7 = 2.
    error = ParseError(source, 9)
    codeflash_output = error.column()  # 780ns -> 723ns (7.88% faster)
Edge Test Cases
def test_column_edge_pos_negative():
    """Negative position in newline-free text."""
    # No newline is found before -1, so the expected column is -1 + 1 = 0.
    error = ParseError("abcdef", -1)
    codeflash_output = error.column()  # 1.21μs -> 877ns (37.7% faster)
def test_column_edge_pos_zero_with_newline_at_start():
    """Position 0 in text whose first character is a newline."""
    # Nothing precedes position 0, so the leading newline is not counted:
    # expected column is 0 + 1 = 1.
    error = ParseError("\nabcdef", 0)
    codeflash_output = error.column()  # 1.11μs -> 366ns (204% faster)
def test_column_edge_pos_at_newline():
    """Error position pointing at the newline character itself."""
    source = "abc\ndef"
    # Position 3 is the '\n'; no newline precedes it, so the expected
    # column is 3 + 1 = 4.
    error = ParseError(source, 3)
    codeflash_output = error.column()  # 1.14μs -> 857ns (32.9% faster)
def test_column_edge_pos_just_after_newline():
    """Error position on the first character after a newline."""
    source = "abc\ndef"
    # Position 4 is 'd'; the last newline is at 3, so the expected column
    # is 4 - 3 = 1.
    error = ParseError(source, 4)
    codeflash_output = error.column()  # 774ns -> 734ns (5.45% faster)
def test_column_edge_text_empty():
    """Empty text with the error at position 0."""
    error = ParseError("", 0)
    codeflash_output = error.column()  # 1.20μs -> 365ns (229% faster)
def test_column_edge_pos_beyond_text_length():
    """Error position larger than the length of the text."""
    # No newline before position 10, so the expected column is 10 + 1 = 11.
    error = ParseError("abc", 10)
    codeflash_output = error.column()  # 1.12μs -> 815ns (38.0% faster)
def test_column_edge_text_only_newlines():
    """Text made up solely of newlines."""
    # The last newline before position 2 is at 1, so the expected column
    # is 2 - 1 = 1.
    error = ParseError("\n\n\n", 2)
    codeflash_output = error.column()  # 812ns -> 706ns (15.0% faster)
def test_column_edge_text_ends_with_newline():
    """Text ending in a newline, with the error just past that newline."""
    # Position 4 equals len(text); the last newline before it is at 3, so
    # the expected column is 4 - 3 = 1.
    error = ParseError("abc\n", 4)
    codeflash_output = error.column()  # 741ns -> 746ns (0.670% slower)
def test_column_edge_text_newline_at_pos_zero():
    """Text that is a single newline, with the error at position 0."""
    error = ParseError("\n", 0)
    codeflash_output = error.column()  # 1.19μs -> 363ns (229% faster)
def test_column_edge_pos_at_last_char():
    """Error position on the final character of a two-line text."""
    source = "abc\ndef"
    # len(source) - 1 is 6; the last newline before it is at 3, so the
    # expected column is 6 - 3 = 3.
    error = ParseError(source, len(source) - 1)
    codeflash_output = error.column()  # 787ns -> 792ns (0.631% slower)
Large Scale Test Cases
def test_column_large_no_newlines():
    """1000-character text without newlines; error at the last character."""
    source = "a" * 1000
    position = 999
    error = ParseError(source, position)
    codeflash_output = error.column()  # 1.28μs -> 954ns (34.5% faster)
def test_column_large_newlines_every_10():
    """One hundred lines of nine 'a' plus a newline; error at position 900."""
    source = ("a" * 9 + "\n") * 100
    position = 900
    # Reference value derived from the closest newline before the position.
    newline_index = source.rindex('\n', 0, position)
    expected_column = position - newline_index
    error = ParseError(source, position)
    codeflash_output = error.column()  # 608ns -> 783ns (22.3% slower)
def test_column_large_newline_at_start_and_end():
    """Large text with a newline only at its first and last index."""
    source = "\n" + "a" * 998 + "\n"
    position = 500
    # The last newline before 500 is at 0, so the expected column is
    # 500 - 0 = 500.
    error = ParseError(source, position)
    codeflash_output = error.column()  # 870ns -> 894ns (2.68% slower)
def test_column_large_all_newlines():
    """Large text consisting only of newlines; error at the last index."""
    source = "\n" * 999
    position = 998
    # The last newline before 998 is at 997, so the expected column is
    # 998 - 997 = 1.
    error = ParseError(source, position)
    codeflash_output = error.column()  # 821ns -> 839ns (2.15% slower)
def test_column_large_pos_at_last_newline():
    """Error at the final character (a newline) of 250 four-character lines."""
    source = "abc\n" * 250  # 1000 characters in total
    position = len(source) - 1
    # Reference value derived from the closest newline before the position.
    newline_index = source.rindex('\n', 0, position)
    expected_column = position - newline_index
    error = ParseError(source, position)
    codeflash_output = error.column()  # 597ns -> 702ns (15.0% slower)
def test_column_large_pos_zero():
    """Large text ending in a newline; error at position 0."""
    source = "a" * 999 + "\n"
    error = ParseError(source, 0)
    codeflash_output = error.column()  # 1.24μs -> 372ns (233% faster)
def test_column_large_text_with_various_newline_positions():
    """1000-character text with a newline at every 100th index; error at 555."""
    # Indices 0, 100, ..., 900 hold '\n'; every other index holds 'a'.
    source = "".join("\n" if index % 100 == 0 else "a" for index in range(1000))
    position = 555
    # Reference value derived from the closest newline before the position.
    newline_index = source.rindex('\n', 0, position)
    expected_column = position - newline_index
    error = ParseError(source, position)
    codeflash_output = error.column()  # 647ns -> 804ns (19.5% slower)
Additional Edge Cases
def test_column_edge_pos_at_text_length():
    """Error position exactly equal to len(text)."""
    source = "abc\ndef"
    position = len(source)
    # The last newline before position 7 is at 3, so the expected column
    # is 7 - 3 = 4.
    error = ParseError(source, position)
    codeflash_output = error.column()  # 783ns -> 738ns (6.10% faster)
def test_column_edge_pos_zero_text_with_newlines():
    """Position 0 in text whose newlines all come later."""
    source = "a\nb\nc"
    error = ParseError(source, 0)
    codeflash_output = error.column()  # 1.25μs -> 371ns (236% faster)
def test_column_edge_text_contains_only_one_char():
    """Single-character text with the error at position 0."""
    source = "x"
    error = ParseError(source, 0)
    codeflash_output = error.column()  # 1.09μs -> 363ns (202% faster)
def test_column_edge_text_contains_only_one_newline():
    """Text that is exactly one newline, with the error at position 0."""
    source = "\n"
    error = ParseError(source, 0)
    codeflash_output = error.column()  # 1.07μs -> 357ns (199% faster)
def test_column_edge_pos_negative_large_text():
    """Negative position (-10) in a large newline-free text."""
    source = "a" * 999
    error = ParseError(source, -10)
    codeflash_output = error.column()  # 1.37μs -> 1.17μs (17.6% faster)
def test_column_edge_text_none_or_empty_string():
    """Empty text combined with a position greater than zero."""
    error = ParseError("", 5)
    codeflash_output = error.column()  # 1.07μs -> 730ns (46.6% faster)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import pytest # used for our unit tests
from ccxt.static_dependencies.parsimonious.exceptions import ParseError
function to test
from ccxt.static_dependencies.parsimonious.utils import StrAndRepr
unit tests
-------------------------
Basic Test Cases
-------------------------
def test_column_basic_no_newline():
    """Newline-free text with the error at index 0."""
    error = ParseError("abcdef", 0)
    codeflash_output = error.column()  # 1.36μs -> 405ns (237% faster)
def test_column_basic_middle_no_newline():
    """Newline-free text with the error at index 3."""
    error = ParseError("abcdef", 3)
    codeflash_output = error.column()  # 1.31μs -> 876ns (49.9% faster)
def test_column_basic_end_no_newline():
    """Newline-free text with the error at the last index."""
    error = ParseError("abcdef", 5)
    codeflash_output = error.column()  # 1.25μs -> 765ns (63.8% faster)
def test_column_basic_with_newline_start_of_line():
    """Two-line text with the error at the start of the second line."""
    error = ParseError("abc\ndef", 4)
    codeflash_output = error.column()  # 809ns -> 744ns (8.74% faster)
def test_column_basic_with_newline_middle_of_line():
    """Two-line text with the error inside the second line (index 6, 'f')."""
    error = ParseError("abc\ndef", 6)
    codeflash_output = error.column()  # 831ns -> 759ns (9.49% faster)
def test_column_basic_multiple_newlines():
    """Three-line text with the error on the third line."""
    source = "a\nb\ncde"
    error = ParseError(source, 6)  # index 6 is 'e'
    codeflash_output = error.column()  # 763ns -> 725ns (5.24% faster)
-------------------------
Edge Test Cases
-------------------------
def test_column_pos_zero():
    """Position 0 is expected to be column 1 whatever the text contains."""
    error = ParseError("\nabcdef", 0)
    codeflash_output = error.column()  # 1.20μs -> 350ns (244% faster)
def test_column_pos_negative_one():
    """Position -1 in newline-free text."""
    # With no '\n' found before the position the expected column is
    # -1 + 1 = 0.
    error = ParseError("abcdef", -1)
    codeflash_output = error.column()  # 1.28μs -> 920ns (39.0% faster)
def test_column_pos_negative_one_with_newline():
    """Position -1 in text that does contain a newline."""
    source = "abc\ndef"
    # The last newline is at index 3, so the expected column is
    # -1 - 3 = -4.
    error = ParseError(source, -1)
    codeflash_output = error.column()  # 867ns -> 793ns (9.33% faster)
def test_column_newline_at_end():
    """Text ending in a newline, with the error on that newline."""
    source = "abc\n"
    error = ParseError(source, 3)  # 'c' is at 2, '\n' is at 3
    codeflash_output = error.column()  # 1.21μs -> 767ns (58.0% faster)
def test_column_newline_at_pos():
    """Error position pointing at the second newline of a three-line text."""
    source = "abc\ndef\nghi"
    # Index 7 is the second '\n'; the last newline before it is at index 3,
    # so the expected column is 7 - 3 = 4.
    error = ParseError(source, 7)
    codeflash_output = error.column()  # 848ns -> 709ns (19.6% faster)
def test_column_empty_text():
    """Empty text with position 0."""
    error = ParseError("", 0)
    codeflash_output = error.column()  # 1.21μs -> 333ns (262% faster)
def test_column_text_only_newlines():
    """Every position 0..3 of a three-newline text."""
    source = "\n\n\n"
    for position in range(4):
        error = ParseError(source, position)
        # Column 1 is expected at each of the positions 0, 1, 2 and 3.
        codeflash_output = error.column()  # 2.19μs -> 1.56μs (40.5% faster)
def test_column_text_newline_at_start():
    """Text beginning with a newline; error on the character right after it."""
    source = "\nabc"
    error = ParseError(source, 1)  # index 1 is 'a'
    codeflash_output = error.column()  # 729ns -> 720ns (1.25% faster)
def test_column_text_newline_at_end():
    """Text ending in a newline; error position one past the end."""
    source = "abc\n"
    # Position 4 equals len(source); the last newline before it is at 3,
    # so the expected column is 4 - 3 = 1.
    error = ParseError(source, 4)
    codeflash_output = error.column()  # 695ns -> 711ns (2.25% slower)
def test_column_text_with_carriage_return():
    """CRLF line ending: only the line feed is used to locate the line start."""
    source = "abc\r\ndef"
    # Index 6 is 'e'; the last '\n' is at index 4, so the expected column
    # is 6 - 4 = 2.
    error = ParseError(source, 6)
    codeflash_output = error.column()  # 760ns -> 693ns (9.67% faster)
def test_column_text_with_multiple_consecutive_newlines():
    """Several newlines in a row before the line that holds the error."""
    source = "abc\n\n\nxyz"
    # Index 7 is 'y'; the last '\n' is at index 5, so the expected column
    # is 7 - 5 = 2.
    error = ParseError(source, 7)
    codeflash_output = error.column()  # 758ns -> 725ns (4.55% faster)
def test_column_pos_equals_text_length():
    """Error position equal to the text length (one past the last index)."""
    source = "abc\ndef"
    # The last '\n' is at index 3, so the expected column is 7 - 3 = 4.
    error = ParseError(source, len(source))
    codeflash_output = error.column()  # 763ns -> 722ns (5.68% faster)
-------------------------
Large Scale Test Cases
-------------------------
def test_column_large_text_no_newlines():
    """1000-character newline-free text; error at index 999."""
    source = "a" * 1000
    error = ParseError(source, 999)
    codeflash_output = error.column()  # 1.40μs -> 979ns (43.2% faster)
def test_column_large_text_with_newlines_every_100():
    """Ten lines of 99 'a' plus a newline; error at position 500."""
    source = ("a" * 99 + "\n") * 10
    position = 500
    error = ParseError(source, position)
    # Reference value derived from the closest newline before the position.
    newline_index = source.rindex('\n', 0, position)
    expected_column = position - newline_index
    codeflash_output = error.column()  # 601ns -> 754ns (20.3% slower)
def test_column_large_text_newline_at_end():
    """Large text whose only newline is the final character; error on it."""
    source = "a" * 999 + "\n"
    # The newline sits at index 999, which is the position itself and so is
    # not "before" it; with no earlier newline the expected column is
    # 999 + 1 = 1000.
    error = ParseError(source, 999)
    codeflash_output = error.column()  # 1.30μs -> 860ns (51.5% faster)
def test_column_large_text_pos_zero():
    """1000-character newline-free text; error at position 0."""
    source = "b" * 1000
    error = ParseError(source, 0)
    codeflash_output = error.column()  # 1.20μs -> 345ns (249% faster)
def test_column_large_text_all_newlines():
    """999 newlines, probed at positions 0, 100, ..., 900."""
    source = "\n" * 999
    for position in range(0, 1000, 100):
        error = ParseError(source, position)
        codeflash_output = error.column()  # 3.72μs -> 3.21μs (15.8% faster)
def test_column_large_text_last_line():
    """333 newline-joined lines; error at the last character of the last line."""
    source = "\n".join(["abc"] * 333)
    position = len(source) - 1
    # Reference value derived from the closest newline before the position.
    newline_index = source.rindex('\n', 0, position)
    expected_column = position - newline_index
    error = ParseError(source, position)
    codeflash_output = error.column()  # 589ns -> 680ns (13.4% slower)
-------------------------
Mutation Testing Guards
-------------------------
def test_column_mutation_guard_off_by_one():
    """Guard against returning pos instead of pos + 1 when no newline precedes."""
    error = ParseError("abcdef", 2)
    codeflash_output = error.column()  # 1.19μs -> 753ns (58.3% faster)
def test_column_mutation_guard_wrong_newline_index():
    """Guard against using the first newline instead of the last one before pos."""
    source = "abc\ndef\nghi"
    # Index 8 is 'g'; the last newline before it is at index 7, so the
    # expected column is 8 - 7 = 1. Using the first newline (index 3)
    # would give 5 instead.
    error = ParseError(source, 8)
    codeflash_output = error.column()  # 744ns -> 696ns (6.90% faster)
def test_column_mutation_guard_one_based():
    """Guard against a 0-based result: position 0 is expected to be column 1."""
    error = ParseError("abcdef", 0)
    codeflash_output = error.column()  # 1.20μs -> 357ns (236% faster)
def test_column_mutation_guard_empty_string():
    """Guard against column 0 for empty text: position 0 is expected to be 1."""
    error = ParseError("", 0)
    codeflash_output = error.column()  # 1.10μs -> 346ns (217% faster)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run
`git checkout codeflash/optimize-ParseError.column-mhuzozbz` and push.