Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1510,8 +1510,15 @@ double xstrtod(const char *str, char **endptr, char decimal, char sci,
// Process string of digits.
num_digits = 0;
int n = 0;
// Prevent integer overflow by capping exponent value
// DBL_MAX_EXP is typically 1024, so we use a safe upper bound
const int MAX_EXPONENT_DIGITS = 4; // Allows up to 9999
while (isdigit_ascii(*p)) {
n = n * 10 + (*p - '0');
if (num_digits < MAX_EXPONENT_DIGITS) {
n = n * 10 + (*p - '0');
}
// Continue consuming digits even after cap to maintain correct parsing
// position
num_digits++;
p++;
}
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/io/parser/test_issue_63089.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Test for issue #63089 - read_csv segfault with large exponent
"""

import io

import pandas as pd


class TestIssue63089:
def test_large_exponent_no_segfault(self):
"""Test that extremely large exponents don't cause segfault."""
# This previously caused SIGSEGV due to integer overflow
# when parsing the exponent
result = pd.read_csv(
io.StringIO("""h
4e492493924924""")
)

# Should parse as infinity or large float, not crash
assert len(result) == 1
assert "h" in result.columns
# The value should be infinity since the exponent is way too large
import numpy as np

assert np.isinf(result["h"].iloc[0]) or result["h"].iloc[0] > 1e308

def test_various_large_exponents(self):
"""Test various edge cases with large exponents."""
test_cases = [
"1e999999999", # Very large positive exponent
"1e-999999999", # Very large negative exponent
"2.5e123456789", # Large exponent with decimal
]

for test_val in test_cases:
csv_data = f"col\n{test_val}"
result = pd.read_csv(io.StringIO(csv_data))
# Should not crash, result should be inf, 0, or valid float
assert len(result) == 1
assert not pd.isna(result["col"].iloc[0]) or True # Just don't crash
14 changes: 14 additions & 0 deletions reproduce_issue_63089.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import io

import pandas as pd

# Reproduction script for issue #63089: read a one-column CSV whose single
# value carries an extremely large exponent, then report the outcome.
print("Testing issue #63089...")

# Header line "h" followed by one data row.
csv_text = "h\n4e492493924924"

try:
    frame = pd.read_csv(io.StringIO(csv_text))
    print("Success! Result:")
    print(frame)
except Exception as exc:
    # Report any Python-level failure instead of letting the script abort
    # with a traceback.
    print(f"Exception occurred: {type(exc).__name__}: {exc}")
Loading