Skip to content

Commit 4ff07e3

Browse files
committed
fix: early return on overflow, but still check next chars
1 parent 5219386 commit 4ff07e3

File tree

1 file changed

+23
-32
lines changed

1 file changed

+23
-32
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,6 +1834,17 @@ int uint64_conflict(uint_state *self) {
18341834
return self->seen_uint && (self->seen_sint || self->seen_null);
18351835
}
18361836

1837+
static inline void check_for_invalid_char(const char *p_item, int *error) {
1838+
while (*p_item != '\0' && isdigit_ascii(*p_item)) {
1839+
p_item++;
1840+
}
1841+
1842+
// check if reached the end of string after consuming all digits
1843+
if (*p_item != '\0') {
1844+
*error = ERROR_INVALID_CHARS;
1845+
}
1846+
}
1847+
18371848
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
18381849
int *error, char tsep) {
18391850
const char *p = p_item;
@@ -1879,7 +1890,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
18791890
d = *++p;
18801891
} else {
18811892
*error = ERROR_OVERFLOW;
1882-
break;
1893+
check_for_invalid_char(p, error);
1894+
return 0;
18831895
}
18841896
}
18851897
} else {
@@ -1890,7 +1902,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
18901902
d = *++p;
18911903
} else {
18921904
*error = ERROR_OVERFLOW;
1893-
break;
1905+
check_for_invalid_char(p, error);
1906+
return 0;
18941907
}
18951908
}
18961909
}
@@ -1917,7 +1930,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19171930

19181931
} else {
19191932
*error = ERROR_OVERFLOW;
1920-
break;
1933+
check_for_invalid_char(p, error);
1934+
return 0;
19211935
}
19221936
}
19231937
} else {
@@ -1929,25 +1943,13 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19291943

19301944
} else {
19311945
*error = ERROR_OVERFLOW;
1932-
break;
1946+
check_for_invalid_char(p, error);
1947+
return 0;
19331948
}
19341949
}
19351950
}
19361951
}
19371952

1938-
if (*error == ERROR_OVERFLOW) {
1939-
// advance digits
1940-
while (*p != '\0' && isdigit_ascii(*p)) {
1941-
p++;
1942-
}
1943-
1944-
// check if is float
1945-
if (*p != '\0') {
1946-
*error = ERROR_INVALID_CHARS;
1947-
}
1948-
return 0;
1949-
}
1950-
19511953
// Skip trailing spaces.
19521954
while (isspace_ascii(*p)) {
19531955
++p;
@@ -2010,7 +2012,8 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
20102012

20112013
} else {
20122014
*error = ERROR_OVERFLOW;
2013-
break;
2015+
check_for_invalid_char(p, error);
2016+
return 0;
20142017
}
20152018
}
20162019
} else {
@@ -2022,24 +2025,12 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
20222025

20232026
} else {
20242027
*error = ERROR_OVERFLOW;
2025-
break;
2028+
check_for_invalid_char(p, error);
2029+
return 0;
20262030
}
20272031
}
20282032
}
20292033

2030-
if (*error == ERROR_OVERFLOW) {
2031-
// advance digits
2032-
while (*p != '\0' && isdigit_ascii(*p)) {
2033-
p++;
2034-
}
2035-
2036-
// check if is float
2037-
if (*p != '\0') {
2038-
*error = ERROR_INVALID_CHARS;
2039-
}
2040-
return 0;
2041-
}
2042-
20432034
// Skip trailing spaces.
20442035
while (isspace_ascii(*p)) {
20452036
++p;

0 commit comments

Comments
 (0)