Skip to content

Commit cf0a26d

Browse files
committed
fix: make check_for_invalid_char return status code
Additionally, don't permit trailing whitespace.
1 parent 448f944 commit cf0a26d

File tree

1 file changed

+31
-23
lines changed

1 file changed

+31
-23
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,41 +1835,31 @@ int uint64_conflict(uint_state *self) {
18351835
}
18361836

18371837
/**
1838-
* @brief Validates that a string contains only numeric digits and optional
1839-
* trailing whitespace.
1838+
* @brief Validates that a string contains only numeric digits.
18401839
*
18411840
* This function is used after an integer overflow,
1842-
* where it checks the rest of the string for a non-numeric character,
1843-
* while also ignoring trailing white-space.
1841+
* where it checks the rest of the string for a non-numeric character.
18441842
*
18451843
* Pure integer overflows during CSV parsing are converted to PyLongObjects,
18461844
* while, if any invalid character is found, it skips integer
18471845
* parsing and tries other conversion methods.
18481846
*
18491847
* @param p_item Pointer to the string to validate for numeric format
1850-
* @param error Pointer to indicate error code.
1851-
* Set to ERROR_INVALID_CHARS if an invalid character is found.
18521848
*
1853-
* @return Pointer to the position in the string where validation stopped.
1854-
* - If valid: terminates at the null terminator.
1855-
* - If invalid: points to the first invalid character encountered.
1849+
* @return Integer 0 if the remainder of the string contains only digits,
1850+
* otherwise returns ERROR_INVALID_CHARS.
18561851
*/
1857-
static inline const char *check_for_invalid_char(const char *p_item,
1858-
int *error) {
1852+
static inline int check_for_invalid_char(const char *p_item) {
18591853
while (*p_item != '\0' && isdigit_ascii(*p_item)) {
18601854
p_item++;
18611855
}
18621856

1863-
while (*p_item != '\0' && isspace_ascii(*p_item)) {
1864-
++p_item;
1865-
}
1866-
18671857
// check if reached the end of string after consuming all digits
18681858
if (*p_item != '\0') {
1869-
*error = ERROR_INVALID_CHARS;
1859+
return ERROR_INVALID_CHARS;
18701860
}
18711861

1872-
return p_item;
1862+
return 0;
18731863
}
18741864

18751865
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
@@ -1917,7 +1907,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19171907
d = *++p;
19181908
} else {
19191909
*error = ERROR_OVERFLOW;
1920-
check_for_invalid_char(p, error);
1910+
int status = check_for_invalid_char(p);
1911+
if (status != 0) {
1912+
*error = status;
1913+
}
19211914
return 0;
19221915
}
19231916
}
@@ -1929,7 +1922,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19291922
d = *++p;
19301923
} else {
19311924
*error = ERROR_OVERFLOW;
1932-
check_for_invalid_char(p, error);
1925+
int status = check_for_invalid_char(p);
1926+
if (status != 0) {
1927+
*error = status;
1928+
}
19331929
return 0;
19341930
}
19351931
}
@@ -1957,7 +1953,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19571953

19581954
} else {
19591955
*error = ERROR_OVERFLOW;
1960-
check_for_invalid_char(p, error);
1956+
int status = check_for_invalid_char(p);
1957+
if (status != 0) {
1958+
*error = status;
1959+
}
19611960
return 0;
19621961
}
19631962
}
@@ -1970,7 +1969,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19701969

19711970
} else {
19721971
*error = ERROR_OVERFLOW;
1973-
check_for_invalid_char(p, error);
1972+
int status = check_for_invalid_char(p);
1973+
if (status != 0) {
1974+
*error = status;
1975+
}
19741976
return 0;
19751977
}
19761978
}
@@ -2039,7 +2041,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
20392041

20402042
} else {
20412043
*error = ERROR_OVERFLOW;
2042-
check_for_invalid_char(p, error);
2044+
int status = check_for_invalid_char(p);
2045+
if (status != 0) {
2046+
*error = status;
2047+
}
20432048
return 0;
20442049
}
20452050
}
@@ -2052,7 +2057,10 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
20522057

20532058
} else {
20542059
*error = ERROR_OVERFLOW;
2055-
check_for_invalid_char(p, error);
2060+
int status = check_for_invalid_char(p);
2061+
if (status != 0) {
2062+
*error = status;
2063+
}
20562064
return 0;
20572065
}
20582066
}

0 commit comments

Comments
 (0)