Skip to content

Commit 35f075a

Browse files
committed
docs: document function to check for invalid character
1 parent 4c8d770 commit 35f075a

File tree

1 file changed

+24
-1
lines changed

1 file changed

+24
-1
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,28 @@ int uint64_conflict(uint_state *self) {
18341834
return self->seen_uint && (self->seen_sint || self->seen_null);
18351835
}
18361836

1837-
static inline void check_for_invalid_char(const char *p_item, int *error) {
1837+
/**
1838+
* @brief Validates that a string contains only numeric digits and optional
1839+
* trailing whitespace.
1840+
*
1841+
* This function is used after an integer overflow,
1842+
* where it checks the rest of the string for a non-numeric character,
1843+
* while also ignoring trailing whitespace.
1844+
*
1845+
* Pure integer overflows during CSV parsing are converted to PyLongObjects,
1846+
* while, if any invalid character is found, it skips integer
1847+
* parsing and tries other conversion methods.
1848+
*
1849+
* @param p_item Pointer to the string to validate for numeric format
1850+
* @param error Pointer to indicate error code.
1851+
* Set to ERROR_INVALID_CHARS if an invalid character is found.
1852+
*
1853+
* @return Pointer to the position in the string where validation stopped.
1854+
* - If valid: points to the null terminator.
1855+
* - If invalid: points to the first invalid character encountered.
1856+
*/
1857+
static inline const char *check_for_invalid_char(const char *p_item,
1858+
int *error) {
18381859
while (*p_item != '\0' && isdigit_ascii(*p_item)) {
18391860
p_item++;
18401861
}
@@ -1847,6 +1868,8 @@ static inline void check_for_invalid_char(const char *p_item, int *error) {
18471868
if (*p_item != '\0') {
18481869
*error = ERROR_INVALID_CHARS;
18491870
}
1871+
1872+
return p_item;
18501873
}
18511874

18521875
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,

0 commit comments

Comments
 (0)