fix: remove decimal_separator argument from str_to_int64/str_to_uint64 and fold ERROR_IS_FLOAT into ERROR_INVALID_CHARS

Alvaro-Kothe · Alvaro-Kothe · commit 5219386cace9 · 2025-10-08T15:25:24.000-03:00
diff --git a/pandas/_libs/include/pandas/parser/pd_parser.h b/pandas/_libs/include/pandas/parser/pd_parser.h
@@ -37,8 +37,8 @@ typedef struct {
   int (*parser_trim_buffers)(parser_t *);
   int (*tokenize_all_rows)(parser_t *, const char *);
   int (*tokenize_nrows)(parser_t *, size_t, const char *);
-  int64_t (*str_to_int64)(const char *, char, int64_t, int64_t, int *, char);
-  uint64_t (*str_to_uint64)(uint_state *, const char *, char, int64_t, uint64_t,
+  int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char);
+  uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t,
                             int *, char);
   double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *);
   double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *,
@@ -87,14 +87,12 @@ static PandasParser_CAPI *PandasParserAPI = NULL;
   PandasParserAPI->tokenize_all_rows((self), (encoding_errors))
 #define tokenize_nrows(self, nrows, encoding_errors)                           \
   PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors))
-#define str_to_int64(p_item, decimal_separator, int_min, int_max, error,       \
-                     t_sep)                                                    \
-  PandasParserAPI->str_to_int64((p_item), (decimal_separator), (int_min),      \
-                                (int_max), (error), (t_sep))
-#define str_to_uint64(state, p_item, decimal_separator, int_max, uint_max,     \
-                      error, t_sep)                                            \
-  PandasParserAPI->str_to_uint64((state), (p_item), (decimal_separator),       \
-                                 (int_max), (uint_max), (error), (t_sep))
+#define str_to_int64(p_item, int_min, int_max, error, t_sep)                   \
+  PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error),       \
+                                (t_sep))
+#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep)          \
+  PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max),     \
+                                 (error), (t_sep))
 #define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)     \
   PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep),                 \
                            (skip_trailing), (error), (maybe_int))
diff --git a/pandas/_libs/include/pandas/parser/tokenizer.h b/pandas/_libs/include/pandas/parser/tokenizer.h
@@ -17,7 +17,6 @@ See LICENSE for the license
 #define ERROR_NO_DIGITS 1
 #define ERROR_OVERFLOW 2
 #define ERROR_INVALID_CHARS 3
-#define ERROR_IS_FLOAT 4
 
 #include <stdint.h>
 
@@ -209,11 +208,10 @@ void uint_state_init(uint_state *self);
 
 int uint64_conflict(uint_state *self);
 
-uint64_t str_to_uint64(uint_state *state, const char *p_item,
-                       char decimal_separator, int64_t int_max,
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
                        uint64_t uint_max, int *error, char tsep);
-int64_t str_to_int64(const char *p_item, char decimal_separator,
-                     int64_t int_min, int64_t int_max, int *error, char tsep);
+int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
+                     int *error, char tsep);
 double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
                int skip_trailing, int *error, int *maybe_int);
 double precise_xstrtod(const char *p, char **q, char decimal, char sci,
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -149,7 +149,7 @@ cdef extern from "pandas/parser/tokenizer.h":
         SKIP_LINE
         FINISHED
 
-    enum: ERROR_OVERFLOW, ERROR_IS_FLOAT
+    enum: ERROR_OVERFLOW, ERROR_INVALID_CHARS
 
     ctypedef enum BadLineHandleMethod:
         ERROR,
@@ -281,11 +281,10 @@ cdef extern from "pandas/parser/pd_parser.h":
     int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil
     int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil
 
-    int64_t str_to_int64(char *p_item, char decimal_separator, int64_t int_min,
+    int64_t str_to_int64(char *p_item, int64_t int_min,
                          int64_t int_max, int *error, char tsep) nogil
-    uint64_t str_to_uint64(uint_state *state, char *p_item, char decimal_separator,
-                           int64_t int_max, uint64_t uint_max,
-                           int *error, char tsep) nogil
+    uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
+                           uint64_t uint_max, int *error, char tsep) nogil
 
     double xstrtod(const char *p, char **q, char decimal,
                    char sci, char tsep, int skip_trailing,
@@ -1775,9 +1774,9 @@ cdef _try_uint64(parser_t *parser, int64_t col,
         if error == ERROR_OVERFLOW:
             # Can't get the word variable
             raise OverflowError("Overflow")
-        elif raise_on_float and error == ERROR_IS_FLOAT:
+        elif raise_on_float and error == ERROR_INVALID_CHARS:
             raise ValueError("Number is float")
-        elif not raise_on_float or error != ERROR_IS_FLOAT:
+        elif not raise_on_float or error != ERROR_INVALID_CHARS:
             return None
 
     if uint64_conflict(&state):
@@ -1811,14 +1810,14 @@ cdef int _try_uint64_nogil(parser_t *parser, int64_t col,
                 data[i] = 0
                 continue
 
-            data[i] = str_to_uint64(state, word, parser.decimal, INT64_MAX, UINT64_MAX,
+            data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
                                     &error, parser.thousands)
             if error != 0:
                 return error
     else:
         for i in range(lines):
             COLITER_NEXT(it, word)
-            data[i] = str_to_uint64(state, word, parser.decimal, INT64_MAX, UINT64_MAX,
+            data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
                                     &error, parser.thousands)
             if error != 0:
                 return error
@@ -1848,9 +1847,9 @@ cdef _try_int64(parser_t *parser, int64_t col,
         if error == ERROR_OVERFLOW:
             # Can't get the word variable
             raise OverflowError("Overflow")
-        elif raise_on_float and error == ERROR_IS_FLOAT:
+        elif raise_on_float and error == ERROR_INVALID_CHARS:
             raise ValueError("Number is float")
-        elif not raise_on_float or error != ERROR_IS_FLOAT:
+        elif not raise_on_float or error != ERROR_INVALID_CHARS:
             return None, None
 
     return result, na_count
@@ -1879,14 +1878,14 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
                 data[i] = NA
                 continue
 
-            data[i] = str_to_int64(word, parser.decimal, INT64_MIN, INT64_MAX,
+            data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
                                    &error, parser.thousands)
             if error != 0:
                 return error
     else:
         for i in range(lines):
             COLITER_NEXT(it, word)
-            data[i] = str_to_int64(word, parser.decimal, INT64_MIN, INT64_MAX,
+            data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
                                    &error, parser.thousands)
             if error != 0:
                 return error
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -1834,8 +1834,8 @@ int uint64_conflict(uint_state *self) {
   return self->seen_uint && (self->seen_sint || self->seen_null);
 }
 
-int64_t str_to_int64(const char *p_item, char decimal_separator,
-                     int64_t int_min, int64_t int_max, int *error, char tsep) {
+int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
+                     int *error, char tsep) {
   const char *p = p_item;
   // Skip leading spaces.
   while (isspace_ascii(*p)) {
@@ -1942,8 +1942,8 @@ int64_t str_to_int64(const char *p_item, char decimal_separator,
     }
 
-    // check if is float
+    // any leftover character (e.g. a decimal point or exponent) is invalid
-    if (*p == decimal_separator || *p == 'e' || *p == 'E') {
-      *error = ERROR_IS_FLOAT;
+    if (*p != '\0') {
+      *error = ERROR_INVALID_CHARS;
     }
     return 0;
   }
@@ -1955,20 +1955,15 @@ int64_t str_to_int64(const char *p_item, char decimal_separator,
 
   // Did we use up all the characters?
   if (*p) {
-    if (*p == decimal_separator || *p == 'e' || *p == 'E') {
-      *error = ERROR_IS_FLOAT;
-    } else {
-      *error = ERROR_INVALID_CHARS;
-    }
+    *error = ERROR_INVALID_CHARS;
     return 0;
   }
 
   *error = 0;
   return number;
 }
 
-uint64_t str_to_uint64(uint_state *state, const char *p_item,
-                       char decimal_separator, int64_t int_max,
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
                        uint64_t uint_max, int *error, char tsep) {
   const char *p = p_item;
   // Skip leading spaces.
@@ -2039,8 +2034,8 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item,
     }
 
-    // check if is float
+    // any leftover character (e.g. a decimal point or exponent) is invalid
-    if (*p == decimal_separator || *p == 'e' || *p == 'E') {
-      *error = ERROR_IS_FLOAT;
+    if (*p != '\0') {
+      *error = ERROR_INVALID_CHARS;
     }
     return 0;
   }
@@ -2052,11 +2047,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item,
 
   // Did we use up all the characters?
   if (*p) {
-    if (*p == decimal_separator || *p == 'e' || *p == 'E') {
-      *error = ERROR_IS_FLOAT;
-    } else {
-      *error = ERROR_INVALID_CHARS;
-    }
+    *error = ERROR_INVALID_CHARS;
     return 0;
   }