Skip to content

Commit 5219386

Browse files
committed
fix: remove decimal_separator argument
1 parent 7e8033d commit 5219386

File tree

4 files changed, with 32 additions and 46 deletions.

pandas/_libs/include/pandas/parser/pd_parser.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ typedef struct {
3737
int (*parser_trim_buffers)(parser_t *);
3838
int (*tokenize_all_rows)(parser_t *, const char *);
3939
int (*tokenize_nrows)(parser_t *, size_t, const char *);
40-
int64_t (*str_to_int64)(const char *, char, int64_t, int64_t, int *, char);
41-
uint64_t (*str_to_uint64)(uint_state *, const char *, char, int64_t, uint64_t,
40+
int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char);
41+
uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t,
4242
int *, char);
4343
double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *);
4444
double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *,
@@ -87,14 +87,12 @@ static PandasParser_CAPI *PandasParserAPI = NULL;
8787
PandasParserAPI->tokenize_all_rows((self), (encoding_errors))
8888
#define tokenize_nrows(self, nrows, encoding_errors) \
8989
PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors))
90-
#define str_to_int64(p_item, decimal_separator, int_min, int_max, error, \
91-
t_sep) \
92-
PandasParserAPI->str_to_int64((p_item), (decimal_separator), (int_min), \
93-
(int_max), (error), (t_sep))
94-
#define str_to_uint64(state, p_item, decimal_separator, int_max, uint_max, \
95-
error, t_sep) \
96-
PandasParserAPI->str_to_uint64((state), (p_item), (decimal_separator), \
97-
(int_max), (uint_max), (error), (t_sep))
90+
#define str_to_int64(p_item, int_min, int_max, error, t_sep) \
91+
PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \
92+
(t_sep))
93+
#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep) \
94+
PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max), \
95+
(error), (t_sep))
9896
#define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \
9997
PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep), \
10098
(skip_trailing), (error), (maybe_int))

pandas/_libs/include/pandas/parser/tokenizer.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ See LICENSE for the license
1717
#define ERROR_NO_DIGITS 1
1818
#define ERROR_OVERFLOW 2
1919
#define ERROR_INVALID_CHARS 3
20-
#define ERROR_IS_FLOAT 4
2120

2221
#include <stdint.h>
2322

@@ -209,11 +208,10 @@ void uint_state_init(uint_state *self);
209208

210209
int uint64_conflict(uint_state *self);
211210

212-
uint64_t str_to_uint64(uint_state *state, const char *p_item,
213-
char decimal_separator, int64_t int_max,
211+
uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
214212
uint64_t uint_max, int *error, char tsep);
215-
int64_t str_to_int64(const char *p_item, char decimal_separator,
216-
int64_t int_min, int64_t int_max, int *error, char tsep);
213+
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
214+
int *error, char tsep);
217215
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
218216
int skip_trailing, int *error, int *maybe_int);
219217
double precise_xstrtod(const char *p, char **q, char decimal, char sci,

pandas/_libs/parsers.pyx

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ cdef extern from "pandas/parser/tokenizer.h":
149149
SKIP_LINE
150150
FINISHED
151151

152-
enum: ERROR_OVERFLOW, ERROR_IS_FLOAT
152+
enum: ERROR_OVERFLOW, ERROR_INVALID_CHARS
153153

154154
ctypedef enum BadLineHandleMethod:
155155
ERROR,
@@ -281,11 +281,10 @@ cdef extern from "pandas/parser/pd_parser.h":
281281
int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil
282282
int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil
283283

284-
int64_t str_to_int64(char *p_item, char decimal_separator, int64_t int_min,
284+
int64_t str_to_int64(char *p_item, int64_t int_min,
285285
int64_t int_max, int *error, char tsep) nogil
286-
uint64_t str_to_uint64(uint_state *state, char *p_item, char decimal_separator,
287-
int64_t int_max, uint64_t uint_max,
288-
int *error, char tsep) nogil
286+
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
287+
uint64_t uint_max, int *error, char tsep) nogil
289288

290289
double xstrtod(const char *p, char **q, char decimal,
291290
char sci, char tsep, int skip_trailing,
@@ -1775,9 +1774,9 @@ cdef _try_uint64(parser_t *parser, int64_t col,
17751774
if error == ERROR_OVERFLOW:
17761775
# Can't get the word variable
17771776
raise OverflowError("Overflow")
1778-
elif raise_on_float and error == ERROR_IS_FLOAT:
1777+
elif raise_on_float and error == ERROR_INVALID_CHARS:
17791778
raise ValueError("Number is float")
1780-
elif not raise_on_float or error != ERROR_IS_FLOAT:
1779+
elif not raise_on_float or error != ERROR_INVALID_CHARS:
17811780
return None
17821781

17831782
if uint64_conflict(&state):
@@ -1811,14 +1810,14 @@ cdef int _try_uint64_nogil(parser_t *parser, int64_t col,
18111810
data[i] = 0
18121811
continue
18131812

1814-
data[i] = str_to_uint64(state, word, parser.decimal, INT64_MAX, UINT64_MAX,
1813+
data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
18151814
&error, parser.thousands)
18161815
if error != 0:
18171816
return error
18181817
else:
18191818
for i in range(lines):
18201819
COLITER_NEXT(it, word)
1821-
data[i] = str_to_uint64(state, word, parser.decimal, INT64_MAX, UINT64_MAX,
1820+
data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
18221821
&error, parser.thousands)
18231822
if error != 0:
18241823
return error
@@ -1848,9 +1847,9 @@ cdef _try_int64(parser_t *parser, int64_t col,
18481847
if error == ERROR_OVERFLOW:
18491848
# Can't get the word variable
18501849
raise OverflowError("Overflow")
1851-
elif raise_on_float and error == ERROR_IS_FLOAT:
1850+
elif raise_on_float and error == ERROR_INVALID_CHARS:
18521851
raise ValueError("Number is float")
1853-
elif not raise_on_float or error != ERROR_IS_FLOAT:
1852+
elif not raise_on_float or error != ERROR_INVALID_CHARS:
18541853
return None, None
18551854

18561855
return result, na_count
@@ -1879,14 +1878,14 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
18791878
data[i] = NA
18801879
continue
18811880

1882-
data[i] = str_to_int64(word, parser.decimal, INT64_MIN, INT64_MAX,
1881+
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
18831882
&error, parser.thousands)
18841883
if error != 0:
18851884
return error
18861885
else:
18871886
for i in range(lines):
18881887
COLITER_NEXT(it, word)
1889-
data[i] = str_to_int64(word, parser.decimal, INT64_MIN, INT64_MAX,
1888+
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
18901889
&error, parser.thousands)
18911890
if error != 0:
18921891
return error

pandas/_libs/src/parser/tokenizer.c

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,8 +1834,8 @@ int uint64_conflict(uint_state *self) {
18341834
return self->seen_uint && (self->seen_sint || self->seen_null);
18351835
}
18361836

1837-
int64_t str_to_int64(const char *p_item, char decimal_separator,
1838-
int64_t int_min, int64_t int_max, int *error, char tsep) {
1837+
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
1838+
int *error, char tsep) {
18391839
const char *p = p_item;
18401840
// Skip leading spaces.
18411841
while (isspace_ascii(*p)) {
@@ -1942,8 +1942,8 @@ int64_t str_to_int64(const char *p_item, char decimal_separator,
19421942
}
19431943

19441944
// any character left over (e.g. a decimal point or exponent) is invalid
1945-
if (*p == decimal_separator || *p == 'e' || *p == 'E') {
1946-
*error = ERROR_IS_FLOAT;
1945+
if (*p != '\0') {
1946+
*error = ERROR_INVALID_CHARS;
19471947
}
19481948
return 0;
19491949
}
@@ -1955,20 +1955,15 @@ int64_t str_to_int64(const char *p_item, char decimal_separator,
19551955

19561956
// Did we use up all the characters?
19571957
if (*p) {
1958-
if (*p == decimal_separator || *p == 'e' || *p == 'E') {
1959-
*error = ERROR_IS_FLOAT;
1960-
} else {
1961-
*error = ERROR_INVALID_CHARS;
1962-
}
1958+
*error = ERROR_INVALID_CHARS;
19631959
return 0;
19641960
}
19651961

19661962
*error = 0;
19671963
return number;
19681964
}
19691965

1970-
uint64_t str_to_uint64(uint_state *state, const char *p_item,
1971-
char decimal_separator, int64_t int_max,
1966+
uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
19721967
uint64_t uint_max, int *error, char tsep) {
19731968
const char *p = p_item;
19741969
// Skip leading spaces.
@@ -2039,8 +2034,8 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item,
20392034
}
20402035

20412036
// any character left over (e.g. a decimal point or exponent) is invalid
2042-
if (*p == decimal_separator || *p == 'e' || *p == 'E') {
2043-
*error = ERROR_IS_FLOAT;
2037+
if (*p != '\0') {
2038+
*error = ERROR_INVALID_CHARS;
20442039
}
20452040
return 0;
20462041
}
@@ -2052,11 +2047,7 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item,
20522047

20532048
// Did we use up all the characters?
20542049
if (*p) {
2055-
if (*p == decimal_separator || *p == 'e' || *p == 'E') {
2056-
*error = ERROR_IS_FLOAT;
2057-
} else {
2058-
*error = ERROR_INVALID_CHARS;
2059-
}
2050+
*error = ERROR_INVALID_CHARS;
20602051
return 0;
20612052
}
20622053

0 commit comments

Comments
 (0)