Skip to content

Commit 05504cf

Browse files
committed
refactor: Extract macro for readability
1 parent 4fbac54 commit 05504cf

File tree

1 file changed

+65
-16
lines changed

1 file changed

+65
-16
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 65 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,6 +1732,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
17321732
return number;
17331733
}
17341734

1735+
/* Helper functions and macros for string consumption and buffer management */
1736+
17351737
static inline int str_consume_nspan(char **dst, size_t dst_sz, const char **src,
17361738
size_t src_sz, const char *charset) {
17371739
size_t span_sz = strspn(*src, charset);
@@ -1754,45 +1756,92 @@ static inline int str_consume_span(char **dst, size_t dst_sz, const char **src,
17541756
return str_consume_nspan(dst, dst_sz, src, SIZE_MAX, charset);
17551757
}
17561758

1759+
/* Advance the input cursor `cur` past the leading run of characters that
   belong to `set`, passing a NULL destination with zero capacity so nothing
   is written anywhere (presumably the helper skips the copy for a NULL
   destination -- its body is not shown here). `cur` must be a modifiable
   `const char *` lvalue because its address is taken. Expands to an
   expression whose value is the helper's int return. */
#define SKIP_SPAN(cur, set) str_consume_span(NULL, 0, &(cur), (set))

/* Same as SKIP_SPAN, but forwards `limit` as the helper's `src_sz`
   argument (presumably the maximum number of characters to consume). */
#define SKIP_NSPAN(cur, limit, set)                                            \
  str_consume_nspan(NULL, 0, &(cur), (limit), (set))
1763+
1764+
/* NOTE: the three macros below contain a hidden `return ERROR_OVERFLOW;`.
   They may only be used inside a function that returns int and that has no
   cleanup to perform on the overflow path. */

/* Copy the leading run of `charset` characters from the input cursor `s`
   into the output window [`d`, `de`), advancing both cursors through the
   helper. `d` and `s` must be modifiable lvalues (their addresses are
   taken); `de` is the one-past-the-end pointer of the output buffer.
   A negative helper result is treated as overflow and returns
   ERROR_OVERFLOW from the enclosing function. */
#define SAFE_CONSUME_SPAN(d, de, s, charset)                                   \
  do {                                                                         \
    const size_t _remaining = (size_t)((de) - (d));                            \
    if (str_consume_span(&(d), _remaining, &(s), (charset)) < 0)               \
      return ERROR_OVERFLOW;                                                   \
  } while (0)

/* Same as SAFE_CONSUME_SPAN, but forwards `n` as the helper's `src_sz`
   argument. */
#define SAFE_CONSUME_NSPAN(d, de, s, n, charset)                               \
  do {                                                                         \
    const size_t _remaining = (size_t)((de) - (d));                            \
    if (str_consume_nspan(&(d), _remaining, &(s), (n), (charset)) < 0)         \
      return ERROR_OVERFLOW;                                                   \
  } while (0)

/* Return ERROR_OVERFLOW from the enclosing function unless at least one
   more byte can be stored at `d`, i.e. unless `d` is strictly below the
   one-past-the-end pointer `de`. */
#define CHECK_BUFFER_SPACE(d, de)                                              \
  do {                                                                         \
    if ((d) >= (de))                                                           \
      return ERROR_OVERFLOW;                                                   \
  } while (0)
1784+
17571785
/* copy a decimal number string with `decimal`, `tsep` as decimal point
17581786
and thousands separator to an equivalent c-locale decimal string (striping
17591787
`tsep`, replacing `decimal` with '.'). The result is written into `dst`
17601788
and null-terminated. */
17611789

17621790
static int _str_copy_decimal_str_c(char *dst, size_t dst_sz, const char *src,
17631791
char **endpos, char decimal, char tsep) {
1764-
const char *digits = "0123456789";
1765-
const char *exponents = "Ee";
1766-
const char *signs = "+-";
1767-
const char *whitespaces = " \t\n\v\f\r";
1792+
static const char *digits = "0123456789";
1793+
static const char *exponents = "Ee";
1794+
static const char *signs = "+-";
1795+
static const char *whitespaces = " \t\n\v\f\r";
1796+
17681797
const char decimals[] = {decimal, '\0'};
17691798
const char tseps[] = {tsep, '\0'};
17701799

17711800
const char *s = src;
17721801
char *d = dst;
1773-
char *de = dst + dst_sz;
1802+
const char *de = dst + dst_sz;
1803+
int ret;
1804+
1805+
if (endpos != NULL)
1806+
*endpos = (char *)s;
1807+
17741808
// Skip leading whitespace.
1775-
str_consume_span(NULL, 0, &s, whitespaces);
1776-
// Copy Leading sign
1777-
str_consume_nspan(&d, de - d, &s, 1, signs);
1809+
SKIP_SPAN(s, whitespaces);
1810+
1811+
// Copy leading sign (optional)
1812+
SAFE_CONSUME_NSPAN(d, de, s, 1, signs);
1813+
17781814
// Copy integer part dropping `tsep`
1779-
while (str_consume_span(&d, de - d, &s, digits)) {
1780-
str_consume_nspan(NULL, 0, &s, 1, tseps);
1815+
while ((ret = str_consume_span(&d, de - d, &s, digits))) {
1816+
if (ret < 0)
1817+
return ERROR_OVERFLOW;
1818+
SKIP_NSPAN(s, 1, tseps);
17811819
}
1820+
17821821
// Replace `decimal` with '.'
1783-
if (str_consume_nspan(NULL, 0, &s, 1, decimals)) {
1822+
if (SKIP_NSPAN(s, 1, decimals)) {
1823+
CHECK_BUFFER_SPACE(d, de);
17841824
*d++ = '.';
17851825
}
1826+
17861827
// Copy fractional part after decimal (if any)
1787-
str_consume_span(&d, de - d, &s, digits);
1828+
SAFE_CONSUME_SPAN(d, de, s, digits);
1829+
17881830
// Copy exponent if any
1789-
if (str_consume_nspan(&d, de - d, &s, 1, exponents)) {
1790-
str_consume_nspan(&d, de - d, &s, 1, signs);
1791-
str_consume_span(&d, de - d, &s, digits);
1831+
if ((ret = str_consume_nspan(&d, de - d, &s, 1, exponents)) > 0) {
1832+
SAFE_CONSUME_NSPAN(d, de, s, 1, signs);
1833+
SAFE_CONSUME_SPAN(d, de, s, digits);
1834+
} else if (ret < 0) {
1835+
return ERROR_OVERFLOW;
17921836
}
1793-
*d++ = '\0'; // terminate
1837+
1838+
// Terminate string
1839+
CHECK_BUFFER_SPACE(d, de);
1840+
*d++ = '\0';
1841+
17941842
if (endpos != NULL)
17951843
*endpos = (char *)s;
1844+
17961845
return 0;
17971846
}
17981847

0 commit comments

Comments
 (0)