pandas-dev
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
Lines changed: 2 additions & 0 deletions
diff --git a/pandas/_libs/include/pandas/parser/pd_parser.h b/pandas/_libs/include/pandas/parser/pd_parser.h
Lines changed: 6 additions & 9 deletions
diff --git a/pandas/_libs/include/pandas/parser/tokenizer.h b/pandas/_libs/include/pandas/parser/tokenizer.h
Lines changed: 3 additions & 4 deletions
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
Lines changed: 6 additions & 17 deletions
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
Lines changed: 84 additions & 126 deletions
@@ -655,6 +655,7 @@ Other API changes
   an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
   the dtype of the resulting Index (:issue:`60797`)
 - :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies aligns (:issue:`55782`)
+- :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`)
 - :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
 - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
 - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
@@ -874,6 +875,7 @@ Other Removals
 - Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
 - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
 - Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
+- Enforced deprecation of storage option "pyarrow_numpy" for :class:`StringDtype` (:issue:`60152`)
 - Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`)
 
 .. ---------------------------------------------------------------------------
 
@@ -37,9 +37,8 @@ typedef struct {
   int (*parser_trim_buffers)(parser_t *);
   int (*tokenize_all_rows)(parser_t *, const char *);
   int (*tokenize_nrows)(parser_t *, size_t, const char *);
-  int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char);
-  uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t,
-                            int *, char);
+  int64_t (*str_to_int64)(const char *, int *, char);
+  uint64_t (*str_to_uint64)(uint_state *, const char *, int *, char);
   double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *);
   double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *,
                             int *);
@@ -87,12 +86,10 @@ static PandasParser_CAPI *PandasParserAPI = NULL;
   PandasParserAPI->tokenize_all_rows((self), (encoding_errors))
 #define tokenize_nrows(self, nrows, encoding_errors)                           \
   PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors))
-#define str_to_int64(p_item, int_min, int_max, error, t_sep)                   \
-  PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error),       \
-                                (t_sep))
-#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep)          \
-  PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max),     \
-                                 (error), (t_sep))
+#define str_to_int64(p_item, error, t_sep)                                     \
+  PandasParserAPI->str_to_int64((p_item), (error), (t_sep))
+#define str_to_uint64(state, p_item, error, t_sep)                             \
+  PandasParserAPI->str_to_uint64((state), (p_item), (error), (t_sep))
 #define xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)     \
   PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep),                 \
                            (skip_trailing), (error), (maybe_int))
 
@@ -208,10 +208,9 @@ void uint_state_init(uint_state *self);
 
 int uint64_conflict(uint_state *self);
 
-uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
-                       uint64_t uint_max, int *error, char tsep);
-int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
-                     int *error, char tsep);
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error,
+                       char tsep);
+int64_t str_to_int64(const char *p_item, int *error, char tsep);
 double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
                int skip_trailing, int *error, int *maybe_int);
 double precise_xstrtod(const char *p, char **q, char decimal, char sci,
 
@@ -63,11 +63,6 @@ from numpy cimport (
 cnp.import_array()
 
 from pandas._libs cimport util
-from pandas._libs.util cimport (
-    INT64_MAX,
-    INT64_MIN,
-    UINT64_MAX,
-)
 
 from pandas._libs import lib
 
@@ -281,10 +276,8 @@ cdef extern from "pandas/parser/pd_parser.h":
     int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil
     int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil
 
-    int64_t str_to_int64(char *p_item, int64_t int_min,
-                         int64_t int_max, int *error, char tsep) nogil
-    uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
-                           uint64_t uint_max, int *error, char tsep) nogil
+    int64_t str_to_int64(char *p_item,  int *error, char tsep) nogil
+    uint64_t str_to_uint64(uint_state *state, char *p_item, int *error, char tsep) nogil
 
     double xstrtod(const char *p, char **q, char decimal,
                    char sci, char tsep, int skip_trailing,
@@ -1855,15 +1848,13 @@ cdef int _try_uint64_nogil(parser_t *parser, int64_t col,
                 data[i] = 0
                 continue
 
-            data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
-                                    &error, parser.thousands)
+            data[i] = str_to_uint64(state, word, &error, parser.thousands)
             if error != 0:
                 return error
     else:
         for i in range(lines):
             COLITER_NEXT(it, word)
-            data[i] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
-                                    &error, parser.thousands)
+            data[i] = str_to_uint64(state, word, &error, parser.thousands)
             if error != 0:
                 return error
 
@@ -1920,15 +1911,13 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
                 data[i] = NA
                 continue
 
-            data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
-                                   &error, parser.thousands)
+            data[i] = str_to_int64(word, &error, parser.thousands)
             if error != 0:
                 return error
     else:
         for i in range(lines):
             COLITER_NEXT(it, word)
-            data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
-                                   &error, parser.thousands)
+            data[i] = str_to_int64(word, &error, parser.thousands)
             if error != 0:
                 return error
 
 
@@ -23,10 +23,15 @@ GitHub. See Python Software Foundation License and BSD licenses for these.
 #include <float.h>
 #include <math.h>
 #include <stdbool.h>
+#include <stdlib.h>
 
 #include "pandas/portable.h"
 #include "pandas/vendored/klib/khash.h" // for kh_int64_t, kh_destroy_int64
 
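+// Capacity, in bytes, of the fixed-size buffer that receives a word once its
+// thousands separators have been removed (terminating NUL included).
+// Arrow256 allows up to 76 decimal digits; that figure was rounded up to the
+// next power of 2.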
+#define PROCESSED_WORD_CAPACITY 128
+
 void coliter_setup(coliter_t *self, parser_t *parser, int64_t i,
                    int64_t start) {
   // column i, starting at 0
@@ -1834,114 +1839,86 @@ int uint64_conflict(uint_state *self) {
   return self->seen_uint && (self->seen_sint || self->seen_null);
 }
 
-int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
-                     int *error, char tsep) {
+/* Copy `str` into `output`, dropping every occurrence of `char_to_remove`.
+ *
+ * Exactly `str_len` bytes are read from `str`; `output` is NUL-terminated on
+ * success.
+ *
+ * Returns 0 on success, or -1 when the filtered text plus its terminator
+ * would not fit in PROCESSED_WORD_CAPACITY bytes. On failure `output` may
+ * hold a partial copy that is not NUL-terminated.
+ */
+static int copy_string_without_char(char output[PROCESSED_WORD_CAPACITY],
+                                    const char *str, size_t str_len,
+                                    char char_to_remove) {
+  const char *left = str;
+  const char *end_ptr = str + str_len;
+  size_t bytes_written = 0;
+
+  while (left < end_ptr) {
+    const size_t remaining_bytes_to_read = (size_t)(end_ptr - left);
+    const char *right = memchr(left, char_to_remove, remaining_bytes_to_read);
+    // No further match: the final chunk runs to the end of the input.
+    const size_t chunk_size =
+        right ? (size_t)(right - left) : remaining_bytes_to_read;
+
+    // `>=` keeps one byte free for the terminating NUL.
+    if (chunk_size + bytes_written >= PROCESSED_WORD_CAPACITY) {
+      return -1;
+    }
+    memcpy(&output[bytes_written], left, chunk_size);
+    bytes_written += chunk_size;
+
+    if (!right) {
+      // Stop here rather than stepping `left` beyond `end_ptr`, which would
+      // be an out-of-range pointer for a buffer that ends at `end_ptr`.
+      break;
+    }
+    left = right + 1; // Resume just after the removed character.
+  }
+
+  output[bytes_written] = '\0';
+  return 0;
+}
+
+/* Parse `p_item` as a base-10 signed 64-bit integer.
+ *
+ * Leading and trailing ASCII whitespace is skipped. When `tsep` is not '\0',
+ * every occurrence of it is removed from the word before conversion.
+ *
+ * On failure 0 is returned and `*error` is set to:
+ *   - ERROR_NO_DIGITS     no digit follows the optional sign;
+ *   - ERROR_OVERFLOW      the value is out of range for strtoll, or the word
+ *                         with separators removed does not fit in
+ *                         PROCESSED_WORD_CAPACITY bytes;
+ *   - ERROR_INVALID_CHARS characters other than whitespace follow the number.
+ */
+int64_t str_to_int64(const char *p_item, int *error, char tsep) {
   const char *p = p_item;
   // Skip leading spaces.
   while (isspace_ascii(*p)) {
     ++p;
   }
 
   // Handle sign.
-  const bool isneg = *p == '-' ? true : false;
+  const bool has_sign = *p == '-' || *p == '+';
   // Handle sign.
-  if (isneg || (*p == '+')) {
-    p++;
-  }
+  const char *digit_start = has_sign ? p + 1 : p;
 
   // Check that there is a first digit.
-  if (!isdigit_ascii(*p)) {
+  if (!isdigit_ascii(*digit_start)) {
     // Error...
     *error = ERROR_NO_DIGITS;
     return 0;
   }
 
-  int64_t number = 0;
-  if (isneg) {
-    // If number is greater than pre_min, at least one more digit
-    // can be processed without overflowing.
-    int dig_pre_min = -(int_min % 10);
-    int64_t pre_min = int_min / 10;
-
-    // Process the digits.
-    char d = *p;
-    if (tsep != '\0') {
-      while (1) {
-        if (d == tsep) {
-          d = *++p;
-          continue;
-        } else if (!isdigit_ascii(d)) {
-          break;
-        }
-        if ((number > pre_min) ||
-            ((number == pre_min) && (d - '0' <= dig_pre_min))) {
-          number = number * 10 - (d - '0');
-          d = *++p;
-        } else {
-          *error = ERROR_OVERFLOW;
-          return 0;
-        }
-      }
-    } else {
-      while (isdigit_ascii(d)) {
-        if ((number > pre_min) ||
-            ((number == pre_min) && (d - '0' <= dig_pre_min))) {
-          number = number * 10 - (d - '0');
-          d = *++p;
-        } else {
-          *error = ERROR_OVERFLOW;
-          return 0;
-        }
-      }
+  char buffer[PROCESSED_WORD_CAPACITY];
+  const size_t str_len = strlen(p);
+  if (tsep != '\0' && memchr(p, tsep, str_len) != NULL) {
+    const int status = copy_string_without_char(buffer, p, str_len, tsep);
+    if (status != 0) {
+      // Word is too big, probably will cause an overflow
+      *error = ERROR_OVERFLOW;
+      return 0;
     }
-  } else {
-    // If number is less than pre_max, at least one more digit
-    // can be processed without overflowing.
-    int64_t pre_max = int_max / 10;
-    int dig_pre_max = int_max % 10;
-
-    // Process the digits.
-    char d = *p;
-    if (tsep != '\0') {
-      while (1) {
-        if (d == tsep) {
-          d = *++p;
-          continue;
-        } else if (!isdigit_ascii(d)) {
-          break;
-        }
-        if ((number < pre_max) ||
-            ((number == pre_max) && (d - '0' <= dig_pre_max))) {
-          number = number * 10 + (d - '0');
-          d = *++p;
+    p = buffer;
+  }
 
-        } else {
-          *error = ERROR_OVERFLOW;
-          return 0;
-        }
-      }
-    } else {
-      while (isdigit_ascii(d)) {
-        if ((number < pre_max) ||
-            ((number == pre_max) && (d - '0' <= dig_pre_max))) {
-          number = number * 10 + (d - '0');
-          d = *++p;
+  // strtoll never clears errno, so reset it first; otherwise a stale ERANGE
+  // left by an earlier library call would be misreported as an overflow.
+  errno = 0;
+  char *endptr;
+  int64_t number = strtoll(p, &endptr, 10);
 
-        } else {
-          *error = ERROR_OVERFLOW;
-          return 0;
-        }
-      }
-    }
+  if (errno == ERANGE) {
+    *error = ERROR_OVERFLOW;
+    errno = 0;
+    return 0;
   }
 
   // Skip trailing spaces.
-  while (isspace_ascii(*p)) {
-    ++p;
+  while (isspace_ascii(*endptr)) {
+    ++endptr;
   }
 
   // Did we use up all the characters?
-  if (*p) {
+  if (*endptr) {
     *error = ERROR_INVALID_CHARS;
     return 0;
   }
@@ -1950,8 +1927,8 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
   return number;
 }
 
-uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
-                       uint64_t uint_max, int *error, char tsep) {
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int *error,
+                       char tsep) {
   const char *p = p_item;
   // Skip leading spaces.
   while (isspace_ascii(*p)) {
@@ -1974,58 +1951,39 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
     return 0;
   }
 
-  // If number is less than pre_max, at least one more digit
-  // can be processed without overflowing.
-  //
-  // Process the digits.
-  uint64_t number = 0;
-  const uint64_t pre_max = uint_max / 10;
-  const uint64_t dig_pre_max = uint_max % 10;
-  char d = *p;
-  if (tsep != '\0') {
-    while (1) {
-      if (d == tsep) {
-        d = *++p;
-        continue;
-      } else if (!isdigit_ascii(d)) {
-        break;
-      }
-      if ((number < pre_max) ||
-          ((number == pre_max) && ((uint64_t)(d - '0') <= dig_pre_max))) {
-        number = number * 10 + (d - '0');
-        d = *++p;
-
-      } else {
-        *error = ERROR_OVERFLOW;
-        return 0;
-      }
+  char buffer[PROCESSED_WORD_CAPACITY];
+  const size_t str_len = strlen(p);
+  if (tsep != '\0' && memchr(p, tsep, str_len) != NULL) {
+    const int status = copy_string_without_char(buffer, p, str_len, tsep);
+    if (status != 0) {
+      // Word is too big, probably will cause an overflow
+      *error = ERROR_OVERFLOW;
+      return 0;
     }
-  } else {
-    while (isdigit_ascii(d)) {
-      if ((number < pre_max) ||
-          ((number == pre_max) && ((uint64_t)(d - '0') <= dig_pre_max))) {
-        number = number * 10 + (d - '0');
-        d = *++p;
+    p = buffer;
+  }
 
-      } else {
-        *error = ERROR_OVERFLOW;
-        return 0;
-      }
-    }
+  char *endptr;
+  uint64_t number = strtoull(p, &endptr, 10);
+
+  if (errno == ERANGE) {
+    *error = ERROR_OVERFLOW;
+    errno = 0;
+    return 0;
   }
 
   // Skip trailing spaces.
-  while (isspace_ascii(*p)) {
-    ++p;
+  while (isspace_ascii(*endptr)) {
+    ++endptr;
   }
 
   // Did we use up all the characters?
-  if (*p) {
+  if (*endptr) {
     *error = ERROR_INVALID_CHARS;
     return 0;
   }
 
-  if (number > (uint64_t)int_max) {
+  if (number > (uint64_t)INT64_MAX) {
     state->seen_uint = 1;
   }