|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | from typing import TYPE_CHECKING |
| 4 | +import warnings |
4 | 5 |
|
5 | 6 | from pandas._config import using_pyarrow_string_dtype |
6 | 7 |
|
7 | 8 | from pandas._libs import lib |
| 9 | +from pandas._libs.parsers import ParserWarning |
8 | 10 | from pandas.compat._optional import import_optional_dependency |
| 11 | +from pandas.util._exceptions import find_stack_level |
9 | 12 |
|
10 | 13 | from pandas.core.dtypes.inference import is_integer |
11 | 14 |
|
@@ -85,6 +88,29 @@ def _get_pyarrow_options(self) -> None: |
85 | 88 | and option_name |
86 | 89 | in ("delimiter", "quote_char", "escape_char", "ignore_empty_lines") |
87 | 90 | } |
| 91 | + |
| 92 | + if "on_bad_lines" in self.kwds: |
| 93 | + if callable(self.kwds["on_bad_lines"]): |
| 94 | + self.parse_options["invalid_row_handler"] = self.kwds["on_bad_lines"] |
| 95 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.ERROR: |
| 96 | + self.parse_options[ |
| 97 | + "invalid_row_handler" |
| 98 | + ] = None # PyArrow raises an exception by default |
| 99 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.WARN: |
| 100 | + |
def handle_warning(invalid_row) -> str:
    """Warn about a malformed row and request that it be skipped.

    The surrounding code registers this function as the
    ``invalid_row_handler`` parse option when ``on_bad_lines`` is set to
    the WARN handling method.

    Parameters
    ----------
    invalid_row : object
        Description of the offending row. Only the attributes
        ``expected_columns``, ``actual_columns`` and ``text`` are read.
        NOTE(review): presumably a ``pyarrow.csv.InvalidRow`` -- confirm
        against the PyArrow version in use.

    Returns
    -------
    str
        Always ``"skip"``.
    """
    # Emit a warning instead of raising so that one bad row does not abort
    # the whole parse; the stack level is resolved at call time by the
    # project's find_stack_level helper.
    warnings.warn(
        f"Expected {invalid_row.expected_columns} columns, but found "
        f"{invalid_row.actual_columns}: {invalid_row.text}",
        ParserWarning,
        stacklevel=find_stack_level(),
    )
    # The returned token is the handler's verdict for this row.
    return "skip"
| 109 | + |
| 110 | + self.parse_options["invalid_row_handler"] = handle_warning |
| 111 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.SKIP: |
| 112 | + self.parse_options["invalid_row_handler"] = lambda _: "skip" |
| 113 | + |
88 | 114 | self.convert_options = { |
89 | 115 | option_name: option_value |
90 | 116 | for option_name, option_value in self.kwds.items() |
|
0 commit comments