@@ -140,20 +140,29 @@ def handle_warning(invalid_row) -> str:
140140 "encoding" : self .encoding ,
141141 }
142142
143- def _finalize_pandas_output (self , frame : DataFrame ) -> DataFrame :
144- """
145- Processes data read in based on kwargs.
def _get_convert_options(self):
    """
    Build the pyarrow ``ConvertOptions`` object from ``self.convert_options``.

    Returns
    -------
    pyarrow.csv.ConvertOptions
        Options constructed by unpacking ``self.convert_options`` as
        keyword arguments.

    Raises
    ------
    TypeError
        If pyarrow rejects the options. When the ``null_values`` entry is
        not a list-like made up solely of strings, a pandas-specific
        message is raised instead, chained to the pyarrow error;
        otherwise the original pyarrow error is re-raised unchanged.

    Notes
    -----
    Before the null values are inspected, any ``include_columns`` entry is
    passed to ``self._validate_usecols``, which may raise its own error.
    """
    pyarrow_csv = import_optional_dependency("pyarrow.csv")

    try:
        return pyarrow_csv.ConvertOptions(**self.convert_options)
    except TypeError as err:
        # pyarrow's TypeError is generic; work out which option was at
        # fault so the user gets an actionable message.
        include_columns = self.convert_options.get("include_columns")
        if include_columns is not None:
            self._validate_usecols(include_columns)

        null_values = self.convert_options.get("null_values", set())
        all_strings = lib.is_list_like(null_values) and all(
            isinstance(value, str) for value in null_values
        )
        if not all_strings:
            raise TypeError(
                "The 'pyarrow' engine requires all na_values to be strings"
            ) from err

        # None of the known causes matched: surface pyarrow's error as-is.
        raise
164+
165+ def _adjust_column_names (self , frame : DataFrame ) -> tuple [DataFrame , bool ]:
157166 num_cols = len (frame .columns )
158167 multi_index_named = True
159168 if self .header is None :
@@ -169,8 +178,9 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
169178 self .names = columns_prefix + self .names
170179 multi_index_named = False
171180 frame .columns = self .names
181+ return frame , multi_index_named
172182
173- frame = self . _do_date_conversions ( frame . columns , frame )
183+ def _finalize_index ( self , frame : DataFrame , multi_index_named : bool ) -> DataFrame :
174184 if self .index_col is not None :
175185 index_to_set = self .index_col .copy ()
176186 for i , item in enumerate (self .index_col ):
@@ -196,6 +206,9 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
196206 if self .header is None and not multi_index_named :
197207 frame .index .names = [None ] * len (frame .index .names )
198208
209+ return frame
210+
211+ def _finalize_dtype (self , frame : DataFrame ) -> DataFrame :
199212 if self .dtype is not None :
200213 # Ignore non-existent columns from dtype mapping
201214 # like other parsers do
@@ -214,6 +227,26 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
214227 raise ValueError (str (err )) from err
215228 return frame
216229
def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
    """
    Post-process a freshly read frame according to the parser's kwargs.

    The frame is passed through four steps in order: column-name
    adjustment, date conversion, index finalization and dtype
    finalization.

    Parameters
    ----------
    frame : DataFrame
        The DataFrame to process.

    Returns
    -------
    DataFrame
        The processed DataFrame.
    """
    # The flag produced while adjusting column names is needed later,
    # when the index is finalized.
    renamed, multi_index_named = self._adjust_column_names(frame)
    converted = self._do_date_conversions(renamed.columns, renamed)
    indexed = self._finalize_index(converted, multi_index_named)
    return self._finalize_dtype(indexed)
249+
217250 def _validate_usecols (self , usecols ) -> None :
218251 if lib .is_list_like (usecols ) and not all (isinstance (x , str ) for x in usecols ):
219252 raise ValueError (
@@ -239,23 +272,7 @@ def read(self) -> DataFrame:
239272 pa = import_optional_dependency ("pyarrow" )
240273 pyarrow_csv = import_optional_dependency ("pyarrow.csv" )
241274 self ._get_pyarrow_options ()
242-
243- try :
244- convert_options = pyarrow_csv .ConvertOptions (** self .convert_options )
245- except TypeError as err :
246- include = self .convert_options .get ("include_columns" , None )
247- if include is not None :
248- self ._validate_usecols (include )
249-
250- nulls = self .convert_options .get ("null_values" , set ())
251- if not lib .is_list_like (nulls ) or not all (
252- isinstance (x , str ) for x in nulls
253- ):
254- raise TypeError (
255- "The 'pyarrow' engine requires all na_values to be strings"
256- ) from err
257-
258- raise
275+ convert_options = self ._get_convert_options ()
259276
260277 try :
261278 table = pyarrow_csv .read_csv (
0 commit comments