@@ -93,8 +93,11 @@ def wrapper(*args, **kwargs):
9393@pyarrow_cpu_count_equal_numba_num_treads
9494def do_read_csv (filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates ):
9595
96+ if delimiter is None :
97+ delimiter = sep
98+
9699 pa_options = get_pyarrow_read_csv_options (
97- sep , delimiter , names , usecols , dtype , skiprows , parse_dates )
100+ delimiter , names , usecols , dtype , skiprows , parse_dates )
98101
99102 table = csv .read_csv (
100103 filepath_or_buffer ,
@@ -107,11 +110,11 @@ def do_read_csv(filepath_or_buffer, sep, delimiter, names, usecols, dtype, skipr
107110
108111
109112def csv_reader_infer_nb_arrow_type (
110- filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates
113+ filepath_or_buffer , delimiter = ',' , names = None , usecols = None , dtype = None , skiprows = None , parse_dates = False
111114):
112115
113116 read_opts , parse_opts , convert_opts = get_pyarrow_read_csv_options (
114- sep , delimiter , names , usecols , dtype , skiprows , parse_dates )
117+ delimiter , names , usecols , dtype , skiprows , parse_dates )
115118 csv_reader = csv .open_csv (filepath_or_buffer ,
116119 read_options = read_opts ,
117120 parse_options = parse_opts ,
@@ -138,13 +141,13 @@ def csv_reader_infer_nb_arrow_type(
138141
139142
140143def csv_reader_infer_nb_pandas_type (
141- filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates
144+ filepath_or_buffer , delimiter = ',' , names = None , usecols = None , dtype = None , skiprows = None , parse_dates = False
142145):
143146
144147 # infer column types from the first block (similarly as Arrow does this)
145148 # TO-DO: tune the block size or allow user configure it via env var
146149 rows_to_read = 1000
147- df = pd .read_csv (filepath_or_buffer , sep = sep , delimiter = delimiter , names = names ,
150+ df = pd .read_csv (filepath_or_buffer , delimiter = delimiter , names = names ,
148151 usecols = usecols , dtype = dtype , skiprows = skiprows , nrows = rows_to_read ,
149152 parse_dates = parse_dates )
150153
@@ -185,10 +188,7 @@ def csv_reader_get_pyarrow_read_options(names, skiprows):
185188 return read_options
186189
187190
188- def csv_reader_get_pyarrow_parse_options (delimiter , sep ):
189-
190- if delimiter is None :
191- delimiter = sep
191+ def csv_reader_get_pyarrow_parse_options (delimiter ):
192192
193193 parse_options = csv .ParseOptions (
194194 delimiter = delimiter ,
@@ -264,11 +264,11 @@ def csv_reader_get_pyarrow_convert_options(names, usecols, dtype, parse_dates):
264264 return convert_options
265265
266266
267- def get_pyarrow_read_csv_options (sep , delimiter , names , usecols , dtype , skiprows , parse_dates ):
267+ def get_pyarrow_read_csv_options (delimiter , names , usecols , dtype , skiprows , parse_dates ):
268268 """ This function attempts to map pandas read_csv parameters to pyarrow read_csv options to be used """
269269
270270 read_opts = csv_reader_get_pyarrow_read_options (names , skiprows )
271- parse_opts = csv_reader_get_pyarrow_parse_options (delimiter , sep )
271+ parse_opts = csv_reader_get_pyarrow_parse_options (delimiter )
272272 convert_opts = csv_reader_get_pyarrow_convert_options (names , usecols , dtype , parse_dates )
273273
274274 return (read_opts , parse_opts , convert_opts )
0 commit comments