@@ -1001,8 +1001,9 @@ def cell_activated(self, row_idx, column_idx):
10011001@adapter_for ('pathlib.Path' )
10021002def get_path_suffix_adapter (fpath ):
10031003 logger .debug (f"get_path_suffix_adapter('{ fpath } ')" )
1004- if fpath .suffix .lower () in PATH_SUFFIX_ADAPTERS :
1005- path_adapter_cls , required_module = PATH_SUFFIX_ADAPTERS [fpath .suffix ]
1004+ suffix = fpath .suffix .lower ()
1005+ if suffix in PATH_SUFFIX_ADAPTERS :
1006+ path_adapter_cls , required_module = PATH_SUFFIX_ADAPTERS [suffix ]
10061007 if required_module is not None :
10071008 if required_module not in sys .modules :
10081009 import importlib
@@ -1013,7 +1014,15 @@ def get_path_suffix_adapter(fpath):
10131014 f"which is required to handle { fpath .suffix } "
10141015 f"files" )
10151016 return None
1016- return path_adapter_cls
1017+ # 2 options:
1018+ # - either there is a single adapter for that suffix
1019+ if (isinstance (path_adapter_cls , type ) and
1020+ issubclass (path_adapter_cls , AbstractAdapter )):
1021+ return path_adapter_cls
1022+ # - different adapters handle that suffix and/or not all instances can
1023+ # be handled
1024+ else :
1025+ return path_adapter_cls (fpath )
10171026 elif fpath .is_dir ():
10181027 return DirectoryPathAdapter
10191028 else :
@@ -2771,8 +2780,12 @@ def _detect_encoding(self, chunk):
27712780 try :
27722781 import charset_normalizer
27732782 chartset_match = charset_normalizer .from_bytes (chunk ).best ()
2774- self ._encoding = chartset_match .encoding
2775- logger .debug (f"encoding detected as { self ._encoding } " )
2783+ if chartset_match is None :
2784+ self ._encoding = None
2785+ logger .debug ("could not detect encoding from chunk" )
2786+ else :
2787+ self ._encoding = chartset_match .encoding
2788+ logger .debug (f"encoding detected as { self ._encoding } " )
27762789 except ImportError :
27772790 logger .debug ("could not import 'charset_normalizer' => cannot detect encoding" )
27782791
@@ -3198,6 +3211,31 @@ def cell_activated(self, row_idx, column_idx):
31983211 # return self.data.open(info.filename)
31993212
32003213
class CSVGZPathAdapater(CsvFileAdapter):
    """CsvFileAdapter variant which accesses its file through ``gzip``."""

    @classmethod
    def open(cls, fpath):
        """Return *fpath* opened via ``gzip.open`` as UTF-8 text."""
        import gzip
        # an explicit encoding (text mode) is mandatory: without it gzip
        # would hand back bytes instead of str, which makes csv reader
        # unhappy
        text_file = gzip.open(fpath, 'rt', encoding='utf-8')
        return text_file

    @property
    def _binary_file(self):
        """Binary-mode gzip handle on the file named ``self.data.name``.

        A new handle is opened on each access.
        """
        import gzip
        source_name = self.data.name
        return gzip.open(source_name, 'rb')
3226+
3227+
@path_adapter_for('.gz', 'gzip')
def dispatch_gzip_path_adapter(gz_path):
    """Pick an adapter for a gzip file based on its inner extension.

    The final suffix of *gz_path* is dropped and the suffix of what remains
    is compared case-insensitively: ``.csv`` yields ``CSVGZPathAdapater``,
    anything else yields ``None``.
    """
    # same path with the last extension (normally '.gz') removed
    inner_path = gz_path.with_name(gz_path.stem)
    if inner_path.suffix.lower() != '.csv':
        return None
    return CSVGZPathAdapater
3237+
3238+
32013239@path_adapter_for ('.zip' , 'zipfile' )
32023240class ZipPathAdapter (ZipFileAdapter ):
32033241 @classmethod
0 commit comments