99from copy import deepcopy
1010from functools import cache
1111from os import path
12- from typing import Any , Dict , Iterable , List , Mapping , MutableMapping , Optional , Set , Tuple , Union
12+ from typing import (
13+ Any ,
14+ Dict ,
15+ Iterable ,
16+ List ,
17+ Mapping ,
18+ MutableMapping ,
19+ NoReturn ,
20+ Optional ,
21+ Set ,
22+ Tuple ,
23+ Union ,
24+ )
1325
1426from airbyte_cdk .models import AirbyteLogMessage , AirbyteMessage , AirbyteStream , FailureType , Level
1527from airbyte_cdk .models import Type as MessageType
1628from airbyte_cdk .sources .file_based .config .file_based_stream_config import PrimaryKeyType
1729from airbyte_cdk .sources .file_based .exceptions import (
1830 DuplicatedFilesError ,
31+ EmptyFileSchemaInferenceError ,
1932 FileBasedSourceError ,
2033 InvalidSchemaError ,
2134 MissingSchemaError ,
@@ -230,7 +243,7 @@ def cursor_field(self) -> Union[str, List[str]]:
230243 return self .ab_last_mod_col
231244
232245 @cache
233- def get_json_schema (self ) -> JsonSchema :
246+ def get_json_schema (self ) -> JsonSchema : # type: ignore
234247 if self .use_file_transfer :
235248 return file_transfer_schema
236249 extra_fields = {
@@ -246,12 +259,12 @@ def get_json_schema(self) -> JsonSchema:
246259 exception = AirbyteTracedException (exception = config_exception ),
247260 failure_type = FailureType .config_error ,
248261 )
262+ except EmptyFileSchemaInferenceError as exc :
263+ self ._raise_schema_inference_error (exc )
249264 except AirbyteTracedException as ate :
250265 raise ate
251266 except Exception as exc :
252- raise SchemaInferenceError (
253- FileBasedSourceError .SCHEMA_INFERENCE_ERROR , stream = self .name
254- ) from exc
267+ self ._raise_schema_inference_error (exc )
255268 else :
256269 return {"type" : "object" , "properties" : {** extra_fields , ** schema ["properties" ]}}
257270
@@ -380,17 +393,24 @@ async def _infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
380393
381394 return base_schema
382395
async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:  # type: ignore
    """Infer the schema of a single file using this stream's parser.

    Awaits ``self.get_parser().infer_schema(...)`` with the stream's config,
    the given file, the stream reader and the logger, and returns the result.

    Args:
        file: The remote file whose schema should be inferred.

    Returns:
        Whatever the parser's ``infer_schema`` coroutine returns.

    Raises:
        AirbyteTracedException: Re-raised unchanged when the parser raises one
            (other than ``EmptyFileSchemaInferenceError``).
        SchemaInferenceError: Raised through ``_raise_schema_inference_error``
            for ``EmptyFileSchemaInferenceError`` and for any other exception.
    """
    try:
        return await self.get_parser().infer_schema(
            self.config, file, self.stream_reader, self.logger
        )
    except EmptyFileSchemaInferenceError as exc:
        # Must be listed before the AirbyteTracedException clause so that this
        # error is wrapped instead of being re-raised as-is (presumably it is a
        # subclass of AirbyteTracedException -- verify against the exceptions module).
        self._raise_schema_inference_error(exc, file)
    except AirbyteTracedException:
        # Already a traced error: propagate it untouched.
        raise
    except Exception as exc:
        # Anything else is reported as a schema inference failure for this file.
        self._raise_schema_inference_error(exc, file)
407+
def _raise_schema_inference_error(
    self, exc: Exception, file: Optional[RemoteFile] = None
) -> NoReturn:
    """Raise a ``SchemaInferenceError`` for this stream, chained from ``exc``.

    Args:
        exc: The exception that caused schema inference to fail; it becomes
            the ``__cause__`` of the raised error.
        file: The file being processed, if any. Its ``uri`` is attached to the
            error when a (truthy) file is given, otherwise ``None`` is used.

    Raises:
        SchemaInferenceError: Always. Built with
            ``FileBasedSourceError.SCHEMA_INFERENCE_ERROR``, the file URI, the
            string form of ``self.config.format`` (or ``None`` when it is
            falsy) and the stream name.
    """
    file_uri = file.uri if file else None
    format_repr = str(self.config.format) if self.config.format else None
    error = SchemaInferenceError(
        FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
        file=file_uri,
        format=format_repr,
        stream=self.name,
    )
    raise error from exc
0 commit comments