From 17c836f601205c5bf65b8e4acb21d644fc1116f2 Mon Sep 17 00:00:00 2001 From: raushan Date: Fri, 7 Nov 2025 10:50:04 +0100 Subject: [PATCH 1/5] fix the helper fn for new processor config format --- .../models/auto/feature_extraction_auto.py | 45 +++++++++++------ .../models/auto/image_processing_auto.py | 42 ++++++++++------ .../models/auto/video_processing_auto.py | 39 ++++++++++----- tests/models/auto/test_processor_auto.py | 49 +++++++++++++++++++ 4 files changed, 131 insertions(+), 44 deletions(-) diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 22bc20728aad..475c11a63965 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -24,7 +24,7 @@ from ...configuration_utils import PreTrainedConfig from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code from ...feature_extraction_utils import FeatureExtractionMixin -from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, cached_file, logging +from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, cached_file, logging from .auto_factory import _LazyAutoMapping from .configuration_auto import ( CONFIG_MAPPING_NAMES, @@ -167,27 +167,40 @@ def get_feature_extractor_config( feature_extractor.save_pretrained("feature-extractor-test") feature_extractor_config = get_feature_extractor_config("feature-extractor-test") ```""" - resolved_config_file = cached_file( - pretrained_model_name_or_path, - FEATURE_EXTRACTOR_NAME, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - token=token, - revision=revision, - local_files_only=local_files_only, - _raise_exceptions_for_gated_repo=False, - _raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - ) - if resolved_config_file is None: + resolved_config_files = [ + resolved_file + for filename in 
[FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME] + if ( + resolved_file := cached_file( + pretrained_model_name_or_path, + filename=filename, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + _raise_exceptions_for_connection_errors=False, + ) + ) + is not None + ] + if resolved_config_files is None: logger.info( "Could not locate the feature extractor configuration file, will try to use the model config instead." ) return {} + resolved_config_file = resolved_config_files[0] with open(resolved_config_file, encoding="utf-8") as reader: - return json.load(reader) + feature_extractor_dict = json.load(reader) + if "audio_processor" in feature_extractor_dict: + feature_extractor_dict = feature_extractor_dict["audio_processor"] + else: + feature_extractor_dict = feature_extractor_dict.get("feature_extractor", feature_extractor_dict) + return feature_extractor_dict class AutoFeatureExtractor: diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 0bd8cc850e2c..79f3114b45d1 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -29,6 +29,7 @@ from ...utils import ( CONFIG_NAME, IMAGE_PROCESSOR_NAME, + PROCESSOR_NAME, cached_file, is_timm_config_dict, is_timm_local_checkpoint, @@ -305,27 +306,38 @@ def get_image_processor_config( image_processor.save_pretrained("image-processor-test") image_processor_config = get_image_processor_config("image-processor-test") ```""" - resolved_config_file = cached_file( - pretrained_model_name_or_path, - IMAGE_PROCESSOR_NAME, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - token=token, - revision=revision, - local_files_only=local_files_only, - _raise_exceptions_for_gated_repo=False, - 
_raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - ) - if resolved_config_file is None: + resolved_config_files = [ + resolved_file + for filename in [IMAGE_PROCESSOR_NAME, PROCESSOR_NAME] + if ( + resolved_file := cached_file( + pretrained_model_name_or_path, + filename=filename, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + _raise_exceptions_for_connection_errors=False, + ) + ) + is not None + ] + # An empty list if none of the possible files is found in the repo + if not resolved_config_files: logger.info( "Could not locate the image processor configuration file, will try to use the model config instead." ) return {} + resolved_config_file = resolved_config_files[0] with open(resolved_config_file, encoding="utf-8") as reader: - return json.load(reader) + image_processor_dict = json.load(reader) + image_processor_dict = image_processor_dict.get("image_processor", image_processor_dict) + return image_processor_dict def _warning_fast_image_processor_available(fast_class): diff --git a/src/transformers/models/auto/video_processing_auto.py b/src/transformers/models/auto/video_processing_auto.py index bcac454b2d65..4c1987de35f8 100644 --- a/src/transformers/models/auto/video_processing_auto.py +++ b/src/transformers/models/auto/video_processing_auto.py @@ -23,7 +23,7 @@ # Build the list of all video processors from ...configuration_utils import PreTrainedConfig from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code -from ...utils import CONFIG_NAME, VIDEO_PROCESSOR_NAME, cached_file, is_torchvision_available, logging +from ...utils import CONFIG_NAME, PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, cached_file, is_torchvision_available, logging from ...utils.import_utils import requires from 
...video_processing_utils import BaseVideoProcessor from .auto_factory import _LazyAutoMapping @@ -167,24 +167,37 @@ def get_video_processor_config( video_processor.save_pretrained("video-processor-test") video_processor = get_video_processor_config("video-processor-test") ```""" - resolved_config_file = cached_file( - pretrained_model_name_or_path, - VIDEO_PROCESSOR_NAME, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - token=token, - revision=revision, - local_files_only=local_files_only, - ) - if resolved_config_file is None: + resolved_config_files = [ + resolved_file + for filename in [VIDEO_PROCESSOR_NAME, PROCESSOR_NAME] + if ( + resolved_file := cached_file( + pretrained_model_name_or_path, + filename=filename, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + _raise_exceptions_for_connection_errors=False, + ) + ) + is not None + ] + if resolved_config_files is None: logger.info( "Could not locate the video processor configuration file, will try to use the model config instead." 
) return {} + resolved_config_file = resolved_config_files[0] with open(resolved_config_file, encoding="utf-8") as reader: - return json.load(reader) + video_processor_dict = json.load(reader) + video_processor_dict = video_processor_dict.get("video_processor", video_processor_dict) + return video_processor_dict @requires(backends=("vision", "torchvision")) diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py index 5ce4e9a3867b..a8d8d137b7ca 100644 --- a/tests/models/auto/test_processor_auto.py +++ b/tests/models/auto/test_processor_auto.py @@ -33,8 +33,12 @@ AutoFeatureExtractor, AutoProcessor, AutoTokenizer, + BaseVideoProcessor, BertTokenizer, + FeatureExtractionMixin, + ImageProcessingMixin, LlamaTokenizer, + LlavaOnevisionVideoProcessor, LlavaProcessor, ProcessorMixin, SiglipImageProcessor, @@ -42,6 +46,9 @@ Wav2Vec2FeatureExtractor, Wav2Vec2Processor, ) +from transformers.models.auto.feature_extraction_auto import get_feature_extractor_config +from transformers.models.auto.image_processing_auto import get_image_processor_config +from transformers.models.auto.video_processing_auto import get_video_processor_config from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE from transformers.utils import ( @@ -107,6 +114,48 @@ def test_processor_from_local_directory_from_extractor_config(self): self.assertIsInstance(processor, Wav2Vec2Processor) + def test_subcomponent_get_config_dict__saved_as_nested_config(self): + """ + Tests that we can get config dict of a subcomponents of a processor, + even if they were saved as nested dict in `processor_config.json` + """ + # Test feature extractor first + with tempfile.TemporaryDirectory() as tmpdirname: + processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h") + processor.save_pretrained(tmpdirname) + + config_dict_1 = get_feature_extractor_config(tmpdirname) 
+ feature_extractor_1 = Wav2Vec2FeatureExtractor(**config_dict_1) + self.assertIsInstance(feature_extractor_1, Wav2Vec2FeatureExtractor) + + config_dict_2, _ = FeatureExtractionMixin.get_feature_extractor_dict(tmpdirname) + feature_extractor_2 = Wav2Vec2FeatureExtractor(**config_dict_2) + self.assertIsInstance(feature_extractor_2, Wav2Vec2FeatureExtractor) + self.assertEqual(config_dict_1, config_dict_2) + + # Test image and video processors next + with tempfile.TemporaryDirectory() as tmpdirname: + processor = AutoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf") + processor.save_pretrained(tmpdirname) + + config_dict_1 = get_image_processor_config(tmpdirname) + image_processor_1 = SiglipImageProcessor(**config_dict_1) + self.assertIsInstance(image_processor_1, SiglipImageProcessor) + + config_dict_2, _ = ImageProcessingMixin.get_image_processor_dict(tmpdirname) + image_processor_2 = SiglipImageProcessor(**config_dict_2) + self.assertIsInstance(image_processor_2, SiglipImageProcessor) + self.assertEqual(config_dict_1, config_dict_2) + + config_dict_1 = get_video_processor_config(tmpdirname) + video_processor_1 = LlavaOnevisionVideoProcessor(**config_dict_1) + self.assertIsInstance(video_processor_1, LlavaOnevisionVideoProcessor) + + config_dict_2, _ = BaseVideoProcessor.get_video_processor_dict(tmpdirname) + video_processor_2 = LlavaOnevisionVideoProcessor(**config_dict_2) + self.assertIsInstance(video_processor_2, LlavaOnevisionVideoProcessor) + self.assertEqual(config_dict_1, config_dict_2) + def test_processor_from_processor_class(self): with tempfile.TemporaryDirectory() as tmpdirname: feature_extractor = Wav2Vec2FeatureExtractor() From 68bebfbcf5482b705a6714fcd59eebb523c5d5f7 Mon Sep 17 00:00:00 2001 From: raushan Date: Fri, 7 Nov 2025 15:37:02 +0100 Subject: [PATCH 2/5] change the priority order --- src/transformers/feature_extraction_utils.py | 2 +- src/transformers/image_processing_base.py | 2 +- 
.../models/auto/feature_extraction_auto.py | 3 ++- .../models/auto/image_processing_auto.py | 3 ++- .../models/auto/video_processing_auto.py | 13 +++++++++++-- src/transformers/video_processing_utils.py | 4 ++-- tests/models/auto/test_processor_auto.py | 2 +- 7 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index 4c9db36b020b..fa0b099bd6e0 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -437,7 +437,7 @@ def get_feature_extractor_dict( # Load from local folder or from cache or download from model Hub and cache resolved_feature_extractor_files = [ resolved_file - for filename in [feature_extractor_file, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, feature_extractor_file] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py index 60774390bf23..fce48957f1e6 100644 --- a/src/transformers/image_processing_base.py +++ b/src/transformers/image_processing_base.py @@ -290,7 +290,7 @@ def get_image_processor_dict( # Load from local folder or from cache or download from model Hub and cache resolved_image_processor_files = [ resolved_file - for filename in [image_processor_file, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, image_processor_file] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 475c11a63965..1c517de15a72 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -167,9 +167,10 @@ def get_feature_extractor_config( feature_extractor.save_pretrained("feature-extractor-test") feature_extractor_config = get_feature_extractor_config("feature-extractor-test") 
```""" + # Load with a priority given to the nested processor config, if available in repo resolved_config_files = [ resolved_file - for filename in [FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, FEATURE_EXTRACTOR_NAME] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 79f3114b45d1..ec8d53ad60d0 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -306,9 +306,10 @@ def get_image_processor_config( image_processor.save_pretrained("image-processor-test") image_processor_config = get_image_processor_config("image-processor-test") ```""" + # Load with a priority given to the nested processor config, if available in repo resolved_config_files = [ resolved_file - for filename in [IMAGE_PROCESSOR_NAME, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/src/transformers/models/auto/video_processing_auto.py b/src/transformers/models/auto/video_processing_auto.py index 4c1987de35f8..02d601be8895 100644 --- a/src/transformers/models/auto/video_processing_auto.py +++ b/src/transformers/models/auto/video_processing_auto.py @@ -23,7 +23,15 @@ # Build the list of all video processors from ...configuration_utils import PreTrainedConfig from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code -from ...utils import CONFIG_NAME, PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, cached_file, is_torchvision_available, logging +from ...utils import ( + CONFIG_NAME, + IMAGE_PROCESSOR_NAME, + PROCESSOR_NAME, + VIDEO_PROCESSOR_NAME, + cached_file, + is_torchvision_available, + logging, +) from ...utils.import_utils import requires from ...video_processing_utils import BaseVideoProcessor from .auto_factory import 
_LazyAutoMapping @@ -167,9 +175,10 @@ def get_video_processor_config( video_processor.save_pretrained("video-processor-test") video_processor = get_video_processor_config("video-processor-test") ```""" + # Load with a priority given to the nested processor config, if available in repo resolved_config_files = [ resolved_file - for filename in [VIDEO_PROCESSOR_NAME, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py index 08de802f22f5..d164ba75877e 100644 --- a/src/transformers/video_processing_utils.py +++ b/src/transformers/video_processing_utils.py @@ -643,10 +643,10 @@ def get_video_processor_dict( video_processor_file = VIDEO_PROCESSOR_NAME try: # Try to load with a new config name first and if not successful try with the old file name - # NOTE: we will gradually change to saving all processor configs as nested dict in PROCESSOR_NAME + # NOTE: we save all processor configs as nested dict in PROCESSOR_NAME from v5, which is the standard resolved_video_processor_files = [ resolved_file - for filename in [VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME, PROCESSOR_NAME] + for filename in [PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] if ( resolved_file := cached_file( pretrained_model_name_or_path, diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py index a8d8d137b7ca..9aaccc5b940e 100644 --- a/tests/models/auto/test_processor_auto.py +++ b/tests/models/auto/test_processor_auto.py @@ -114,7 +114,7 @@ def test_processor_from_local_directory_from_extractor_config(self): self.assertIsInstance(processor, Wav2Vec2Processor) - def test_subcomponent_get_config_dict__saved_as_nested_config(self): + def test_subcomponent_get_config_dict_saved_as_nested_config(self): """ Tests that we can get config dict of a 
subcomponents of a processor, even if they were saved as nested dict in `processor_config.json` From 7d02d746bf73dcb926401ba2fe3812cd3da297dd Mon Sep 17 00:00:00 2001 From: raushan Date: Mon, 10 Nov 2025 12:49:53 +0100 Subject: [PATCH 3/5] maybe we need to explicitly load and then decide --- src/transformers/feature_extraction_utils.py | 90 ++++++++++++------- src/transformers/image_processing_base.py | 88 +++++++++++------- .../models/auto/feature_extraction_auto.py | 83 ++++++++++------- .../models/auto/image_processing_auto.py | 79 ++++++++++------ .../models/auto/video_processing_auto.py | 53 ++++++++--- src/transformers/video_processing_utils.py | 61 ++++++++++--- 6 files changed, 312 insertions(+), 142 deletions(-) diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index fa0b099bd6e0..180c2b63cb3b 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -427,35 +427,42 @@ def get_feature_extractor_dict( feature_extractor_file = os.path.join(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME) if os.path.isfile(pretrained_model_name_or_path): resolved_feature_extractor_file = pretrained_model_name_or_path + resolved_processor_file = None is_local = True elif is_remote_url(pretrained_model_name_or_path): feature_extractor_file = pretrained_model_name_or_path + resolved_processor_file = None resolved_feature_extractor_file = download_url(pretrained_model_name_or_path) else: feature_extractor_file = FEATURE_EXTRACTOR_NAME try: # Load from local folder or from cache or download from model Hub and cache - resolved_feature_extractor_files = [ - resolved_file - for filename in [PROCESSOR_NAME, feature_extractor_file] - if ( - resolved_file := cached_file( - pretrained_model_name_or_path, - filename=filename, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - local_files_only=local_files_only, - subfolder=subfolder, - token=token, - 
user_agent=user_agent, - revision=revision, - _raise_exceptions_for_missing_entries=False, - ) - ) - is not None - ] - resolved_feature_extractor_file = resolved_feature_extractor_files[0] + resolved_processor_file = cached_file( + pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) + resolved_feature_extractor_file = cached_file( + pretrained_model_name_or_path, + filename=feature_extractor_file, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) except OSError: # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to # the original exception. @@ -469,19 +476,38 @@ def get_feature_extractor_dict( f" directory containing a {FEATURE_EXTRACTOR_NAME} file" ) - try: - # Load feature_extractor dict - with open(resolved_feature_extractor_file, encoding="utf-8") as reader: - text = reader.read() - feature_extractor_dict = json.loads(text) - if "audio_processor" in feature_extractor_dict: - feature_extractor_dict = feature_extractor_dict["audio_processor"] - else: - feature_extractor_dict = feature_extractor_dict.get("feature_extractor", feature_extractor_dict) + # Load feature_extractor dict. Priority goes as (nested config if found -> image processor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. 
We need to check if it was saved recently as nested or if it is legacy style + feature_extractor_dict = None + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." + ) + if "feature_extractor" in processor_dict or "audio_processor" in processor_dict: + feature_extractor_dict = processor_dict.get("feature_extractor", processor_dict.get("audio_processor")) + + if resolved_feature_extractor_file is not None and feature_extractor_dict is None: + try: + with open(resolved_feature_extractor_file, encoding="utf-8") as reader: + text = reader.read() + feature_extractor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file." + ) - except json.JSONDecodeError: + if feature_extractor_dict is None: raise OSError( - f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file." + f"Can't load feature extractor for '{pretrained_model_name_or_path}'. If you were trying to load" + " it from 'https://huggingface.co/models', make sure you don't have a local directory with the" + f" same name. 
Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a" + f" directory containing a {feature_extractor_file} file" ) if is_local: diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py index fce48957f1e6..6c1ae0e5ebec 100644 --- a/src/transformers/image_processing_base.py +++ b/src/transformers/image_processing_base.py @@ -280,35 +280,41 @@ def get_image_processor_dict( image_processor_file = os.path.join(pretrained_model_name_or_path, image_processor_filename) if os.path.isfile(pretrained_model_name_or_path): resolved_image_processor_file = pretrained_model_name_or_path + resolved_processor_file = None is_local = True elif is_remote_url(pretrained_model_name_or_path): image_processor_file = pretrained_model_name_or_path + resolved_processor_file = None resolved_image_processor_file = download_url(pretrained_model_name_or_path) else: image_processor_file = image_processor_filename try: - # Load from local folder or from cache or download from model Hub and cache - resolved_image_processor_files = [ - resolved_file - for filename in [PROCESSOR_NAME, image_processor_file] - if ( - resolved_file := cached_file( - pretrained_model_name_or_path, - filename=filename, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - local_files_only=local_files_only, - token=token, - user_agent=user_agent, - revision=revision, - subfolder=subfolder, - _raise_exceptions_for_missing_entries=False, - ) - ) - is not None - ] - resolved_image_processor_file = resolved_image_processor_files[0] + resolved_processor_file = cached_file( + pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) + resolved_image_processor_file = cached_file( + 
pretrained_model_name_or_path, + filename=image_processor_file, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) except OSError: # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to # the original exception. @@ -322,16 +328,38 @@ def get_image_processor_dict( f" directory containing a {image_processor_filename} file" ) - try: - # Load image_processor dict - with open(resolved_image_processor_file, encoding="utf-8") as reader: - text = reader.read() - image_processor_dict = json.loads(text) - image_processor_dict = image_processor_dict.get("image_processor", image_processor_dict) + # Load image_processor dict. Priority goes as (nested config if found -> image processor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. We need to check if it was saved recently as nested or if it is legacy style + image_processor_dict = None + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." + ) + if "image_processor" in processor_dict: + image_processor_dict = processor_dict["image_processor"] + + if resolved_image_processor_file is not None and image_processor_dict is None: + try: + with open(resolved_image_processor_file, encoding="utf-8") as reader: + text = reader.read() + image_processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." 
+ ) - except json.JSONDecodeError: + if image_processor_dict is None: raise OSError( - f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." + f"Can't load image processor for '{pretrained_model_name_or_path}'. If you were trying to load" + " it from 'https://huggingface.co/models', make sure you don't have a local directory with the" + f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a" + f" directory containing a {image_processor_filename} file" ) if is_local: diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 1c517de15a72..fec00203ba8a 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -168,39 +168,60 @@ def get_feature_extractor_config( feature_extractor_config = get_feature_extractor_config("feature-extractor-test") ```""" # Load with a priority given to the nested processor config, if available in repo - resolved_config_files = [ - resolved_file - for filename in [PROCESSOR_NAME, FEATURE_EXTRACTOR_NAME] - if ( - resolved_file := cached_file( - pretrained_model_name_or_path, - filename=filename, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - token=token, - revision=revision, - local_files_only=local_files_only, - _raise_exceptions_for_gated_repo=False, - _raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - ) - ) - is not None - ] - if resolved_config_files is None: - logger.info( - "Could not locate the feature extractor configuration file, will try to use the model config instead." 
- ) + resolved_processor_file = cached_file( + pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + ) + resolved_feature_extractor_file = cached_file( + pretrained_model_name_or_path, + filename=FEATURE_EXTRACTOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + ) + + # An empty list if none of the possible files is found in the repo + if not resolved_feature_extractor_file and not resolved_processor_file: + logger.info("Could not locate the feature extractor configuration file.") return {} - resolved_config_file = resolved_config_files[0] - with open(resolved_config_file, encoding="utf-8") as reader: - feature_extractor_dict = json.load(reader) - if "audio_processor" in feature_extractor_dict: - feature_extractor_dict = feature_extractor_dict["audio_processor"] - else: - feature_extractor_dict = feature_extractor_dict.get("feature_extractor", feature_extractor_dict) + # Load feature_extractor dict. Priority goes as (nested config if found -> feature extractor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. 
We need to check if it was saved recently as nested or if it is legacy style + feature_extractor_dict = {} + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + if "feature_extractor" in processor_dict: + feature_extractor_dict = processor_dict["feature_extractor"] + + if resolved_feature_extractor_file is not None and feature_extractor_dict is None: + try: + with open(resolved_feature_extractor_file, encoding="utf-8") as reader: + text = reader.read() + feature_extractor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file." + ) + return feature_extractor_dict diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index ec8d53ad60d0..423c96e53cb6 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -307,37 +307,60 @@ def get_image_processor_config( image_processor_config = get_image_processor_config("image-processor-test") ```""" # Load with a priority given to the nested processor config, if available in repo - resolved_config_files = [ - resolved_file - for filename in [PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] - if ( - resolved_file := cached_file( - pretrained_model_name_or_path, - filename=filename, - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - token=token, - revision=revision, - local_files_only=local_files_only, - _raise_exceptions_for_gated_repo=False, - _raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - ) - ) - is not None - ] + resolved_processor_file = cached_file( + 
pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + ) + resolved_image_processor_file = cached_file( + pretrained_model_name_or_path, + filename=IMAGE_PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + ) + # An empty list if none of the possible files is found in the repo - if not resolved_config_files: - logger.info( - "Could not locate the image processor configuration file, will try to use the model config instead." - ) + if not resolved_image_processor_file and not resolved_processor_file: + logger.info("Could not locate the image processor configuration file.") return {} - resolved_config_file = resolved_config_files[0] - with open(resolved_config_file, encoding="utf-8") as reader: - image_processor_dict = json.load(reader) - image_processor_dict = image_processor_dict.get("image_processor", image_processor_dict) + # Load image_processor dict. Priority goes as (nested config if found -> image processor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. 
We need to check if it was saved recebtly as nested or if it is legacy style + image_processor_dict = {} + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + if "image_processor" in processor_dict: + image_processor_dict = processor_dict["image_processor"] + + if resolved_image_processor_file is not None and image_processor_dict is None: + try: + with open(resolved_image_processor_file, encoding="utf-8") as reader: + text = reader.read() + image_processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." + ) + return image_processor_dict diff --git a/src/transformers/models/auto/video_processing_auto.py b/src/transformers/models/auto/video_processing_auto.py index 02d601be8895..abff7af82a72 100644 --- a/src/transformers/models/auto/video_processing_auto.py +++ b/src/transformers/models/auto/video_processing_auto.py @@ -176,9 +176,21 @@ def get_video_processor_config( video_processor = get_video_processor_config("video-processor-test") ```""" # Load with a priority given to the nested processor config, if available in repo - resolved_config_files = [ + resolved_processor_file = cached_file( + pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + ) + resolved_video_processor_files = [ resolved_file - for filename in [PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] + for filename in [VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] if ( 
resolved_file := cached_file( pretrained_model_name_or_path, @@ -196,16 +208,37 @@ def get_video_processor_config( ) is not None ] - if resolved_config_files is None: - logger.info( - "Could not locate the video processor configuration file, will try to use the model config instead." - ) + resolved_video_processor_file = resolved_video_processor_files[0] if resolved_video_processor_files else None + + # An empty list if none of the possible files is found in the repo + if not resolved_video_processor_file and not resolved_processor_file: + logger.info("Could not locate the video processor configuration file.") return {} - resolved_config_file = resolved_config_files[0] - with open(resolved_config_file, encoding="utf-8") as reader: - video_processor_dict = json.load(reader) - video_processor_dict = video_processor_dict.get("video_processor", video_processor_dict) + # Load video_processor dict. Priority goes as (nested config if found -> video processor config -> image processor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. 
We need to check if it was saved recebtly as nested or if it is legacy style + video_processor_dict = {} + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + if "video_processor" in processor_dict: + video_processor_dict = processor_dict["video_processor"] + + if resolved_video_processor_file is not None and video_processor_dict is None: + try: + with open(resolved_video_processor_file, encoding="utf-8") as reader: + text = reader.read() + video_processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_video_processor_file}' is not a valid JSON file." + ) + return video_processor_dict diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py index d164ba75877e..041913062d05 100644 --- a/src/transformers/video_processing_utils.py +++ b/src/transformers/video_processing_utils.py @@ -635,18 +635,33 @@ def get_video_processor_dict( is_local = os.path.isdir(pretrained_model_name_or_path) if os.path.isfile(pretrained_model_name_or_path): resolved_video_processor_file = pretrained_model_name_or_path + resolved_processor_file = None is_local = True elif is_remote_url(pretrained_model_name_or_path): video_processor_file = pretrained_model_name_or_path + resolved_processor_file = None resolved_video_processor_file = download_url(pretrained_model_name_or_path) else: video_processor_file = VIDEO_PROCESSOR_NAME try: # Try to load with a new config name first and if not successful try with the old file name # NOTE: we save all processor configs as nested dict in PROCESSOR_NAME from v5, which is the standard + resolved_processor_file = cached_file( + pretrained_model_name_or_path, + filename=PROCESSOR_NAME, + 
cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) resolved_video_processor_files = [ resolved_file - for filename in [PROCESSOR_NAME, VIDEO_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME] + for filename in [video_processor_file, IMAGE_PROCESSOR_NAME] if ( resolved_file := cached_file( pretrained_model_name_or_path, @@ -664,7 +679,9 @@ def get_video_processor_dict( ) is not None ] - resolved_video_processor_file = resolved_video_processor_files[0] + resolved_video_processor_file = ( + resolved_video_processor_files[0] if resolved_video_processor_files else None + ) except OSError: # Raise any OS error raise by `cached_file`. It will have a helpful error message adapted to # the original exception. @@ -675,19 +692,41 @@ def get_video_processor_dict( f"Can't load video processor for '{pretrained_model_name_or_path}'. If you were trying to load" " it from 'https://huggingface.co/models', make sure you don't have a local directory with the" f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a" - f" directory containing a {VIDEO_PROCESSOR_NAME} file" + f" directory containing a {video_processor_file} file" ) - try: - # Load video_processor dict - with open(resolved_video_processor_file, "r", encoding="utf-8") as reader: - text = reader.read() - video_processor_dict = json.loads(text) - video_processor_dict = video_processor_dict.get("video_processor", video_processor_dict) + # Load video_processor dict. Priority goes as (nested config if found -> video processor config -> image processor config) + # We are downloading both configs because almost all models have a `processor_config.json` but + # not all of these are nested. 
We need to check if it was saved recebtly as nested or if it is legacy style + video_processor_dict = None + if resolved_processor_file is not None: + try: + with open(resolved_processor_file, encoding="utf-8") as reader: + text = reader.read() + processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." + ) + if "video_processor" in processor_dict: + video_processor_dict = processor_dict["video_processor"] + + if resolved_video_processor_file is not None and video_processor_dict is None: + try: + with open(resolved_video_processor_file, encoding="utf-8") as reader: + text = reader.read() + video_processor_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError( + f"It looks like the config file at '{resolved_video_processor_file}' is not a valid JSON file." + ) - except json.JSONDecodeError: + if video_processor_dict is None: raise OSError( - f"It looks like the config file at '{resolved_video_processor_file}' is not a valid JSON file." + f"Can't load video processor for '{pretrained_model_name_or_path}'. If you were trying to load" + " it from 'https://huggingface.co/models', make sure you don't have a local directory with the" + f" same name. 
Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a" + f" directory containing a {video_processor_file} file" ) if is_local: From 046ccec624d645977246ab043030527ff7a2ee06 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Wed, 12 Nov 2025 15:57:26 +0100 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com> --- src/transformers/models/auto/feature_extraction_auto.py | 2 +- src/transformers/models/auto/image_processing_auto.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index fec00203ba8a..677207094b92 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -200,7 +200,7 @@ def get_feature_extractor_config( # Load feature_extractor dict. Priority goes as (nested config if found -> feature extractor config) # We are downloading both configs because almost all models have a `processor_config.json` but - # not all of these are nested. We need to check if it was saved recebtly as nested or if it is legacy style + # not all of these are nested. We need to check if it was saved recently as nested or if it is legacy style feature_extractor_dict = {} if resolved_processor_file is not None: try: diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 423c96e53cb6..6932496ceaf7 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -339,7 +339,7 @@ def get_image_processor_config( # Load image_processor dict. Priority goes as (nested config if found -> image processor config) # We are downloading both configs because almost all models have a `processor_config.json` but - # not all of these are nested. 
We need to check if it was saved recebtly as nested or if it is legacy style + # not all of these are nested. We need to check if it was saved recently as nested or if it is legacy style image_processor_dict = {} if resolved_processor_file is not None: try: From 8173bf7be02c6a8cf36b23702796ffb747921123 Mon Sep 17 00:00:00 2001 From: raushan Date: Wed, 12 Nov 2025 16:06:41 +0100 Subject: [PATCH 5/5] use helper fn for json decoding --- src/transformers/feature_extraction_utils.py | 19 +++--------------- src/transformers/image_processing_base.py | 19 +++--------------- .../models/auto/feature_extraction_auto.py | 20 +++---------------- .../models/auto/image_processing_auto.py | 18 +++-------------- .../models/auto/video_processing_auto.py | 18 +++-------------- src/transformers/utils/__init__.py | 1 + src/transformers/utils/generic.py | 11 ++++++++++ src/transformers/video_processing_utils.py | 19 +++--------------- 8 files changed, 30 insertions(+), 95 deletions(-) diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index 180c2b63cb3b..844d9a215914 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -39,6 +39,7 @@ is_torch_dtype, logging, requires_backends, + safe_load_json_file, ) from .utils.hub import cached_file @@ -481,26 +482,12 @@ def get_feature_extractor_dict( # not all of these are nested. We need to check if it was saved recebtly as nested or if it is legacy style feature_extractor_dict = None if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." 
- ) + processor_dict = safe_load_json_file(resolved_processor_file) if "feature_extractor" in processor_dict or "audio_processor" in processor_dict: feature_extractor_dict = processor_dict.get("feature_extractor", processor_dict.get("audio_processor")) if resolved_feature_extractor_file is not None and feature_extractor_dict is None: - try: - with open(resolved_feature_extractor_file, encoding="utf-8") as reader: - text = reader.read() - feature_extractor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file." - ) + feature_extractor_dict = safe_load_json_file(resolved_feature_extractor_file) if feature_extractor_dict is None: raise OSError( diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py index 6c1ae0e5ebec..564af6be8081 100644 --- a/src/transformers/image_processing_base.py +++ b/src/transformers/image_processing_base.py @@ -32,6 +32,7 @@ is_offline_mode, is_remote_url, logging, + safe_load_json_file, ) from .utils.hub import cached_file @@ -333,26 +334,12 @@ def get_image_processor_dict( # not all of these are nested. We need to check if it was saved recebtly as nested or if it is legacy style image_processor_dict = None if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." 
- ) + processor_dict = safe_load_json_file(resolved_processor_file) if "image_processor" in processor_dict: image_processor_dict = processor_dict["image_processor"] if resolved_image_processor_file is not None and image_processor_dict is None: - try: - with open(resolved_image_processor_file, encoding="utf-8") as reader: - text = reader.read() - image_processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." - ) + image_processor_dict = safe_load_json_file(resolved_image_processor_file) if image_processor_dict is None: raise OSError( diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 677207094b92..c392de084fcc 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -15,7 +15,6 @@ """AutoFeatureExtractor class.""" import importlib -import json import os from collections import OrderedDict from typing import Optional, Union @@ -24,7 +23,7 @@ from ...configuration_utils import PreTrainedConfig from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code from ...feature_extraction_utils import FeatureExtractionMixin -from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, cached_file, logging +from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, cached_file, logging, safe_load_json_file from .auto_factory import _LazyAutoMapping from .configuration_auto import ( CONFIG_MAPPING_NAMES, @@ -203,25 +202,12 @@ def get_feature_extractor_config( # not all of these are nested. 
We need to check if it was saved recently as nested or if it is legacy style feature_extractor_dict = {} if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + processor_dict = safe_load_json_file(resolved_processor_file) if "feature_extractor" in processor_dict: feature_extractor_dict = processor_dict["feature_extractor"] if resolved_feature_extractor_file is not None and feature_extractor_dict is None: - try: - with open(resolved_feature_extractor_file, encoding="utf-8") as reader: - text = reader.read() - feature_extractor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_feature_extractor_file}' is not a valid JSON file." - ) - + feature_extractor_dict = safe_load_json_file(resolved_feature_extractor_file) return feature_extractor_dict diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 6932496ceaf7..da34c840480c 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -15,7 +15,6 @@ """AutoImageProcessor class.""" import importlib -import json import os import warnings from collections import OrderedDict @@ -36,6 +35,7 @@ is_torchvision_available, is_vision_available, logging, + safe_load_json_file, ) from ...utils.import_utils import requires from .auto_factory import _LazyAutoMapping @@ -342,24 +342,12 @@ def get_image_processor_config( # not all of these are nested. 
We need to check if it was saved recently as nested or if it is legacy style image_processor_dict = {} if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + processor_dict = safe_load_json_file(resolved_processor_file) if "image_processor" in processor_dict: image_processor_dict = processor_dict["image_processor"] if resolved_image_processor_file is not None and image_processor_dict is None: - try: - with open(resolved_image_processor_file, encoding="utf-8") as reader: - text = reader.read() - image_processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_image_processor_file}' is not a valid JSON file." - ) + image_processor_dict = safe_load_json_file(resolved_image_processor_file) return image_processor_dict diff --git a/src/transformers/models/auto/video_processing_auto.py b/src/transformers/models/auto/video_processing_auto.py index abff7af82a72..27b260653002 100644 --- a/src/transformers/models/auto/video_processing_auto.py +++ b/src/transformers/models/auto/video_processing_auto.py @@ -15,7 +15,6 @@ """AutoVideoProcessor class.""" import importlib -import json import os from collections import OrderedDict from typing import TYPE_CHECKING, Optional, Union @@ -31,6 +30,7 @@ cached_file, is_torchvision_available, logging, + safe_load_json_file, ) from ...utils.import_utils import requires from ...video_processing_utils import BaseVideoProcessor @@ -220,24 +220,12 @@ def get_video_processor_config( # not all of these are nested. 
We need to check if it was saved recebtly as nested or if it is legacy style video_processor_dict = {} if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError(f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file.") + processor_dict = safe_load_json_file(resolved_processor_file) if "video_processor" in processor_dict: video_processor_dict = processor_dict["video_processor"] if resolved_video_processor_file is not None and video_processor_dict is None: - try: - with open(resolved_video_processor_file, encoding="utf-8") as reader: - text = reader.read() - video_processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_video_processor_file}' is not a valid JSON file." - ) + video_processor_dict = safe_load_json_file(resolved_video_processor_file) return video_processor_dict diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 44589d96c841..38b5db8f4893 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -63,6 +63,7 @@ is_torch_dtype, is_torch_tensor, reshape, + safe_load_json_file, squeeze, strtobool, tensor_size, diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 9bc51f1bac65..00cc581b1ac1 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -221,6 +221,17 @@ def to_numpy(obj): return obj +def safe_load_json_file(json_file: str): + "A helper to load safe config files and raise a proper error message if it wasn't serialized correctly" + try: + with open(json_file, encoding="utf-8") as reader: + text = reader.read() + config_dict = json.loads(text) + except json.JSONDecodeError: + raise OSError(f"It looks like the config file at '{json_file}' is not a 
valid JSON file.") + return config_dict + + class ModelOutput(OrderedDict): """ Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py index 041913062d05..eeb04eeb3adb 100644 --- a/src/transformers/video_processing_utils.py +++ b/src/transformers/video_processing_utils.py @@ -50,6 +50,7 @@ is_torchcodec_available, is_torchvision_v2_available, logging, + safe_load_json_file, ) from .utils.hub import cached_file from .utils.import_utils import requires @@ -700,26 +701,12 @@ def get_video_processor_dict( # not all of these are nested. We need to check if it was saved recebtly as nested or if it is legacy style video_processor_dict = None if resolved_processor_file is not None: - try: - with open(resolved_processor_file, encoding="utf-8") as reader: - text = reader.read() - processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file." - ) + processor_dict = safe_load_json_file(resolved_processor_file) if "video_processor" in processor_dict: video_processor_dict = processor_dict["video_processor"] if resolved_video_processor_file is not None and video_processor_dict is None: - try: - with open(resolved_video_processor_file, encoding="utf-8") as reader: - text = reader.read() - video_processor_dict = json.loads(text) - except json.JSONDecodeError: - raise OSError( - f"It looks like the config file at '{resolved_video_processor_file}' is not a valid JSON file." - ) + video_processor_dict = safe_load_json_file(resolved_video_processor_file) if video_processor_dict is None: raise OSError(