diff --git a/ads/aqua/app.py b/ads/aqua/app.py index a7a6165d8..253996268 100644 --- a/ads/aqua/app.py +++ b/ads/aqua/app.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import json @@ -298,7 +298,7 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict: config = {} artifact_path = get_artifact_path(oci_model.custom_metadata_list) if not artifact_path: - logger.error( + logger.debug( f"Failed to get artifact path from custom metadata for the model: {model_id}" ) return config diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 0b7cb7773..13adf0bdb 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import base64 import json @@ -199,11 +199,11 @@ def create( eval_inference_configuration = ( container.spec.evaluation_configuration ) - except Exception: + except Exception as ex: logger.debug( f"Could not load inference config details for the evaluation source id: " f"{create_aqua_evaluation_details.evaluation_source_id}. Please check if the container" - f" runtime has the correct SMC image information." + f" runtime has the correct SMC image information.\nError: {str(ex)}" ) elif ( DataScienceResource.MODEL @@ -289,7 +289,7 @@ def create( f"Invalid experiment name. Please provide an experiment with `{Tags.AQUA_EVALUATION}` in tags." ) except Exception: - logger.debug( + logger.info( f"Model version set {experiment_model_version_set_name} doesn't exist. " "Creating new model version set." ) @@ -711,21 +711,27 @@ def get(self, eval_id) -> AquaEvaluationDetail: try: log = utils.query_resource(log_id, return_all=False) log_name = log.display_name if log else "" - except Exception: + except Exception as ex: + logger.debug(f"Failed to get associated log name. Error: {ex}") pass if loggroup_id: try: loggroup = utils.query_resource(loggroup_id, return_all=False) loggroup_name = loggroup.display_name if loggroup else "" - except Exception: + except Exception as ex: + logger.debug(f"Failed to get associated loggroup name. Error: {ex}") pass try: introspection = json.loads( self._get_attribute_from_model_metadata(resource, "ArtifactTestResults") ) - except Exception: + except Exception as ex: + logger.debug( + f"There was an issue loading the model attribute as json object for evaluation {eval_id}. " + f"Setting introspection to empty.\n Error:{ex}" + ) introspection = {} summary = AquaEvaluationDetail( @@ -878,13 +884,13 @@ def get_status(self, eval_id: str) -> dict: try: log_id = job_run_details.log_details.log_id except Exception as e: - logger.debug(f"Failed to get associated log. {str(e)}") + logger.debug(f"Failed to get associated log.\nError: {str(e)}") log_id = "" try: loggroup_id = job_run_details.log_details.log_group_id except Exception as e: - logger.debug(f"Failed to get associated log. 
{str(e)}") + logger.debug(f"Failed to get associated log.\nError: {str(e)}") loggroup_id = "" loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id) @@ -958,7 +964,7 @@ def load_metrics(self, eval_id: str) -> AquaEvalMetrics: ) except Exception as e: logger.debug( - "Failed to load `report.json` from evaluation artifact" f"{str(e)}" + f"Failed to load `report.json` from evaluation artifact.\nError: {str(e)}" ) json_report = {} @@ -1047,6 +1053,7 @@ def download_report(self, eval_id) -> AquaEvalReport: return report with tempfile.TemporaryDirectory() as temp_dir: + logger.info(f"Downloading evaluation artifact for {eval_id}.") DataScienceModel.from_id(eval_id).download_artifact( temp_dir, auth=self._auth, @@ -1200,6 +1207,7 @@ def _delete_job_and_model(job, model): def load_evaluation_config(self, container: Optional[str] = None) -> Dict: """Loads evaluation config.""" + logger.info("Loading evaluation container config.") # retrieve the evaluation config by container family name evaluation_config = get_evaluation_service_config(container) @@ -1279,9 +1287,9 @@ def _get_source( raise AquaRuntimeError( f"Not supported source type: {resource_type}" ) - except Exception: + except Exception as ex: logger.debug( - f"Failed to retrieve source information for evaluation {evaluation.identifier}." + f"Failed to retrieve source information for evaluation {evaluation.identifier}.\nError: {str(ex)}" ) source_name = "" diff --git a/ads/aqua/extension/aqua_ws_msg_handler.py b/ads/aqua/extension/aqua_ws_msg_handler.py index 04ff651f4..1fcbbf946 100644 --- a/ads/aqua/extension/aqua_ws_msg_handler.py +++ b/ads/aqua/extension/aqua_ws_msg_handler.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import traceback +import uuid from abc import abstractmethod from http.client import responses from typing import List @@ -34,7 +34,7 @@ def __init__(self, message: str): self.telemetry = TelemetryClient( bucket=AQUA_TELEMETRY_BUCKET, namespace=AQUA_TELEMETRY_BUCKET_NS ) - except: + except Exception: pass @staticmethod @@ -66,16 +66,23 @@ def write_error(self, status_code, **kwargs): "message": message, "service_payload": service_payload, "reason": reason, + "request_id": str(uuid.uuid4()), } exc_info = kwargs.get("exc_info") if exc_info: - logger.error("".join(traceback.format_exception(*exc_info))) + logger.error( + f"Error Request ID: {reply['request_id']}\n" + f"Error: {''.join(traceback.format_exception(*exc_info))}" + ) e = exc_info[1] if isinstance(e, HTTPError): reply["message"] = e.log_message or message reply["reason"] = e.reason - else: - logger.warning(reply["message"]) + + logger.error( + f"Error Request ID: {reply['request_id']}\n" + f"Error: {reply['message']} {reply['reason']}" + ) # telemetry may not be present if there is an error while initializing if hasattr(self, "telemetry"): aqua_api_details = kwargs.get("aqua_api_details", {}) @@ -83,7 +90,7 @@ def write_error(self, status_code, **kwargs): category="aqua/error", action=str(status_code), value=reason, - **aqua_api_details + **aqua_api_details, ) response = AquaWsError( status=status_code, diff --git a/ads/aqua/extension/base_handler.py b/ads/aqua/extension/base_handler.py index 5bd9f7091..19dda9ce5 100644 --- a/ads/aqua/extension/base_handler.py +++ b/ads/aqua/extension/base_handler.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ @@ -35,7 +34,7 @@ def __init__( self.telemetry = TelemetryClient( bucket=AQUA_TELEMETRY_BUCKET, namespace=AQUA_TELEMETRY_BUCKET_NS ) - except: + except Exception: pass @staticmethod @@ -82,19 +81,23 @@ def write_error(self, status_code, **kwargs): "message": message, "service_payload": service_payload, "reason": reason, + "request_id": str(uuid.uuid4()), } exc_info = kwargs.get("exc_info") if exc_info: - logger.error("".join(traceback.format_exception(*exc_info))) + logger.error( + f"Error Request ID: {reply['request_id']}\n" + f"Error: {''.join(traceback.format_exception(*exc_info))}" + ) e = exc_info[1] if isinstance(e, HTTPError): reply["message"] = e.log_message or message reply["reason"] = e.reason if e.reason else reply["reason"] - reply["request_id"] = str(uuid.uuid4()) - else: - reply["request_id"] = str(uuid.uuid4()) - logger.warning(reply["message"]) + logger.error( + f"Error Request ID: {reply['request_id']}\n" + f"Error: {reply['message']} {reply['reason']}" + ) # telemetry may not be present if there is an error while initializing if hasattr(self, "telemetry"): @@ -103,7 +106,7 @@ def write_error(self, status_code, **kwargs): category="aqua/error", action=str(status_code), value=reason, - **aqua_api_details + **aqua_api_details, ) self.finish(json.dumps(reply)) diff --git a/ads/aqua/extension/model_handler.py b/ads/aqua/extension/model_handler.py index 54c36d2c8..029d71643 100644 --- a/ads/aqua/extension/model_handler.py +++ b/ads/aqua/extension/model_handler.py @@ -140,6 +140,10 @@ def post(self, *args, **kwargs): # noqa: ARG002 ignore_patterns = input_data.get("ignore_patterns") freeform_tags = input_data.get("freeform_tags") defined_tags = input_data.get("defined_tags") + ignore_model_artifact_check = ( + str(input_data.get("ignore_model_artifact_check", "false")).lower() + == "true" + ) return self.finish( AquaModelApp().register( @@ -158,6 +162,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 ignore_patterns=ignore_patterns, freeform_tags=freeform_tags, defined_tags=defined_tags, + ignore_model_artifact_check=ignore_model_artifact_check, ) ) diff --git a/ads/aqua/finetuning/finetuning.py b/ads/aqua/finetuning/finetuning.py index 2cc8aa789..45ea1335a 100644 --- a/ads/aqua/finetuning/finetuning.py +++ b/ads/aqua/finetuning/finetuning.py @@ -382,6 +382,9 @@ def create( defined_tags=model_defined_tags, ), ) + logger.debug( + f"Successfully updated model custom metadata list and freeform tags for the model {ft_model.id}." + ) self.update_model_provenance( model_id=ft_model.id, @@ -389,6 +392,9 @@ def create( training_id=ft_job_run.id ), ) + logger.debug( + f"Successfully updated model provenance for the model {ft_model.id}." + ) # tracks the shape and replica used for fine-tuning the service models telemetry_kwargs = ( @@ -564,7 +570,7 @@ def get_finetuning_config(self, model_id: str) -> Dict: config = self.get_config(model_id, AQUA_MODEL_FINETUNING_CONFIG) if not config: logger.debug( - f"Fine-tuning config for custom model: {model_id} is not available." + f"Fine-tuning config for custom model: {model_id} is not available. Use defaults." 
) return config diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py index 6dd5eba21..dfd1ac374 100644 --- a/ads/aqua/model/entities.py +++ b/ads/aqua/model/entities.py @@ -294,6 +294,7 @@ class ImportModelDetails(CLIBuilderMixin): ignore_patterns: Optional[List[str]] = None freeform_tags: Optional[dict] = None defined_tags: Optional[dict] = None + ignore_model_artifact_check: Optional[bool] = None def __post_init__(self): self._command = "model register" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 8acf4229a..be8e08b85 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -20,7 +20,11 @@ InferenceContainerTypeFamily, Tags, ) -from ads.aqua.common.errors import AquaRuntimeError, AquaValueError +from ads.aqua.common.errors import ( + AquaFileNotFoundError, + AquaRuntimeError, + AquaValueError, +) from ads.aqua.common.utils import ( LifecycleStatus, _build_resource_identifier, @@ -162,7 +166,7 @@ def create( target_compartment = compartment_id or COMPARTMENT_OCID if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: - logger.debug( + logger.info( f"Aqua Model {model_id} already exists in user's compartment." "Skipped copying." ) @@ -193,8 +197,8 @@ def create( # TODO: decide what kwargs will be needed. .create(model_by_reference=True, **kwargs) ) - logger.debug( - f"Aqua Model {custom_model.id} created with the service model {model_id}" + logger.info( + f"Aqua Model {custom_model.id} created with the service model {model_id}." ) # tracks unique models that were created in the user compartment @@ -225,11 +229,16 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod cached_item = self._service_model_details_cache.get(model_id) if cached_item: + logger.info(f"Fetching model details for model {model_id} from cache.") return cached_item + logger.info(f"Fetching model details for model {model_id}.") ds_model = DataScienceModel.from_id(model_id) if not self._if_show(ds_model): - raise AquaRuntimeError(f"Target model `{ds_model.id} `is not Aqua model.") + raise AquaRuntimeError( + f"Target model `{ds_model.id} `is not an Aqua model as it does not contain " + f"{Tags.AQUA_TAG} tag." + ) is_fine_tuned_model = bool( ds_model.freeform_tags @@ -248,16 +257,21 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod ds_model.custom_metadata_list._to_oci_metadata() ) if artifact_path != UNKNOWN: + model_card_path = ( + f"{artifact_path.rstrip('/')}/config/{README}" + if is_verified_type + else f"{artifact_path.rstrip('/')}/{README}" + ) model_card = str( read_file( - file_path=( - f"{artifact_path.rstrip('/')}/config/{README}" - if is_verified_type - else f"{artifact_path.rstrip('/')}/{README}" - ), + file_path=model_card_path, auth=default_signer(), ) ) + if not model_card: + logger.warn( + f"Model card for {model_id} is empty or could not be loaded from {model_card_path}." + ) inference_container = ds_model.custom_metadata_list.get( ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, @@ -303,9 +317,10 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod try: jobrun_ocid = ds_model.provenance_metadata.training_id jobrun = self.ds_client.get_job_run(jobrun_ocid).data - except Exception: + except Exception as e: logger.debug( f"Missing jobrun information in the provenance metadata of the given model {model_id}." 
+ f"\nError: {str(e)}" ) jobrun = None @@ -314,7 +329,10 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod FineTuningCustomMetadata.FT_SOURCE ).value except ValueError as e: - logger.debug(str(e)) + logger.debug( + f"Custom metadata is missing {FineTuningCustomMetadata.FT_SOURCE} key for " + f"model {model_id}.\nError: {str(e)}" + ) source_id = UNKNOWN try: @@ -322,7 +340,10 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod FineTuningCustomMetadata.FT_SOURCE_NAME ).value except ValueError as e: - logger.debug(str(e)) + logger.debug( + f"Custom metadata is missing {FineTuningCustomMetadata.FT_SOURCE_NAME} key for " + f"model {model_id}.\nError: {str(e)}" + ) source_name = UNKNOWN source_identifier = _build_resource_identifier( @@ -372,6 +393,7 @@ def delete_model(self, model_id): Tags.AQUA_FINE_TUNED_MODEL_TAG, None ) if is_registered_model or is_fine_tuned_model: + logger.info(f"Deleting model {model_id}.") return ds_model.delete() else: raise AquaRuntimeError( @@ -478,6 +500,7 @@ def edit_registered_model( freeform_tags=freeform_tags, ) AquaApp().update_model(id, update_model_details) + logger.info(f"Updated model details for the model {id}.") else: raise AquaRuntimeError("Only registered unverified models can be edited.") @@ -735,7 +758,7 @@ def list( ) logger.info( - f"Fetch {len(models)} model in compartment_id={compartment_id or ODSC_MODEL_COMPARTMENT_OCID}." + f"Fetched {len(models)} model in compartment_id={compartment_id or ODSC_MODEL_COMPARTMENT_OCID}." ) aqua_models = [] @@ -765,10 +788,12 @@ def clear_model_list_cache( dict with the key used, and True if cache has the key that needs to be deleted. """ res = {} - logger.info("Clearing _service_models_cache") with self._cache_lock: if ODSC_MODEL_COMPARTMENT_OCID in self._service_models_cache: self._service_models_cache.pop(key=ODSC_MODEL_COMPARTMENT_OCID) + logger.info( + f"Cleared models cache for service compartment {ODSC_MODEL_COMPARTMENT_OCID}." + ) res = { "key": { "compartment_id": ODSC_MODEL_COMPARTMENT_OCID, @@ -785,10 +810,10 @@ def clear_model_details_cache(self, model_id): dict with the key used, and True if cache has the key that needs to be deleted. """ res = {} - logger.info(f"Clearing _service_model_details_cache for {model_id}") with self._cache_lock: if model_id in self._service_model_details_cache: self._service_model_details_cache.pop(key=model_id) + logger.info(f"Clearing model details cache for model {model_id}.") res = {"key": {"model_id": model_id}, "cache_deleted": True} return res @@ -873,7 +898,8 @@ def _create_model_catalog_entry( metadata = ModelCustomMetadata() if not inference_container: raise AquaRuntimeError( - f"Require Inference container information. Model: {model_name} does not have associated inference container defaults. Check docs for more information on how to pass inference container." + f"Require Inference container information. Model: {model_name} does not have associated inference " + f"container defaults. Check docs for more information on how to pass inference container." ) metadata.add( key=AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME, @@ -948,7 +974,7 @@ def _create_model_catalog_entry( artifact_path = metadata.get(MODEL_BY_REFERENCE_OSS_PATH_KEY).value logger.info( f"Found model artifact in the service bucket. " - f"Using artifact from service bucket instead of {os_path}" + f"Using artifact from service bucket instead of {os_path}." 
) # todo: implement generic copy_folder method @@ -980,7 +1006,7 @@ def _create_model_catalog_entry( .with_freeform_tags(**tags) .with_defined_tags(**(defined_tags or {})) ).create(model_by_reference=True) - logger.debug(model) + logger.debug(f"Created model catalog entry for the model:\n{model}") return model @staticmethod @@ -1000,13 +1026,23 @@ def get_model_files(os_path: str, model_format: ModelFormat) -> List[str]: # todo: revisit this logic to account for .bin files. In the current state, .bin and .safetensor models # are grouped in one category and validation checks for config.json files only. if model_format == ModelFormat.SAFETENSORS: + model_files.extend( + list_os_files_with_extension(oss_path=os_path, extension=".safetensors") + ) try: load_config( file_path=os_path, config_file_name=AQUA_MODEL_ARTIFACT_CONFIG, ) - except Exception: - pass + except Exception as ex: + message = ( + f"The model path {os_path} does not contain the file config.json. " + f"Please check if the path is correct or the model artifacts are available at this location." + ) + logger.warning( + f"{message}\n" + f"Details: {ex.reason if isinstance(ex, AquaFileNotFoundError) else str(ex)}\n" + ) else: model_files.append(AQUA_MODEL_ARTIFACT_CONFIG) @@ -1014,6 +1050,9 @@ def get_model_files(os_path: str, model_format: ModelFormat) -> List[str]: model_files.extend( list_os_files_with_extension(oss_path=os_path, extension=".gguf") ) + logger.debug( + f"Fetched {len(model_files)} model files from {os_path} for model format {model_format}." + ) return model_files @staticmethod @@ -1050,12 +1089,17 @@ def get_hf_model_files(model_name: str, model_format: ModelFormat) -> List[str]: for model_sibling in model_siblings: extension = pathlib.Path(model_sibling.rfilename).suffix[1:].upper() - if model_format == ModelFormat.SAFETENSORS: - if model_sibling.rfilename == AQUA_MODEL_ARTIFACT_CONFIG: - model_files.append(model_sibling.rfilename) - elif extension == model_format.value: + if ( + model_format == ModelFormat.SAFETENSORS + and model_sibling.rfilename == AQUA_MODEL_ARTIFACT_CONFIG + ): + model_files.append(model_sibling.rfilename) + if extension == model_format.value: model_files.append(model_sibling.rfilename) + logger.debug( + f"Fetched {len(model_files)} model files for the model {model_name} for model format {model_format}." + ) return model_files def _validate_model( @@ -1089,7 +1133,10 @@ def _validate_model( safetensors_model_files = self.get_hf_model_files( model_name, ModelFormat.SAFETENSORS ) - if safetensors_model_files: + if ( + safetensors_model_files + and AQUA_MODEL_ARTIFACT_CONFIG in safetensors_model_files + ): hf_download_config_present = True gguf_model_files = self.get_hf_model_files(model_name, ModelFormat.GGUF) else: @@ -1145,8 +1192,11 @@ def _validate_model( Tags.LICENSE: license_value, } validation_result.tags = hf_tags - except Exception: - pass + except Exception as ex: + logger.debug( + f"An error occurred while getting tag information for model {model_name}. 
" + f"Error: {str(ex)}" + ) validation_result.model_formats = model_formats @@ -1201,40 +1251,55 @@ def _validate_safetensor_format( model_name: str = None, ): if import_model_details.download_from_hf: - # validates config.json exists for safetensors model from hugginface - if not hf_download_config_present: + # validates config.json exists for safetensors model from huggingface + if not ( + hf_download_config_present + or import_model_details.ignore_model_artifact_check + ): raise AquaRuntimeError( f"The model {model_name} does not contain {AQUA_MODEL_ARTIFACT_CONFIG} file as required " f"by {ModelFormat.SAFETENSORS.value} format model." f" Please check if the model name is correct in Hugging Face repository." ) + validation_result.telemetry_model_name = model_name else: + # validate if config.json is available from object storage, and get model name for telemetry + model_config = None try: model_config = load_config( file_path=import_model_details.os_path, config_file_name=AQUA_MODEL_ARTIFACT_CONFIG, ) except Exception as ex: - logger.error( - f"Exception occurred while loading config file from {import_model_details.os_path}" - f"Exception message: {ex}" - ) - raise AquaRuntimeError( + message = ( f"The model path {import_model_details.os_path} does not contain the file config.json. " f"Please check if the path is correct or the model artifacts are available at this location." - ) from ex - else: + ) + if not import_model_details.ignore_model_artifact_check: + logger.error( + f"{message}\n" + f"Details: {ex.reason if isinstance(ex, AquaFileNotFoundError) else str(ex)}" + ) + raise AquaRuntimeError(message) from ex + else: + logger.warning( + f"{message}\n" + f"Proceeding with model registration as ignore_model_artifact_check field is set." + ) + + if verified_model: + # model_type validation, log message if metadata field doesn't match. try: metadata_model_type = verified_model.custom_metadata_list.get( AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE ).value - if metadata_model_type: + if metadata_model_type and model_config is not None: if AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config: if ( model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE] != metadata_model_type ): - raise AquaRuntimeError( + logger.debug( f"The {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in {AQUA_MODEL_ARTIFACT_CONFIG}" f" at {import_model_details.os_path} is invalid, expected {metadata_model_type} for " f"the model {model_name}. Please check if the path is correct or " @@ -1246,22 +1311,26 @@ def _validate_safetensor_format( f"Could not find {AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE} attribute in " f"{AQUA_MODEL_ARTIFACT_CONFIG}. Proceeding with model registration." ) - except Exception: - pass - if verified_model: - validation_result.telemetry_model_name = verified_model.display_name - elif ( - model_config is not None - and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME in model_config - ): - validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME]}" - elif ( - model_config is not None - and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config - ): - validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE]}" - else: - validation_result.telemetry_model_name = AQUA_MODEL_TYPE_CUSTOM + except Exception as ex: + # todo: raise exception if model_type doesn't match. Currently log message and pass since service + # models do not have this metadata. 
+ logger.debug( + f"Error occurred while processing metadata for model {model_name}. " + f"Exception: {str(ex)}" + ) + validation_result.telemetry_model_name = verified_model.display_name + elif ( + model_config is not None + and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME in model_config + ): + validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME]}" + elif ( + model_config is not None + and AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE in model_config + ): + validation_result.telemetry_model_name = f"{AQUA_MODEL_TYPE_CUSTOM}_{model_config[AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE]}" + else: + validation_result.telemetry_model_name = AQUA_MODEL_TYPE_CUSTOM @staticmethod def _validate_gguf_format( @@ -1363,6 +1432,10 @@ def _download_model_from_hf( allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, ) + # Upload to object storage and skip .cache/huggingface/ folder + logger.debug( + f"Uploading local artifacts from local directory {local_dir} to {os_path}." + ) # Upload to object storage model_artifact_path = upload_folder( os_path=os_path, @@ -1409,6 +1482,7 @@ def register( import_model_details.model.startswith("ocid") and "datasciencemodel" in import_model_details.model ): + logger.info(f"Fetching details for model {import_model_details.model}.") verified_model = DataScienceModel.from_id(import_model_details.model) else: # If users passes model name, check if there is model with the same name in the service model catalog. If it is there, then use that model @@ -1446,7 +1520,6 @@ def register( ).rstrip("/") else: artifact_path = import_model_details.os_path.rstrip("/") - # Create Model catalog entry with pass by reference ds_model = self._create_model_catalog_entry( os_path=artifact_path, @@ -1539,7 +1612,7 @@ def _rqs(self, compartment_id: str, model_type="FT", **kwargs): elif model_type == ModelType.BASE: filter_tag = Tags.BASE_MODEL_CUSTOM else: - raise ValueError( + raise AquaValueError( f"Model of type {model_type} is unknown. The values should be in {ModelType.values()}" ) @@ -1579,7 +1652,10 @@ def load_license(self, model_id: str) -> AquaModelLicense: oci_model = self.ds_client.get_model(model_id).data artifact_path = get_artifact_path(oci_model.custom_metadata_list) if not artifact_path: - raise AquaRuntimeError("Failed to get artifact path from custom metadata.") + raise AquaRuntimeError( + f"License could not be loaded. Failed to get artifact path from custom metadata for" + f"the model {model_id}." + ) content = str( read_file( @@ -1610,6 +1686,9 @@ def _find_matching_aqua_model(self, model_id: str) -> Optional[str]: for aqua_model_summary in aqua_model_list: if aqua_model_summary.name.lower() == model_id_lower: + logger.info( + f"Found matching verified model id {aqua_model_summary.id} for the model {model_id}" + ) return aqua_model_summary.id return None diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b7787ea21..c65858b53 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import logging import shlex from typing import Dict, List, Optional, Union @@ -271,7 +270,7 @@ def create( f"field. 
Either re-register the model with custom container URI, or set container_image_uri " f"parameter when creating this deployment." ) from err - logging.info( + logger.info( f"Aqua Image used for deploying {aqua_model.id} : {container_image_uri}" ) @@ -282,14 +281,14 @@ def create( default_cmd_var = shlex.split(cmd_var_string) if default_cmd_var: cmd_var = validate_cmd_var(default_cmd_var, cmd_var) - logging.info(f"CMD used for deploying {aqua_model.id} :{cmd_var}") + logger.info(f"CMD used for deploying {aqua_model.id} :{cmd_var}") except ValueError: - logging.debug( + logger.debug( f"CMD will be ignored for this deployment as {AQUA_DEPLOYMENT_CONTAINER_CMD_VAR_METADATA_NAME} " f"key is not available in the custom metadata field for this model." ) except Exception as e: - logging.error( + logger.error( f"There was an issue processing CMD arguments. Error: {str(e)}" ) @@ -385,7 +384,7 @@ def create( if key not in env_var: env_var.update(env) - logging.info(f"Env vars used for deploying {aqua_model.id} :{env_var}") + logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}") # Start model deployment # configure model deployment infrastructure @@ -440,10 +439,14 @@ def create( .with_runtime(container_runtime) ).deploy(wait_for_completion=False) + deployment_id = deployment.dsc_model_deployment.id + logger.info( + f"Aqua model deployment {deployment_id} created for model {aqua_model.id}." + ) model_type = ( AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE ) - deployment_id = deployment.dsc_model_deployment.id + # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry telemetry_kwargs = {"ocid": get_ocid_substring(deployment_id, key_len=8)} @@ -539,6 +542,9 @@ def list(self, **kwargs) -> List["AquaDeployment"]: value=state, ) + logger.info( + f"Fetched {len(results)} model deployments from compartment_id={compartment_id}." + ) # tracks number of times deployment listing was called self.telemetry.record_event_async(category="aqua/deployment", action="list") @@ -546,18 +552,21 @@ def list(self, **kwargs) -> List["AquaDeployment"]: @telemetry(entry_point="plugin=deployment&action=delete", name="aqua") def delete(self, model_deployment_id: str): + logger.info(f"Deleting model deployment {model_deployment_id}.") return self.ds_client.delete_model_deployment( model_deployment_id=model_deployment_id ).data @telemetry(entry_point="plugin=deployment&action=deactivate", name="aqua") def deactivate(self, model_deployment_id: str): + logger.info(f"Deactivating model deployment {model_deployment_id}.") return self.ds_client.deactivate_model_deployment( model_deployment_id=model_deployment_id ).data @telemetry(entry_point="plugin=deployment&action=activate", name="aqua") def activate(self, model_deployment_id: str): + logger.info(f"Activating model deployment {model_deployment_id}.") return self.ds_client.activate_model_deployment( model_deployment_id=model_deployment_id ).data @@ -579,6 +588,8 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": AquaDeploymentDetail: The instance of the Aqua model deployment details. """ + logger.info(f"Fetching model deployment details for {model_deployment_id}.") + model_deployment = self.ds_client.get_model_deployment( model_deployment_id=model_deployment_id, **kwargs ).data @@ -594,7 +605,8 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": if not oci_aqua: raise AquaRuntimeError( - f"Target deployment {model_deployment_id} is not Aqua deployment." 
+ f"Target deployment {model_deployment_id} is not Aqua deployment as it does not contain " + f"{Tags.AQUA_TAG} tag." ) log_id = "" @@ -652,7 +664,7 @@ def get_deployment_config(self, model_id: str) -> Dict: config = self.get_config(model_id, AQUA_MODEL_DEPLOYMENT_CONFIG) if not config: logger.debug( - f"Deployment config for custom model: {model_id} is not available." + f"Deployment config for custom model: {model_id} is not available. Use defaults." ) return config diff --git a/ads/cli.py b/ads/cli.py index 872e7d177..249920eef 100644 --- a/ads/cli.py +++ b/ads/cli.py @@ -1,12 +1,12 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2021, 2024 Oracle and/or its affiliates. +# Copyright (c) 2021, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import logging import sys import traceback -from dataclasses import is_dataclass +import uuid import fire @@ -27,7 +27,7 @@ ) logger.debug(ex) logger.debug(traceback.format_exc()) - exit() + sys.exit() # https://packaging.python.org/en/latest/guides/single-sourcing-package-version/#single-sourcing-the-package-version if sys.version_info >= (3, 8): @@ -122,8 +122,9 @@ def exit_program(ex: Exception, logger: "logging.Logger") -> None: ... exit_program(e, logger) """ - logger.debug(traceback.format_exc()) - logger.error(str(ex)) + request_id = str(uuid.uuid4()) + logger.debug(f"Error Request ID: {request_id}\nError: {traceback.format_exc()}") + logger.error(f"Error Request ID: {request_id}\n" f"Error: {str(ex)}") exit_code = getattr(ex, "exit_code", 1) logger.error(f"Exit code: {exit_code}") diff --git a/tests/unitary/with_extras/aqua/test_cli.py b/tests/unitary/with_extras/aqua/test_cli.py index 6c3c97cc8..4a2c5aed5 100644 --- a/tests/unitary/with_extras/aqua/test_cli.py +++ b/tests/unitary/with_extras/aqua/test_cli.py @@ -1,12 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import logging import os import subprocess +import uuid from importlib import reload from unittest import TestCase from unittest.mock import call, patch @@ -148,6 +149,7 @@ def test_aqua_cli(self, mock_logger, mock_aqua_command, mock_fire, mock_serializ ] ) @patch("sys.argv", ["ads", "aqua", "--error-option"]) + @patch("uuid.uuid4") @patch("fire.Fire") @patch("ads.aqua.cli.AquaCommand") @patch("ads.aqua.logger.error") @@ -162,11 +164,17 @@ def test_aqua_cli_with_error( mock_logger_error, mock_aqua_command, mock_fire, + mock_uuid, ): """Tests when Aqua Cli gracefully exit when error raised.""" mock_fire.side_effect = mock_side_effect from ads.cli import cli + uuid_value = "12345678-1234-5678-1234-567812345678" + mock_uuid.return_value = uuid.UUID(uuid_value) + expected_logging_message = type(expected_logging_message)( + f"Error Request ID: {uuid_value}\nError: {str(expected_logging_message)}" + ) cli() calls = [ call(expected_logging_message), diff --git a/tests/unitary/with_extras/aqua/test_handlers.py b/tests/unitary/with_extras/aqua/test_handlers.py index a4ae749e9..6cbffe23e 100644 --- a/tests/unitary/with_extras/aqua/test_handlers.py +++ b/tests/unitary/with_extras/aqua/test_handlers.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. 
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import json @@ -131,9 +131,13 @@ def test_finish(self, name, payload, expected_call, mock_super_finish): ), aqua_api_details=dict( aqua_api_name="TestDataset.create", - oci_api_name=TestDataset.mock_service_payload_create["operation_name"], - service_endpoint=TestDataset.mock_service_payload_create["request_endpoint"] - ) + oci_api_name=TestDataset.mock_service_payload_create[ + "operation_name" + ], + service_endpoint=TestDataset.mock_service_payload_create[ + "request_endpoint" + ], + ), ), "Authorization Failed: The resource you're looking for isn't accessible. Operation Name: get_job_run.", ], @@ -171,10 +175,13 @@ def test_write_error(self, name, input, expected_msg, mock_uuid, mock_logger): input.get("status_code"), ), value=input.get("reason"), - **aqua_api_details + **aqua_api_details, ) - - mock_logger.warning.assert_called_with(expected_msg) + error_message = ( + f"Error Request ID: {expected_reply['request_id']}\n" + f"Error: {expected_reply['message']} {expected_reply['reason']}" + ) + mock_logger.error.assert_called_with(error_message) class TestHandlers(unittest.TestCase): diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 3eb732aae..4cd59afb9 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -937,10 +937,18 @@ def test_import_model_with_project_compartment_override( assert model.project_id == project_override @pytest.mark.parametrize( - "download_from_hf", - [True, False], + ("ignore_artifact_check", "download_from_hf"), + [ + (True, True), + (True, False), + (False, True), + (False, False), + (None, False), + (None, True), + ], ) @patch("ads.model.service.oci_datascience_model.OCIDataScienceModel.create") + @patch("ads.model.datascience_model.DataScienceModel.sync") @patch("ads.model.datascience_model.DataScienceModel.upload_artifact") @patch("ads.common.object_storage_details.ObjectStorageDetails.list_objects") @patch("ads.aqua.common.utils.load_config", side_effect=AquaFileNotFoundError) @@ -953,45 +961,65 @@ def test_import_model_with_missing_config( mock_load_config, mock_list_objects, mock_upload_artifact, + mock_sync, mock_ocidsc_create, - mock_get_container_config, + ignore_artifact_check, download_from_hf, mock_get_hf_model_info, mock_init_client, ): - """Test for validating if error is returned when model artifacts are incomplete or not available.""" - - os_path = "oci://aqua-bkt@aqua-ns/prefix/path" - model_name = "oracle/aqua-1t-mega-model" + my_model = "oracle/aqua-1t-mega-model" ObjectStorageDetails.is_bucket_versioned = MagicMock(return_value=True) - mock_list_objects.return_value = MagicMock(objects=[]) - reload(ads.aqua.model.model) - app = AquaModelApp() - app.list = MagicMock(return_value=[]) + # set object list from OSS without config.json + os_path = "oci://aqua-bkt@aqua-ns/prefix/path" + # set object list from HF without config.json if download_from_hf: - with pytest.raises(AquaValueError): - mock_get_hf_model_info.return_value.siblings = [] - with tempfile.TemporaryDirectory() as tmpdir: - model: AquaModel = app.register( - model=model_name, - os_path=os_path, - local_dir=str(tmpdir), - download_from_hf=True, - ) + mock_get_hf_model_info.return_value.siblings = [ + MagicMock(rfilename="model.safetensors") + ] else: - with pytest.raises(AquaRuntimeError): + obj1 = 
MagicMock(etag="12345-1234-1234-1234-123456789", size=150) + obj1.name = f"prefix/path/model.safetensors" + objects = [obj1] + mock_list_objects.return_value = MagicMock(objects=objects) + + reload(ads.aqua.model.model) + app = AquaModelApp() + with patch.object(AquaModelApp, "list") as aqua_model_mock_list: + aqua_model_mock_list.return_value = [ + AquaModelSummary( + id="test_id1", + name="organization1/name1", + organization="organization1", + ) + ] + + if ignore_artifact_check: model: AquaModel = app.register( - model=model_name, + model=my_model, os_path=os_path, - download_from_hf=False, + inference_container="odsc-vllm-or-tgi-container", + finetuning_container="odsc-llm-fine-tuning", + download_from_hf=download_from_hf, + ignore_model_artifact_check=ignore_artifact_check, ) + assert model.ready_to_deploy is True + else: + with pytest.raises(AquaRuntimeError): + model: AquaModel = app.register( + model=my_model, + os_path=os_path, + inference_container="odsc-vllm-or-tgi-container", + finetuning_container="odsc-llm-fine-tuning", + download_from_hf=download_from_hf, + ignore_model_artifact_check=ignore_artifact_check, + ) @patch("ads.model.service.oci_datascience_model.OCIDataScienceModel.create") @patch("ads.model.datascience_model.DataScienceModel.sync") @patch("ads.model.datascience_model.DataScienceModel.upload_artifact") @patch("ads.common.object_storage_details.ObjectStorageDetails.list_objects") - @patch.object(HfApi, "model_info") @patch("ads.aqua.common.utils.load_config", return_value={}) def test_import_any_model_smc_container( self, @@ -1247,6 +1275,15 @@ def test_import_model_with_input_tags( "--download_from_hf True --cleanup_model_cache True --inference_container odsc-vllm-serving --freeform_tags " '{"ftag1": "fvalue1", "ftag2": "fvalue2"} --defined_tags {"dtag1": "dvalue1", "dtag2": "dvalue2"}', ), + ( + { + "os_path": "oci://aqua-bkt@aqua-ns/path", + "model": "oracle/oracle-1it", + "inference_container": "odsc-vllm-serving", + "ignore_model_artifact_check": True, + }, + "ads aqua model register --model oracle/oracle-1it --os_path oci://aqua-bkt@aqua-ns/path --download_from_hf True --cleanup_model_cache True --inference_container odsc-vllm-serving --ignore_model_artifact_check True", + ), ], ) def test_import_cli(self, data, expected_output): diff --git a/tests/unitary/with_extras/aqua/test_model_handler.py b/tests/unitary/with_extras/aqua/test_model_handler.py index 391f6a19d..af9165f63 100644 --- a/tests/unitary/with_extras/aqua/test_model_handler.py +++ b/tests/unitary/with_extras/aqua/test_model_handler.py @@ -132,7 +132,7 @@ def test_list(self, mock_list): @parameterized.expand( [ - (None, None, False, None, None, None, None, None), + (None, None, False, None, None, None, None, None, True), ( "odsc-llm-fine-tuning", None, @@ -142,8 +142,9 @@ def test_list(self, mock_list): ["test.json"], None, None, + False, ), - (None, "test.gguf", True, None, ["*.json"], None, None, None), + (None, "test.gguf", True, None, ["*.json"], None, None, None, False), ( None, None, @@ -153,6 +154,7 @@ def test_list(self, mock_list): ["test.json"], None, None, + False, ), ( None, @@ -163,6 +165,7 @@ def test_list(self, mock_list): None, {"ftag1": "fvalue1"}, {"dtag1": "dvalue1"}, + False, ), ], ) @@ -178,6 +181,7 @@ def test_register( ignore_patterns, freeform_tags, defined_tags, + ignore_model_artifact_check, mock_register, mock_finish, ): @@ -201,6 +205,7 @@ def test_register( ignore_patterns=ignore_patterns, freeform_tags=freeform_tags, defined_tags=defined_tags, + 
ignore_model_artifact_check=ignore_model_artifact_check,
             )
         )
         result = self.model_handler.post()
@@ -220,6 +225,7 @@ def test_register(
             ignore_patterns=ignore_patterns,
             freeform_tags=freeform_tags,
             defined_tags=defined_tags,
+            ignore_model_artifact_check=ignore_model_artifact_check,
         )
         assert result["id"] == "test_id"
         assert result["inference_container"] == "odsc-tgi-serving"
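
The patch above threads a new ignore_model_artifact_check option from the model-register handler and ImportModelDetails down to AquaModelApp.register(), letting registration proceed when config.json is absent from the artifact path. A minimal usage sketch, assuming the import path and reusing the placeholder model/bucket names from the diff's own tests (values are illustrative, not defaults):

from ads.aqua.model.model import AquaModelApp

model = AquaModelApp().register(
    model="oracle/oracle-1it",              # placeholder model name from test_import_cli
    os_path="oci://aqua-bkt@aqua-ns/path",  # placeholder Object Storage path
    inference_container="odsc-vllm-serving",
    ignore_model_artifact_check=True,       # skip the config.json validation
)
# test_import_model_with_missing_config asserts model.ready_to_deploy is True in this case.

The equivalent CLI invocation, taken verbatim from the expected command asserted in test_import_cli:

# ads aqua model register --model oracle/oracle-1it --os_path oci://aqua-bkt@aqua-ns/path --download_from_hf True --cleanup_model_cache True --inference_container odsc-vllm-serving --ignore_model_artifact_check True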
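
On the error-handling side, base_handler.py, aqua_ws_msg_handler.py and cli.py now stamp each failure with a uuid4 request ID so the logged traceback can be correlated with the "request_id" returned to the caller. A condensed sketch of that pattern following the cli.py variant, assuming a standalone logger and a hypothetical handle_error() helper (not a function in this PR):

import logging
import traceback
import uuid

logger = logging.getLogger("ads.aqua")  # stand-in for the package logger


def handle_error(ex: Exception) -> dict:
    request_id = str(uuid.uuid4())
    # full traceback goes to the debug log, keyed by the request id
    logger.debug(f"Error Request ID: {request_id}\nError: {traceback.format_exc()}")
    # concise message at error level, carrying the same request id
    logger.error(f"Error Request ID: {request_id}\nError: {str(ex)}")
    # the id is also placed in the reply so users can report it
    return {"message": str(ex), "request_id": request_id}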
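
The handler accepts ignore_model_artifact_check as either a boolean or a string in the JSON body and coerces it with a lowercase string comparison, defaulting to False when the key is absent. A small standalone check of that coercion, with an illustrative payload dict:

input_data = {"model": "oracle/oracle-1it", "ignore_model_artifact_check": "True"}
ignore_model_artifact_check = (
    str(input_data.get("ignore_model_artifact_check", "false")).lower() == "true"
)
assert ignore_model_artifact_check is True

input_data.pop("ignore_model_artifact_check")  # absent key falls back to "false"
assert (
    str(input_data.get("ignore_model_artifact_check", "false")).lower() == "true"
) is False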