-
Notifications
You must be signed in to change notification settings - Fork 60
[AQUA Telemetry] Update MD Tracking #1193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
f6e3f00
619a925
2608622
6917c98
075d714
bc41862
5e84eaf
f3a9e9d
fc4c72a
fd09187
c0c072f
bf2953b
8b143ff
cabbfa4
c2b8d41
a96bb82
a58ea43
3938aac
8f2e0a0
4507162
6eeb6cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,8 @@ | |
| from typing import Dict, List, Optional, Union | ||
|
|
||
| from cachetools import TTLCache, cached | ||
| import concurrent | ||
| from ads.common.work_request import DataScienceWorkRequest | ||
| from oci.data_science.models import ModelDeploymentShapeSummary | ||
| from pydantic import ValidationError | ||
|
|
||
|
|
@@ -43,6 +45,8 @@ | |
| MODEL_BY_REFERENCE_OSS_PATH_KEY, | ||
| MODEL_NAME_DELIMITER, | ||
| UNKNOWN_DICT, | ||
| DEFAULT_WAIT_TIME, | ||
| DEFAULT_POLL_INTERVAL | ||
| ) | ||
| from ads.aqua.data import AquaResourceIdentifier | ||
| from ads.aqua.model import AquaModelApp | ||
|
|
@@ -80,6 +84,9 @@ | |
| from ads.model.model_metadata import ModelCustomMetadataItem | ||
| from ads.telemetry import telemetry | ||
|
|
||
| THREAD_POOL_SIZE = 16 | ||
|
||
| thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=THREAD_POOL_SIZE) | ||
|
||
|
|
||
|
|
||
| class AquaDeploymentApp(AquaApp): | ||
| """Provides a suite of APIs to interact with Aqua model deployments within the Oracle | ||
|
|
@@ -780,11 +787,18 @@ def _create_deployment( | |
| .with_runtime(container_runtime) | ||
| ).deploy(wait_for_completion=False) | ||
|
|
||
| deployment_id = deployment.id | ||
|
|
||
|
|
||
| deployment_id = deployment.id() | ||
|
||
| logger.info( | ||
| f"Aqua model deployment {deployment_id} created for model {aqua_model_id}." | ||
| ) | ||
|
|
||
| thread_pool.submit(self.get_deployment_status, | ||
| model_deployment_id=deployment_id, | ||
| work_request_id=deployment.dsc_model_deployment.workflow_req_id, | ||
| model_type=model_type) | ||
|
|
||
| # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry | ||
| telemetry_kwargs = {"ocid": get_ocid_substring(deployment_id, key_len=8)} | ||
|
|
||
|
|
@@ -1309,4 +1323,33 @@ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]: | |
| or gpu_specs.shapes.get(oci_shape.name.upper()), | ||
| ) | ||
| for oci_shape in oci_shapes | ||
| ] | ||
| ] | ||
|
|
||
|
|
||
def get_deployment_status(
    self, model_deployment_id: str, work_request_id: str, model_type: str
) -> None:
    """Wait for a model deployment work request and record the outcome in telemetry.

    This is submitted to the module-level thread pool right after the
    deployment is created, so it blocks only the worker thread while the
    work request is polled.

    Parameters
    ----------
    model_deployment_id: str
        OCID of the model deployment; only a short substring of it is sent
        to telemetry.
    work_request_id: str
        Identifier of the work request that tracks the deployment creation.
    model_type: str
        Model type used to build the telemetry category
        ``aqua/{model_type}/deployment/status``.

    Returns
    -------
    None
        The result is reported only through logging and telemetry events.
    """
    # Same convention as at deployment creation: the last 8 characters of
    # the OCID identify the deployment in telemetry.
    telemetry_kwargs = {"ocid": get_ocid_substring(model_deployment_id, key_len=8)}
    category = f"aqua/{model_type}/deployment/status"

    try:
        DataScienceWorkRequest(work_request_id).wait_work_request(
            progress_bar_description="Creating model deployment",
            max_wait_time=DEFAULT_WAIT_TIME,
            poll_interval=DEFAULT_POLL_INTERVAL,
        )
    except Exception as e:
        # Best-effort background task: log and report the failure instead of
        # letting the exception vanish inside the executor's future.
        logger.error("Error while trying to create model deployment: " + str(e))
        self.telemetry.record_event_async(
            category=category,
            action="FAILED",
            detail="Error creating model deployment",
            **telemetry_kwargs,
        )
    else:
        # Only report success when waiting on the work request did not raise;
        # previously SUCCEEDED was also recorded after a FAILED event.
        self.telemetry.record_event_async(
            category=category,
            action="SUCCEEDED",
            detail=" Create model deployment successful",
            **telemetry_kwargs,
        )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,7 @@ def __init__( | |
| config: dict = None, | ||
| signer: Signer = None, | ||
| client_kwargs: dict = None, | ||
| _error_message: str = None, | ||
|
||
| **kwargs | ||
| ) -> None: | ||
| """Initializes ADSWorkRequest object. | ||
|
|
@@ -65,6 +66,7 @@ def __init__( | |
| self._description = description | ||
| self._percentage = 0 | ||
| self._status = None | ||
| _error_message = _error_message | ||
| super().__init__(config, signer, client_kwargs, **kwargs) | ||
|
|
||
|
|
||
|
|
@@ -78,6 +80,7 @@ def _sync(self): | |
| self._percentage= work_request.percent_complete | ||
| self._status = work_request.status | ||
| self._description = work_request_logs[-1].message if work_request_logs else "Processing" | ||
| if work_request.status == 'FAILED' : self._error_message = self.client.list_work_request_errors | ||
|
||
|
|
||
| def watch( | ||
| self, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since these constants are specific to model deployment status, we could put them in `ads/aqua/modeldeployment/constants.py`.