44
55
66import json
7+ import re
78import shlex
89import threading
910from datetime import datetime , timedelta
@@ -732,28 +733,18 @@ def _create_deployment(
732733 f"Aqua model deployment { deployment_id } created for model { aqua_model_id } . Work request Id is { deployment .dsc_model_deployment .workflow_req_id } "
733734 )
734735 status_list = []
735- progress_thread_1 = threading .Thread (
736- target = deployment .watch ,
737- args = (status_list ),
738- daemon = True ,
739- )
740- progress_thread_1 .start ()
741736
742- progress_thread_2 = threading .Thread (
737+ progress_thread = threading .Thread (
743738 target = self .get_deployment_status ,
744739 args = (
745740 deployment_id ,
746741 deployment .dsc_model_deployment .workflow_req_id ,
747742 model_type ,
748743 model_name ,
749- status_list ,
750744 ),
751745 daemon = True ,
752746 )
753- progress_thread_2 .start ()
754-
755- progress_thread_1 .join ()
756- progress_thread_2 .join ()
747+ progress_thread .start ()
757748
758749 # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
759750 telemetry_kwargs = {"ocid" : get_ocid_substring (deployment_id , key_len = 8 )}
@@ -1245,11 +1236,11 @@ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
12451236
12461237 def get_deployment_status (
12471238 self ,
1239+ deployment ,
12481240 model_deployment_id : str ,
12491241 work_request_id : str ,
12501242 model_type : str ,
12511243 model_name : str ,
1252- status_list : List [str ] = [],
12531244 ) -> None :
12541245 """Waits for the data science model deployment to be completed and log its status in telemetry.
12551246
@@ -1262,17 +1253,14 @@ def get_deployment_status(
12621253 The work request Id of the model deployment.
12631254 model_type: str
12641255 The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`.
1265- status_list: List[str]
1266- The list of status frmo streams the access and/or predict logs of model deployment.
12671256
12681257 Returns
12691258 -------
12701259 AquaDeployment
12711260 An Aqua deployment instance.
12721261 """
1273-
12741262 ocid = get_ocid_substring (model_deployment_id , key_len = 8 )
1275- telemetry_kwargs = { "ocid" : ocid , "model_name" : model_name }
1263+ status_list : List [ str ] = []
12761264
12771265 data_science_work_request : DataScienceWorkRequest = DataScienceWorkRequest (
12781266 work_request_id
@@ -1284,25 +1272,104 @@ def get_deployment_status(
12841272 max_wait_time = DEFAULT_WAIT_TIME ,
12851273 poll_interval = DEFAULT_POLL_INTERVAL ,
12861274 )
1287- except Exception :
1275+ predict_logs = deployment .tail_logs ("predict" )
1276+ access_logs = deployment .tail_logs ("access" )
1277+
1278+ status = ""
1279+ if access_logs and len (access_logs ) > 0 :
1280+ print ("access log list ############################" )
1281+ print (access_logs )
1282+ status = access_logs [0 ]["message" ]
1283+
1284+ if predict_logs and len (predict_logs ) > 0 :
1285+ print ("predict_logs ############################" )
1286+ print (predict_logs )
1287+ status += predict_logs [0 ]["message" ]
1288+
1289+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1290+ telemetry_kwargs = {
1291+ "ocid" : ocid ,
1292+ "model_name" : model_name ,
1293+ "status" : status ,
1294+ }
1295+ print (telemetry_kwargs )
1296+ print ("############################" )
1297+
1298+ self .telemetry .record_event (
1299+ category = f"aqua/{ model_type } /deployment/status" ,
1300+ action = "LAST_LOG" ,
1301+ # detail=error_str,
1302+ ** telemetry_kwargs ,
1303+ )
1304+
1305+ except Exception as e :
12881306 if data_science_work_request ._error_message :
12891307 error_str = ""
12901308 for error in data_science_work_request ._error_message :
12911309 error_str = error_str + " " + error .message
12921310
12931311 status = ""
1294- if len (status_list ) > 0 :
1295- status = status_list [- 1 ]
1296-
1297- telemetry_kwargs ["status" ] = status
1312+ predict_logs = deployment .tail_logs ("predict" )
1313+ access_logs = deployment .tail_logs ("access" )
1314+ if access_logs and len (access_logs ) > 0 :
1315+ print (access_logs )
1316+ status = access_logs [0 ]["message" ]
1317+
1318+ if predict_logs and len (predict_logs ) > 0 :
1319+ print ("predict_logs ############################" )
1320+ print (predict_logs )
1321+ status += predict_logs [0 ]["message" ]
1322+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1323+ error_str = re .sub (r"[^a-zA-Z0-9]" , "" , error_str )
1324+ telemetry_kwargs = {
1325+ "ocid" : ocid ,
1326+ "model_name" : model_name ,
1327+ "status" : error_str + " " + status ,
1328+ }
1329+ print (telemetry_kwargs )
1330+ print ("############################" )
12981331
12991332 self .telemetry .record_event (
13001333 category = f"aqua/{ model_type } /deployment/status" ,
13011334 action = "FAILED" ,
13021335 detail = error_str ,
13031336 ** telemetry_kwargs ,
13041337 )
1338+ else :
1339+ print (str (e ))
1340+ status = str (e )
1341+ predict_logs = deployment .tail_logs ("predict" )
1342+ access_logs = deployment .tail_logs ("access" )
1343+ if access_logs and len (access_logs ) > 0 :
1344+ print ("access log list ############################" )
1345+ print (access_logs )
1346+ status = access_logs [0 ]["message" ]
1347+
1348+ if predict_logs and len (predict_logs ) > 0 :
1349+ print ("predict_logs ############################" )
1350+ print (predict_logs )
1351+ status += predict_logs [0 ]["message" ]
1352+
1353+ status = re .sub (r"[^a-zA-Z0-9]" , "" , status )
1354+ error_str = re .sub (r"[^a-zA-Z0-9]" , "" , error_str )
1355+
1356+ telemetry_kwargs = {
1357+ "ocid" : ocid ,
1358+ "model_name" : model_name ,
1359+ "status" : error_str + " " + status ,
1360+ }
1361+ print (telemetry_kwargs )
1362+ print ("############################" )
1363+
1364+ self .telemetry .record_event (
1365+ category = f"aqua/{ model_type } /deployment/status" ,
1366+ action = "FAILED" ,
1367+ # detail=error_str,
1368+ ** telemetry_kwargs ,
1369+ )
1370+
13051371 else :
1372+ telemetry_kwargs = {"ocid" : ocid , "model_name" : model_name }
13061373 self .telemetry .record_event_async (
13071374 category = f"aqua/{ model_type } /deployment/status" ,
13081375 action = "SUCCEEDED" ,
0 commit comments