@@ -149,8 +149,6 @@ def compile_resource_info_map(resource_names, uploaded_resources, email, sapi, d
149149 resource_metadata = json .loads (resource_metadata )
150150 if "runtime" in resource_metadata :
151151 resource_info ["runtime" ] = resource_metadata ["runtime" ]
152- #if "num_gpu" in resource_metadata:
153- # print("RESOURCE_INFO: " + str(resource_info["num_gpu"]))
154152
155153 num_chunks_str = dlc .get ("grain_source_zip_num_chunks_" + resource_id )
156154 try :
@@ -261,7 +259,7 @@ def get_workflow_host_port(host_to_deploy, sid):
261259
262260 return success , host_port
263261
264- def create_k8s_deployment (email , workflow_info , runtime , management = False , use_gpus = 0 ):
262+ def create_k8s_deployment (email , workflow_info , runtime , gpu_usage , management = False ):
265263 # KUBERNETES MODE
266264 new_workflow_conf = {}
267265 conf_file = '/opt/mfn/SandboxAgent/conf/new_workflow.conf'
@@ -313,16 +311,17 @@ def create_k8s_deployment(email, workflow_info, runtime, management=False, use_g
313311 env .append ({'name' : 'WORKFLOWID' , 'value' : workflow_info ["workflowId" ]})
314312 env .append ({'name' : 'WORKFLOWNAME' , 'value' : workflow_info ["workflowName" ]})
315313
316- if use_gpus >= 0 :
317- #print("INSIDE K8S Deploy, num_gpu: " + str(workflow_info['num_gpu']))
318- #num_gpu = int(workflow_info['num_gpu'])
314+ # apply gpu_usage fraction to k8s deployment configuration
315+ use_gpus = gpu_usage
316+
317+ if not management and use_gpus >= 0 :
319318 # overwrite values from values.yaml for new workflows
320319 kservice ['spec' ]['template' ]['spec' ]['containers' ][0 ]['resources' ]['limits' ]['nvidia.com/gpu' ] = str (use_gpus )
321320 kservice ['spec' ]['template' ]['spec' ]['containers' ][0 ]['resources' ]['requests' ]['nvidia.com/gpu' ] = str (use_gpus )
322321 #kservice['spec']['template']['spec']['containers'][0]['image'] = "localhost:5000/microfn/sandbox"
323322 kservice ['spec' ]['template' ]['spec' ]['containers' ][0 ]['image' ] = "localhost:5000/microfn/sandbox_gpu"
324323
325- # Special handling for the management container
324+ # Special handling for the management container: never run on gpu
326325 if management :
327326 kservice ['spec' ]['template' ]['spec' ]['volumes' ] = [{ 'name' : 'new-workflow-conf' , 'configMap' : {'name' : new_workflow_conf ['configmap' ]}}]
328327 kservice ['spec' ]['template' ]['spec' ]['containers' ][0 ]['volumeMounts' ] = [{'name' : 'new-workflow-conf' , 'mountPath' : '/opt/mfn/SandboxAgent/conf' }]
@@ -421,13 +420,10 @@ def handle(value, sapi):
421420 workflow = data ["workflow" ]
422421 if "id" not in workflow :
423422 raise Exception ("malformed input" )
424- """
425- if "gpu_usage" not in workflow:
426- raise Exception("malformed input: no gpu_usage")
427- use_gpus = int(data['gpu_usage'])
428- """
429423 sapi .log (json .dumps (workflow ))
430424 wfmeta = sapi .get (email + "_workflow_" + workflow ["id" ], True )
425+ print ("WFMETA in deployWorkflow: " + str (wfmeta ))
426+
431427 if wfmeta is None or wfmeta == "" :
432428 raise Exception ("workflow metadata is not valid." )
433429 try :
@@ -498,16 +494,14 @@ def handle(value, sapi):
498494 else :
499495 runtime = "Python"
500496
501- """
502- if "num_gpu" in resource_info_map.keys():
503- print ("RESOURCE_INFO_MAP: " + str(resource_info_map))
504- workflow_info['num_gpu'] = resource_info_map['num_gpu']
497+ if "gpu_usage" in wfmeta and wfmeta ["gpu_usage" ] != "None" :
498+ gpu_usage = float (wfmeta ["gpu_usage" ])
505499 else :
506- workflow_info['num_gpu'] = 0
507- """
508- use_gpus = 0
500+ gpu_usage = 0.
501+
502+ #print("deduced gpu_usage: " + str(gpu_usage))
509503
510- url , endpoint_key = create_k8s_deployment (email , workflow_info , runtime , use_gpus )
504+ url , endpoint_key = create_k8s_deployment (email , workflow_info , runtime , gpu_usage )
511505 if url is not None and len (url ) > 0 :
512506 status = "deploying"
513507 sapi .addSetEntry (workflow_info ["workflowId" ] + "_workflow_endpoints" , str (url ), is_private = True )
@@ -522,9 +516,6 @@ def handle(value, sapi):
522516 # _XXX_: due to the queue service still being in java in the sandbox
523517
524518 sandbox_image_name = "microfn/sandbox" # default value
525- #if "on_gpu" in resource_info_map.keys(): # sandbox_gpu image should be used for ths workflow
526- # if resource_info_map["on_gpu"] == True:
527- # sandbox_image_name = "microfn/sandbox_gpu"
528519
529520 if any (resource_info_map [res_name ]["runtime" ] == "Java" for res_name in resource_info_map ):
530521 sandbox_image_name = "microfn/sandbox_java"
0 commit comments