@@ -642,53 +642,47 @@ def handle(value, sapi):
642642 status = "failed"
643643 else :
644644 # We're running BARE METAL mode
645- # _XXX_: due to the queue service still being in java in the sandbox
646645 print ("gpu_usage before decision:" + str (gpu_usage ))
647- if gpu_usage == 0 :
648- sandbox_image_name = "microfn/sandbox" # default value
649- elif gpu_usage > 0 :
646+ if gpu_usage > 0 :
650647 sandbox_image_name = "microfn/sandbox_gpu" # sandbox uses GPU
651- if any (resource_info_map [res_name ]["runtime" ] == "Java" for res_name in resource_info_map ):
648+ elif any (resource_info_map [res_name ]["runtime" ] == "Java" for res_name in resource_info_map ):
652649 sandbox_image_name = "microfn/sandbox_java"
650+ else :
651+ sandbox_image_name = "microfn/sandbox" # default value
653652
654653 # TODO: intelligence on how to pick hosts
655654 hosts = sapi .get ("available_hosts" , True )
656655 # hosts is a string representation of a list or dict
657656 print ("available_hosts: " + hosts )
658657 hosts = json .loads (hosts )
659658
659+ deployed_hosts = {}
660660 if hosts is not None and hosts != "" and isinstance (hosts ,dict ):
661661 host_has_gpu = False
662- deployed_hosts = {}
663662 gpu_hosts = {}
664- picked_hosts = {}
663+ picked_hosts = None
665664 plain_hosts = {}
666- hostname_t = ""
667665 for hostname in hosts : # individual host dict
668- nodeHasGPU = hosts ["has_gpu" ] # check if host has a GPU
669- if hostname != "has_gpu" : # skip this key
670- hostname_t = hostname
671- #print("current hostnae: " + str(hostname) + str(hosts[hostname]))
672- hostip = hosts [hostname ]
673- plain_hosts [hostname ] = hostip # add to general hosts
674- if nodeHasGPU :
675- gpu_hosts [hostname ] = hostip # add to GPU hosts
676- hostname = hostname_t
666+ host_has_gpu = hosts [hostname ]["has_gpu" ] # check if host has a GPU
667+ hostip = hosts [hostname ]["ip" ]
668+ plain_hosts [hostname ] = hostip # add to general hosts
669+ if host_has_gpu :
670+ gpu_hosts [hostname ] = hostip # add to GPU hosts
677671 # instruct hosts to start the sandbox and deploy workflow
678672 print ("selected host:" + str (hostname ) + " " + str (hostip ))
679- #print("calulated host:" + str(gpu_hosts) + " " + str(plain_hosts))
680- if sandbox_image_name == "microfn/sandbox" or sandbox_image_name == "microfn/sandbox_java" : # can use any host
681- picked_hosts = plain_hosts
682- #hosts["has_gpu"] = False
683- #print("picked_hosts: " + str(picked_hosts))
684- elif len (gpu_hosts ) > 0 :
673+ #print("founds hosts:" + str(gpu_hosts) + " " + str(plain_hosts))
674+ if sandbox_image_name == "microfn/sandbox_gpu" and gpu_hosts :
685675 picked_hosts = gpu_hosts
686- else :
687- picked_hosts = plain_hosts # fallback as there are no gpu hosts available
688- print ("available GPU hosts list is empty. Deploying on general purpose host" )
676+ elif sandbox_image_name == "microfn/sandbox_gpu" :
677+ # can't deploy; no gpu hosts available.
678+ picked_hosts = {}
679+ elif sandbox_image_name == "microfn/sandbox" or sandbox_image_name == "microfn/sandbox_java" : # can use any host
680+ picked_hosts = plain_hosts
681+
682+ print ("picked_hosts: " + str (picked_hosts ))
689683
690684 for hostname in picked_hosts : # loop over all hosts, need to pick gpu hosts for python/gpu workflows
691- hostip = hosts [hostname ]
685+ hostip = hosts [hostname ][ "ip" ]
692686 host_to_deploy = (hostname , hostip )
693687 print ("host_to_deploy: " + str (host_to_deploy ) )
694688 #host_to_deploy = ("userslfu99", "192.168.8.99")
@@ -713,17 +707,14 @@ def handle(value, sapi):
713707 sapi .putMapEntry (workflow_info ["workflowId" ] + "_sandbox_status_map" , endpoint_key , json .dumps (sbinfo ), is_private = True )
714708 #endpoints = sapi.retrieveMap(workflow_info["workflowId"] + "_workflow_endpoints", True)
715709 #sapi.log(str(endpoints))
716- elif hosts is not None and hosts != "" and isinstance (hosts ,list ):
717- print ("hosts is not dict type!" )
718-
719- if not bool (deployed_hosts ):
720- status = "failed"
721- else :
722- #sapi.log("deployed on hosts: " + json.dumps(deployed_hosts))
723- sapi .put (email + "_workflow_hosts_" + workflow ["id" ], json .dumps (deployed_hosts ), True )
724710 else :
725- print ("available_hosts is empty. Not deploying" )
711+ print ("available_hosts is empty or not a dictionary; not deploying..." )
712+
713+ if not bool (deployed_hosts ):
726714 status = "failed"
715+ else :
716+ #sapi.log("deployed on hosts: " + json.dumps(deployed_hosts))
717+ sapi .put (email + "_workflow_hosts_" + workflow ["id" ], json .dumps (deployed_hosts ), True )
727718
728719 # Update workflow status
729720 wfmeta ["status" ] = status
0 commit comments