|
| 1 | +import yaml |
| 2 | +import sys |
| 3 | +import argparse |
| 4 | +import uuid |
| 5 | + |
def readTemplate(template):
    """Load and parse the AppWrapper template YAML file.

    Args:
        template: Path to the template YAML file.

    Returns:
        The parsed YAML document (typically a dict).

    Exits the process with a clear message if the file cannot be
    parsed. (Previously a parse failure only printed the exception and
    implicitly returned None, which later crashed callers with an
    opaque AttributeError.)
    """
    with open(template, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            sys.exit(f"Error: could not parse template {template}: {exc}")
| 12 | + |
def gen_names():
    """Generate a correlated pair of AppWrapper and cluster names.

    Both names share a single freshly generated UUID suffix so the two
    resources can be matched to each other.

    Returns:
        Tuple of (appwrapper_name, cluster_name).
    """
    suffix = str(uuid.uuid4())
    return f"appwrapper-{suffix}", f"cluster-{suffix}"
| 18 | + |
def update_names(user_yaml, item, appwrapper_name, cluster_name):
    """Write the generated names into the template documents in place.

    Args:
        user_yaml: Top-level AppWrapper document; its metadata.name is
            set to the AppWrapper name.
        item: A GenericItems entry; its generictemplate metadata gets
            the AppWrapper label and the cluster name.
        appwrapper_name: Generated AppWrapper name.
        cluster_name: Generated cluster name.

    Note: the first parameter was renamed from ``yaml`` because that
    name shadowed the imported ``yaml`` module (all in-file callers
    pass it positionally, so this is call-compatible).
    """
    user_yaml.get("metadata")["name"] = appwrapper_name
    lower_meta = item.get("generictemplate", {}).get("metadata")
    lower_meta["labels"]["appwrapper.mcad.ibm.com"] = appwrapper_name
    lower_meta["name"] = cluster_name
| 25 | + |
def updateCustompodresources(item, cpu, memory, gpu, workers):
    """Update the custompodresources section of a GenericItems entry in place.

    For every entry in ``item["custompodresources"]``, sets the replica
    count and the cpu/memory/gpu values under both "requests" and
    "limits". Only keys already present in the template are rewritten.

    Args:
        item: GenericItems entry expected to contain 'custompodresources'.
        cpu: CPUs per worker.
        memory: Memory per worker in gigabytes (written as "<memory>G").
        gpu: GPUs per worker (under the "nvidia.com/gpu" key).
        workers: Replica count.

    Exits the process if the template has no 'custompodresources' key.
    """
    # Idiom fix: test membership on the dict directly instead of .keys(),
    # and use a guard clause rather than a trailing else.
    if "custompodresources" not in item:
        sys.exit("Error: malformed template")
    for resource in item["custompodresources"]:
        if "replicas" in resource:
            resource["replicas"] = workers
        for section_name in ("requests", "limits"):
            section = resource.get(section_name)
            if section is None:
                continue
            # Only overwrite specs the template already declares.
            if "cpu" in section:
                section["cpu"] = cpu
            if "memory" in section:
                section["memory"] = str(memory) + "G"
            if "nvidia.com/gpu" in section:
                section["nvidia.com/gpu"] = gpu
| 43 | + |
def update_affinity(spec, appwrapper_name):
    """Pin the pod's required node affinity to the AppWrapper name.

    Rewrites, in place, the first value of the first match expression
    of the first required node selector term in ``spec``.
    """
    node_affinity = spec.get("affinity").get("nodeAffinity")
    required = node_affinity.get("requiredDuringSchedulingIgnoredDuringExecution")
    terms = required.get("nodeSelectorTerms")
    terms[0]["matchExpressions"][0]["values"][0] = appwrapper_name
| 47 | + |
def update_resources(spec, cpu, memory, gpu):
    """Set cpu/memory/gpu on every container's requests and limits.

    Memory is written as "<memory>G". A requests or limits section
    that is None (or absent) is left untouched; sections that exist
    have the three keys written unconditionally.
    """
    for container in spec.get("containers"):
        resources = container.get("resources")
        for section_name in ("requests", "limits"):
            section = resources.get(section_name)
            if section is None:
                continue
            section["cpu"] = cpu
            section["memory"] = str(memory) + "G"
            section["nvidia.com/gpu"] = gpu
| 61 | + |
def update_nodes(item, appwrapper_name, cpu, memory, gpu, workers):
    """Apply sizing and affinity to the head and worker group specs.

    The head node counts as the first worker, so the worker group gets
    ``workers - 1`` replicas (min, max, and current). Both head and
    worker pod templates get the affinity pin and resource values.
    Items without a generictemplate are skipped.
    """
    if "generictemplate" not in item:
        return
    cluster_spec = item.get("generictemplate").get("spec")
    head = cluster_spec.get("headGroupSpec")
    worker = cluster_spec.get("workerGroupSpecs")[0]

    # Head counts as first worker
    for field in ("replicas", "minReplicas", "maxReplicas"):
        worker[field] = workers - 1

    for group in (head, worker):
        pod_spec = group.get("template").get("spec")
        update_affinity(pod_spec, appwrapper_name)
        update_resources(pod_spec, cpu, memory, gpu)
| 76 | + |
def generateAppwrapper(cpu, memory, gpu, workers, template):
    """Generate a uniquely-named AppWrapper yaml file from a template.

    Reads the template, generates correlated names, patches names,
    custom pod resources, and node group specs, then writes the result
    to '<appwrapper_name>.yaml'.

    Args:
        cpu: CPUs per worker.
        memory: Memory per worker in gigabytes.
        gpu: GPUs per worker.
        workers: Number of workers in the cluster.
        template: Path to the template AppWrapper yaml file.
    """
    user_yaml = readTemplate(template)
    appwrapper_name, cluster_name = gen_names()
    # Bug fix: the original used user_yaml.get("spec", "resources"),
    # which treats "resources" as a *default value* — a template with
    # no 'spec' returned the string "resources" and crashed later with
    # a confusing TypeError. Fail fast with a clear message instead.
    spec = user_yaml.get("spec")
    if spec is None or "resources" not in spec:
        sys.exit("Error: malformed template (missing spec.resources)")
    item = spec["resources"].get("GenericItems")[0]
    update_names(user_yaml, item, appwrapper_name, cluster_name)
    updateCustompodresources(item, cpu, memory, gpu, workers)
    update_nodes(item, appwrapper_name, cpu, memory, gpu, workers)
    writeUserAppwrapper(user_yaml, appwrapper_name)
| 86 | + |
def writeUserAppwrapper(user_yaml, appwrapper_name):
    """Dump the finished AppWrapper document to '<appwrapper_name>.yaml'."""
    output_path = f"{appwrapper_name}.yaml"
    with open(output_path, "w") as outfile:
        yaml.dump(user_yaml, outfile, default_flow_style=False)
| 90 | + |
def main():
    """CLI entry point: parse the sizing arguments and generate the AppWrapper."""
    parser = argparse.ArgumentParser(description='Generate user AppWrapper')
    parser.add_argument("--cpu", type=int, required=True, help="number of CPU(s) in a worker required for running job")
    parser.add_argument("--memory", required=True, help="RAM required in a worker for running job")
    parser.add_argument("--gpu", type=int, required=True, help="GPU(s) required in a worker for running job")
    parser.add_argument("--workers", type=int, required=True, help="How many workers are required in the cluster")
    parser.add_argument("--template", required=True, help="Template AppWrapper yaml file")

    args = parser.parse_args()
    generateAppwrapper(args.cpu, args.memory, args.gpu, args.workers, args.template)


if __name__ == "__main__":
    main()