Skip to content

Commit 0caf069

Browse files
committed
test: add second dashboard test input, with 4 workers
1 parent 8483304 commit 0caf069

File tree

11 files changed

+1957
-0
lines changed

11 files changed

+1957
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Run of ml/codeflare/tuning/glue from 20220627 with 4 ray workers
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"madwizard/apriori/use-gpu": "don't use gpus",
3+
"madwizard/apriori/arch": "x64",
4+
"madwizard/apriori/platform": "darwin",
5+
"madwizard/apriori/mac-installer": "Homebrew",
6+
"madwizard/apriori/in-terminal": "HTML",
7+
"Training####Fine Tuning": "Fine Tuning",
8+
"GLUE": "GLUE",
9+
"AWS####IBM": "AWS",
10+
"S3 Bucket for Run Data.expand([ -n \"$MC_CONFIG_DIR\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls s3 | awk '{print substr($NF, 1, length($NF) - 1)}', S3 Buckets)####separator####📁 Create a new bucket": "browsey",
11+
"Run Locally####Run on a Kubernetes Cluster": "Run on a Kubernetes Cluster",
12+
"Choose the bucket that contains your model and glue data.madwizard/apriori/platform": "Darwin",
13+
"expand(kubectl config get-contexts -o name, Kubernetes contexts)": "default/api-codeflare-train-v11-codeflare-openshift-com:6443/kube:admin",
14+
"expand([ -z ${KUBE_CONTEXT} ] && exit 1 || kubectl --context ${KUBE_CONTEXT} get ns -o name | grep -Ev 'openshift|kube-' | sed 's#namespace/##', Kubernetes namespaces)####Create a namespace": "nvidia-gpu-operator",
15+
"Number of CPUs####Number of GPUs####Minimum Workers####Maximum Workers####Worker Memory####Head Memory": "{\"Number of CPUs\":\"1\",\"Number of GPUs\":\"1\",\"Minimum Workers\":\"5\",\"Maximum Workers\":\"5\",\"Worker Memory\":\"32Gi\",\"Head Memory\":\"32Gi\"}",
16+
"Choose the bucket that contains your model and glue data.expand([ -n \"$MC_CONFIG_DIR\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls s3 | awk '{print substr($NF, 1, length($NF) - 1)}', S3 Buckets)####separator####📁 Create a new bucket": "browsey",
17+
"Choose your Model File.expand([ -n \"$MC_CONFIG_DIR\" ] && [ -n \"$S3_FILEPATH\" ] && [ -n \"$S3_FILEPATH${S3_BUCKET_SUFFIX}\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls \"s3/$S3_FILEPATH${S3_BUCKET_SUFFIX}\" | awk '{print $NF}', S3 Objects)": "roberta-base",
18+
"Choose your Glue Data File.expand([ -n \"$MC_CONFIG_DIR\" ] && [ -n \"$S3_FILEPATH\" ] && [ -n \"$S3_FILEPATH${S3_BUCKET_SUFFIX}\" ] && mc -q --config-dir ${MC_CONFIG_DIR} ls \"s3/$S3_FILEPATH${S3_BUCKET_SUFFIX}\" | awk '{print $NF}', S3 Objects)": "glue_data",
19+
"BERT": "BERT",
20+
"Example: Using Ray Tasks to Parallelize a Function####Example: Using Ray Actors to Parallelize a Class####Example: Creating and Transforming Datasets####Example: Training Using PyTorch####Example: Hyperparameter Tuning####Example: Serving a scikit-learn gradient boosting classifier": "Example: Using Ray Tasks to Parallelize a Function",
21+
"Number of CPUs####Number of GPUs": "{\"Number of CPUs\":4,\"Number of GPUs\":3}",
22+
"expand(echo ${A-error} ; echo ${B-4} ; echo ${C-5})": "3",
23+
"XXXXXX.11111####222222": "11111",
24+
"YYYYYY.11111####222222": "222222"
25+
}

tests/plugin-codeflare/dashboard/inputs/2/events/kubernetes.txt

Whitespace-only changes.

tests/plugin-codeflare/dashboard/inputs/2/job.json

Lines changed: 56 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
11f4cebe-c012-4467-b5d6-b3cf1fd69269

tests/plugin-codeflare/dashboard/inputs/2/logs/job.txt

Lines changed: 1135 additions & 0 deletions
Large diffs are not rendered by default.

tests/plugin-codeflare/dashboard/inputs/2/resources/gpu.txt

Lines changed: 420 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
Sample 2022-06-27T11:33:18-04:00
2+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
3+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
4+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
5+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
6+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
7+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
8+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
9+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
10+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
11+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
12+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
13+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
14+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
15+
16+
Sample 2022-06-27T11:33:44-04:00
17+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
18+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
19+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
20+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
21+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
22+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
23+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
24+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
25+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
26+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
27+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
28+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
29+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
30+
31+
Sample 2022-06-27T11:33:59-04:00
32+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
33+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
34+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
35+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
36+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
37+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
38+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
39+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
40+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
41+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
42+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
43+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
44+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
45+
46+
Sample 2022-06-27T11:34:14-04:00
47+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
48+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
49+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
50+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
51+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
52+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
53+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
54+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
55+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
56+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
57+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
58+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
59+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
60+
61+
Sample 2022-06-27T11:34:30-04:00
62+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
63+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
64+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
65+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
66+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
67+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
68+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
69+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
70+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
71+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
72+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
73+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
74+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
75+
76+
Sample 2022-06-27T11:34:45-04:00
77+
NAME GPUCap GPUFree CPUCap CPUFree MemCap MemFree DiskCap DiskFree Type
78+
ip-10-0-130-33.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
79+
ip-10-0-131-199.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
80+
ip-10-0-131-59.ec2.internal <none> <none> 4 3500m 16407104Ki 15256128Ki 125293548Ki 115470533646 m4.xlarge
81+
ip-10-0-134-142.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
82+
ip-10-0-135-200.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
83+
ip-10-0-137-245.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
84+
ip-10-0-138-210.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
85+
ip-10-0-143-168.ec2.internal 1 1 8 7500m 62855724Ki 61704748Ki 125293548Ki 115470533646 p3.2xlarge
86+
ip-10-0-154-249.ec2.internal <none> <none> 2 1500m 8149576Ki 6998600Ki 125293548Ki 115470533646 m4.large
87+
ip-10-0-158-5.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
88+
ip-10-0-167-175.ec2.internal <none> <none> 2 1500m 8149572Ki 6998596Ki 125293548Ki 115470533646 m4.large
89+
ip-10-0-169-105.ec2.internal <none> <none> 4 3500m 16407108Ki 15256132Ki 125293548Ki 115470533646 m4.xlarge
90+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
mycluster-ray-head-type-64jbz 5190090752 34359738368
2+
mycluster-ray-worker-type-wxg6j 19960578048 34359738368
3+
mycluster-ray-head-type-64jbz 5190791168 34359738368
4+
mycluster-ray-worker-type-wxg6j 20848730112 34359738368
5+
mycluster-ray-head-type-64jbz 5190770688 34359738368
6+
mycluster-ray-worker-type-wxg6j 21521006592 34359738368
7+
mycluster-ray-head-type-64jbz 5190729728 34359738368
8+
mycluster-ray-worker-type-wxg6j 20242448384 34359738368
9+
mycluster-ray-head-type-64jbz 5191041024 34359738368
10+
mycluster-ray-worker-type-wxg6j 21651267584 34359738368
11+
mycluster-ray-head-type-64jbz 5191806976 34359738368
12+
mycluster-ray-worker-type-wxg6j 23813095424 34359738368
13+
mycluster-ray-head-type-64jbz 5192212480 34359738368
14+
mycluster-ray-worker-type-wxg6j 23813267456 34359738368
15+
mycluster-ray-head-type-64jbz 5195247616 34359738368
16+
mycluster-ray-worker-type-wxg6j 23813160960 34359738368
17+
mycluster-ray-head-type-64jbz 5096464384 34359738368
18+
mycluster-ray-worker-type-wxg6j 20056743936 34359738368
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
Sample 2022-06-27T11:33:18-04:00
2+
NAME GPUReq CPUReq MemReq
3+
mycluster-ray-head-type-64jbz <none> 1 32Gi
4+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
5+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
6+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
7+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
8+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
9+
10+
Sample 2022-06-27T11:33:34-04:00
11+
NAME GPUReq CPUReq MemReq
12+
mycluster-ray-head-type-64jbz <none> 1 32Gi
13+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
14+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
15+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
16+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
17+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
18+
19+
Sample 2022-06-27T11:33:49-04:00
20+
NAME GPUReq CPUReq MemReq
21+
mycluster-ray-head-type-64jbz <none> 1 32Gi
22+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
23+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
24+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
25+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
26+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
27+
28+
Sample 2022-06-27T11:34:04-04:00
29+
NAME GPUReq CPUReq MemReq
30+
mycluster-ray-head-type-64jbz <none> 1 32Gi
31+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
32+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
33+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
34+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
35+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
36+
37+
Sample 2022-06-27T11:34:20-04:00
38+
NAME GPUReq CPUReq MemReq
39+
mycluster-ray-head-type-64jbz <none> 1 32Gi
40+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
41+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
42+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
43+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
44+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
45+
46+
Sample 2022-06-27T11:34:35-04:00
47+
NAME GPUReq CPUReq MemReq
48+
mycluster-ray-head-type-64jbz <none> 1 32Gi
49+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
50+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
51+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
52+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
53+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
54+
55+
Sample 2022-06-27T11:34:51-04:00
56+
NAME GPUReq CPUReq MemReq
57+
mycluster-ray-head-type-64jbz <none> 1 32Gi
58+
mycluster-ray-worker-type-7ch6t 1 1 32Gi
59+
mycluster-ray-worker-type-fn5j8 1 1 32Gi
60+
mycluster-ray-worker-type-pdtbg 1 1 32Gi
61+
mycluster-ray-worker-type-wxg6j 1 1 32Gi
62+
mycluster-ray-worker-type-xl9qd 1 1 32Gi
63+

0 commit comments

Comments
 (0)