@@ -6,12 +6,8 @@ import (
66
77 . "github.com/onsi/gomega"
88 mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
9-
109 batchv1 "k8s.io/api/batch/v1"
1110 corev1 "k8s.io/api/core/v1"
12- "k8s.io/apimachinery/pkg/api/resource"
13- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14-
1511 . "github.com/project-codeflare/codeflare-operator/test/support"
1612)
1713
@@ -23,41 +19,13 @@ func TestInstascaleMachinePool(t *testing.T) {
2319 namespace := test .NewTestNamespace ()
2420
2521 // Test configuration
26- config := & corev1.ConfigMap {
27- TypeMeta : metav1.TypeMeta {
28- APIVersion : corev1 .SchemeGroupVersion .String (),
29- Kind : "ConfigMap" ,
30- },
31- ObjectMeta : metav1.ObjectMeta {
32- Name : "mnist-mcad" ,
33- Namespace : namespace .Name ,
34- },
35- BinaryData : map [string ][]byte {
36- // pip requirements
37- "requirements.txt" : ReadFile (test , "mnist_pip_requirements.txt" ),
38- // MNIST training script
39- "mnist.py" : ReadFile (test , "mnist.py" ),
40- },
41- Immutable : Ptr (true ),
42- }
43-
44- config , err := test .Client ().Core ().CoreV1 ().ConfigMaps (namespace .Name ).Create (test .Ctx (), config , metav1.CreateOptions {})
45- test .Expect (err ).NotTo (HaveOccurred ())
46- test .T ().Logf ("Created ConfigMap %s/%s successfully" , config .Namespace , config .Name )
22+ config , err := TestConfig (test , namespace .Name )
23+ test .Expect (err ).To (BeNil ())
4724
4825 // Create OCM connection
49- instascaleOCMSecret , err := test .Client ().Core ().CoreV1 ().Secrets ("default" ).Get (test .Ctx (), "instascale-ocm-secret" , metav1.GetOptions {})
50- if err != nil {
51- test .T ().Errorf ("unable to retrieve instascale-ocm-secret - Error : %v" , err )
52- }
53- test .Expect (err ).NotTo (HaveOccurred ())
54- ocmToken := string (instascaleOCMSecret .Data ["token" ])
55- test .T ().Logf ("Retrieved Secret %s successfully" , instascaleOCMSecret .Name )
26+ connection , err := CreateConnection (test )
27+ test .Expect (err ).To (BeNil ())
5628
57- connection , err := CreateOCMConnection (ocmToken )
58- if err != nil {
59- test .T ().Errorf ("Unable to create ocm connection - Error : %v" , err )
60- }
6129 defer connection .Close ()
6230
6331 // check existing cluster machine pool resources
@@ -66,120 +34,9 @@ func TestInstascaleMachinePool(t *testing.T) {
6634 test .Expect (err ).NotTo (HaveOccurred ())
6735 test .Expect (foundMachinePool ).To (BeFalse ())
6836
69- // Batch Job
70- job := & batchv1.Job {
71- TypeMeta : metav1.TypeMeta {
72- APIVersion : batchv1 .SchemeGroupVersion .String (),
73- Kind : "Job" ,
74- },
75- ObjectMeta : metav1.ObjectMeta {
76- Name : "mnist" ,
77- Namespace : namespace .Name ,
78- },
79- Spec : batchv1.JobSpec {
80- Completions : Ptr (int32 (1 )),
81- Parallelism : Ptr (int32 (1 )),
82- Template : corev1.PodTemplateSpec {
83- Spec : corev1.PodSpec {
84- Containers : []corev1.Container {
85- {
86- Name : "job" ,
87- Image : GetPyTorchImage (),
88- Env : []corev1.EnvVar {
89- corev1.EnvVar {Name : "PYTHONUSERBASE" , Value : "/test2" },
90- },
91- Command : []string {"/bin/sh" , "-c" , "pip install -r /test/requirements.txt && torchrun /test/mnist.py" },
92- Args : []string {"$PYTHONUSERBASE" },
93- VolumeMounts : []corev1.VolumeMount {
94- {
95- Name : "test" ,
96- MountPath : "/test" ,
97- },
98- {
99- Name : "test2" ,
100- MountPath : "/test2" ,
101- },
102- },
103- WorkingDir : "/test2" ,
104- },
105- },
106- Volumes : []corev1.Volume {
107- {
108- Name : "test" ,
109- VolumeSource : corev1.VolumeSource {
110- ConfigMap : & corev1.ConfigMapVolumeSource {
111- LocalObjectReference : corev1.LocalObjectReference {
112- Name : config .Name ,
113- },
114- },
115- },
116- },
117- {
118- Name : "test2" ,
119- VolumeSource : corev1.VolumeSource {
120- EmptyDir : & corev1.EmptyDirVolumeSource {},
121- },
122- },
123- },
124- RestartPolicy : corev1 .RestartPolicyNever ,
125- },
126- },
127- },
128- }
129-
130- // create an appwrapper
131- aw := & mcadv1beta1.AppWrapper {
132- ObjectMeta : metav1.ObjectMeta {
133- Name : "test-instascale" ,
134- Namespace : namespace .Name ,
135- Labels : map [string ]string {
136- "orderedinstance" : "m5.xlarge_g4dn.xlarge" ,
137- },
138- },
139- Spec : mcadv1beta1.AppWrapperSpec {
140- AggrResources : mcadv1beta1.AppWrapperResourceList {
141- GenericItems : []mcadv1beta1.AppWrapperGenericResource {
142- {
143- CustomPodResources : []mcadv1beta1.CustomPodResourceTemplate {
144- {
145- Replicas : 1 ,
146- Requests : corev1.ResourceList {
147- corev1 .ResourceCPU : resource .MustParse ("250m" ),
148- corev1 .ResourceMemory : resource .MustParse ("512Mi" ),
149- "nvidia.com/gpu" : resource .MustParse ("1" ),
150- },
151- Limits : corev1.ResourceList {
152- corev1 .ResourceCPU : resource .MustParse ("500m" ),
153- corev1 .ResourceMemory : resource .MustParse ("1G" ),
154- "nvidia.com/gpu" : resource .MustParse ("1" ),
155- },
156- },
157- {
158- Replicas : 1 ,
159- Requests : corev1.ResourceList {
160- corev1 .ResourceCPU : resource .MustParse ("250m" ),
161- corev1 .ResourceMemory : resource .MustParse ("512Mi" ),
162- },
163- Limits : corev1.ResourceList {
164- corev1 .ResourceCPU : resource .MustParse ("500m" ),
165- corev1 .ResourceMemory : resource .MustParse ("1G" ),
166- },
167- },
168- },
169- GenericTemplate : Raw (test , job ),
170- CompletionStatus : "Complete" ,
171- },
172- },
173- },
174- },
175- }
176-
177- _ , err = test .Client ().MCAD ().WorkloadV1beta1 ().AppWrappers (namespace .Name ).Create (test .Ctx (), aw , metav1.CreateOptions {})
178- test .Expect (err ).NotTo (HaveOccurred ())
179- test .T ().Logf ("AppWrapper created successfully %s/%s" , aw .Namespace , aw .Name )
180-
181- test .Eventually (AppWrapper (test , namespace , aw .Name ), TestTimeoutShort ).
182- Should (WithTransform (AppWrapperState , Equal (mcadv1beta1 .AppWrapperStateActive )))
37+ // Setup batch job and AppWrapper
38+ job , aw , err := JobAppwrapperSetup (test , namespace , config )
39+ test .Expect (err ).To (BeNil ())
18340
18441 // time.Sleep is used twice in this test, for 30 seconds each time. Consider replacing it with sync.WaitGroup from the sync package if that turns out to be a better fit.
18542 // wait for required resources to scale up before checking them again
0 commit comments