@@ -103,26 +103,6 @@ def job_client(self):
103103 )
104104 return self ._job_submission_client
105105
106- def evaluate_dispatch_priority (self ):
107- priority_class = self .config .dispatch_priority
108-
109- try :
110- config_check ()
111- api_instance = client .CustomObjectsApi (api_config_handler ())
112- priority_classes = api_instance .list_cluster_custom_object (
113- group = "scheduling.k8s.io" ,
114- version = "v1" ,
115- plural = "priorityclasses" ,
116- )
117- except Exception as e : # pragma: no cover
118- return _kube_api_error_handling (e )
119-
120- for pc in priority_classes ["items" ]:
121- if pc ["metadata" ]["name" ] == priority_class :
122- return pc ["value" ]
123- print (f"Priority class { priority_class } is not available in the cluster" )
124- return None
125-
126106 def validate_image_config (self ):
127107 """
128108 Validates that the image configuration is not empty.
@@ -152,18 +132,6 @@ def create_app_wrapper(self):
152132 self .validate_image_config ()
153133
154134 # Before attempting to create the cluster AW, let's evaluate the ClusterConfig
155- if self .config .dispatch_priority :
156- if not self .config .mcad :
157- raise ValueError (
158- "Invalid Cluster Configuration, cannot have dispatch priority without MCAD"
159- )
160- priority_val = self .evaluate_dispatch_priority ()
161- if priority_val == None :
162- raise ValueError (
163- "Invalid Cluster Configuration, AppWrapper not generated"
164- )
165- else :
166- priority_val = None
167135
168136 name = self .config .name
169137 namespace = self .config .namespace
@@ -178,12 +146,10 @@ def create_app_wrapper(self):
178146 workers = self .config .num_workers
179147 template = self .config .template
180148 image = self .config .image
181- instascale = self .config .instascale
182- mcad = self .config .mcad
149+ appwrapper = self .config .appwrapper
183150 instance_types = self .config .machine_types
184151 env = self .config .envs
185152 image_pull_secrets = self .config .image_pull_secrets
186- dispatch_priority = self .config .dispatch_priority
187153 write_to_file = self .config .write_to_file
188154 verify_tls = self .config .verify_tls
189155 local_queue = self .config .local_queue
@@ -202,13 +168,10 @@ def create_app_wrapper(self):
202168 workers = workers ,
203169 template = template ,
204170 image = image ,
205- instascale = instascale ,
206- mcad = mcad ,
171+ appwrapper = appwrapper ,
207172 instance_types = instance_types ,
208173 env = env ,
209174 image_pull_secrets = image_pull_secrets ,
210- dispatch_priority = dispatch_priority ,
211- priority_val = priority_val ,
212175 write_to_file = write_to_file ,
213176 verify_tls = verify_tls ,
214177 local_queue = local_queue ,
@@ -230,13 +193,13 @@ def up(self):
230193 try :
231194 config_check ()
232195 api_instance = client .CustomObjectsApi (api_config_handler ())
233- if self .config .mcad :
196+ if self .config .appwrapper :
234197 if self .config .write_to_file :
235198 with open (self .app_wrapper_yaml ) as f :
236199 aw = yaml .load (f , Loader = yaml .FullLoader )
237200 api_instance .create_namespaced_custom_object (
238201 group = "workload.codeflare.dev" ,
239- version = "v1beta1 " ,
202+ version = "v1beta2 " ,
240203 namespace = namespace ,
241204 plural = "appwrappers" ,
242205 body = aw ,
@@ -245,7 +208,7 @@ def up(self):
245208 aw = yaml .safe_load (self .app_wrapper_yaml )
246209 api_instance .create_namespaced_custom_object (
247210 group = "workload.codeflare.dev" ,
248- version = "v1beta1 " ,
211+ version = "v1beta2 " ,
249212 namespace = namespace ,
250213 plural = "appwrappers" ,
251214 body = aw ,
@@ -284,10 +247,10 @@ def down(self):
284247 try :
285248 config_check ()
286249 api_instance = client .CustomObjectsApi (api_config_handler ())
287- if self .config .mcad :
250+ if self .config .appwrapper :
288251 api_instance .delete_namespaced_custom_object (
289252 group = "workload.codeflare.dev" ,
290- version = "v1beta1 " ,
253+ version = "v1beta2 " ,
291254 namespace = namespace ,
292255 plural = "appwrappers" ,
293256 name = self .app_wrapper_name ,
@@ -306,30 +269,28 @@ def status(
306269 """
307270 ready = False
308271 status = CodeFlareClusterStatus .UNKNOWN
309- if self .config .mcad :
272+ if self .config .appwrapper :
310273 # check the app wrapper status
311274 appwrapper = _app_wrapper_status (self .config .name , self .config .namespace )
312275 if appwrapper :
313276 if appwrapper .status in [
314- AppWrapperStatus .RUNNING ,
315- AppWrapperStatus .COMPLETED ,
316- AppWrapperStatus .RUNNING_HOLD_COMPLETION ,
277+ AppWrapperStatus .RESUMING ,
278+ AppWrapperStatus .RESETTING ,
317279 ]:
318280 ready = False
319281 status = CodeFlareClusterStatus .STARTING
320282 elif appwrapper .status in [
321283 AppWrapperStatus .FAILED ,
322- AppWrapperStatus .DELETED ,
323284 ]:
324285 ready = False
325286 status = CodeFlareClusterStatus .FAILED # should deleted be separate
326287 return status , ready # exit early, no need to check ray status
327288 elif appwrapper .status in [
328- AppWrapperStatus .PENDING ,
329- AppWrapperStatus .QUEUEING ,
289+ AppWrapperStatus .SUSPENDED ,
290+ AppWrapperStatus .SUSPENDING ,
330291 ]:
331292 ready = False
332- if appwrapper .status == AppWrapperStatus .PENDING :
293+ if appwrapper .status == AppWrapperStatus .SUSPENDED :
333294 status = CodeFlareClusterStatus .QUEUED
334295 else :
335296 status = CodeFlareClusterStatus .QUEUEING
@@ -501,7 +462,7 @@ def job_logs(self, job_id: str) -> str:
501462
502463 def from_k8_cluster_object (
503464 rc ,
504- mcad = True ,
465+ appwrapper = True ,
505466 write_to_file = False ,
506467 verify_tls = True ,
507468 ):
@@ -534,11 +495,10 @@ def from_k8_cluster_object(
534495 "resources"
535496 ]["limits" ]["nvidia.com/gpu" ]
536497 ),
537- instascale = True if machine_types else False ,
538498 image = rc ["spec" ]["workerGroupSpecs" ][0 ]["template" ]["spec" ]["containers" ][
539499 0
540500 ]["image" ],
541- mcad = mcad ,
501+ appwrapper = appwrapper ,
542502 write_to_file = write_to_file ,
543503 verify_tls = verify_tls ,
544504 local_queue = rc ["metadata" ]
@@ -597,15 +557,15 @@ def list_all_clusters(namespace: str, print_to_console: bool = True):
597557 return clusters
598558
599559
600- def list_all_queued (namespace : str , print_to_console : bool = True , mcad : bool = False ):
560+ def list_all_queued (
561+ namespace : str , print_to_console : bool = True , appwrapper : bool = False
562+ ):
601563 """
602564 Returns (and prints by default) a list of all currently queued-up Ray Clusters
603565 in a given namespace.
604566 """
605- if mcad :
606- resources = _get_app_wrappers (
607- namespace , filter = [AppWrapperStatus .RUNNING , AppWrapperStatus .PENDING ]
608- )
567+ if appwrapper :
568+ resources = _get_app_wrappers (namespace , filter = [AppWrapperStatus .SUSPENDED ])
609569 if print_to_console :
610570 pretty_print .print_app_wrappers_status (resources )
611571 else :
@@ -675,10 +635,10 @@ def get_cluster(
675635
676636 for rc in rcs ["items" ]:
677637 if rc ["metadata" ]["name" ] == cluster_name :
678- mcad = _check_aw_exists (cluster_name , namespace )
638+ appwrapper = _check_aw_exists (cluster_name , namespace )
679639 return Cluster .from_k8_cluster_object (
680640 rc ,
681- mcad = mcad ,
641+ appwrapper = appwrapper ,
682642 write_to_file = write_to_file ,
683643 verify_tls = verify_tls ,
684644 )
@@ -721,7 +681,7 @@ def _check_aw_exists(name: str, namespace: str) -> bool:
721681 api_instance = client .CustomObjectsApi (api_config_handler ())
722682 aws = api_instance .list_namespaced_custom_object (
723683 group = "workload.codeflare.dev" ,
724- version = "v1beta1 " ,
684+ version = "v1beta2 " ,
725685 namespace = namespace ,
726686 plural = "appwrappers" ,
727687 )
@@ -781,7 +741,7 @@ def _app_wrapper_status(name, namespace="default") -> Optional[AppWrapper]:
781741 api_instance = client .CustomObjectsApi (api_config_handler ())
782742 aws = api_instance .list_namespaced_custom_object (
783743 group = "workload.codeflare.dev" ,
784- version = "v1beta1 " ,
744+ version = "v1beta2 " ,
785745 namespace = namespace ,
786746 plural = "appwrappers" ,
787747 )
@@ -851,7 +811,7 @@ def _get_app_wrappers(
851811 api_instance = client .CustomObjectsApi (api_config_handler ())
852812 aws = api_instance .list_namespaced_custom_object (
853813 group = "workload.codeflare.dev" ,
854- version = "v1beta1 " ,
814+ version = "v1beta2 " ,
855815 namespace = namespace ,
856816 plural = "appwrappers" ,
857817 )
@@ -945,18 +905,14 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
945905
946906
947907def _map_to_app_wrapper (aw ) -> AppWrapper :
948- if "status" in aw and "canrun" in aw [ "status" ] :
908+ if "status" in aw :
949909 return AppWrapper (
950910 name = aw ["metadata" ]["name" ],
951- status = AppWrapperStatus (aw ["status" ]["state" ].lower ()),
952- can_run = aw ["status" ]["canrun" ],
953- job_state = aw ["status" ]["queuejobstate" ],
911+ status = AppWrapperStatus (aw ["status" ]["phase" ].lower ()),
954912 )
955913 return AppWrapper (
956914 name = aw ["metadata" ]["name" ],
957- status = AppWrapperStatus ("queueing" ),
958- can_run = False ,
959- job_state = "Still adding to queue" ,
915+ status = AppWrapperStatus ("suspended" ),
960916 )
961917
962918
0 commit comments