Skip to content

Commit f336a2d

Browse files
authored
fix: use platform namespace for http route (#937)
1 parent 1d96cb2 commit f336a2d

File tree

3 files changed

+173
-52
lines changed

3 files changed

+173
-52
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
[project]
22
name = "codeflare-sdk"
3-
version = "0.32.0"
3+
version = "0.32.2"
44

55
[tool.poetry]
66
name = "codeflare-sdk"
7-
version = "0.32.0"
7+
version = "0.32.2"
88
description = "Python SDK for codeflare client"
99

1010
license = "Apache-2.0"

src/codeflare_sdk/ray/cluster/cluster.py

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,8 @@ def is_dashboard_ready(self) -> bool:
387387
388388
This method attempts to send a GET request to the cluster dashboard URI.
389389
If the request is successful (HTTP status code 200), it returns True.
390+
For OAuth-protected dashboards, a 302 redirect to the OAuth login page
391+
also indicates the dashboard is ready (the OAuth proxy is working), as does a 401 or 403 response.
390392
If an SSL error occurs, it returns False, indicating the dashboard is not ready.
391393
392394
Returns:
@@ -399,11 +401,14 @@ def is_dashboard_ready(self) -> bool:
399401
return False
400402

401403
try:
404+
# Don't follow redirects - we want to see the redirect response
405+
# A 302 redirect from OAuth proxy indicates the dashboard is ready
402406
response = requests.get(
403407
dashboard_uri,
404408
headers=self._client_headers,
405409
timeout=5,
406410
verify=self._client_verify_tls,
411+
allow_redirects=False,
407412
)
408413
except requests.exceptions.SSLError: # pragma no cover
409414
# SSL exception occurs when oauth ingress has been created but cluster is not up
@@ -412,7 +417,11 @@ def is_dashboard_ready(self) -> bool:
412417
# Any other exception (connection errors, timeouts, etc.)
413418
return False
414419

415-
if response.status_code == 200:
420+
# Dashboard is ready if:
421+
# - 200: Dashboard is accessible (no auth required or already authenticated)
422+
# - 302: OAuth redirect - dashboard and OAuth proxy are ready, just needs authentication
423+
# - 401/403: OAuth is working and blocking unauthenticated requests - dashboard is ready
424+
if response.status_code in (200, 302, 401, 403):
416425
return True
417426
else:
418427
return False
@@ -1153,36 +1162,68 @@ def _get_dashboard_url_from_httproute(
11531162
cluster_name: str, namespace: str
11541163
) -> Optional[str]:
11551164
"""
1156-
Attempts to get the Ray dashboard URL from an HTTPRoute resource.
1157-
This is used for RHOAI v3.0+ clusters that use Gateway API.
1158-
1165+
Get the Ray dashboard URL from an HTTPRoute (RHOAI v3.0+ Gateway API).
1166+
Searches for an HTTPRoute labeled with ray.io/cluster-name and ray.io/cluster-namespace.
1167+
Returns the dashboard URL if found, or None to allow fallback to Routes/Ingress.
11591168
Args:
1160-
cluster_name: Name of the Ray cluster
1161-
namespace: Namespace of the Ray cluster
1162-
1169+
cluster_name: Ray cluster name
1170+
namespace: Ray cluster namespace
11631171
Returns:
1164-
Dashboard URL if HTTPRoute is found, None otherwise
1172+
Dashboard URL if found, else None
11651173
"""
11661174
try:
11671175
config_check()
11681176
api_instance = client.CustomObjectsApi(get_api_client())
11691177

1170-
# Try to get HTTPRoute for this Ray cluster
1178+
label_selector = (
1179+
f"ray.io/cluster-name={cluster_name},ray.io/cluster-namespace={namespace}"
1180+
)
1181+
1182+
# Try cluster-wide search first (if permissions allow)
11711183
try:
1172-
httproute = api_instance.get_namespaced_custom_object(
1184+
httproutes = api_instance.list_cluster_custom_object(
11731185
group="gateway.networking.k8s.io",
11741186
version="v1",
1175-
namespace=namespace,
11761187
plural="httproutes",
1177-
name=cluster_name,
1188+
label_selector=label_selector,
11781189
)
1179-
except client.exceptions.ApiException as e:
1180-
if e.status == 404:
1181-
# HTTPRoute not found - this is expected for SDK v0.31.1 and below or Kind clusters
1190+
items = httproutes.get("items", [])
1191+
if items:
1192+
httproute = items[0]
1193+
else:
1194+
# No HTTPRoute found
1195+
return None
1196+
except Exception:
1197+
# Cluster-wide search failed (e.g. no cluster-wide permissions), try namespace-specific search
1198+
search_namespaces = [
1199+
"redhat-ods-applications",
1200+
"opendatahub",
1201+
"default",
1202+
"ray-system",
1203+
]
1204+
1205+
httproute = None
1206+
for ns in search_namespaces:
1207+
try:
1208+
httproutes = api_instance.list_namespaced_custom_object(
1209+
group="gateway.networking.k8s.io",
1210+
version="v1",
1211+
namespace=ns,
1212+
plural="httproutes",
1213+
label_selector=label_selector,
1214+
)
1215+
items = httproutes.get("items", [])
1216+
if items:
1217+
httproute = items[0]
1218+
break
1219+
except client.ApiException:
1220+
continue
1221+
1222+
if not httproute:
1223+
# No HTTPRoute found
11821224
return None
1183-
raise
11841225

1185-
# Get the Gateway reference from HTTPRoute
1226+
# Extract Gateway reference and construct dashboard URL
11861227
parent_refs = httproute.get("spec", {}).get("parentRefs", [])
11871228
if not parent_refs:
11881229
return None
@@ -1203,7 +1244,6 @@ def _get_dashboard_url_from_httproute(
12031244
name=gateway_name,
12041245
)
12051246

1206-
# Extract hostname from Gateway listeners
12071247
listeners = gateway.get("spec", {}).get("listeners", [])
12081248
if not listeners:
12091249
return None
@@ -1212,14 +1252,9 @@ def _get_dashboard_url_from_httproute(
12121252
if not hostname:
12131253
return None
12141254

1215-
# Construct the dashboard URL using RHOAI v3.0+ Gateway API pattern
1216-
# The HTTPRoute existence confirms v3.0+, so we use the standard path pattern
1217-
# Format: https://{hostname}/ray/{namespace}/{cluster-name}
1218-
protocol = "https" # Gateway API uses HTTPS
1219-
dashboard_url = f"{protocol}://{hostname}/ray/{namespace}/{cluster_name}"
1220-
1221-
return dashboard_url
1255+
# Construct dashboard URL: https://{hostname}/ray/{namespace}/{cluster-name}
1256+
return f"https://{hostname}/ray/{namespace}/{cluster_name}"
12221257

1223-
except Exception as e: # pragma: no cover
1224-
# If any error occurs, return None to fall back to OpenShift Route
1258+
except Exception: # pragma: no cover
1259+
# Any error means no HTTPRoute - fall back to Routes/Ingress
12251260
return None

src/codeflare_sdk/ray/cluster/test_cluster.py

Lines changed: 109 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -991,14 +991,22 @@ def test_get_dashboard_url_from_httproute(mocker):
991991
},
992992
}
993993

994-
# Mock the CustomObjectsApi to return HTTPRoute and Gateway
995-
def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
994+
# Mock list_cluster_custom_object to return HTTPRoute (cluster-wide search)
995+
def mock_list_cluster_custom_object(group, version, plural, label_selector):
996996
if plural == "httproutes":
997-
return mock_httproute
998-
elif plural == "gateways":
997+
return {"items": [mock_httproute]}
998+
raise Exception("Unexpected plural")
999+
1000+
# Mock get_namespaced_custom_object to return Gateway
1001+
def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
1002+
if plural == "gateways":
9991003
return mock_gateway
10001004
raise Exception("Unexpected plural")
10011005

1006+
mocker.patch(
1007+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1008+
side_effect=mock_list_cluster_custom_object,
1009+
)
10021010
mocker.patch(
10031011
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
10041012
side_effect=mock_get_namespaced_custom_object,
@@ -1011,15 +1019,24 @@ def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
10111019
)
10121020
assert result == expected_url, f"Expected {expected_url}, got {result}"
10131021

1014-
# Test HTTPRoute not found (404) - should return None
1015-
def mock_404_error(group, version, namespace, plural, name):
1016-
error = client.exceptions.ApiException(status=404)
1017-
error.status = 404
1018-
raise error
1022+
# Test HTTPRoute not found - should return None
1023+
def mock_list_cluster_empty(group, version, plural, label_selector):
1024+
if plural == "httproutes":
1025+
return {"items": []}
1026+
raise Exception("Unexpected plural")
1027+
1028+
def mock_list_namespaced_empty(group, version, namespace, plural, label_selector):
1029+
if plural == "httproutes":
1030+
return {"items": []}
1031+
raise Exception("Unexpected plural")
10191032

10201033
mocker.patch(
1021-
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1022-
side_effect=mock_404_error,
1034+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1035+
side_effect=mock_list_cluster_empty,
1036+
)
1037+
mocker.patch(
1038+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
1039+
side_effect=mock_list_namespaced_empty,
10231040
)
10241041

10251042
result = _get_dashboard_url_from_httproute("nonexistent-cluster", "test-ns")
@@ -1031,14 +1048,14 @@ def mock_404_error(group, version, namespace, plural, name):
10311048
"spec": {"parentRefs": []}, # Empty parentRefs
10321049
}
10331050

1034-
def mock_httproute_no_parents_fn(group, version, namespace, plural, name):
1051+
def mock_list_cluster_no_parents(group, version, plural, label_selector):
10351052
if plural == "httproutes":
1036-
return mock_httproute_no_parents
1053+
return {"items": [mock_httproute_no_parents]}
10371054
raise Exception("Unexpected plural")
10381055

10391056
mocker.patch(
1040-
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1041-
side_effect=mock_httproute_no_parents_fn,
1057+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1058+
side_effect=mock_list_cluster_no_parents,
10421059
)
10431060

10441061
result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
@@ -1059,14 +1076,14 @@ def mock_httproute_no_parents_fn(group, version, namespace, plural, name):
10591076
},
10601077
}
10611078

1062-
def mock_httproute_no_name_fn(group, version, namespace, plural, name):
1079+
def mock_list_cluster_no_name(group, version, plural, label_selector):
10631080
if plural == "httproutes":
1064-
return mock_httproute_no_name
1081+
return {"items": [mock_httproute_no_name]}
10651082
raise Exception("Unexpected plural")
10661083

10671084
mocker.patch(
1068-
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1069-
side_effect=mock_httproute_no_name_fn,
1085+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1086+
side_effect=mock_list_cluster_no_name,
10701087
)
10711088

10721089
result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
@@ -1087,14 +1104,14 @@ def mock_httproute_no_name_fn(group, version, namespace, plural, name):
10871104
},
10881105
}
10891106

1090-
def mock_httproute_no_namespace_fn(group, version, namespace, plural, name):
1107+
def mock_list_cluster_no_namespace(group, version, plural, label_selector):
10911108
if plural == "httproutes":
1092-
return mock_httproute_no_namespace
1109+
return {"items": [mock_httproute_no_namespace]}
10931110
raise Exception("Unexpected plural")
10941111

10951112
mocker.patch(
1096-
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1097-
side_effect=mock_httproute_no_namespace_fn,
1113+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1114+
side_effect=mock_list_cluster_no_namespace,
10981115
)
10991116

11001117
result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
@@ -1183,6 +1200,75 @@ def mock_403_error(group, version, namespace, plural, name):
11831200
result is None
11841201
), "Should return None when non-404 exception occurs (caught by outer handler)"
11851202

1203+
# Real-world scenario: Cluster-wide permissions denied, falls back to namespace search
1204+
# This simulates a regular data scientist without cluster-admin permissions
1205+
call_count = {"cluster_wide": 0, "namespaced": 0}
1206+
1207+
def mock_list_cluster_permission_denied(group, version, plural, label_selector):
1208+
call_count["cluster_wide"] += 1
1209+
# Simulate permission denied for cluster-wide search
1210+
error = client.exceptions.ApiException(status=403)
1211+
error.status = 403
1212+
raise error
1213+
1214+
def mock_list_namespaced_success(group, version, namespace, plural, label_selector):
1215+
call_count["namespaced"] += 1
1216+
# First namespace returns no matches, second succeeds (simulates opendatahub deployment)
1217+
if namespace == "redhat-ods-applications":
1218+
return {"items": []}
1219+
elif namespace == "opendatahub":
1220+
return {"items": [mock_httproute]}
1221+
return {"items": []}
1222+
1223+
mocker.patch(
1224+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1225+
side_effect=mock_list_cluster_permission_denied,
1226+
)
1227+
mocker.patch(
1228+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
1229+
side_effect=mock_list_namespaced_success,
1230+
)
1231+
mocker.patch(
1232+
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1233+
side_effect=mock_get_namespaced_custom_object,
1234+
)
1235+
1236+
result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
1237+
expected_url = (
1238+
"https://data-science-gateway.apps.example.com/ray/test-ns/test-cluster"
1239+
)
1240+
assert result == expected_url, f"Expected {expected_url}, got {result}"
1241+
assert call_count["cluster_wide"] == 1, "Should try cluster-wide search first"
1242+
assert (
1243+
call_count["namespaced"] >= 2
1244+
), "Should fall back to namespace search and try multiple namespaces"
1245+
1246+
# Real-world scenario: Gateway not found (404) - should return None
1247+
# This can happen if Gateway was deleted but HTTPRoute still exists
1248+
def mock_list_cluster_with_httproute(group, version, plural, label_selector):
1249+
if plural == "httproutes":
1250+
return {"items": [mock_httproute]}
1251+
raise Exception("Unexpected plural")
1252+
1253+
def mock_get_gateway_404(group, version, namespace, plural, name):
1254+
if plural == "gateways":
1255+
error = client.exceptions.ApiException(status=404)
1256+
error.status = 404
1257+
raise error
1258+
raise Exception("Unexpected plural")
1259+
1260+
mocker.patch(
1261+
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
1262+
side_effect=mock_list_cluster_with_httproute,
1263+
)
1264+
mocker.patch(
1265+
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
1266+
side_effect=mock_get_gateway_404,
1267+
)
1268+
1269+
result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
1270+
assert result is None, "Should return None when Gateway not found (404)"
1271+
11861272

11871273
def test_cluster_dashboard_uri_httproute_first(mocker):
11881274
"""

0 commit comments

Comments
 (0)