Skip to content

Commit d5653e5

Browse files
committed
Add Shape-Specific Env Config for GPT-OSS Models in AQUA Deployment Config Reader
1 parent f38230a commit d5653e5

File tree

7 files changed

+157
-65
lines changed

7 files changed

+157
-65
lines changed

ads/aqua/common/utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import shutil
1515
import subprocess
1616
from datetime import datetime, timedelta
17-
from functools import wraps
17+
from functools import lru_cache, wraps
1818
from pathlib import Path
1919
from string import Template
2020
from typing import Any, Dict, List, Optional, Union
@@ -997,6 +997,45 @@ def get_container_params_type(container_type_name: str) -> str:
997997
return UNKNOWN
998998

999999

1000+
@lru_cache(maxsize=None)
def get_container_env_type(container_type_name: Optional[str]) -> str:
    """
    Determine the container environment type based on the container type name.

    This function matches the provided container type name against the known
    values of `InferenceContainerType`. The check is case-insensitive and
    allows for partial matches so that changes in container naming conventions
    (e.g., prefixes or suffixes) will still be matched correctly.

    Examples:
        >>> get_container_env_type("odsc-vllm-serving")
        'VLLM'
        >>> get_container_env_type("ODSC-TGI-Serving")
        'TGI'
        >>> get_container_env_type("custom-unknown-container")
        'UNKNOWN'

    Args:
        container_type_name (Optional[str]):
            The deployment container type name (e.g., "odsc-vllm-serving").

    Returns:
        str:
            - A matching `InferenceContainerType` value string, upper-cased
              (e.g., "VLLM", "TGI", "LLAMA-CPP").
            - `"UNKNOWN"` if no match is found or the input is empty/None.
    """
    if not container_type_name:
        return UNKNOWN

    # Normalize once so the membership test below is case-insensitive.
    needle = container_type_name.strip().casefold()

    for container_type in InferenceContainerType.values():
        # Substring (not equality) match so prefixed/suffixed container names
        # such as "odsc-vllm-serving" still resolve to their base type.
        if container_type and container_type.casefold() in needle:
            # .upper() guarantees the return value is upper-cased regardless
            # of how InferenceContainerType stores its values — hence the
            # doctest outputs above are upper-case.
            return container_type.upper()

    return UNKNOWN
1037+
1038+
10001039
def get_restricted_params_by_container(container_type_name: str) -> set:
10011040
"""The utility function accepts the deployment container type name and returns a set of restricted params
10021041
for that container.

ads/aqua/modeldeployment/config_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
8888
gpu_count (int, optional): Number of GPUs count to this model of this shape.
8989
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
9090
configure the behavior of a particular GPU shape.
91+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
9192
"""
9293

9394
gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
9798
default_factory=dict,
9899
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
99100
)
101+
env: Optional[Dict[str, Dict[str, str]]] = Field(
102+
default_factory=dict,
103+
description="Environment variables grouped by namespace",
104+
)
100105

101106
class Config:
102107
extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
130135
configure the behavior of a particular GPU shape.
131136
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
132137
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
138+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
133139
"""
134140

135141
parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
143149
default_factory=DeploymentShapeInfo,
144150
description="The shape information to this model for specific shape",
145151
)
152+
env: Optional[Dict[str, Dict[str, str]]] = Field(
153+
default_factory=dict,
154+
description="Environment variables grouped by namespace",
155+
)
146156

147157
class Config:
148158
extra = "allow"

ads/aqua/modeldeployment/deployment.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
build_pydantic_error_message,
2828
find_restricted_params,
2929
get_combined_params,
30+
get_container_env_type,
3031
get_container_params_type,
3132
get_ocid_substring,
3233
get_params_list,
@@ -1042,6 +1043,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
10421043
config = self.get_config_from_metadata(
10431044
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
10441045
).config
1046+
10451047
if config:
10461048
logger.info(
10471049
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1126,7 +1128,7 @@ def get_deployment_default_params(
11261128
model_id: str,
11271129
instance_shape: str,
11281130
gpu_count: int = None,
1129-
) -> List[str]:
1131+
) -> Dict:
11301132
"""Gets the default params set in the deployment configs for the given model and instance shape.
11311133
11321134
Parameters
@@ -1148,6 +1150,7 @@ def get_deployment_default_params(
11481150
11491151
"""
11501152
default_params = []
1153+
default_envs = {}
11511154
config_params = {}
11521155
model = DataScienceModel.from_id(model_id)
11531156
try:
@@ -1157,19 +1160,15 @@ def get_deployment_default_params(
11571160
except ValueError:
11581161
container_type_key = UNKNOWN
11591162
logger.debug(
1160-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
1163+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
1164+
f"custom metadata field for model {model_id}."
11611165
)
11621166

1163-
if (
1164-
container_type_key
1165-
and container_type_key in InferenceContainerTypeFamily.values()
1166-
):
1167+
if container_type_key:
11671168
deployment_config = self.get_deployment_config(model_id)
1168-
11691169
instance_shape_config = deployment_config.configuration.get(
11701170
instance_shape, ConfigurationItem()
11711171
)
1172-
11731172
if instance_shape_config.multi_model_deployment and gpu_count:
11741173
gpu_params = instance_shape_config.multi_model_deployment
11751174

@@ -1178,12 +1177,18 @@ def get_deployment_default_params(
11781177
config_params = gpu_config.parameters.get(
11791178
get_container_params_type(container_type_key), UNKNOWN
11801179
)
1180+
default_envs = instance_shape_config.env.get(
1181+
get_container_env_type(container_type_key), {}
1182+
)
11811183
break
11821184

11831185
else:
11841186
config_params = instance_shape_config.parameters.get(
11851187
get_container_params_type(container_type_key), UNKNOWN
11861188
)
1189+
default_envs = instance_shape_config.env.get(
1190+
get_container_env_type(container_type_key), {}
1191+
)
11871192

11881193
if config_params:
11891194
params_list = get_params_list(config_params)
@@ -1196,7 +1201,7 @@ def get_deployment_default_params(
11961201
if params.split()[0] not in restricted_params_set:
11971202
default_params.append(params)
11981203

1199-
return default_params
1204+
return {"data": default_params, "env": default_envs}
12001205

12011206
def validate_deployment_params(
12021207
self,

tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
{
22
"configuration": {
33
"BM.GPU.A100-v2.8": {
4+
"env": {},
45
"multi_model_deployment": [
56
{
7+
"env": {},
68
"gpu_count": 1,
79
"parameters": {
810
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
911
}
1012
},
1113
{
14+
"env": {},
1215
"gpu_count": 2,
1316
"parameters": {
1417
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
1518
}
1619
},
1720
{
21+
"env": {},
1822
"gpu_count": 8,
1923
"parameters": {
2024
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
2630
}
2731
},
2832
"BM.GPU.H100.8": {
33+
"env": {},
2934
"multi_model_deployment": [
3035
{
3136
"gpu_count": 1
@@ -44,6 +49,7 @@
4449
"VM.GPU.A10.2": {
4550
"multi_model_deployment": [
4651
{
52+
"env": {},
4753
"gpu_count": 2,
4854
"parameters": {
4955
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
5258
]
5359
},
5460
"VM.GPU.A10.4": {
61+
"env": {},
5562
"multi_model_deployment": [
5663
{
64+
"env": {},
5765
"gpu_count": 2,
5866
"parameters": {
5967
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"

tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
{
22
"configuration": {
33
"VM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
49
"parameters": {
510
"TGI_PARAMS": "--max-stop-sequences 6",
611
"VLLM_PARAMS": "--max-model-len 4096"
Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,58 @@
11
{
2-
"shape": [
3-
"VM.GPU.A10.1",
4-
"VM.GPU.A10.2",
5-
"BM.GPU.A10.4",
6-
"BM.GPU.L40S-NC.4"
7-
],
8-
"configuration": {
9-
"VM.GPU.A10.2": {
10-
"parameters": {
11-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
12-
},
13-
"multi_model_deployment": [
14-
{
15-
"gpu_count": 1
16-
}
17-
]
18-
},
19-
"BM.GPU.A10.4": {
20-
"parameters": {
21-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
22-
},
23-
"multi_model_deployment": [
24-
{
25-
"gpu_count": 1
26-
},
27-
{
28-
"gpu_count": 2
29-
}
30-
]
2+
"configuration": {
3+
"BM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
9+
"multi_model_deployment": [
10+
{
11+
"gpu_count": 1
3112
},
32-
"BM.GPU.L40S-NC.4": {
33-
"parameters": {
34-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
35-
},
36-
"multi_model_deployment": [
37-
{
38-
"gpu_count": 2
39-
}
40-
]
13+
{
14+
"gpu_count": 2
15+
}
16+
],
17+
"parameters": {
18+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
19+
}
20+
},
21+
"BM.GPU.L40S-NC.4": {
22+
"env": {
23+
"VLLM": {
24+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
25+
}
26+
},
27+
"multi_model_deployment": [
28+
{
29+
"gpu_count": 2
30+
}
31+
],
32+
"parameters": {
33+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
34+
}
35+
},
36+
"VM.GPU.A10.2": {
37+
"env": {
38+
"VLLM": {
39+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
40+
}
41+
},
42+
"multi_model_deployment": [
43+
{
44+
"gpu_count": 1
4145
}
46+
],
47+
"parameters": {
48+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
49+
}
4250
}
51+
},
52+
"shape": [
53+
"VM.GPU.A10.1",
54+
"VM.GPU.A10.2",
55+
"BM.GPU.A10.4",
56+
"BM.GPU.L40S-NC.4"
57+
]
4358
}

0 commit comments

Comments (0)