Skip to content

Commit 389d050

Browse files
authored
Merge branch 'main' into ODSC-74228/GPU-Shape-Recommendation
2 parents cce4d90 + 18e9b1c commit 389d050

File tree

11 files changed

+165
-65
lines changed

11 files changed

+165
-65
lines changed

ads/aqua/common/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class InferenceContainerTypeFamily(ExtendedEnum):
5858
AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
5959
AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
6060
AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
61+
AQUA_VLLM_OPENAI_CONTAINER_FAMILY = "odsc-vllm-serving-openai"
6162

6263

6364
class CustomInferenceContainerTypeFamily(ExtendedEnum):

ads/aqua/common/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
997997
return UNKNOWN
998998

999999

1000+
def get_container_env_type(container_type_name: Optional[str]) -> str:
    """
    Determine the container environment type based on the container type name.

    This function matches the provided container type name against the known
    values of `InferenceContainerType`. The check is case-insensitive and
    allows for partial matches so that changes in container naming conventions
    (e.g., prefixes or suffixes) will still be matched correctly.

    Examples:
        >>> get_container_env_type("odsc-vllm-serving")
        'VLLM'
        >>> get_container_env_type("ODSC-TGI-Serving")
        'TGI'
        >>> get_container_env_type("custom-unknown-container")
        'UNKNOWN'

    Args:
        container_type_name (Optional[str]):
            The deployment container type name (e.g., "odsc-vllm-serving").

    Returns:
        str:
            - A matching `InferenceContainerType` value string, upper-cased
              (e.g., "VLLM", "TGI", "LLAMA-CPP").
            - `"UNKNOWN"` if no match is found or the input is empty/None.
    """
    # Guard: None or empty string cannot match any container family.
    if not container_type_name:
        return UNKNOWN

    # casefold() gives aggressive case-insensitive matching (stronger than lower()).
    needle = container_type_name.strip().casefold()

    # Substring match: the known container type value (e.g. "vllm") is expected
    # to appear somewhere inside the full container name (e.g. "odsc-vllm-serving").
    for container_type in InferenceContainerType.values():
        if container_type and container_type.casefold() in needle:
            return container_type.upper()

    return UNKNOWN
1036+
1037+
10001038
def get_restricted_params_by_container(container_type_name: str) -> set:
10011039
"""The utility function accepts the deployment container type name and returns a set of restricted params
10021040
for that container.

ads/aqua/modeldeployment/config_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
8888
gpu_count (int, optional): Number of GPUs count to this model of this shape.
8989
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
9090
configure the behavior of a particular GPU shape.
91+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
9192
"""
9293

9394
gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
9798
default_factory=dict,
9899
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
99100
)
101+
env: Optional[Dict[str, Dict[str, str]]] = Field(
102+
default_factory=dict,
103+
description="Environment variables grouped by namespace",
104+
)
100105

101106
class Config:
102107
extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
130135
configure the behavior of a particular GPU shape.
131136
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
132137
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
138+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
133139
"""
134140

135141
parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
143149
default_factory=DeploymentShapeInfo,
144150
description="The shape information to this model for specific shape",
145151
)
152+
env: Optional[Dict[str, Dict[str, str]]] = Field(
153+
default_factory=dict,
154+
description="Environment variables grouped by namespace",
155+
)
146156

147157
class Config:
148158
extra = "allow"

ads/aqua/modeldeployment/deployment.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
build_pydantic_error_message,
2929
find_restricted_params,
3030
get_combined_params,
31+
get_container_env_type,
3132
get_container_params_type,
3233
get_ocid_substring,
3334
get_params_list,
@@ -390,6 +391,7 @@ def _create(
390391
Tags.AQUA_SERVICE_MODEL_TAG,
391392
Tags.AQUA_FINE_TUNED_MODEL_TAG,
392393
Tags.AQUA_TAG,
394+
Tags.BASE_MODEL_CUSTOM,
393395
]:
394396
if tag in aqua_model.freeform_tags:
395397
tags[tag] = aqua_model.freeform_tags[tag]
@@ -1051,6 +1053,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
10511053
config = self.get_config_from_metadata(
10521054
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
10531055
).config
1056+
10541057
if config:
10551058
logger.info(
10561059
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1135,7 +1138,7 @@ def get_deployment_default_params(
11351138
model_id: str,
11361139
instance_shape: str,
11371140
gpu_count: int = None,
1138-
) -> List[str]:
1141+
) -> Dict:
11391142
"""Gets the default params set in the deployment configs for the given model and instance shape.
11401143
11411144
Parameters
@@ -1157,6 +1160,7 @@ def get_deployment_default_params(
11571160
11581161
"""
11591162
default_params = []
1163+
default_envs = {}
11601164
config_params = {}
11611165
model = DataScienceModel.from_id(model_id)
11621166
try:
@@ -1166,19 +1170,15 @@ def get_deployment_default_params(
11661170
except ValueError:
11671171
container_type_key = UNKNOWN
11681172
logger.debug(
1169-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
1173+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
1174+
f"custom metadata field for model {model_id}."
11701175
)
11711176

1172-
if (
1173-
container_type_key
1174-
and container_type_key in InferenceContainerTypeFamily.values()
1175-
):
1177+
if container_type_key:
11761178
deployment_config = self.get_deployment_config(model_id)
1177-
11781179
instance_shape_config = deployment_config.configuration.get(
11791180
instance_shape, ConfigurationItem()
11801181
)
1181-
11821182
if instance_shape_config.multi_model_deployment and gpu_count:
11831183
gpu_params = instance_shape_config.multi_model_deployment
11841184

@@ -1187,12 +1187,18 @@ def get_deployment_default_params(
11871187
config_params = gpu_config.parameters.get(
11881188
get_container_params_type(container_type_key), UNKNOWN
11891189
)
1190+
default_envs = instance_shape_config.env.get(
1191+
get_container_env_type(container_type_key), {}
1192+
)
11901193
break
11911194

11921195
else:
11931196
config_params = instance_shape_config.parameters.get(
11941197
get_container_params_type(container_type_key), UNKNOWN
11951198
)
1199+
default_envs = instance_shape_config.env.get(
1200+
get_container_env_type(container_type_key), {}
1201+
)
11961202

11971203
if config_params:
11981204
params_list = get_params_list(config_params)
@@ -1205,7 +1211,7 @@ def get_deployment_default_params(
12051211
if params.split()[0] not in restricted_params_set:
12061212
default_params.append(params)
12071213

1208-
return default_params
1214+
return {"data": default_params, "env": default_envs}
12091215

12101216
def validate_deployment_params(
12111217
self,

ads/aqua/modeldeployment/entities.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ class CreateModelDeploymentDetails(BaseModel):
233233
None, description="The description of the deployment."
234234
)
235235
model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
236+
236237
models: Optional[List[AquaMultiModelRef]] = Field(
237238
None, description="List of models for multimodel deployment."
238239
)

docs/source/release_notes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
Release Notes
33
=============
44

5+
2.13.17
6+
-------
7+
Release date: Aug 10, 2025
8+
9+
* AI Quick Actions enhancements.
10+
511
2.13.16
612
-------
713
Release date: Jul 16, 2025

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"
2121

2222
# Required
2323
name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
24-
version = "2.13.16"
24+
version = "2.13.17"
2525

2626
# Optional
2727
description = "Oracle Accelerated Data Science SDK"

tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
{
22
"configuration": {
33
"BM.GPU.A100-v2.8": {
4+
"env": {},
45
"multi_model_deployment": [
56
{
7+
"env": {},
68
"gpu_count": 1,
79
"parameters": {
810
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
911
}
1012
},
1113
{
14+
"env": {},
1215
"gpu_count": 2,
1316
"parameters": {
1417
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
1518
}
1619
},
1720
{
21+
"env": {},
1822
"gpu_count": 8,
1923
"parameters": {
2024
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
2630
}
2731
},
2832
"BM.GPU.H100.8": {
33+
"env": {},
2934
"multi_model_deployment": [
3035
{
3136
"gpu_count": 1
@@ -44,6 +49,7 @@
4449
"VM.GPU.A10.2": {
4550
"multi_model_deployment": [
4651
{
52+
"env": {},
4753
"gpu_count": 2,
4854
"parameters": {
4955
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
5258
]
5359
},
5460
"VM.GPU.A10.4": {
61+
"env": {},
5562
"multi_model_deployment": [
5663
{
64+
"env": {},
5765
"gpu_count": 2,
5866
"parameters": {
5967
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"

tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
{
22
"configuration": {
33
"VM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
49
"parameters": {
510
"TGI_PARAMS": "--max-stop-sequences 6",
611
"VLLM_PARAMS": "--max-model-len 4096"
Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,58 @@
11
{
2-
"shape": [
3-
"VM.GPU.A10.1",
4-
"VM.GPU.A10.2",
5-
"BM.GPU.A10.4",
6-
"BM.GPU.L40S-NC.4"
7-
],
8-
"configuration": {
9-
"VM.GPU.A10.2": {
10-
"parameters": {
11-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
12-
},
13-
"multi_model_deployment": [
14-
{
15-
"gpu_count": 1
16-
}
17-
]
18-
},
19-
"BM.GPU.A10.4": {
20-
"parameters": {
21-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
22-
},
23-
"multi_model_deployment": [
24-
{
25-
"gpu_count": 1
26-
},
27-
{
28-
"gpu_count": 2
29-
}
30-
]
2+
"configuration": {
3+
"BM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
9+
"multi_model_deployment": [
10+
{
11+
"gpu_count": 1
3112
},
32-
"BM.GPU.L40S-NC.4": {
33-
"parameters": {
34-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
35-
},
36-
"multi_model_deployment": [
37-
{
38-
"gpu_count": 2
39-
}
40-
]
13+
{
14+
"gpu_count": 2
15+
}
16+
],
17+
"parameters": {
18+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
19+
}
20+
},
21+
"BM.GPU.L40S-NC.4": {
22+
"env": {
23+
"VLLM": {
24+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
25+
}
26+
},
27+
"multi_model_deployment": [
28+
{
29+
"gpu_count": 2
30+
}
31+
],
32+
"parameters": {
33+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
34+
}
35+
},
36+
"VM.GPU.A10.2": {
37+
"env": {
38+
"VLLM": {
39+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
40+
}
41+
},
42+
"multi_model_deployment": [
43+
{
44+
"gpu_count": 1
4145
}
46+
],
47+
"parameters": {
48+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
49+
}
4250
}
51+
},
52+
"shape": [
53+
"VM.GPU.A10.1",
54+
"VM.GPU.A10.2",
55+
"BM.GPU.A10.4",
56+
"BM.GPU.L40S-NC.4"
57+
]
4358
}

0 commit comments

Comments
 (0)