Commit 99f5067

Merge branch 'main' into ODSC-77200-auto-select-latest-conda-2
2 parents a7ae673 + a40168f commit 99f5067

35 files changed: +3152 -458 lines changed

ads/aqua/common/entities.py

Lines changed: 183 additions & 57 deletions
@@ -3,7 +3,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from oci.data_science.models import Model
 from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -46,37 +46,77 @@ class Config:
         arbitrary_types_allowed = True
         protected_namespaces = ()
 
+
 class ComputeRank(Serializable):
     """
-    Represents the cost and performance ranking for a compute shape.
+    Represents the cost and performance rankings for a specific compute shape.
+    These rankings help compare different shapes based on their relative pricing
+    and computational capabilities.
     """
-    cost: int = Field(
-        None, description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]"
+
+    cost: Optional[int] = Field(
+        None,
+        description=(
+            "Relative cost ranking of the compute shape. "
+            "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
+            "Lower values indicate cheaper compute options."
+        ),
     )
 
-    performance: int = Field(
-        None, description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]"
+    performance: Optional[int] = Field(
+        None,
+        description=(
+            "Relative performance ranking of the compute shape. "
+            "Value ranges from 10 (lowest performance) to 110 (highest performance). "
+            "Higher values indicate better compute performance."
+        ),
     )
 
+
 class GPUSpecs(Serializable):
     """
-    Represents the GPU specifications for a compute instance.
+    Represents the specifications and capabilities of a GPU-enabled compute shape.
+    Includes details about GPU and CPU resources, supported quantization formats, and
+    relative rankings for cost and performance.
     """
 
-    gpu_memory_in_gbs: Optional[int] = Field(
-        default=None, description="The amount of GPU memory available (in GB)."
-    )
     gpu_count: Optional[int] = Field(
-        default=None, description="The number of GPUs available."
+        default=None,
+        description="Number of physical GPUs available on the compute shape.",
+    )
+
+    gpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total GPU memory available in gigabytes (GB)."
     )
+
     gpu_type: Optional[str] = Field(
-        default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
+        default=None,
+        description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
     )
+
     quantization: Optional[List[str]] = Field(
-        default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
+        default_factory=list,
+        description=(
+            "List of supported quantization formats for the GPU. "
+            "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
+        ),
     )
+
+    cpu_count: Optional[int] = Field(
+        default=None, description="Number of CPU cores available on the shape."
+    )
+
+    cpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total CPU memory available in gigabytes (GB)."
+    )
+
     ranking: Optional[ComputeRank] = Field(
-        None, description="The relative rank of the cost and performance of the shape."
+        default=None,
+        description=(
+            "Relative cost and performance rankings of this shape. "
+            "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
+            "and performance from 10 (lowest) to 100+ (highest)."
+        ),
     )
 
 
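The expanded GPUSpecs model now carries CPU counts, CPU memory, and a nested ComputeRank alongside the GPU fields. A minimal sketch of a populated entry, assuming the classes are imported from ads.aqua.common.entities; the values below are invented for illustration and do not come from the real GPU shapes index:

from ads.aqua.common.entities import ComputeRank, GPUSpecs

# Hypothetical values, for illustration only.
h100 = GPUSpecs(
    gpu_count=8,
    gpu_memory_in_gbs=640,
    gpu_type="H100",
    quantization=["fp16", "bf16", "fp8", "bitsandbytes"],
    cpu_count=112,
    cpu_memory_in_gbs=1024,
    ranking=ComputeRank(cost=90, performance=100),
)
print(h100.ranking.performance)  # 100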
@@ -97,50 +137,49 @@ class GPUShapesIndex(Serializable):
 
 class ComputeShapeSummary(Serializable):
     """
-    Represents the specifications of a compute instance shape,
-    including CPU, memory, and optional GPU characteristics.
+    Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
     """
 
     available: Optional[bool] = Field(
-        default = False,
-        description="True if shape is available on user tenancy, "
+        default=False,
+        description="True if the shape is available in the user's tenancy/region.",
    )
+
     core_count: Optional[int] = Field(
-        default=None,
-        description="Total number of CPU cores available for the compute shape.",
+        default=None, description="Number of vCPUs available for the compute shape."
    )
+
     memory_in_gbs: Optional[int] = Field(
-        default=None,
-        description="Amount of memory (in GB) available for the compute shape.",
+        default=None, description="Total CPU memory available for the shape (in GB)."
    )
+
     name: Optional[str] = Field(
-        default=None,
-        description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
+        default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
    )
+
     shape_series: Optional[str] = Field(
         default=None,
-        description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
+        description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
    )
+
     gpu_specs: Optional[GPUSpecs] = Field(
-        default=None,
-        description="Optional GPU specifications associated with the shape.",
+        default=None, description="GPU configuration for the shape, if applicable."
    )
 
     @model_validator(mode="after")
     @classmethod
-    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+    def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
         """
-        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
-
-        - If the shape_series contains "GPU", the validator first checks if the shape name exists
-          in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
-        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
-          using a regex pattern (looking for a number following a dot at the end of the name).
+        Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
 
-        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+        Logic:
+        - If `shape_series` includes 'GPU' and `gpu_specs` is None:
+            - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
+            - Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2).
+        - If extraction fails, logs debug-level error but does not raise.
 
         Returns:
-            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+            ComputeShapeSummary: The updated model instance.
         """
         try:
             if (
@@ -149,16 +188,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
                 and model.name
                 and not model.gpu_specs
             ):
-                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
                 match = re.search(r"\.(\d+)$", model.name)
                 if match:
                     gpu_count = int(match.group(1))
                     model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
         except Exception as err:
             logger.debug(
-                f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
-                f"Details: {err}"
+                f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
             )
+
         return model
 
 
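The fallback in populate_gpu_specs relies only on the trailing numeric group of the shape name. A standalone sketch of that regex behavior (ignoring the validator's shape_series and gpu_specs checks); the shape names are illustrative:

import re

# Same pattern as populate_gpu_specs: a number after the final dot in the shape name.
for name in ["VM.GPU.A10.2", "BM.GPU.H100.8", "VM.Standard.E4.Flex"]:
    match = re.search(r"\.(\d+)$", name)
    print(name, "->", int(match.group(1)) if match else None)
# VM.GPU.A10.2 -> 2
# BM.GPU.H100.8 -> 8
# VM.Standard.E4.Flex -> None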
@@ -207,55 +245,71 @@ class AquaMultiModelRef(Serializable):
     """
     Lightweight model descriptor used for multi-model deployment.
 
-    This class only contains essential details
-    required to fetch complete model metadata and deploy models.
+    This class holds essential details required to fetch model metadata and deploy
+    individual models as part of a multi-model deployment group.
 
     Attributes
     ----------
     model_id : str
-        The unique identifier of the model.
+        The unique identifier (OCID) of the base model.
     model_name : Optional[str]
-        The name of the model.
+        Optional name for the model.
     gpu_count : Optional[int]
-        Number of GPUs required for deployment.
+        Number of GPUs required to allocate for this model during deployment.
     model_task : Optional[str]
-        The task that model operates on. Supported tasks are in MultiModelSupportedTaskType
+        The machine learning task this model performs (e.g., text-generation, summarization).
+        Supported values are listed in `MultiModelSupportedTaskType`.
     env_var : Optional[Dict[str, Any]]
-        Optional environment variables to override during deployment.
+        Optional dictionary of environment variables to inject into the runtime environment
+        of the model container.
+    params : Optional[Dict[str, Any]]
+        Optional dictionary of container-specific inference parameters to override.
+        These are typically framework-level flags required by the runtime backend.
+        For example, in vLLM containers, valid params may include:
+        `--tensor-parallel-size`, `--enforce-eager`, `--max-model-len`, etc.
     artifact_location : Optional[str]
-        Artifact path of model in the multimodel group.
+        Relative path or URI of the model artifact inside the multi-model group folder.
     fine_tune_weights : Optional[List[LoraModuleSpec]]
-        For fine tuned models, the artifact path of the modified model weights
+        List of fine-tuned weight artifacts (e.g., LoRA modules) associated with this model.
     """
 
     model_id: str = Field(..., description="The model OCID to deploy.")
-    model_name: Optional[str] = Field(None, description="The name of model.")
+    model_name: Optional[str] = Field(None, description="The name of the model.")
     gpu_count: Optional[int] = Field(
-        None, description="The gpu count allocation for the model."
+        None, description="The number of GPUs allocated for the model."
    )
     model_task: Optional[str] = Field(
         None,
-        description="The task that model operates on. Supported tasks are in MultiModelSupportedTaskType",
+        description="The task this model performs. See `MultiModelSupportedTaskType` for supported values.",
    )
     env_var: Optional[dict] = Field(
-        default_factory=dict, description="The environment variables of the model."
+        default_factory=dict,
+        description="Environment variables to override during container startup.",
+    )
+    params: Optional[dict] = Field(
+        default_factory=dict,
+        description=(
+            "Framework-specific startup parameters required by the container runtime. "
+            "For example, vLLM models may use flags like `--tensor-parallel-size`, `--enforce-eager`, etc."
+        ),
    )
     artifact_location: Optional[str] = Field(
-        None, description="Artifact path of model in the multimodel group."
+        None,
+        description="Path to the model artifact relative to the multi-model base folder.",
    )
     fine_tune_weights: Optional[List[LoraModuleSpec]] = Field(
         None,
-        description="For fine tuned models, the artifact path of the modified model weights",
+        description="List of fine-tuned weight modules (e.g., LoRA) associated with this base model.",
    )
 
     def all_model_ids(self) -> List[str]:
         """
-        Returns all associated model OCIDs, including the base model and any fine-tuned models.
+        Returns all model OCIDs associated with this reference, including fine-tuned weights.
 
         Returns
         -------
         List[str]
-            A list of all model OCIDs associated with this multi-model reference.
+            A list containing the base model OCID and any fine-tuned module OCIDs.
         """
         ids = {self.model_id}
         if self.fine_tune_weights:
@@ -264,8 +318,80 @@ def all_model_ids(self) -> List[str]:
             )
         return list(ids)
 
+    @model_validator(mode="before")
+    @classmethod
+    def extract_params_from_env_var(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        A model-level validator that extracts `PARAMS` from the `env_var` dictionary
+        and injects them into the `params` field as a dictionary.
+
+        This is useful for backward compatibility where users pass CLI-style
+        parameters via environment variables, e.g.:
+            env_var = { "PARAMS": "--max-model-len 65536 --enable-streaming" }
+
+        If `params` is already set, values from `PARAMS` in `env_var` are added
+        only if they do not override existing keys.
+        """
+        env = values.get("env_var", {})
+        param_string = env.pop("PARAMS", None)
+
+        if param_string:
+            parsed_params = cls._parse_params(params=param_string)
+            existing_params = values.get("params", {}) or {}
+            # Avoid overriding existing keys
+            for k, v in parsed_params.items():
+                if k not in existing_params:
+                    existing_params[k] = v
+            values["params"] = existing_params
+            values["env_var"] = env  # cleaned up version without PARAMS
+
+        return values
+
+    @staticmethod
+    def _parse_params(params: Union[str, List[str]]) -> Dict[str, str]:
+        """
+        Parses CLI-style parameters into a dictionary format.
+
+        This method accepts either:
+        - A single string of parameters (e.g., "--key1 val1 --key2 val2")
+        - A list of strings (e.g., ["--key1", "val1", "--key2", "val2"])
+
+        Returns a dictionary of the form { "key1": "val1", "key2": "val2" }.
+
+        Parameters
+        ----------
+        params : Union[str, List[str]]
+            The parameters to parse. Can be a single string or a list of strings.
+
+        Returns
+        -------
+        Dict[str, str]
+            Dictionary with parameter names as keys and their corresponding values as strings.
+        """
+        if not params or not isinstance(params, (str, list)):
+            return {}
+
+        # Normalize string to list of "--key value" strings
+        if isinstance(params, str):
+            params_list = [
+                f"--{param.strip()}" for param in params.split("--") if param.strip()
+            ]
+        else:
+            params_list = params
+
+        parsed = {}
+        for item in params_list:
+            parts = item.strip().split()
+            if not parts:
+                continue
+            key = parts[0]
+            value = " ".join(parts[1:]) if len(parts) > 1 else ""
+            parsed[key] = value
+
+        return parsed
+
     class Config:
-        extra = "ignore"
+        extra = "allow"
         protected_namespaces = ()
 
 
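Taken together, the new params field and the "before" validator let existing callers keep passing the legacy PARAMS environment variable while newer callers set params directly. A minimal sketch of the merge behavior described in the docstrings above, assuming AquaMultiModelRef is imported from ads.aqua.common.entities; the OCID is a placeholder and the flag values are illustrative:

from ads.aqua.common.entities import AquaMultiModelRef

ref = AquaMultiModelRef(
    model_id="ocid1.datasciencemodel.oc1..<unique_id>",  # placeholder OCID
    model_name="example-model",
    env_var={"PARAMS": "--max-model-len 65536 --enforce-eager"},
    params={"--max-model-len": "32768"},  # pre-set keys are not overridden by PARAMS
)
print(ref.params)   # {'--max-model-len': '32768', '--enforce-eager': ''}
print(ref.env_var)  # {}  (PARAMS has been stripped out)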
ads/aqua/common/enums.py

Lines changed: 6 additions & 0 deletions
@@ -123,6 +123,12 @@ class Platform(ExtendedEnum):
 # - Key: The preferred container family to use when multiple compatible families are selected.
 # - Value: A list of all compatible families (including the preferred one).
 CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
+    InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
     InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
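The new entry registers the OpenAI-compatible vLLM family as a preferred key that also accepts the older vLLM families. A rough sketch of how a caller might resolve the compatible set, assuming the enum and map are importable from ads.aqua.common.enums:

from ads.aqua.common.enums import (
    CONTAINER_FAMILY_COMPATIBILITY,
    InferenceContainerTypeFamily,
)

preferred = InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY
# Falls back to just the preferred family if no compatibility entry exists.
compatible = CONTAINER_FAMILY_COMPATIBILITY.get(preferred, [preferred])
print(compatible)  # the preferred family plus the older vLLM families it can fall back to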
