Skip to content

Commit 389d050

Browse files
authored
Merge branch 'main' into ODSC-74228/GPU-Shape-Recommendation
2 parents cce4d90 + 18e9b1c commit 389d050

File tree

11 files changed

+165
-65
lines changed

11 files changed

+165
-65
lines changed

ads/aqua/common/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class InferenceContainerTypeFamily(ExtendedEnum):
5858
AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
5959
AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
6060
AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
61+
AQUA_VLLM_OPENAI_CONTAINER_FAMILY = "odsc-vllm-serving-openai"
6162

6263

6364
class CustomInferenceContainerTypeFamily(ExtendedEnum):

ads/aqua/common/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
997997
return UNKNOWN
998998

999999

1000+
def get_container_env_type(container_type_name: Optional[str]) -> str:
    """
    Determine the container environment type based on the container type name.

    This function matches the provided container type name against the known
    values of `InferenceContainerType`. The check is case-insensitive and
    allows for partial matches so that changes in container naming conventions
    (e.g., prefixes or suffixes) will still be matched correctly.

    Examples:
        >>> get_container_env_type("odsc-vllm-serving")
        'VLLM'
        >>> get_container_env_type("ODSC-TGI-Serving")
        'TGI'
        >>> get_container_env_type("custom-unknown-container")
        'UNKNOWN'

    Args:
        container_type_name (Optional[str]):
            The deployment container type name (e.g., "odsc-vllm-serving").

    Returns:
        str:
            - A matching `InferenceContainerType` value string, upper-cased
              (e.g., "VLLM", "TGI", "LLAMA-CPP").
            - `"UNKNOWN"` if no match is found or the input is empty/None.
    """
    # Guard: None or empty string cannot match any container family.
    if not container_type_name:
        return UNKNOWN

    # casefold() gives aggressive case-insensitive matching (stronger than lower()).
    needle = container_type_name.strip().casefold()

    # Substring match: the known container type value (e.g. "vllm") is expected
    # to appear somewhere inside the full container name (e.g. "odsc-vllm-serving").
    for container_type in InferenceContainerType.values():
        if container_type and container_type.casefold() in needle:
            return container_type.upper()

    return UNKNOWN
1036+
1037+
10001038
def get_restricted_params_by_container(container_type_name: str) -> set:
10011039
"""The utility function accepts the deployment container type name and returns a set of restricted params
10021040
for that container.

ads/aqua/modeldeployment/config_loader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
8888
gpu_count (int, optional): Number of GPUs count to this model of this shape.
8989
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
9090
configure the behavior of a particular GPU shape.
91+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
9192
"""
9293

9394
gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
9798
default_factory=dict,
9899
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
99100
)
101+
env: Optional[Dict[str, Dict[str, str]]] = Field(
102+
default_factory=dict,
103+
description="Environment variables grouped by namespace",
104+
)
100105

101106
class Config:
102107
extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
130135
configure the behavior of a particular GPU shape.
131136
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
132137
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
138+
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
133139
"""
134140

135141
parameters: Optional[Dict[str, str]] = Field(
@@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
143149
default_factory=DeploymentShapeInfo,
144150
description="The shape information to this model for specific shape",
145151
)
152+
env: Optional[Dict[str, Dict[str, str]]] = Field(
153+
default_factory=dict,
154+
description="Environment variables grouped by namespace",
155+
)
146156

147157
class Config:
148158
extra = "allow"

ads/aqua/modeldeployment/deployment.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
build_pydantic_error_message,
2929
find_restricted_params,
3030
get_combined_params,
31+
get_container_env_type,
3132
get_container_params_type,
3233
get_ocid_substring,
3334
get_params_list,
@@ -390,6 +391,7 @@ def _create(
390391
Tags.AQUA_SERVICE_MODEL_TAG,
391392
Tags.AQUA_FINE_TUNED_MODEL_TAG,
392393
Tags.AQUA_TAG,
394+
Tags.BASE_MODEL_CUSTOM,
393395
]:
394396
if tag in aqua_model.freeform_tags:
395397
tags[tag] = aqua_model.freeform_tags[tag]
@@ -1051,6 +1053,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
10511053
config = self.get_config_from_metadata(
10521054
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
10531055
).config
1056+
10541057
if config:
10551058
logger.info(
10561059
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
@@ -1135,7 +1138,7 @@ def get_deployment_default_params(
11351138
model_id: str,
11361139
instance_shape: str,
11371140
gpu_count: int = None,
1138-
) -> List[str]:
1141+
) -> Dict:
11391142
"""Gets the default params set in the deployment configs for the given model and instance shape.
11401143
11411144
Parameters
@@ -1157,6 +1160,7 @@ def get_deployment_default_params(
11571160
11581161
"""
11591162
default_params = []
1163+
default_envs = {}
11601164
config_params = {}
11611165
model = DataScienceModel.from_id(model_id)
11621166
try:
@@ -1166,19 +1170,15 @@ def get_deployment_default_params(
11661170
except ValueError:
11671171
container_type_key = UNKNOWN
11681172
logger.debug(
1169-
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
1173+
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
1174+
f"custom metadata field for model {model_id}."
11701175
)
11711176

1172-
if (
1173-
container_type_key
1174-
and container_type_key in InferenceContainerTypeFamily.values()
1175-
):
1177+
if container_type_key:
11761178
deployment_config = self.get_deployment_config(model_id)
1177-
11781179
instance_shape_config = deployment_config.configuration.get(
11791180
instance_shape, ConfigurationItem()
11801181
)
1181-
11821182
if instance_shape_config.multi_model_deployment and gpu_count:
11831183
gpu_params = instance_shape_config.multi_model_deployment
11841184

@@ -1187,12 +1187,18 @@ def get_deployment_default_params(
11871187
config_params = gpu_config.parameters.get(
11881188
get_container_params_type(container_type_key), UNKNOWN
11891189
)
1190+
default_envs = instance_shape_config.env.get(
1191+
get_container_env_type(container_type_key), {}
1192+
)
11901193
break
11911194

11921195
else:
11931196
config_params = instance_shape_config.parameters.get(
11941197
get_container_params_type(container_type_key), UNKNOWN
11951198
)
1199+
default_envs = instance_shape_config.env.get(
1200+
get_container_env_type(container_type_key), {}
1201+
)
11961202

11971203
if config_params:
11981204
params_list = get_params_list(config_params)
@@ -1205,7 +1211,7 @@ def get_deployment_default_params(
12051211
if params.split()[0] not in restricted_params_set:
12061212
default_params.append(params)
12071213

1208-
return default_params
1214+
return {"data": default_params, "env": default_envs}
12091215

12101216
def validate_deployment_params(
12111217
self,

ads/aqua/modeldeployment/entities.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ class CreateModelDeploymentDetails(BaseModel):
233233
None, description="The description of the deployment."
234234
)
235235
model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
236+
236237
models: Optional[List[AquaMultiModelRef]] = Field(
237238
None, description="List of models for multimodel deployment."
238239
)

docs/source/release_notes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
Release Notes
33
=============
44

5+
2.13.17
6+
-------
7+
Release date: Aug 10, 2025
8+
9+
* AI Quick Actions enhancements.
10+
511
2.13.16
612
-------
713
Release date: Jul 16, 2025

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"
2121

2222
# Required
2323
name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
24-
version = "2.13.16"
24+
version = "2.13.17"
2525

2626
# Optional
2727
description = "Oracle Accelerated Data Science SDK"

tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
{
22
"configuration": {
33
"BM.GPU.A100-v2.8": {
4+
"env": {},
45
"multi_model_deployment": [
56
{
7+
"env": {},
68
"gpu_count": 1,
79
"parameters": {
810
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
911
}
1012
},
1113
{
14+
"env": {},
1215
"gpu_count": 2,
1316
"parameters": {
1417
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
1518
}
1619
},
1720
{
21+
"env": {},
1822
"gpu_count": 8,
1923
"parameters": {
2024
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
2630
}
2731
},
2832
"BM.GPU.H100.8": {
33+
"env": {},
2934
"multi_model_deployment": [
3035
{
3136
"gpu_count": 1
@@ -44,6 +49,7 @@
4449
"VM.GPU.A10.2": {
4550
"multi_model_deployment": [
4651
{
52+
"env": {},
4753
"gpu_count": 2,
4854
"parameters": {
4955
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
5258
]
5359
},
5460
"VM.GPU.A10.4": {
61+
"env": {},
5562
"multi_model_deployment": [
5663
{
64+
"env": {},
5765
"gpu_count": 2,
5866
"parameters": {
5967
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"

tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
{
22
"configuration": {
33
"VM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
49
"parameters": {
510
"TGI_PARAMS": "--max-stop-sequences 6",
611
"VLLM_PARAMS": "--max-model-len 4096"
Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,58 @@
11
{
2-
"shape": [
3-
"VM.GPU.A10.1",
4-
"VM.GPU.A10.2",
5-
"BM.GPU.A10.4",
6-
"BM.GPU.L40S-NC.4"
7-
],
8-
"configuration": {
9-
"VM.GPU.A10.2": {
10-
"parameters": {
11-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
12-
},
13-
"multi_model_deployment": [
14-
{
15-
"gpu_count": 1
16-
}
17-
]
18-
},
19-
"BM.GPU.A10.4": {
20-
"parameters": {
21-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
22-
},
23-
"multi_model_deployment": [
24-
{
25-
"gpu_count": 1
26-
},
27-
{
28-
"gpu_count": 2
29-
}
30-
]
2+
"configuration": {
3+
"BM.GPU.A10.4": {
4+
"env": {
5+
"VLLM": {
6+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
7+
}
8+
},
9+
"multi_model_deployment": [
10+
{
11+
"gpu_count": 1
3112
},
32-
"BM.GPU.L40S-NC.4": {
33-
"parameters": {
34-
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
35-
},
36-
"multi_model_deployment": [
37-
{
38-
"gpu_count": 2
39-
}
40-
]
13+
{
14+
"gpu_count": 2
15+
}
16+
],
17+
"parameters": {
18+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
19+
}
20+
},
21+
"BM.GPU.L40S-NC.4": {
22+
"env": {
23+
"VLLM": {
24+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
25+
}
26+
},
27+
"multi_model_deployment": [
28+
{
29+
"gpu_count": 2
30+
}
31+
],
32+
"parameters": {
33+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
34+
}
35+
},
36+
"VM.GPU.A10.2": {
37+
"env": {
38+
"VLLM": {
39+
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
40+
}
41+
},
42+
"multi_model_deployment": [
43+
{
44+
"gpu_count": 1
4145
}
46+
],
47+
"parameters": {
48+
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
49+
}
4250
}
51+
},
52+
"shape": [
53+
"VM.GPU.A10.1",
54+
"VM.GPU.A10.2",
55+
"BM.GPU.A10.4",
56+
"BM.GPU.L40S-NC.4"
57+
]
4358
}

0 commit comments

Comments
 (0)