Fix failing test case: build deployment params as a space-joined list so the string has no leading space

elizjo · elizjo · commit 0eff21e06c79 · 2025-08-11T15:18:00.000-07:00
diff --git a/ads/aqua/shaperecommend/estimator.py b/ads/aqua/shaperecommend/estimator.py
@@ -116,25 +116,22 @@ def construct_deployment_params(self) -> str:
         - Suggests in-flight quantization **only if the model is unquantized**
             and in-flight quantization (such as '4bit') is requested in config.
 
-        Parameters
-        ----------
-        shape_quantization : set[str]
-            Allowed quantization methods for the compute shape
-
         Returns
         -------
             str: Parameter string for model deployment.
         """
         c = self.llm_config
-        params = ""
+        params = []
         if self.seq_len < c.max_seq_len:
-            params += f"{VLLM_PARAMS['max_model_len']} {str(self.seq_len)}"
+            params.append(VLLM_PARAMS["max_model_len"])
+            params.append(str(self.seq_len))
 
         # Only suggest in-flight quantization for unquantized models when such quantization is requested
         if not c.quantization and c.in_flight_quantization in IN_FLIGHT_QUANTIZATION:
             # vLLM only supports 4bit in-flight quantization
-            params += " " + VLLM_PARAMS["in_flight_quant"]
+            params.append(VLLM_PARAMS["in_flight_quant"])
 
+        params = " ".join(params) if params else ""
         return params
 
     def suggest_param_advice(self, allowed: float) -> str:
diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json
@@ -14,21 +14,20 @@
             "model_size_gb": 47.98,
             "total_model_gb": 69.46
           },
-          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1128.0GB allowed)."
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 96.0GB allowed)."
         }
       ],
       "shape_details": {
         "available": false,
         "core_count": null,
         "gpu_specs": {
-          "gpu_count": 8,
-          "gpu_memory_in_gbs": 1128,
-          "gpu_type": "H200",
+          "gpu_count": 4,
+          "gpu_memory_in_gbs": 96,
+          "gpu_type": "A10",
           "quantization": [
             "awq",
             "gptq",
             "marlin",
-            "fp8",
             "int8",
             "bitblas",
             "aqlm",
@@ -37,12 +36,12 @@
             "gguf"
           ],
           "ranking": {
-            "cost": 100,
-            "performance": 110
+            "cost": 50,
+            "performance": 50
           }
         },
         "memory_in_gbs": null,
-        "name": "BM.GPU.H200.8",
+        "name": "BM.GPU.A10.4",
         "shape_series": "GPU"
       }
     },
@@ -59,27 +58,35 @@
             "model_size_gb": 47.98,
             "total_model_gb": 69.46
           },
-          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1536.0GB allowed)."
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1128.0GB allowed)."
         }
       ],
       "shape_details": {
         "available": false,
         "core_count": null,
         "gpu_specs": {
           "gpu_count": 8,
-          "gpu_memory_in_gbs": 1536,
-          "gpu_type": "MI300X",
+          "gpu_memory_in_gbs": 1128,
+          "gpu_type": "H200",
           "quantization": [
+            "awq",
+            "gptq",
+            "marlin",
             "fp8",
+            "int8",
+            "bitblas",
+            "aqlm",
+            "bitsandbytes",
+            "deepspeedfp",
             "gguf"
           ],
           "ranking": {
-            "cost": 90,
-            "performance": 90
+            "cost": 100,
+            "performance": 110
           }
         },
         "memory_in_gbs": null,
-        "name": "BM.GPU.MI300X.8",
+        "name": "BM.GPU.H200.8",
         "shape_series": "GPU"
       }
     },
@@ -177,68 +184,61 @@
       "configurations": [
         {
           "deployment_params": {
-            "max_model_len": 32768,
-            "params": "--max-model-len 32768 --quantization bitsandbytes --load-format bitsandbytes",
-            "quantization": "4bit"
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
           },
           "model_details": {
-            "kv_cache_size_gb": 5.37,
-            "model_size_gb": 12.0,
-            "total_model_gb": 17.36
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
           },
-          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (17.4GB used / 24.0GB allowed)."
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1536.0GB allowed)."
         }
       ],
       "shape_details": {
         "available": false,
         "core_count": null,
         "gpu_specs": {
-          "gpu_count": 1,
-          "gpu_memory_in_gbs": 24,
-          "gpu_type": "A10",
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1536,
+          "gpu_type": "MI300X",
           "quantization": [
-            "awq",
-            "gptq",
-            "marlin",
-            "int8",
-            "bitblas",
-            "aqlm",
-            "bitsandbytes",
-            "deepspeedfp",
+            "fp8",
             "gguf"
           ],
           "ranking": {
-            "cost": 20,
-            "performance": 30
+            "cost": 90,
+            "performance": 90
           }
         },
         "memory_in_gbs": null,
-        "name": "VM.GPU.A10.1",
+        "name": "BM.GPU.MI300X.8",
         "shape_series": "GPU"
       }
     },
     {
       "configurations": [
         {
           "deployment_params": {
-            "max_model_len": 131072,
-            "params": " --quantization bitsandbytes --load-format bitsandbytes",
+            "max_model_len": 32768,
+            "params": "--max-model-len 32768 --quantization bitsandbytes --load-format bitsandbytes",
             "quantization": "4bit"
           },
           "model_details": {
-            "kv_cache_size_gb": 21.47,
+            "kv_cache_size_gb": 5.37,
             "model_size_gb": 12.0,
-            "total_model_gb": 33.47
+            "total_model_gb": 17.36
           },
-          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (33.5GB used / 48.0GB allowed)."
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (17.4GB used / 24.0GB allowed)."
         }
       ],
       "shape_details": {
         "available": false,
         "core_count": null,
         "gpu_specs": {
-          "gpu_count": 2,
-          "gpu_memory_in_gbs": 48,
+          "gpu_count": 1,
+          "gpu_memory_in_gbs": 24,
           "gpu_type": "A10",
           "quantization": [
             "awq",
@@ -252,12 +252,12 @@
             "gguf"
           ],
           "ranking": {
-            "cost": 40,
-            "performance": 40
+            "cost": 20,
+            "performance": 30
           }
         },
         "memory_in_gbs": null,
-        "name": "VM.GPU.A10.2",
+        "name": "VM.GPU.A10.1",
         "shape_series": "GPU"
       }
     },
@@ -266,23 +266,23 @@
         {
           "deployment_params": {
             "max_model_len": 131072,
-            "params": "",
-            "quantization": "bfloat16"
+            "params": "--quantization bitsandbytes --load-format bitsandbytes",
+            "quantization": "4bit"
           },
           "model_details": {
             "kv_cache_size_gb": 21.47,
-            "model_size_gb": 47.98,
-            "total_model_gb": 69.46
+            "model_size_gb": 12.0,
+            "total_model_gb": 33.47
           },
-          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 96.0GB allowed)."
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (33.5GB used / 48.0GB allowed)."
         }
       ],
       "shape_details": {
         "available": false,
         "core_count": null,
         "gpu_specs": {
-          "gpu_count": 4,
-          "gpu_memory_in_gbs": 96,
+          "gpu_count": 2,
+          "gpu_memory_in_gbs": 48,
           "gpu_type": "A10",
           "quantization": [
             "awq",
@@ -296,12 +296,12 @@
             "gguf"
           ],
           "ranking": {
-            "cost": 50,
-            "performance": 50
+            "cost": 40,
+            "performance": 40
           }
         },
         "memory_in_gbs": null,
-        "name": "BM.GPU.A10.4",
+        "name": "VM.GPU.A10.2",
         "shape_series": "GPU"
       }
     }
diff --git a/tests/unitary/with_extras/aqua/test_recommend.py b/tests/unitary/with_extras/aqua/test_recommend.py
@@ -1,3 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
 import json
 import os
 import re
@@ -363,6 +369,7 @@ def test_which_shapes_valid_from_file(
         result = app.which_shapes(request=request)
 
         expected_result = load_config(result_file)
+        print(result.model_dump_json())
         assert result.model_dump() == expected_result