
Commit 9223346

Fixes unit tests
1 parent b5b5434 commit 9223346


4 files changed: 415 additions, 126 deletions


tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json

Lines changed: 144 additions & 0 deletions
@@ -21,6 +21,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 96,
           "gpu_type": "A10",
@@ -45,6 +47,95 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1440.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 768.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 144,
+          "cpu_memory_in_gbs": 1024,
+          "gpu_count": 4,
+          "gpu_memory_in_gbs": 768,
+          "gpu_type": "GB200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp6",
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 110,
+            "performance": 120
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.GB200.4",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -65,6 +156,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 3072,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1128,
           "gpu_type": "H200",
@@ -110,6 +203,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -155,6 +250,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -200,6 +297,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1536,
           "gpu_type": "MI300X",
@@ -217,6 +316,47 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 320.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 320,
+          "gpu_type": "A100",
+          "quantization": [
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32"
+          ],
+          "ranking": {
+            "cost": 57,
+            "performance": 65
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU4.8",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -237,6 +377,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 15,
+          "cpu_memory_in_gbs": 240,
           "gpu_count": 1,
           "gpu_memory_in_gbs": 24,
           "gpu_type": "A10",
@@ -281,6 +423,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 30,
+          "cpu_memory_in_gbs": 480,
           "gpu_count": 2,
           "gpu_memory_in_gbs": 48,
           "gpu_type": "A10",
Lines changed: 86 additions & 40 deletions
@@ -1,43 +1,89 @@
 {
-    "display_name": "Kimi-K2-Instruct-MOE",
-    "recommendations": [
+  "display_name": "Kimi-K2-Instruct-MOE",
+  "recommendations": [
+    {
+      "configurations": [
         {
-            "shape_details": {
-                "available": false,
-                "core_count": null,
-                "memory_in_gbs": null,
-                "name": "BM.GPU.MI300X.8",
-                "shape_series": "GPU",
-                "gpu_specs": {
-                    "gpu_memory_in_gbs": 1536,
-                    "gpu_count": 8,
-                    "gpu_type": "MI300X",
-                    "quantization": [
-                        "fp8",
-                        "gguf"
-                    ],
-                    "ranking": {
-                        "cost": 90,
-                        "performance": 90
-                    }
-                }
-            },
-            "configurations": [
-                {
-                    "model_details": {
-                        "model_size_gb": 1046.48,
-                        "kv_cache_size_gb": 3.58,
-                        "total_model_gb": 1050.06
-                    },
-                    "deployment_params": {
-                        "quantization": "fp8",
-                        "max_model_len": 2048,
-                        "params": "--max-model-len 2048"
-                    },
-                    "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
-                }
-            ]
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1440.0GB allowed)."
         }
-    ],
-    "troubleshoot": ""
-}
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1536,
+          "gpu_type": "MI300X",
+          "quantization": [
+            "fp8",
+            "gguf"
+          ],
+          "ranking": {
+            "cost": 90,
+            "performance": 90
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.MI300X.8",
+        "shape_series": "GPU"
+      }
+    }
+  ],
+  "troubleshoot": ""
+}
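The "(used / allowed)" figures inside the recommendation strings appear to be these same totals rounded to one decimal place (69.46 → 69.5, 1050.06 → 1050.1). A hedged sketch of that formatting, with illustrative variable names rather than the project's actual API:

# Illustrative names only; values copied from the MI300X entry above.
total_model_gb = 1050.06
gpu_memory_in_gbs = 1536
message = (
    "Model fits well within the allowed compute shape "
    f"({total_model_gb:.1f}GB used / {gpu_memory_in_gbs:.1f}GB allowed)."
)
assert message.endswith("(1050.1GB used / 1536.0GB allowed).")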
