Commit 0eff21e

fixed failed test case
1 parent 82ae39b commit 0eff21e

File tree

3 files changed: 67 additions & 63 deletions


ads/aqua/shaperecommend/estimator.py

Lines changed: 5 additions & 8 deletions
@@ -116,25 +116,22 @@ def construct_deployment_params(self) -> str:
         - Suggests in-flight quantization **only if the model is unquantized**
           and in-flight quantization (such as '4bit') is requested in config.
 
-        Parameters
-        ----------
-        shape_quantization : set[str]
-            Allowed quantization methods for the compute shape
-
         Returns
         -------
         str: Parameter string for model deployment.
         """
         c = self.llm_config
-        params = ""
+        params = []
         if self.seq_len < c.max_seq_len:
-            params += f"{VLLM_PARAMS['max_model_len']} {str(self.seq_len)}"
+            params.append(VLLM_PARAMS["max_model_len"])
+            params.append(str(self.seq_len))
 
         # Only suggest in-flight quantization for unquantized models when such quantization is requested
         if not c.quantization and c.in_flight_quantization in IN_FLIGHT_QUANTIZATION:
             # vLLM only supports 4bit in-flight quantization
-            params += " " + VLLM_PARAMS["in_flight_quant"]
+            params.append(VLLM_PARAMS["in_flight_quant"])
 
+        params = " ".join(params) if params else ""
         return params
 
     def suggest_param_advice(self, allowed: float) -> str:
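For context, this change switches construct_deployment_params from string concatenation to collecting parameter fragments in a list and joining them, which removes the stray leading space that appeared when only the in-flight-quantization flag was emitted. Below is a minimal, self-contained sketch of that pattern; the VLLM_PARAMS values and the build_params() helper are illustrative stand-ins, not the real ads.aqua.shaperecommend constants or API.

# Sketch only: VLLM_PARAMS values and build_params() are stand-ins,
# not the actual ads constants or method signature.
VLLM_PARAMS = {
    "max_model_len": "--max-model-len",
    "in_flight_quant": "--quantization bitsandbytes --load-format bitsandbytes",
}

def build_params(seq_len: int, max_seq_len: int, wants_inflight_quant: bool) -> str:
    params = []
    if seq_len < max_seq_len:
        params.append(VLLM_PARAMS["max_model_len"])
        params.append(str(seq_len))
    if wants_inflight_quant:
        params.append(VLLM_PARAMS["in_flight_quant"])
    # " ".join() never emits a leading separator, unlike the old
    # `params += " " + ...` concatenation when the first branch was skipped.
    return " ".join(params) if params else ""

print(repr(build_params(32768, 131072, True)))
# '--max-model-len 32768 --quantization bitsandbytes --load-format bitsandbytes'
print(repr(build_params(131072, 131072, True)))
# '--quantization bitsandbytes --load-format bitsandbytes'  <- no leading space

This matches the updated expected test data below, where the params string no longer starts with a space.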

tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json

Lines changed: 55 additions & 55 deletions
@@ -14,21 +14,20 @@
           "model_size_gb": 47.98,
           "total_model_gb": 69.46
         },
-        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1128.0GB allowed)."
+        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 96.0GB allowed)."
       }
     ],
     "shape_details": {
       "available": false,
       "core_count": null,
       "gpu_specs": {
-        "gpu_count": 8,
-        "gpu_memory_in_gbs": 1128,
-        "gpu_type": "H200",
+        "gpu_count": 4,
+        "gpu_memory_in_gbs": 96,
+        "gpu_type": "A10",
         "quantization": [
           "awq",
           "gptq",
           "marlin",
-          "fp8",
           "int8",
           "bitblas",
           "aqlm",
@@ -37,12 +36,12 @@
           "gguf"
         ],
         "ranking": {
-          "cost": 100,
-          "performance": 110
+          "cost": 50,
+          "performance": 50
         }
       },
       "memory_in_gbs": null,
-      "name": "BM.GPU.H200.8",
+      "name": "BM.GPU.A10.4",
       "shape_series": "GPU"
     }
   },
@@ -59,27 +58,35 @@
           "model_size_gb": 47.98,
           "total_model_gb": 69.46
         },
-        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1536.0GB allowed)."
+        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1128.0GB allowed)."
       }
     ],
     "shape_details": {
       "available": false,
       "core_count": null,
       "gpu_specs": {
         "gpu_count": 8,
-        "gpu_memory_in_gbs": 1536,
-        "gpu_type": "MI300X",
+        "gpu_memory_in_gbs": 1128,
+        "gpu_type": "H200",
         "quantization": [
+          "awq",
+          "gptq",
+          "marlin",
           "fp8",
+          "int8",
+          "bitblas",
+          "aqlm",
+          "bitsandbytes",
+          "deepspeedfp",
           "gguf"
         ],
         "ranking": {
-          "cost": 90,
-          "performance": 90
+          "cost": 100,
+          "performance": 110
         }
       },
       "memory_in_gbs": null,
-      "name": "BM.GPU.MI300X.8",
+      "name": "BM.GPU.H200.8",
       "shape_series": "GPU"
     }
   },
@@ -177,68 +184,61 @@
     "configurations": [
       {
         "deployment_params": {
-          "max_model_len": 32768,
-          "params": "--max-model-len 32768 --quantization bitsandbytes --load-format bitsandbytes",
-          "quantization": "4bit"
+          "max_model_len": 131072,
+          "params": "",
+          "quantization": "bfloat16"
         },
         "model_details": {
-          "kv_cache_size_gb": 5.37,
-          "model_size_gb": 12.0,
-          "total_model_gb": 17.36
+          "kv_cache_size_gb": 21.47,
+          "model_size_gb": 47.98,
+          "total_model_gb": 69.46
         },
-        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (17.4GB used / 24.0GB allowed)."
+        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1536.0GB allowed)."
       }
     ],
     "shape_details": {
       "available": false,
       "core_count": null,
       "gpu_specs": {
-        "gpu_count": 1,
-        "gpu_memory_in_gbs": 24,
-        "gpu_type": "A10",
+        "gpu_count": 8,
+        "gpu_memory_in_gbs": 1536,
+        "gpu_type": "MI300X",
         "quantization": [
-          "awq",
-          "gptq",
-          "marlin",
-          "int8",
-          "bitblas",
-          "aqlm",
-          "bitsandbytes",
-          "deepspeedfp",
+          "fp8",
           "gguf"
         ],
         "ranking": {
-          "cost": 20,
-          "performance": 30
+          "cost": 90,
+          "performance": 90
         }
       },
       "memory_in_gbs": null,
-      "name": "VM.GPU.A10.1",
+      "name": "BM.GPU.MI300X.8",
      "shape_series": "GPU"
     }
   },
   {
     "configurations": [
       {
         "deployment_params": {
-          "max_model_len": 131072,
-          "params": " --quantization bitsandbytes --load-format bitsandbytes",
+          "max_model_len": 32768,
+          "params": "--max-model-len 32768 --quantization bitsandbytes --load-format bitsandbytes",
           "quantization": "4bit"
         },
         "model_details": {
-          "kv_cache_size_gb": 21.47,
+          "kv_cache_size_gb": 5.37,
           "model_size_gb": 12.0,
-          "total_model_gb": 33.47
+          "total_model_gb": 17.36
         },
-        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (33.5GB used / 48.0GB allowed)."
+        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (17.4GB used / 24.0GB allowed)."
       }
     ],
     "shape_details": {
       "available": false,
       "core_count": null,
       "gpu_specs": {
-        "gpu_count": 2,
-        "gpu_memory_in_gbs": 48,
+        "gpu_count": 1,
+        "gpu_memory_in_gbs": 24,
         "gpu_type": "A10",
         "quantization": [
           "awq",
@@ -252,12 +252,12 @@
           "gguf"
         ],
         "ranking": {
-          "cost": 40,
-          "performance": 40
+          "cost": 20,
+          "performance": 30
         }
       },
       "memory_in_gbs": null,
-      "name": "VM.GPU.A10.2",
+      "name": "VM.GPU.A10.1",
       "shape_series": "GPU"
     }
   },
@@ -266,23 +266,23 @@
       {
         "deployment_params": {
           "max_model_len": 131072,
-          "params": "",
-          "quantization": "bfloat16"
+          "params": "--quantization bitsandbytes --load-format bitsandbytes",
+          "quantization": "4bit"
         },
         "model_details": {
           "kv_cache_size_gb": 21.47,
-          "model_size_gb": 47.98,
-          "total_model_gb": 69.46
+          "model_size_gb": 12.0,
+          "total_model_gb": 33.47
         },
-        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 96.0GB allowed)."
+        "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (33.5GB used / 48.0GB allowed)."
       }
     ],
     "shape_details": {
       "available": false,
       "core_count": null,
       "gpu_specs": {
-        "gpu_count": 4,
-        "gpu_memory_in_gbs": 96,
+        "gpu_count": 2,
+        "gpu_memory_in_gbs": 48,
         "gpu_type": "A10",
         "quantization": [
           "awq",
@@ -296,12 +296,12 @@
           "gguf"
         ],
         "ranking": {
-          "cost": 50,
-          "performance": 50
+          "cost": 40,
+          "performance": 40
         }
      },
       "memory_in_gbs": null,
-      "name": "BM.GPU.A10.4",
+      "name": "VM.GPU.A10.2",
       "shape_series": "GPU"
     }
   }

tests/unitary/with_extras/aqua/test_recommend.py

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
 import json
 import os
 import re
@@ -363,6 +369,7 @@ def test_which_shapes_valid_from_file(
         result = app.which_shapes(request=request)
 
         expected_result = load_config(result_file)
+        print(result.model_dump_json())
         assert result.model_dump() == expected_result
 
 
0 commit comments
