
Commit 9223346

Fixes unit tests
1 parent b5b5434 commit 9223346


4 files changed: 415 additions, 126 deletions


tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json

Lines changed: 144 additions & 0 deletions
@@ -21,6 +21,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 96,
           "gpu_type": "A10",
@@ -45,6 +47,95 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1440.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 768.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 144,
+          "cpu_memory_in_gbs": 1024,
+          "gpu_count": 4,
+          "gpu_memory_in_gbs": 768,
+          "gpu_type": "GB200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp6",
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 110,
+            "performance": 120
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.GB200.4",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -65,6 +156,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 3072,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1128,
           "gpu_type": "H200",
@@ -110,6 +203,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -155,6 +250,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -200,6 +297,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1536,
           "gpu_type": "MI300X",
@@ -217,6 +316,47 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 320.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 320,
+          "gpu_type": "A100",
+          "quantization": [
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32"
+          ],
+          "ranking": {
+            "cost": 57,
+            "performance": 65
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU4.8",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -237,6 +377,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 15,
+          "cpu_memory_in_gbs": 240,
           "gpu_count": 1,
           "gpu_memory_in_gbs": 24,
           "gpu_type": "A10",
@@ -281,6 +423,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 30,
+          "cpu_memory_in_gbs": 480,
           "gpu_count": 2,
           "gpu_memory_in_gbs": 48,
           "gpu_type": "A10",
Lines changed: 86 additions & 40 deletions
@@ -1,43 +1,89 @@
 {
-    "display_name": "Kimi-K2-Instruct-MOE",
-    "recommendations": [
+  "display_name": "Kimi-K2-Instruct-MOE",
+  "recommendations": [
+    {
+      "configurations": [
         {
-            "shape_details": {
-                "available": false,
-                "core_count": null,
-                "memory_in_gbs": null,
-                "name": "BM.GPU.MI300X.8",
-                "shape_series": "GPU",
-                "gpu_specs": {
-                    "gpu_memory_in_gbs": 1536,
-                    "gpu_count": 8,
-                    "gpu_type": "MI300X",
-                    "quantization": [
-                        "fp8",
-                        "gguf"
-                    ],
-                    "ranking": {
-                        "cost": 90,
-                        "performance": 90
-                    }
-                }
-            },
-            "configurations": [
-                {
-                    "model_details": {
-                        "model_size_gb": 1046.48,
-                        "kv_cache_size_gb": 3.58,
-                        "total_model_gb": 1050.06
-                    },
-                    "deployment_params": {
-                        "quantization": "fp8",
-                        "max_model_len": 2048,
-                        "params": "--max-model-len 2048"
-                    },
-                    "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
-                }
-            ]
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1440.0GB allowed)."
         }
-    ],
-    "troubleshoot": ""
-}
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1536,
+          "gpu_type": "MI300X",
+          "quantization": [
+            "fp8",
+            "gguf"
+          ],
+          "ranking": {
+            "cost": 90,
+            "performance": 90
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.MI300X.8",
+        "shape_series": "GPU"
+      }
+    }
+  ],
+  "troubleshoot": ""
+}
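The "(used / allowed)" figures inside the recommendation strings appear to be these same totals rounded to one decimal place (69.46 → 69.5, 1050.06 → 1050.1). A hedged sketch of that formatting, with illustrative variable names rather than the project's actual API:

# Illustrative names only; values copied from the MI300X entry above.
total_model_gb = 1050.06
gpu_memory_in_gbs = 1536
message = (
    "Model fits well within the allowed compute shape "
    f"({total_model_gb:.1f}GB used / {gpu_memory_in_gbs:.1f}GB allowed)."
)
assert message.endswith("(1050.1GB used / 1536.0GB allowed).")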
