|
1 | 1 | { |
2 | | - "BM.GPU.B200.8": { |
3 | | - "cpu_count": 128, |
4 | | - "cpu_memory_in_gbs": 4096, |
5 | | - "gpu_count": 8, |
6 | | - "gpu_memory_in_gbs": 1440, |
7 | | - "gpu_type": "B200", |
8 | | - "quantization": [ |
9 | | - "fp4", |
10 | | - "fp8", |
11 | | - "fp16", |
12 | | - "bf16", |
13 | | - "tf32", |
14 | | - "int8", |
15 | | - "fp64" |
16 | | - ], |
17 | | - "ranking": { |
18 | | - "cost": 120, |
19 | | - "performance": 130 |
20 | | - } |
21 | | - }, |
22 | | - "BM.GPU.GB200.4": { |
23 | | - "cpu_count": 144, |
24 | | - "cpu_memory_in_gbs": 1024, |
25 | | - "gpu_count": 4, |
26 | | - "gpu_memory_in_gbs": 768, |
27 | | - "gpu_type": "GB200", |
28 | | - "quantization": [ |
29 | | - "fp4", |
30 | | - "fp8", |
31 | | - "fp6", |
32 | | - "int8", |
33 | | - "fp16", |
34 | | - "bf16", |
35 | | - "tf32", |
36 | | - "fp64" |
37 | | - ], |
38 | | - "ranking": { |
39 | | - "cost": 110, |
40 | | - "performance": 120 |
41 | | - } |
42 | | - }, |
43 | | - "BM.GPU4.8": { |
44 | | - "cpu_count": 64, |
45 | | - "cpu_memory_in_gbs": 2048, |
46 | | - "gpu_count": 8, |
47 | | - "gpu_memory_in_gbs": 320, |
48 | | - "gpu_type": "A100", |
49 | | - "quantization": [ |
50 | | - "int8", |
51 | | - "fp16", |
52 | | - "bf16", |
53 | | - "tf32" |
54 | | - ], |
55 | | - "ranking": { |
56 | | - "cost": 57, |
57 | | - "performance": 65 |
58 | | - } |
59 | | - }, |
60 | | - "VM.GPU3.8": { |
61 | | - "cpu_count": 24, |
62 | | - "cpu_memory_in_gbs": 768, |
63 | | - "gpu_count": 8, |
64 | | - "gpu_memory_in_gbs": 128, |
65 | | - "gpu_type": "V100", |
66 | | - "quantization": [ |
67 | | - "gptq", |
68 | | - "bitblas", |
69 | | - "aqlm", |
70 | | - "bitsandbytes", |
71 | | - "deepspeedfp", |
72 | | - "gguf" |
73 | | - ], |
74 | | - "ranking": { |
75 | | - "cost": 56, |
76 | | - "performance": 46 |
77 | | - } |
78 | | - }, |
79 | 2 | "shapes": { |
80 | 3 | "BM.GPU.A10.4": { |
81 | 4 | "cpu_count": 64, |
|
121 | 44 | "performance": 70 |
122 | 45 | } |
123 | 46 | }, |
| 47 | + "BM.GPU.B200.8": { |
| 48 | + "cpu_count": 128, |
| 49 | + "cpu_memory_in_gbs": 4096, |
| 50 | + "gpu_count": 8, |
| 51 | + "gpu_memory_in_gbs": 1440, |
| 52 | + "gpu_type": "B200", |
| 53 | + "quantization": [ |
| 54 | + "fp4", |
| 55 | + "fp8", |
| 56 | + "fp16", |
| 57 | + "bf16", |
| 58 | + "tf32", |
| 59 | + "int8", |
| 60 | + "fp64" |
| 61 | + ], |
| 62 | + "ranking": { |
| 63 | + "cost": 120, |
| 64 | + "performance": 130 |
| 65 | + } |
| 66 | + }, |
124 | 67 | "BM.GPU.B4.8": { |
125 | 68 | "cpu_count": 64, |
126 | 69 | "cpu_memory_in_gbs": 2048, |
|
143 | 86 | "performance": 60 |
144 | 87 | } |
145 | 88 | }, |
| 89 | + "BM.GPU.GB200.4": { |
| 90 | + "cpu_count": 144, |
| 91 | + "cpu_memory_in_gbs": 1024, |
| 92 | + "gpu_count": 4, |
| 93 | + "gpu_memory_in_gbs": 768, |
| 94 | + "gpu_type": "GB200", |
| 95 | + "quantization": [ |
| 96 | + "fp4", |
| 97 | + "fp8", |
| 98 | + "fp6", |
| 99 | + "int8", |
| 100 | + "fp16", |
| 101 | + "bf16", |
| 102 | + "tf32", |
| 103 | + "fp64" |
| 104 | + ], |
| 105 | + "ranking": { |
| 106 | + "cost": 110, |
| 107 | + "performance": 120 |
| 108 | + } |
| 109 | + }, |
146 | 110 | "BM.GPU.H100.8": { |
147 | 111 | "cpu_count": 112, |
148 | 112 | "cpu_memory_in_gbs": 2048, |
|
264 | 228 | "performance": 20 |
265 | 229 | } |
266 | 230 | }, |
| 231 | + "BM.GPU4.8": { |
| 232 | + "cpu_count": 64, |
| 233 | + "cpu_memory_in_gbs": 2048, |
| 234 | + "gpu_count": 8, |
| 235 | + "gpu_memory_in_gbs": 320, |
| 236 | + "gpu_type": "A100", |
| 237 | + "quantization": [ |
| 238 | + "int8", |
| 239 | + "fp16", |
| 240 | + "bf16", |
| 241 | + "tf32" |
| 242 | + ], |
| 243 | + "ranking": { |
| 244 | + "cost": 57, |
| 245 | + "performance": 65 |
| 246 | + } |
| 247 | + }, |
267 | 248 | "VM.GPU.A10.1": { |
268 | 249 | "cpu_count": 15, |
269 | 250 | "cpu_memory_in_gbs": 240, |
|
378 | 359 | "cost": 55, |
379 | 360 | "performance": 45 |
380 | 361 | } |
| 362 | + }, |
| 363 | + "VM.GPU3.8": { |
| 364 | + "cpu_count": 24, |
| 365 | + "cpu_memory_in_gbs": 768, |
| 366 | + "gpu_count": 8, |
| 367 | + "gpu_memory_in_gbs": 128, |
| 368 | + "gpu_type": "V100", |
| 369 | + "quantization": [ |
| 370 | + "gptq", |
| 371 | + "bitblas", |
| 372 | + "aqlm", |
| 373 | + "bitsandbytes", |
| 374 | + "deepspeedfp", |
| 375 | + "gguf" |
| 376 | + ], |
| 377 | + "ranking": { |
| 378 | + "cost": 56, |
| 379 | + "performance": 46 |
| 380 | + } |
381 | 381 | } |
382 | 382 | } |
383 | 383 | } |
0 commit comments