11{
22 "shapes" : {
3- "BM.GPU.H200.8" : {
4- "gpu_count" : 8 ,
5- "gpu_memory_in_gbs" : 1128 ,
6- "gpu_type" : " H200" ,
7- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
8- "ranking" : {
9- "cost" : 100 ,
10- "performance" : 110
11- }
12- },
13- "BM.GPU.H100.8" : {
14- "gpu_count" : 8 ,
15- "gpu_memory_in_gbs" : 640 ,
16- "gpu_type" : " H100" ,
17- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
18- "ranking" : {
19- "cost" : 100 ,
20- "performance" : 100
21- }
22- },
23- "BM.GPU.MI300X.8" : {
24- "gpu_count" : 8 ,
25- "gpu_memory_in_gbs" : 1536 ,
26- "gpu_type" : " MI300X" ,
27- "quantization" : [" fp8" , " gguf" ],
3+ "BM.GPU.A10.4" : {
4+ "gpu_count" : 4 ,
5+ "gpu_memory_in_gbs" : 96 ,
6+ "gpu_type" : " A10" ,
7+ "quantization" : [
8+ " awq" ,
9+ " gptq" ,
10+ " marlin" ,
11+ " int8" ,
12+ " bitblas" ,
13+ " aqlm" ,
14+ " bitsandbytes" ,
15+ " deepspeedfp" ,
16+ " gguf"
17+ ],
2818 "ranking" : {
29- "cost" : 90 ,
30- "performance" : 90
19+ "cost" : 50 ,
20+ "performance" : 50
3121 }
3222 },
3323 "BM.GPU.A100-V2.8" : {
3424 "gpu_count" : 8 ,
3525 "gpu_memory_in_gbs" : 640 ,
3626 "gpu_type" : " A100" ,
37- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
27+ "quantization" : [
28+ " awq" ,
29+ " gptq" ,
30+ " marlin" ,
31+ " int8" ,
32+ " bitblas" ,
33+ " aqlm" ,
34+ " bitsandbytes" ,
35+ " deepspeedfp" ,
36+ " gguf"
37+ ],
3838 "ranking" : {
3939 "cost" : 80 ,
4040 "performance" : 70
4444 "gpu_count" : 8 ,
4545 "gpu_memory_in_gbs" : 320 ,
4646 "gpu_type" : " A100" ,
47- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
47+ "quantization" : [
48+ " awq" ,
49+ " gptq" ,
50+ " marlin" ,
51+ " int8" ,
52+ " bitblas" ,
53+ " aqlm" ,
54+ " bitsandbytes" ,
55+ " deepspeedfp" ,
56+ " gguf"
57+ ],
4858 "ranking" : {
4959 "cost" : 70 ,
5060 "performance" : 60
5161 }
5262 },
63+ "BM.GPU.H100.8" : {
64+ "gpu_count" : 8 ,
65+ "gpu_memory_in_gbs" : 640 ,
66+ "gpu_type" : " H100" ,
67+ "quantization" : [
68+ " awq" ,
69+ " gptq" ,
70+ " marlin" ,
71+ " fp8" ,
72+ " int8" ,
73+ " bitblas" ,
74+ " aqlm" ,
75+ " bitsandbytes" ,
76+ " deepspeedfp" ,
77+ " gguf"
78+ ],
79+ "ranking" : {
80+ "cost" : 100 ,
81+ "performance" : 100
82+ }
83+ },
84+ "BM.GPU.H200.8" : {
85+ "gpu_count" : 8 ,
86+ "gpu_memory_in_gbs" : 1128 ,
87+ "gpu_type" : " H200" ,
88+ "quantization" : [
89+ " awq" ,
90+ " gptq" ,
91+ " marlin" ,
92+ " fp8" ,
93+ " int8" ,
94+ " bitblas" ,
95+ " aqlm" ,
96+ " bitsandbytes" ,
97+ " deepspeedfp" ,
98+ " gguf"
99+ ],
100+ "ranking" : {
101+ "cost" : 100 ,
102+ "performance" : 110
103+ }
104+ },
53105 "BM.GPU.L40S-NC.4" : {
54106 "gpu_count" : 4 ,
55107 "gpu_memory_in_gbs" : 192 ,
56108 "gpu_type" : " L40S" ,
57- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
109+ "quantization" : [
110+ " awq" ,
111+ " gptq" ,
112+ " marlin" ,
113+ " fp8" ,
114+ " int8" ,
115+ " bitblas" ,
116+ " aqlm" ,
117+ " bitsandbytes" ,
118+ " deepspeedfp" ,
119+ " gguf"
120+ ],
58121 "ranking" : {
59122 "cost" : 60 ,
60123 "performance" : 80
64127 "gpu_count" : 4 ,
65128 "gpu_memory_in_gbs" : 192 ,
66129 "gpu_type" : " L40S" ,
67- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
130+ "quantization" : [
131+ " awq" ,
132+ " gptq" ,
133+ " marlin" ,
134+ " fp8" ,
135+ " int8" ,
136+ " bitblas" ,
137+ " aqlm" ,
138+ " bitsandbytes" ,
139+ " deepspeedfp" ,
140+ " gguf"
141+ ],
68142 "ranking" : {
69143 "cost" : 60 ,
70144 "performance" : 80
71145 }
72146 },
147+ "BM.GPU.MI300X.8" : {
148+ "gpu_count" : 8 ,
149+ "gpu_memory_in_gbs" : 1536 ,
150+ "gpu_type" : " MI300X" ,
151+ "quantization" : [
152+ " fp8" ,
153+ " gguf"
154+ ],
155+ "ranking" : {
156+ "cost" : 90 ,
157+ "performance" : 90
158+ }
159+ },
160+ "BM.GPU2.2" : {
161+ "gpu_count" : 2 ,
162+ "gpu_memory_in_gbs" : 32 ,
163+ "gpu_type" : " P100" ,
164+ "quantization" : [
165+ " fp16"
166+ ],
167+ "ranking" : {
168+ "cost" : 30 ,
169+ "performance" : 20
170+ }
171+ },
73172 "VM.GPU.A10.1" : {
74173 "gpu_count" : 1 ,
75174 "gpu_memory_in_gbs" : 24 ,
76175 "gpu_type" : " A10" ,
77- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
78- "ranking" : {
176+ "quantization" : [
177+ " awq" ,
178+ " gptq" ,
179+ " marlin" ,
180+ " int8" ,
181+ " bitblas" ,
182+ " aqlm" ,
183+ " bitsandbytes" ,
184+ " deepspeedfp" ,
185+ " gguf"
186+ ],
187+ "ranking" : {
79188 "cost" : 20 ,
80189 "performance" : 30
81190 }
84193 "gpu_count" : 2 ,
85194 "gpu_memory_in_gbs" : 48 ,
86195 "gpu_type" : " A10" ,
87- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
88- "ranking" : {
196+ "quantization" : [
197+ " awq" ,
198+ " gptq" ,
199+ " marlin" ,
200+ " int8" ,
201+ " bitblas" ,
202+ " aqlm" ,
203+ " bitsandbytes" ,
204+ " deepspeedfp" ,
205+ " gguf"
206+ ],
207+ "ranking" : {
89208 "cost" : 40 ,
90209 "performance" : 40
91210 }
92211 },
93- "BM.GPU.A10.4" : {
94- "gpu_count" : 4 ,
95- "gpu_memory_in_gbs" : 96 ,
96- "gpu_type" : " A10" ,
97- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
98- "ranking" : {
99- "cost" : 50 ,
100- "performance" : 50
101- }
102- },
103- "BM.GPU2.2" : {
104- "gpu_count" : 2 ,
105- "gpu_memory_in_gbs" : 32 ,
106- "gpu_type" : " P100" ,
107- "quantization" : [" fp16" ],
108- "ranking" : {
109- "cost" : 30 ,
110- "performance" : 20
111- }
112- },
113212 "VM.GPU2.1" : {
114213 "gpu_count" : 1 ,
115214 "gpu_memory_in_gbs" : 16 ,
116215 "gpu_type" : " P100" ,
117- "quantization" : [" fp16" ],
216+ "quantization" : [
217+ " fp16"
218+ ],
118219 "ranking" : {
119220 "cost" : 10 ,
120221 "performance" : 10
124225 "gpu_count" : 1 ,
125226 "gpu_memory_in_gbs" : 16 ,
126227 "gpu_type" : " V100" ,
127- "quantization" : [" gptq" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
128- "ranking" : {
228+ "quantization" : [
229+ " gptq" ,
230+ " bitblas" ,
231+ " aqlm" ,
232+ " bitsandbytes" ,
233+ " deepspeedfp" ,
234+ " gguf"
235+ ],
236+ "ranking" : {
129237 "cost" : 35 ,
130- "performance" : 10
238+ "performance" : 10
131239 }
132240 },
133241 "VM.GPU3.2" : {
134242 "gpu_count" : 2 ,
135243 "gpu_memory_in_gbs" : 32 ,
136244 "gpu_type" : " V100" ,
137- "ranking" : {
245+ "quantization" : [
246+ " gptq" ,
247+ " bitblas" ,
248+ " aqlm" ,
249+ " bitsandbytes" ,
250+ " deepspeedfp" ,
251+ " gguf"
252+ ],
253+ "ranking" : {
138254 "cost" : 45 ,
139- "performance" : 20
255+ "performance" : 20
140256 }
141257 },
142258 "VM.GPU3.4" : {
143259 "gpu_count" : 4 ,
144260 "gpu_memory_in_gbs" : 64 ,
145261 "gpu_type" : " V100" ,
146- "ranking" : {
262+ "quantization" : [
263+ " gptq" ,
264+ " bitblas" ,
265+ " aqlm" ,
266+ " bitsandbytes" ,
267+ " deepspeedfp" ,
268+ " gguf"
269+ ],
270+ "ranking" : {
147271 "cost" : 55 ,
148- "performance" : 45
272+ "performance" : 45
149273 }
150274 }
151275 }
152- }
276+ }
0 commit comments