1+ {
2+ "shapes" : {
3+ "BM.GPU.H200.8" : {
4+ "gpu_count" : 8 ,
5+ "gpu_memory_in_gbs" : 1128 ,
6+ "gpu_type" : "H200" ,
7+ "quantization" : ["awq" , "gptq" , "marlin" , "fp8" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
8+ "ranking" : {
9+ "cost" : 100 ,
10+ "performance" : 110
11+ }
12+ },
13+ "BM.GPU.H100.8" : {
14+ "gpu_count" : 8 ,
15+ "gpu_memory_in_gbs" : 640 ,
16+ "gpu_type" : "H100" ,
17+ "quantization" : ["awq" , "gptq" , "marlin" , "fp8" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
18+ "ranking" : {
19+ "cost" : 100 ,
20+ "performance" : 100
21+ }
22+ },
23+ "BM.GPU.MI300X.8" : {
24+ "gpu_count" : 8 ,
25+ "gpu_memory_in_gbs" : 1536 ,
26+ "gpu_type" : "MI300X" ,
27+ "quantization" : ["fp8" , "gguf" ],
28+ "ranking" : {
29+ "cost" : 90 ,
30+ "performance" : 90
31+ }
32+ },
33+ "BM.GPU.A100-V2.8" : {
34+ "gpu_count" : 8 ,
35+ "gpu_memory_in_gbs" : 640 ,
36+ "gpu_type" : "A100" ,
37+ "quantization" : ["awq" , "gptq" , "marlin" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
38+ "ranking" : {
39+ "cost" : 80 ,
40+ "performance" : 70
41+ }
42+ },
43+ "BM.GPU.B4.8" : {
44+ "gpu_count" : 8 ,
45+ "gpu_memory_in_gbs" : 320 ,
46+ "gpu_type" : "A100" ,
47+ "quantization" : ["awq" , "gptq" , "marlin" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
48+ "ranking" : {
49+ "cost" : 70 ,
50+ "performance" : 60
51+ }
52+ },
53+ "BM.GPU.L40S-NC.4" : {
54+ "gpu_count" : 4 ,
55+ "gpu_memory_in_gbs" : 192 ,
56+ "gpu_type" : "L40S" ,
57+ "quantization" : ["awq" , "gptq" , "marlin" , "fp8" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
58+ "ranking" : {
59+ "cost" : 60 ,
60+ "performance" : 80
61+ }
62+ },
63+ "BM.GPU.L40S.4" : {
64+ "gpu_count" : 4 ,
65+ "gpu_memory_in_gbs" : 192 ,
66+ "gpu_type" : "L40S" ,
67+ "quantization" : ["awq" , "gptq" , "marlin" , "fp8" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
68+ "ranking" : {
69+ "cost" : 60 ,
70+ "performance" : 80
71+ }
72+ },
73+ "VM.GPU.A10.1" : {
74+ "gpu_count" : 1 ,
75+ "gpu_memory_in_gbs" : 24 ,
76+ "gpu_type" : "A10" ,
77+ "quantization" : ["awq" , "gptq" , "marlin" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
78+ "ranking" : {
79+ "cost" : 20 ,
80+ "performance" : 30
81+ }
82+ },
83+ "VM.GPU.A10.2" : {
84+ "gpu_count" : 2 ,
85+ "gpu_memory_in_gbs" : 48 ,
86+ "gpu_type" : "A10" ,
87+ "quantization" : ["awq" , "gptq" , "marlin" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
88+ "ranking" : {
89+ "cost" : 40 ,
90+ "performance" : 40
91+ }
92+ },
93+ "BM.GPU.A10.4" : {
94+ "gpu_count" : 4 ,
95+ "gpu_memory_in_gbs" : 96 ,
96+ "gpu_type" : "A10" ,
97+ "quantization" : ["awq" , "gptq" , "marlin" , "int8" , "bitblas" , "aqlm" , "bitsandbytes" , "deepspeedfp" , "gguf" ],
98+ "ranking" : {
99+ "cost" : 50 ,
100+ "performance" : 50
101+ }
102+ },
103+ "BM.GPU2.2" : {
104+ "gpu_count" : 2 ,
105+ "gpu_memory_in_gbs" : 32 ,
106+ "gpu_type" : "P100" ,
107+ "quantization" : ["fp16" ],
108+ "ranking" : {
109+ "cost" : 30 ,
110+ "performance" : 20
111+ }
112+ },
113+ "VM.GPU2.1" : {
114+ "gpu_count" : 1 ,
115+ "gpu_memory_in_gbs" : 16 ,
116+ "gpu_type" : "P100" ,
117+ "quantization" : ["fp16" ],
118+ "ranking" : {
119+ "cost" : 10 ,
120+ "performance" : 10
121+ }
122+ }
123+ }
124+ }
0 commit comments