# Called with no name, getLogger() returns the root logger, so messages
# emitted by this script go through the root logger's handlers.
logger = logging.getLogger()
2222
2323
24- # yapf conflicts with isort for this docstring
25- # yapf: disable
2624"""
2725tensorize_vllm_model.py is a script that can be used to serialize and
2826deserialize vLLM models. These models can be loaded using tensorizer
@@ -132,7 +130,8 @@ def get_parser():
132130 "can be loaded using tensorizer directly to the GPU "
133131 "extremely quickly. Tensor encryption and decryption is "
134132 "also supported, although libsodium must be installed to "
135- "use it." )
133+ "use it."
134+ )
136135 parser = EngineArgs .add_cli_args (parser )
137136
138137 parser .add_argument (
@@ -144,13 +143,14 @@ def get_parser():
144143 "along with the model by instantiating a TensorizerConfig object, "
145144 "creating a dict from it with TensorizerConfig.to_serializable(), "
146145 "and passing it to LoRARequest's initializer with the kwarg "
147- "tensorizer_config_dict."
146+ "tensorizer_config_dict." ,
148147 )
149148
150- subparsers = parser .add_subparsers (dest = ' command' , required = True )
149+ subparsers = parser .add_subparsers (dest = " command" , required = True )
151150
152151 serialize_parser = subparsers .add_parser (
153- 'serialize' , help = "Serialize a model to `--serialized-directory`" )
152+ "serialize" , help = "Serialize a model to `--serialized-directory`"
153+ )
154154
155155 serialize_parser .add_argument (
156156 "--suffix" ,
@@ -163,7 +163,9 @@ def get_parser():
163163 "`--suffix` is `v1`, the serialized model tensors will be "
164164 "saved to "
165165 "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
166- "If none is provided, a random UUID will be used." ))
166+ "If none is provided, a random UUID will be used."
167+ ),
168+ )
167169 serialize_parser .add_argument (
168170 "--serialized-directory" ,
169171 type = str ,
@@ -175,108 +177,127 @@ def get_parser():
175177 "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
176178 "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
177179 "where `suffix` is given by `--suffix` or a random UUID if not "
178- "provided." )
180+ "provided." ,
181+ )
179182
180183 serialize_parser .add_argument (
181184 "--serialization-kwargs" ,
182185 type = tensorizer_kwargs_arg ,
183186 required = False ,
184- help = ("A JSON string containing additional keyword arguments to "
185- "pass to Tensorizer's TensorSerializer during "
186- "serialization." ))
187+ help = (
188+ "A JSON string containing additional keyword arguments to "
189+ "pass to Tensorizer's TensorSerializer during "
190+ "serialization."
191+ ),
192+ )
187193
188194 serialize_parser .add_argument (
189195 "--keyfile" ,
190196 type = str ,
191197 required = False ,
192- help = ("Encrypt the model weights with a randomly-generated binary key,"
193- " and save the key at this path" ))
198+ help = (
199+ "Encrypt the model weights with a randomly-generated binary key,"
200+ " and save the key at this path"
201+ ),
202+ )
194203
195204 deserialize_parser = subparsers .add_parser (
196- 'deserialize' ,
197- help = ("Deserialize a model from `--path-to-tensors`"
198- " to verify it can be loaded and used." ))
205+ "deserialize" ,
206+ help = (
207+ "Deserialize a model from `--path-to-tensors`"
208+ " to verify it can be loaded and used."
209+ ),
210+ )
199211
200212 deserialize_parser .add_argument (
201213 "--path-to-tensors" ,
202214 type = str ,
203215 required = False ,
204- help = "The local path or S3 URI to the model tensors to deserialize. " )
216+ help = "The local path or S3 URI to the model tensors to deserialize. " ,
217+ )
205218
206219 deserialize_parser .add_argument (
207220 "--serialized-directory" ,
208221 type = str ,
209222 required = False ,
210223 help = "Directory with model artifacts for loading. Assumes a "
211- "model.tensors file exists therein. Can supersede "
212- "--path-to-tensors." )
224+ "model.tensors file exists therein. Can supersede "
225+ "--path-to-tensors." ,
226+ )
213227
214228 deserialize_parser .add_argument (
215229 "--keyfile" ,
216230 type = str ,
217231 required = False ,
218- help = ("Path to a binary key to use to decrypt the model weights,"
219- " if the model was serialized with encryption" ))
232+ help = (
233+ "Path to a binary key to use to decrypt the model weights,"
234+ " if the model was serialized with encryption"
235+ ),
236+ )
220237
221238 deserialize_parser .add_argument (
222239 "--deserialization-kwargs" ,
223240 type = tensorizer_kwargs_arg ,
224241 required = False ,
225- help = ("A JSON string containing additional keyword arguments to "
226- "pass to Tensorizer's `TensorDeserializer` during "
227- "deserialization." ))
242+ help = (
243+ "A JSON string containing additional keyword arguments to "
244+ "pass to Tensorizer's `TensorDeserializer` during "
245+ "deserialization."
246+ ),
247+ )
228248
229249 TensorizerArgs .add_cli_args (deserialize_parser )
230250
231251 return parser
232252
def merge_extra_config_with_tensorizer_config(
    extra_cfg: dict, cfg: TensorizerConfig
) -> None:
    """Copy matching entries of ``extra_cfg`` onto ``cfg`` in place.

    For every key in ``extra_cfg`` that is already an attribute of ``cfg``,
    the attribute is overwritten with the corresponding value and an info
    message naming the key is logged. Keys that ``cfg`` has no attribute
    for are ignored without any message.

    Args:
        extra_cfg: Mapping of attribute names to override values.
        cfg: The configuration object to update; it is mutated, not copied.
    """
    for k, v in extra_cfg.items():
        # Only override attributes that already exist, so unrelated keys in
        # the extra config never create new attributes on the config object.
        if hasattr(cfg, k):
            setattr(cfg, k, v)
            logger.info(
                "Updating TensorizerConfig with %s from "
                "--model-loader-extra-config provided",
                k,
            )
264+
def deserialize(args, tensorizer_config):
    """Build an ``LLM`` that loads its weights with the tensorizer format.

    When ``args.lora_path`` is set, the config's ``lora_dir`` is pointed at
    its ``tensorizer_dir``, the engine is created with LoRA enabled, and one
    test generation using a ``LoRARequest`` for that adapter is printed.
    Otherwise the engine is created without LoRA and nothing is generated.

    Args:
        args: Parsed CLI arguments; this function reads ``lora_path``,
            ``model`` and ``tensor_parallel_size``.
        tensorizer_config: Config passed to the engine as
            ``model_loader_extra_config``. It is mutated (``lora_dir``)
            when a LoRA path is given.

    Returns:
        The constructed ``LLM`` instance.
    """
    if args.lora_path:
        tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
        llm = LLM(
            model=args.model,
            load_format="tensorizer",
            tensor_parallel_size=args.tensor_parallel_size,
            model_loader_extra_config=tensorizer_config,
            enable_lora=True,
        )
        sampling_params = SamplingParams(
            temperature=0, max_tokens=256, stop=["[/assistant]"]
        )

        # Truncating this as the extra text isn't necessary
        prompts = ["[user] Write a SQL query to answer the question based on ..."]

        # Test LoRA load
        print(
            llm.generate(
                prompts,
                sampling_params,
                lora_request=LoRARequest(
                    "sql-lora",
                    1,
                    args.lora_path,
                    tensorizer_config_dict=tensorizer_config.to_serializable(),
                ),
            )
        )
    else:
        llm = LLM(
            model=args.model,
            load_format="tensorizer",
            tensor_parallel_size=args.tensor_parallel_size,
            model_loader_extra_config=tensorizer_config,
        )
    return llm
282303
@@ -285,17 +306,20 @@ def main():
285306 parser = get_parser ()
286307 args = parser .parse_args ()
287308
288- s3_access_key_id = (getattr (args , 's3_access_key_id' , None )
289- or os .environ .get ("S3_ACCESS_KEY_ID" , None ))
290- s3_secret_access_key = (getattr (args , 's3_secret_access_key' , None )
291- or os .environ .get ("S3_SECRET_ACCESS_KEY" , None ))
292- s3_endpoint = (getattr (args , 's3_endpoint' , None )
293- or os .environ .get ("S3_ENDPOINT_URL" , None ))
309+ s3_access_key_id = getattr (args , "s3_access_key_id" , None ) or os .environ .get (
310+ "S3_ACCESS_KEY_ID" , None
311+ )
312+ s3_secret_access_key = getattr (
313+ args , "s3_secret_access_key" , None
314+ ) or os .environ .get ("S3_SECRET_ACCESS_KEY" , None )
315+ s3_endpoint = getattr (args , "s3_endpoint" , None ) or os .environ .get (
316+ "S3_ENDPOINT_URL" , None
317+ )
294318
295319 credentials = {
296320 "s3_access_key_id" : s3_access_key_id ,
297321 "s3_secret_access_key" : s3_secret_access_key ,
298- "s3_endpoint" : s3_endpoint
322+ "s3_endpoint" : s3_endpoint ,
299323 }
300324
301325 model_ref = args .model
@@ -309,25 +333,25 @@ def main():
309333 if args .model_loader_extra_config :
310334 extra_config = json .loads (args .model_loader_extra_config )
311335
312-
313- tensorizer_dir = (args .serialized_directory or
314- extra_config .get ("tensorizer_dir" ))
315- tensorizer_uri = (getattr (args , "path_to_tensors" , None )
316- or extra_config .get ("tensorizer_uri" ))
336+ tensorizer_dir = args .serialized_directory or extra_config .get ("tensorizer_dir" )
337+ tensorizer_uri = getattr (args , "path_to_tensors" , None ) or extra_config .get (
338+ "tensorizer_uri"
339+ )
317340
318341 if tensorizer_dir and tensorizer_uri :
319- parser .error ("--serialized-directory and --path-to-tensors "
320- "cannot both be provided" )
342+ parser .error (
343+ "--serialized-directory and --path-to-tensors cannot both be provided"
344+ )
321345
322346 if not tensorizer_dir and not tensorizer_uri :
323- parser .error ("Either --serialized-directory or --path-to-tensors "
324- " must be provided")
325-
347+ parser .error (
348+ "Either --serialized-directory or --path-to-tensors must be provided"
349+ )
326350
327351 if args .command == "serialize" :
328352 engine_args = EngineArgs .from_cli_args (args )
329353
330- input_dir = tensorizer_dir .rstrip ('/' )
354+ input_dir = tensorizer_dir .rstrip ("/" )
331355 suffix = args .suffix if args .suffix else uuid .uuid4 ().hex
332356 base_path = f"{ input_dir } /vllm/{ model_ref } /{ suffix } "
333357 if engine_args .tensor_parallel_size > 1 :
@@ -339,15 +363,14 @@ def main():
339363 tensorizer_uri = model_path ,
340364 encryption_keyfile = keyfile ,
341365 serialization_kwargs = args .serialization_kwargs or {},
342- ** credentials
366+ ** credentials ,
343367 )
344368
345369 if args .lora_path :
346370 tensorizer_config .lora_dir = tensorizer_config .tensorizer_dir
347371 tensorize_lora_adapter (args .lora_path , tensorizer_config )
348372
349- merge_extra_config_with_tensorizer_config (extra_config ,
350- tensorizer_config )
373+ merge_extra_config_with_tensorizer_config (extra_config , tensorizer_config )
351374 tensorize_vllm_model (engine_args , tensorizer_config )
352375
353376 elif args .command == "deserialize" :
@@ -356,11 +379,10 @@ def main():
356379 tensorizer_dir = args .serialized_directory ,
357380 encryption_keyfile = keyfile ,
358381 deserialization_kwargs = args .deserialization_kwargs or {},
359- ** credentials
382+ ** credentials ,
360383 )
361384
362- merge_extra_config_with_tensorizer_config (extra_config ,
363- tensorizer_config )
385+ merge_extra_config_with_tensorizer_config (extra_config , tensorizer_config )
364386 deserialize (args , tensorizer_config )
365387 else :
366388 raise ValueError ("Either serialize or deserialize must be specified." )
0 commit comments