@@ -5,8 +5,9 @@
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 
 import torch
-from pydantic import ConfigDict
+from pydantic import ConfigDict, Field, model_validator
 from pydantic.dataclasses import dataclass
+from typing_extensions import Self
 
 import vllm.envs as envs
 from vllm.config.utils import config
@@ -23,16 +24,18 @@
 logger = init_logger(__name__)
 
 LoRADType = Literal["auto", "float16", "bfloat16"]
+MaxLoRARanks = Literal[1, 8, 16, 32, 64, 128, 256, 320, 512]
+LoRAExtraVocabSize = Literal[256, 512]
 
 
 @config
 @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
 class LoRAConfig:
     """Configuration for LoRA."""
 
-    max_lora_rank: int = 16
+    max_lora_rank: MaxLoRARanks = 16
     """Max LoRA rank."""
-    max_loras: int = 1
+    max_loras: int = Field(default=1, ge=1)
     """Max number of LoRAs in a single batch."""
     fully_sharded_loras: bool = False
     """By default, only half of the LoRA computation is sharded with tensor
@@ -44,7 +47,14 @@ class LoRAConfig:
     `max_loras`."""
     lora_dtype: Union[torch.dtype, LoRADType] = "auto"
     """Data type for LoRA. If auto, will default to base model dtype."""
-    lora_extra_vocab_size: int = 256
+    lora_extra_vocab_size: LoRAExtraVocabSize = Field(
+        default=256,
+        deprecated=(
+            "`lora_extra_vocab_size` is deprecated and will be removed "
+            "in v0.12.0. Additional vocabulary support for "
+            "LoRA adapters is being phased out."
+        ),
+    )
     """(Deprecated) Maximum size of extra vocabulary that can be present in a
     LoRA adapter. Will be removed in v0.12.0."""
     lora_vocab_padding_size: ClassVar[int] = (
@@ -60,7 +70,10 @@ class LoRAConfig:
     per prompt. When run in offline mode, the lora IDs for n modalities
     will be automatically assigned to 1-n with the names of the modalities
     in alphabetic order."""
-    bias_enabled: bool = False
+    bias_enabled: bool = Field(
+        default=False,
+        deprecated="`bias_enabled` is deprecated and will be removed in v0.12.0.",
+    )
     """[DEPRECATED] Enable bias for LoRA adapters. This option will be
     removed in v0.12.0."""
 
@@ -87,36 +100,8 @@ def compute_hash(self) -> str:
         hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
         return hash_str
 
-    def __post_init__(self):
-        # Deprecation warning for lora_extra_vocab_size
-        logger.warning(
-            "`lora_extra_vocab_size` is deprecated and will be removed "
-            "in v0.12.0. Additional vocabulary support for "
-            "LoRA adapters is being phased out."
-        )
-
-        # Deprecation warning for enable_lora_bias
-        if self.bias_enabled:
-            logger.warning(
-                "`enable_lora_bias` is deprecated and will be removed in v0.12.0."
-            )
-
-        # Setting the maximum rank to 512 should be able to satisfy the vast
-        # majority of applications.
-        possible_max_ranks = (1, 8, 16, 32, 64, 128, 256, 320, 512)
-        possible_lora_extra_vocab_size = (256, 512)
-        if self.max_lora_rank not in possible_max_ranks:
-            raise ValueError(
-                f"max_lora_rank ({self.max_lora_rank}) must be one of "
-                f"{possible_max_ranks}."
-            )
-        if self.lora_extra_vocab_size not in possible_lora_extra_vocab_size:
-            raise ValueError(
-                f"lora_extra_vocab_size ({self.lora_extra_vocab_size}) "
-                f"must be one of {possible_lora_extra_vocab_size}."
-            )
-        if self.max_loras < 1:
-            raise ValueError(f"max_loras ({self.max_loras}) must be >= 1.")
+    @model_validator(mode="after")
+    def _validate_lora_config(self) -> Self:
         if self.max_cpu_loras is None:
             self.max_cpu_loras = self.max_loras
         elif self.max_cpu_loras < self.max_loras:
@@ -125,6 +110,8 @@ def __post_init__(self):
                 f"max_loras ({self.max_loras})"
             )
 
+        return self
+
     def verify_with_cache_config(self, cache_config: CacheConfig):
         if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
             raise ValueError("V0 LoRA does not support CPU offload, please use V1.")
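
Note on the pattern: the `Literal` aliases and `Field` constraints above make the old `__post_init__` membership and range checks declarative, so invalid values are rejected at construction time with a `ValidationError`. A minimal standalone sketch of that behavior (the `Demo` class below is a hypothetical stand-in, not vLLM code, though it reuses the same constraint values):

```python
from typing import Literal

from pydantic import Field, ValidationError
from pydantic.dataclasses import dataclass

# Same allowed ranks as the diff; `Demo` itself is made up for illustration.
MaxLoRARanks = Literal[1, 8, 16, 32, 64, 128, 256, 320, 512]


@dataclass
class Demo:
    max_lora_rank: MaxLoRARanks = 16  # only the listed ranks are accepted
    max_loras: int = Field(default=1, ge=1)  # replaces the manual `< 1` check


Demo(max_lora_rank=64, max_loras=2)  # ok

try:
    Demo(max_lora_rank=15)  # not in the Literal -> ValidationError
except ValidationError as e:
    print(e.errors()[0]["msg"])

try:
    Demo(max_loras=0)  # violates ge=1 -> ValidationError
except ValidationError as e:
    print(e.errors()[0]["msg"])
```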
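
The one check that survives (defaulting `max_cpu_loras` to `max_loras`) spans two fields, so it cannot be a per-field constraint; that is why it moves into a `model_validator(mode="after")` hook, which runs once all fields are validated and must return the instance. A sketch of the same pattern, again with a hypothetical class:

```python
from typing import Optional

from pydantic import model_validator
from pydantic.dataclasses import dataclass
from typing_extensions import Self


@dataclass
class DemoCaps:  # hypothetical stand-in mirroring the validator in the diff
    max_loras: int = 1
    max_cpu_loras: Optional[int] = None

    @model_validator(mode="after")
    def _default_cpu_cap(self) -> Self:
        # Runs after field validation; safe to read and adjust fields here.
        if self.max_cpu_loras is None:
            self.max_cpu_loras = self.max_loras
        elif self.max_cpu_loras < self.max_loras:
            raise ValueError("max_cpu_loras must be >= max_loras")
        return self


print(DemoCaps(max_loras=4).max_cpu_loras)  # -> 4
```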