@@ -28,20 +28,16 @@ def load_lora_model(self, model: nn.Module, model_config: ModelConfig,
28
28
scheduler_config : SchedulerConfig ,
29
29
lora_config : LoRAConfig , device : str ) -> nn .Module :
30
30
31
- assert supports_lora (
32
- model ), f"{ model .__class__ .__name__ } does not support LoRA yet."
31
+ if not supports_lora (model ):
32
+ raise ValueError (
33
+ f"{ model .__class__ .__name__ } does not support LoRA yet." )
33
34
34
35
if supports_multimodal (model ):
35
36
logger .warning ("Regarding multimodal models, vLLM currently "
36
37
"only supports adding LoRA to language model." )
37
38
38
- # It's necessary to distinguish between the max_position_embeddings
39
- # of VLMs and LLMs.
40
- if hasattr (model .config , "max_position_embeddings" ):
41
- max_pos_embeddings = model .config .max_position_embeddings
42
- else :
43
- max_pos_embeddings = (
44
- model .config .text_config .max_position_embeddings )
39
+ # Use get_text_config() in case of multimodal models
40
+ text_config = model_config .hf_config .get_text_config ()
45
41
46
42
# Add LoRA Manager to the Model Runner
47
43
self .lora_manager = LRUCacheWorkerLoRAManager (
@@ -52,7 +48,7 @@ def load_lora_model(self, model: nn.Module, model_config: ModelConfig,
52
48
device ,
53
49
model .embedding_modules ,
54
50
model .embedding_padding_modules ,
55
- max_position_embeddings = max_pos_embeddings ,
51
+ max_position_embeddings = text_config . max_position_embeddings ,
56
52
)
57
53
return self .lora_manager .create_lora_manager (model )
58
54
0 commit comments