Commit 97cc872

[Model] Ignore rotary embed load for Cohere model (#17319)
1 parent 4464109 commit 97cc872

File tree

1 file changed (+4, −0 lines)


vllm/model_executor/models/commandr.py (+4)

@@ -418,6 +418,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
         loaded_params: Set[str] = set()
         for name, loaded_weight in weights:
 
+            # Skip loading rotary embeddings since vLLM has its own
+            if "rotary_emb.inv_freq" in name:
+                continue
+
             if (self.quant_config is not None and
                     (scale_name := self.quant_config.get_cache_scale(name))):
                 # Loading kv cache quantization scales
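The idea behind the change: Hugging Face checkpoints for Cohere Command-R models may persist a `rotary_emb.inv_freq` buffer, but vLLM computes its own rotary embedding tables, so there is no matching parameter to load it into and the name must simply be skipped. A minimal sketch of that filtering step, using a hypothetical standalone helper (not vLLM's actual `load_weights` method) and plain tuples in place of real tensors:

```python
from typing import Any, Iterable, Iterator, Tuple


def filter_checkpoint_weights(
    weights: Iterable[Tuple[str, Any]],
) -> Iterator[Tuple[str, Any]]:
    """Yield (name, weight) pairs, dropping rotary-embedding buffers.

    Hypothetical helper mirroring the check this commit adds: checkpoints
    may contain `rotary_emb.inv_freq`, but vLLM rebuilds rotary embeddings
    itself, so the buffer has no counterpart in the model.
    """
    for name, loaded_weight in weights:
        if "rotary_emb.inv_freq" in name:
            continue  # same substring check the diff above adds
        yield name, loaded_weight


# Example: only the projection weight survives the filter.
ckpt = [
    ("model.layers.0.self_attn.rotary_emb.inv_freq", None),
    ("model.layers.0.self_attn.q_proj.weight", None),
]
kept = list(filter_checkpoint_weights(ckpt))
```

A substring match (rather than an exact name) is used so the skip applies to every layer's copy of the buffer, whatever prefix the checkpoint uses.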
