
Commit 3c3d767

[BugFix] Fix mla cpu - missing 3 required positional arguments (#17494)
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
Parent: 13cf6b6

2 files changed: +6, -1 lines

vllm/_ipex_ops.py

Lines changed: 3 additions & 1 deletion
@@ -177,7 +177,7 @@ def varlen_attention(
         out: torch.Tensor,
         seqlen_q: torch.Tensor,
         seqlen_k: torch.Tensor,
-        alibi_slopes: torch.Tensor,
+        alibi_slopes: Optional[torch.Tensor],
         max_seqlen_q: int,
         max_seqlen_k: int,
         pdropout: float,
@@ -193,6 +193,8 @@ def varlen_attention(
         if ipex.__version__.endswith("cpu"):
             if logits_soft_cap != 0.0:
                 raise ValueError("IPEX CPU does not support logits_soft_cap")
+            assert alibi_slopes is None
+            assert window_size_left < 0 and window_size_right < 0
             ipex.llm.functional.varlen_attention(query.contiguous(),
                                                  key.contiguous(),
                                                  value.contiguous(), out,
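
Why the new asserts: the IPEX CPU path already rejects logits_soft_cap, and this commit makes it fail fast on ALiBi slopes and sliding windows too, rather than silently ignoring them; alibi_slopes also becomes Optional[torch.Tensor] so callers can legally pass None. A minimal standalone sketch of that guard pattern (hypothetical helper, not code from this commit):

    from typing import Optional

    import torch


    def _check_ipex_cpu_limits(alibi_slopes: Optional[torch.Tensor],
                               window_size_left: int,
                               window_size_right: int) -> None:
        # Hypothetical helper: reject features the IPEX CPU kernel cannot
        # honor. Negative window sizes conventionally mean "no sliding
        # window", matching the defaults passed in cpu_mla.py below.
        assert alibi_slopes is None, "ALiBi not supported on IPEX CPU"
        assert window_size_left < 0 and window_size_right < 0, \
            "sliding window not supported on IPEX CPU"


    # Passes; any other combination trips an assert.
    _check_ipex_cpu_limits(alibi_slopes=None,
                           window_size_left=-1,
                           window_size_right=-1)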

vllm/attention/backends/cpu_mla.py

Lines changed: 3 additions & 0 deletions
@@ -273,6 +273,9 @@ def _forward_prefill(
             return_softmax=False,
             gen_=None,
             logits_soft_cap=0.0,
+            window_size_left=-1,
+            window_size_right=-1,
+            alibi_slopes=None,
         )
 
         # remove padding
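
For context, the bug in the commit title is an ordinary Python TypeError: _forward_prefill called ipex_ops.varlen_attention without three parameters that have no default values. A toy reproduction with a simplified stand-in signature (the real function takes many more arguments):

    # Toy stand-in for ipex_ops.varlen_attention: three trailing parameters
    # without defaults, mirroring the ones the old call site omitted.
    def varlen_attention(query, key, value, logits_soft_cap,
                         window_size_left, window_size_right, alibi_slopes):
        pass


    try:
        # Old cpu_mla.py call site (simplified): the last three are missing.
        varlen_attention("q", "k", "v", logits_soft_cap=0.0)
    except TypeError as err:
        # TypeError: varlen_attention() missing 3 required positional
        # arguments: 'window_size_left', 'window_size_right', and
        # 'alibi_slopes'
        print(err)

    # Fixed call site, as in this commit: pass explicit "feature disabled"
    # values (negative window sizes, no ALiBi slopes).
    varlen_attention("q", "k", "v", logits_soft_cap=0.0,
                     window_size_left=-1, window_size_right=-1,
                     alibi_slopes=None)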
