1 parent 3c17c62 commit 8d29c8c
vllm/_custom_ops.py
@@ -1458,8 +1458,9 @@ def cutlass_mla_decode(q_nope_and_q_pe: torch.Tensor,
  assert B_pt == B_q, f"Batch dims must be same for page_table and q_nope_and_q_pe, but got {B_pt} and {B_q}"

  # Current cutlass MLA implementation will pack smaller pages into a 128 page.
- assert PAGE_NUM % (128 / PAGE_SIZE) == 0, f"PAGE_NUM must be divisible by 128 / PAGE_SIZE, but got {PAGE_NUM} and {128 / PAGE_SIZE}"
-
+ assert PAGE_NUM % (
+     128 / PAGE_SIZE
+ ) == 0, f"PAGE_NUM must be divisible by 128 / PAGE_SIZE, but got {PAGE_NUM} and {128 / PAGE_SIZE}"

  # TODO(kaixih@nvidia): support fp8
  assert q_nope_and_q_pe.dtype in (torch.float16, torch.bfloat16), (
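The change itself only rewraps the assert to satisfy line-length limits; the check is unchanged. For context, here is a minimal standalone sketch of the constraint it enforces: the cutlass MLA kernel packs smaller KV-cache pages into 128-token units, so the page count must split evenly into such groups. The PAGE_SIZE and PAGE_NUM values below are hypothetical examples, not taken from the commit.

# Minimal sketch of the divisibility check from the diff above.
# PAGE_SIZE and PAGE_NUM are hypothetical example values (assumed).
PAGE_SIZE = 32   # tokens per KV-cache page (assumed)
PAGE_NUM = 16    # pages per sequence in the page table (assumed)

# Each 128-token unit packs 128 / PAGE_SIZE smaller pages (4.0 here).
# Note this is true division, so the modulo is computed on a float.
pages_per_unit = 128 / PAGE_SIZE
assert PAGE_NUM % pages_per_unit == 0, (
    f"PAGE_NUM must be divisible by 128 / PAGE_SIZE, "
    f"but got {PAGE_NUM} and {pages_per_unit}")

# A PAGE_NUM of 18 would fail: 18 % 4.0 == 2.0, leaving a partial
# 128-token unit that the kernel cannot pack.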