Skip to content

Commit 7509b9e

Browse files
committed
Fix GPU Mamba's output diverging from the CPU version in a major way
1 parent b77cb69 commit 7509b9e

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

llama.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -8790,7 +8790,7 @@ static struct ggml_tensor * llm_build_mamba(
87908790

87918791
// {d_inner, n_seq_tokens, n_seqs} * {d_inner} => {d_inner, n_seq_tokens, n_seqs}
87928792
y = ggml_add(ctx, y, ggml_mul(ctx, x, model.layers[il].ssm_d));
8793-
y = ggml_mul(ctx, y, ggml_silu(ctx, z));
8793+
y = ggml_mul(ctx, y, ggml_silu(ctx, ggml_cont(ctx, z)));
87948794

87958795
// {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs}
87968796
cur = ggml_mul_mat(ctx, model.layers[il].ssm_out, y);

0 commit comments

Comments (0)