Skip to content

Commit 2282a3d

Browse files
ikawrakow and Kawrakow
authored and committed
Make Q3_K_S be the same as old Q3_K_L for Mixtral-8x7B (ggml-org#4906)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent 4926913 commit 2282a3d

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

llama.cpp

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8489,9 +8489,16 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
84898489
++qs.i_feed_forward_w2;
84908490
} else if (name.find("attn_output.weight") != std::string::npos) {
84918491
if (arch != LLM_ARCH_FALCON) {
8492-
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
8493-
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
8494-
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
8492+
if (qs.model.hparams.n_expert == 8) {
8493+
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ||
8494+
ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) {
8495+
new_type = GGML_TYPE_Q5_K;
8496+
}
8497+
} else {
8498+
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
8499+
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
8500+
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
8501+
}
84958502
} else {
84968503
if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q4_K;
84978504
}

0 commit comments

Comments
 (0)