Skip to content

Commit cc8d529

Browse files
committed
Fixed issues
1 parent 6c80b3c commit cc8d529

File tree

2 files changed

+22
-18
lines changed

2 files changed

+22
-18
lines changed

llama.cpp

+20 -16
Original file line number | Diff line number | Diff line change
@@ -4076,24 +4076,26 @@ static void llm_load_vocab(
40764076
if (add_space_prefix_keyidx != -1) {
40774077
vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
40784078
} // The default value of add_space_prefix is true.
4079+
} else if (tokenizer_name == "bert") {
4080+
vocab.type = LLAMA_VOCAB_TYPE_WPM;
4081+
4082+
// default special tokens
4083+
vocab.special_bos_id = -1;
4084+
vocab.special_eos_id = -1;
4085+
vocab.special_unk_id = 100;
4086+
vocab.special_sep_id = 102;
4087+
vocab.special_pad_id = 0;
4088+
vocab.special_cls_id = 101;
4089+
vocab.special_mask_id = 103;
4090+
vocab.add_space_prefix = false;
40794091
} else {
40804092
if (tokenizer_name == "gpt2") {
40814093
vocab.type = LLAMA_VOCAB_TYPE_BPE;
40824094
} else if (tokenizer_name == "deepseek_coder") {
40834095
vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKCODER;
40844096
} else if (tokenizer_name == "deepseek_llm") {
40854097
vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKLLM;
4086-
} else if (tokenizer_name == "bert") {
4087-
vocab.type = LLAMA_VOCAB_TYPE_WPM;
4088-
4089-
// default special tokens
4090-
vocab.special_bos_id = 101;
4091-
vocab.special_eos_id = 102;
4092-
vocab.special_unk_id = 100;
4093-
vocab.special_sep_id = -1;
4094-
vocab.special_pad_id = -1;
4095-
vocab.add_space_prefix = false;
4096-
} else {
4098+
} else {
40974099
LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str());
40984100
LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
40994101
vocab.type = LLAMA_VOCAB_TYPE_SPM;
@@ -4125,11 +4127,13 @@ static void llm_load_vocab(
41254127
}
41264128

41274129
// default special tokens
4128-
vocab.special_bos_id = 11;
4129-
vocab.special_eos_id = 11;
4130-
vocab.special_unk_id = -1;
4131-
vocab.special_sep_id = -1;
4132-
vocab.special_pad_id = -1;
4130+
vocab.special_bos_id = 11;
4131+
vocab.special_eos_id = 11;
4132+
vocab.special_unk_id = -1;
4133+
vocab.special_sep_id = -1;
4134+
vocab.special_pad_id = -1;
4135+
vocab.special_cls_id = -1;
4136+
vocab.special_mask_id = -1;
41334137
}
41344138
}
41354139

0 commit comments

Comments
 (0)