Commit 52801c0

Merge pull request #1 from jrudolph/convert-llama2-vocab

Support loading llama2.c vocabulary as well

2 parents aab15de + aa26201

File tree

1 file changed (+61, -26 lines)

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 61 additions & 26 deletions
@@ -438,6 +438,11 @@ struct llama_file {
         read_raw(&ret, sizeof(ret));
         return ret;
     }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
 
     std::string read_string(std::uint32_t len) {
         std::vector<char> chars(len);
@@ -491,6 +496,59 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
+bool is_ggml_file(const char *filename) {
+    llama_file file(filename, "rb");
+    if (file.size < 4) {
+        return false;
+    }
+    uint32_t magic = file.read_u32();
+    return magic == LLAMA_FILE_MAGIC;
+}
+
+void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
+    // heuristic to infer whether vocab is from ggml or from llama2.c vocabulary
+    if (is_ggml_file(filename)) {
+
+        struct llama_context_params llama_params = llama_context_default_params();
+        llama_params.vocab_only = true;
+
+        struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
+        struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
+
+        std::vector<const char *> strings;
+        std::vector<float> scores;
+        int n_vocab = llama_n_vocab(lctx);
+        strings.resize(n_vocab, NULL);
+        scores.resize(n_vocab, 0);
+        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+        vocab->id_to_token.resize(n_vocab);
+        for (int i=0; i<n_vocab; ++i) {
+            std::string tok = std::string(strings[i]);
+            float score = scores[i];
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+        llama_free(lctx);
+        llama_free_model(lmodel);
+    } else { // assume llama2.c vocabulary
+        printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
+        llama_file file(filename, "rb");
+        uint32_t n_vocab = config->vocab_size;
+        /* uint32_t max_token_length = */ file.read_u32(); // unused
+        vocab->id_to_token.resize(n_vocab);
+        for (uint32_t i=0; i<n_vocab; ++i) {
+            float_t score = file.read_f32();
+            uint32_t len = file.read_u32();
+            std::string tok = file.read_string(len);
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+    }
+}
+
 void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
     int ct;
     switch (gg_weights->n_dims){
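For reference, the llama2.c vocabulary file that the else-branch of load_vocab reads is a flat binary: one uint32 max_token_length header, followed by vocab_size records of (float32 score, uint32 length, raw token bytes). Below is a minimal standalone sketch of a reader for that layout, inferred from the reads in the hunk above; the Token struct and the read_llama2c_vocab name are hypothetical and not part of this patch:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Token {
    std::string text;
    float       score;
};

// Hypothetical reader mirroring the reads performed by load_vocab's else-branch:
// header, then per token: score (f32), length (u32), raw bytes.
static std::vector<Token> read_llama2c_vocab(const char * path, uint32_t n_vocab) {
    std::vector<Token> tokens(n_vocab);
    FILE * f = std::fopen(path, "rb");
    if (!f) {
        return {};
    }
    uint32_t max_token_length = 0;
    std::fread(&max_token_length, sizeof(max_token_length), 1, f); // header, unused here
    for (uint32_t i = 0; i < n_vocab; ++i) {
        uint32_t len = 0;
        std::fread(&tokens[i].score, sizeof(float), 1, f);
        std::fread(&len, sizeof(len), 1, f);
        tokens[i].text.resize(len);
        std::fread(&tokens[i].text[0], 1, len, f);
    }
    std::fclose(f);
    return tokens;
}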
@@ -658,7 +716,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params)
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
     fprintf(stderr, " -h, --help show this help message and exit\n");
-    fprintf(stderr, " --copy-vocab-from-model FNAME model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
+    fprintf(stderr, " --copy-vocab-from-model FNAME llama2.c vocabulary or ggml model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
     fprintf(stderr, " --llama2c-model FNAME [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
     fprintf(stderr, " --llama2c-output-model FNAME model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
     fprintf(stderr, "\n");
@@ -737,30 +795,9 @@ int main(int argc, char ** argv) {
         fclose(file);
     }
 
-    struct llama_context_params llama_params = llama_context_default_params();
-    llama_params.vocab_only = true;
-
-    struct llama_model * lmodel = llama_load_model_from_file(params.fn_vocab_model, llama_params);
-    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
-
     struct llama_vocab vocab;
-    {
-        std::vector<const char *> strings;
-        std::vector<float> scores;
-        int n_vocab = llama_n_vocab(lctx);
-        strings.resize(n_vocab, NULL);
-        scores.resize(n_vocab, 0);
-        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-        vocab.id_to_token.resize(n_vocab);
-        for (int i=0; i<n_vocab; ++i) {
-            std::string tok = std::string(strings[i]);
-            float score = scores[i];
-            vocab.id_to_token[i].tok = tok;
-            vocab.id_to_token[i].score = score;
-            vocab.token_to_id.emplace(tok, i);
-        }
-    }
+    load_vocab(params.fn_vocab_model, &config, &vocab);
+
     struct my_llama_model model;
     model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
     model.hparams.n_ctx = params.n_ctx;
@@ -782,8 +819,6 @@
 
     printf("Saving llama.c model file %s in ggml format at %s\n", params.fn_llama2c_model, params.fn_llama2c_output_model);
 
-    llama_free(lctx);
-    llama_free_model(lmodel);
     ggml_free(model.ctx);
     free_weights(&weights);
     return 0;
