ggml-org · ggerganov · Oct 23, 2023 · Oct 21, 2023 · Oct 21, 2023 · Oct 23, 2023
diff --git a/llama.cpp b/llama.cpp
@@ -9669,29 +9669,57 @@ llama_token llama_token_bos(const struct llama_context * ctx) {
     return ctx->model.vocab.special_bos_id;
 }
 
+llama_token llama_model_token_bos(const struct llama_model * model) {
+    return model->vocab.special_bos_id; // beginning-of-sentence id read directly from the model's vocab (no llama_context needed)
+}
+
 llama_token llama_token_eos(const struct llama_context * ctx) {
     return ctx->model.vocab.special_eos_id;
 }
 
+llama_token llama_model_token_eos(const struct llama_model * model) {
+    return model->vocab.special_eos_id; // end-of-sentence id read directly from the model's vocab (no llama_context needed)
+}
+
 llama_token llama_token_nl(const struct llama_context * ctx) {
     return ctx->model.vocab.linefeed_id;
 }
+
+llama_token llama_model_token_nl(const struct llama_model * model) {
+    return model->vocab.linefeed_id; // next-line (linefeed) id read directly from the model's vocab (no llama_context needed)
+}
 llama_token llama_token_prefix(const struct llama_context * ctx) {
     return ctx->model.vocab.special_prefix_id;
 }
 
+llama_token llama_model_token_prefix(const struct llama_model * model) {
+    return model->vocab.special_prefix_id; // infill-prefix marker id read directly from the model's vocab
+}
+
 llama_token llama_token_middle(const struct llama_context * ctx) {
     return ctx->model.vocab.special_middle_id;
 }
 
+llama_token llama_model_token_middle(const struct llama_model * model) {
+    return model->vocab.special_middle_id; // infill-middle marker id read directly from the model's vocab
+}
+
 llama_token llama_token_suffix(const struct llama_context * ctx) {
     return ctx->model.vocab.special_suffix_id;
 }
 
+llama_token llama_model_token_suffix(const struct llama_model * model) {
+    return model->vocab.special_suffix_id; // infill-suffix marker id read directly from the model's vocab
+}
+
 llama_token llama_token_eot(const struct llama_context * ctx) {
     return ctx->model.vocab.special_eot_id;
 }
 
+llama_token llama_model_token_eot(const struct llama_model * model) {
+    return model->vocab.special_eot_id; // end-of-infill-middle marker id read directly from the model's vocab
+}
+
 int llama_tokenize(
     const struct llama_model * model,
                   const char * text,

diff --git a/llama.h b/llama.h
@@ -504,12 +504,22 @@ extern "C" {
     LLAMA_API llama_token llama_token_bos(const struct llama_context * ctx);  // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_context * ctx);  // end-of-sentence
     LLAMA_API llama_token llama_token_nl (const struct llama_context * ctx);  // next-line
+
+    LLAMA_API llama_token llama_model_token_bos(const struct llama_model * model); // beginning-of-sentence
+    LLAMA_API llama_token llama_model_token_eos(const struct llama_model * model); // end-of-sentence
+    LLAMA_API llama_token llama_model_token_nl (const struct llama_model * model); // next-line
+
     // codellama infill tokens
     LLAMA_API llama_token llama_token_prefix(const struct llama_context * ctx); // Beginning of infill prefix
     LLAMA_API llama_token llama_token_middle(const struct llama_context * ctx); // Beginning of infill middle
     LLAMA_API llama_token llama_token_suffix(const struct llama_context * ctx); // Beginning of infill suffix
     LLAMA_API llama_token llama_token_eot   (const struct llama_context * ctx); // End of infill middle
 
+    LLAMA_API llama_token llama_model_token_prefix(const struct llama_model * model); // Beginning of infill prefix
+    LLAMA_API llama_token llama_model_token_middle(const struct llama_model * model); // Beginning of infill middle
+    LLAMA_API llama_token llama_model_token_suffix(const struct llama_model * model); // Beginning of infill suffix
+    LLAMA_API llama_token llama_model_token_eot   (const struct llama_model * model); // End of infill middle
+
     //
     // Tokenization
     //