
Commit d1af0e9

lora : update API names (#11167)

ggml-ci

1 parent: aeeb942

File tree

12 files changed: +150 −153 lines


common/common.cpp (+13 −12)

@@ -910,12 +910,13 @@ struct common_init_result common_init_from_params(common_params & params) {
         return iparams;
     }
 
-    int err = llama_control_vector_apply(lctx,
-                                         cvec.data.data(),
-                                         cvec.data.size(),
-                                         cvec.n_embd,
-                                         params.control_vector_layer_start,
-                                         params.control_vector_layer_end);
+    int err = llama_apply_adapter_cvec(
+            lctx,
+            cvec.data.data(),
+            cvec.data.size(),
+            cvec.n_embd,
+            params.control_vector_layer_start,
+            params.control_vector_layer_end);
     if (err) {
         llama_free(lctx);
         llama_model_free(model);
@@ -926,8 +927,8 @@ struct common_init_result common_init_from_params(common_params & params) {
 
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
-        llama_lora_adapter_ptr lora;
-        lora.reset(llama_lora_adapter_init(model, la.path.c_str()));
+        llama_adapter_lora_ptr lora;
+        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
@@ -940,7 +941,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     if (!params.lora_init_without_apply) {
-        common_lora_adapters_apply(lctx, params.lora_adapters);
+        common_set_adapter_lora(lctx, params.lora_adapters);
     }
 
     if (params.sampling.ignore_eos && llama_token_eos(vocab) == LLAMA_TOKEN_NULL) {
@@ -1008,11 +1009,11 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }
 
-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora) {
-    llama_lora_adapter_clear(ctx);
+void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
+    llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
         if (la.scale != 0.0f) {
-            llama_lora_adapter_set(ctx, la.ptr, la.scale);
+            llama_set_adapter_lora(ctx, la.ptr, la.scale);
         }
     }
 }
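For orientation, a minimal sketch (not part of the commit) of calling the renamed control-vector entry point used above. The buffer layout follows the llama.h comment later in this diff (an n_embd x n_layers buffer starting from layer 1); the concrete sizes are assumptions.

// A minimal sketch, assuming n_embd = 4096 and a 32-layer model; real values
// come from the loaded model. Passing data = NULL instead clears the vector.
#include <vector>

#include "llama.h"

static int set_zero_cvec(llama_context * lctx) {
    const int32_t n_embd   = 4096; // assumed embedding size
    const int32_t il_start = 1;    // first layer to affect (inclusive)
    const int32_t il_end   = 32;   // last layer to affect (inclusive)

    // n_embd x n_layers buffer, starting from layer 1 (per the llama.h comment)
    std::vector<float> data((size_t) n_embd * il_end, 0.0f);

    return llama_apply_adapter_cvec(lctx, data.data(), data.size(),
                                    n_embd, il_start, il_end);
}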

common/common.h (+6 −6)

@@ -24,11 +24,11 @@
 
 #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
 
-struct common_lora_adapter_info {
+struct common_adapter_lora_info {
     std::string path;
     float scale;
 
-    struct llama_lora_adapter * ptr;
+    struct llama_adapter_lora * ptr;
 };
 
 using llama_tokens = std::vector<llama_token>;
@@ -246,8 +246,8 @@ struct common_params {
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
 
-    bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)
-    std::vector<common_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale
+    bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
+    std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
 
     std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
 
@@ -481,7 +481,7 @@ struct common_init_result {
     llama_model_ptr model;
     llama_context_ptr context;
 
-    std::vector<llama_lora_adapter_ptr> lora;
+    std::vector<llama_adapter_lora_ptr> lora;
 };
 
 struct common_init_result common_init_from_params(common_params & params);
@@ -503,7 +503,7 @@ struct llama_model * common_load_model_from_hf(
         const struct llama_model_params & params);
 
 // clear LoRA adapters from context, then apply new list of adapters
-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora);
+void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
 //
 // Batch utils
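A hedged sketch of driving the renamed common-layer helper; the adapter path and scale are placeholders, and the calls are only the ones declared above.

// Load one LoRA adapter with the renamed API and attach it to a context.
// "adapter.gguf" and the 0.5f scale are illustrative, not from the commit.
#include <vector>

#include "common.h"
#include "llama.h"

static bool attach_one_lora(llama_model * model, llama_context * ctx) {
    common_adapter_lora_info la;
    la.path  = "adapter.gguf"; // hypothetical adapter file
    la.scale = 0.5f;           // user-defined scale
    la.ptr   = llama_adapter_lora_init(model, la.path.c_str());
    if (la.ptr == nullptr) {
        return false; // failed to load the adapter
    }

    std::vector<common_adapter_lora_info> lora = { la };
    common_set_adapter_lora(ctx, lora); // clears old adapters, then applies the list
    // note: per llama.h, the adapter is also freed when the model is deleted
    return true;
}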

examples/export-lora/export-lora.cpp (+1 −1)

@@ -130,7 +130,7 @@ struct lora_merge_ctx {
 
     lora_merge_ctx(
             std::string & base_fname,
-            std::vector<common_lora_adapter_info> & lora_files,
+            std::vector<common_adapter_lora_info> & lora_files,
             std::string & outfile,
             int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
         fout.exceptions(std::ofstream::failbit); // fail fast on write errors

examples/server/server.cpp (+4 −4)

@@ -98,7 +98,7 @@ struct slot_params {
     int64_t t_max_prompt_ms  = -1; // TODO: implement
     int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
 
-    std::vector<common_lora_adapter_info> lora;
+    std::vector<common_adapter_lora_info> lora;
 
     std::vector<std::string> antiprompt;
     std::vector<std::string> response_fields;
@@ -198,7 +198,7 @@ struct server_task {
     bool metrics_reset_bucket = false;
 
     // used by SERVER_TASK_TYPE_SET_LORA
-    std::vector<common_lora_adapter_info> set_lora;
+    std::vector<common_adapter_lora_info> set_lora;
 
     server_task(server_task_type type) : type(type) {}
 
@@ -1133,7 +1133,7 @@ struct server_slot {
 
     common_speculative * spec = nullptr;
 
-    std::vector<common_lora_adapter_info> lora;
+    std::vector<common_adapter_lora_info> lora;
 
     // the index relative to completion multi-task request
     size_t index = 0;
@@ -2934,7 +2934,7 @@ struct server_context {
             // make sure we're in the right embedding mode
             llama_set_embeddings(ctx, slot_batched->is_non_causal());
             // apply lora, only need to do it once per batch
-            common_lora_adapters_apply(ctx, slot_batched->lora);
+            common_set_adapter_lora(ctx, slot_batched->lora);
         }
 
         // process the created batch of tokens

examples/server/utils.hpp (+5 −5)

@@ -804,8 +804,8 @@ static std::vector<llama_token_data> get_token_probabilities(llama_context * ctx
 }
 
 static bool are_lora_equal(
-        const std::vector<common_lora_adapter_info> & l1,
-        const std::vector<common_lora_adapter_info> & l2) {
+        const std::vector<common_adapter_lora_info> & l1,
+        const std::vector<common_adapter_lora_info> & l2) {
     if (l1.size() != l2.size()) {
         return false;
     }
@@ -819,10 +819,10 @@ static bool are_lora_equal(
 }
 
 // parse lora config from JSON request, returned a copy of lora_base with updated scale
-static std::vector<common_lora_adapter_info> parse_lora_request(
-        const std::vector<common_lora_adapter_info> & lora_base,
+static std::vector<common_adapter_lora_info> parse_lora_request(
+        const std::vector<common_adapter_lora_info> & lora_base,
         const json & data) {
-    std::vector<common_lora_adapter_info> lora(lora_base);
+    std::vector<common_adapter_lora_info> lora(lora_base);
     int max_idx = lora.size();
 
     // clear existing value
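The JSON shape consumed by parse_lora_request is not visible in this hunk. As a hedged illustration, the server's per-request LoRA override is documented as a list of {id, scale} objects, which could be built roughly like this (the field names are an assumption here, not taken from the diff):

// Sketch of a request body carrying a per-request LoRA override.
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main() {
    json entry;
    entry["id"]    = 0;   // assumed: index into the adapters loaded at startup
    entry["scale"] = 0.5; // assumed: overrides the scale for this request only

    json body;
    body["prompt"] = "Hello";
    body["lora"]   = json::array({ entry });

    // server side (sketch): parse_lora_request(lora_base, body.at("lora"))
    // would return a copy of lora_base with adapter 0 at scale 0.5; the
    // "clear existing value" step above suggests unlisted adapters reset to 0.
    return 0;
}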

include/llama-cpp.h (+3 −3)

@@ -20,11 +20,11 @@ struct llama_sampler_deleter {
     void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); }
 };
 
-struct llama_lora_adapter_deleter {
-    void operator()(llama_lora_adapter * lora_adapter) { llama_lora_adapter_free(lora_adapter); }
+struct llama_adapter_lora_deleter {
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };
 
 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
 typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr;
 typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
-typedef std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter> llama_lora_adapter_ptr;
+typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_adapter_lora_ptr;
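A short sketch of the renamed RAII wrapper in use, mirroring the common.cpp hunk earlier in this commit; the adapter path is a placeholder.

// The unique_ptr invokes llama_adapter_lora_free through the renamed deleter.
#include "llama-cpp.h"

static void raii_demo(llama_model * model, llama_context * ctx) {
    llama_adapter_lora_ptr lora;
    lora.reset(llama_adapter_lora_init(model, "adapter.gguf")); // placeholder path
    if (!lora) {
        return; // load failed
    }
    llama_set_adapter_lora(ctx, lora.get(), 1.0f); // attach at full strength
}   // llama_adapter_lora_free runs here via llama_adapter_lora_deleter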

include/llama.h (+15 −20)

@@ -385,8 +385,7 @@ extern "C" {
 } llama_chat_message;
 
 // lora adapter
-// TODO: rename to llama_adapter_lora
-struct llama_lora_adapter;
+struct llama_adapter_lora;
 
 // Helpers for getting default parameters
 // TODO: update API to start accepting pointers to params structs (https://github.com/ggerganov/llama.cpp/discussions/9172)
@@ -520,44 +519,40 @@ extern "C" {
 //
 
 // Load a LoRA adapter from file
-// TODO: rename to llama_adapter_lora_init
-LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
         struct llama_model * model,
         const char * path_lora);
 
+// Manually free a LoRA adapter
+// Note: loaded adapters will be free when the associated model is deleted
+LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);
+
+// The following functions operate on a llama_context, hence the naming: llama_verb_...
+
 // Add a loaded LoRA adapter to given context
 // This will not modify model's weight
-// TODO: rename to llama_set_adapter_lora
-LLAMA_API int32_t llama_lora_adapter_set(
+LLAMA_API int32_t llama_set_adapter_lora(
         struct llama_context * ctx,
-        struct llama_lora_adapter * adapter,
+        struct llama_adapter_lora * adapter,
         float scale);
 
 // Remove a specific LoRA adapter from given context
 // Return -1 if the adapter is not present in the context
-// TODO: rename to llama_rm_adapter_lora
-LLAMA_API int32_t llama_lora_adapter_remove(
+LLAMA_API int32_t llama_rm_adapter_lora(
         struct llama_context * ctx,
-        struct llama_lora_adapter * adapter);
+        struct llama_adapter_lora * adapter);
 
 // Remove all LoRA adapters from given context
-// TODO: rename to llama_clear_adapter_lora
-LLAMA_API void llama_lora_adapter_clear(struct llama_context * ctx);
-
-// Manually free a LoRA adapter
-// Note: loaded adapters will be free when the associated model is deleted
-// TODO: rename to llama_adapter_lora_free
-LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
+LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
 
 // Apply a loaded control vector to a llama_context, or if data is NULL, clear
 // the currently loaded vector.
 // n_embd should be the size of a single layer's control, and data should point
 // to an n_embd x n_layers buffer starting from layer 1.
 // il_start and il_end are the layer range the vector should apply to (both inclusive)
 // See llama_control_vector_load in common to load a control vector.
-// TODO: rename to llama_adapter_cvec_apply
-LLAMA_API int32_t llama_control_vector_apply(
-        struct llama_context * lctx,
+LLAMA_API int32_t llama_apply_adapter_cvec(
+        struct llama_context * ctx,
         const float * data,
         size_t len,
         int32_t n_embd,
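Putting the renamed C API together, a minimal lifecycle sketch; model and context creation are omitted, and the path and scales are illustrative only.

// One pass through the renamed adapter API: init -> set -> rm -> clear -> free.
#include "llama.h"

static void adapter_lifecycle(llama_model * model, llama_context * ctx) {
    // load an adapter from file (also freed automatically with the model)
    struct llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf");
    if (adapter == NULL) {
        return;
    }

    // context-level operations follow the llama_verb_... naming
    llama_set_adapter_lora(ctx, adapter, 1.0f); // attach at full strength
    llama_rm_adapter_lora(ctx, adapter);        // detach; returns -1 if absent
    llama_clear_adapter_lora(ctx);              // detach any remaining adapters

    llama_adapter_lora_free(adapter);           // optional manual free
}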
