Skip to content

Commit 11dc109

Browse files
authored
Honor -ngl option for CUDA offloading in llava (#3621)
1 parent 2a4bcba commit 11dc109

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

examples/llava/llava.cpp

+7 -1
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,13 @@ int main(int argc, char ** argv) {
7979

8080
llama_backend_init(params.numa);
8181

82-
llama_model_params model_params = llama_model_default_params();
82+
llama_model_params model_params = llama_model_default_params();
83+
model_params.n_gpu_layers = params.n_gpu_layers;
84+
model_params.main_gpu = params.main_gpu;
85+
model_params.tensor_split = params.tensor_split;
86+
model_params.use_mmap = params.use_mmap;
87+
model_params.use_mlock = params.use_mlock;
88+
8389
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
8490
if (model == NULL) {
8591
fprintf(stderr , "%s: error: unable to load model\n" , __func__);

0 commit comments

Comments
 (0)