File tree 3 files changed +12
-7
lines changed
3 files changed +12
-7
lines changed Original file line number Diff line number Diff line change @@ -88,6 +88,7 @@ endif()
88
88
# 3rd party libs
89
89
option (LLAMA_ACCELERATE "llama: enable Accelerate framework" ON )
90
90
option (LLAMA_BLAS "llama: use BLAS" OFF )
91
+ option (LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON )
91
92
set (LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor" )
92
93
option (LLAMA_CUDA "llama: use CUDA" OFF )
93
94
option (LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF )
@@ -286,6 +287,7 @@ if (LLAMA_METAL)
286
287
${METALKIT_FRAMEWORK}
287
288
)
288
289
endif ()
290
+
289
291
if (LLAMA_BLAS)
290
292
if (LLAMA_STATIC)
291
293
set (BLA_STATIC ON )
@@ -368,6 +370,10 @@ if (LLAMA_BLAS)
368
370
endif ()
369
371
endif ()
370
372
373
+ if (LLAMA_LLAMAFILE)
374
+ add_compile_definitions (GGML_USE_LLAMAFILE)
375
+ endif ()
376
+
371
377
if (LLAMA_QKK_64)
372
378
add_compile_definitions (GGML_QKK_64)
373
379
endif ()
Original file line number Diff line number Diff line change @@ -222,6 +222,8 @@ endif # LLAMA_DISABLE_LOGS
222
222
# disable ggml.c's use of sgemm.cpp
223
223
ifdef LLAMA_NO_LLAMAFILE
224
224
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225
+ else
226
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
225
227
endif
226
228
227
229
# warnings
Original file line number Diff line number Diff line change 33
33
#include <unistd.h>
34
34
#endif
35
35
36
- #ifndef GGML_USE_LLAMAFILE
37
36
#ifdef __ARM_FEATURE_MATMUL_INT8
38
- #define GGML_USE_LLAMAFILE 0
39
- #else
40
- #define GGML_USE_LLAMAFILE 1
41
- #endif
37
+ #undef GGML_USE_LLAMAFILE
42
38
#endif
43
39
44
40
#if defined(_MSC_VER)
@@ -10879,8 +10875,9 @@ UseGgmlGemm1:;
10879
10875
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
10880
10876
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
10881
10877
nb01/ggml_type_size(src0->type),
10882
- (const char *)wdata + (nb12/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i12 +
10883
- nb13/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i13),
10878
+ (const char *)wdata + ggml_row_size(vec_dot_type,
10879
+ nb12/ggml_type_size(src1->type)*i12 +
10880
+ nb13/ggml_type_size(src1->type)*i13),
10884
10881
row_size/ggml_type_size(vec_dot_type),
10885
10882
(char *)dst->data + i12*nb2 + i13*nb3,
10886
10883
nb1/ggml_type_size(dst->type),
You can’t perform that action at this time.
0 commit comments