Skip to content

Commit fb79973

Browse files
ggerganov authored and tybalex committed
ggml : fix llamafile sgemm wdata offsets (ggml-org#6710)
ggml-ci
1 parent 96a05b5 commit fb79973

File tree

3 files changed

+12
-7
lines changed

3 files changed

+12
-7
lines changed

CMakeLists.txt

+6
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ endif()
8888
# 3rd party libs
8989
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
9090
option(LLAMA_BLAS "llama: use BLAS" OFF)
91+
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON)
9192
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
9293
option(LLAMA_CUDA "llama: use CUDA" OFF)
9394
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -286,6 +287,7 @@ if (LLAMA_METAL)
286287
${METALKIT_FRAMEWORK}
287288
)
288289
endif()
290+
289291
if (LLAMA_BLAS)
290292
if (LLAMA_STATIC)
291293
set(BLA_STATIC ON)
@@ -368,6 +370,10 @@ if (LLAMA_BLAS)
368370
endif()
369371
endif()
370372

373+
if (LLAMA_LLAMAFILE)
374+
add_compile_definitions(GGML_USE_LLAMAFILE)
375+
endif()
376+
371377
if (LLAMA_QKK_64)
372378
add_compile_definitions(GGML_QKK_64)
373379
endif()

Makefile

+2
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,8 @@ endif # LLAMA_DISABLE_LOGS
222222
# disable ggml.c's use of sgemm.cpp
223223
ifdef LLAMA_NO_LLAMAFILE
224224
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225+
else
226+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
225227
endif
226228

227229
# warnings

ggml.c

+4 -7
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,8 @@
3333
#include <unistd.h>
3434
#endif
3535

36-
#ifndef GGML_USE_LLAMAFILE
3736
#ifdef __ARM_FEATURE_MATMUL_INT8
38-
#define GGML_USE_LLAMAFILE 0
39-
#else
40-
#define GGML_USE_LLAMAFILE 1
41-
#endif
37+
#undef GGML_USE_LLAMAFILE
4238
#endif
4339

4440
#if defined(_MSC_VER)
@@ -10879,8 +10875,9 @@ UseGgmlGemm1:;
1087910875
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
1088010876
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
1088110877
nb01/ggml_type_size(src0->type),
10882-
(const char *)wdata + (nb12/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i12 +
10883-
nb13/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i13),
10878+
(const char *)wdata + ggml_row_size(vec_dot_type,
10879+
nb12/ggml_type_size(src1->type)*i12 +
10880+
nb13/ggml_type_size(src1->type)*i13),
1088410881
row_size/ggml_type_size(vec_dot_type),
1088510882
(char *)dst->data + i12*nb2 + i13*nb3,
1088610883
nb1/ggml_type_size(dst->type),

0 commit comments

Comments
 (0)