Skip to content

Commit 0190b62

Browse files
slaren authored and pockers21 committed
ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (ggml-org#12871)
* ggml : add SSE 4.2 variant for CPUs without AVX
* ggml : add x64 base ABI variant
1 parent 0da1f2c commit 0190b62

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

ggml/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ message(DEBUG "INS_ENB : ${INS_ENB}")
107107
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
108108
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
109109
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
110+
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
110111
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
111112
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
112113
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})

ggml/src/CMakeLists.txt

+9-6
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name)
267267
set(GGML_CPU_TAG_NAME ${tag_name})
268268
# other: OPENMP LLAMAFILE CPU_HBM
269269
foreach (feat NATIVE
270+
SSE42
270271
AVX AVX2 BMI2 AVX_VNNI FMA F16C
271272
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
272273
AMX_TILE AMX_INT8 AMX_BF16)
@@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS)
286287
if (NOT GGML_BACKEND_DL)
287288
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
288289
endif()
289-
ggml_add_cpu_backend_variant(sandybridge AVX)
290-
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
291-
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
292-
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
293-
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
290+
ggml_add_cpu_backend_variant(x64)
291+
ggml_add_cpu_backend_variant(sse42 SSE42)
292+
ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
293+
ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
294+
ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
295+
ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
296+
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
294297
if (NOT MSVC)
295298
# MSVC doesn't support AMX
296-
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
299+
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
297300
endif()
298301
elseif (GGML_CPU)
299302
ggml_add_cpu_backend_variant_impl("")

ggml/src/ggml-cpu/CMakeLists.txt

+5-3
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
222222
elseif (GGML_AVX)
223223
list(APPEND ARCH_FLAGS /arch:AVX)
224224
list(APPEND ARCH_DEFINITIONS GGML_AVX)
225-
else ()
225+
elseif (GGML_SSE42)
226226
list(APPEND ARCH_FLAGS /arch:SSE4.2)
227227
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
228228
endif()
@@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
237237
if (GGML_NATIVE)
238238
list(APPEND ARCH_FLAGS -march=native)
239239
else ()
240-
list(APPEND ARCH_FLAGS -msse4.2)
241-
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
240+
if (GGML_SSE42)
241+
list(APPEND ARCH_FLAGS -msse4.2)
242+
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
243+
endif()
242244
if (GGML_F16C)
243245
list(APPEND ARCH_FLAGS -mf16c)
244246
list(APPEND ARCH_DEFINITIONS GGML_F16C)

ggml/src/ggml-cpu/cpu-feats-x86.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ void test_x86_is() {
263263
static int ggml_backend_cpu_x86_score() {
264264
// FIXME: this does not check for OS support
265265

266-
int score = 0;
266+
int score = 1;
267267
cpuid_x86 is;
268268

269269
#ifdef GGML_FMA

0 commit comments

Comments
 (0)