
Commit 436956a

Introduce bfloat16 support
Many models on Hugging Face (e.g. Mistral, TinyLLaMA) use bfloat16 as their canonical floating point format.

      ┌sign
      │
      │   ┌exponent
      │   │
      │   │      ┌mantissa
      │   │      │
      │┌──┴───┐┌─┴───┐
    0b0000000000000000 brain16

This encoding has the same number of exponent bits as float32. That makes conversion relatively straightforward, even in the absence of hardware support. For example, converting brain16 to binary32 means simply shifting 16 bits to the left.

      ┌sign
      │
      │   ┌exponent
      │   │
      │   │      ┌mantissa
      │   │      │
      │┌──┴───┐┌─┴───────────────────┐
    0b00000000000000000000000000000000 IEEE binary32

The issue is that converting weights from bf16 to fp16 loses 3 bits of exponent range. There is currently no way to evaluate models like Mistral at full fidelity, without f32, using llama.cpp.

      ┌sign
      │
      │  ┌exponent
      │  │
      │  │    ┌mantissa
      │  │    │
      │┌─┴─┐┌─┴──────┐
    0b0000000000000000 IEEE binary16

This change fixes that by adding a bf16 data type to GGML. Support for CPU inference has been implemented along with optimizations for the AVX2, AVX512, and AVX512BF16 ISAs. Perplexity on Mistral 7B v0.2 improves by roughly -0.0024 to -0.0046 compared to using fp16.
1 parent 37e7854 commit 436956a
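To make the shift-based conversion from the commit message concrete, here is a minimal C sketch of the bit-level mapping. The function names are illustrative rather than the ones added by this commit, and the float-to-bf16 direction shown uses plain truncation, whereas a production conversion would normally round to nearest even and take care of NaN.

    #include <stdint.h>
    #include <string.h>

    /* brain16 -> binary32: the 16 bf16 bits become the high half of the float. */
    static float bf16_bits_to_fp32(uint16_t h) {
        uint32_t u = (uint32_t)h << 16;
        float f;
        memcpy(&f, &u, sizeof f);
        return f;
    }

    /* binary32 -> brain16 by truncation: keep the sign, all 8 exponent bits,
       and the top 7 mantissa bits; the low 16 bits are discarded. */
    static uint16_t fp32_bits_to_bf16(float f) {
        uint32_t u;
        memcpy(&u, &f, sizeof u);
        return (uint16_t)(u >> 16);
    }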

File tree

7 files changed (+1732, -173 lines)


examples/finetune/finetune.cpp

+1 -1
@@ -575,7 +575,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
     GGML_ASSERT(tokens_input->type == GGML_TYPE_I32);
 
     auto add_to_f32 = [] (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
-        if (ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16) {
+        if (ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16 || a->type == GGML_TYPE_BF16) {
             return ggml_add_cast(ctx, a, b, GGML_TYPE_F32);
         } else if (a->type == GGML_TYPE_F32) {
             return ggml_add(ctx, a, b);

examples/quantize/quantize.cpp

+2 -1
@@ -47,7 +47,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B", },
     { "Q6_K",   LLAMA_FTYPE_MOSTLY_Q6_K,   " 5.15G, +0.0008 ppl @ LLaMA-v1-7B", },
     { "Q8_0",   LLAMA_FTYPE_MOSTLY_Q8_0,   " 6.70G, +0.0004 ppl @ LLaMA-v1-7B", },
-    { "F16",    LLAMA_FTYPE_MOSTLY_F16,    "13.00G              @ 7B", },
+    { "F16",    LLAMA_FTYPE_MOSTLY_F16,    "14.00G, -0.0020 ppl @ Mistral-7B", },
+    { "BF16",   LLAMA_FTYPE_MOSTLY_BF16,   "14.00G, -0.0050 ppl @ Mistral-7B", },
     { "F32",    LLAMA_FTYPE_ALL_F32,       "26.00G              @ 7B", },
     // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching.
     { "COPY",   LLAMA_FTYPE_ALL_F32,       "only copy tensors, no quantizing", },

ggml-impl.h

+3
@@ -260,6 +260,9 @@ size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml
 // return index, asserts if table is full
 size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
+#define GGML_FP32_TO_BF16(x) ggml_fp32_to_bf16(x)
+#define GGML_BF16_TO_FP32(x) ggml_bf16_to_fp32(x)
+
 #ifdef __cplusplus
 }
 #endif
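As a hedged illustration of how these macros can be used (the helper below is hypothetical, not part of this commit), a row of bf16 weights could be widened to f32 with a plain loop, assuming the ggml_bf16_t type that accompanies the macros and the usual ggml headers:

    // Hypothetical helper, not from this commit: widen n bf16 values to f32.
    static void bf16_row_to_f32(const ggml_bf16_t * x, float * y, int64_t n) {
        for (int64_t i = 0; i < n; ++i) {
            y[i] = GGML_BF16_TO_FP32(x[i]);
        }
    }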
