Skip to content

Commit 94d1b3b

Browse files
authored
use _wfopen instead of fopen on Windows (#6248)
also fix missing #defines before windows.h, and BPE LF token on MSVC
1 parent 9556217 commit 94d1b3b

File tree

3 files changed

+69
-9
lines changed

3 files changed

+69
-9
lines changed

ggml.c

+61-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "ggml-impl.h"
55
#include "ggml-quants.h"
6+
#include "ggml.h"
67

78
#if defined(_MSC_VER) || defined(__MINGW32__)
89
#include <malloc.h> // using malloc.h with MSC/MINGW
@@ -43,6 +44,10 @@
4344

4445
#if defined(_WIN32)
4546

47+
#define WIN32_LEAN_AND_MEAN
48+
#ifndef NOMINMAX
49+
#define NOMINMAX
50+
#endif
4651
#include <windows.h>
4752

4853
typedef volatile LONG atomic_int;
@@ -430,6 +435,57 @@ int64_t ggml_cycles_per_ms(void) {
430435
#define ggml_perf_cycles_per_ms() 0
431436
#endif
432437

438+
//
439+
// cross-platform UTF-8 file paths
440+
//
441+
442+
#ifdef _WIN32
443+
// Converts a NUL-terminated UTF-8 string to a newly allocated wide-character string.
//
// mbs:     input string, interpreted as CP_UTF8
// returns: buffer obtained from GGML_MALLOC (release it with GGML_FREE),
//          or NULL with errno set to EINVAL when either conversion pass reports failure
static wchar_t * ggml_mbstowcs(const char * mbs) {
    // first pass: no output buffer, only ask how many wide characters are required
    const int n_wchars = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (n_wchars == 0) {
        errno = EINVAL;
        return NULL;
    }

    wchar_t * wide = GGML_MALLOC(n_wchars * sizeof(wchar_t));

    // second pass: perform the actual conversion into the buffer
    if (MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wide, n_wchars) != 0) {
        return wide;
    }

    // conversion failed: do not leak the buffer
    GGML_FREE(wide);
    errno = EINVAL;
    return NULL;
}
460+
#endif
461+
462+
// Opens a file, accepting a UTF-8 path on every platform.
//
// On Windows the path is converted to a wide string and the file is opened with
// _wfopen; elsewhere this is a plain fopen.
//
// fname:   NUL-terminated path, UTF-8 encoded
// mode:    fopen-style mode string (single-byte characters, e.g. "rb")
// returns: the opened stream, or NULL on failure
FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    // convert fname (UTF-8); ggml_mbstowcs sets errno when it fails
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (!wfname) {
        return NULL;
    }

    // convert mode (ANSI): one wchar_t per input byte, terminator included.
    // the buffer has to be sized in wchar_t units - sizing it in bytes makes
    // the copy below write past the end of the allocation
    const size_t n_mode = strlen(mode) + 1;
    wchar_t * wmode = GGML_MALLOC(n_mode * sizeof(wchar_t));
    if (!wmode) {
        GGML_FREE(wfname);
        errno = ENOMEM;
        return NULL;
    }
    for (size_t i = 0; i < n_mode; i++) {
        // go through unsigned char so a negative char is not sign-extended
        wmode[i] = (wchar_t)(unsigned char)mode[i];
    }

    // open file
    FILE * file = _wfopen(wfname, wmode);

    // keep the errno of _wfopen intact across the frees so that callers can report it
    const int open_errno = errno;
    GGML_FREE(wfname);
    GGML_FREE(wmode);
    errno = open_errno;

    return file;
#else
    return fopen(fname, mode);
#endif
}
488+
433489
//
434490
// cache line
435491
//
@@ -18739,7 +18795,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
1873918795

1874018796
// write binary data
1874118797
{
18742-
FILE * fout = fopen(fname, "wb");
18798+
FILE * fout = ggml_fopen(fname, "wb");
1874318799

1874418800
if (!fout) {
1874518801
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
@@ -18877,7 +18933,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
1887718933

1887818934
// read file into data
1887918935
{
18880-
FILE * fin = fopen(fname, "rb");
18936+
FILE * fin = ggml_fopen(fname, "rb");
1888118937
if (!fin) {
1888218938
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
1888318939
return result;
@@ -19213,7 +19269,7 @@ static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node,
1921319269
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
1921419270
char color[16];
1921519271

19216-
FILE * fp = fopen(filename, "w");
19272+
FILE * fp = ggml_fopen(filename, "w");
1921719273
GGML_ASSERT(fp);
1921819274

1921919275
fprintf(fp, "digraph G {\n");
@@ -20531,7 +20587,7 @@ struct gguf_context * gguf_init_empty(void) {
2053120587
}
2053220588

2053320589
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
20534-
FILE * file = fopen(fname, "rb");
20590+
FILE * file = ggml_fopen(fname, "rb");
2053520591
if (!file) {
2053620592
return NULL;
2053720593
}
@@ -21486,7 +21542,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
2148621542
}
2148721543

2148821544
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
21489-
FILE * file = fopen(fname, "wb");
21545+
FILE * file = ggml_fopen(fname, "wb");
2149021546
if (!file) {
2149121547
GGML_ASSERT(false && "failed to open file for writing");
2149221548
}

ggml.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,10 @@
214214
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
215215
#endif
216216

217-
#include <stdint.h>
218-
#include <stddef.h>
219217
#include <stdbool.h>
218+
#include <stddef.h>
219+
#include <stdint.h>
220+
#include <stdio.h>
220221

221222
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
222223
#define GGML_FILE_VERSION 1
@@ -708,6 +709,9 @@ extern "C" {
708709

709710
GGML_API void ggml_print_backtrace(void);
710711

712+
// accepts a UTF-8 path, even on Windows
713+
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
714+
711715
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
712716
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
713717

llama.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,7 @@ struct llama_file {
10651065
size_t size;
10661066

10671067
llama_file(const char * fname, const char * mode) {
1068-
fp = std::fopen(fname, mode);
1068+
fp = ggml_fopen(fname, mode);
10691069
if (fp == NULL) {
10701070
throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
10711071
}
@@ -4006,7 +4006,7 @@ static void llm_load_vocab(
40064006
} else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
40074007
vocab.linefeed_id = vocab.special_pad_id;
40084008
} else {
4009-
const std::vector<int> ids = llama_tokenize_internal(vocab, "\u010A", false);
4009+
const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
40104010
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
40114011
vocab.linefeed_id = ids[0];
40124012
}

0 commit comments

Comments
 (0)