1
1
#pragma once
2
2
3
+ //
4
+ // GGML Tensor Library
5
+ //
6
+ // This documentation is still a work in progress.
7
+ // If you wish some specific topics to be covered, feel free to drop a comment:
8
+ //
9
+ // https://github.com/ggerganov/whisper.cpp/issues/40
10
+ //
11
+ // ## Overview
12
+ //
13
+ // This library implements:
14
+ //
15
+ // - a set of tensor operations
16
+ // - automatic differentiation
17
+ // - basic optimization algorithms
18
+ //
19
+ // The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes,
20
+ // but is not limited to, the following:
21
+ //
22
+ // - linear regression
23
+ // - support vector machines
24
+ // - neural networks
25
+ //
26
+ // The library allows the user to define a certain function using the available tensor operations. This function
27
+ // definition is represented internally via a computation graph. Each tensor operation in the function definition
28
+ // corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
29
+ // function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
30
+ // using one of the available optimization algorithms.
31
+ //
32
+ // For example, here we define the function: f(x) = a*x^2 + b
33
+ //
34
+ // {
35
+ // struct ggml_init_params params = {
36
+ // .mem_size = 16*1024*1024,
37
+ // .mem_buffer = NULL,
38
+ // };
39
+ //
40
+ // // memory allocation happens here
41
+ // struct ggml_context * ctx = ggml_init(params);
42
+ //
43
+ // struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
44
+ //
45
+ // ggml_set_param(ctx, x); // x is an input variable
46
+ //
47
+ // struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
48
+ // struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
49
+ // struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
50
+ // struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
51
+ //
52
+ // ...
53
+ // }
54
+ //
55
+ // Notice that the function definition above does not involve any actual computation. The computation is performed only
56
+ // when the user explicitly requests it. For example, to compute the function's value at x = 2.0:
57
+ //
58
+ // {
59
+ // ...
60
+ //
61
+ // struct ggml_cgraph gf = ggml_build_forward(f);
62
+ //
63
+ // // set the input variable and parameter values
64
+ // ggml_set_f32(x, 2.0f);
65
+ // ggml_set_f32(a, 3.0f);
66
+ // ggml_set_f32(b, 4.0f);
67
+ //
68
+ // ggml_graph_compute(ctx, &gf);
69
+ //
70
+ // printf("f = %f\n", ggml_get_f32_1d(f, 0));
71
+ //
72
+ // ...
73
+ // }
74
+ //
75
+ // The actual computation is performed in the ggml_graph_compute() function.
76
+ //
77
+ // The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
78
+ // ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
79
+ // in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
80
+ // buffer and, after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
81
+ // actually needed.
82
+ //
83
+ // The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
84
+ // differentiation and optimization algorithms.
85
+ //
86
+ // The described approach allows the user to define the function graph once and then compute its forward or backward graphs
87
+ // multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
88
+ // the user can avoid the memory allocation overhead at runtime.
89
+ //
90
+ // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
91
+ // citizens, but in theory the library can be extended to support FP8 and integer data types.
92
+ //
93
+ // Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary
94
+ // and binary operations. Most of the available operations fall into one of these two categories. With time, it became
95
+ // clear that the library needs to support more complex operations. The way to support these operations is not clear
96
+ // yet, but a few examples are demonstrated in the following operations:
97
+ //
98
+ // - ggml_permute()
99
+ // - ggml_conv_1d_1s()
100
+ // - ggml_conv_1d_2s()
101
+ //
102
+ // For each tensor operator, the library implements a forward and backward computation function. The forward function
103
+ // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
104
+ // input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a
105
+ // calculus class, or watch the following video:
106
+ //
107
+ // What is Automatic Differentiation?
108
+ // https://www.youtube.com/watch?v=wG_nF1awSSY
109
+ //
110
+ //
111
+ // ## Tensor data (struct ggml_tensor)
112
+ //
113
+ // The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
114
+ // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
115
+ // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
116
+ //
117
+ // {
118
+ // struct ggml_tensor * c = ggml_add(ctx, a, b);
119
+ //
120
+ // assert(c->src[0] == a);
121
+ // assert(c->src[1] == b);
122
+ // }
123
+ //
124
+ // The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
125
+ // number of elements in each dimension ("ne") as well as the stride in bytes for each dimension ("nb"). This allows
126
+ // storing tensors that are not contiguous in memory, which is useful for operations such as transposition and
127
+ // permutation. All tensor operations have to take the stride into account and not assume that the tensor is
128
+ // contiguous in memory.
129
+ //
130
+ // The data of the tensor is accessed via the "data" pointer. For example:
131
+ //
132
+ // {
133
+ // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
134
+ //
135
+ // // a[1, 2] = 1.0f;
136
+ // *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
137
+ //
138
+ // // a[0, 2] = 2.0f;
139
+ // *(float *) ((char *) a->data + 2*a->nb[1] + 0*a->nb[0]) = 2.0f;
140
+ //
141
+ // ...
142
+ // }
143
+ //
144
+ // Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
145
+ //
146
+ // ## The matrix multiplication operator (ggml_mul_mat)
147
+ //
148
+ // TODO
149
+ //
150
+ //
151
+ // ## Multi-threading
152
+ //
153
+ // TODO
154
+ //
155
+ //
156
+ // ## Overview of ggml.c
157
+ //
158
+ // TODO
159
+ //
160
+ //
161
+ // ## SIMD optimizations
162
+ //
163
+ // TODO
164
+ //
165
+ //
166
+ // ## Debugging ggml
167
+ //
168
+ // TODO
169
+ //
170
+ //
171
+
3
172
#ifdef __cplusplus
4
173
extern "C" {
5
174
#endif
@@ -21,7 +190,8 @@ typedef __fp16 ggml_fp16_t;
21
190
typedef uint16_t ggml_fp16_t ;
22
191
#endif
23
192
24
- float ggml_fp16_to_fp32 (ggml_fp16_t x );
193
+ // convert FP16 <-> FP32
194
+ float ggml_fp16_to_fp32 (ggml_fp16_t x );
25
195
ggml_fp16_t ggml_fp32_to_fp16 (float x );
26
196
27
197
struct ggml_object ;
@@ -36,6 +206,7 @@ enum ggml_type {
36
206
GGML_TYPE_COUNT ,
37
207
};
38
208
209
+ // available tensor operations:
39
210
enum ggml_op {
40
211
GGML_OP_NONE = 0 ,
41
212
@@ -136,7 +307,7 @@ struct ggml_init_params {
136
307
void * mem_buffer ; // if NULL, memory will be allocated internally
137
308
};
138
309
139
- void ggml_time_init (void );
310
+ void ggml_time_init (void ); // call this once at the beginning of the program
140
311
int64_t ggml_time_ms (void );
141
312
int64_t ggml_time_us (void );
142
313
int64_t ggml_cycles (void );
0 commit comments