Skip to content

Commit 7dc218e

Browse files
committed
gguf: add custom Q_K_XL quants
1 parent 29150f4 commit 7dc218e

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

packages/tasks/src/gguf.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ export enum GGMLFileQuantizationType {
4040
Q4_0_8_8 = 35,
4141
TQ1_0 = 36,
4242
TQ2_0 = 37,
43+
44+
// custom quants used by unsloth
45+
// they are not official quantization-scheme enum values in GGUF; they are listed here only for naming
46+
Q2_K_XL = 1000,
47+
Q3_K_XL = 1001,
48+
Q4_K_XL = 1002,
49+
Q5_K_XL = 1003,
50+
Q6_K_XL = 1004,
51+
Q8_K_XL = 1005,
4352
}
4453

4554
// Names of all quantization types. A numeric enum also carries reverse
// (value -> name) entries at runtime, so Object.values() yields both the
// member names and their numeric values; only the string names are kept.
const ggufQuants = Object.values(GGMLFileQuantizationType).filter(
	(entry): entry is string => typeof entry === "string"
);
@@ -58,32 +67,36 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
5867
GGMLFileQuantizationType.F32,
5968
GGMLFileQuantizationType.BF16,
6069
GGMLFileQuantizationType.F16,
70+
GGMLFileQuantizationType.Q8_K_XL,
6171
GGMLFileQuantizationType.Q8_0,
6272

6373
// 6-bit quantizations
74+
GGMLFileQuantizationType.Q6_K_XL,
6475
GGMLFileQuantizationType.Q6_K,
6576

6677
// 5-bit quantizations
67-
GGMLFileQuantizationType.Q5_0,
68-
GGMLFileQuantizationType.Q5_1,
6978
GGMLFileQuantizationType.Q5_K_M,
7079
GGMLFileQuantizationType.Q5_K_S,
80+
GGMLFileQuantizationType.Q5_0,
81+
GGMLFileQuantizationType.Q5_1,
7182

7283
// 4-bit quantizations
84+
GGMLFileQuantizationType.Q4_K_XL,
7385
GGMLFileQuantizationType.Q4_K_M,
7486
GGMLFileQuantizationType.Q4_K_S,
7587
GGMLFileQuantizationType.IQ4_NL,
7688
GGMLFileQuantizationType.IQ4_XS,
7789
GGMLFileQuantizationType.Q4_0_4_4,
7890
GGMLFileQuantizationType.Q4_0_4_8,
7991
GGMLFileQuantizationType.Q4_0_8_8,
80-
GGMLFileQuantizationType.Q4_0,
8192
GGMLFileQuantizationType.Q4_1_SOME_F16,
93+
GGMLFileQuantizationType.Q4_0,
8294
GGMLFileQuantizationType.Q4_1,
8395
GGMLFileQuantizationType.Q4_2,
8496
GGMLFileQuantizationType.Q4_3,
8597

8698
// 3-bit quantizations
99+
GGMLFileQuantizationType.Q3_K_XL,
87100
GGMLFileQuantizationType.Q3_K_L,
88101
GGMLFileQuantizationType.Q3_K_M,
89102
GGMLFileQuantizationType.Q3_K_S,
@@ -93,6 +106,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
93106
GGMLFileQuantizationType.IQ3_XXS,
94107

95108
// 2-bit quantizations
109+
GGMLFileQuantizationType.Q2_K_XL,
96110
GGMLFileQuantizationType.Q2_K,
97111
GGMLFileQuantizationType.Q2_K_S,
98112
GGMLFileQuantizationType.IQ2_M,

0 commit comments

Comments
 (0)