@@ -40,6 +40,15 @@ export enum GGMLFileQuantizationType {
40
40
Q4_0_8_8 = 35 ,
41
41
TQ1_0 = 36 ,
42
42
TQ2_0 = 37 ,
43
+
44
+ // custom quants used by unsloth
45
+ // they are not official quantization-scheme enum values in GGUF; they are listed here only for naming
46
+ Q2_K_XL = 1000 ,
47
+ Q3_K_XL = 1001 ,
48
+ Q4_K_XL = 1002 ,
49
+ Q5_K_XL = 1003 ,
50
+ Q6_K_XL = 1004 ,
51
+ Q8_K_XL = 1005 ,
43
52
}
44
53
45
54
// Object.values() on the enum yields both string and non-string entries;
// keep only the string entries (the member names).
const ggufQuants = Object.values(GGMLFileQuantizationType).filter(
	(value): value is string => typeof value === "string"
);
@@ -58,32 +67,36 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
58
67
GGMLFileQuantizationType . F32 ,
59
68
GGMLFileQuantizationType . BF16 ,
60
69
GGMLFileQuantizationType . F16 ,
70
+ GGMLFileQuantizationType . Q8_K_XL ,
61
71
GGMLFileQuantizationType . Q8_0 ,
62
72
63
73
// 6-bit quantizations
74
+ GGMLFileQuantizationType . Q6_K_XL ,
64
75
GGMLFileQuantizationType . Q6_K ,
65
76
66
77
// 5-bit quantizations
67
- GGMLFileQuantizationType . Q5_0 ,
68
- GGMLFileQuantizationType . Q5_1 ,
69
78
GGMLFileQuantizationType . Q5_K_M ,
70
79
GGMLFileQuantizationType . Q5_K_S ,
80
+ GGMLFileQuantizationType . Q5_0 ,
81
+ GGMLFileQuantizationType . Q5_1 ,
71
82
72
83
// 4-bit quantizations
84
+ GGMLFileQuantizationType . Q4_K_XL ,
73
85
GGMLFileQuantizationType . Q4_K_M ,
74
86
GGMLFileQuantizationType . Q4_K_S ,
75
87
GGMLFileQuantizationType . IQ4_NL ,
76
88
GGMLFileQuantizationType . IQ4_XS ,
77
89
GGMLFileQuantizationType . Q4_0_4_4 ,
78
90
GGMLFileQuantizationType . Q4_0_4_8 ,
79
91
GGMLFileQuantizationType . Q4_0_8_8 ,
80
- GGMLFileQuantizationType . Q4_0 ,
81
92
GGMLFileQuantizationType . Q4_1_SOME_F16 ,
93
+ GGMLFileQuantizationType . Q4_0 ,
82
94
GGMLFileQuantizationType . Q4_1 ,
83
95
GGMLFileQuantizationType . Q4_2 ,
84
96
GGMLFileQuantizationType . Q4_3 ,
85
97
86
98
// 3-bit quantizations
99
+ GGMLFileQuantizationType . Q3_K_XL ,
87
100
GGMLFileQuantizationType . Q3_K_L ,
88
101
GGMLFileQuantizationType . Q3_K_M ,
89
102
GGMLFileQuantizationType . Q3_K_S ,
@@ -93,6 +106,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
93
106
GGMLFileQuantizationType . IQ3_XXS ,
94
107
95
108
// 2-bit quantizations
109
+ GGMLFileQuantizationType . Q2_K_XL ,
96
110
GGMLFileQuantizationType . Q2_K ,
97
111
GGMLFileQuantizationType . Q2_K_S ,
98
112
GGMLFileQuantizationType . IQ2_M ,
0 commit comments