|
56 | 56 | from .quant_primitives import (
|
57 | 57 | MappingType,
|
58 | 58 | ZeroPointDomain,
|
| 59 | + TorchAODType, |
59 | 60 | choose_qparams_affine,
|
60 |
| - dequantize_affine, |
61 | 61 | quantize_affine,
|
| 62 | + dequantize_affine, |
| 63 | + choose_qparams_affine_floatx, |
| 64 | + choose_qparams_affine_with_min_max, |
| 65 | + quantize_affine_floatx, |
| 66 | + dequantize_affine_floatx, |
| 67 | + fake_quantize_affine, |
| 68 | + fake_quantize_affine_cachemask, |
| 69 | + choose_qparams_and_quantize_affine_hqq, |
62 | 70 | )
|
63 | 71 | from .smoothquant import (
|
64 | 72 | SmoothFakeDynamicallyQuantizedLinear,
|
|
74 | 82 | compute_error,
|
75 | 83 | )
|
76 | 84 | from .weight_only import WeightOnlyInt8QuantLinear
|
77 |
| -from .linear_activation_weight_observed_tensor import ( |
78 |
| - to_linear_activation_weight_observed, |
79 |
| -) |
80 | 85 |
|
81 | 86 | __all__ = [
|
82 | 87 | # top level API - auto
|
|
94 | 99 | "int8_weight_only",
|
95 | 100 | "float8_weight_only",
|
96 | 101 | "float8_dynamic_activation_float8_weight",
|
97 |
| - "float8_static_activation_float8_weight" |
| 102 | + "float8_static_activation_float8_weight", |
98 | 103 | "uintx_weight_only",
|
99 | 104 | "fpx_weight_only",
|
100 | 105 |
|
101 | 106 | # smooth quant - subject to change
|
102 |
| - "swap_conv2d_1x1_to_linear" |
| 107 | + "swap_conv2d_1x1_to_linear", |
103 | 108 | "get_scale",
|
104 | 109 | "SmoothFakeDynQuantMixin",
|
105 | 110 | "SmoothFakeDynamicallyQuantizedLinear",
|
|
115 | 120 | "AffineQuantizedObserverBase",
|
116 | 121 |
|
117 | 122 | # quant primitive ops
|
118 |
| - "choose_qprams_affine", |
| 123 | + "choose_qparams_affine", |
119 | 124 | "choose_qparams_affine_with_min_max",
|
120 | 125 | "choose_qparams_affine_floatx",
|
121 | 126 | "quantize_affine",
|
|
0 commit comments