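comma: add missing commas in `__all__`, fix the `choose_qparams_affine` typo, and update quant primitive imports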

jerryzh168 · jerryzh168 · commit 2b0f78b4cf0e · 2024-11-13T11:06:52.000-08:00
diff --git a/torchao/quantization/__init__.py b/torchao/quantization/__init__.py
@@ -56,9 +56,17 @@
 from .quant_primitives import (
     MappingType,
     ZeroPointDomain,
+    TorchAODType,
     choose_qparams_affine,
-    dequantize_affine,
     quantize_affine,
+    dequantize_affine,
+    choose_qparams_affine_floatx,
+    choose_qparams_affine_with_min_max,
+    quantize_affine_floatx,
+    dequantize_affine_floatx,
+    fake_quantize_affine,
+    fake_quantize_affine_cachemask,
+    choose_qparams_and_quantize_affine_hqq,
 )
 from .smoothquant import (
     SmoothFakeDynamicallyQuantizedLinear,
@@ -74,9 +82,6 @@
     compute_error,
 )
 from .weight_only import WeightOnlyInt8QuantLinear
-from .linear_activation_weight_observed_tensor import (
-    to_linear_activation_weight_observed,
-)
 
 __all__ = [
     # top level API - auto
@@ -94,12 +99,12 @@
     "int8_weight_only",
     "float8_weight_only",
     "float8_dynamic_activation_float8_weight",
-    "float8_static_activation_float8_weight"
+    "float8_static_activation_float8_weight",
     "uintx_weight_only",
     "fpx_weight_only",
 
     # smooth quant - subject to change
-    "swap_conv2d_1x1_to_linear"
+    "swap_conv2d_1x1_to_linear",
     "get_scale",
     "SmoothFakeDynQuantMixin",
     "SmoothFakeDynamicallyQuantizedLinear",
@@ -115,7 +120,7 @@
     "AffineQuantizedObserverBase",
 
     # quant primitive ops
-    "choose_qprams_affine",
+    "choose_qparams_affine",
     "choose_qparams_affine_with_min_max",
     "choose_qparams_affine_floatx",
     "quantize_affine",
diff --git a/torchao/quantization/quant_primitives.py b/torchao/quantization/quant_primitives.py
@@ -10,7 +10,6 @@
 
 import torch
 
-from torchao.kernel.intmm import int_scaled_matmul, safe_int_mm
 from torchao.prototype.custom_fp_utils import (
     _f32_to_floatx_unpacked,
     _floatx_unpacked_to_f32,