Skip to content

Commit bd4b1da

Browse files
committed
Update torchao after pytorch/pytorch#129940
Summary: Fixes torchao code after the backward-compatibility (BC) breaking change in pytorch/pytorch#129940. Test Plan: python test/quantization/test_quant_api.py -k test_quantized_tensor_subclass_int4; python test/integration/test_integration.py -k test_save_load_int4woqtensors_2_cpu. Reviewers: Subscribers: Tasks: Tags:
1 parent 1029df3 commit bd4b1da

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

torchao/dtypes/affine_quantized_tensor.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,8 @@ def __tensor_unflatten__(
461461

462462
@classmethod
463463
def from_plain(cls, int_data, scale, zero_point, inner_k_tiles=8):
464-
packed_weight = torch.ops.aten._convert_weight_to_int4pack(int_data.to(torch.int32), inner_k_tiles)
464+
assert int_data.dtype == torch.uint8, "torch.ops.aten._convert_weight_to_int4pack expects `uint8` dtype"
465+
packed_weight = torch.ops.aten._convert_weight_to_int4pack(int_data, inner_k_tiles)
465466
scale = scale.reshape(int_data.shape[0], -1)
466467
zero_point = zero_point.reshape(int_data.shape[0], -1)
467468
scale_and_zero = pack_tinygemm_scales_and_zeros(scale, zero_point)

torchao/quantization/quant_api.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ def apply_int4_weight_only_quant(weight):
383383

384384
mapping_type = MappingType.ASYMMETRIC
385385
block_size = (1, group_size)
386-
target_dtype = torch.int32
386+
target_dtype = torch.uint8
387387
quant_min = 0
388388
quant_max = 15
389389
eps = 1e-6

torchao/quantization/utils.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -344,11 +344,13 @@ def groupwise_affine_quantize_tensor_from_qparams(
344344
assert w.dim() == 2
345345

346346
block_size = (1, groupsize)
347-
output_dtype = torch.int32
347+
output_dtype = torch.uint8
348348
quant_min = 0
349349
quant_max = 2 ** n_bit - 1
350350

351-
return quantize_affine(w, block_size, scales, zeros, output_dtype, quant_min, quant_max, zero_point_domain = ZeroPointDomain.FLOAT)
351+
int_data = quantize_affine(w, block_size, scales, zeros, output_dtype, quant_min, quant_max, zero_point_domain = ZeroPointDomain.FLOAT)
352+
int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
353+
return int_data
352354

353355
def groupwise_affine_dequantize_tensor_from_qparams(
354356
w_int4x8,

0 commit comments

Comments (0)