Skip to content

Commit 5e7f05b

Browse files
committed
[Bugfix] Temporarily disable gptq_bitblas on ROCm
Signed-off-by: Yan Cangang <nalanzeyu@gmail.com>
1 parent a39203f commit 5e7f05b

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

vllm/model_executor/layers/quantization/gptq_bitblas.py

+5
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
PackedColumnParameter,
2525
PackedvLLMParameter,
2626
RowvLLMParameter)
27+
from vllm.platforms import current_platform
2728
from vllm.scalar_type import scalar_types
2829

2930
logger = init_logger(__name__)
@@ -190,6 +191,10 @@ def is_gptq_bitblas_compatible(cls, quant_config: Dict[str, Any]):
190191
sym = quant_config.get("sym")
191192
desc_act = quant_config.get("desc_act")
192193

194+
# Temporarily disabled on all non-CUDA platforms (added to turn it off on ROCm).
195+
if not current_platform.is_cuda():
196+
return False
197+
193198
# If we cannot find the info needed in the config, cannot convert.
194199
if (num_bits is None or group_size is None or sym is None
195200
or desc_act is None):

0 commit comments

Comments
 (0)