From ad9a353d15bc4e79b9d6ff70d5ff7bb22c92222f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Apr 2024 12:41:20 +0200 Subject: [PATCH 1/2] DAG: Fix widening of fptrunc_round vectors --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 8 ++- .../test/CodeGen/AMDGPU/llvm.fptrunc.round.ll | 53 +++++++++++++++---- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 0483f7c74f91a..9c855e5585531 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -984,7 +984,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_XRINT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); - SDValue WidenVecRes_IS_FPCLASS(SDNode *N); + SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N); SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8776d89f4c5bd..99ba4e3c7aa47 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4242,7 +4242,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::IS_FPCLASS: - Res = WidenVecRes_IS_FPCLASS(N); + case ISD::FPTRUNC_ROUND: + Res = WidenVecRes_UnarySameEltsWithScalarArg(N); break; case ISD::FLDEXP: @@ -5004,7 +5005,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } -SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { +/// Result and first source operand are different scalar types, but must have +/// the same number of elements. There is an additional control argument which +/// should be passed thruogh unchanged. +SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) { SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll index 4526efc0d8fa4..b8c16d2ed3b2f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll @@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x ret void } -; FIXME -; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) { -; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward") -; ret <3 x half> %res -; } +define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) { +; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-NEXT: ; return to shader part epilog +; +; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-NEXT: ; return to shader part epilog + %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward") + ret <3 x half> %res +} -; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) { -; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward") -; ret <3 x half> %res -; } +define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) { +; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-NEXT: ; return to shader part epilog +; +; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-NEXT: ; return to shader part epilog + %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward") + ret <3 x half> %res +} define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) { ; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward: From ccc91e3aa47260092c87a9bc5df94ac72545c3a7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Apr 2024 14:34:54 +0200 Subject: [PATCH 2/2] Fix comment typo --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 99ba4e3c7aa47..985c9f16ab97c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5007,7 +5007,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { /// Result and first source operand are different scalar types, but must have /// the same number of elements. There is an additional control argument which -/// should be passed thruogh unchanged. +/// should be passed through unchanged. SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) { SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));