Skip to content

Commit c13436e

Browse files
authored
[SDAG][NVPTX] Add TLI hook to get preferred FP->INT opcode (#132470)
Extract the logic for choosing FP_TO_UINT vs FP_TO_SINT opcodes into a TLI hook. This hook can be overridden by targets that prefer not to use the default behavior of replacing FP_TO_UINT with FP_TO_SINT when both are custom. Implement an override for NVPTX to only change opcode when FP_TO_UINT is not legal and FP_TO_SINT is legal.
1 parent 8b34986 commit c13436e

File tree

5 files changed

+194
-20
lines changed

5 files changed

+194
-20
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

+28
Original file line numberDiff line numberDiff line change
@@ -3464,6 +3464,34 @@ class TargetLoweringBase {
34643464
return false;
34653465
}
34663466

3467+
// Get the preferred opcode for FP_TO_XINT nodes. Op is returned unchanged
3468+
// when it is Legal for ToVT. Otherwise, if the provided operation is an
3469+
// FP_TO_UINT (or its STRICT_/VP_ form) and the matching FP_TO_SINT is Legal
3470+
// or Custom, that SINT opcode is returned. If both UINT and SINT conversions
3471+
// are Custom, we choose SINT by default because that's the right thing on PPC.
3472+
virtual unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
3473+
EVT ToVT) const {
3474+
if (isOperationLegal(Op, ToVT))
3475+
return Op;
3476+
switch (Op) {
3477+
case ISD::FP_TO_UINT:
3478+
if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT))
3479+
return ISD::FP_TO_SINT;
3480+
break;
3481+
case ISD::STRICT_FP_TO_UINT:
3482+
if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT))
3483+
return ISD::STRICT_FP_TO_SINT;
3484+
break;
3485+
case ISD::VP_FP_TO_UINT:
3486+
if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT))
3487+
return ISD::VP_FP_TO_SINT;
3488+
break;
3489+
default:
3490+
break;
3491+
}
3492+
return Op;
3493+
}
3494+
34673495
/// Create the IR node for the given complex deinterleaving operation.
34683496
/// If one cannot be created using all the given inputs, nullptr should be
34693497
/// returned.

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

+2-20
Original file line numberDiff line numberDiff line change
@@ -849,28 +849,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
849849

850850
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
851851
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
852-
unsigned NewOpc = N->getOpcode();
852+
unsigned NewOpc =
853+
TLI.getPreferredFPToIntOpcode(N->getOpcode(), N->getValueType(0), NVT);
853854
SDLoc dl(N);
854855

855-
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
856-
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
857-
// and SINT conversions are Custom, there is no way to tell which is
858-
// preferable. We choose SINT because that's the right thing on PPC.)
859-
if (N->getOpcode() == ISD::FP_TO_UINT &&
860-
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
861-
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
862-
NewOpc = ISD::FP_TO_SINT;
863-
864-
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
865-
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
866-
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
867-
NewOpc = ISD::STRICT_FP_TO_SINT;
868-
869-
if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
870-
!TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
871-
TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
872-
NewOpc = ISD::VP_FP_TO_SINT;
873-
874856
SDValue Res;
875857
if (N->isStrictFPOpcode()) {
876858
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

+27
Original file line numberDiff line numberDiff line change
@@ -6214,6 +6214,33 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
62146214
return nullptr;
62156215
}
62166216

6217+
// Rather than default to SINT when both UINT and SINT are Custom, we only
6218+
// change the opcode (plain, STRICT_ or VP_) when UINT is not Legal and SINT is.
6219+
// UINT is preferred when both are Custom since unsigned CVT instructions can
6220+
// lead to slightly better SASS code with fewer instructions.
6221+
unsigned NVPTXTargetLowering::getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
6222+
EVT ToVT) const {
6223+
if (isOperationLegal(Op, ToVT))
6224+
return Op;
6225+
switch (Op) {
6226+
case ISD::FP_TO_UINT:
6227+
if (isOperationLegal(ISD::FP_TO_SINT, ToVT))
6228+
return ISD::FP_TO_SINT;
6229+
break;
6230+
case ISD::STRICT_FP_TO_UINT:
6231+
if (isOperationLegal(ISD::STRICT_FP_TO_SINT, ToVT))
6232+
return ISD::STRICT_FP_TO_SINT;
6233+
break;
6234+
case ISD::VP_FP_TO_UINT:
6235+
if (isOperationLegal(ISD::VP_FP_TO_SINT, ToVT))
6236+
return ISD::VP_FP_TO_SINT;
6237+
break;
6238+
default:
6239+
break;
6240+
}
6241+
return Op;
6242+
}
6243+
62176244
// Pin NVPTXTargetObjectFile's vtables to this file.
62186245
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
62196246

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

+3
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ class NVPTXTargetLowering : public TargetLowering {
282282
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
283283
AtomicOrdering Ord) const override;
284284

285+
unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
286+
EVT ToVT) const override;
287+
285288
private:
286289
const NVPTXSubtarget &STI; // cache the subtarget here
287290
mutable unsigned GlobalUniqueCallSite;
+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
3+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s
4+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
5+
6+
define i8 @cvt_u8_f32(float %x) {
7+
; CHECK-LABEL: cvt_u8_f32(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b16 %rs<2>;
10+
; CHECK-NEXT: .reg .b32 %r<2>;
11+
; CHECK-NEXT: .reg .f32 %f<2>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0:
14+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_u8_f32_param_0];
15+
; CHECK-NEXT: cvt.rzi.u16.f32 %rs1, %f1;
16+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
17+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
18+
; CHECK-NEXT: ret;
19+
%a = fptoui float %x to i8
20+
ret i8 %a
21+
}
22+
23+
define i8 @cvt_u8_f64(double %x) {
24+
; CHECK-LABEL: cvt_u8_f64(
25+
; CHECK: {
26+
; CHECK-NEXT: .reg .b16 %rs<2>;
27+
; CHECK-NEXT: .reg .b32 %r<2>;
28+
; CHECK-NEXT: .reg .f64 %fd<2>;
29+
; CHECK-EMPTY:
30+
; CHECK-NEXT: // %bb.0:
31+
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_u8_f64_param_0];
32+
; CHECK-NEXT: cvt.rzi.u16.f64 %rs1, %fd1;
33+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
34+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
35+
; CHECK-NEXT: ret;
36+
%a = fptoui double %x to i8
37+
ret i8 %a
38+
}
39+
40+
define float @cvt_f32_i8(i8 %x) {
41+
; CHECK-LABEL: cvt_f32_i8(
42+
; CHECK: {
43+
; CHECK-NEXT: .reg .b16 %rs<2>;
44+
; CHECK-NEXT: .reg .f32 %f<2>;
45+
; CHECK-EMPTY:
46+
; CHECK-NEXT: // %bb.0:
47+
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f32_i8_param_0];
48+
; CHECK-NEXT: cvt.rn.f32.u16 %f1, %rs1;
49+
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
50+
; CHECK-NEXT: ret;
51+
%a = uitofp i8 %x to float
52+
ret float %a
53+
}
54+
55+
define double @cvt_f64_i8(i8 %x) {
56+
; CHECK-LABEL: cvt_f64_i8(
57+
; CHECK: {
58+
; CHECK-NEXT: .reg .b16 %rs<2>;
59+
; CHECK-NEXT: .reg .f64 %fd<2>;
60+
; CHECK-EMPTY:
61+
; CHECK-NEXT: // %bb.0:
62+
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f64_i8_param_0];
63+
; CHECK-NEXT: cvt.rn.f64.u16 %fd1, %rs1;
64+
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
65+
; CHECK-NEXT: ret;
66+
%a = uitofp i8 %x to double
67+
ret double %a
68+
}
69+
70+
define float @cvt_f32_s8(i8 %x) {
71+
; CHECK-LABEL: cvt_f32_s8(
72+
; CHECK: {
73+
; CHECK-NEXT: .reg .b16 %rs<2>;
74+
; CHECK-NEXT: .reg .f32 %f<2>;
75+
; CHECK-EMPTY:
76+
; CHECK-NEXT: // %bb.0:
77+
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f32_s8_param_0];
78+
; CHECK-NEXT: cvt.rn.f32.s16 %f1, %rs1;
79+
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
80+
; CHECK-NEXT: ret;
81+
%a = sitofp i8 %x to float
82+
ret float %a
83+
}
84+
85+
define double @cvt_f64_s8(i8 %x) {
86+
; CHECK-LABEL: cvt_f64_s8(
87+
; CHECK: {
88+
; CHECK-NEXT: .reg .b16 %rs<2>;
89+
; CHECK-NEXT: .reg .f64 %fd<2>;
90+
; CHECK-EMPTY:
91+
; CHECK-NEXT: // %bb.0:
92+
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f64_s8_param_0];
93+
; CHECK-NEXT: cvt.rn.f64.s16 %fd1, %rs1;
94+
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
95+
; CHECK-NEXT: ret;
96+
%a = sitofp i8 %x to double
97+
ret double %a
98+
}
99+
100+
define i8 @cvt_s8_f32(float %x) {
101+
; CHECK-LABEL: cvt_s8_f32(
102+
; CHECK: {
103+
; CHECK-NEXT: .reg .b16 %rs<2>;
104+
; CHECK-NEXT: .reg .b32 %r<3>;
105+
; CHECK-NEXT: .reg .f32 %f<2>;
106+
; CHECK-EMPTY:
107+
; CHECK-NEXT: // %bb.0:
108+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_s8_f32_param_0];
109+
; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %f1;
110+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
111+
; CHECK-NEXT: and.b32 %r2, %r1, 255;
112+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
113+
; CHECK-NEXT: ret;
114+
%a = fptosi float %x to i8
115+
ret i8 %a
116+
}
117+
118+
define i8 @cvt_s8_f64(double %x) {
119+
; CHECK-LABEL: cvt_s8_f64(
120+
; CHECK: {
121+
; CHECK-NEXT: .reg .b16 %rs<2>;
122+
; CHECK-NEXT: .reg .b32 %r<3>;
123+
; CHECK-NEXT: .reg .f64 %fd<2>;
124+
; CHECK-EMPTY:
125+
; CHECK-NEXT: // %bb.0:
126+
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_s8_f64_param_0];
127+
; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %fd1;
128+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
129+
; CHECK-NEXT: and.b32 %r2, %r1, 255;
130+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
131+
; CHECK-NEXT: ret;
132+
%a = fptosi double %x to i8
133+
ret i8 %a
134+
}

0 commit comments

Comments
 (0)