Skip to content

Commit 26f272e

Browse files
[X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic (#106370)
Enable support for fcanonicalize intrinsic lowering.
1 parent 677177b commit 26f272e

File tree

4 files changed

+1100
-0
lines changed

4 files changed

+1100
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+15
Original file line number | Diff line number | Diff line change
@@ -508,6 +508,7 @@ namespace {
508508
SDValue visitFSQRT(SDNode *N);
509509
SDValue visitFCOPYSIGN(SDNode *N);
510510
SDValue visitFPOW(SDNode *N);
511+
SDValue visitFCANONICALIZE(SDNode *N);
511512
SDValue visitSINT_TO_FP(SDNode *N);
512513
SDValue visitUINT_TO_FP(SDNode *N);
513514
SDValue visitFP_TO_SINT(SDNode *N);
@@ -1980,6 +1981,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
19801981
case ISD::FREEZE: return visitFREEZE(N);
19811982
case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
19821983
case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1984+
case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
19831985
case ISD::VECREDUCE_FADD:
19841986
case ISD::VECREDUCE_FMUL:
19851987
case ISD::VECREDUCE_ADD:
@@ -2090,6 +2092,19 @@ static SDValue getInputChainForNode(SDNode *N) {
20902092
return SDValue();
20912093
}
20922094

2095+
/// DAG combine hook for ISD::FCANONICALIZE nodes.
///
/// Inspects operand 0 of \p N. If that operand is undef, the node is replaced
/// by a floating-point constant holding a quiet NaN in the operand's value
/// type. In every other case a default-constructed SDValue is returned.
///
/// \param N the FCANONICALIZE node being visited.
/// \returns a quiet-NaN constant of the operand's type when the operand is
///          undef, otherwise an empty SDValue.
SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2096+
SDValue Operand = N->getOperand(0);
2097+
// The result type is taken from the operand rather than from N itself.
EVT VT = Operand.getValueType();
2098+
SDLoc dl(N);
2099+
2100+
// Canonicalize undef to quiet NaN.
2101+
if (Operand.isUndef()) {
2102+
// Build the quiet NaN using the floating-point semantics reported by VT.
APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2103+
return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2104+
}
2105+
// No fold applied.
// NOTE(review): an empty SDValue presumably signals "no change" to the
// combiner driver -- confirm against DAGCombiner::visit's callers.
return SDValue();
2106+
}
2107+
20932108
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
20942109
// If N has two operands, where one has an input chain equal to the other,
20952110
// the 'other' chain is redundant.

llvm/lib/Target/X86/X86ISelLowering.cpp

+34
Original file line number | Diff line number | Diff line change
@@ -336,9 +336,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
336336
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
337337
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
338338
}
339+
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
339340
if (Subtarget.is64Bit()) {
340341
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
341342
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
343+
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
342344
}
343345
}
344346
if (Subtarget.hasAVX10_2()) {
@@ -358,6 +360,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
358360
if (!Subtarget.hasSSE2()) {
359361
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
360362
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
363+
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
364+
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
365+
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
361366
if (Subtarget.is64Bit()) {
362367
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
363368
// Without SSE, i64->f64 goes through memory.
@@ -721,6 +726,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
721726
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
722727
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
723728
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
729+
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
724730
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
725731
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
726732

@@ -937,6 +943,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
937943
if (isTypeLegal(MVT::f80)) {
938944
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
939945
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
946+
setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
940947
}
941948

942949
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,9 +1077,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
10701077
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
10711078
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
10721079
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1080+
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
10731081

10741082
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
10751083
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1084+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
10761085

10771086
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
10781087
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1133,6 +1142,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
11331142
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
11341143

11351144
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1145+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
11361146
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
11371147
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
11381148

@@ -1465,6 +1475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14651475

14661476
setOperationAction(ISD::FMAXIMUM, VT, Custom);
14671477
setOperationAction(ISD::FMINIMUM, VT, Custom);
1478+
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
14681479
}
14691480

14701481
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1730,6 +1741,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
17301741
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
17311742
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
17321743
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1744+
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
1745+
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
1746+
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
17331747

17341748
// There is no byte sized k-register load or store without AVX512DQ.
17351749
if (!Subtarget.hasDQI()) {
@@ -1809,6 +1823,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
18091823
setOperationAction(ISD::FMA, VT, Legal);
18101824
setOperationAction(ISD::STRICT_FMA, VT, Legal);
18111825
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1826+
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
18121827
}
18131828
setOperationAction(ISD::LRINT, MVT::v16f32,
18141829
Subtarget.hasDQI() ? Legal : Custom);
@@ -32694,6 +32709,24 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
3269432709
return Op;
3269532710
}
3269632711

32712+
/// Custom lowering for ISD::FCANONICALIZE.
///
/// Rewrites the node as a multiplication of its operand by the constant 1.0,
/// emitted as an ISD::STRICT_FMUL whose input chain is the DAG entry node.
///
/// \param Op  the FCANONICALIZE value to lower; its operand 0 is the value
///            being canonicalized.
/// \param DAG the SelectionDAG in which the new nodes are created.
/// \returns the STRICT_FMUL node created for `Operand * 1.0`.
static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
32713+
SDNode *N = Op.getNode();
32714+
SDValue Operand = N->getOperand(0);
32715+
// All new nodes use the operand's value type.
EVT VT = Operand.getValueType();
32716+
SDLoc dl(N);
32717+
32718+
// Multiplicative identity in the operand's type.
SDValue One = DAG.getConstantFP(1.0, dl, VT);
32719+
32720+
// TODO: Fix crash for bf16 when generating strict_fmul, as it
32721+
// leads to an error: SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
32722+
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
32723+
// promote this operator's result!
32724+
// The strict node takes a chain operand; the DAG entry node is used for it.
SDValue Chain = DAG.getEntryNode();
32725+
// STRICT_FMUL produces two results: the product (VT) and an output chain
// (MVT::Other). Only the node itself is returned below.
// NOTE(review): presumably the strict (chained) form is chosen so that the
// multiply by 1.0 is not folded away as an identity -- confirm.
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
32726+
{Chain, Operand, One});
32727+
return StrictFmul;
32728+
}
32729+
3269732730
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
3269832731
unsigned OpNo) {
3269932732
const APInt Operand(32, OpNo);
@@ -32833,6 +32866,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3283332866
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
3283432867
case ISD::FSHL:
3283532868
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
32869+
case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
3283632870
case ISD::STRICT_SINT_TO_FP:
3283732871
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
3283832872
case ISD::STRICT_UINT_TO_FP:

0 commit comments

Comments
 (0)