@@ -336,9 +336,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
336
336
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
337
337
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
338
338
}
339
+ setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
339
340
if (Subtarget.is64Bit()) {
340
341
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
341
342
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
343
+ setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
342
344
}
343
345
}
344
346
if (Subtarget.hasAVX10_2()) {
@@ -358,6 +360,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
358
360
if (!Subtarget.hasSSE2()) {
359
361
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
360
362
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
363
+ setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
364
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
365
+ setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
361
366
if (Subtarget.is64Bit()) {
362
367
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
363
368
// Without SSE, i64->f64 goes through memory.
@@ -721,6 +726,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
721
726
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
722
727
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
723
728
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
729
+ setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
724
730
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
725
731
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
726
732
@@ -937,6 +943,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
937
943
if (isTypeLegal(MVT::f80)) {
938
944
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
939
945
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
946
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
940
947
}
941
948
942
949
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,9 +1077,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1070
1077
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
1071
1078
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1072
1079
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
1080
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
1073
1081
1074
1082
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
1075
1083
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
1084
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
1076
1085
1077
1086
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1078
1087
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1133,6 +1142,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1133
1142
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
1134
1143
1135
1144
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
1145
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
1136
1146
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
1137
1147
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
1138
1148
@@ -1465,6 +1475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1465
1475
1466
1476
setOperationAction(ISD::FMAXIMUM, VT, Custom);
1467
1477
setOperationAction(ISD::FMINIMUM, VT, Custom);
1478
+ setOperationAction(ISD::FCANONICALIZE, VT, Custom);
1468
1479
}
1469
1480
1470
1481
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1730,6 +1741,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1730
1741
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1731
1742
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1732
1743
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1744
+ setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
1745
+ setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
1746
+ setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
1733
1747
1734
1748
// There is no byte sized k-register load or store without AVX512DQ.
1735
1749
if (!Subtarget.hasDQI()) {
@@ -1809,6 +1823,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1809
1823
setOperationAction(ISD::FMA, VT, Legal);
1810
1824
setOperationAction(ISD::STRICT_FMA, VT, Legal);
1811
1825
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1826
+ setOperationAction(ISD::FCANONICALIZE, VT, Custom);
1812
1827
}
1813
1828
setOperationAction(ISD::LRINT, MVT::v16f32,
1814
1829
Subtarget.hasDQI() ? Legal : Custom);
@@ -32694,6 +32709,24 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
32694
32709
return Op;
32695
32710
}
32696
32711
32712
+ static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
32713
+ SDNode *N = Op.getNode();
32714
+ SDValue Operand = N->getOperand(0);
32715
+ EVT VT = Operand.getValueType();
32716
+ SDLoc dl(N);
32717
+
32718
+ SDValue One = DAG.getConstantFP(1.0, dl, VT);
32719
+
32720
+ // TODO: Fix Crash for bf16 when generating strict_fmul as it
32721
+ // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
32722
+ // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
32723
+ // promote this operator's result!
32724
+ SDValue Chain = DAG.getEntryNode();
32725
+ SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
32726
+ {Chain, Operand, One});
32727
+ return StrictFmul;
32728
+ }
32729
+
32697
32730
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
32698
32731
unsigned OpNo) {
32699
32732
const APInt Operand(32, OpNo);
@@ -32833,6 +32866,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
32833
32866
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
32834
32867
case ISD::FSHL:
32835
32868
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
32869
+ case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
32836
32870
case ISD::STRICT_SINT_TO_FP:
32837
32871
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
32838
32872
case ISD::STRICT_UINT_TO_FP:
0 commit comments