-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[WIP][DAG] Introduce generic shl_add node [NFC] #88791
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12789,10 +12789,9 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, | |
SDLoc DL(N); | ||
SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); | ||
SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); | ||
SDValue NA0 = | ||
DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); | ||
SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); | ||
return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); | ||
SDValue SHADD = | ||
DAG.getNode(ISD::SHL_ADD, DL, VT, NL, DAG.getConstant(Diff, DL, VT), NS); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use TargetConstant if it's required to be a constant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed over in (#89263). |
||
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); | ||
} | ||
|
||
// Combine a constant select operand into its use: | ||
|
@@ -13028,14 +13027,17 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { | |
N0.getOperand(0)); | ||
} | ||
|
||
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, | ||
static SDValue performADDCombine(SDNode *N, | ||
TargetLowering::DAGCombinerInfo &DCI, | ||
const RISCVSubtarget &Subtarget) { | ||
SelectionDAG &DAG = DCI.DAG; | ||
if (SDValue V = combineAddOfBooleanXor(N, DAG)) | ||
return V; | ||
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) | ||
return V; | ||
if (!DCI.isBeforeLegalize()) | ||
if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) | ||
|
@@ -15894,7 +15896,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, | |
return V; | ||
if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) | ||
return V; | ||
return performADDCombine(N, DAG, Subtarget); | ||
return performADDCombine(N, DCI, Subtarget); | ||
} | ||
case ISD::SUB: { | ||
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33553,7 +33553,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { | |
NODE_NAME_CASE(BZHI) | ||
NODE_NAME_CASE(PDEP) | ||
NODE_NAME_CASE(PEXT) | ||
NODE_NAME_CASE(MUL_IMM) | ||
NODE_NAME_CASE(MOVMSK) | ||
NODE_NAME_CASE(PTEST) | ||
NODE_NAME_CASE(TESTP) | ||
|
@@ -36845,13 +36844,6 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, | |
Known.resetAll(); | ||
switch (Opc) { | ||
default: break; | ||
case X86ISD::MUL_IMM: { | ||
KnownBits Known2; | ||
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); | ||
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); | ||
Known = KnownBits::mul(Known, Known2); | ||
break; | ||
} | ||
case X86ISD::SETCC: | ||
Known.Zero.setBitsFrom(1); | ||
break; | ||
|
@@ -46905,12 +46897,18 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, | |
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); | ||
} | ||
|
||
static SDValue createMulImm(uint64_t MulAmt, SDValue N, SelectionDAG &DAG, | ||
EVT VT, const SDLoc &DL) { | ||
assert(MulAmt == 3 || MulAmt == 5 || MulAmt == 9); | ||
SDValue ShAmt = DAG.getConstant(Log2_64(MulAmt - 1), DL, MVT::i8); | ||
return DAG.getNode(ISD::SHL_ADD, DL, VT, N, ShAmt, N); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this going to cause an issue with poison? We've now increased the use count of N. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That... is a good question. We probably need to freeze here since we're increasing the number of uses; I had not considered that. Let me add the freeze and see if that influences codegen in practice. If it does, we may need to consider both a SHL_ADD node and a MUL359 node. I'm hoping we don't, let me investigate and report back. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, the news here is not good. Adding in Freeze in the x86 backend code causes a whole bunch of regressions that were not obvious on first glance. Interestingly, incorporating the same logic into the RISC-V specific version of this patch (#89263) doesn't seem to expose the same kind of problems - most likely because the usage is much more isolated. #89290 fixes an analogous freeze issue in code already landed, again with no visible code diff. I think what I'd like to suggest here is that we go ahead and focus review on #89263. Once we land that, I can iterate in tree on the RISC-V specific parts, and then rebase this patch on a fully fleshed-out implementation and focus it on the x86 merge. (I clearly need to track something down there.) (For the record, the issue @dtcxzyw flagged in the RISC-V specific part of this patch doesn't exist in #89263 as I focused on a different subset there. That's probably confusing for reviewers in retrospect, sorry!) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. 
Learn more. I investigated these differences further. Net result is one fairly obvious missed optimization, one somewhat complicated but reasonable issue with COPY elimination, and one fundamental issue. I'm going to focus on only the last. We end up with a situation where an inserted freeze gets hoisted through a chain of computation. This is all correct and fine, but as a side effect of that hoisting, we strip nsw off an add. The net result is that we can't prove a narrow addressing sequence is equivalent to the wider form, and thus fail to be able to fold a constant base offset into the addressing mode. I'm a bit stuck on what to do about this case, and need to give this more thought. |
||
} | ||
|
||
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, | ||
EVT VT, const SDLoc &DL) { | ||
|
||
auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { | ||
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), | ||
DAG.getConstant(Mult, DL, VT)); | ||
SDValue Result = createMulImm(Mult, N->getOperand(0), DAG, VT, DL); | ||
Result = DAG.getNode(ISD::SHL, DL, VT, Result, | ||
DAG.getConstant(Shift, DL, MVT::i8)); | ||
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, | ||
|
@@ -46919,10 +46917,8 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, | |
}; | ||
|
||
auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) { | ||
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), | ||
DAG.getConstant(Mul1, DL, VT)); | ||
Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result, | ||
DAG.getConstant(Mul2, DL, VT)); | ||
SDValue Result = createMulImm(Mul1, N->getOperand(0), DAG, VT, DL); | ||
Result = createMulImm(Mul2, Result, DAG, VT, DL); | ||
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, | ||
N->getOperand(0)); | ||
return Result; | ||
|
@@ -46982,9 +46978,8 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, | |
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); | ||
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), | ||
DAG.getConstant(ShiftAmt, DL, MVT::i8)); | ||
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), | ||
DAG.getConstant(ScaleShift, DL, MVT::i8)); | ||
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); | ||
return DAG.getNode(ISD::SHL_ADD, DL, VT, N->getOperand(0), | ||
DAG.getConstant(ScaleShift, DL, MVT::i8), Shift1); | ||
} | ||
} | ||
|
||
|
@@ -47204,8 +47199,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, | |
SDValue NewMul = SDValue(); | ||
if (VT == MVT::i64 || VT == MVT::i32) { | ||
if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) { | ||
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), | ||
DAG.getConstant(AbsMulAmt, DL, VT)); | ||
NewMul = createMulImm(AbsMulAmt, N->getOperand(0), DAG, VT, DL); | ||
if (SignMulAmt < 0) | ||
NewMul = | ||
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); | ||
|
@@ -47243,15 +47237,13 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, | |
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), | ||
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); | ||
else | ||
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), | ||
DAG.getConstant(MulAmt1, DL, VT)); | ||
NewMul = createMulImm(MulAmt1, N->getOperand(0), DAG, VT, DL); | ||
|
||
if (isPowerOf2_64(MulAmt2)) | ||
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, | ||
DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8)); | ||
else | ||
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, | ||
DAG.getConstant(MulAmt2, DL, VT)); | ||
NewMul = NewMul = createMulImm(MulAmt2, NewMul, DAG, VT, DL); | ||
|
||
// Negate the result. | ||
if (SignMulAmt < 0) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RISCV -> RISC-V