Skip to content

[InstCombine] Replace an integer comparison of a phi node with multiple ucmp/scmp operands and a constant with phi of individual comparisons of original intrinsic's arguments #107769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 13, 2024
88 changes: 50 additions & 38 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// Check to see whether the instruction can be folded into each phi operand.
// If there is one operand that does not fold, remember the BB it is in.
SmallVector<Value *> NewPhiValues;
BasicBlock *NonSimplifiedBB = nullptr;
Value *NonSimplifiedInVal = nullptr;
SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
bool SeenNonSimplifiedInVal = false;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
BasicBlock *InBB = PN->getIncomingBlock(i);
Expand All @@ -1820,35 +1820,64 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}

if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
// If the only use of phi is comparing it with a constant then we can
// put this comparison in the incoming BB directly after a ucmp/scmp call
// because we know that it will simplify to a single icmp.
// NOTE: the single-use check here is not only to ensure that the
// optimization is profitable, but also to avoid creating a potentially
// invalid phi node when we have a multi-edge in the CFG.
const APInt *Ignored;
if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
OpsToMoveUseToIncomingBB.push_back(i);
NewPhiValues.push_back(nullptr);
continue;
}

if (SeenNonSimplifiedInVal)
return nullptr; // More than one non-simplified value.
SeenNonSimplifiedInVal = true;

// If there is exactly one non-simplified value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would
// be inserting the computation on some other paths (e.g. inside a loop).
// Only do this if the pred block is unconditionally branching into the phi
// block. Also, make sure that the pred block is not dead code.
BranchInst *BI = dyn_cast<BranchInst>(InBB->getTerminator());
if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB))
return nullptr;

NonSimplifiedBB = InBB;
NonSimplifiedInVal = InVal;
NewPhiValues.push_back(nullptr);
OpsToMoveUseToIncomingBB.push_back(i);

// If the InVal is an invoke at the end of the pred block, then we can't
// insert a computation after it without breaking the edge.
if (isa<InvokeInst>(InVal))
if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
if (cast<Instruction>(InVal)->getParent() == InBB)
return nullptr;

// Do not push the operation across a loop backedge. This could result in
// an infinite combine loop, and is generally non-profitable (especially
// if the operation was originally outside the loop).
if (isBackEdge(NonSimplifiedBB, PN->getParent()))
if (isBackEdge(InBB, PN->getParent()))
return nullptr;
}

// If there is exactly one non-simplified value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
// Also, make sure that the pred block is not dead code.
if (NonSimplifiedBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
if (!BI || !BI->isUnconditional() ||
!DT.isReachableFromEntry(NonSimplifiedBB))
return nullptr;
// Clone the instruction that uses the phi node and move it into the incoming
// BB because we know that the next iteration of InstCombine will simplify it.
for (auto OpIndex : OpsToMoveUseToIncomingBB) {
Value *Op = PN->getIncomingValue(OpIndex);
BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);

Instruction *Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = Op;
else
U = U->DoPHITranslation(PN->getParent(), OpBB);
}
Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
NewPhiValues[OpIndex] = Clone;
}

// Okay, we can do the transformation: create the new PHI node.
Expand All @@ -1857,30 +1886,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
NewPN->takeName(PN);
NewPN->setDebugLoc(PN->getDebugLoc());

// If we are going to have to insert a new computation, do so right before the
// predecessor's terminator.
Instruction *Clone = nullptr;
if (NonSimplifiedBB) {
Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = NonSimplifiedInVal;
else
U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
}
InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
}

for (unsigned i = 0; i != NumPHIValues; ++i) {
if (NewPhiValues[i])
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
else
NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
}
for (unsigned i = 0; i != NumPHIValues; ++i)
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));

for (User *U : make_early_inc_range(PN->users())) {
Instruction *User = cast<Instruction>(U);
if (User == &I) continue;
if (User == &I)
continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
}
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add negative tests where one scmp is not one-use or the icmp operand is not constant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case you're proposing would still actually enable this optimization since we can still move the icmp into every incoming BB, and only one of the scmps will not be removed due to it being used more than once (in this case the BB with scmp becomes what was previously NonSimplifiedBB). I think it's better to add a negative test where both scmps are not one-use, since in that case the optimization wouldn't be trivially profitable.

Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

declare void @use(i8 %value);

; Since we know that any comparison of ucmp/scmp with a constant will result in
; a comparison of ucmp/scmp's operands, we can propagate such a comparison
; through the phi node and let the next iteration of instcombine simplify it.
define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}

; When one of the incoming values is ucmp/scmp and the other is not we can still perform the transformation
define i1 @icmp_of_phi_of_one_scmp_with_constant(i1 %c, i16 %x, i16 %y, i8 %false_val)
; CHECK-LABEL: define i1 @icmp_of_phi_of_one_scmp_with_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[FALSE_VAL]], 0
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
; CHECK-NEXT: ret i1 [[PHI]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%false_val, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}

; Negative test: the RHS of comparison that uses the phi node is not constant
define i1 @icmp_of_phi_of_scmp_with_non_constant(i1 %c, i16 %x, i16 %y, i8 %cmp)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_non_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[CMP:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], [[CMP]]
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, %cmp
ret i1 %r
}

; Negative test: more than one incoming value of the phi node is not one-use
define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(i1 %c, i16 %x, i16 %y)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
; CHECK-NEXT: call void @use(i8 [[CMP1]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
; CHECK-NEXT: call void @use(i8 [[CMP2]])
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], 0
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
call void @use(i8 %cmp1)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
call void @use(i8 %cmp2)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any tests where only 1 of the incoming values is an {s,u}cmp intrin?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added one

}
Loading