Skip to content

Commit eef5ea0

Browse files
authored
[VPlan] Account for dead FOR splice simplification in cost model (#131486)
Fixes #131359. After #129645, a first-order recurrence will no longer have its splice costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and is dead. The legacy cost model didn't account for this, so this change accounts for it in planContainsAdditionalSimplifications to avoid the "VPlan cost model and legacy cost model disagreed" assertion.
1 parent e2c43ba commit eef5ea0

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -7467,6 +7467,16 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
74677467
}
74687468
continue;
74697469
}
7470+
// Unused FOR splices are removed by VPlan transforms, so the VPlan-based
7471+
// cost model won't cost them whilst the legacy cost model will.
7472+
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) {
7473+
if (none_of(FOR->users(), [](VPUser *U) {
7474+
auto *VPI = dyn_cast<VPInstruction>(U);
7475+
return VPI && VPI->getOpcode() ==
7476+
VPInstruction::FirstOrderRecurrenceSplice;
7477+
}))
7478+
return true;
7479+
}
74707480
// The VPlan-based cost model is more accurate for partial reduction and
74717481
// comparing against the legacy cost isn't desirable.
74727482
if (isa<VPPartialReductionRecipe>(&R))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
3+
4+
; If a FOR isn't used the VPInstruction::FirstOrderRecurrenceSplice will be dead
5+
; and won't be costed in the VPlan cost model. Make sure we account for this
6+
; simplification in comparison to the legacy cost model.
7+
8+
target triple = "x86_64"
9+
10+
define void @no_use() {
11+
; CHECK-LABEL: define void @no_use() {
12+
; CHECK-NEXT: [[ENTRY:.*]]:
13+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14+
; CHECK: [[VECTOR_PH]]:
15+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
16+
; CHECK: [[VECTOR_BODY]]:
17+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
18+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
19+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
20+
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
21+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
22+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
23+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
24+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: [[MIDDLE_BLOCK]]:
26+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
27+
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
28+
; CHECK: [[SCALAR_PH]]:
29+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
30+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
31+
; CHECK-NEXT: br label %[[LOOP:.*]]
32+
; CHECK: [[LOOP]]:
33+
; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
34+
; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
35+
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
36+
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
37+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
38+
; CHECK: [[EXIT]]:
39+
; CHECK-NEXT: ret void
40+
;
41+
entry:
42+
br label %loop
43+
44+
loop:
45+
%for = phi i32 [ 0, %entry ], [ %iv, %loop ]
46+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
47+
%iv.next = add i32 %iv, 1
48+
%exitcond.not.i = icmp eq i32 %iv, 43
49+
br i1 %exitcond.not.i, label %exit, label %loop
50+
51+
exit:
52+
ret void
53+
}
54+
55+
define void @dead_use() {
56+
; CHECK-LABEL: define void @dead_use() {
57+
; CHECK-NEXT: [[ENTRY:.*]]:
58+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
59+
; CHECK: [[VECTOR_PH]]:
60+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
61+
; CHECK: [[VECTOR_BODY]]:
62+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
63+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
64+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
65+
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
66+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
67+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
68+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
69+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
70+
; CHECK: [[MIDDLE_BLOCK]]:
71+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
72+
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
73+
; CHECK: [[SCALAR_PH]]:
74+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
75+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
76+
; CHECK-NEXT: br label %[[LOOP:.*]]
77+
; CHECK: [[LOOP]]:
78+
; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
79+
; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
80+
; CHECK-NEXT: [[DEAD:%.*]] = add i32 [[D_0_I]], 1
81+
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
82+
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
83+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
84+
; CHECK: [[EXIT]]:
85+
; CHECK-NEXT: ret void
86+
;
87+
entry:
88+
br label %loop
89+
90+
loop:
91+
%for = phi i32 [ 0, %entry ], [ %iv, %loop ]
92+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
93+
%dead = add i32 %for, 1
94+
%iv.next = add i32 %iv, 1
95+
%exitcond.not.i = icmp eq i32 %iv, 43
96+
br i1 %exitcond.not.i, label %exit, label %loop
97+
98+
exit:
99+
ret void
100+
}

0 commit comments

Comments
 (0)