Skip to content

Commit d6f4d52

Browse files
committed
[CGP][AArch64] Rebase the common base offset for better ISel
When all the large constant offsets share the same value in bits 12 through 23, rebase them onto that common high part so the low 12 bits can be folded into the load/store immediate. Fold: add x8, x0, #2031, lsl #12; add x8, x8, #960; ldr x9, [x8]; ldr x8, [x8, #2056] into: add x8, x0, #2031, lsl #12; ldr x9, [x8, #960]; ldr x8, [x8, #3016]
1 parent 6a8a562 commit d6f4d52

File tree

7 files changed

+105
-65
lines changed

7 files changed

+105
-65
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+4
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
342342
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
343343
}
344344

345+
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
346+
return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
347+
}
348+
345349
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
346350
Type *ScalarValTy) const {
347351
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {

llvm/include/llvm/CodeGen/TargetLowering.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
#include "llvm/CodeGen/DAGCombine.h"
3131
#include "llvm/CodeGen/ISDOpcodes.h"
3232
#include "llvm/CodeGen/LowLevelTypeUtils.h"
33-
#include "llvm/CodeGen/MachineValueType.h"
3433
#include "llvm/CodeGen/MachineRegisterInfo.h"
34+
#include "llvm/CodeGen/MachineValueType.h"
3535
#include "llvm/CodeGen/RuntimeLibcalls.h"
3636
#include "llvm/CodeGen/SelectionDAG.h"
3737
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -2721,6 +2721,12 @@ class TargetLoweringBase {
27212721
Type *Ty, unsigned AddrSpace,
27222722
Instruction *I = nullptr) const;
27232723

2724+
/// Return the preferred common base offset, or 0 if there is no preference.
2725+
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
2726+
int64_t MaxOffset) const {
2727+
return 0;
2728+
}
2729+
27242730
/// Return true if the specified immediate is legal icmp immediate, that is
27252731
/// the target has icmp instructions which can compare a register against the
27262732
/// immediate without having to materialize the immediate into a register.

llvm/lib/CodeGen/CodeGenPrepare.cpp

+50-29
Original file line numberDiff line numberDiff line change
@@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
61216121
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
61226122
Value *NewBaseGEP = nullptr;
61236123

6124+
auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6125+
GetElementPtrInst *GEP) {
6126+
LLVMContext &Ctx = GEP->getContext();
6127+
Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6128+
Type *I8PtrTy =
6129+
PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6130+
Type *I8Ty = Type::getInt8Ty(Ctx);
6131+
6132+
BasicBlock::iterator NewBaseInsertPt;
6133+
BasicBlock *NewBaseInsertBB;
6134+
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6135+
// If the base of the struct is an instruction, the new base will be
6136+
// inserted close to it.
6137+
NewBaseInsertBB = BaseI->getParent();
6138+
if (isa<PHINode>(BaseI))
6139+
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6140+
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6141+
NewBaseInsertBB =
6142+
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6143+
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6144+
} else
6145+
NewBaseInsertPt = std::next(BaseI->getIterator());
6146+
} else {
6147+
// If the current base is an argument or global value, the new base
6148+
// will be inserted to the entry block.
6149+
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6150+
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6151+
}
6152+
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6153+
// Create a new base.
6154+
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6155+
NewBaseGEP = OldBase;
6156+
if (NewBaseGEP->getType() != I8PtrTy)
6157+
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6158+
NewBaseGEP =
6159+
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
6160+
NewGEPBases.insert(NewBaseGEP);
6161+
return;
6162+
};
6163+
6164+
// Check whether all the offsets can be encoded with the preferred common base.
6165+
if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6166+
LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6167+
BaseOffset = PreferBase;
6168+
// Create a new base if the offset of the BaseGEP can be encoded with one
6169+
// instruction.
6170+
createNewBase(BaseOffset, OldBase, BaseGEP);
6171+
}
6172+
61246173
auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
61256174
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
61266175
GetElementPtrInst *GEP = LargeOffsetGEP->first;
@@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
61536202
if (!NewBaseGEP) {
61546203
// Create a new base if we don't have one yet. Find the insertion
61556204
// pointer for the new base first.
6156-
BasicBlock::iterator NewBaseInsertPt;
6157-
BasicBlock *NewBaseInsertBB;
6158-
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6159-
// If the base of the struct is an instruction, the new base will be
6160-
// inserted close to it.
6161-
NewBaseInsertBB = BaseI->getParent();
6162-
if (isa<PHINode>(BaseI))
6163-
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6164-
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6165-
NewBaseInsertBB =
6166-
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6167-
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6168-
} else
6169-
NewBaseInsertPt = std::next(BaseI->getIterator());
6170-
} else {
6171-
// If the current base is an argument or global value, the new base
6172-
// will be inserted to the entry block.
6173-
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6174-
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6175-
}
6176-
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6177-
// Create a new base.
6178-
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6179-
NewBaseGEP = OldBase;
6180-
if (NewBaseGEP->getType() != I8PtrTy)
6181-
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6182-
NewBaseGEP =
6183-
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
6184-
NewGEPBases.insert(NewBaseGEP);
6205+
createNewBase(BaseOffset, OldBase, GEP);
61856206
}
61866207

61876208
IRBuilder<> Builder(GEP);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -16070,6 +16070,20 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
1607016070
AM.Scale);
1607116071
}
1607216072

16073+
// Check whether the two offsets belong to the same imm24 range and their high
16074+
// 12 bits are the same; if so, the high part can be encoded as the add offset.
16075+
int64_t
16076+
AArch64TargetLowering::getPreferredLargeGEPBaseOffset(int64_t MinOffset,
16077+
int64_t MaxOffset) const {
16078+
int64_t HighPart = MinOffset & ~0xfffULL;
16079+
if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) {
16080+
// Rebase the value to an integer multiple of imm12.
16081+
return HighPart;
16082+
}
16083+
16084+
return 0;
16085+
}
16086+
1607316087
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
1607416088
// Consider splitting large offset of struct or array.
1607516089
return true;

llvm/lib/Target/AArch64/AArch64ISelLowering.h

+3
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,9 @@ class AArch64TargetLowering : public TargetLowering {
699699
unsigned AS,
700700
Instruction *I = nullptr) const override;
701701

702+
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
703+
int64_t MaxOffset) const override;
704+
702705
/// Return true if an FMA operation is faster than a pair of fmul and fadd
703706
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
704707
/// returns true, otherwise fmuladd is expanded to fmul + fadd.

llvm/test/CodeGen/AArch64/arm64-addrmode.ll

+2-3
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,8 @@ define i64 @LdOffset_i64_multi_offset(ptr %a) {
252252
; CHECK-LABEL: LdOffset_i64_multi_offset:
253253
; CHECK: // %bb.0:
254254
; CHECK-NEXT: add x8, x0, #2031, lsl #12 // =8318976
255-
; CHECK-NEXT: add x8, x8, #960
256-
; CHECK-NEXT: ldr x9, [x8]
257-
; CHECK-NEXT: ldr x8, [x8, #2056]
255+
; CHECK-NEXT: ldr x9, [x8, #960]
256+
; CHECK-NEXT: ldr x8, [x8, #3016]
258257
; CHECK-NEXT: add x0, x8, x9
259258
; CHECK-NEXT: ret
260259
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992

llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll

+25-32
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,17 @@
66
define void @test1(ptr %s, i32 %n) {
77
; CHECK-LABEL: test1:
88
; CHECK: // %bb.0: // %entry
9-
; CHECK-NEXT: ldr x9, [x0]
10-
; CHECK-NEXT: mov w10, #40000 // =0x9c40
11-
; CHECK-NEXT: mov w8, wzr
12-
; CHECK-NEXT: add x9, x9, x10
13-
; CHECK-NEXT: cmp w8, w1
9+
; CHECK-NEXT: ldr x8, [x0]
10+
; CHECK-NEXT: mov w9, wzr
11+
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
12+
; CHECK-NEXT: cmp w9, w1
1413
; CHECK-NEXT: b.ge .LBB0_2
1514
; CHECK-NEXT: .LBB0_1: // %while_body
1615
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
17-
; CHECK-NEXT: str w8, [x9, #4]
18-
; CHECK-NEXT: add w8, w8, #1
19-
; CHECK-NEXT: str w8, [x9]
20-
; CHECK-NEXT: cmp w8, w1
16+
; CHECK-NEXT: str w9, [x8, #3140]
17+
; CHECK-NEXT: add w9, w9, #1
18+
; CHECK-NEXT: str w9, [x8, #3136]
19+
; CHECK-NEXT: cmp w9, w1
2120
; CHECK-NEXT: b.lt .LBB0_1
2221
; CHECK-NEXT: .LBB0_2: // %while_end
2322
; CHECK-NEXT: ret
@@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) {
4746
; CHECK: // %bb.0: // %entry
4847
; CHECK-NEXT: cbz x0, .LBB1_3
4948
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
50-
; CHECK-NEXT: mov w8, #40000 // =0x9c40
5149
; CHECK-NEXT: mov w9, wzr
52-
; CHECK-NEXT: add x8, x0, x8
50+
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
5351
; CHECK-NEXT: cmp w9, w1
5452
; CHECK-NEXT: b.ge .LBB1_3
5553
; CHECK-NEXT: .LBB1_2: // %while_body
5654
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
57-
; CHECK-NEXT: str w9, [x8, #4]
55+
; CHECK-NEXT: str w9, [x8, #3140]
5856
; CHECK-NEXT: add w9, w9, #1
59-
; CHECK-NEXT: str w9, [x8]
57+
; CHECK-NEXT: str w9, [x8, #3136]
6058
; CHECK-NEXT: cmp w9, w1
6159
; CHECK-NEXT: b.lt .LBB1_2
6260
; CHECK-NEXT: .LBB1_3: // %while_end
@@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
8987
; CHECK-NEXT: csel x8, x1, x0, ne
9088
; CHECK-NEXT: cbz x8, .LBB2_3
9189
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
92-
; CHECK-NEXT: mov w10, #40000 // =0x9c40
9390
; CHECK-NEXT: mov w9, wzr
94-
; CHECK-NEXT: add x8, x8, x10
91+
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
9592
; CHECK-NEXT: cmp w9, w3
9693
; CHECK-NEXT: b.ge .LBB2_3
9794
; CHECK-NEXT: .LBB2_2: // %while_body
9895
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
99-
; CHECK-NEXT: str w9, [x8, #4]
96+
; CHECK-NEXT: str w9, [x8, #3140]
10097
; CHECK-NEXT: add w9, w9, #1
101-
; CHECK-NEXT: str w9, [x8]
98+
; CHECK-NEXT: str w9, [x8, #3136]
10299
; CHECK-NEXT: cmp w9, w3
103100
; CHECK-NEXT: b.lt .LBB2_2
104101
; CHECK-NEXT: .LBB2_3: // %while_end
@@ -141,41 +138,38 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
141138
; CHECK-NEXT: .cfi_personality 156, DW.ref.__FrameHandler
142139
; CHECK-NEXT: .cfi_lsda 28, .Lexception0
143140
; CHECK-NEXT: // %bb.0: // %entry
144-
; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
141+
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
145142
; CHECK-NEXT: .cfi_def_cfa_offset 32
146143
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
147144
; CHECK-NEXT: .cfi_offset w19, -8
148145
; CHECK-NEXT: .cfi_offset w20, -16
149-
; CHECK-NEXT: .cfi_offset w21, -24
150146
; CHECK-NEXT: .cfi_offset w30, -32
151147
; CHECK-NEXT: .cfi_remember_state
152148
; CHECK-NEXT: mov w19, w0
153-
; CHECK-NEXT: mov w21, wzr
154-
; CHECK-NEXT: mov w20, #40000 // =0x9c40
149+
; CHECK-NEXT: mov w20, wzr
155150
; CHECK-NEXT: .LBB3_1: // %while_cond
156151
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
157152
; CHECK-NEXT: .Ltmp0:
158153
; CHECK-NEXT: bl foo
159154
; CHECK-NEXT: .Ltmp1:
160155
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
161156
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
162-
; CHECK-NEXT: add x8, x0, x20
163-
; CHECK-NEXT: cmp w21, w19
164-
; CHECK-NEXT: str wzr, [x8]
157+
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
158+
; CHECK-NEXT: cmp w20, w19
159+
; CHECK-NEXT: str wzr, [x8, #3136]
165160
; CHECK-NEXT: b.ge .LBB3_4
166161
; CHECK-NEXT: // %bb.3: // %while_body
167162
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
168-
; CHECK-NEXT: str w21, [x8, #4]
169-
; CHECK-NEXT: add w21, w21, #1
170-
; CHECK-NEXT: str w21, [x8]
163+
; CHECK-NEXT: str w20, [x8, #3140]
164+
; CHECK-NEXT: add w20, w20, #1
165+
; CHECK-NEXT: str w20, [x8, #3136]
171166
; CHECK-NEXT: b .LBB3_1
172167
; CHECK-NEXT: .LBB3_4: // %while_end
173168
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
174-
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
169+
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
175170
; CHECK-NEXT: .cfi_def_cfa_offset 0
176171
; CHECK-NEXT: .cfi_restore w19
177172
; CHECK-NEXT: .cfi_restore w20
178-
; CHECK-NEXT: .cfi_restore w21
179173
; CHECK-NEXT: .cfi_restore w30
180174
; CHECK-NEXT: ret
181175
; CHECK-NEXT: .LBB3_5: // %cleanup
@@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) {
223217
; CHECK-NEXT: ldr x8, [x0]
224218
; CHECK-NEXT: mov w9, wzr
225219
; CHECK-NEXT: add x8, x8, #19, lsl #12 // =77824
226-
; CHECK-NEXT: add x8, x8, #2176
227220
; CHECK-NEXT: cmp w9, w1
228221
; CHECK-NEXT: b.ge .LBB4_2
229222
; CHECK-NEXT: .LBB4_1: // %while_body
230223
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
231-
; CHECK-NEXT: str w9, [x8, #4]
224+
; CHECK-NEXT: str w9, [x8, #2180]
232225
; CHECK-NEXT: add w9, w9, #1
233-
; CHECK-NEXT: str w9, [x8]
226+
; CHECK-NEXT: str w9, [x8, #2176]
234227
; CHECK-NEXT: cmp w9, w1
235228
; CHECK-NEXT: b.lt .LBB4_1
236229
; CHECK-NEXT: .LBB4_2: // %while_end

0 commit comments

Comments
 (0)