Commit fa5121b (parent 4b273b1)
[ARM] Try access-sized addends in CombineBaseUpdate() first
Sort the ISD::ADD uses of a load/store node according to the following heuristic:

* memory access-sized constant addends are checked first (striving to leverage the "[rN]!" addressing mode, which does not require an extra register operand even for VLDn/VSTn)
* other constant addends are checked next, in increasing order
* non-constant addends are checked last

For example, before this patch the code generated for the _test_silly_load function from llvm/test/CodeGen/ARM/vector-load.ll was:

    vldr    d16, [r0, #16]
    movs    r1, #24
    vld1.8  {d16, d17}, [r0:128], r1
    ldr     r0, [r0]
    bx      lr

and after this patch it is:

    ldr     r1, [r0, #24]
    vld1.8  {d16, d17}, [r0:128]!
    vldr    d16, [r0]
    bx      lr
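To illustrate the ordering in isolation, here is a minimal standalone C++ sketch of the heuristic (not the patch code itself; the rankIncrement helper and the sample addend values are hypothetical): the sort key ranks the access-sized constant first, other constants by increasing value, and non-constant (register) addends last.

    // Illustrative only; names and values are made up for this example.
    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <optional>
    #include <vector>

    // Rank a candidate addend of the base pointer: the access-sized constant
    // ranks first (it enables the "[rN]!" post-increment form), other
    // constants rank by increasing value, non-constant addends rank last.
    static uint64_t rankIncrement(std::optional<uint64_t> Addend,
                                  uint64_t NumBytes) {
      if (!Addend)
        return std::numeric_limits<uint64_t>::max(); // non-constant: try last
      if (*Addend == NumBytes)
        return 0;                                    // exact access size: best
      return *Addend;
    }

    int main() {
      const uint64_t NumBytes = 16; // e.g. a 128-bit vld1.8 {d16, d17} access
      // std::nullopt stands for a non-constant (register) addend.
      std::vector<std::optional<uint64_t>> Addends = {24, std::nullopt, 16, 8};
      std::stable_sort(Addends.begin(), Addends.end(),
                       [&](const auto &A, const auto &B) {
                         return rankIncrement(A, NumBytes) <
                                rankIncrement(B, NumBytes);
                       });
      // Resulting order: 16 (== NumBytes), 8, 24, non-constant.
      return 0;
    }

A stable sort keeps the original use order among equally ranked addends, which keeps the combine deterministic.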

6 files changed: +157 −126 lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 108 additions & 76 deletions
@@ -14202,6 +14202,75 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
                               DAG.getUNDEF(VT), NewMask);
 }
 
+static bool ClassifyForCombining(SDNode *N, bool isIntrinsic, bool &isLoadOp,
+                                 bool &isLaneOp, unsigned &NewOpc,
+                                 unsigned &NumVecs) {
+  isLoadOp = true;
+  isLaneOp = false;
+  NewOpc = 0;
+  NumVecs = 0;
+  if (isIntrinsic) {
+    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    default: llvm_unreachable("unexpected intrinsic for Neon base update");
+    case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
+      NumVecs = 1; break;
+    case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
+      NumVecs = 2; break;
+    case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
+      NumVecs = 3; break;
+    case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
+      NumVecs = 4; break;
+    case Intrinsic::arm_neon_vld1x2:
+    case Intrinsic::arm_neon_vld1x3:
+    case Intrinsic::arm_neon_vld1x4:
+    case Intrinsic::arm_neon_vld2dup:
+    case Intrinsic::arm_neon_vld3dup:
+    case Intrinsic::arm_neon_vld4dup:
+    case Intrinsic::arm_neon_vst1x2:
+    case Intrinsic::arm_neon_vst1x3:
+    case Intrinsic::arm_neon_vst1x4:
+      // TODO: Support updating VLD1x, VST1x and VLDxDUP nodes.
+      // For now, we just skip combining base updates for such intrinsics.
+      return false;
+    case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+      NumVecs = 2; isLaneOp = true; break;
+    case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+      NumVecs = 3; isLaneOp = true; break;
+    case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+      NumVecs = 4; isLaneOp = true; break;
+    case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
+      NumVecs = 1; isLoadOp = false; break;
+    case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
+      NumVecs = 2; isLoadOp = false; break;
+    case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
+      NumVecs = 3; isLoadOp = false; break;
+    case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
+      NumVecs = 4; isLoadOp = false; break;
+    case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+      NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
+    case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+      NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
+    case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+      NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
+    }
+  } else {
+    isLaneOp = true;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("unexpected opcode for Neon base update");
+    case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
+    case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+    case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+    case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+    case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD;
+      NumVecs = 1; isLaneOp = false; break;
+    case ISD::STORE:      NewOpc = ARMISD::VST1_UPD;
+      NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
+    }
+  }
+  return true;
+}
+
 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
 /// NEON load/store intrinsics, and generic vector load/stores, to merge
 /// base address updates.
@@ -14218,6 +14287,29 @@ static SDValue CombineBaseUpdate(SDNode *N,
   MemSDNode *MemN = cast<MemSDNode>(N);
   SDLoc dl(N);
 
+  // Find the new opcode for the updating load/store.
+  bool isLoadOp, isLaneOp;
+  unsigned NewOpc, NumVecs;
+  if (!ClassifyForCombining(N, isIntrinsic, isLoadOp, isLaneOp, NewOpc,
+                            NumVecs))
+    return SDValue();
+
+  // Find the size of memory referenced by the load/store.
+  EVT VecTy;
+  if (isLoadOp) {
+    VecTy = N->getValueType(0);
+  } else if (isIntrinsic) {
+    VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
+  } else {
+    assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+    VecTy = N->getOperand(1).getValueType();
+  }
+
+  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+  if (isLaneOp)
+    NumBytes /= VecTy.getVectorNumElements();
+
+  SmallVector<SDNode *, 16> Increments;
   // Search for a use of the address operand that is an increment.
   for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
        UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
@@ -14237,84 +14329,24 @@ static SDValue CombineBaseUpdate(SDNode *N,
     if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
         SDNode::hasPredecessorHelper(User, Visited, Worklist))
       continue;
+    Increments.push_back(User);
+  }
 
-    // Find the new opcode for the updating load/store.
-    bool isLoadOp = true;
-    bool isLaneOp = false;
-    unsigned NewOpc = 0;
-    unsigned NumVecs = 0;
-    if (isIntrinsic) {
-      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-      switch (IntNo) {
-      default: llvm_unreachable("unexpected intrinsic for Neon base update");
-      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
-        NumVecs = 1; break;
-      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
-        NumVecs = 2; break;
-      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
-        NumVecs = 3; break;
-      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
-        NumVecs = 4; break;
-      case Intrinsic::arm_neon_vld1x2:
-      case Intrinsic::arm_neon_vld1x3:
-      case Intrinsic::arm_neon_vld1x4:
-      case Intrinsic::arm_neon_vld2dup:
-      case Intrinsic::arm_neon_vld3dup:
-      case Intrinsic::arm_neon_vld4dup:
-        // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
-        // combining base updates for such intrinsics.
-        continue;
-      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
-        NumVecs = 2; isLaneOp = true; break;
-      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
-        NumVecs = 3; isLaneOp = true; break;
-      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
-        NumVecs = 4; isLaneOp = true; break;
-      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
-        NumVecs = 1; isLoadOp = false; break;
-      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
-        NumVecs = 2; isLoadOp = false; break;
-      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
-        NumVecs = 3; isLoadOp = false; break;
-      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
-        NumVecs = 4; isLoadOp = false; break;
-      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
-        NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
-      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
-        NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
-      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
-        NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
-      }
-    } else {
-      isLaneOp = true;
-      switch (N->getOpcode()) {
-      default: llvm_unreachable("unexpected opcode for Neon base update");
-      case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
-      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
-      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
-      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
-      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD;
-        NumVecs = 1; isLaneOp = false; break;
-      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD;
-        NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
-      }
-    }
-
-    // Find the size of memory referenced by the load/store.
-    EVT VecTy;
-    if (isLoadOp) {
-      VecTy = N->getValueType(0);
-    } else if (isIntrinsic) {
-      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
-    } else {
-      assert(isStore && "Node has to be a load, a store, or an intrinsic!");
-      VecTy = N->getOperand(1).getValueType();
-    }
-
-    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
-    if (isLaneOp)
-      NumBytes /= VecTy.getVectorNumElements();
+  auto ClassifyIncrement = [Addr, NumBytes](SDNode *A) -> uint64_t {
+    SDValue Inc = A->getOperand(A->getOperand(0) == Addr ? 1 : 0);
+    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+    if (!CInc)
+      return std::numeric_limits<uint64_t>::max(); // try if other cases fail
+    uint64_t CIncValue = CInc->getZExtValue();
+    if (CIncValue == NumBytes)
+      return 0; // best match
+    return CIncValue;
+  };
+  stable_sort(Increments, [ClassifyIncrement](SDNode *A, SDNode *B) {
+    return ClassifyIncrement(A) < ClassifyIncrement(B);
+  });
 
+  for (auto User : Increments) {
     // If the increment is a constant, it must match the memory ref size.
     SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
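As a worked example of the access-size computation in this hunk (a sketch with illustrative values, not code from the patch): a vld2lane of <4 x i16> touches one 2-byte lane in each of two vectors, so the increment that enables the post-increment form is 4 bytes, not the 16-byte whole-vector size.

    #include <cassert>

    int main() {
      // Illustrative values for a vld2lane of <4 x i16>.
      unsigned NumVecs = 2;        // two vectors are accessed
      unsigned VecSizeInBits = 64; // <4 x i16> is 64 bits wide
      unsigned NumElements = 4;    // 4 lanes per vector
      bool isLaneOp = true;        // only one lane of each vector is touched

      unsigned NumBytes = NumVecs * VecSizeInBits / 8; // 16: whole vectors
      if (isLaneOp)
        NumBytes /= NumElements;                       // 4: two i16 lanes
      assert(NumBytes == 4);
      return 0;
    }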

llvm/test/CodeGen/ARM/fp16-vector-argument.ll

Lines changed: 29 additions & 29 deletions
@@ -83,16 +83,16 @@ define void @test(double, float, i16, <4 x half>, <8 x half>) {
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    push {r11, lr}
 ; SOFT-NEXT:    sub sp, sp, #32
-; SOFT-NEXT:    vldr d16, [sp, #40]
-; SOFT-NEXT:    mov r12, #16
-; SOFT-NEXT:    vabs.f16 d16, d16
-; SOFT-NEXT:    mov lr, sp
-; SOFT-NEXT:    vst1.16 {d16}, [lr:64], r12
 ; SOFT-NEXT:    add r12, sp, #48
 ; SOFT-NEXT:    vld1.64 {d16, d17}, [r12]
+; SOFT-NEXT:    add r12, sp, #16
 ; SOFT-NEXT:    vabs.f16 q8, q8
-; SOFT-NEXT:    str r3, [sp, #8]
-; SOFT-NEXT:    vst1.64 {d16, d17}, [lr]
+; SOFT-NEXT:    vst1.64 {d16, d17}, [r12]
+; SOFT-NEXT:    mov r12, sp
+; SOFT-NEXT:    vldr d16, [sp, #40]
+; SOFT-NEXT:    vabs.f16 d16, d16
+; SOFT-NEXT:    vst1.16 {d16}, [r12:64]!
+; SOFT-NEXT:    str r3, [r12]
 ; SOFT-NEXT:    bl use
 ; SOFT-NEXT:    add sp, sp, #32
 ; SOFT-NEXT:    pop {r11, pc}
@@ -105,26 +105,26 @@ define void @test(double, float, i16, <4 x half>, <8 x half>) {
 ;
 ; SOFTEB-LABEL: test:
 ; SOFTEB:       @ %bb.0: @ %entry
-; SOFTEB-NEXT:    .save {r11, lr}
-; SOFTEB-NEXT:    push {r11, lr}
+; SOFTEB-NEXT:    .save {r4, lr}
+; SOFTEB-NEXT:    push {r4, lr}
 ; SOFTEB-NEXT:    .pad #32
 ; SOFTEB-NEXT:    sub sp, sp, #32
 ; SOFTEB-NEXT:    vldr d16, [sp, #40]
-; SOFTEB-NEXT:    mov r12, #16
 ; SOFTEB-NEXT:    mov lr, sp
-; SOFTEB-NEXT:    str r3, [sp, #8]
+; SOFTEB-NEXT:    add r4, sp, #48
+; SOFTEB-NEXT:    add r12, sp, #16
 ; SOFTEB-NEXT:    vrev64.16 d16, d16
 ; SOFTEB-NEXT:    vabs.f16 d16, d16
-; SOFTEB-NEXT:    vst1.16 {d16}, [lr:64], r12
-; SOFTEB-NEXT:    add r12, sp, #48
-; SOFTEB-NEXT:    vld1.64 {d16, d17}, [r12]
+; SOFTEB-NEXT:    vst1.16 {d16}, [lr:64]!
+; SOFTEB-NEXT:    vld1.64 {d16, d17}, [r4]
 ; SOFTEB-NEXT:    vrev64.16 q8, q8
+; SOFTEB-NEXT:    str r3, [lr]
 ; SOFTEB-NEXT:    vabs.f16 q8, q8
 ; SOFTEB-NEXT:    vrev64.16 q8, q8
-; SOFTEB-NEXT:    vst1.64 {d16, d17}, [lr]
+; SOFTEB-NEXT:    vst1.64 {d16, d17}, [r12]
 ; SOFTEB-NEXT:    bl use
 ; SOFTEB-NEXT:    add sp, sp, #32
-; SOFTEB-NEXT:    pop {r11, pc}
+; SOFTEB-NEXT:    pop {r4, pc}
 ;
 ; HARDEB-LABEL: test:
 ; HARDEB:       @ %bb.0: @ %entry
@@ -148,20 +148,20 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
 ; SOFT-NEXT:    push {r11, lr}
 ; SOFT-NEXT:    sub sp, sp, #32
 ; SOFT-NEXT:    add r12, sp, #80
-; SOFT-NEXT:    mov lr, sp
 ; SOFT-NEXT:    vld1.64 {d16, d17}, [r12]
 ; SOFT-NEXT:    add r12, sp, #48
 ; SOFT-NEXT:    vabs.f16 q8, q8
 ; SOFT-NEXT:    vld1.64 {d18, d19}, [r12]
 ; SOFT-NEXT:    add r12, sp, #64
-; SOFT-NEXT:    str r3, [sp, #8]
 ; SOFT-NEXT:    vadd.f16 q8, q8, q9
 ; SOFT-NEXT:    vld1.64 {d18, d19}, [r12]
-; SOFT-NEXT:    mov r12, #16
+; SOFT-NEXT:    add r12, sp, #16
 ; SOFT-NEXT:    vmul.f16 q8, q9, q8
-; SOFT-NEXT:    vldr d18, [sp, #40]
-; SOFT-NEXT:    vst1.16 {d18}, [lr:64], r12
-; SOFT-NEXT:    vst1.64 {d16, d17}, [lr]
+; SOFT-NEXT:    vst1.64 {d16, d17}, [r12]
+; SOFT-NEXT:    mov r12, sp
+; SOFT-NEXT:    vldr d16, [sp, #40]
+; SOFT-NEXT:    vst1.16 {d16}, [r12:64]!
+; SOFT-NEXT:    str r3, [r12]
 ; SOFT-NEXT:    bl use
 ; SOFT-NEXT:    add sp, sp, #32
 ; SOFT-NEXT:    pop {r11, pc}
@@ -181,13 +181,8 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
 ; SOFTEB-NEXT:    push {r11, lr}
 ; SOFTEB-NEXT:    .pad #32
 ; SOFTEB-NEXT:    sub sp, sp, #32
-; SOFTEB-NEXT:    vldr d16, [sp, #40]
-; SOFTEB-NEXT:    mov r12, #16
-; SOFTEB-NEXT:    mov lr, sp
-; SOFTEB-NEXT:    str r3, [sp, #8]
-; SOFTEB-NEXT:    vrev64.16 d16, d16
-; SOFTEB-NEXT:    vst1.16 {d16}, [lr:64], r12
 ; SOFTEB-NEXT:    add r12, sp, #80
+; SOFTEB-NEXT:    mov lr, sp
 ; SOFTEB-NEXT:    vld1.64 {d16, d17}, [r12]
 ; SOFTEB-NEXT:    add r12, sp, #48
 ; SOFTEB-NEXT:    vrev64.16 q8, q8
@@ -197,10 +192,15 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
 ; SOFTEB-NEXT:    vrev64.16 q9, q9
 ; SOFTEB-NEXT:    vadd.f16 q8, q8, q9
 ; SOFTEB-NEXT:    vld1.64 {d18, d19}, [r12]
+; SOFTEB-NEXT:    add r12, sp, #16
 ; SOFTEB-NEXT:    vrev64.16 q9, q9
 ; SOFTEB-NEXT:    vmul.f16 q8, q9, q8
+; SOFTEB-NEXT:    vldr d18, [sp, #40]
+; SOFTEB-NEXT:    vrev64.16 d18, d18
+; SOFTEB-NEXT:    vst1.16 {d18}, [lr:64]!
+; SOFTEB-NEXT:    str r3, [lr]
 ; SOFTEB-NEXT:    vrev64.16 q8, q8
-; SOFTEB-NEXT:    vst1.64 {d16, d17}, [lr]
+; SOFTEB-NEXT:    vst1.64 {d16, d17}, [r12]
 ; SOFTEB-NEXT:    bl use
 ; SOFTEB-NEXT:    add sp, sp, #32
 ; SOFTEB-NEXT:    pop {r11, pc}

llvm/test/CodeGen/ARM/memcpy-inline.ll

Lines changed: 8 additions & 8 deletions
@@ -44,14 +44,14 @@ entry:
 define void @t2(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t2:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2]!
-; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: movs [[INC:r[0-9]+]], #32
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]]
-; CHECK: movw [[REG2:r[0-9]+]], #16716
-; CHECK: movt [[REG2:r[0-9]+]], #72
-; CHECK: str [[REG2]], [r0]
+; CHECK: movw r[[REG1:[0-9]+]], #16716
+; CHECK: movt r[[REG1]], #72
+; CHECK: str r[[REG1]], [r0, #32]
+; CHECK: add r1, pc
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 ; CHECK-T1-LABEL: t2:
 ; CHECK-T1: bl _memcpy
 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false)

llvm/test/CodeGen/ARM/memset-align.ll

Lines changed: 6 additions & 7 deletions
@@ -10,18 +10,17 @@ define void @test() {
 ; CHECK-NEXT:    push {r7, lr}
 ; CHECK-NEXT:    .pad #24
 ; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    vmov.i32 q8, #0x0
 ; CHECK-NEXT:    mov r0, sp
 ; CHECK-NEXT:    mov.w r1, #-1
-; CHECK-NEXT:    vmov.i32 q8, #0x0
-; CHECK-NEXT:    movs r2, #15
-; CHECK-NEXT:    mov r3, r0
+; CHECK-NEXT:    mov r2, r0
 ; CHECK-NEXT:    strd r1, r1, [sp, #8]
 ; CHECK-NEXT:    strd r1, r1, [sp]
-; CHECK-NEXT:    str r1, [sp, #16]
-; CHECK-NEXT:    vst1.64 {d16, d17}, [r3], r2
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    str r2, [r3]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r2]!
+; CHECK-NEXT:    str r1, [r2]
 ; CHECK-NEXT:    str r1, [sp, #20]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    str.w r1, [sp, #15]
 ; CHECK-NEXT:    bl callee
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    pop {r7, pc}

llvm/test/CodeGen/ARM/vector-load.ll

Lines changed: 3 additions & 4 deletions
@@ -253,10 +253,9 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
 }
 
 ; CHECK-LABEL: test_silly_load:
-; CHECK: vldr d{{[0-9]+}}, [r0, #16]
-; CHECK: movs r1, #24
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1
-; CHECK: ldr {{r[0-9]+}}, [r0]
+; CHECK: ldr {{r[0-9]+}}, [r0, #24]
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]!
+; CHECK: vldr d{{[0-9]+}}, [r0]
 
 define void @test_silly_load(<28 x i8>* %addr) {
   load volatile <28 x i8>, <28 x i8>* %addr
