Skip to content

Commit 8336515

Browse files
committed
[AArch64] Add tests for operations on vectors with 3 elements.
1 parent 741b836 commit 8336515

File tree

1 file changed

+310
-0
lines changed

1 file changed

+310
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s
4+
5+
; Test: load a <3 x i8> from %src (align 1) and widen it to <16 x i8> with a
; shufflevector, so the three loaded bytes occupy lanes 0-2 and the other 13
; lanes are undef. %dst is not used by the body.
; Expected assembly is recorded for both RUN lines (arm64-apple-macosx and
; aarch64_be).
define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
6+
; CHECK-LABEL: load_v3i8:
7+
; CHECK: ; %bb.0:
8+
; CHECK-NEXT: sub sp, sp, #16
9+
; CHECK-NEXT: .cfi_def_cfa_offset 16
10+
; CHECK-NEXT: ldrh w8, [x0]
11+
; CHECK-NEXT: strh w8, [sp, #12]
12+
; CHECK-NEXT: ldr s0, [sp, #12]
13+
; CHECK-NEXT: ushll.8h v0, v0, #0
14+
; CHECK-NEXT: umov.h w8, v0[0]
15+
; CHECK-NEXT: umov.h w9, v0[1]
16+
; CHECK-NEXT: fmov s0, w8
17+
; CHECK-NEXT: add x8, x0, #2
18+
; CHECK-NEXT: mov.b v0[1], w9
19+
; CHECK-NEXT: ld1.b { v0 }[2], [x8]
20+
; CHECK-NEXT: add sp, sp, #16
21+
; CHECK-NEXT: ret
22+
;
23+
; BE-LABEL: load_v3i8:
24+
; BE: // %bb.0:
25+
; BE-NEXT: sub sp, sp, #16
26+
; BE-NEXT: .cfi_def_cfa_offset 16
27+
; BE-NEXT: ldrh w8, [x0]
28+
; BE-NEXT: strh w8, [sp, #12]
29+
; BE-NEXT: ldr s0, [sp, #12]
30+
; BE-NEXT: rev32 v0.8b, v0.8b
31+
; BE-NEXT: ushll v0.8h, v0.8b, #0
32+
; BE-NEXT: umov w8, v0.h[0]
33+
; BE-NEXT: umov w9, v0.h[1]
34+
; BE-NEXT: fmov s0, w8
35+
; BE-NEXT: add x8, x0, #2
36+
; BE-NEXT: mov v0.b[1], w9
37+
; BE-NEXT: ld1 { v0.b }[2], [x8]
38+
; BE-NEXT: rev64 v0.16b, v0.16b
39+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
40+
; BE-NEXT: add sp, sp, #16
41+
; BE-NEXT: ret
42+
%l = load <3 x i8>, ptr %src, align 1
43+
; Mask indices 3..5 select elements 0..2 of the second operand (%l); the
; first operand is poison and is never selected.
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
44+
ret <16 x i8> %s
45+
}
46+
47+
; Test: load a <3 x i8> from %src (align 1), widen it to <4 x i8> with a
; shufflevector (lane 3 undef), then zero-extend to <4 x i32> and return it.
; %dst is not used by the body.
; Expected assembly is recorded for both RUN lines (arm64-apple-macosx and
; aarch64_be).
define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
48+
; CHECK-LABEL: load_v3i8_to_4xi32:
49+
; CHECK: ; %bb.0:
50+
; CHECK-NEXT: sub sp, sp, #16
51+
; CHECK-NEXT: .cfi_def_cfa_offset 16
52+
; CHECK-NEXT: ldrh w8, [x0]
53+
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
54+
; CHECK-NEXT: strh w8, [sp, #12]
55+
; CHECK-NEXT: ldr s0, [sp, #12]
56+
; CHECK-NEXT: ldrsb w8, [x0, #2]
57+
; CHECK-NEXT: ushll.8h v0, v0, #0
58+
; CHECK-NEXT: mov.h v0[1], v0[1]
59+
; CHECK-NEXT: mov.h v0[2], w8
60+
; CHECK-NEXT: ushll.4s v0, v0, #0
61+
; CHECK-NEXT: and.16b v0, v0, v1
62+
; CHECK-NEXT: add sp, sp, #16
63+
; CHECK-NEXT: ret
64+
;
65+
; BE-LABEL: load_v3i8_to_4xi32:
66+
; BE: // %bb.0:
67+
; BE-NEXT: sub sp, sp, #16
68+
; BE-NEXT: .cfi_def_cfa_offset 16
69+
; BE-NEXT: ldrh w8, [x0]
70+
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
71+
; BE-NEXT: strh w8, [sp, #12]
72+
; BE-NEXT: ldr s0, [sp, #12]
73+
; BE-NEXT: ldrsb w8, [x0, #2]
74+
; BE-NEXT: rev32 v0.8b, v0.8b
75+
; BE-NEXT: ushll v0.8h, v0.8b, #0
76+
; BE-NEXT: mov v0.h[1], v0.h[1]
77+
; BE-NEXT: mov v0.h[2], w8
78+
; BE-NEXT: ushll v0.4s, v0.4h, #0
79+
; BE-NEXT: and v0.16b, v0.16b, v1.16b
80+
; BE-NEXT: rev64 v0.4s, v0.4s
81+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
82+
; BE-NEXT: add sp, sp, #16
83+
; BE-NEXT: ret
84+
%l = load <3 x i8>, ptr %src, align 1
85+
; Mask indices 3..5 select elements 0..2 of the second operand (%l); lane 3
; of the result is undef.
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
86+
%e = zext <4 x i8> %s to <4 x i32>
87+
ret <4 x i32> %e
88+
}
89+
90+
; Test: same IR as load_v3i8_to_4xi32 except that the <3 x i8> load is
; volatile. The loaded value is widened to <4 x i8> (lane 3 undef) and
; zero-extended to <4 x i32>. %dst is not used by the body.
; The expected assembly recorded below is line-for-line identical to that of
; load_v3i8_to_4xi32 for both RUN lines.
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src, ptr %dst) {
91+
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
92+
; CHECK: ; %bb.0:
93+
; CHECK-NEXT: sub sp, sp, #16
94+
; CHECK-NEXT: .cfi_def_cfa_offset 16
95+
; CHECK-NEXT: ldrh w8, [x0]
96+
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
97+
; CHECK-NEXT: strh w8, [sp, #12]
98+
; CHECK-NEXT: ldr s0, [sp, #12]
99+
; CHECK-NEXT: ldrsb w8, [x0, #2]
100+
; CHECK-NEXT: ushll.8h v0, v0, #0
101+
; CHECK-NEXT: mov.h v0[1], v0[1]
102+
; CHECK-NEXT: mov.h v0[2], w8
103+
; CHECK-NEXT: ushll.4s v0, v0, #0
104+
; CHECK-NEXT: and.16b v0, v0, v1
105+
; CHECK-NEXT: add sp, sp, #16
106+
; CHECK-NEXT: ret
107+
;
108+
; BE-LABEL: volatile_load_v3i8_to_4xi32:
109+
; BE: // %bb.0:
110+
; BE-NEXT: sub sp, sp, #16
111+
; BE-NEXT: .cfi_def_cfa_offset 16
112+
; BE-NEXT: ldrh w8, [x0]
113+
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
114+
; BE-NEXT: strh w8, [sp, #12]
115+
; BE-NEXT: ldr s0, [sp, #12]
116+
; BE-NEXT: ldrsb w8, [x0, #2]
117+
; BE-NEXT: rev32 v0.8b, v0.8b
118+
; BE-NEXT: ushll v0.8h, v0.8b, #0
119+
; BE-NEXT: mov v0.h[1], v0.h[1]
120+
; BE-NEXT: mov v0.h[2], w8
121+
; BE-NEXT: ushll v0.4s, v0.4h, #0
122+
; BE-NEXT: and v0.16b, v0.16b, v1.16b
123+
; BE-NEXT: rev64 v0.4s, v0.4s
124+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
125+
; BE-NEXT: add sp, sp, #16
126+
; BE-NEXT: ret
127+
; The volatile qualifier on this load is the only IR difference from
; load_v3i8_to_4xi32.
%l = load volatile <3 x i8>, ptr %src, align 1
128+
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
129+
%e = zext <4 x i8> %s to <4 x i32>
130+
ret <4 x i32> %e
131+
}
132+
133+
; Test: load a <3 x i32> from %src with align 1 and return it unchanged.
; The recorded assembly loads the first two elements with one 64-bit load
; (ldr d0) and inserts the third with a lane load (ld1) from %src + 8.
define <3 x i32> @load_v3i32(ptr %src) {
134+
; CHECK-LABEL: load_v3i32:
135+
; CHECK: ; %bb.0:
136+
; CHECK-NEXT: ldr d0, [x0]
137+
; CHECK-NEXT: add x8, x0, #8
138+
; CHECK-NEXT: ld1.s { v0 }[2], [x8]
139+
; CHECK-NEXT: ret
140+
;
141+
; BE-LABEL: load_v3i32:
142+
; BE: // %bb.0:
143+
; BE-NEXT: ldr d0, [x0]
144+
; BE-NEXT: add x8, x0, #8
145+
; BE-NEXT: rev64 v0.4s, v0.4s
146+
; BE-NEXT: ld1 { v0.s }[2], [x8]
147+
; BE-NEXT: rev64 v0.4s, v0.4s
148+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
149+
; BE-NEXT: ret
150+
%l = load <3 x i32>, ptr %src, align 1
151+
ret <3 x i32> %l
152+
}
153+
154+
; Test: load a <3 x i16> from %src (align 1), truncate each element to i8,
; and store the resulting <3 x i8> to %dst (align 1).
; The recorded assembly writes the result as a halfword store to [x1] plus a
; byte store to [x1, #2].
define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
155+
; CHECK-LABEL: store_trunc_from_64bits:
156+
; CHECK: ; %bb.0: ; %entry
157+
; CHECK-NEXT: sub sp, sp, #16
158+
; CHECK-NEXT: .cfi_def_cfa_offset 16
159+
; CHECK-NEXT: ldr s0, [x0]
160+
; CHECK-NEXT: ldrh w8, [x0, #4]
161+
; CHECK-NEXT: mov.h v0[2], w8
162+
; CHECK-NEXT: xtn.8b v0, v0
163+
; CHECK-NEXT: str s0, [sp, #12]
164+
; CHECK-NEXT: ldrh w9, [sp, #12]
165+
; CHECK-NEXT: strb w8, [x1, #2]
166+
; CHECK-NEXT: strh w9, [x1]
167+
; CHECK-NEXT: add sp, sp, #16
168+
; CHECK-NEXT: ret
169+
;
170+
; BE-LABEL: store_trunc_from_64bits:
171+
; BE: // %bb.0: // %entry
172+
; BE-NEXT: sub sp, sp, #16
173+
; BE-NEXT: .cfi_def_cfa_offset 16
174+
; BE-NEXT: ldr s0, [x0]
175+
; BE-NEXT: ldrh w8, [x0, #4]
176+
; BE-NEXT: rev32 v0.4h, v0.4h
177+
; BE-NEXT: mov v0.h[2], w8
178+
; BE-NEXT: xtn v0.8b, v0.8h
179+
; BE-NEXT: rev32 v0.16b, v0.16b
180+
; BE-NEXT: str s0, [sp, #12]
181+
; BE-NEXT: ldrh w9, [sp, #12]
182+
; BE-NEXT: strb w8, [x1, #2]
183+
; BE-NEXT: strh w9, [x1]
184+
; BE-NEXT: add sp, sp, #16
185+
; BE-NEXT: ret
186+
entry:
187+
%l = load <3 x i16>, ptr %src, align 1
188+
%t = trunc <3 x i16> %l to <3 x i8>
189+
store <3 x i8> %t, ptr %dst, align 1
190+
ret void
191+
}
192+
193+
; Test: load a <3 x i8> from %src (align 1), zero-extend each element to i16,
; and store the resulting <3 x i16> to %dst (align 1).
; The recorded assembly writes the result as a 32-bit store to [x1] plus a
; halfword lane store (st1) to %dst + 4.
define void @load_ext_to_64bits(ptr %src, ptr %dst) {
194+
; CHECK-LABEL: load_ext_to_64bits:
195+
; CHECK: ; %bb.0: ; %entry
196+
; CHECK-NEXT: sub sp, sp, #16
197+
; CHECK-NEXT: .cfi_def_cfa_offset 16
198+
; CHECK-NEXT: ldrh w8, [x0]
199+
; CHECK-NEXT: strh w8, [sp, #12]
200+
; CHECK-NEXT: add x8, x0, #2
201+
; CHECK-NEXT: ldr s0, [sp, #12]
202+
; CHECK-NEXT: ushll.8h v0, v0, #0
203+
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
204+
; CHECK-NEXT: add x8, x1, #4
205+
; CHECK-NEXT: bic.4h v0, #255, lsl #8
206+
; CHECK-NEXT: st1.h { v0 }[2], [x8]
207+
; CHECK-NEXT: str s0, [x1]
208+
; CHECK-NEXT: add sp, sp, #16
209+
; CHECK-NEXT: ret
210+
;
211+
; BE-LABEL: load_ext_to_64bits:
212+
; BE: // %bb.0: // %entry
213+
; BE-NEXT: sub sp, sp, #16
214+
; BE-NEXT: .cfi_def_cfa_offset 16
215+
; BE-NEXT: ldrh w8, [x0]
216+
; BE-NEXT: strh w8, [sp, #12]
217+
; BE-NEXT: add x8, x0, #2
218+
; BE-NEXT: ldr s0, [sp, #12]
219+
; BE-NEXT: rev32 v0.8b, v0.8b
220+
; BE-NEXT: ushll v0.8h, v0.8b, #0
221+
; BE-NEXT: ld1 { v0.b }[4], [x8]
222+
; BE-NEXT: add x8, x1, #4
223+
; BE-NEXT: bic v0.4h, #255, lsl #8
224+
; BE-NEXT: rev32 v1.8h, v0.8h
225+
; BE-NEXT: st1 { v0.h }[2], [x8]
226+
; BE-NEXT: str s1, [x1]
227+
; BE-NEXT: add sp, sp, #16
228+
; BE-NEXT: ret
229+
entry:
230+
%l = load <3 x i8>, ptr %src, align 1
231+
%e = zext <3 x i8> %l to <3 x i16>
232+
store <3 x i16> %e, ptr %dst, align 1
233+
ret void
234+
}
235+
236+
; Test: load a <3 x i32> from %src (no explicit alignment on the load),
; logically shift every lane right by 16, truncate to <3 x i8>, and store the
; result to %dst (align 1).
; The recorded assembly performs the shift with a narrowing shift (shrn) and
; writes the result as a halfword store to [x1] plus a byte store to
; [x1, #2].
define void @shift_trunc_store(ptr %src, ptr %dst) {
237+
; CHECK-LABEL: shift_trunc_store:
238+
; CHECK: ; %bb.0:
239+
; CHECK-NEXT: sub sp, sp, #16
240+
; CHECK-NEXT: .cfi_def_cfa_offset 16
241+
; CHECK-NEXT: ldr q0, [x0]
242+
; CHECK-NEXT: shrn.4h v0, v0, #16
243+
; CHECK-NEXT: xtn.8b v1, v0
244+
; CHECK-NEXT: umov.h w8, v0[2]
245+
; CHECK-NEXT: str s1, [sp, #12]
246+
; CHECK-NEXT: ldrh w9, [sp, #12]
247+
; CHECK-NEXT: strb w8, [x1, #2]
248+
; CHECK-NEXT: strh w9, [x1]
249+
; CHECK-NEXT: add sp, sp, #16
250+
; CHECK-NEXT: ret
251+
;
252+
; BE-LABEL: shift_trunc_store:
253+
; BE: // %bb.0:
254+
; BE-NEXT: sub sp, sp, #16
255+
; BE-NEXT: .cfi_def_cfa_offset 16
256+
; BE-NEXT: ld1 { v0.4s }, [x0]
257+
; BE-NEXT: shrn v0.4h, v0.4s, #16
258+
; BE-NEXT: xtn v1.8b, v0.8h
259+
; BE-NEXT: umov w8, v0.h[2]
260+
; BE-NEXT: rev32 v1.16b, v1.16b
261+
; BE-NEXT: str s1, [sp, #12]
262+
; BE-NEXT: ldrh w9, [sp, #12]
263+
; BE-NEXT: strb w8, [x1, #2]
264+
; BE-NEXT: strh w9, [x1]
265+
; BE-NEXT: add sp, sp, #16
266+
; BE-NEXT: ret
267+
%l = load <3 x i32>, ptr %src
268+
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
269+
%t = trunc <3 x i32> %s to <3 x i8>
270+
store <3 x i8> %t, ptr %dst, align 1
271+
ret void
272+
}
273+
274+
; Test: same IR as shift_trunc_store except that the final <3 x i8> store is
; volatile. A <3 x i32> is loaded from %src, every lane is logically shifted
; right by 16, the result is truncated to <3 x i8> and stored to %dst
; (align 1).
; The expected assembly recorded below is line-for-line identical to that of
; shift_trunc_store for both RUN lines.
define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
275+
; CHECK-LABEL: shift_trunc_volatile_store:
276+
; CHECK: ; %bb.0:
277+
; CHECK-NEXT: sub sp, sp, #16
278+
; CHECK-NEXT: .cfi_def_cfa_offset 16
279+
; CHECK-NEXT: ldr q0, [x0]
280+
; CHECK-NEXT: shrn.4h v0, v0, #16
281+
; CHECK-NEXT: xtn.8b v1, v0
282+
; CHECK-NEXT: umov.h w8, v0[2]
283+
; CHECK-NEXT: str s1, [sp, #12]
284+
; CHECK-NEXT: ldrh w9, [sp, #12]
285+
; CHECK-NEXT: strb w8, [x1, #2]
286+
; CHECK-NEXT: strh w9, [x1]
287+
; CHECK-NEXT: add sp, sp, #16
288+
; CHECK-NEXT: ret
289+
;
290+
; BE-LABEL: shift_trunc_volatile_store:
291+
; BE: // %bb.0:
292+
; BE-NEXT: sub sp, sp, #16
293+
; BE-NEXT: .cfi_def_cfa_offset 16
294+
; BE-NEXT: ld1 { v0.4s }, [x0]
295+
; BE-NEXT: shrn v0.4h, v0.4s, #16
296+
; BE-NEXT: xtn v1.8b, v0.8h
297+
; BE-NEXT: umov w8, v0.h[2]
298+
; BE-NEXT: rev32 v1.16b, v1.16b
299+
; BE-NEXT: str s1, [sp, #12]
300+
; BE-NEXT: ldrh w9, [sp, #12]
301+
; BE-NEXT: strb w8, [x1, #2]
302+
; BE-NEXT: strh w9, [x1]
303+
; BE-NEXT: add sp, sp, #16
304+
; BE-NEXT: ret
305+
%l = load <3 x i32>, ptr %src
306+
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
307+
%t = trunc <3 x i32> %s to <3 x i8>
308+
; The volatile qualifier on this store is the only IR difference from
; shift_trunc_store.
store volatile <3 x i8> %t, ptr %dst, align 1
309+
ret void
310+
}

0 commit comments

Comments
 (0)