@@ -87,6 +87,76 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
87
87
ret <4 x i32 > %e
88
88
}
89
89
90
; Load a <3 x i8> with only 2-byte alignment, widen it to four lanes (the
; fourth lane is left undefined) and zero-extend the result to <4 x i32>.
; The expected code does not use a single 32-bit load: it reads a halfword
; (ldrh) plus a separate byte at offset 2 (ldrsb) and assembles the vector
; through a 16-byte stack slot.
define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #2]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_2:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 2
  ; Mask indices 3..5 select lanes 0..2 of %l (the second operand); the last
  ; lane is a don't-care, spelled `poison` rather than the deprecated `undef`.
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 poison>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}
132
+
133
; Load a <3 x i8> with 4-byte alignment, widen it to four lanes (the fourth
; lane is left undefined) and zero-extend the result to <4 x i32>.
; The expected code reads all lanes with a single 32-bit load (ldr s0) and
; needs no stack slot.
define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_4:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_4:
; BE:       // %bb.0:
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 4
  ; Mask indices 3..5 select lanes 0..2 of %l (the second operand); the last
  ; lane is a don't-care, spelled `poison` rather than the deprecated `undef`.
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 poison>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}
159
+
90
160
define <4 x i32 > @load_v3i8_to_4xi32_const_offset_1 (ptr %src ) {
91
161
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
92
162
; CHECK: ; %bb.0:
@@ -176,6 +246,42 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
176
246
}
177
247
178
248
define <4 x i32 > @volatile_load_v3i8_to_4xi32 (ptr %src ) {
; NOTE: FileCheck prefixes are case-sensitive; keep the assertions below spelled CHECK/BE.
179
285
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
180
286
; CHECK: ; %bb.0:
181
287
; CHECK-NEXT: sub sp, sp, #16
@@ -286,9 +392,9 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
286
392
; CHECK-NEXT: ldr s0, [x0]
287
393
; CHECK-NEXT: add x9, x0, #4
288
394
; CHECK-NEXT: Lloh0:
289
- ; CHECK-NEXT: adrp x8, lCPI7_0 @PAGE
395
+ ; CHECK-NEXT: adrp x8, lCPI9_0 @PAGE
290
396
; CHECK-NEXT: Lloh1:
291
- ; CHECK-NEXT: ldr d1, [x8, lCPI7_0 @PAGEOFF]
397
+ ; CHECK-NEXT: ldr d1, [x8, lCPI9_0 @PAGEOFF]
292
398
; CHECK-NEXT: ld1.h { v0 }[2], [x9]
293
399
; CHECK-NEXT: add.4h v0, v0, v1
294
400
; CHECK-NEXT: xtn.8b v1, v0
@@ -309,8 +415,8 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
309
415
; BE-NEXT: add x8, x0, #4
310
416
; BE-NEXT: rev32 v0.4h, v0.4h
311
417
; BE-NEXT: ld1 { v0.h }[2], [x8]
312
- ; BE-NEXT: adrp x8, .LCPI7_0
313
- ; BE-NEXT: add x8, x8, :lo12:.LCPI7_0
418
+ ; BE-NEXT: adrp x8, .LCPI9_0
419
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI9_0
314
420
; BE-NEXT: ld1 { v1.4h }, [x8]
315
421
; BE-NEXT: add v0.4h, v0.4h, v1.4h
316
422
; BE-NEXT: xtn v1.8b, v0.8h
@@ -373,16 +479,74 @@ entry:
373
479
ret void
374
480
}
375
481
482
; Zero-extend a <3 x i8> load to <3 x i16> and store it with align 1. The load
; carries no explicit alignment. The expected code reads the source with one
; 32-bit load (ldr s0) and writes the 6-byte result as a 32-bit store plus a
; halfword lane store at offset 4.
define void @load_ext_to_64bits_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_default_align:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits_default_align:
; BE:       // %bb.0: // %entry
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    ret
entry:
  %narrow = load <3 x i8>, ptr %src
  %wide = zext <3 x i8> %narrow to <3 x i16>
  store <3 x i16> %wide, ptr %dst, align 1
  ret void
}
510
+
511
; Zero-extend a 4-byte-aligned <3 x i8> load to <3 x i16> and store it with
; align 1. The expected code reads the source with one 32-bit load (ldr s0)
; and writes the 6-byte result as a 32-bit store plus a halfword lane store at
; offset 4.
define void @load_ext_to_64bits_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_align_4:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits_align_4:
; BE:       // %bb.0: // %entry
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    ret
entry:
  %narrow = load <3 x i8>, ptr %src, align 4
  %wide = zext <3 x i8> %narrow to <3 x i16>
  store <3 x i16> %wide, ptr %dst, align 1
  ret void
}
539
+
376
540
define void @load_ext_add_to_64bits (ptr %src , ptr %dst ) {
377
541
; CHECK-LABEL: load_ext_add_to_64bits:
378
542
; CHECK: ; %bb.0: ; %entry
379
543
; CHECK-NEXT: sub sp, sp, #16
380
544
; CHECK-NEXT: .cfi_def_cfa_offset 16
381
545
; CHECK-NEXT: ldrh w9, [x0]
382
546
; CHECK-NEXT: Lloh2:
383
- ; CHECK-NEXT: adrp x8, lCPI9_0 @PAGE
547
+ ; CHECK-NEXT: adrp x8, lCPI13_0 @PAGE
384
548
; CHECK-NEXT: Lloh3:
385
- ; CHECK-NEXT: ldr d1, [x8, lCPI9_0 @PAGEOFF]
549
+ ; CHECK-NEXT: ldr d1, [x8, lCPI13_0 @PAGEOFF]
386
550
; CHECK-NEXT: add x8, x1, #4
387
551
; CHECK-NEXT: strh w9, [sp, #12]
388
552
; CHECK-NEXT: add x9, x0, #2
@@ -408,8 +572,8 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
408
572
; BE-NEXT: rev32 v0.8b, v0.8b
409
573
; BE-NEXT: ushll v0.8h, v0.8b, #0
410
574
; BE-NEXT: ld1 { v0.b }[4], [x8]
411
- ; BE-NEXT: adrp x8, .LCPI9_0
412
- ; BE-NEXT: add x8, x8, :lo12:.LCPI9_0
575
+ ; BE-NEXT: adrp x8, .LCPI13_0
576
+ ; BE-NEXT: add x8, x8, :lo12:.LCPI13_0
413
577
; BE-NEXT: ld1 { v1.4h }, [x8]
414
578
; BE-NEXT: add x8, x1, #4
415
579
; BE-NEXT: bic v0.4h, #255, lsl #8
@@ -465,6 +629,82 @@ define void @shift_trunc_store(ptr %src, ptr %dst) {
465
629
ret void
466
630
}
467
631
632
; Load a <3 x i32>, shift every lane right by 16, truncate to <3 x i8> and
; store the result. Neither the load nor the store has an explicit alignment.
; The expected code writes the three bytes as a halfword (strh) plus a single
; byte at offset 2 (strb), going through a 16-byte stack slot.
define void @shift_trunc_store_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_default_align:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_default_align:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %words = load <3 x i32>, ptr %src
  %hi = lshr <3 x i32> %words, <i32 16, i32 16, i32 16>
  %bytes = trunc <3 x i32> %hi to <3 x i8>
  store <3 x i8> %bytes, ptr %dst
  ret void
}
669
+
670
; Load a <3 x i32> (no explicit alignment), shift every lane right by 16,
; truncate to <3 x i8> and store the result with align 4.
; The expected code writes the three bytes as a halfword (strh) plus a single
; byte at offset 2 (strb), going through a 16-byte stack slot.
define void @shift_trunc_store_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_align_4:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    xtn.8b v1, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_align_4:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    xtn v1.8b, v0.8h
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %words = load <3 x i32>, ptr %src
  %hi = lshr <3 x i32> %words, <i32 16, i32 16, i32 16>
  %bytes = trunc <3 x i32> %hi to <3 x i8>
  store <3 x i8> %bytes, ptr %dst, align 4
  ret void
}
707
+
468
708
define void @shift_trunc_store_const_offset_1 (ptr %src , ptr %dst ) {
469
709
; CHECK-LABEL: shift_trunc_store_const_offset_1:
470
710
; CHECK: ; %bb.0:
0 commit comments