Skip to content

Commit 2e095fd

Browse files
committed
[SLP] Vectorize non-power-of-2 ops with padding.
This patch introduces a new VectorizeWithPadding node type for root and leaf nodes to allow vectorizing loads/stores with a non-power-of-2 number of elements. VectorizeWithPadding load nodes will pad the result to the next power of 2 with poison elements. Non-leaf nodes will operate on normal power-of-2 vectors. For those non-leaf nodes, we still track the number of padding elements needed to reach the next power of 2, to be used in various places, such as cost computation. VectorizeWithPadding store nodes strip away the padding elements and store the non-power-of-2 number of data elements. Note that re-ordering and shuffling are not yet implemented for nodes requiring padding, to keep the initial implementation simpler. The initial implementation also only tries to vectorize with padding if the original number of elements + 1 is a power of 2, i.e. if only a single padding element is needed. The feature is guarded by a new flag, off by default for now.
1 parent 3b3da7c commit 2e095fd

File tree

7 files changed

+807
-348
lines changed

7 files changed

+807
-348
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+233-48
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll

+59-31
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,65 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s
2+
; RUN: opt -passes=slp-vectorizer -slp-vectorize-with-padding -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=PADDING %s
3+
; RUN: opt -passes=slp-vectorizer -slp-vectorize-with-padding=false -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=NO-PADDING %s
34

45
define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
5-
; CHECK-LABEL: define void @v15_load_i8_mul_by_constant_store(
6-
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
7-
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
9-
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
10-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
11-
; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
12-
; CHECK-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
13-
; CHECK-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
14-
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
15-
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
16-
; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
17-
; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
18-
; CHECK-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
19-
; CHECK-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
20-
; CHECK-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
21-
; CHECK-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1
22-
; CHECK-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
23-
; CHECK-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
24-
; CHECK-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
25-
; CHECK-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
26-
; CHECK-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1
27-
; CHECK-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
28-
; CHECK-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
29-
; CHECK-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
30-
; CHECK-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
31-
; CHECK-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
32-
; CHECK-NEXT: ret void
6+
; PADDING-LABEL: define void @v15_load_i8_mul_by_constant_store(
7+
; PADDING-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
8+
; PADDING-NEXT: entry:
9+
; PADDING-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
10+
; PADDING-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
11+
; PADDING-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
12+
; PADDING-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
13+
; PADDING-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
14+
; PADDING-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
15+
; PADDING-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
16+
; PADDING-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
17+
; PADDING-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
18+
; PADDING-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
19+
; PADDING-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
20+
; PADDING-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
21+
; PADDING-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
22+
; PADDING-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1
23+
; PADDING-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
24+
; PADDING-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
25+
; PADDING-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
26+
; PADDING-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
27+
; PADDING-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1
28+
; PADDING-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
29+
; PADDING-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
30+
; PADDING-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
31+
; PADDING-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
32+
; PADDING-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
33+
; PADDING-NEXT: ret void
34+
;
35+
; NO-PADDING-LABEL: define void @v15_load_i8_mul_by_constant_store(
36+
; NO-PADDING-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
37+
; NO-PADDING-NEXT: entry:
38+
; NO-PADDING-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
39+
; NO-PADDING-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
40+
; NO-PADDING-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
41+
; NO-PADDING-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
42+
; NO-PADDING-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
43+
; NO-PADDING-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
44+
; NO-PADDING-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
45+
; NO-PADDING-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
46+
; NO-PADDING-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
47+
; NO-PADDING-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
48+
; NO-PADDING-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
49+
; NO-PADDING-NEXT: [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
50+
; NO-PADDING-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
51+
; NO-PADDING-NEXT: store i8 [[MUL_12]], ptr [[DST_12]], align 1
52+
; NO-PADDING-NEXT: [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
53+
; NO-PADDING-NEXT: [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
54+
; NO-PADDING-NEXT: [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
55+
; NO-PADDING-NEXT: [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
56+
; NO-PADDING-NEXT: store i8 [[MUL_13]], ptr [[DST_13]], align 1
57+
; NO-PADDING-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
58+
; NO-PADDING-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
59+
; NO-PADDING-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
60+
; NO-PADDING-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
61+
; NO-PADDING-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
62+
; NO-PADDING-NEXT: ret void
3363
;
3464
entry:
3565
%gep.src.0 = getelementptr inbounds i8, ptr %src, i8 0
@@ -123,5 +153,3 @@ entry:
123153

124154
ret void
125155
}
126-
127-

0 commit comments

Comments
 (0)