Skip to content

Commit 0e18e97

Browse files
committed
Now only taskloops whose loop iterator is used in a multidep have num_deps = -1
Closes llvm#101
1 parent 761694f commit 0e18e97

File tree

4 files changed

+113
-7
lines changed

4 files changed

+113
-7
lines changed

llvm/lib/Transforms/OmpSs/OmpSsTransform.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -1726,6 +1726,19 @@ struct OmpSs : public ModulePass {
17261726
return OlConstraintsFuncVar;
17271727
}
17281728

1729+
bool hasMultidepUsingLoopIter(
1730+
const DirectiveLoopInfo &LoopInfo, const DirectiveDependsInfo &DependsInfo) {
1731+
for (auto &DepInfo : DependsInfo.List) {
1732+
if (const auto *MultiDepInfo = dyn_cast<MultiDependInfo>(DepInfo.get())) {
1733+
for (const auto *V : MultiDepInfo->Args) {
1734+
if (V == LoopInfo.IndVar)
1735+
return true;
1736+
}
1737+
}
1738+
}
1739+
return false;
1740+
}
1741+
17291742
Function *createPriorityOlFunc(
17301743
Module &M, Function &F, int taskNum,
17311744
const MapVector<Value *, size_t> &TaskArgsToStructIdxMap,
@@ -2153,6 +2166,7 @@ struct OmpSs : public ModulePass {
21532166
const DirectiveVLADimsInfo &VLADimsInfo = DirEnv.VLADimsInfo;
21542167
const DirectiveCapturedInfo &CapturedInfo = DirEnv.CapturedInfo;
21552168
const DirectiveDependsInfo &DependsInfo = DirEnv.DependsInfo;
2169+
const DirectiveLoopInfo &LoopInfo = DirEnv.LoopInfo;
21562170

21572171
IRBuilder<> IRB(codeReplacer);
21582172
// Set debug info from the task entry to all instructions
@@ -2210,8 +2224,9 @@ struct OmpSs : public ModulePass {
22102224
Instruction *NumDependencies = IRB.CreateAlloca(IRB.getInt64Ty(), nullptr, "num.deps");
22112225
PostMoveInstructions.push_back(NumDependencies);
22122226

2213-
if (DirEnv.isOmpSsTaskLoopDirective()) {
2214-
// If taskloop NumDeps = -1
2227+
if (DirEnv.isOmpSsTaskLoopDirective() && hasMultidepUsingLoopIter(LoopInfo, DependsInfo)) {
2228+
// If taskloop has a multidep using the loop iterator
2229+
// NumDeps = -1
22152230
IRB.CreateStore(IRB.getInt64(-1), NumDependencies);
22162231
} else {
22172232
IRB.CreateStore(IRB.getInt64(0), NumDependencies);

llvm/test/Transforms/OmpSs/loop_directives_num_deps.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ source_filename = "loop_directives_num_deps.ll"
55
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
66
target triple = "x86_64-unknown-linux-gnu"
77

8-
; taskloop directives have -1 num dependencies
9-
108
; void foo(int lb, int ub, int step) {
119
; #pragma oss task for
1210
; for (int i = 0; i < 10; i += 1) {}
@@ -63,7 +61,7 @@ define void @foo(i32 %lb, i32 %ub, i32 %step) #0 !dbg !6 {
6361
; CHECK-NEXT: br label [[FINAL_COND12:%.*]], [[DBG10]]
6462
; CHECK: codeRepl17:
6563
; CHECK-NEXT: [[TMP11:%.*]] = bitcast %nanos6_task_args_foo1** [[TMP2]] to i8**, [[DBG10]]
66-
; CHECK-NEXT: store i64 -1, i64* [[NUM_DEPS20]], align 8, [[DBG10]]
64+
; CHECK-NEXT: store i64 0, i64* [[NUM_DEPS20]], align 8, [[DBG10]]
6765
; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[NUM_DEPS20]], align 8, [[DBG10]]
6866
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo1, %nanos6_task_invocation_info_t* @task_invocation_info_foo1, i64 16, i8** [[TMP11]], i8** [[TMP3]], i64 4, i64 [[TMP12]]), [[DBG10]]
6967
; CHECK-NEXT: [[TMP13:%.*]] = load %nanos6_task_args_foo1*, %nanos6_task_args_foo1** [[TMP2]], align 8, [[DBG10]]
@@ -84,7 +82,7 @@ define void @foo(i32 %lb, i32 %ub, i32 %step) #0 !dbg !6 {
8482
; CHECK-NEXT: br label [[FINAL_COND29:%.*]], [[DBG11]]
8583
; CHECK: codeRepl34:
8684
; CHECK-NEXT: [[TMP16:%.*]] = bitcast %nanos6_task_args_foo2** [[TMP4]] to i8**, [[DBG11]]
87-
; CHECK-NEXT: store i64 -1, i64* [[NUM_DEPS37]], align 8, [[DBG11]]
85+
; CHECK-NEXT: store i64 0, i64* [[NUM_DEPS37]], align 8, [[DBG11]]
8886
; CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[NUM_DEPS37]], align 8, [[DBG11]]
8987
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo2, %nanos6_task_invocation_info_t* @task_invocation_info_foo2, i64 16, i8** [[TMP16]], i8** [[TMP5]], i64 12, i64 [[TMP17]]), [[DBG11]]
9088
; CHECK-NEXT: [[TMP18:%.*]] = load %nanos6_task_args_foo2*, %nanos6_task_args_foo2** [[TMP4]], align 8, [[DBG11]]

llvm/test/Transforms/OmpSs/taskloop_final_loop.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define dso_local void @taskloop(i32 %lb, i32 %ub, i32 %step) #0 !dbg !6 {
4444
; CHECK-NEXT: br label [[FINAL_COND:%.*]], [[DBG8]]
4545
; CHECK: codeRepl:
4646
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %nanos6_task_args_taskloop0** [[TMP0]] to i8**, [[DBG8]]
47-
; CHECK-NEXT: store i64 -1, i64* [[NUM_DEPS]], align 8, [[DBG8]]
47+
; CHECK-NEXT: store i64 0, i64* [[NUM_DEPS]], align 8, [[DBG8]]
4848
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[NUM_DEPS]], align 8, [[DBG8]]
4949
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_taskloop0, %nanos6_task_invocation_info_t* @task_invocation_info_taskloop0, i64 16, i8** [[TMP2]], i8** [[TMP1]], i64 4, i64 [[TMP3]]), [[DBG8]]
5050
; CHECK-NEXT: [[TMP4:%.*]] = load %nanos6_task_args_taskloop0*, %nanos6_task_args_taskloop0** [[TMP0]], align 8, [[DBG8]]

llvm/test/Transforms/OmpSs/taskloop_multideps.ll

+93
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt %s -ompss-2 -S | FileCheck %s
23
; ModuleID = 'taskloop_multideps.ll'
34
source_filename = "taskloop_multideps.ll"
@@ -7,6 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
78
; This test checks we use nanos6 lower bound to build
89
; multidep loop and call to register dep
910

11+
; Also, a taskloop with a multidep that uses the loop iterator
12+
; gets num_deps = -1
13+
1014
; int v[10];
1115
; int main() {
1216
; #pragma oss taskloop out( { v[i], i=0;j } )
@@ -20,6 +24,57 @@ target triple = "x86_64-unknown-linux-gnu"
2024

2125
; Function Attrs: noinline nounwind optnone
2226
define i32 @main() #0 !dbg !6 {
27+
; CHECK-LABEL: @main(
28+
; CHECK-NEXT: entry:
29+
; CHECK-NEXT: [[J:%.*]] = alloca i32, align 4
30+
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
31+
; CHECK-NEXT: store i32 0, i32* [[J]], align 4, [[DBG9:!dbg !.*]]
32+
; CHECK-NEXT: store i32 0, i32* [[I]], align 4, [[DBG10:!dbg !.*]]
33+
; CHECK-NEXT: [[TMP0:%.*]] = alloca %nanos6_task_args_main0*, align 8, [[DBG9]]
34+
; CHECK-NEXT: [[TMP1:%.*]] = alloca i8*, align 8, [[DBG9]]
35+
; CHECK-NEXT: [[NUM_DEPS:%.*]] = alloca i64, align 8, [[DBG9]]
36+
; CHECK-NEXT: br label [[FINAL_COND:%.*]], [[DBG9]]
37+
; CHECK: codeRepl:
38+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %nanos6_task_args_main0** [[TMP0]] to i8**, [[DBG9]]
39+
; CHECK-NEXT: store i64 -1, i64* [[NUM_DEPS]], align 8, [[DBG9]]
40+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[NUM_DEPS]], align 8, [[DBG9]]
41+
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_main0, %nanos6_task_invocation_info_t* @task_invocation_info_main0, i64 32, i8** [[TMP2]], i8** [[TMP1]], i64 4, i64 [[TMP3]]), [[DBG9]]
42+
; CHECK-NEXT: [[TMP4:%.*]] = load %nanos6_task_args_main0*, %nanos6_task_args_main0** [[TMP0]], align 8, [[DBG9]]
43+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %nanos6_task_args_main0* [[TMP4]] to i8*, [[DBG9]]
44+
; CHECK-NEXT: [[ARGS_END:%.*]] = getelementptr i8, i8* [[TMP5]], i64 32, [[DBG9]]
45+
; CHECK-NEXT: [[GEP_V:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0:%.*]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 0, [[DBG9]]
46+
; CHECK-NEXT: store [10 x i32]* @v, [10 x i32]** [[GEP_V]], align 8, [[DBG9]]
47+
; CHECK-NEXT: [[CAPT_GEP_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 3, [[DBG9]]
48+
; CHECK-NEXT: store i32 0, i32* [[CAPT_GEP_]], align 4, [[DBG9]]
49+
; CHECK-NEXT: [[CAPT_GEP_4:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 4, [[DBG9]]
50+
; CHECK-NEXT: store i32 10, i32* [[CAPT_GEP_4]], align 4, [[DBG9]]
51+
; CHECK-NEXT: [[CAPT_GEP_5:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 5, [[DBG9]]
52+
; CHECK-NEXT: store i32 1, i32* [[CAPT_GEP_5]], align 4, [[DBG9]]
53+
; CHECK-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP1]], align 8, [[DBG9]]
54+
; CHECK-NEXT: call void @nanos6_register_loop_bounds(i8* [[TMP6]], i64 0, i64 10, i64 0, i64 0), [[DBG9]]
55+
; CHECK-NEXT: call void @nanos6_submit_task(i8* [[TMP6]]), [[DBG9]]
56+
; CHECK-NEXT: br label [[FINAL_END:%.*]], [[DBG9]]
57+
; CHECK: final.end:
58+
; CHECK-NEXT: ret i32 0, [[DBG11:!dbg !.*]]
59+
; CHECK: final.then:
60+
; CHECK-NEXT: store i32 0, i32* [[J]], align 4, [[DBG9]]
61+
; CHECK-NEXT: br label [[FOR_COND:%.*]], [[DBG9]]
62+
; CHECK: for.cond:
63+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[J]], align 4, [[DBG9]]
64+
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 10, [[DBG9]]
65+
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_BODY:%.*]], label [[FINAL_END]], [[DBG9]]
66+
; CHECK: for.body:
67+
; CHECK-NEXT: br label [[FOR_INCR:%.*]], [[DBG11]]
68+
; CHECK: for.incr:
69+
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[J]], align 4, [[DBG9]]
70+
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, [[DBG9]]
71+
; CHECK-NEXT: store i32 [[TMP10]], i32* [[J]], align 4, [[DBG9]]
72+
; CHECK-NEXT: br label [[FOR_COND]], [[DBG9]]
73+
; CHECK: final.cond:
74+
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @nanos6_in_final(), [[DBG9]]
75+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0, [[DBG9]]
76+
; CHECK-NEXT: br i1 [[TMP12]], label [[FINAL_THEN:%.*]], label [[CODEREPL:%.*]], [[DBG9]]
77+
;
2378
entry:
2479
%j = alloca i32, align 4
2580
%i = alloca i32, align 4
@@ -37,6 +92,24 @@ declare token @llvm.directive.region.entry() #1
3792
declare void @llvm.directive.region.exit(token) #1
3893

3994
define internal %struct._depend_unpack_t @compute_dep(i32* %i, i32* %j) {
95+
; CHECK-LABEL: @compute_dep(
96+
; CHECK-NEXT: entry:
97+
; CHECK-NEXT: [[RETURN_VAL:%.*]] = alloca [[STRUCT__DEPEND_UNPACK_T:%.*]], align 4
98+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4, [[DBG10]]
99+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[J:%.*]], align 4, [[DBG10]]
100+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
101+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -1
102+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 0
103+
; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4
104+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 1
105+
; CHECK-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4
106+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 2
107+
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP6]], align 4
108+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 3
109+
; CHECK-NEXT: store i32 1, i32* [[TMP7]], align 4
110+
; CHECK-NEXT: [[TMP8:%.*]] = load [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], align 4
111+
; CHECK-NEXT: ret [[STRUCT__DEPEND_UNPACK_T]] [[TMP8]]
112+
;
40113
entry:
41114
%return.val = alloca %struct._depend_unpack_t, align 4
42115
%0 = load i32, i32* %i, align 4, !dbg !10
@@ -56,6 +129,26 @@ entry:
56129
}
57130

58131
define internal %struct._depend_unpack_t.0 @compute_dep.1(i32* %i, i32* %j, [10 x i32]* %v) {
132+
; CHECK-LABEL: @compute_dep.1(
133+
; CHECK-NEXT: entry:
134+
; CHECK-NEXT: [[RETURN_VAL:%.*]] = alloca [[STRUCT__DEPEND_UNPACK_T_0:%.*]], align 8
135+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4, [[DBG10]]
136+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
137+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
138+
; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[V:%.*]], i64 0, i64 0, [[DBG10]]
139+
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
140+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], 4
141+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 0
142+
; CHECK-NEXT: store i32* [[ARRAYDECAY]], i32** [[TMP5]], align 8
143+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 1
144+
; CHECK-NEXT: store i64 40, i64* [[TMP6]], align 8
145+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 2
146+
; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP7]], align 8
147+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 3
148+
; CHECK-NEXT: store i64 [[TMP4]], i64* [[TMP8]], align 8
149+
; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], align 8
150+
; CHECK-NEXT: ret [[STRUCT__DEPEND_UNPACK_T_0]] [[TMP9]]
151+
;
59152
entry:
60153
%return.val = alloca %struct._depend_unpack_t.0, align 8
61154
%0 = load i32, i32* %i, align 4, !dbg !10

0 commit comments

Comments
 (0)