@@ -1,3 +1,4 @@
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -ompss-2 -S | FileCheck %s
; ModuleID = 'taskloop_multideps.ll'
source_filename = "taskloop_multideps.ll"
@@ -7,6 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
; This test checks that we use the nanos6 lower bound to build the
; multidep loop and the call to register the dependency.

+ ; Also, a taskloop whose multideps use the loop iterator
+ ; means num_deps = -1 (the dependency count depends on the
+ ; iterator, so it is not known up front).
+
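+ ;
+ ; A minimal sketch of what that looks like at the runtime interface. The
+ ; argument order is taken from the nanos6_create_task call checked below;
+ ; the C variable names are illustrative, not part of this test:
+ ;
+ ;   void *args_block, *task_ptr;
+ ;   // num_deps is -1 because the number of multidep instances
+ ;   // depends on the run-time value of j.
+ ;   nanos6_create_task(task_info, task_invocation_info,
+ ;                      /*args_block_size=*/32, &args_block, &task_ptr,
+ ;                      /*flags=*/4, /*num_deps=*/(uint64_t)-1);
+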
; int v[10];
; int main() {
; #pragma oss taskloop out( { v[i], i=0;j } )
@@ -20,6 +24,57 @@ target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline nounwind optnone
define i32 @main() #0 !dbg !6 {
+ ; CHECK-LABEL: @main(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[J:%.*]] = alloca i32, align 4
+ ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+ ; CHECK-NEXT: store i32 0, i32* [[J]], align 4, [[DBG9:!dbg !.*]]
+ ; CHECK-NEXT: store i32 0, i32* [[I]], align 4, [[DBG10:!dbg !.*]]
+ ; CHECK-NEXT: [[TMP0:%.*]] = alloca %nanos6_task_args_main0*, align 8, [[DBG9]]
+ ; CHECK-NEXT: [[TMP1:%.*]] = alloca i8*, align 8, [[DBG9]]
+ ; CHECK-NEXT: [[NUM_DEPS:%.*]] = alloca i64, align 8, [[DBG9]]
+ ; CHECK-NEXT: br label [[FINAL_COND:%.*]], [[DBG9]]
+ ; CHECK: codeRepl:
+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast %nanos6_task_args_main0** [[TMP0]] to i8**, [[DBG9]]
+ ; CHECK-NEXT: store i64 -1, i64* [[NUM_DEPS]], align 8, [[DBG9]]
+ ; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[NUM_DEPS]], align 8, [[DBG9]]
+ ; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_main0, %nanos6_task_invocation_info_t* @task_invocation_info_main0, i64 32, i8** [[TMP2]], i8** [[TMP1]], i64 4, i64 [[TMP3]]), [[DBG9]]
+ ; CHECK-NEXT: [[TMP4:%.*]] = load %nanos6_task_args_main0*, %nanos6_task_args_main0** [[TMP0]], align 8, [[DBG9]]
+ ; CHECK-NEXT: [[TMP5:%.*]] = bitcast %nanos6_task_args_main0* [[TMP4]] to i8*, [[DBG9]]
+ ; CHECK-NEXT: [[ARGS_END:%.*]] = getelementptr i8, i8* [[TMP5]], i64 32, [[DBG9]]
+ ; CHECK-NEXT: [[GEP_V:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0:%.*]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 0, [[DBG9]]
+ ; CHECK-NEXT: store [10 x i32]* @v, [10 x i32]** [[GEP_V]], align 8, [[DBG9]]
+ ; CHECK-NEXT: [[CAPT_GEP_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 3, [[DBG9]]
+ ; CHECK-NEXT: store i32 0, i32* [[CAPT_GEP_]], align 4, [[DBG9]]
+ ; CHECK-NEXT: [[CAPT_GEP_4:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 4, [[DBG9]]
+ ; CHECK-NEXT: store i32 10, i32* [[CAPT_GEP_4]], align 4, [[DBG9]]
+ ; CHECK-NEXT: [[CAPT_GEP_5:%.*]] = getelementptr [[NANOS6_TASK_ARGS_MAIN0]], %nanos6_task_args_main0* [[TMP4]], i32 0, i32 5, [[DBG9]]
+ ; CHECK-NEXT: store i32 1, i32* [[CAPT_GEP_5]], align 4, [[DBG9]]
+ ; CHECK-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP1]], align 8, [[DBG9]]
+ ; CHECK-NEXT: call void @nanos6_register_loop_bounds(i8* [[TMP6]], i64 0, i64 10, i64 0, i64 0), [[DBG9]]
+ ; CHECK-NEXT: call void @nanos6_submit_task(i8* [[TMP6]]), [[DBG9]]
+ ; CHECK-NEXT: br label [[FINAL_END:%.*]], [[DBG9]]
+ ; CHECK: final.end:
+ ; CHECK-NEXT: ret i32 0, [[DBG11:!dbg !.*]]
+ ; CHECK: final.then:
+ ; CHECK-NEXT: store i32 0, i32* [[J]], align 4, [[DBG9]]
+ ; CHECK-NEXT: br label [[FOR_COND:%.*]], [[DBG9]]
+ ; CHECK: for.cond:
+ ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[J]], align 4, [[DBG9]]
+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 10, [[DBG9]]
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_BODY:%.*]], label [[FINAL_END]], [[DBG9]]
+ ; CHECK: for.body:
+ ; CHECK-NEXT: br label [[FOR_INCR:%.*]], [[DBG11]]
+ ; CHECK: for.incr:
+ ; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[J]], align 4, [[DBG9]]
+ ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, [[DBG9]]
+ ; CHECK-NEXT: store i32 [[TMP10]], i32* [[J]], align 4, [[DBG9]]
+ ; CHECK-NEXT: br label [[FOR_COND]], [[DBG9]]
+ ; CHECK: final.cond:
+ ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @nanos6_in_final(), [[DBG9]]
+ ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0, [[DBG9]]
+ ; CHECK-NEXT: br i1 [[TMP12]], label [[FINAL_THEN:%.*]], label [[CODEREPL:%.*]], [[DBG9]]
+ ;
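+ ; The two checks that carry the point of this test: the 'store i64 -1'
+ ; into NUM_DEPS, whose loaded value is passed as the last argument of
+ ; nanos6_create_task, and the nanos6_register_loop_bounds call that
+ ; registers the taskloop iteration space [0, 10).
+ ;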
entry:
  %j = alloca i32, align 4
  %i = alloca i32, align 4
@@ -37,6 +92,24 @@ declare token @llvm.directive.region.entry() #1
declare void @llvm.directive.region.exit(token) #1

define internal %struct._depend_unpack_t @compute_dep(i32* %i, i32* %j) {
+ ; CHECK-LABEL: @compute_dep(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[RETURN_VAL:%.*]] = alloca [[STRUCT__DEPEND_UNPACK_T:%.*]], align 4
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4, [[DBG10]]
+ ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[J:%.*]], align 4, [[DBG10]]
+ ; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
+ ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -1
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 0
+ ; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 1
+ ; CHECK-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 2
+ ; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP6]], align 4
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], i32 0, i32 3
+ ; CHECK-NEXT: store i32 1, i32* [[TMP7]], align 4
+ ; CHECK-NEXT: [[TMP8:%.*]] = load [[STRUCT__DEPEND_UNPACK_T]], %struct._depend_unpack_t* [[RETURN_VAL]], align 4
+ ; CHECK-NEXT: ret [[STRUCT__DEPEND_UNPACK_T]] [[TMP8]]
+ ;
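+ ; The stores above encode the discrete multidep iterator range { i=0;j }:
+ ; field 0 holds the lower bound (0), field 1 the current value of i,
+ ; field 2 the inclusive upper bound (0 + j - 1), and field 3 the step (1).
+ ;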
entry:
  %return.val = alloca %struct._depend_unpack_t, align 4
  %0 = load i32, i32* %i, align 4, !dbg !10
@@ -56,6 +129,26 @@ entry:
}

define internal %struct._depend_unpack_t.0 @compute_dep.1(i32* %i, i32* %j, [10 x i32]* %v) {
+ ; CHECK-LABEL: @compute_dep.1(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: [[RETURN_VAL:%.*]] = alloca [[STRUCT__DEPEND_UNPACK_T_0:%.*]], align 8
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4, [[DBG10]]
+ ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
+ ; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[V:%.*]], i64 0, i64 0, [[DBG10]]
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], 4
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 0
+ ; CHECK-NEXT: store i32* [[ARRAYDECAY]], i32** [[TMP5]], align 8
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 1
+ ; CHECK-NEXT: store i64 40, i64* [[TMP6]], align 8
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 2
+ ; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP7]], align 8
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], i32 0, i32 3
+ ; CHECK-NEXT: store i64 [[TMP4]], i64* [[TMP8]], align 8
+ ; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT__DEPEND_UNPACK_T_0]], %struct._depend_unpack_t.0* [[RETURN_VAL]], align 8
+ ; CHECK-NEXT: ret [[STRUCT__DEPEND_UNPACK_T_0]] [[TMP9]]
+ ;
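+ ; Here the stores describe the dependency region over v for a given i:
+ ; the base pointer &v[0], the total size of v (40 bytes, 10 x i32), and
+ ; the byte range [i*4, (i+1)*4), i.e. the single element v[i].
+ ;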
entry:
  %return.val = alloca %struct._depend_unpack_t.0, align 8
  %0 = load i32, i32* %i, align 4, !dbg !10