Skip to content

Commit afab606

Browse files
committed
Build the loop that counts how many deps in a multidep are going to be registered.
For taskloop, default to -1. Closes llvm#93
1 parent 7bb5b3d commit afab606

15 files changed

+967
-578
lines changed

llvm/lib/Transforms/OmpSs/OmpSsTransform.cpp

+73-36
Original file line number | Diff line number | Diff line change
@@ -2143,6 +2143,32 @@ struct OmpSs : public ModulePass {
21432143
// Set debug info from the task entry to all instructions
21442144
IRB.SetCurrentDebugLocation(DLoc);
21452145

2146+
// Add a branch to the next basic block after the task region
2147+
// and replace the terminator that exits the task region
2148+
// Since this is a single entry single exit region this should
2149+
// be done once.
2150+
BasicBlock *NewRetBB = nullptr;
2151+
for (BasicBlock *Block : Blocks) {
2152+
Instruction *DirInfo = Block->getTerminator();
2153+
for (unsigned i = 0, e = DirInfo->getNumSuccessors(); i != e; ++i)
2154+
if (!Blocks.count(DirInfo->getSuccessor(i))) {
2155+
assert(!NewRetBB && "More than one exit in task code");
2156+
2157+
BasicBlock *OldTarget = DirInfo->getSuccessor(i);
2158+
// Create branch to next BB after the task region
2159+
IRB.CreateBr(OldTarget);
2160+
2161+
NewRetBB = BasicBlock::Create(M.getContext(), ".exitStub", newFunction);
2162+
IRBuilder<> (NewRetBB).CreateRetVoid();
2163+
2164+
// rewrite the original branch instruction with this new target
2165+
DirInfo->setSuccessor(i, NewRetBB);
2166+
}
2167+
}
2168+
2169+
// Here we have a valid codeReplacer BasicBlock with its terminator
2170+
IRB.SetInsertPoint(codeReplacer->getTerminator());
2171+
21462172
AllocaInst *TaskArgsVar = IRB.CreateAlloca(TaskArgsTy->getPointerTo());
21472173
Value *TaskArgsVarCast = IRB.CreateBitCast(TaskArgsVar, IRB.getInt8PtrTy()->getPointerTo());
21482174
Value *TaskFlagsVar = computeTaskFlags(IRB, DirEnv);
@@ -2163,20 +2189,55 @@ struct OmpSs : public ModulePass {
21632189

21642190
Value *TaskArgsVLAsExtraSizeOf = computeTaskArgsVLAsExtraSizeOf(M, IRB, VLADimsInfo);
21652191
Value *TaskArgsSizeOf = IRB.CreateNUWAdd(TaskArgsStructSizeOf, TaskArgsVLAsExtraSizeOf);
2166-
int NumDependencies = DependsInfo.List.size();
2167-
for (auto &DepInfo : DependsInfo.List) {
2168-
if (isa<MultiDependInfo>(DepInfo.get())) {
2169-
// TODO: build loop to compute the amount of dependencies in
2170-
// multideps. Fallback to -1 if the task has some dependency
2171-
NumDependencies = -1;
2172-
break;
2192+
2193+
Instruction *NumDependencies = IRB.CreateAlloca(IRB.getInt64Ty(), nullptr, "num.deps");
2194+
if (DirEnv.isOmpSsTaskLoopDirective()) {
2195+
// If taskloop NumDeps = -1
2196+
IRB.CreateStore(IRB.getInt64(-1), NumDependencies);
2197+
} else {
2198+
IRB.CreateStore(IRB.getInt64(0), NumDependencies);
2199+
for (auto &DepInfo : DependsInfo.List) {
2200+
Instruction *NumDependenciesLoad = IRB.CreateLoad(NumDependencies);
2201+
Value *NumDependenciesIncr = IRB.CreateAdd(NumDependenciesLoad, IRB.getInt64(1));
2202+
Instruction *NumDependenciesStore = IRB.CreateStore(NumDependenciesIncr, NumDependencies);
2203+
if (const auto *MultiDepInfo = dyn_cast<MultiDependInfo>(DepInfo.get())) {
2204+
2205+
// Build a BasicBlock containing the num_deps increment
2206+
NumDependenciesLoad->getParent()->splitBasicBlock(NumDependenciesLoad);
2207+
Instruction *AfterNumDependenciesStore = NumDependenciesStore->getNextNode();
2208+
AfterNumDependenciesStore->getParent()->splitBasicBlock(AfterNumDependenciesStore);
2209+
2210+
// NOTE: after splitting, IRBuilder is pointing to a bad BasicBlock.
2211+
// Set again the insert point
2212+
IRB.SetInsertPoint(AfterNumDependenciesStore);
2213+
2214+
Function *ComputeMultiDepFun = MultiDepInfo->ComputeMultiDepFun;
2215+
auto Args = MultiDepInfo->Args;
2216+
for (size_t i = 0; i < MultiDepInfo->Iters.size(); i++) {
2217+
Value *IndVar = MultiDepInfo->Iters[i];
2218+
auto LBoundGen = [ComputeMultiDepFun, &Args, i](IRBuilder<> &IRB) {
2219+
Value *ComputeMultiDepCall = IRB.CreateCall(ComputeMultiDepFun, Args);
2220+
return IRB.CreateExtractValue(ComputeMultiDepCall, i*(3 + 1) + 0);
2221+
};
2222+
auto RemapGen = [IndVar](IRBuilder<> &IRB) {
2223+
// We do not need remap here
2224+
return IRB.CreateLoad(IndVar);
2225+
};
2226+
auto UBoundGen = [ComputeMultiDepFun, &Args, i](IRBuilder<> &IRB) {
2227+
Value *ComputeMultiDepCall = IRB.CreateCall(ComputeMultiDepFun, Args);
2228+
return IRB.CreateExtractValue(ComputeMultiDepCall, i*(3 + 1) + 2);
2229+
};
2230+
auto IncrGen = [ComputeMultiDepFun, &Args, i](IRBuilder<> &IRB) {
2231+
Value *ComputeMultiDepCall = IRB.CreateCall(ComputeMultiDepFun, Args);
2232+
return IRB.CreateExtractValue(ComputeMultiDepCall, i*(3 + 1) + 3);
2233+
};
2234+
buildLoopForMultiDep(
2235+
M, F, NumDependenciesLoad, NumDependenciesStore, IndVar, LBoundGen, RemapGen, UBoundGen, IncrGen);
2236+
}
2237+
}
21732238
}
21742239
}
21752240

2176-
// If taskloop NumDeps = -1
2177-
if (DirEnv.isOmpSsTaskLoopDirective())
2178-
NumDependencies = -1;
2179-
21802241
// Store label if it's not a string literal (i.e label("L1"))
21812242
if (DirEnv.Label && !isa<Constant>(DirEnv.Label)) {
21822243
Value *Idx[3];
@@ -2193,8 +2254,7 @@ struct OmpSs : public ModulePass {
21932254
TaskArgsVarCast,
21942255
TaskPtrVar,
21952256
TaskFlagsVar,
2196-
ConstantInt::get(IRB.getInt64Ty(),
2197-
NumDependencies)});
2257+
IRB.CreateLoad(NumDependencies)});
21982258

21992259
// DSA capture
22002260
Value *TaskArgsVarL = IRB.CreateLoad(TaskArgsVar);
@@ -2398,29 +2458,6 @@ struct OmpSs : public ModulePass {
23982458
}
23992459

24002460
CallInst *TaskSubmitFuncCall = IRB.CreateCall(TaskSubmitFuncCallee, TaskPtrVarL);
2401-
// Add a branch to the next basic block after the task region
2402-
// and replace the terminator that exits the task region
2403-
// Since this is a single entry single exit region this should
2404-
// be done once.
2405-
BasicBlock *NewRetBB = nullptr;
2406-
for (BasicBlock *Block : Blocks) {
2407-
Instruction *DirInfo = Block->getTerminator();
2408-
for (unsigned i = 0, e = DirInfo->getNumSuccessors(); i != e; ++i)
2409-
if (!Blocks.count(DirInfo->getSuccessor(i))) {
2410-
assert(!NewRetBB && "More than one exit in task code");
2411-
2412-
BasicBlock *OldTarget = DirInfo->getSuccessor(i);
2413-
// Create branch to next BB after the task region
2414-
IRB.CreateBr(OldTarget);
2415-
2416-
NewRetBB = BasicBlock::Create(M.getContext(), ".exitStub", newFunction);
2417-
IRBuilder<> (NewRetBB).CreateRetVoid();
2418-
2419-
// rewrite the original branch instruction with this new target
2420-
DirInfo->setSuccessor(i, NewRetBB);
2421-
}
2422-
}
2423-
24242461
return TaskSubmitFuncCall;
24252462
};
24262463

llvm/test/Transforms/OmpSs/loop_clauses.ll

+4-4
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,10 @@ entry:
3737
ret void, !dbg !20
3838
}
3939

40-
; CHECK: call void @nanos6_register_loop_bounds(i8* %5, i64 0, i64 10, i64 0, i64 777)
41-
; CHECK: call void @nanos6_register_loop_bounds(i8* %11, i64 0, i64 10, i64 0, i64 777)
42-
; CHECK: call void @nanos6_register_loop_bounds(i8* %17, i64 0, i64 10, i64 777, i64 0)
43-
; CHECK: call void @nanos6_register_loop_bounds(i8* %23, i64 0, i64 10, i64 777, i64 0)
40+
; CHECK: call void @nanos6_register_loop_bounds(i8* {{%.*}}, i64 0, i64 10, i64 0, i64 777)
41+
; CHECK: call void @nanos6_register_loop_bounds(i8* {{%.*}}, i64 0, i64 10, i64 0, i64 777)
42+
; CHECK: call void @nanos6_register_loop_bounds(i8* {{%.*}}, i64 0, i64 10, i64 777, i64 0)
43+
; CHECK: call void @nanos6_register_loop_bounds(i8* {{%.*}}, i64 0, i64 10, i64 777, i64 0)
4444

4545
; Function Attrs: nounwind
4646
declare token @llvm.directive.region.entry() #1

llvm/test/Transforms/OmpSs/loop_directives_num_deps.ll

+15-3
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,21 @@ entry:
3838
call void @llvm.directive.region.exit(token %2), !dbg !11
3939
ret void, !dbg !12
4040
}
41-
; CHECK: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo0, %nanos6_task_invocation_info_t* @task_invocation_info_foo0, i64 16, i8** %1, i8** %2, i64 8, i64 0)
42-
; CHECK: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo1, %nanos6_task_invocation_info_t* @task_invocation_info_foo1, i64 16, i8** %7, i8** %8, i64 4, i64 -1)
43-
; CHECK: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo2, %nanos6_task_invocation_info_t* @task_invocation_info_foo2, i64 16, i8** %13, i8** %14, i64 12, i64 -1)
41+
42+
; CHECK: %num.deps = alloca i64, align 8
43+
; CHECK-NEXT: store i64 0, i64* %num.deps, align 8
44+
; CHECK-NEXT: %3 = load i64, i64* %num.deps, align 8
45+
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo0, %nanos6_task_invocation_info_t* @task_invocation_info_foo0, i64 16, i8** %1, i8** %2, i64 8, i64 %3)
46+
47+
; CHECK: %num.deps20 = alloca i64, align 8
48+
; CHECK-NEXT: store i64 -1, i64* %num.deps20, align 8
49+
; CHECK-NEXT: %10 = load i64, i64* %num.deps20, align 8
50+
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo1, %nanos6_task_invocation_info_t* @task_invocation_info_foo1, i64 16, i8** %8, i8** %9, i64 4, i64 %10)
51+
52+
; CHECK: %num.deps37 = alloca i64, align 8
53+
; CHECK-NEXT: store i64 -1, i64* %num.deps37, align 8
54+
; CHECK-NEXT: %17 = load i64, i64* %num.deps37, align 8
55+
; CHECK-NEXT: call void @nanos6_create_task(%nanos6_task_info_t* @task_info_var_foo2, %nanos6_task_invocation_info_t* @task_invocation_info_foo2, i64 16, i8** %15, i8** %16, i64 12, i64 %17)
4456

4557
; Function Attrs: nounwind
4658
declare token @llvm.directive.region.entry() #1

0 commit comments

Comments
 (0)