@@ -45,6 +45,7 @@ struct OmpSs : public ModulePass {
45
45
StructType *Ty = nullptr ;
46
46
};
47
47
TaskConstraintsTy TskConstraintsTy;
48
+
48
49
struct TaskInvInfoTy {
49
50
struct Members {
50
51
Type *InvSourceTy;
@@ -53,6 +54,7 @@ struct OmpSs : public ModulePass {
53
54
Members Mmbers;
54
55
};
55
56
TaskInvInfoTy TskInvInfoTy;
57
+
56
58
struct TaskImplInfoTy {
57
59
struct Members {
58
60
Type *DeviceTypeIdTy;
@@ -66,6 +68,7 @@ struct OmpSs : public ModulePass {
66
68
Members Mmbers;
67
69
};
68
70
TaskImplInfoTy TskImplInfoTy;
71
+
69
72
struct TaskInfoTy {
70
73
struct Members {
71
74
Type *NumSymbolsTy;
@@ -92,7 +95,7 @@ struct OmpSs : public ModulePass {
92
95
" nanos6_taskwait" , IRB.getVoidTy (), IRB.getInt8PtrTy ()));
93
96
// 2. Build String
94
97
// TODO: add debug info (line:col)
95
- Constant *Nanos6TaskwaitStr = IRB.CreateGlobalStringPtr (M.getModuleIdentifier ());
98
+ Constant *Nanos6TaskwaitStr = IRB.CreateGlobalStringPtr (M.getSourceFileName ());
96
99
97
100
// 3. Insert the call
98
101
IRB.CreateCall (Func, {Nanos6TaskwaitStr});
@@ -102,6 +105,7 @@ struct OmpSs : public ModulePass {
102
105
103
106
void lowerTask (const TaskInfo &TI,
104
107
Function &F,
108
+ size_t taskNum,
105
109
Module &M) {
106
110
// 1. Split BB
107
111
BasicBlock *EntryBB = TI.Entry ->getParent ();
@@ -135,7 +139,7 @@ struct OmpSs : public ModulePass {
135
139
for (Value *V : TI.DSAInfo .Firstprivate ) {
136
140
TaskArgsMemberTy.push_back (V->getType ()->getPointerElementType ());
137
141
}
138
- StructType *TaskArgsTy = StructType::create (M.getContext (), TaskArgsMemberTy, (" nanos6_task_args_" + F.getName ()).str ());
142
+ StructType *TaskArgsTy = StructType::create (M.getContext (), TaskArgsMemberTy, (" nanos6_task_args_" + F.getName () + Twine (taskNum) ).str ());
139
143
// Create nanos6_task_args_* END
140
144
141
145
// nanos6_unpacked_task_region_* START
@@ -155,7 +159,7 @@ struct OmpSs : public ModulePass {
155
159
156
160
Function *unpackFuncVar = Function::Create (
157
161
unpackFuncType, GlobalValue::InternalLinkage, F.getAddressSpace (),
158
- " nanos6_unpacked_task_region_" + F.getName (), &M);
162
+ " nanos6_unpacked_task_region_" + F.getName () + Twine (taskNum) , &M);
159
163
160
164
// Create an iterator to name all of the arguments we inserted.
161
165
Function::arg_iterator AI = unpackFuncVar->arg_begin ();
@@ -187,7 +191,7 @@ struct OmpSs : public ModulePass {
187
191
188
192
Function *outlineFuncVar = Function::Create (
189
193
outlineFuncType, GlobalValue::InternalLinkage, F.getAddressSpace (),
190
- " nanos6_ol_task_region_" + F.getName (), &M);
194
+ " nanos6_ol_task_region_" + F.getName () + Twine (taskNum) , &M);
191
195
192
196
BasicBlock *outlineEntryBB = BasicBlock::Create (M.getContext (), " entry" , outlineFuncVar);
193
197
@@ -225,27 +229,28 @@ struct OmpSs : public ModulePass {
225
229
}
226
230
TaskUnpackParams.push_back (&*AI++);
227
231
TaskUnpackParams.push_back (&*AI++);
228
- Instruction *TaskUnpackCall =
229
- BBBuilder.CreateCall (unpackFuncVar, TaskUnpackParams);
230
- ReturnInst *TaskOlRet = BBBuilder.CreateRetVoid ();
232
+ // Build TaskUnpackCall
233
+ BBBuilder.CreateCall (unpackFuncVar, TaskUnpackParams);
234
+ // Make the BB legal by adding a terminator to the task outline function
235
+ BBBuilder.CreateRetVoid ();
231
236
232
237
// nanos6_ol_task_region_* END
233
238
234
- // 0.1 Create Nanos6 task data structures info
235
- Constant *TaskInvInfoVar = M.getOrInsertGlobal ((" task_invocation_info_" + F.getName ()).str (),
239
+ // 3. Create Nanos6 task data structures info
240
+ Constant *TaskInvInfoVar = M.getOrInsertGlobal ((" task_invocation_info_" + F.getName () + Twine (taskNum) ).str (),
236
241
TskInvInfoTy.Ty ,
237
242
[&] {
238
243
GlobalVariable *GV = new GlobalVariable (M, TskInvInfoTy.Ty ,
239
244
false ,
240
245
GlobalVariable::InternalLinkage,
241
246
ConstantStruct::get (TskInvInfoTy.Ty ,
242
247
ConstantPointerNull::get (Type::getInt8PtrTy (M.getContext ()))),
243
- (" task_invocation_info_" + F.getName ()).str ());
248
+ (" task_invocation_info_" + F.getName () + Twine (taskNum) ).str ());
244
249
GV->setAlignment (64 );
245
250
return GV;
246
251
});
247
252
248
- Constant *TaskImplInfoVar = M.getOrInsertGlobal ((" implementations_var_" + F.getName ()).str (),
253
+ Constant *TaskImplInfoVar = M.getOrInsertGlobal ((" implementations_var_" + F.getName () + Twine (taskNum) ).str (),
249
254
ArrayType::get (TskImplInfoTy.Ty , 1 ),
250
255
[&] {
251
256
auto *outlineFuncCastTy = FunctionType::get (Type::getVoidTy (M.getContext ()),
@@ -264,13 +269,13 @@ struct OmpSs : public ModulePass {
264
269
ConstantPointerNull::get (TskImplInfoTy.Mmbers .TaskLabelTy ->getPointerTo ()),
265
270
ConstantPointerNull::get (TskImplInfoTy.Mmbers .DeclSourceTy ->getPointerTo ()),
266
271
ConstantPointerNull::get (TskImplInfoTy.Mmbers .RunWrapperFuncTy ->getPointerTo ()))),
267
- (" implementations_var_" + F.getName ()).str ());
272
+ (" implementations_var_" + F.getName () + Twine (taskNum) ).str ());
268
273
269
274
GV->setAlignment (64 );
270
275
return GV;
271
276
});
272
277
273
- Constant *TaskInfoVar = M.getOrInsertGlobal ((" task_info_var_" + F.getName ()).str (),
278
+ Constant *TaskInfoVar = M.getOrInsertGlobal ((" task_info_var_" + F.getName () + Twine (taskNum) ).str (),
274
279
TskInfoTy.Ty ,
275
280
[&] {
276
281
GlobalVariable *GV = new GlobalVariable (M, TskInfoTy.Ty ,
@@ -287,12 +292,13 @@ struct OmpSs : public ModulePass {
287
292
ConstantPointerNull::get (TskInfoTy.Mmbers .DuplicateArgsBlockFuncTy ->getPointerTo ()),
288
293
ConstantPointerNull::get (TskInfoTy.Mmbers .ReductInitsFuncTy ->getPointerTo ()),
289
294
ConstantPointerNull::get (TskInfoTy.Mmbers .ReductCombsFuncTy ->getPointerTo ())),
290
- (" task_info_var_" + F.getName ()).str ());
295
+ (" task_info_var_" + F.getName () + Twine (taskNum) ).str ());
291
296
292
297
GV->setAlignment (64 );
293
298
return GV;
294
299
});
295
300
301
+ // 4. Create the nanos6_create_task and nanos6_submit_task function declarations
296
302
Function *CreateTaskFuncTy = cast<Function>(M.getOrInsertFunction (" nanos6_create_task" ,
297
303
Type::getVoidTy (M.getContext ()),
298
304
TskInfoTy.Ty ->getPointerTo (),
@@ -307,7 +313,7 @@ struct OmpSs : public ModulePass {
307
313
Type::getVoidTy (M.getContext ()),
308
314
Type::getInt8PtrTy (M.getContext ())));
309
315
310
- auto constructOmpSsFunctions = [&](BasicBlock *header,
316
+ auto rewriteOutToInTaskBrAndGetOmpSsUnpackFunc = [&](BasicBlock *header,
311
317
BasicBlock *newRootNode,
312
318
BasicBlock *newHeader,
313
319
Function *oldFunction,
@@ -316,11 +322,9 @@ struct OmpSs : public ModulePass {
316
322
317
323
unpackFuncVar->getBasicBlockList ().push_back (newRootNode);
318
324
319
- // Rewrite branches to basic blocks outside of the loop to new dummy blocks
320
- // within the new function. This must be done before we lose track of which
321
- // blocks were originally in the code region.
322
- // ?? FIXME: Parece que esto se usa para cambiar los branches al codigo que movemos
323
- // Por ej. br label %codeRepl
325
+ // Rewrite branches from basic blocks outside of the task region to blocks
326
+ // inside the region to use the new label (newHeader) since the task region
327
+ // will be outlined
324
328
std::vector<User *> Users (header->user_begin (), header->user_end ());
325
329
for (unsigned i = 0 , e = Users.size (); i != e; ++i)
326
330
// The BasicBlock which contains the branch is not in the region
@@ -330,19 +334,20 @@ struct OmpSs : public ModulePass {
330
334
I->getParent ()->getParent () == oldFunction)
331
335
I->replaceUsesOfWith (header, newHeader);
332
336
333
-
334
337
return unpackFuncVar;
335
338
};
336
- auto emitCaptureAndCall = [&](Function *newFunction,
339
+ auto emitOmpSsCaptureAndSubmitTask = [&](Function *newFunction,
337
340
BasicBlock *codeReplacer,
338
341
const SetVector<BasicBlock *> &Blocks) {
339
342
340
343
IRBuilder<> IRB (codeReplacer);
341
344
Value *TaskArgsVar = IRB.CreateAlloca (TaskArgsTy->getPointerTo ());
342
345
Value *TaskArgsVarCast = IRB.CreateBitCast (TaskArgsVar, IRB.getInt8PtrTy ()->getPointerTo ());
346
+ // TODO: For now TaskFlagsVar is hardcoded
343
347
// Value *TaskFlagsVar = IRB.CreateAlloca(IRB.getInt64Ty());
344
348
// IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0), TaskFlagsVar);
345
349
Value *TaskPtrVar = IRB.CreateAlloca (IRB.getInt8PtrTy ());
350
+ // TODO: For now TaskNumDepsVar is hardcoded
346
351
// Value *TaskNumDepsVar = IRB.CreateAlloca(IRB.getInt64Ty());
347
352
// IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0), TaskNumDepsVar);
348
353
uint64_t TaskArgsSizeOf = M.getDataLayout ().getTypeAllocSize (TaskArgsTy);
@@ -363,7 +368,7 @@ struct OmpSs : public ModulePass {
363
368
Idx[1 ] = ConstantInt::get (IRB.getInt32Ty (), TaskArgsIdx);
364
369
Value *GEP = IRB.CreateGEP (
365
370
TaskArgsVarL, Idx, " gep_" + TI.DSAInfo .Shared [i]->getName ());
366
- Value *CaptureDSA = IRB.CreateStore (TI.DSAInfo .Shared [i], GEP);
371
+ IRB.CreateStore (TI.DSAInfo .Shared [i], GEP);
367
372
}
368
373
TaskArgsIdx += TI.DSAInfo .Private .size ();
369
374
for (unsigned i = 0 ; i < TI.DSAInfo .Firstprivate .size (); ++i, ++TaskArgsIdx) {
@@ -373,49 +378,42 @@ struct OmpSs : public ModulePass {
373
378
Value *GEP = IRB.CreateGEP (
374
379
TaskArgsVarL, Idx, " gep_" + TI.DSAInfo .Firstprivate [i]->getName ());
375
380
Value *FPValue = IRB.CreateLoad (TI.DSAInfo .Firstprivate [i]);
376
- Value *CaptureDSA = IRB.CreateStore (FPValue, GEP);
381
+ IRB.CreateStore (FPValue, GEP);
377
382
}
378
383
379
384
Value *TaskPtrVarL = IRB.CreateLoad (TaskPtrVar);
380
- IRB.CreateCall (TaskSubmitFuncTy, TaskPtrVarL);
381
-
382
- // Since there may be multiple exits from the original region, make the new
383
- // function return an unsigned, switch on that number. This loop iterates
384
- // over all of the blocks in the extracted region, updating any terminator
385
- // instructions in the to-be-extracted region that branch to blocks that are
386
- // not in the region to be extracted.
387
- std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
385
+ CallInst *TaskSubmitFuncCall = IRB.CreateCall (TaskSubmitFuncTy, TaskPtrVarL);
388
386
389
- unsigned switchVal = 0 ;
387
+ // Add a branch to the next basic block after the task region
388
+ // and replace the terminator that exits the task region.
389
+ // Since this is a single-entry, single-exit region, this should
390
+ // be done only once.
391
+ bool DoneOnce = false ;
390
392
for (BasicBlock *Block : Blocks) {
391
393
Instruction *TI = Block->getTerminator ();
392
394
for (unsigned i = 0 , e = TI->getNumSuccessors (); i != e; ++i)
393
395
if (!Blocks.count (TI->getSuccessor (i))) {
396
+ assert (!DoneOnce && " More than one exit in task code" );
397
+ DoneOnce = true ;
398
+
394
399
BasicBlock *OldTarget = TI->getSuccessor (i);
395
400
401
+ // Create branch to next BB after the task region
396
402
IRB.CreateBr (OldTarget);
397
403
398
- // add a new basic block which returns the appropriate value
399
- BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
400
- if (!NewTarget) {
401
- // If we don't already have an exit stub for this non-extracted
402
- // destination, create one now!
403
- NewTarget = BasicBlock::Create (M.getContext (),
404
- OldTarget->getName () + " .exitStub" ,
405
- newFunction);
406
-
407
- ReturnInst::Create (M.getContext (), nullptr , NewTarget);
408
- }
409
- // rewrite the original branch instruction with this new target
410
- TI->setSuccessor (i, NewTarget);
404
+ IRBuilder<> BNewTerminatorI (TI);
405
+ BNewTerminatorI.CreateRetVoid ();
411
406
}
407
+ if (DoneOnce)
408
+ TI->eraseFromParent ();
412
409
}
413
410
414
- return nullptr ;
411
+ return TaskSubmitFuncCall ;
415
412
};
416
- CodeExtractor CE (TaskBBs.getArrayRef (), constructOmpSsFunctions, emitCaptureAndCall);
417
413
418
- Function *OutF = CE.extractCodeRegion ();
414
+ // 5. Extract region the way we want
415
+ CodeExtractor CE (TaskBBs.getArrayRef (), rewriteOutToInTaskBrAndGetOmpSsUnpackFunc, emitOmpSsCaptureAndSubmitTask);
416
+ CE.extractCodeRegion ();
419
417
}
420
418
421
419
bool runOnModule (Module &M) override {
@@ -521,8 +519,9 @@ struct OmpSs : public ModulePass {
521
519
for (TaskwaitInfo& TwI : TwFI.PostOrder ) {
522
520
lowerTaskwait (TwI, M);
523
521
}
522
+ size_t taskNum = 0 ;
524
523
for (TaskInfo TI : TFI.PostOrder ) {
525
- lowerTask (TI, F, M);
524
+ lowerTask (TI, F, taskNum++, M);
526
525
}
527
526
528
527
}
0 commit comments