@@ -91,18 +91,16 @@ class AMDGPULibCalls {
91
91
// sqrt
92
92
bool fold_sqrt (FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
93
93
94
+ bool insertSinCos (CallInst *Sin, CallInst *Cos, IRBuilder<> &B,
95
+ const FuncInfo &FInfo);
96
+
94
97
// sin/cos
95
98
bool fold_sincos (FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
96
99
97
100
// __read_pipe/__write_pipe
98
101
bool fold_read_write_pipe (CallInst *CI, IRBuilder<> &B,
99
102
const FuncInfo &FInfo);
100
103
101
- // Get insertion point at entry.
102
- BasicBlock::iterator getEntryIns (CallInst * UI);
103
- // Insert an Alloc instruction.
104
- AllocaInst* insertAlloca (CallInst * UI, IRBuilder<> &B, const char *prefix);
105
-
106
104
// Get a scalar native builtin single argument FP function
107
105
FunctionCallee getNativeFunction (Module *M, const FuncInfo &FInfo);
108
106
@@ -1153,6 +1151,71 @@ bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
1153
1151
return false ;
1154
1152
}
1155
1153
1154
+ bool AMDGPULibCalls::insertSinCos (CallInst *Sin, CallInst *Cos, IRBuilder<> &B,
1155
+ const FuncInfo &fInfo ) {
1156
+ Value *Arg = Sin->getOperand (0 );
1157
+ assert (Arg == Cos->getOperand (0 ));
1158
+
1159
+ Function *F = B.GetInsertBlock ()->getParent ();
1160
+ Module *M = F->getParent ();
1161
+ // Merge the sin and cos.
1162
+
1163
+ // for OpenCL 2.0 we have only generic implementation of sincos
1164
+ // function.
1165
+ // FIXME: This is not true anymore
1166
+ AMDGPULibFunc nf (AMDGPULibFunc::EI_SINCOS, fInfo );
1167
+ nf.getLeads ()[0 ].PtrKind =
1168
+ AMDGPULibFunc::getEPtrKindFromAddrSpace (AMDGPUAS::FLAT_ADDRESS);
1169
+ FunctionCallee Fsincos = getFunction (M, nf);
1170
+ if (!Fsincos)
1171
+ return false ;
1172
+
1173
+ B.SetInsertPointPastAllocas (F);
1174
+
1175
+ DILocation *MergedDebugLoc =
1176
+ DILocation::getMergedLocation (Sin->getDebugLoc (), Cos->getDebugLoc ());
1177
+ B.SetCurrentDebugLocation (MergedDebugLoc);
1178
+
1179
+ AllocaInst *Alloc = B.CreateAlloca (Sin->getType (), nullptr , " __sincos_" );
1180
+
1181
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1182
+ // If the argument is an instruction, it must dominate all uses so put our
1183
+ // sincos call there. Otherwise, right after the allocas works well enough
1184
+ // if it's an argument or constant.
1185
+
1186
+ B.SetInsertPoint (ArgInst->getParent (), ++ArgInst->getIterator ());
1187
+ B.SetCurrentDebugLocation (MergedDebugLoc);
1188
+ }
1189
+
1190
+ Value *P = Alloc;
1191
+ Type *PTy = Fsincos.getFunctionType ()->getParamType (1 );
1192
+ // The allocaInst allocates the memory in private address space. This need
1193
+ // to be bitcasted to point to the address space of cos pointer type.
1194
+ // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1195
+ if (PTy->getPointerAddressSpace () != AMDGPUAS::PRIVATE_ADDRESS)
1196
+ P = B.CreateAddrSpaceCast (Alloc, PTy);
1197
+
1198
+ // Intersect the two sets of flags.
1199
+ FastMathFlags FMF = cast<FPMathOperator>(Sin)->getFastMathFlags ();
1200
+ FMF &= cast<FPMathOperator>(Cos)->getFastMathFlags ();
1201
+ B.setFastMathFlags (FMF);
1202
+
1203
+ CallInst *Call = CreateCallEx2 (B, Fsincos, Arg, P);
1204
+ LoadInst *Reload = B.CreateLoad (Alloc->getAllocatedType (), Alloc);
1205
+ Reload->setDebugLoc (Cos->getDebugLoc ());
1206
+
1207
+ LLVM_DEBUG (errs () << " AMDIC: fold_sincos (" << *Sin << " , " << *Cos
1208
+ << " ) with " << *Call << ' \n ' );
1209
+
1210
+ Sin->replaceAllUsesWith (Call);
1211
+ Sin->eraseFromParent ();
1212
+
1213
+ Cos->replaceAllUsesWith (Reload);
1214
+ Cos->eraseFromParent ();
1215
+
1216
+ return true ;
1217
+ }
1218
+
1156
1219
// fold sin, cos -> sincos.
1157
1220
bool AMDGPULibCalls::fold_sincos (FPMathOperator *FPOp, IRBuilder<> &B,
1158
1221
const FuncInfo &fInfo ) {
@@ -1168,106 +1231,33 @@ bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1168
1231
1169
1232
Value *CArgVal = FPOp->getOperand (0 );
1170
1233
CallInst *CI = cast<CallInst>(FPOp);
1171
- BasicBlock * const CBB = CI->getParent ();
1172
-
1173
- int const MaxScan = 30 ;
1174
1234
bool Changed = false ;
1175
1235
1176
- Module *M = CI->getModule ();
1177
1236
FuncInfo PartnerInfo (isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1178
1237
fInfo );
1179
1238
const std::string PairName = PartnerInfo.mangle ();
1180
1239
1181
1240
CallInst *UI = nullptr ;
1241
+
1242
+ // TODO: Handle repeated uses, the generic implementation does.
1182
1243
for (User* U : CArgVal->users ()) {
1183
- CallInst *XI = dyn_cast_or_null <CallInst>(U);
1184
- if (!XI || XI == CI || XI-> getParent () != CBB )
1244
+ CallInst *XI = dyn_cast <CallInst>(U);
1245
+ if (!XI || XI-> isNoBuiltin () )
1185
1246
continue ;
1186
1247
1187
1248
Function *UCallee = XI->getCalledFunction ();
1188
- if (!UCallee || !UCallee->getName ().equals (PairName))
1189
- continue ;
1190
-
1191
- BasicBlock::iterator BBI = CI->getIterator ();
1192
- if (BBI == CI->getParent ()->begin ())
1193
- break ;
1194
- --BBI;
1195
- for (int I = MaxScan; I > 0 && BBI != CBB->begin (); --BBI, --I) {
1196
- if (cast<Instruction>(BBI) == XI) {
1197
- UI = XI;
1198
- break ;
1199
- }
1200
- }
1201
- if (UI) break ;
1249
+ if (UCallee && UCallee->getName ().equals (PairName))
1250
+ UI = XI;
1251
+ else if (UI)
1252
+ return Changed;
1202
1253
}
1203
1254
1204
1255
if (!UI)
1205
1256
return Changed;
1206
1257
1207
- // Merge the sin and cos.
1208
-
1209
- // for OpenCL 2.0 we have only generic implementation of sincos
1210
- // function.
1211
- AMDGPULibFunc nf (AMDGPULibFunc::EI_SINCOS, fInfo );
1212
- nf.getLeads ()[0 ].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace (AMDGPUAS::FLAT_ADDRESS);
1213
- FunctionCallee Fsincos = getFunction (M, nf);
1214
- if (!Fsincos)
1215
- return Changed;
1216
-
1217
- BasicBlock::iterator ItOld = B.GetInsertPoint ();
1218
- AllocaInst *Alloc = insertAlloca (UI, B, " __sincos_" );
1219
- B.SetInsertPoint (UI);
1220
-
1221
- Value *P = Alloc;
1222
- Type *PTy = Fsincos.getFunctionType ()->getParamType (1 );
1223
- // The allocaInst allocates the memory in private address space. This need
1224
- // to be bitcasted to point to the address space of cos pointer type.
1225
- // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1226
- if (PTy->getPointerAddressSpace () != AMDGPUAS::PRIVATE_ADDRESS)
1227
- P = B.CreateAddrSpaceCast (Alloc, PTy);
1228
- CallInst *Call = CreateCallEx2 (B, Fsincos, UI->getArgOperand (0 ), P);
1229
-
1230
- LLVM_DEBUG (errs () << " AMDIC: fold_sincos (" << *CI << " , " << *UI << " ) with "
1231
- << *Call << " \n " );
1232
-
1233
- if (!isSin) { // CI->cos, UI->sin
1234
- B.SetInsertPoint (&*ItOld);
1235
- UI->replaceAllUsesWith (&*Call);
1236
- Instruction *Reload = B.CreateLoad (Alloc->getAllocatedType (), Alloc);
1237
- CI->replaceAllUsesWith (Reload);
1238
- UI->eraseFromParent ();
1239
- CI->eraseFromParent ();
1240
- } else { // CI->sin, UI->cos
1241
- Instruction *Reload = B.CreateLoad (Alloc->getAllocatedType (), Alloc);
1242
- UI->replaceAllUsesWith (Reload);
1243
- CI->replaceAllUsesWith (Call);
1244
- UI->eraseFromParent ();
1245
- CI->eraseFromParent ();
1246
- }
1247
- return true ;
1248
- }
1249
-
1250
- // Get insertion point at entry.
1251
- BasicBlock::iterator AMDGPULibCalls::getEntryIns (CallInst * UI) {
1252
- Function * Func = UI->getParent ()->getParent ();
1253
- BasicBlock * BB = &Func->getEntryBlock ();
1254
- assert (BB && " Entry block not found!" );
1255
- BasicBlock::iterator ItNew = BB->begin ();
1256
- return ItNew;
1257
- }
1258
-
1259
- // Insert a AllocsInst at the beginning of function entry block.
1260
- AllocaInst* AMDGPULibCalls::insertAlloca (CallInst *UI, IRBuilder<> &B,
1261
- const char *prefix) {
1262
- BasicBlock::iterator ItNew = getEntryIns (UI);
1263
- Function *UCallee = UI->getCalledFunction ();
1264
- Type *RetType = UCallee->getReturnType ();
1265
- B.SetInsertPoint (&*ItNew);
1266
- AllocaInst *Alloc =
1267
- B.CreateAlloca (RetType, nullptr , std::string (prefix) + UI->getName ());
1268
- Alloc->setAlignment (
1269
- Align (UCallee->getParent ()->getDataLayout ().getTypeAllocSize (RetType)));
1270
- return Alloc;
1258
+ CallInst *Sin = isSin ? CI : UI;
1259
+ CallInst *Cos = isSin ? UI : CI;
1260
+ return insertSinCos (Sin, Cos, B, fInfo ) || Changed;
1271
1261
}
1272
1262
1273
1263
bool AMDGPULibCalls::evaluateScalarMathFunc (const FuncInfo &FInfo,
0 commit comments