Skip to content

Commit 3073c3c

Browse files
authored
[SDAG] Avoid creating redundant stack slots when lowering FSINCOS (#108401)
When lowering `FSINCOS` to a library call (that takes output pointers), we can avoid creating new stack allocations if the results of the `FSINCOS` are being stored. Instead, we can take the destination pointers from the stores and pass those to the library call. --- Note: As an NFC (no functional change) cleanup, this also adds (and uses) `RTLIB::getFSINCOS()`.
1 parent db054a1 commit 3073c3c

File tree

4 files changed

+315
-55
lines changed

4 files changed

+315
-55
lines changed

llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ Libcall getLDEXP(EVT RetVT);
6262
/// UNKNOWN_LIBCALL if there is none.
6363
Libcall getFREXP(EVT RetVT);
6464

65+
/// getFSINCOS - Return the SINCOS_* value for the given types, or
66+
/// UNKNOWN_LIBCALL if there is none.
67+
Libcall getFSINCOS(EVT RetVT);
68+
6569
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
6670
/// UNKNOWN_LIBCALL if there is none.
6771
Libcall getSYNC(unsigned Opc, MVT VT);

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 51 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,15 +2326,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
23262326

23272327
/// Return true if sincos libcall is available.
23282328
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
2329-
RTLIB::Libcall LC;
2330-
switch (Node->getSimpleValueType(0).SimpleTy) {
2331-
default: llvm_unreachable("Unexpected request for libcall!");
2332-
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
2333-
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
2334-
case MVT::f80: LC = RTLIB::SINCOS_F80; break;
2335-
case MVT::f128: LC = RTLIB::SINCOS_F128; break;
2336-
case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
2337-
}
2329+
RTLIB::Libcall LC = RTLIB::getFSINCOS(Node->getSimpleValueType(0).SimpleTy);
23382330
return TLI.getLibcallName(LC) != nullptr;
23392331
}
23402332

@@ -2355,68 +2347,72 @@ static bool useSinCos(SDNode *Node) {
23552347
}
23562348

23572349
/// Issue libcalls to sincos to compute sin / cos pairs.
2358-
void
2359-
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
2360-
SmallVectorImpl<SDValue> &Results) {
2361-
RTLIB::Libcall LC;
2362-
switch (Node->getSimpleValueType(0).SimpleTy) {
2363-
default: llvm_unreachable("Unexpected request for libcall!");
2364-
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
2365-
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
2366-
case MVT::f80: LC = RTLIB::SINCOS_F80; break;
2367-
case MVT::f128: LC = RTLIB::SINCOS_F128; break;
2368-
case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
2350+
void SelectionDAGLegalize::ExpandSinCosLibCall(
2351+
SDNode *Node, SmallVectorImpl<SDValue> &Results) {
2352+
EVT VT = Node->getValueType(0);
2353+
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
2354+
RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
2355+
2356+
// Find users of the node that store the results (and share input chains). The
2357+
// destination pointers can be used instead of creating stack allocations.
2358+
SDValue StoresInChain{};
2359+
std::array<StoreSDNode *, 2> ResultStores = {nullptr};
2360+
for (SDNode *User : Node->uses()) {
2361+
if (!ISD::isNormalStore(User))
2362+
continue;
2363+
auto *ST = cast<StoreSDNode>(User);
2364+
if (!ST->isSimple() || ST->getAddressSpace() != 0 ||
2365+
ST->getAlign() < DAG.getDataLayout().getABITypeAlign(Ty) ||
2366+
(StoresInChain && ST->getChain() != StoresInChain) ||
2367+
Node->isPredecessorOf(ST->getChain().getNode()))
2368+
continue;
2369+
ResultStores[ST->getValue().getResNo()] = ST;
2370+
StoresInChain = ST->getChain();
23692371
}
23702372

2371-
// The input chain to this libcall is the entry node of the function.
2372-
// Legalizing the call will automatically add the previous call to the
2373-
// dependence.
2374-
SDValue InChain = DAG.getEntryNode();
2375-
2376-
EVT RetVT = Node->getValueType(0);
2377-
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2378-
23792373
TargetLowering::ArgListTy Args;
2380-
TargetLowering::ArgListEntry Entry;
2374+
TargetLowering::ArgListEntry Entry{};
23812375

23822376
// Pass the argument.
23832377
Entry.Node = Node->getOperand(0);
2384-
Entry.Ty = RetTy;
2385-
Entry.IsSExt = false;
2386-
Entry.IsZExt = false;
2387-
Args.push_back(Entry);
2388-
2389-
// Pass the return address of sin.
2390-
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
2391-
Entry.Node = SinPtr;
2392-
Entry.Ty = PointerType::getUnqual(RetTy->getContext());
2393-
Entry.IsSExt = false;
2394-
Entry.IsZExt = false;
2378+
Entry.Ty = Ty;
23952379
Args.push_back(Entry);
23962380

2397-
// Also pass the return address of the cos.
2398-
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
2399-
Entry.Node = CosPtr;
2400-
Entry.Ty = PointerType::getUnqual(RetTy->getContext());
2401-
Entry.IsSExt = false;
2402-
Entry.IsZExt = false;
2403-
Args.push_back(Entry);
2381+
// Pass the output pointers for sin and cos.
2382+
SmallVector<SDValue, 2> ResultPtrs{};
2383+
for (StoreSDNode *ST : ResultStores) {
2384+
SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT);
2385+
Entry.Node = ResultPtr;
2386+
Entry.Ty = PointerType::getUnqual(Ty->getContext());
2387+
Args.push_back(Entry);
2388+
ResultPtrs.push_back(ResultPtr);
2389+
}
24042390

2391+
SDLoc DL(Node);
2392+
SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
24052393
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
24062394
TLI.getPointerTy(DAG.getDataLayout()));
2407-
2408-
SDLoc dl(Node);
24092395
TargetLowering::CallLoweringInfo CLI(DAG);
2410-
CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
2396+
CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
24112397
TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
24122398
std::move(Args));
24132399

2414-
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
2400+
auto [Call, OutChain] = TLI.LowerCallTo(CLI);
24152401

2416-
Results.push_back(
2417-
DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
2418-
Results.push_back(
2419-
DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
2402+
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
2403+
MachinePointerInfo PtrInfo;
2404+
if (StoreSDNode *ST = ResultStores[ResNo]) {
2405+
// Replace store with the library call.
2406+
DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
2407+
PtrInfo = ST->getPointerInfo();
2408+
} else {
2409+
PtrInfo = MachinePointerInfo::getFixedStack(
2410+
DAG.getMachineFunction(),
2411+
cast<FrameIndexSDNode>(ResultPtr)->getIndex());
2412+
}
2413+
SDValue LoadResult = DAG.getLoad(VT, DL, OutChain, ResultPtr, PtrInfo);
2414+
Results.push_back(LoadResult);
2415+
}
24202416
}
24212417

24222418
SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,11 @@ RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
400400
FREXP_PPCF128);
401401
}
402402

403+
RTLIB::Libcall RTLIB::getFSINCOS(EVT RetVT) {
404+
return getFPLibCall(RetVT, SINCOS_F32, SINCOS_F64, SINCOS_F80, SINCOS_F128,
405+
SINCOS_PPCF128);
406+
}
407+
403408
RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
404409
AtomicOrdering Order,
405410
uint64_t MemSize) {

0 commit comments

Comments
 (0)