|
15 | 15 | #include "llvm/Transforms/IPO/FunctionAttrs.h"
|
16 | 16 | #include "llvm/ADT/ArrayRef.h"
|
17 | 17 | #include "llvm/ADT/DenseMap.h"
|
| 18 | +#include "llvm/ADT/PostOrderIterator.h" |
18 | 19 | #include "llvm/ADT/SCCIterator.h"
|
19 | 20 | #include "llvm/ADT/STLExtras.h"
|
20 | 21 | #include "llvm/ADT/SetVector.h"
|
|
36 | 37 | #include "llvm/IR/Attributes.h"
|
37 | 38 | #include "llvm/IR/BasicBlock.h"
|
38 | 39 | #include "llvm/IR/Constant.h"
|
| 40 | +#include "llvm/IR/ConstantRangeList.h" |
39 | 41 | #include "llvm/IR/Constants.h"
|
40 | 42 | #include "llvm/IR/Function.h"
|
41 | 43 | #include "llvm/IR/InstIterator.h"
|
@@ -581,6 +583,200 @@ struct ArgumentUsesTracker : public CaptureTracker {
|
581 | 583 | const SCCNodeSet &SCCNodes;
|
582 | 584 | };
|
583 | 585 |
|
| 586 | +/// A struct of argument use: a Use and the offset it accesses. This struct |
| 587 | +/// is to track uses inside function via GEP. If GEP has a non-constant index, |
| 588 | +/// the Offset field is nullopt. |
| 589 | +struct ArgumentUse { |
| 590 | + Use *U; |
| 591 | + std::optional<int64_t> Offset; |
| 592 | +}; |
| 593 | + |
| 594 | +/// A struct of argument access info. "Unknown" accesses are the cases like |
| 595 | +/// unrecognized instructions, instructions that have more than one use of |
| 596 | +/// the argument, or volatile memory accesses. "WriteWithSideEffect" are call |
| 597 | +/// instructions that not only write an argument but also capture it. |
| 598 | +struct ArgumentAccessInfo { |
| 599 | + enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown }; |
| 600 | + AccessType ArgAccessType; |
| 601 | + ConstantRangeList AccessRanges; |
| 602 | +}; |
| 603 | + |
| 604 | +/// A struct to wrap the argument use info per block. |
| 605 | +struct UsesPerBlockInfo { |
| 606 | + SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts; |
| 607 | + bool HasWrites = false; |
| 608 | + bool HasUnknownAccess = false; |
| 609 | +}; |
| 610 | + |
| 611 | +/// A struct to summarize the argument use info in a function. |
| 612 | +struct ArgumentUsesSummary { |
| 613 | + bool HasAnyWrite = false; |
| 614 | + bool HasWriteOutsideEntryBB = false; |
| 615 | + SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock; |
| 616 | +}; |
| 617 | + |
| 618 | +ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I, |
| 619 | + const ArgumentUse &ArgUse, |
| 620 | + const DataLayout &DL) { |
| 621 | + auto GetTypeAccessRange = |
| 622 | + [&DL](Type *Ty, |
| 623 | + std::optional<int64_t> Offset) -> std::optional<ConstantRange> { |
| 624 | + auto TypeSize = DL.getTypeStoreSize(Ty); |
| 625 | + if (!TypeSize.isScalable() && Offset) { |
| 626 | + int64_t Size = TypeSize.getFixedValue(); |
| 627 | + return ConstantRange(APInt(64, *Offset, true), |
| 628 | + APInt(64, *Offset + Size, true)); |
| 629 | + } |
| 630 | + return std::nullopt; |
| 631 | + }; |
| 632 | + auto GetConstantIntRange = |
| 633 | + [](Value *Length, |
| 634 | + std::optional<int64_t> Offset) -> std::optional<ConstantRange> { |
| 635 | + auto *ConstantLength = dyn_cast<ConstantInt>(Length); |
| 636 | + if (ConstantLength && Offset) |
| 637 | + return ConstantRange( |
| 638 | + APInt(64, *Offset, true), |
| 639 | + APInt(64, *Offset + ConstantLength->getSExtValue(), true)); |
| 640 | + return std::nullopt; |
| 641 | + }; |
| 642 | + if (auto *SI = dyn_cast<StoreInst>(I)) { |
| 643 | + if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) { |
| 644 | + // Get the fixed type size of "SI". Since the access range of a write |
| 645 | + // will be unioned, if "SI" doesn't have a fixed type size, we just set |
| 646 | + // the access range to empty. |
| 647 | + ConstantRangeList AccessRanges; |
| 648 | + if (auto TypeAccessRange = |
| 649 | + GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) |
| 650 | + AccessRanges.insert(*TypeAccessRange); |
| 651 | + return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)}; |
| 652 | + } |
| 653 | + } else if (auto *LI = dyn_cast<LoadInst>(I)) { |
| 654 | + if (LI->isSimple()) { |
| 655 | + assert(&LI->getOperandUse(0) == ArgUse.U); |
| 656 | + // Get the fixed type size of "LI". Different from Write, if "LI" |
| 657 | + // doesn't have a fixed type size, we conservatively set as a clobber |
| 658 | + // with an empty access range. |
| 659 | + if (auto TypeAccessRange = |
| 660 | + GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) |
| 661 | + return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}}; |
| 662 | + } |
| 663 | + } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) { |
| 664 | + if (!MemSet->isVolatile()) { |
| 665 | + ConstantRangeList AccessRanges; |
| 666 | + if (auto AccessRange = |
| 667 | + GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) |
| 668 | + AccessRanges.insert(*AccessRange); |
| 669 | + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; |
| 670 | + } |
| 671 | + } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) { |
| 672 | + if (!MTI->isVolatile()) { |
| 673 | + if (&MTI->getOperandUse(0) == ArgUse.U) { |
| 674 | + ConstantRangeList AccessRanges; |
| 675 | + if (auto AccessRange = |
| 676 | + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) |
| 677 | + AccessRanges.insert(*AccessRange); |
| 678 | + return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; |
| 679 | + } else if (&MTI->getOperandUse(1) == ArgUse.U) { |
| 680 | + if (auto AccessRange = |
| 681 | + GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) |
| 682 | + return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}}; |
| 683 | + } |
| 684 | + } |
| 685 | + } else if (auto *CB = dyn_cast<CallBase>(I)) { |
| 686 | + if (CB->isArgOperand(ArgUse.U)) { |
| 687 | + unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); |
| 688 | + bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); |
| 689 | + // Argument is a Write when parameter is writeonly/readnone |
| 690 | + // and nocapture. Otherwise, it's a WriteWithSideEffect. |
| 691 | + auto Access = CB->onlyWritesMemory(ArgNo) && |
| 692 | + CB->paramHasAttr(ArgNo, Attribute::NoCapture) |
| 693 | + ? ArgumentAccessInfo::AccessType::Write |
| 694 | + : ArgumentAccessInfo::AccessType::WriteWithSideEffect; |
| 695 | + ConstantRangeList AccessRanges; |
| 696 | + if (IsInitialize && ArgUse.Offset) { |
| 697 | + Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); |
| 698 | + ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); |
| 699 | + for (ConstantRange &CR : CBCRL) |
| 700 | + AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, |
| 701 | + CR.getUpper() + *ArgUse.Offset)); |
| 702 | + return {Access, AccessRanges}; |
| 703 | + } |
| 704 | + } |
| 705 | + } |
| 706 | + // Other unrecognized instructions are considered as unknown. |
| 707 | + return {ArgumentAccessInfo::AccessType::Unknown, {}}; |
| 708 | +} |
| 709 | + |
| 710 | +// Collect the uses of argument "A" in "F". |
| 711 | +ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { |
| 712 | + auto &DL = F.getParent()->getDataLayout(); |
| 713 | + unsigned PointerSize = |
| 714 | + DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); |
| 715 | + ArgumentUsesSummary Result; |
| 716 | + |
| 717 | + BasicBlock &EntryBB = F.getEntryBlock(); |
| 718 | + SmallVector<ArgumentUse, 4> Worklist; |
| 719 | + for (Use &U : A.uses()) |
| 720 | + Worklist.push_back({&U, 0}); |
| 721 | + |
| 722 | + // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. |
| 723 | + // Return true if the block of "I" has write accesses after updating. |
| 724 | + auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { |
| 725 | + auto *BB = I->getParent(); |
| 726 | + auto &BBInfo = Result.UsesPerBlock[BB]; |
| 727 | + bool AlreadyVisitedInst = BBInfo.Insts.contains(I); |
| 728 | + auto &IInfo = BBInfo.Insts[I]; |
| 729 | + |
| 730 | + // Instructions that have more than one use of the argument are considered |
| 731 | + // as clobbers. |
| 732 | + if (AlreadyVisitedInst) { |
| 733 | + IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}}; |
| 734 | + BBInfo.HasUnknownAccess = true; |
| 735 | + return false; |
| 736 | + } |
| 737 | + |
| 738 | + IInfo = std::move(Info); |
| 739 | + BBInfo.HasUnknownAccess |= |
| 740 | + IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown; |
| 741 | + bool InfoHasWrites = |
| 742 | + (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write || |
| 743 | + IInfo.ArgAccessType == |
| 744 | + ArgumentAccessInfo::AccessType::WriteWithSideEffect) && |
| 745 | + !IInfo.AccessRanges.empty(); |
| 746 | + BBInfo.HasWrites |= InfoHasWrites; |
| 747 | + return InfoHasWrites; |
| 748 | + }; |
| 749 | + |
| 750 | + // No need for a visited set because we don't look through phis, so there are |
| 751 | + // no cycles. |
| 752 | + while (!Worklist.empty()) { |
| 753 | + ArgumentUse ArgUse = Worklist.pop_back_val(); |
| 754 | + User *U = ArgUse.U->getUser(); |
| 755 | + // Add GEP uses to worklist. |
| 756 | + // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. |
| 757 | + if (auto *GEP = dyn_cast<GEPOperator>(U)) { |
| 758 | + std::optional<int64_t> NewOffset = std::nullopt; |
| 759 | + if (ArgUse.Offset) { |
| 760 | + APInt Offset(PointerSize, 0); |
| 761 | + if (GEP->accumulateConstantOffset(DL, Offset)) |
| 762 | + NewOffset = *ArgUse.Offset + Offset.getSExtValue(); |
| 763 | + } |
| 764 | + for (Use &U : GEP->uses()) |
| 765 | + Worklist.push_back({&U, NewOffset}); |
| 766 | + continue; |
| 767 | + } |
| 768 | + |
| 769 | + auto *I = cast<Instruction>(U); |
| 770 | + bool HasWrite = UpdateUseInfo(I, getArgmentAccessInfo(I, ArgUse, DL)); |
| 771 | + |
| 772 | + Result.HasAnyWrite |= HasWrite; |
| 773 | + |
| 774 | + if (HasWrite && I->getParent() != &EntryBB) |
| 775 | + Result.HasWriteOutsideEntryBB = true; |
| 776 | + } |
| 777 | + return Result; |
| 778 | +} |
| 779 | + |
584 | 780 | } // end anonymous namespace
|
585 | 781 |
|
586 | 782 | namespace llvm {
|
@@ -867,9 +1063,129 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
|
867 | 1063 | return true;
|
868 | 1064 | }
|
869 | 1065 |
|
| 1066 | +static bool inferInitializes(Argument &A, Function &F) { |
| 1067 | + auto ArgumentUses = collectArgumentUsesPerBlock(A, F); |
| 1068 | + // No write anywhere in the function, bail. |
| 1069 | + if (!ArgumentUses.HasAnyWrite) |
| 1070 | + return false; |
| 1071 | + |
| 1072 | + auto &UsesPerBlock = ArgumentUses.UsesPerBlock; |
| 1073 | + BasicBlock &EntryBB = F.getEntryBlock(); |
| 1074 | + // A map to store the argument ranges initialized by a BasicBlock (including |
| 1075 | + // its successors). |
| 1076 | + DenseMap<const BasicBlock *, ConstantRangeList> Initialized; |
| 1077 | + // Visit the successors of "BB" block and the instructions in BB (post-order) |
| 1078 | + // to get the argument ranges initialized by "BB" (including its successors). |
| 1079 | + // The result will be cached in "Initialized". |
| 1080 | + auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { |
| 1081 | + auto UPB = UsesPerBlock.find(BB); |
| 1082 | + ConstantRangeList CRL; |
| 1083 | + |
| 1084 | + // Start with intersection of successors. |
| 1085 | + // If this block has any clobbering use, we're going to clear out the |
| 1086 | + // ranges at some point in this block anyway, so don't bother looking at |
| 1087 | + // successors. |
| 1088 | + if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) { |
| 1089 | + bool HasAddedSuccessor = false; |
| 1090 | + for (auto *Succ : successors(BB)) { |
| 1091 | + if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { |
| 1092 | + if (HasAddedSuccessor) { |
| 1093 | + CRL = CRL.intersectWith(SuccI->second); |
| 1094 | + } else { |
| 1095 | + CRL = SuccI->second; |
| 1096 | + HasAddedSuccessor = true; |
| 1097 | + } |
| 1098 | + } else { |
| 1099 | + CRL = ConstantRangeList(); |
| 1100 | + break; |
| 1101 | + } |
| 1102 | + } |
| 1103 | + } |
| 1104 | + |
| 1105 | + if (UPB != UsesPerBlock.end()) { |
| 1106 | + // Sort uses in this block by instruction order. |
| 1107 | + SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts; |
| 1108 | + append_range(Insts, UPB->second.Insts); |
| 1109 | + sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS, |
| 1110 | + std::pair<Instruction *, ArgumentAccessInfo> &RHS) { |
| 1111 | + return LHS.first->comesBefore(RHS.first); |
| 1112 | + }); |
| 1113 | + |
| 1114 | + // From the end of the block to the beginning of the block, set |
| 1115 | + // initializes ranges. |
| 1116 | + for (auto &[_, Info] : reverse(Insts)) { |
| 1117 | + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown || |
| 1118 | + Info.ArgAccessType == |
| 1119 | + ArgumentAccessInfo::AccessType::WriteWithSideEffect) |
| 1120 | + CRL = ConstantRangeList(); |
| 1121 | + if (!Info.AccessRanges.empty()) { |
| 1122 | + if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write || |
| 1123 | + Info.ArgAccessType == |
| 1124 | + ArgumentAccessInfo::AccessType::WriteWithSideEffect) { |
| 1125 | + CRL = CRL.unionWith(Info.AccessRanges); |
| 1126 | + } else { |
| 1127 | + assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); |
| 1128 | + for (const auto &ReadRange : Info.AccessRanges) |
| 1129 | + CRL.subtract(ReadRange); |
| 1130 | + } |
| 1131 | + } |
| 1132 | + } |
| 1133 | + } |
| 1134 | + return CRL; |
| 1135 | + }; |
| 1136 | + |
| 1137 | + ConstantRangeList EntryCRL; |
| 1138 | + // If all write instructions are in the EntryBB, or if the EntryBB has |
| 1139 | + // a clobbering use, we only need to look at EntryBB. |
| 1140 | + bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB; |
| 1141 | + if (!OnlyScanEntryBlock) |
| 1142 | + if (auto EntryUPB = UsesPerBlock.find(&EntryBB); |
| 1143 | + EntryUPB != UsesPerBlock.end()) |
| 1144 | + OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess; |
| 1145 | + if (OnlyScanEntryBlock) { |
| 1146 | + EntryCRL = VisitBlock(&EntryBB); |
| 1147 | + if (EntryCRL.empty()) |
| 1148 | + return false; |
| 1149 | + } else { |
| 1150 | + // Now we have to go through CFG to get the initialized argument ranges |
| 1151 | + // across blocks. With dominance and post-dominance, the initialized ranges |
| 1152 | + // by a block include both accesses inside this block and accesses in its |
| 1153 | + // (transitive) successors. So visit successors before predecessors with a |
| 1154 | + // post-order walk of the blocks and memorize the results in "Initialized". |
| 1155 | + for (const BasicBlock *BB : post_order(&F)) { |
| 1156 | + ConstantRangeList CRL = VisitBlock(BB); |
| 1157 | + if (!CRL.empty()) |
| 1158 | + Initialized[BB] = CRL; |
| 1159 | + } |
| 1160 | + |
| 1161 | + auto EntryCRLI = Initialized.find(&EntryBB); |
| 1162 | + if (EntryCRLI == Initialized.end()) |
| 1163 | + return false; |
| 1164 | + |
| 1165 | + EntryCRL = EntryCRLI->second; |
| 1166 | + } |
| 1167 | + |
| 1168 | + assert(!EntryCRL.empty() && |
| 1169 | + "should have bailed already if EntryCRL is empty"); |
| 1170 | + |
| 1171 | + if (A.hasAttribute(Attribute::Initializes)) { |
| 1172 | + ConstantRangeList PreviousCRL = |
| 1173 | + A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); |
| 1174 | + if (PreviousCRL == EntryCRL) |
| 1175 | + return false; |
| 1176 | + EntryCRL = EntryCRL.unionWith(PreviousCRL); |
| 1177 | + } |
| 1178 | + |
| 1179 | + A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes, |
| 1180 | + EntryCRL.rangesRef())); |
| 1181 | + |
| 1182 | + return true; |
| 1183 | +} |
| 1184 | + |
870 | 1185 | /// Deduce nocapture attributes for the SCC.
|
871 | 1186 | static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
|
872 |
| - SmallSet<Function *, 8> &Changed) { |
| 1187 | + SmallSet<Function *, 8> &Changed, |
| 1188 | + bool SkipInitializes) { |
873 | 1189 | ArgumentGraph AG;
|
874 | 1190 |
|
875 | 1191 | // Check each function in turn, determining which pointer arguments are not
|
@@ -937,6 +1253,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
|
937 | 1253 | if (addAccessAttr(&A, R))
|
938 | 1254 | Changed.insert(F);
|
939 | 1255 | }
|
| 1256 | + if (!SkipInitializes && !A.onlyReadsMemory()) { |
| 1257 | + if (inferInitializes(A, *F)) |
| 1258 | + Changed.insert(F); |
| 1259 | + } |
940 | 1260 | }
|
941 | 1261 | }
|
942 | 1262 |
|
@@ -1910,13 +2230,16 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
|
1910 | 2230 |
|
1911 | 2231 | SmallSet<Function *, 8> Changed;
|
1912 | 2232 | if (ArgAttrsOnly) {
|
1913 |
| - addArgumentAttrs(Nodes.SCCNodes, Changed); |
| 2233 | + // ArgAttrsOnly means to only infer attributes that may aid optimizations |
| 2234 | + // on the *current* function. "initializes" attribute is to aid |
| 2235 | + // optimizations (like DSE) on the callers, so skip "initializes" here. |
| 2236 | + addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); |
1914 | 2237 | return Changed;
|
1915 | 2238 | }
|
1916 | 2239 |
|
1917 | 2240 | addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
|
1918 | 2241 | addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
|
1919 |
| - addArgumentAttrs(Nodes.SCCNodes, Changed); |
| 2242 | + addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false); |
1920 | 2243 | inferConvergent(Nodes.SCCNodes, Changed);
|
1921 | 2244 | addNoReturnAttrs(Nodes.SCCNodes, Changed);
|
1922 | 2245 | addColdAttrs(Nodes.SCCNodes, Changed);
|
|
0 commit comments