Skip to content

Commit 7748055

Browse files
committed
[RegAllocGreedy] New hook regClassPriorityTrumpsGlobalness
Add a new TargetRegisterInfo hook to allow targets to tweak the priority of live ranges, so that AllocationPriority of the register class will be treated as more important than whether the range is local to a basic block or global. This is determined per-MachineFunction. Differential Revision: https://reviews.llvm.org/D125102
1 parent 9e469ce commit 7748055

File tree

5 files changed

+82
-2
lines changed

5 files changed

+82
-2
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ class TargetRegisterClass {
5555
const LaneBitmask LaneMask;
5656
/// Classes with a higher priority value are assigned first by register
5757
/// allocators using a greedy heuristic. The value is in the range [0,63].
58+
/// Values >= 32 should be used with care since they may overlap with other
59+
/// fields in the allocator's priority heuristics.
5860
const uint8_t AllocationPriority;
5961
/// Configurable target specific flags.
6062
const uint8_t TSFlags;
@@ -1076,6 +1078,14 @@ class TargetRegisterInfo : public MCRegisterInfo {
10761078
return false;
10771079
}
10781080

1081+
/// When prioritizing live ranges in register allocation, if this hook returns
1082+
/// true then the AllocationPriority of the register class will be treated as
1083+
/// more important than whether the range is local to a basic block or global.
/// The default implementation returns false. The greedy allocator queries this
/// once per MachineFunction (\p MF), unless the
/// -greedy-regclass-priority-trumps-globalness option is given explicitly.
1084+
virtual bool
1085+
regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const {
1086+
return false;
1087+
}
1088+
10791089
//===--------------------------------------------------------------------===//
10801090
/// Debug information queries.
10811091

llvm/include/llvm/Target/Target.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,8 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
279279
// heuristic. Classes with higher priority values are assigned first. This is
280280
// useful as it is sometimes beneficial to assign registers to highly
281281
// constrained classes first. The value has to be in the range [0,63].
282+
// Values >= 32 should be used with care since they may overlap with other
283+
// fields in the allocator's priority heuristics.
282284
int AllocationPriority = 0;
283285

284286
// Generate register pressure set for this register class and any class

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ static cl::opt<unsigned long> GrowRegionComplexityBudget(
128128
"limit its budget and bail out once we reach the limit."),
129129
cl::init(10000), cl::Hidden);
130130

131+
// Command-line override for TargetRegisterInfo::regClassPriorityTrumpsGlobalness.
// When passed explicitly, its value is used instead of the target hook's answer.
static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
132+
"greedy-regclass-priority-trumps-globalness",
133+
cl::desc("Change the greedy register allocator's live range priority "
134+
"calculation to make the AllocationPriority of the register class "
135+
"more important than whether the range is global"),
136+
cl::Hidden);
137+
131138
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
132139
createGreedyRegisterAllocator);
133140

@@ -305,6 +312,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
305312
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
306313
bool ForceGlobal = !ReverseLocal &&
307314
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
315+
unsigned GlobalBit = 0;
308316

309317
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
310318
LIS->intervalIsInOneMBB(*LI)) {
@@ -323,9 +331,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
323331
// Allocate global and split ranges in long->short order. Long ranges that
324332
// don't fit should be spilled (or split) ASAP so they don't create
325333
// interference. Mark a bit to prioritize global above local ranges.
326-
Prio = (1u << 29) + Size;
334+
Prio = Size;
335+
GlobalBit = 1;
327336
}
328-
Prio |= RC.AllocationPriority << 24;
337+
if (RegClassPriorityTrumpsGlobalness)
338+
Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
339+
else
340+
Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
329341

330342
// Mark a higher bit to prioritize global and local above RS_Split.
331343
Prio |= (1u << 31);
@@ -2692,6 +2704,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
26922704
initializeCSRCost();
26932705

26942706
RegCosts = TRI->getRegisterCosts(*MF);
2707+
RegClassPriorityTrumpsGlobalness =
2708+
GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2709+
? GreedyRegClassPriorityTrumpsGlobalness
2710+
: TRI->regClassPriorityTrumpsGlobalness(*MF);
26952711

26962712
ExtraInfo.emplace();
26972713
EvictAdvisor =

llvm/lib/CodeGen/RegAllocGreedy.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,10 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
322322
/// Function
323323
ArrayRef<uint8_t> RegCosts;
324324

325+
/// Whether a register class's AllocationPriority outranks the global/local
326+
/// distinction in the live range priority calculation. Set once per machine function.
327+
bool RegClassPriorityTrumpsGlobalness;
328+
325329
public:
326330
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
327331

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=0 -start-before greedy -o - %s | FileCheck %s -check-prefix=OLD
2+
# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=1 -start-before greedy -o - %s | FileCheck %s -check-prefix=NEW
3+
4+
# At the time of writing -greedy-regclass-priority-trumps-globalness makes a
5+
# significant improvement in the total number of vgprs needed to compile this
6+
# test, from 11 down to 7.
7+
8+
# OLD: NumVgprs: 11{{$}}
9+
# NEW: NumVgprs: 7{{$}}
10+
11+
---
12+
name: _amdgpu_cs_main
13+
tracksRegLiveness: true
14+
body: |
15+
bb.0:
16+
successors: %bb.1, %bb.2
17+
liveins: $vgpr0, $vgpr6
18+
19+
%6:vgpr_32 = COPY $vgpr6
20+
undef %30.sub0:vreg_128 = COPY $vgpr0
21+
undef %27.sub0:vreg_128 = V_MED3_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
22+
undef %16.sub0:sgpr_256 = S_MOV_B32 0
23+
undef %26.sub1:vreg_64 = V_LSHRREV_B32_e32 1, %6, implicit $exec
24+
%27.sub1:vreg_128 = COPY %27.sub0
25+
%27.sub2:vreg_128 = COPY %27.sub0
26+
%27.sub3:vreg_128 = COPY %27.sub0
27+
%26.sub0:vreg_64 = V_MOV_B32_e32 1, implicit $exec
28+
%16.sub1:sgpr_256 = COPY %16.sub0
29+
%16.sub2:sgpr_256 = COPY %16.sub0
30+
%16.sub3:sgpr_256 = COPY %16.sub0
31+
%16.sub4:sgpr_256 = COPY %16.sub0
32+
%16.sub5:sgpr_256 = COPY %16.sub0
33+
%16.sub6:sgpr_256 = COPY %16.sub0
34+
%16.sub7:sgpr_256 = COPY %16.sub0
35+
IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
36+
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
37+
S_BRANCH %bb.1
38+
39+
bb.1:
40+
%30.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
41+
%30.sub2:vreg_128 = COPY %30.sub1
42+
%30.sub3:vreg_128 = COPY %30.sub1
43+
%26.sub1:vreg_64 = COPY %30.sub1
44+
IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
45+
46+
bb.2:
47+
S_ENDPGM 0
48+
...

0 commit comments

Comments
 (0)