Skip to content

Commit 401f228

Browse files
committed
[ARM] Add pass for handling undef early-clobber values
When using Greedy Register Allocation, there are times where early-clobber values are ignored, and assigned the same register. This is illegal behaviour for these instructions. To get around this, using Pseudo instructions for early-clobber registers gives them a definition and allows Greedy to assign them to a different register. This then meets the ARM Architecture Reference Manual and matches the defined behaviour. This patch takes a similar approach to the RISC-V pass added as part of #3b8c0b3 to fix issue 50157.
1 parent b6f9677 commit 401f228

File tree

8 files changed

+280
-0
lines changed

8 files changed

+280
-0
lines changed

llvm/lib/Target/ARM/ARM.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ FunctionPass *createARMSLSHardeningPass();
5757
FunctionPass *createARMIndirectThunks();
5858
Pass *createMVELaneInterleavingPass();
5959
FunctionPass *createARMFixCortexA57AES1742098Pass();
60+
FunctionPass *createARMInitUndefPass();
6061

6162
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
6263
ARMAsmPrinter &AP);
@@ -67,6 +68,7 @@ void initializeARMConstantIslandsPass(PassRegistry &);
6768
void initializeARMDAGToDAGISelPass(PassRegistry &);
6869
void initializeARMExpandPseudoPass(PassRegistry &);
6970
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
71+
void initializeARMInitUndefPass(PassRegistry &);
7072
void initializeARMLoadStoreOptPass(PassRegistry &);
7173
void initializeARMLowOverheadLoopsPass(PassRegistry &);
7274
void initializeARMParallelDSPPass(PassRegistry &);
@@ -80,6 +82,8 @@ void initializeMVEVPTBlockPass(PassRegistry &);
8082
void initializeThumb2ITBlockPass(PassRegistry &);
8183
void initializeThumb2SizeReducePass(PassRegistry &);
8284

85+
extern char &ARMInitUndefPass;
86+
8387
} // end namespace llvm
8488

8589
#endif // LLVM_LIB_TARGET_ARM_ARM_H

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2409,6 +2409,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
24092409
case ARM::SEH_EpilogEnd:
24102410
ATS.emitARMWinCFIEpilogEnd();
24112411
return;
2412+
2413+
case ARM::PseudoARMInitUndef:
2414+
return;
24122415
}
24132416

24142417
MCInst TmpInst;

llvm/lib/Target/ARM/ARMInitUndef.cpp

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
//===- ARMInitUndef.cpp - Initialize undef vector value to pseudo -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass runs to allow for Undef values to be given a definition in
10+
// early-clobber instructions. This can occur when using higher levels of
11+
// optimisations. Before, undef values were ignored and it would result in
12+
// early-clobber instructions using the same registers for the output as one of
13+
// the inputs. This is an illegal operation according to the ARM Architecture
14+
// Reference Manual. This pass will check for early-clobber instructions and
15+
// give any undef values a defined Pseudo value to ensure that the early-clobber
16+
// rules are followed.
17+
//
18+
// Example: Without this pass, for the vhcadd instruction the following would
19+
// be generated: `vhcadd.s32 q0, q0, q0, #270`. This is an illegal instruction
20+
// as the output register and 2nd input register cannot match. By using this
21+
// pass, and using the Pseudo instruction, the following will be generated.
22+
// `vhcadd.s32 q0, q1, q2, #270`. This is allowed, as the output register and qd
23+
// input register are different.
24+
//===----------------------------------------------------------------------===//
25+
26+
#include "ARM.h"
27+
#include "ARMBaseRegisterInfo.h"
28+
#include "ARMSubtarget.h"
29+
#include "llvm/ADT/bit.h"
30+
#include "llvm/CodeGen/DetectDeadLanes.h"
31+
#include "llvm/CodeGen/MachineBasicBlock.h"
32+
#include "llvm/CodeGen/MachineFunction.h"
33+
#include "llvm/CodeGen/MachineFunctionPass.h"
34+
#include "llvm/CodeGen/MachineInstr.h"
35+
#include "llvm/CodeGen/MachineInstrBuilder.h"
36+
#include "llvm/CodeGen/MachineOperand.h"
37+
#include "llvm/CodeGen/MachineRegisterInfo.h"
38+
#include "llvm/CodeGen/Register.h"
39+
#include "llvm/CodeGen/TargetInstrInfo.h"
40+
#include "llvm/CodeGen/TargetOpcodes.h"
41+
#include "llvm/CodeGen/TargetRegisterInfo.h"
42+
#include "llvm/Pass.h"
43+
#include "llvm/PassRegistry.h"
44+
#include "llvm/Support/Debug.h"
45+
#include <cstddef>
46+
#include <optional>
47+
48+
using namespace llvm;
49+
50+
#define DEBUG_TYPE "arm-init-undef"
51+
#define ARM_INIT_UNDEF_NAME "ARM init undef pass"
52+
53+
namespace {
54+
class ARMInitUndef : public MachineFunctionPass {
55+
const TargetInstrInfo *TII;
56+
MachineRegisterInfo *MRI;
57+
const ARMSubtarget *ST;
58+
const TargetRegisterInfo *TRI;
59+
60+
SmallSet<Register, 8> NewRegs;
61+
62+
public:
63+
static char ID;
64+
65+
ARMInitUndef() : MachineFunctionPass(ID) {
66+
initializeARMInitUndefPass(*PassRegistry::getPassRegistry());
67+
}
68+
bool runOnMachineFunction(MachineFunction &MF) override;
69+
70+
void getAnalysisUsage(AnalysisUsage &AU) const override {
71+
AU.setPreservesCFG();
72+
MachineFunctionPass::getAnalysisUsage(AU);
73+
}
74+
75+
StringRef getPassName() const override { return ARM_INIT_UNDEF_NAME; }
76+
77+
private:
78+
bool handleImplicitDef(MachineBasicBlock &MBB,
79+
MachineBasicBlock::iterator &Inst);
80+
bool isVectorRegClass(const Register R);
81+
const TargetRegisterClass *
82+
getVRLargestSuperClass(const TargetRegisterClass *RC) const;
83+
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
84+
const DeadLaneDetector &DLD);
85+
};
86+
} // end anonymous namespace
87+
88+
char ARMInitUndef::ID = 0;
89+
INITIALIZE_PASS(ARMInitUndef, DEBUG_TYPE, ARM_INIT_UNDEF_NAME, false, false)
90+
char &llvm::ARMInitUndefPass = ARMInitUndef::ID;
91+
92+
// Returns true if at least one explicit def operand of MI is a register
// operand flagged as early-clobber.
static bool isEarlyClobberMI(MachineInstr &MI) {
  for (const MachineOperand &Def : MI.defs()) {
    if (!Def.isReg())
      continue;
    if (Def.isEarlyClobber())
      return true;
  }
  return false;
}
97+
98+
static unsigned getUndefInitOpcode(unsigned RegClassID) {
99+
if (RegClassID == ARM::MQPRRegClass.getID())
100+
return ARM::PseudoARMInitUndef;
101+
102+
llvm_unreachable("Unexpected register class.");
103+
}
104+
105+
/* handleImplicitDef is used to apply the definition to any undef values. This
106+
* is only done for instructions that are early-clobber, and are not tied to
107+
* other instructions. It will cycle through every MachineBasicBlock iterator
108+
* that is an IMPLICIT_DEF then check for if it is early-clobber, not tied and
109+
* is used. If all these are true, then the value is applied. This is then
110+
* changed to be defined at ARM_INIT_UNDEF_PSEUDO
111+
*/
112+
bool ARMInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
113+
MachineBasicBlock::iterator &Inst) {
114+
bool Changed = false;
115+
Register LastReg;
116+
117+
while (Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF &&
118+
Inst->getOperand(0).getReg() != LastReg) {
119+
120+
bool HasOtherUse = false;
121+
SmallVector<MachineOperand *, 1> UseMOs;
122+
LastReg = Inst->getOperand(0).getReg();
123+
124+
Register Reg = Inst->getOperand(0).getReg();
125+
if (!Reg.isVirtual())
126+
continue;
127+
128+
for (MachineOperand &MO : MRI->reg_operands(Reg)) {
129+
LLVM_DEBUG(dbgs() << "Register: " << MO.getReg() << "\n");
130+
LLVM_DEBUG(dbgs() << "MO " << MO << " is EarlyClobber "
131+
<< isEarlyClobberMI(*MO.getParent()) << "\n");
132+
if (isEarlyClobberMI(*MO.getParent())) {
133+
LLVM_DEBUG(dbgs() << "MO " << &MO << " is use " << MO.isUse()
134+
<< " MO is tied " << MO.isTied() << "\n");
135+
if (MO.isUse() && !MO.isTied()) {
136+
LLVM_DEBUG(dbgs() << "MO " << MO << " added\n");
137+
UseMOs.push_back(&MO);
138+
} else
139+
HasOtherUse = true;
140+
}
141+
}
142+
LLVM_DEBUG(dbgs() << "UseMOs " << &UseMOs << " is empty " << UseMOs.empty()
143+
<< "\n");
144+
if (UseMOs.empty())
145+
continue;
146+
147+
LLVM_DEBUG(
148+
dbgs() << "Emitting PseudoARMInitUndef for implicit vector register "
149+
<< Reg << '\n'
150+
<< '\n');
151+
152+
const TargetRegisterClass *TargetRegClass =
153+
getVRLargestSuperClass(MRI->getRegClass(Reg));
154+
unsigned Opcode = getUndefInitOpcode(TargetRegClass->getID());
155+
156+
Register NewDest = Reg;
157+
if (HasOtherUse) {
158+
NewDest = MRI->createVirtualRegister(TargetRegClass);
159+
NewRegs.insert(NewDest);
160+
}
161+
BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), NewDest);
162+
163+
if (!HasOtherUse)
164+
Inst = MBB.erase(Inst);
165+
166+
for (auto *MO : UseMOs) {
167+
MO->setReg(NewDest);
168+
MO->setIsUndef(false);
169+
}
170+
171+
Changed = true;
172+
}
173+
return Changed;
174+
}
175+
176+
// Reports whether the register class recorded for R is MQPR or one of its
// sub-classes.
bool ARMInitUndef::isVectorRegClass(Register R) {
  const TargetRegisterClass *RC = MRI->getRegClass(R);
  return ARM::MQPRRegClass.hasSubClassEq(RC);
}
179+
180+
// Widens RC to MQPR when RC is MQPR or a sub-class of it; every other class is
// handed back unchanged.
const TargetRegisterClass *
ARMInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const {
  const bool IsMQPRSubClass = ARM::MQPRRegClass.hasSubClassEq(RC);
  return IsMQPRSubClass ? &ARM::MQPRRegClass : RC;
}
186+
187+
/* This will process a BasicBlock within the MachineFunction. This will take
188+
* each iterator and determine if there is an Implicit Def that needs dealing
189+
* with. This also deals with implicit def's that are tied to an operand, to
190+
* ensure that the correct undef values are given a definition.
191+
*/
192+
bool ARMInitUndef::processBasicBlock(MachineFunction &MF,
193+
MachineBasicBlock &MBB,
194+
const DeadLaneDetector &DLD) {
195+
bool Changed = false;
196+
197+
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
198+
MachineInstr &MI = *I;
199+
200+
unsigned UseOpIdx;
201+
if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
202+
MachineOperand &UseMO = MI.getOperand(UseOpIdx);
203+
if (UseMO.getReg() == ARM::NoRegister) {
204+
const TargetRegisterClass *RC =
205+
TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
206+
Register NewDest = MRI->createVirtualRegister(RC);
207+
208+
NewRegs.insert(NewDest);
209+
BuildMI(MBB, I, I->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
210+
NewDest);
211+
UseMO.setReg(NewDest);
212+
Changed = true;
213+
}
214+
}
215+
216+
if (MI.isImplicitDef()) {
217+
auto DstReg = MI.getOperand(0).getReg();
218+
if (DstReg.isVirtual() && isVectorRegClass(DstReg))
219+
Changed |= handleImplicitDef(MBB, I);
220+
}
221+
}
222+
return Changed;
223+
}
224+
225+
// Pass entry point: caches the subtarget, register and instruction info of MF
// and then processes every basic block. Returns true if any block changed.
bool ARMInitUndef::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<ARMSubtarget>();
  MRI = &MF.getRegInfo();
  TII = ST->getInstrInfo();
  TRI = MRI->getTargetRegisterInfo();

  // NewRegs is a member of the pass object and holds registers created through
  // the MachineRegisterInfo of whichever function was processed before; start
  // from an empty set so entries do not pile up across functions.
  NewRegs.clear();

  DeadLaneDetector DLD(MRI, TRI);
  DLD.computeSubRegisterLaneBitInfo();

  bool Changed = false;
  for (MachineBasicBlock &BB : MF)
    Changed |= processBasicBlock(MF, BB, DLD);

  return Changed;
}
241+
242+
// Factory for the pass; returns a newly allocated ARMInitUndef instance.
FunctionPass *llvm::createARMInitUndefPass() {
  FunctionPass *InitUndef = new ARMInitUndef();
  return InitUndef;
}

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6532,3 +6532,11 @@ let isPseudo = 1 in {
65326532
let isTerminator = 1 in
65336533
def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
65346534
}
6535+
6536+
//===----------------------------------------------------------------------===//
6537+
// Pseudo Instructions for use when early-clobber is defined and Greedy Register
6538+
// Allocation is used. This ensures the constraint is used properly.
6539+
//===----------------------------------------------------------------------===//
6540+
let isCodeGenOnly = 1 in {
6541+
def PseudoARMInitUndef : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
6542+
}

llvm/lib/Target/ARM/ARMTargetMachine.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/IR/Function.h"
4242
#include "llvm/MC/TargetRegistry.h"
4343
#include "llvm/Pass.h"
44+
#include "llvm/PassRegistry.h"
4445
#include "llvm/Support/CodeGen.h"
4546
#include "llvm/Support/CommandLine.h"
4647
#include "llvm/Support/ErrorHandling.h"
@@ -111,6 +112,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
111112
initializeMVELaneInterleavingPass(Registry);
112113
initializeARMFixCortexA57AES1742098Pass(Registry);
113114
initializeARMDAGToDAGISelPass(Registry);
115+
initializeARMInitUndefPass(Registry);
114116
}
115117

116118
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -384,6 +386,7 @@ class ARMPassConfig : public TargetPassConfig {
384386
void addPreSched2() override;
385387
void addPreEmitPass() override;
386388
void addPreEmitPass2() override;
389+
void addOptimizedRegAlloc() override;
387390

388391
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
389392
};
@@ -621,6 +624,13 @@ void ARMPassConfig::addPreEmitPass2() {
621624
}
622625
}
623626

627+
void ARMPassConfig::addOptimizedRegAlloc() {
628+
if (getOptimizeRegAlloc())
629+
insertPass(&DetectDeadLanesID, &ARMInitUndefPass);
630+
631+
TargetPassConfig::addOptimizedRegAlloc();
632+
}
633+
624634
yaml::MachineFunctionInfo *
625635
ARMBaseTargetMachine::createDefaultFuncInfoYAML() const {
626636
return new yaml::ARMFunctionInfo();

llvm/lib/Target/ARM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ add_llvm_target(ARMCodeGen
3535
ARMFixCortexA57AES1742098Pass.cpp
3636
ARMFrameLowering.cpp
3737
ARMHazardRecognizer.cpp
38+
ARMInitUndef.cpp
3839
ARMInstructionSelector.cpp
3940
ARMISelDAGToDAG.cpp
4041
ARMISelLowering.cpp

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
114114
; CHECK-NEXT: ARM A15 S->D optimizer
115115
; CHECK-NEXT: Detect Dead Lanes
116+
; CHECK-NEXT: ARM init undef pass
116117
; CHECK-NEXT: Process Implicit Definitions
117118
; CHECK-NEXT: Remove unreachable machine basic blocks
118119
; CHECK-NEXT: Live Variable Analysis

llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,17 @@ entry:
699699
ret <4 x i32> %0
700700
}
701701

702+
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
703+
; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
704+
; CHECK: @ %bb.0: @ %entry
705+
; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
706+
; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
707+
; CHECK-NEXT: bx lr
708+
entry:
709+
%0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
710+
ret <4 x i32> %0
711+
}
712+
702713
define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
703714
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
704715
; CHECK: @ %bb.0: @ %entry

0 commit comments

Comments
 (0)