Skip to content

Commit fab2c85

Browse files
JonPssonJonPsson1
authored andcommitted
Cleaned up after experiments.
1 parent d41615e commit fab2c85

27 files changed

+2106
-80
lines changed

llvm/include/llvm/CodeGen/MachineCombinerPattern.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,15 @@ enum class MachineCombinerPattern {
176176
FMSUB,
177177
FNMSUB,
178178

179+
// SystemZ patterns. (EXPERIMENTAL)
180+
FMA2_P1P0,
181+
FMA2_P0P1,
182+
FMA2,
183+
FMA1_Add_L,
184+
FMA1_Add_R,
185+
FMA3, // These are inspired by PPC
186+
FMA2_Add, //
187+
179188
// X86 VNNI
180189
DPWSSD,
181190

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1698,7 +1698,7 @@ class TargetInstrInfo : public MCInstrInfo {
16981698
/// instruction that defines FoldAsLoadDefReg, and the function returns
16991699
/// the machine instruction generated due to folding.
17001700
virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI,
1701-
const MachineRegisterInfo *MRI,
1701+
MachineRegisterInfo *MRI,
17021702
Register &FoldAsLoadDefReg,
17031703
MachineInstr *&DefMI) const {
17041704
return nullptr;

llvm/lib/CodeGen/MachineCombiner.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,7 @@ MachineCombiner::getOperandDef(const MachineOperand &MO) {
155155
// We need a virtual register definition.
156156
if (MO.isReg() && MO.getReg().isVirtual())
157157
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
158-
// PHI's have no depth etc.
159-
if (DefInstr && DefInstr->isPHI())
160-
DefInstr = nullptr;
158+
// (PATCH PROPOSED for PHIs: https://github.com/llvm/llvm-project/pull/82025)
161159
return DefInstr;
162160
}
163161

@@ -317,6 +315,13 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
317315
case MachineCombinerPattern::FMADD_XA:
318316
case MachineCombinerPattern::FMSUB:
319317
case MachineCombinerPattern::FNMSUB:
318+
case MachineCombinerPattern::FMA2_P1P0:
319+
case MachineCombinerPattern::FMA2_P0P1:
320+
case MachineCombinerPattern::FMA2:
321+
case MachineCombinerPattern::FMA1_Add_L:
322+
case MachineCombinerPattern::FMA1_Add_R:
323+
case MachineCombinerPattern::FMA3:
324+
case MachineCombinerPattern::FMA2_Add:
320325
return CombinerObjective::MustReduceDepth;
321326
case MachineCombinerPattern::REASSOC_XY_BCA:
322327
case MachineCombinerPattern::REASSOC_XY_BAC:

llvm/lib/Target/SystemZ/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen
2020
SystemZConstantPoolValue.cpp
2121
SystemZCopyPhysRegs.cpp
2222
SystemZElimCompare.cpp
23+
SystemZFinalizeReassociation.cpp
2324
SystemZFrameLowering.cpp
2425
SystemZHazardRecognizer.cpp
2526
SystemZISelDAGToDAG.cpp

llvm/lib/Target/SystemZ/SystemZ.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,14 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
195195
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
196196
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
197197
FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
198+
FunctionPass *createSystemZFinalizeReassociationPass(SystemZTargetMachine &TM);
198199
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
199200
FunctionPass *createSystemZTDCPass();
200201

201202
void initializeSystemZCopyPhysRegsPass(PassRegistry &);
202203
void initializeSystemZDAGToDAGISelPass(PassRegistry &);
203204
void initializeSystemZElimComparePass(PassRegistry &);
205+
void initializeSystemZFinalizeReassociationPass(PassRegistry &);
204206
void initializeSystemZLDCleanupPass(PassRegistry &);
205207
void initializeSystemZLongBranchPass(PassRegistry &);
206208
void initializeSystemZPostRewritePass(PassRegistry &);
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
//===---- SystemZFinalizeReassociation.cpp - Finalize FP reassociation ----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass is the last step of the process of enabling reassociation with
10+
// the MachineCombiner. These are the steps involved:
11+
//
12+
// 1. Instruction selection: Disable reg/mem folding for any operations that
13+
// are reassociable since MachineCombiner will not succeed
14+
// otherwise. Instead select a reg/reg pseudo that pretends to clobber CC.
15+
//
16+
// 2. MachineCombiner: Performs reassociation with the reg/reg instructions.
17+
//
18+
// 3. PeepholeOptimizer: Folds loads into reg/mem instructions after
19+
// reassociation. The reg/mem opcodes set CC, which is why the special
20+
// reg/reg pseudos (which pretend to clobber CC) are needed.
21+
//
22+
// 4. Convert any remaining pseudos into the target opcodes that do not
23+
// clobber CC (this pass).
24+
//
25+
//===----------------------------------------------------------------------===//
26+
27+
#include "SystemZMachineFunctionInfo.h"
28+
#include "SystemZTargetMachine.h"
29+
#include "llvm/CodeGen/MachineDominators.h"
30+
#include "llvm/CodeGen/MachineFunctionPass.h"
31+
#include "llvm/CodeGen/MachineInstrBuilder.h"
32+
#include "llvm/CodeGen/MachineRegisterInfo.h"
33+
#include "llvm/CodeGen/TargetInstrInfo.h"
34+
#include "llvm/CodeGen/TargetRegisterInfo.h"
35+
#include "llvm/Target/TargetMachine.h"
36+
37+
using namespace llvm;
38+
39+
namespace {
40+
41+
// Machine function pass that walks every basic block and replaces the
// *_CCPseudo reassociation pseudos with their real target opcodes (see
// visitMBB). TII and MRI are cached per function in runOnMachineFunction.
class SystemZFinalizeReassociation : public MachineFunctionPass {
42+
public:
43+
// Pass identification.
static char ID;
44+
// Registers the pass with the global PassRegistry on construction; TII and
// MRI stay null until runOnMachineFunction sets them.
SystemZFinalizeReassociation()
45+
: MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) {
46+
initializeSystemZFinalizeReassociationPass(*PassRegistry::getPassRegistry());
47+
}
48+
49+
bool runOnMachineFunction(MachineFunction &MF) override;
50+
void getAnalysisUsage(AnalysisUsage &AU) const override;
51+
52+
private:
53+
54+
// Processes one basic block; returns true if any instruction was changed.
bool visitMBB(MachineBasicBlock &MBB);
55+
56+
// Target instruction info and register info of the function being processed.
const SystemZInstrInfo *TII;
57+
MachineRegisterInfo *MRI;
58+
};
59+
60+
char SystemZFinalizeReassociation::ID = 0;
61+
62+
} // end anonymous namespace
63+
64+
// Register the pass under the command-line name "systemz-finalize-reassoc".
INITIALIZE_PASS(SystemZFinalizeReassociation, "systemz-finalize-reassoc",
65+
"SystemZ Finalize Reassociation", false, false)
66+
67+
// Factory for the pass. The TM argument is not used by the body; the pass
// obtains everything it needs from the MachineFunction it is run on.
FunctionPass *llvm::
68+
createSystemZFinalizeReassociationPass(SystemZTargetMachine &TM) {
69+
return new SystemZFinalizeReassociation();
70+
}
71+
72+
// Declares that this pass preserves the CFG, then defers to the base class
// for the remaining analysis usage.
void SystemZFinalizeReassociation::getAnalysisUsage(AnalysisUsage &AU) const {
73+
AU.setPreservesCFG();
74+
MachineFunctionPass::getAnalysisUsage(AU);
75+
}
76+
77+
// Finalizes all reassociation pseudos in MBB. For each *_CCPseudo
// instruction, first try to fold a load into it via
// TII->optimizeLoadInstr(); if that fails, rewrite the pseudo in place to the
// corresponding target opcode and drop its CC def operand.
// Returns true if any instruction in MBB was folded or rewritten.
bool SystemZFinalizeReassociation::visitMBB(MachineBasicBlock &MBB) {
78+
bool Changed = false;
79+
// The early-inc range allows MI to be erased while iterating.
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
80+
unsigned PseudoOpcode = MI.getOpcode();
81+
// Map the pseudo to its target opcode; 0 means MI is not one of the
// pseudos handled here.
unsigned TargetOpcode =
82+
PseudoOpcode == SystemZ::WFADB_CCPseudo ? SystemZ::WFADB
83+
: PseudoOpcode == SystemZ::WFASB_CCPseudo ? SystemZ::WFASB
84+
: PseudoOpcode == SystemZ::WFSDB_CCPseudo ? SystemZ::WFSDB
85+
: PseudoOpcode == SystemZ::WFSSB_CCPseudo ? SystemZ::WFSSB
86+
: PseudoOpcode == SystemZ::WFMDB_CCPseudo ? SystemZ::WFMDB
87+
: PseudoOpcode == SystemZ::WFMSB_CCPseudo ? SystemZ::WFMSB
88+
: PseudoOpcode == SystemZ::WFMADB_CCPseudo ? SystemZ::WFMADB
89+
: PseudoOpcode == SystemZ::WFMASB_CCPseudo ? SystemZ::WFMASB
90+
: 0;
91+
if (TargetOpcode) {
92+
// PeepholeOptimizer does not fold loads across basic blocks. Doing so
93+
// appears to be beneficial, so attempt the fold here instead.
94+
bool Folded = false;
95+
// Only operands 1 and 2 are tried as load-fold candidates.
// NOTE(review): assumes both are register operands for every pseudo
// listed above -- confirm against the pseudo definitions.
for (unsigned Op = 1; Op <= 2; ++Op) {
96+
Register Reg = MI.getOperand(Op).getReg();
97+
if (MachineInstr *DefMI = MRI->getVRegDef(Reg))
98+
// Reg and DefMI are passed by reference, so optimizeLoadInstr may
// update them. A non-null result is treated as a successful fold.
if (TII->optimizeLoadInstr(MI, MRI, Reg, DefMI)) {
99+
// The pseudo and the defining instruction are both removed once
// the fold has succeeded.
MI.eraseFromParent();
100+
DefMI->eraseFromParent();
101+
MRI->markUsesInDebugValueAsUndef(Reg);
102+
Folded = true;
103+
break;
104+
}
105+
}
106+
107+
if (!Folded) {
108+
// No load could be folded: switch to the target opcode and remove the
// CC def operand carried by the pseudo.
// NOTE(review): CCIdx is used unchecked; presumably every pseudo here
// has a CC def, but an assert(CCIdx >= 0) would make that explicit.
MI.setDesc(TII->get(TargetOpcode));
109+
int CCIdx = MI.findRegisterDefOperandIdx(SystemZ::CC);
110+
MI.removeOperand(CCIdx);
111+
}
112+
Changed = true;
113+
}
114+
}
115+
return Changed;
116+
}
117+
118+
// Pass entry point: caches the SystemZ instruction info and the register info
// for F, then runs visitMBB on every basic block.
// Returns true if any block was modified.
bool SystemZFinalizeReassociation::runOnMachineFunction(MachineFunction &F) {
119+
TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
120+
MRI = &F.getRegInfo();
121+
122+
bool Modified = false;
123+
for (auto &MBB : F)
124+
Modified |= visitMBB(MBB);
125+
126+
return Modified;
127+
}

llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,13 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
347347
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
348348
SDValue expandSelectBoolean(SDNode *Node);
349349

350+
// Return true if the flags of N and the subtarget allow for reassociation:
// N must carry both the allow-reassociation and the no-signed-zeros flags,
// and the subtarget must have the vector facility.
351+
bool isReassociable(SDNode *N) const {
352+
return N->getFlags().hasAllowReassociation() &&
353+
N->getFlags().hasNoSignedZeros() &&
354+
Subtarget->hasVector();
355+
}
356+
350357
public:
351358
static char ID;
352359

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,10 @@ let Uses = [FPC], mayRaiseFPException = 1,
430430
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
431431
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
432432
}
433-
defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
434-
defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>;
433+
defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, z_any_fadd_noreassoc, FP32,
434+
load, 4>;
435+
defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, z_any_fadd_noreassoc, FP64,
436+
load, 8>;
435437
}
436438

437439
// Subtraction.
@@ -441,8 +443,10 @@ let Uses = [FPC], mayRaiseFPException = 1,
441443
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
442444
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
443445

444-
defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>;
445-
defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
446+
defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, z_any_fsub_noreassoc, FP32,
447+
load, 4>;
448+
defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, z_any_fsub_noreassoc, FP64,
449+
load, 8>;
446450
}
447451

448452
// Multiplication.
@@ -452,8 +456,10 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
452456
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
453457
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
454458
}
455-
defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>;
456-
defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
459+
defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, z_any_fmul_noreassoc, FP32,
460+
load, 4>;
461+
defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, z_any_fmul_noreassoc, FP64,
462+
load, 8>;
457463
}
458464

459465
// f64 multiplication of two FP32 registers.
@@ -495,8 +501,10 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
495501
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
496502
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
497503

498-
defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
499-
defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
504+
defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma_noreassoc, FP32,
505+
FP32, load, 4>;
506+
defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma_noreassoc, FP64,
507+
FP64, load, 8>;
500508
}
501509

502510
// Fused multiply-subtract.

llvm/lib/Target/SystemZ/SystemZInstrFormats.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,3 +5536,33 @@ multiclass StringRRE<string mnemonic, bits<16> opcode,
55365536
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
55375537
GR32:$char))]>;
55385538
}
5539+
5540+
// Defines a BinaryVRRc instruction (matching `operator`) together with a
// two-input "_CCPseudo" variant that matches `reassoc_operator` and lists CC
// in its Defs. The pseudo is given AddedComplexity = 1 so that it is
// preferred over the plain instruction when both patterns match.
multiclass BinaryVRRcAndCCPseudo<string mnemonic, bits<16> opcode,
5541+
SDPatternOperator operator,
5542+
SDPatternOperator reassoc_operator,
5543+
TypedReg tr1, TypedReg tr2, bits<4> type = 0,
5544+
bits<4> m5 = 0, bits<4> m6 = 0,
5545+
string fp_mnemonic = ""> {
5546+
def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, m6,
5547+
fp_mnemonic>;
5548+
let Defs = [CC], AddedComplexity = 1 in // Win over "".
5549+
def _CCPseudo : Pseudo<(outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
5550+
[(set (tr1.vt tr1.op:$V1),
5551+
(reassoc_operator (tr2.vt tr2.op:$V2),
5552+
(tr2.vt tr2.op:$V3)))]>;
5553+
}
5554+
5555+
// Defines a TernaryVRRe instruction (matching `operator`) together with a
// three-input "_CCPseudo" variant that matches `reassoc_operator` and lists
// CC in its Defs. The pseudo is given AddedComplexity = 1 so that it is
// preferred over the plain instruction when both patterns match.
multiclass TernaryVRReAndCCPseudo<string mnemonic, bits<16> opcode,
5556+
SDPatternOperator operator,
5557+
SDPatternOperator reassoc_operator,
5558+
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0,
5559+
bits<4> type = 0, string fp_mnemonic = ""> {
5560+
def "" : TernaryVRRe<mnemonic, opcode, operator, tr1, tr2, m5, type,
5561+
fp_mnemonic>;
5562+
let Defs = [CC], AddedComplexity = 1 in // Win over "".
5563+
def _CCPseudo : Pseudo<(outs tr1.op:$V1),
5564+
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
5565+
[(set (tr1.vt tr1.op:$V1), (reassoc_operator (tr2.vt tr2.op:$V2),
5566+
(tr2.vt tr2.op:$V3),
5567+
(tr1.vt tr1.op:$V4)))]>;
5568+
}

0 commit comments

Comments
 (0)