Skip to content

Commit 674574d

Browse files
authored
Promote 32bit pseudo instr that infer extsw removal to 64bit in PPCMIPeephole (#85451)
Fixes: #71030. The bug only happens in 64-bit mode and involves spills. Since we don't know when a spill will happen, all instructions in the chain used to deduce sign extension for eliminating 'extsw' need to be promoted to 64-bit pseudo instructions. The following instructions will be promoted in PPCMIPeephole: EXTSH, LHA, ISEL to EXTSH8, LHA8, ISEL8.
1 parent 913cd11 commit 674574d

13 files changed

+290
-25
lines changed

llvm/lib/Target/PowerPC/P10InstrResources.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -825,15 +825,17 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
825825
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
826826
(instrs
827827
SRADI_rec,
828-
SRAWI_rec
828+
SRAWI_rec,
829+
SRAWI8_rec
829830
)>;
830831

831832
// Single crack instructions
832833
// 4 Cycles ALU2 operations, 2 input operands
833834
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
834835
(instrs
835836
SRAD_rec,
836-
SRAW_rec
837+
SRAW_rec,
838+
SRAW8_rec
837839
)>;
838840

839841
// 2-way crack instructions
@@ -926,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
926928
SETNBC, SETNBC8,
927929
SETNBCR, SETNBCR8,
928930
SRADI, SRADI_32,
929-
SRAWI,
931+
SRAWI, SRAWI8,
930932
SUBFIC, SUBFIC8,
931933
SUBFME, SUBFME8,
932934
SUBFME8O, SUBFMEO,
@@ -1008,7 +1010,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
10081010
SLD,
10091011
SLW, SLW8,
10101012
SRAD,
1011-
SRAW,
1013+
SRAW, SRAW8,
10121014
SRD,
10131015
SRW, SRW8,
10141016
SUBF, SUBF8,

llvm/lib/Target/PowerPC/P9InstrResources.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
189189
(instregex "F(N)?ABS(D|S)$"),
190190
(instregex "FNEG(D|S)$"),
191191
(instregex "FCPSGN(D|S)$"),
192-
(instregex "SRAW(I)?$"),
192+
(instregex "SRAW(8)?$"),
193+
(instregex "SRAWI(8)?$"),
193194
(instregex "ISEL(8)?$"),
194195
RLDIMI,
195196
XSIEXPDP,
@@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
10911092
(instregex "RLD(I)?C(R|L)_rec$"),
10921093
(instregex "RLW(IMI|INM|NM)(8)?_rec$"),
10931094
(instregex "SLW(8)?_rec$"),
1094-
(instregex "SRAW(I)?_rec$"),
1095+
(instregex "SRAW(8)?_rec$"),
1096+
(instregex "SRAWI(8)?_rec$"),
10951097
(instregex "SRW(8)?_rec$"),
10961098
RLDICL_32_rec,
10971099
RLDIMI_rec

llvm/lib/Target/PowerPC/PPC.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,18 @@ def getAltVSXFMAOpcode : InstrMapping {
552552
let ValueCols = [["1"]];
553553
}
554554

555+
def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
556+
let FilterClass = "SExt32To64";
557+
// Instructions with the same opcode.
558+
let RowFields = ["Inst"];
559+
// Instructions with the same Interpretation64Bit value form a column.
560+
let ColFields = ["Interpretation64Bit"];
561+
// The key column holds the instructions that are not in Interpretation64Bit form.
562+
let KeyCol = ["0"];
563+
// Value columns are the Interpretation64Bit-form instructions.
564+
let ValueCols = [["1"]];
565+
}
566+
555567
//===----------------------------------------------------------------------===//
556568
// Register File Description
557569
//===----------------------------------------------------------------------===//

llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,14 @@ defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
932932
"slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
933933
defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
934934
"srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
935+
936+
defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
937+
"sraw", "$RA, $RST, $RB", IIC_IntShift,
938+
[]>, SExt32To64;
939+
940+
defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
941+
"srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;
942+
935943
} // Interpretation64Bit
936944

937945
// For fast-isel:

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5250,6 +5250,215 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
52505250
// We limit the max depth to track incoming values of PHIs or binary ops
52515251
// (e.g. AND) to avoid excessive cost.
52525252
const unsigned MAX_BINOP_DEPTH = 1;
5253+
5254+
// This function will promote the instruction which defines the register `Reg`
5255+
// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5256+
// used to check whether an instruction needs to be promoted or not is similar
5257+
// to the logic used to check whether or not a defined register is sign or zero
5258+
// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5259+
// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5260+
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5261+
// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5262+
// than once. This is done to prevent exponential recursion.
5263+
void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
5264+
MachineRegisterInfo *MRI,
5265+
unsigned BinOpDepth,
5266+
LiveVariables *LV) const {
5267+
if (!Reg.isVirtual())
5268+
return;
5269+
5270+
MachineInstr *MI = MRI->getVRegDef(Reg);
5271+
if (!MI)
5272+
return;
5273+
5274+
unsigned Opcode = MI->getOpcode();
5275+
5276+
switch (Opcode) {
5277+
case PPC::OR:
5278+
case PPC::ISEL:
5279+
case PPC::OR8:
5280+
case PPC::PHI: {
5281+
if (BinOpDepth >= MAX_BINOP_DEPTH)
5282+
break;
5283+
unsigned OperandEnd = 3, OperandStride = 1;
5284+
if (Opcode == PPC::PHI) {
5285+
OperandEnd = MI->getNumOperands();
5286+
OperandStride = 2;
5287+
}
5288+
5289+
for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5290+
assert(MI->getOperand(I).isReg() && "Operand must be register");
5291+
promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5292+
BinOpDepth + 1, LV);
5293+
}
5294+
5295+
break;
5296+
}
5297+
case PPC::COPY: {
5298+
// Refers to the logic of the `case PPC::COPY` statement in the function
5299+
// PPCInstrInfo::isSignOrZeroExtended().
5300+
5301+
Register SrcReg = MI->getOperand(1).getReg();
5302+
// In both ELFv1 and v2 ABI, method parameters and the return value
5303+
// are sign- or zero-extended.
5304+
const MachineFunction *MF = MI->getMF();
5305+
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5306+
// If this is a copy from another register, we recursively promote the
5307+
// source.
5308+
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5309+
return;
5310+
}
5311+
5312+
// From here on everything is SVR4ABI. COPY will be eliminated in the other
5313+
// pass, so we do not need to promote the COPY pseudo opcode.
5314+
5315+
if (SrcReg != PPC::X3)
5316+
// If this is a copy from another register, we recursively promote the
5317+
// source.
5318+
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5319+
return;
5320+
}
5321+
case PPC::ORI:
5322+
case PPC::XORI:
5323+
case PPC::ORIS:
5324+
case PPC::XORIS:
5325+
case PPC::ORI8:
5326+
case PPC::XORI8:
5327+
case PPC::ORIS8:
5328+
case PPC::XORIS8:
5329+
promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5330+
LV);
5331+
break;
5332+
case PPC::AND:
5333+
case PPC::AND8:
5334+
if (BinOpDepth >= MAX_BINOP_DEPTH)
5335+
break;
5336+
5337+
promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5338+
BinOpDepth + 1, LV);
5339+
promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5340+
BinOpDepth + 1, LV);
5341+
break;
5342+
}
5343+
5344+
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5345+
if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5346+
return;
5347+
5348+
const PPCInstrInfo *TII =
5349+
MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5350+
5351+
// Map the 32-bit to 64-bit opcodes for instructions that are not sign- or
5352+
// zero-extending themselves, but whose operands may be the destination
5353+
// registers of sign- or zero-extending instructions.
5354+
std::unordered_map<unsigned, unsigned> OpcodeMap = {
5355+
{PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5356+
{PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5357+
{PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5358+
{PPC::AND, PPC::AND8}};
5359+
5360+
int NewOpcode = -1;
5361+
auto It = OpcodeMap.find(Opcode);
5362+
if (It != OpcodeMap.end()) {
5363+
// Set the new opcode to the mapped 64-bit version.
5364+
NewOpcode = It->second;
5365+
} else {
5366+
if (!TII->isSExt32To64(Opcode))
5367+
return;
5368+
5369+
// The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5370+
// map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5371+
// instruction with the same opcode.
5372+
NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5373+
}
5374+
5375+
assert(NewOpcode != -1 &&
5376+
"Must have a 64-bit opcode to map the 32-bit opcode!");
5377+
5378+
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5379+
const MCInstrDesc &MCID = TII->get(NewOpcode);
5380+
const TargetRegisterClass *NewRC =
5381+
TRI->getRegClass(MCID.operands()[0].RegClass);
5382+
5383+
Register SrcReg = MI->getOperand(0).getReg();
5384+
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5385+
5386+
// If the register class of the defined register in the 32-bit instruction
5387+
// is the same as the register class of the defined register in the promoted
5388+
// 64-bit instruction, we do not need to promote the instruction.
5389+
if (NewRC == SrcRC)
5390+
return;
5391+
5392+
DebugLoc DL = MI->getDebugLoc();
5393+
auto MBB = MI->getParent();
5394+
5395+
// Since the pseudo-opcode of the instruction is promoted from 32-bit to
5396+
// 64-bit, if the source reg class of the original instruction belongs to
5397+
// PPC::GPRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5398+
// the operand to PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass,
5399+
// respectively.
5400+
DenseMap<unsigned, Register> PromoteRegs;
5401+
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5402+
MachineOperand &Operand = MI->getOperand(i);
5403+
if (!Operand.isReg())
5404+
continue;
5405+
5406+
Register OperandReg = Operand.getReg();
5407+
if (!OperandReg.isVirtual())
5408+
continue;
5409+
5410+
const TargetRegisterClass *NewUsedRegRC =
5411+
TRI->getRegClass(MCID.operands()[i].RegClass);
5412+
const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5413+
if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5414+
OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5415+
// Promote the used 32-bit register to 64-bit register.
5416+
Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5417+
Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5418+
BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5419+
BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5420+
.addReg(TmpReg)
5421+
.addReg(OperandReg)
5422+
.addImm(PPC::sub_32);
5423+
PromoteRegs[i] = DstTmpReg;
5424+
}
5425+
}
5426+
5427+
Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5428+
5429+
BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
5430+
MachineBasicBlock::instr_iterator Iter(MI);
5431+
--Iter;
5432+
MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
5433+
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5434+
if (PromoteRegs.find(i) != PromoteRegs.end())
5435+
MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
5436+
else
5437+
Iter->addOperand(MI->getOperand(i));
5438+
}
5439+
5440+
for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5441+
MachineOperand &Operand = Iter->getOperand(i);
5442+
if (!Operand.isReg())
5443+
continue;
5444+
Register OperandReg = Operand.getReg();
5445+
if (!OperandReg.isVirtual())
5446+
continue;
5447+
LV->recomputeForSingleDefVirtReg(OperandReg);
5448+
}
5449+
5450+
MI->eraseFromParent();
5451+
5452+
// A defined register may be used by other instructions that are 32-bit.
5453+
// After the defined register is promoted to 64-bit for the promoted
5454+
// instruction, we need to demote the 64-bit defined register back to a
5455+
// 32-bit register
5456+
BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5457+
.addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5458+
LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5459+
return;
5460+
}
5461+
52535462
// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
52545463
// does not count all of the recursions. The parameter BinOpDepth is incremented
52555464
// only when isSignOrZeroExtended calls itself more than once. This is done to

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "PPC.h"
1818
#include "PPCRegisterInfo.h"
1919
#include "llvm/ADT/SmallSet.h"
20+
#include "llvm/CodeGen/LiveVariables.h"
2021
#include "llvm/CodeGen/TargetInstrInfo.h"
2122

2223
#define GET_INSTRINFO_HEADER
@@ -625,6 +626,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
625626
const MachineRegisterInfo *MRI) const {
626627
return isSignOrZeroExtended(Reg, 0, MRI).second;
627628
}
629+
void promoteInstr32To64ForElimEXTSW(const Register &Reg,
630+
MachineRegisterInfo *MRI,
631+
unsigned BinOpDepth,
632+
LiveVariables *LV) const;
628633

629634
bool convertToImmediateForm(MachineInstr &MI,
630635
SmallSet<Register, 4> &RegsToUpdate,

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,16 @@ bool PPCMIPeephole::simplifyCode() {
10531053
} else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
10541054
TII->isSignExtended(NarrowReg, MRI)) {
10551055
// We can eliminate EXTSW if the input is known to be already
1056-
// sign-extended.
1056+
// sign-extended. However, we are not sure whether a spill will occur
1057+
// during register allocation. If there is no promotion, it will use
1058+
// 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1059+
// since the register class is 32-bit. Consequently, the high 32-bit
1060+
// information will be lost. Therefore, all these instructions in the
1061+
// chain used to deduce sign extension to eliminate the 'extsw' will
1062+
// need to be promoted to 64-bit pseudo instructions when the 'extsw'
1063+
// is eliminated.
1064+
TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1065+
10571066
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
10581067
Register TmpReg =
10591068
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);

llvm/lib/Target/PowerPC/PPCScheduleP7.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ let SchedModel = P7Model in {
216216
RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
217217
RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
218218
SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
219-
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
219+
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
220220
SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
221221
)>;
222222

llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ body: |
604604
%2 = LI 48
605605
%5 = COPY %0.sub_32
606606
%8 = SRW killed %5, killed %2
607-
; CHECK: LI 0
607+
; CHECK: LI8 0
608608
; CHECK-LATE: li 3, 0
609609
$x3 = EXTSW_32_64 %8
610610
BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -722,7 +722,7 @@ body: |
722722
%3 = COPY %0.sub_32
723723
%4 = SRAW killed %3, killed %2, implicit-def dead $carry
724724
; CHECK: LI 48
725-
; CHECK: SRAW killed %3, killed %2, implicit-def dead $carry
725+
; CHECK: SRAW8 killed %7, killed %9, implicit-def $carry, implicit-def dead $carry
726726
; CHECK-LATE: sraw 3, 3, 4
727727
%5 = EXTSW_32_64 killed %4
728728
$x3 = COPY %5
@@ -779,7 +779,7 @@ body: |
779779
%2 = LI 80
780780
%3 = COPY %0.sub_32
781781
%4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
782-
; CHECK: SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
782+
; CHECK: SRAW8_rec killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
783783
; CHECK-LATE: sraw. 3, 3, 4
784784
%5 = COPY killed $cr0
785785
%6 = ISEL %2, %4, %5.sub_eq

0 commit comments

Comments
 (0)