Skip to content

Commit 6c730b1

Browse files
committed
test EnableLoopTermFold
1 parent aec273e commit 6c730b1

File tree

12 files changed

+265
-28
lines changed

12 files changed

+265
-28
lines changed

llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,175 @@ static int getEquivalentCallShort(int Opcode) {
598598
}
599599
}
600600

601+
static bool canSwapLoadStoreWith(const MachineInstr &I, const MachineInstr &N) {
602+
if (N.mayLoadOrStore())
603+
return false;
604+
bool ImayLoad = I.mayLoad();
605+
606+
auto *Fn = I.getParent()->getParent();
607+
auto *TRI = Fn->getSubtarget().getRegisterInfo();
608+
for(const auto &MO_I : I.operands()) {
609+
if (!MO_I.isReg())
610+
continue;
611+
Register RegI = MO_I.getReg();
612+
for(const auto &MO_N : N.operands()) {
613+
if (!MO_N.isReg())
614+
continue;
615+
Register RegN = MO_N.getReg();
616+
if ((MO_N.isDef() || ImayLoad) && (TRI->isSubRegisterEq(RegN, RegI) || TRI->isSubRegisterEq(RegI, RegN)))
617+
return false;
618+
}
619+
}
620+
return true;
621+
}
622+
623+
static void adjustForDelaySlot(MachineBasicBlock &MBB) {
624+
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
625+
// SW $x, $1, imm1
626+
// ADDIU $1, $1, imm2
627+
// BEQ $1, $y, Lable
628+
// can be
629+
// ADDIU $1, $1, imm2
630+
// SW $x, $1, imm1-imm2
631+
// BEQ $1, $y, Lable
632+
// so that SW can be placed into delay slot.
633+
// FIXME: Support other LD/ST instrutions with non-16bit Imm.
634+
size_t StoreImmBits = 0;
635+
switch (I->getOpcode()) {
636+
default:
637+
break;
638+
case Mips::SW:
639+
case Mips::LW:
640+
case Mips::LWu:
641+
case Mips::SW64:
642+
case Mips::LW64:
643+
case Mips::SW_MM:
644+
case Mips::LW_MM:
645+
case Mips::SW_MMR6:
646+
case Mips::LW_MMR6:
647+
case Mips::SWC1:
648+
case Mips::SWC2:
649+
case Mips::SWC3:
650+
case Mips::LWC1:
651+
case Mips::LWC2:
652+
case Mips::LWC3:
653+
case Mips::SD:
654+
case Mips::LD:
655+
case Mips::SDC1:
656+
case Mips::LDC1:
657+
case Mips::SDC164:
658+
case Mips::LDC164:
659+
case Mips::SDC1_D64_MMR6:
660+
case Mips::SDC1_MM_D32:
661+
case Mips::SDC1_MM_D64:
662+
case Mips::LDC1_D64_MMR6:
663+
case Mips::LDC1_MM_D32:
664+
case Mips::LDC1_MM_D64:
665+
case Mips::SDC2:
666+
case Mips::LDC2:
667+
case Mips::SDC3:
668+
case Mips::LDC3:
669+
case Mips::SH:
670+
case Mips::LH:
671+
case Mips::LHu:
672+
case Mips::SH64:
673+
case Mips::LH64:
674+
case Mips::LHu64:
675+
case Mips::SH_MM:
676+
case Mips::LH_MM:
677+
case Mips::LHu_MM:
678+
case Mips::SH_MMR6:
679+
case Mips::SB:
680+
case Mips::LB:
681+
case Mips::LBu:
682+
case Mips::SB64:
683+
case Mips::LB64:
684+
case Mips::LBu64:
685+
case Mips::SB_MM:
686+
case Mips::LB_MM:
687+
case Mips::LBu_MM:
688+
case Mips::SB_MMR6:
689+
case Mips::LB_MMR6:
690+
StoreImmBits = 16;
691+
break;
692+
case Mips::SWC2_R6:
693+
case Mips::SWC2_MMR6:
694+
case Mips::SDC2_R6:
695+
case Mips::SDC2_MMR6:
696+
StoreImmBits = 11;
697+
break;
698+
case Mips::LWE:
699+
case Mips::LWE_MM:
700+
case Mips::SWE:
701+
case Mips::SWE_MM:
702+
case Mips::LHE:
703+
case Mips::LHE_MM:
704+
case Mips::LHuE:
705+
case Mips::LHuE_MM:
706+
case Mips::SHE:
707+
case Mips::SHE_MM:
708+
case Mips::LBE:
709+
case Mips::LBE_MM:
710+
case Mips::LBuE:
711+
case Mips::LBuE_MM:
712+
case Mips::SBE:
713+
case Mips::SBE_MM:
714+
StoreImmBits = 9;
715+
break;
716+
}
717+
Iter N = I;
718+
while (StoreImmBits > 0 && N != MBB.end()) {
719+
N = std::next(N);
720+
if (N == MBB.end())
721+
break;
722+
bool Clobbered = false;
723+
assert ((I->getOperand(0).isReg() && I->getOperand(1).isReg()) && "Bad Load/Store instruction");
724+
switch (N->getOpcode()) {
725+
default: break;
726+
case Mips::ADDiu:
727+
case Mips::ADDiu_MM:
728+
case Mips::DADDiu: {
729+
if (I->getOperand(2).isImm() &&
730+
N->getOperand(0).isReg() && N->getOperand(1).isReg() &&
731+
I->getOperand(1).getReg() == N->getOperand(1).getReg() &&
732+
N->getOperand(0).getReg() == N->getOperand(1).getReg() &&
733+
I->getOperand(0).getReg() != N->getOperand(1).getReg() &&
734+
I->getOperand(1).getReg() != Mips::SP &&
735+
I->getOperand(1).getReg() != Mips::SP_64 &&
736+
N->getOperand(2).isImm()) {
737+
int64_t StoreImm = I->getOperand(2).getImm();
738+
int64_t ADDiuImm = N->getOperand(2).getImm();
739+
int64_t NewStoreImm = StoreImm - ADDiuImm;
740+
if ((StoreImmBits == 16 && isInt<16>(NewStoreImm)) ||
741+
(StoreImmBits == 12 && !isInt<12>(NewStoreImm)) ||
742+
(StoreImmBits == 11 && !isInt<11>(NewStoreImm)) ||
743+
(StoreImmBits == 10 && !isInt<10>(NewStoreImm)) ||
744+
(StoreImmBits == 9 && !isInt<9>(NewStoreImm)) ||
745+
(StoreImmBits == 4 && !isInt<4>(NewStoreImm)))
746+
LLVM_DEBUG(dbgs() << DEBUG_TYPE ": Found Store and ADDiu.\n";
747+
I->dump());
748+
else {
749+
LLVM_DEBUG(dbgs() << DEBUG_TYPE
750+
": Found Store and ADDiu, while Imm overflowed.\n";
751+
I->dump());
752+
continue;
753+
}
754+
MBB.remove(&(*I));
755+
I->getOperand(2).setImm(NewStoreImm);
756+
MBB.insertAfter(N, &(*I));
757+
Clobbered = true;
758+
}
759+
break;
760+
}
761+
}
762+
if (!canSwapLoadStoreWith(*I, *N))
763+
Clobbered = true;
764+
if (Clobbered)
765+
break;
766+
}
767+
}
768+
}
769+
601770
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
602771
/// We assume there is only one delay slot per delayed instruction.
603772
bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
@@ -606,6 +775,8 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
606775
bool InMicroMipsMode = STI.inMicroMipsMode();
607776
const MipsInstrInfo *TII = STI.getInstrInfo();
608777

778+
adjustForDelaySlot(MBB);
779+
609780
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
610781
if (!hasUnoccupiedSlot(&*I))
611782
continue;

llvm/lib/Target/Mips/MipsISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4394,6 +4394,9 @@ bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
43944394
if (AM.BaseGV)
43954395
return false;
43964396

4397+
if (!isInt<16>(AM.BaseOffs))
4398+
return false;
4399+
43974400
switch (AM.Scale) {
43984401
case 0: // "r+i" or just "i", depending on HasBaseReg.
43994402
break;
@@ -4431,6 +4434,14 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
44314434
return Imm.isZero();
44324435
}
44334436

4437+
/// Legality query for integer-compare immediates (per the hook name).
/// \p Imm is accepted exactly when it is representable as a signed 16-bit
/// integer.
bool MipsTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  const bool FitsInSImm16 = isInt<16>(Imm);
  return FitsInSImm16;
}
4440+
4441+
/// Legality query for add immediates (per the hook name).
/// \p Imm is accepted exactly when it is representable as a signed 16-bit
/// integer.
bool MipsTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  const bool FitsInSImm16 = isInt<16>(Imm);
  return FitsInSImm16;
}
4444+
44344445
unsigned MipsTargetLowering::getJumpTableEncoding() const {
44354446

44364447
// FIXME: For space reasons this should be: EK_GPRel32BlockAddress.

llvm/lib/Target/Mips/MipsISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,9 @@ class TargetRegisterClass;
707707
bool isFPImmLegal(const APFloat &Imm, EVT VT,
708708
bool ForCodeSize) const override;
709709

710+
bool isLegalICmpImmediate(int64_t Imm) const override;
711+
bool isLegalAddImmediate(int64_t Imm) const override;
712+
710713
unsigned getJumpTableEncoding() const override;
711714
bool useSoftFloat() const override;
712715

llvm/lib/Target/Mips/MipsInstrInfo.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,34 @@ bool MipsInstrInfo::HasLoadDelaySlot(const MachineInstr &MI) const {
678678
}
679679
}
680680

681+
bool MipsInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
682+
const unsigned Opcode = MI.getOpcode();
683+
switch (Opcode) {
684+
default:
685+
break;
686+
case Mips::ORi:
687+
case Mips::ORi64:
688+
case Mips::ORi_MM:
689+
case Mips::ANDi:
690+
case Mips::ANDi64:
691+
case Mips::ANDi_MM:
692+
if (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == -1)
693+
return true;
694+
[[fallthrough]];
695+
case Mips::ADDiu:
696+
case Mips::ADDiu_MM:
697+
case Mips::DADDiu:
698+
case Mips::XORi:
699+
case Mips::XORi64:
700+
case Mips::XORi_MM:
701+
if (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0)
702+
return true;
703+
return MI.getOperand(1).isReg() &&
704+
(MI.getOperand(1).getReg() == Mips::ZERO || MI.getOperand(1).getReg() == Mips::ZERO_64);
705+
}
706+
return MI.isAsCheapAsAMove();
707+
}
708+
681709
/// Return the number of bytes of code the specified instruction may be.
682710
unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
683711
switch (MI.getOpcode()) {

llvm/lib/Target/Mips/MipsInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ class MipsInstrInfo : public MipsGenInstrInfo {
113113
/// Predicate to determine if an instruction has a load delay slot.
114114
bool HasLoadDelaySlot(const MachineInstr &MI) const;
115115

116+
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
117+
116118
/// Insert nop instruction when hazard condition is found
117119
void insertNoop(MachineBasicBlock &MBB,
118120
MachineBasicBlock::iterator MI) const override;

llvm/lib/Target/Mips/MipsInstrInfo.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,13 +2034,13 @@ let AdditionalPredicates = [NotInMicroMips] in {
20342034

20352035
def ANDi : MMRel, StdMMR6Rel,
20362036
ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, imm32ZExt16, and>,
2037-
ADDI_FM<0xc>, ISA_MIPS1;
2037+
ADDI_FM<0xc>, IsAsCheapAsAMove, ISA_MIPS1;
20382038
def ORi : MMRel, StdMMR6Rel,
20392039
ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, imm32ZExt16, or>,
2040-
ADDI_FM<0xd>, ISA_MIPS1;
2040+
ADDI_FM<0xd>, IsAsCheapAsAMove, ISA_MIPS1;
20412041
def XORi : MMRel, StdMMR6Rel,
20422042
ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, imm32ZExt16, xor>,
2043-
ADDI_FM<0xe>, ISA_MIPS1;
2043+
ADDI_FM<0xe>, IsAsCheapAsAMove, ISA_MIPS1;
20442044
def ADDi : MMRel, ArithLogicI<"addi", simm16_relaxed, GPR32Opnd, II_ADDI>,
20452045
ADDI_FM<0x8>, ISA_MIPS1_NOT_32R6_64R6;
20462046
def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,

llvm/lib/Target/Mips/MipsTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ class MipsPassConfig : public TargetPassConfig {
233233
// can break this requirement, so disable it when long branch pass is
234234
// enabled.
235235
EnableTailMerge = !getMipsSubtarget().enableLongBranchPass();
236+
EnableLoopTermFold = true;
236237
}
237238

238239
MipsTargetMachine &getMipsTargetMachine() const {

llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,18 @@ bool MipsTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
1515
return TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
1616
VT);
1717
}
18+
19+
/// Compare two LSR cost records; returns true when \p C1 is strictly cheaper
/// than \p C2.
///
/// The comparison is lexicographic with the instruction count as the first
/// key, followed by the register count, AddRecCost, NumIVMuls, NumBaseAdds,
/// ScaleCost, ImmCost and SetupCost.
bool MipsTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                                const TargetTransformInfo::LSRCost &C2) {
  const auto SortKey = [](const TargetTransformInfo::LSRCost &C) {
    // Adding up base registers inside the loop needs at least one extra
    // temporary register, so count it on top of NumRegs.
    const unsigned NumRegs = C.NumRegs + (C.NumBaseAdds != 0);
    return std::make_tuple(C.Insns, NumRegs, C.AddRecCost, C.NumIVMuls,
                           C.NumBaseAdds, C.ScaleCost, C.ImmCost,
                           C.SetupCost);
  };
  return SortKey(C1) < SortKey(C2);
}

llvm/lib/Target/Mips/MipsTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ class MipsTTIImpl : public BasicTTIImplBase<MipsTTIImpl> {
3333
TLI(ST->getTargetLowering()) {}
3434

3535
bool hasDivRemOp(Type *DataType, bool IsSigned);
36+
37+
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
38+
const TargetTransformInfo::LSRCost &C2);
3639
};
3740

3841
} // end namespace llvm

llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_atomic.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,10 @@ define double @atomic_load_unordered_f64(ptr %ptr) {
188188
; MIPS32-NEXT: .cfi_def_cfa_offset 24
189189
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
190190
; MIPS32-NEXT: .cfi_offset 31, -4
191-
; MIPS32-NEXT: ori $5, $zero, 0
192191
; MIPS32-NEXT: ori $1, $zero, 128
193-
; MIPS32-NEXT: jal __atomic_load_8
194192
; MIPS32-NEXT: addu $4, $4, $1
193+
; MIPS32-NEXT: jal __atomic_load_8
194+
; MIPS32-NEXT: ori $5, $zero, 0
195195
; MIPS32-NEXT: mtc1 $2, $f0
196196
; MIPS32-NEXT: mtc1 $3, $f1
197197
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
@@ -400,10 +400,10 @@ define double @atomic_load_monotonic_f64(ptr %ptr) {
400400
; MIPS32-NEXT: .cfi_def_cfa_offset 24
401401
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
402402
; MIPS32-NEXT: .cfi_offset 31, -4
403-
; MIPS32-NEXT: ori $5, $zero, 0
404403
; MIPS32-NEXT: ori $1, $zero, 128
405-
; MIPS32-NEXT: jal __atomic_load_8
406404
; MIPS32-NEXT: addu $4, $4, $1
405+
; MIPS32-NEXT: jal __atomic_load_8
406+
; MIPS32-NEXT: ori $5, $zero, 0
407407
; MIPS32-NEXT: mtc1 $2, $f0
408408
; MIPS32-NEXT: mtc1 $3, $f1
409409
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
@@ -626,10 +626,10 @@ define double @atomic_load_acquire_f64(ptr %ptr) {
626626
; MIPS32-NEXT: .cfi_def_cfa_offset 24
627627
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
628628
; MIPS32-NEXT: .cfi_offset 31, -4
629-
; MIPS32-NEXT: ori $5, $zero, 2
630629
; MIPS32-NEXT: ori $1, $zero, 128
631-
; MIPS32-NEXT: jal __atomic_load_8
632630
; MIPS32-NEXT: addu $4, $4, $1
631+
; MIPS32-NEXT: jal __atomic_load_8
632+
; MIPS32-NEXT: ori $5, $zero, 2
633633
; MIPS32-NEXT: mtc1 $2, $f0
634634
; MIPS32-NEXT: mtc1 $3, $f1
635635
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
@@ -853,10 +853,10 @@ define double @atomic_load_seq_cst_f64(ptr %ptr) {
853853
; MIPS32-NEXT: .cfi_def_cfa_offset 24
854854
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
855855
; MIPS32-NEXT: .cfi_offset 31, -4
856-
; MIPS32-NEXT: ori $5, $zero, 5
857856
; MIPS32-NEXT: ori $1, $zero, 128
858-
; MIPS32-NEXT: jal __atomic_load_8
859857
; MIPS32-NEXT: addu $4, $4, $1
858+
; MIPS32-NEXT: jal __atomic_load_8
859+
; MIPS32-NEXT: ori $5, $zero, 5
860860
; MIPS32-NEXT: mtc1 $2, $f0
861861
; MIPS32-NEXT: mtc1 $3, $f1
862862
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload

llvm/test/CodeGen/Mips/brdelayslot.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,12 @@ declare void @foo11()
134134
;
135135
; SUCCBB-LABEL: succbbs_loop1:
136136
; SUCCBB: blez $5, $BB
137-
; SUCCBB-NEXT: addiu
138-
; SUCCBB: bnez ${{[0-9]+}}, $BB
139-
; SUCCBB-NEXT: addiu
137+
; SUCCBB-NEXT: sll
138+
; SUCCBB-NEXT: # %bb
139+
; SUCCBB-NEXT: addu
140+
; SUCCBB: addiu ${{[0-9]+}}, ${{[0-9]+}}, 4
141+
; SUCCBB-NEXT: bne ${{[0-9]+}}, ${{[0-9]+}}, $BB
142+
; SUCCBB-NEXT: nop
140143

141144
define i32 @succbbs_loop1(ptr nocapture %a, i32 %n) {
142145
entry:

0 commit comments

Comments
 (0)