Skip to content

Commit 910e562

Browse files
ronlieb and easyonaadit
authored and committed
Revert "AMDGPU: Handle folding frame indexes into s_add_i32 (llvm#101694)"
This reverts commit 8039886. Breaks the hip_on_rocclr build. Change-Id: I37907f36632c22cecbd1d12efe9758e4f23b7ebf
1 parent 6567062 commit 910e562

17 files changed

+1321
-1140
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 293 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,299 +2445,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
24452445
MI->eraseFromParent();
24462446
return true;
24472447
}
2448-
case AMDGPU::V_ADD_U32_e32:
2449-
case AMDGPU::V_ADD_U32_e64:
2450-
case AMDGPU::V_ADD_CO_U32_e32:
2451-
case AMDGPU::V_ADD_CO_U32_e64: {
2452-
// TODO: Handle sub, and, or.
2453-
unsigned NumDefs = MI->getNumExplicitDefs();
2454-
unsigned Src0Idx = NumDefs;
2455-
2456-
bool HasClamp = false;
2457-
MachineOperand *VCCOp = nullptr;
2458-
2459-
switch (MI->getOpcode()) {
2460-
case AMDGPU::V_ADD_U32_e32:
2461-
break;
2462-
case AMDGPU::V_ADD_U32_e64:
2463-
HasClamp = MI->getOperand(3).getImm();
2464-
break;
2465-
case AMDGPU::V_ADD_CO_U32_e32:
2466-
VCCOp = &MI->getOperand(3);
2467-
break;
2468-
case AMDGPU::V_ADD_CO_U32_e64:
2469-
VCCOp = &MI->getOperand(1);
2470-
HasClamp = MI->getOperand(4).getImm();
2471-
break;
2472-
default:
2473-
break;
2474-
}
2475-
bool DeadVCC = !VCCOp || VCCOp->isDead();
2476-
MachineOperand &DstOp = MI->getOperand(0);
2477-
Register DstReg = DstOp.getReg();
2478-
2479-
unsigned OtherOpIdx =
2480-
FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
2481-
MachineOperand *OtherOp = &MI->getOperand(OtherOpIdx);
2482-
2483-
unsigned Src1Idx = Src0Idx + 1;
2484-
Register MaterializedReg = FrameReg;
2485-
Register ScavengedVGPR;
2486-
2487-
if (FrameReg && !ST.enableFlatScratch()) {
2488-
// We should just do an in-place update of the result register. However,
2489-
// the value there may also be used by the add, in which case we need a
2490-
// temporary register.
2491-
//
2492-
// FIXME: The scavenger is not finding the result register in the
2493-
// common case where the add does not read the register.
2494-
2495-
ScavengedVGPR = RS->scavengeRegisterBackwards(
2496-
AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
2497-
2498-
// TODO: If we have a free SGPR, it's sometimes better to use a scalar
2499-
// shift.
2500-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64))
2501-
.addDef(ScavengedVGPR, RegState::Renamable)
2502-
.addImm(ST.getWavefrontSizeLog2())
2503-
.addReg(FrameReg);
2504-
MaterializedReg = ScavengedVGPR;
2505-
}
2506-
2507-
int64_t Offset = FrameInfo.getObjectOffset(Index);
2508-
// For the non-immediate case, we could fall through to the default
2509-
// handling, but we do an in-place update of the result register here to
2510-
// avoid scavenging another register.
2511-
if (OtherOp->isImm()) {
2512-
OtherOp->setImm(OtherOp->getImm() + Offset);
2513-
Offset = 0;
2514-
}
2515-
2516-
if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
2517-
if (ST.enableFlatScratch() &&
2518-
!TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
2519-
// We didn't need the shift above, so we have an SGPR for the frame
2520-
// register, but may have a VGPR only operand.
2521-
//
2522-
// TODO: On gfx10+, we can easily change the opcode to the e64 version
2523-
// and use the higher constant bus restriction to avoid this copy.
2524-
2525-
if (!ScavengedVGPR) {
2526-
ScavengedVGPR = RS->scavengeRegisterBackwards(
2527-
AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
2528-
/*SPAdj=*/0);
2529-
}
2530-
2531-
assert(ScavengedVGPR != DstReg);
2532-
2533-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
2534-
.addReg(MaterializedReg,
2535-
MaterializedReg != FrameReg ? RegState::Kill : 0);
2536-
MaterializedReg = ScavengedVGPR;
2537-
}
2538-
2539-
// TODO: In the flat scratch case, if this is an add of an SGPR, and SCC
2540-
// is not live, we could use a scalar add + vector add instead of 2
2541-
// vector adds.
2542-
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(MI->getOpcode()))
2543-
.addDef(DstReg, RegState::Renamable);
2544-
if (NumDefs == 2)
2545-
AddI32.add(MI->getOperand(1));
2546-
2547-
unsigned MaterializedRegFlags =
2548-
MaterializedReg != FrameReg ? RegState::Kill : 0;
2549-
2550-
if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
2551-
// If we know we have a VGPR already, it's more likely the other
2552-
// operand is a legal vsrc0.
2553-
AddI32
2554-
.add(*OtherOp)
2555-
.addReg(MaterializedReg, MaterializedRegFlags);
2556-
} else {
2557-
// Commute operands to avoid violating VOP2 restrictions. This will
2558-
// typically happen when using scratch.
2559-
AddI32
2560-
.addReg(MaterializedReg, MaterializedRegFlags)
2561-
.add(*OtherOp);
2562-
}
2563-
2564-
if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
2565-
MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
2566-
AddI32.addImm(0); // clamp
2567-
2568-
if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
2569-
AddI32.setOperandDead(3); // Dead vcc
2570-
2571-
MaterializedReg = DstReg;
2572-
2573-
OtherOp->ChangeToRegister(MaterializedReg, false);
2574-
OtherOp->setIsKill(true);
2575-
FIOp->ChangeToImmediate(Offset);
2576-
Offset = 0;
2577-
} else if (Offset != 0) {
2578-
assert(!MaterializedReg);
2579-
FIOp->ChangeToImmediate(Offset);
2580-
Offset = 0;
2581-
} else {
2582-
if (DeadVCC && !HasClamp) {
2583-
assert(Offset == 0);
2584-
2585-
// TODO: Losing kills and implicit operands. Just mutate to copy and
2586-
// let lowerCopy deal with it?
2587-
if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
2588-
// Folded to an identity copy.
2589-
MI->eraseFromParent();
2590-
return true;
2591-
}
2592-
2593-
// The immediate value should be in OtherOp
2594-
MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
2595-
MI->removeOperand(FIOperandNum);
2596-
2597-
unsigned NumOps = MI->getNumOperands();
2598-
for (unsigned I = NumOps - 2; I >= 2; --I)
2599-
MI->removeOperand(I);
2600-
2601-
if (NumDefs == 2)
2602-
MI->removeOperand(1);
2603-
2604-
// The code below can't deal with a mov.
2605-
return true;
2606-
}
2607-
2608-
// This folded to a constant, but we have to keep the add around for
2609-
// pointless implicit defs or clamp modifier.
2610-
FIOp->ChangeToImmediate(0);
2611-
}
2612-
2613-
// Try to improve legality by commuting.
2614-
if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
2615-
std::swap(FIOp, OtherOp);
2616-
std::swap(FIOperandNum, OtherOpIdx);
2617-
}
2618-
2619-
for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
2620-
// Depending on operand constraints we may need to insert another copy.
2621-
if (!TII->isOperandLegal(*MI, SrcIdx)) {
2622-
// If commuting didn't make the operands legal, we need to materialize
2623-
// in a register.
2624-
// TODO: Can use SGPR on gfx10+ in some cases.
2625-
if (!ScavengedVGPR) {
2626-
ScavengedVGPR = RS->scavengeRegisterBackwards(
2627-
AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
2628-
/*SPAdj=*/0);
2629-
}
2630-
2631-
assert(ScavengedVGPR != DstReg);
2632-
2633-
MachineOperand &Src = MI->getOperand(SrcIdx);
2634-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
2635-
.add(Src);
2636-
2637-
Src.ChangeToRegister(ScavengedVGPR, false);
2638-
Src.setIsKill(true);
2639-
}
2640-
}
2641-
2642-
// Fold out add of 0 case that can appear in kernels.
2643-
if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
2644-
if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
2645-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::COPY), DstReg).add(*OtherOp);
2646-
}
2647-
2648-
MI->eraseFromParent();
2649-
}
2650-
2651-
return true;
2652-
}
2653-
case AMDGPU::S_ADD_I32: {
2654-
// TODO: Handle s_or_b32, s_and_b32.
2655-
unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
2656-
MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
2657-
2658-
assert(FrameReg || MFI->isBottomOfStack());
2659-
2660-
MachineOperand &DstOp = MI->getOperand(0);
2661-
const DebugLoc &DL = MI->getDebugLoc();
2662-
Register MaterializedReg = FrameReg;
2663-
2664-
// Defend against live scc, which should never happen in practice.
2665-
bool DeadSCC = MI->getOperand(3).isDead();
2666-
2667-
Register TmpReg;
2668-
2669-
if (FrameReg && !ST.enableFlatScratch()) {
2670-
// FIXME: In the common case where the add does not also read its result
2671-
// (i.e. this isn't a reg += fi), it's not finding the dest reg as
2672-
// available.
2673-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
2674-
false, 0);
2675-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
2676-
.addDef(TmpReg, RegState::Renamable)
2677-
.addReg(FrameReg)
2678-
.addImm(ST.getWavefrontSizeLog2())
2679-
.setOperandDead(3); // Set SCC dead
2680-
MaterializedReg = TmpReg;
2681-
}
2682-
2683-
int64_t Offset = FrameInfo.getObjectOffset(Index);
2684-
2685-
// For the non-immediate case, we could fall through to the default
2686-
// handling, but we do an in-place update of the result register here to
2687-
// avoid scavenging another register.
2688-
if (OtherOp.isImm()) {
2689-
OtherOp.setImm(OtherOp.getImm() + Offset);
2690-
Offset = 0;
2691-
2692-
if (MaterializedReg)
2693-
FIOp->ChangeToRegister(MaterializedReg, false);
2694-
else
2695-
FIOp->ChangeToImmediate(0);
2696-
} else if (MaterializedReg) {
2697-
// If we can't fold the other operand, do another increment.
2698-
Register DstReg = DstOp.getReg();
2699-
2700-
if (!TmpReg && MaterializedReg == FrameReg) {
2701-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2702-
MI, false, 0);
2703-
DstReg = TmpReg;
2704-
}
2705-
2706-
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
2707-
.addDef(DstReg, RegState::Renamable)
2708-
.addReg(MaterializedReg, RegState::Kill)
2709-
.add(OtherOp);
2710-
if (DeadSCC)
2711-
AddI32.setOperandDead(3);
2712-
2713-
MaterializedReg = DstReg;
2714-
2715-
OtherOp.ChangeToRegister(MaterializedReg, false);
2716-
OtherOp.setIsKill(true);
2717-
OtherOp.setIsRenamable(true);
2718-
FIOp->ChangeToImmediate(Offset);
2719-
} else {
2720-
// If we don't have any other offset to apply, we can just directly
2721-
// interpret the frame index as the offset.
2722-
FIOp->ChangeToImmediate(Offset);
2723-
}
2724-
2725-
if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
2726-
assert(Offset == 0);
2727-
MI->removeOperand(3);
2728-
MI->removeOperand(OtherOpIdx);
2729-
MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2730-
} else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
2731-
assert(Offset == 0);
2732-
MI->removeOperand(3);
2733-
MI->removeOperand(FIOperandNum);
2734-
MI->setDesc(
2735-
TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2736-
}
2737-
2738-
assert(!FIOp->isFI());
2739-
return true;
2740-
}
27412448
default: {
27422449
// Other access to frame index
27432450
const DebugLoc &DL = MI->getDebugLoc();

0 commit comments

Comments (0)