Skip to content

Cherry-pick different fix for AArch64 truncating FP stores #128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 45 additions & 38 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class AArch64InstructionSelector : public InstructionSelector {
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

/// Eliminate same-sized cross-bank copies into stores before selectImpl().
bool contractCrossBankCopyIntoStore(MachineInstr &I,
bool contractCrossBankCopyIntoStore(GStore &I,
MachineRegisterInfo &MRI);

bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
Expand Down Expand Up @@ -1939,8 +1939,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
return true;
}
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
auto &StoreMI = cast<GStore>(I);
bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI);
MachineOperand &SrcOp = StoreMI.getOperand(0);
if (MRI.getType(SrcOp.getReg()).isPointer()) {
// Allow matching with imported patterns for stores of pointers. Unlike
// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
Expand All @@ -1951,6 +1952,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
Changed = true;
}
#if 0
// Now look for truncating stores to the FPR bank. We don't support these,
// but since truncating store formation happens before RBS, we can only
// split them up again here. We don't want to assign truncstores to GPR only
// since that would have a perf impact due to extra moves.
LLT SrcTy = MRI.getType(SrcReg);
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
if (SrcTy.isScalar() &&
SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) {
// Generate an explicit truncate and make this into a non-truncating
// store.
auto Trunc =
MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
if (!select(*Trunc)) {
return false;
}
SrcOp.setReg(Trunc.getReg(0));
return true;
}
}
#endif
return Changed;
}
case TargetOpcode::G_PTR_ADD:
Expand Down Expand Up @@ -2086,8 +2109,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
}

bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
GStore &StoreMI, MachineRegisterInfo &MRI) {
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
//
Expand All @@ -2102,11 +2124,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI);
if (!DefDstReg.isValid())
return false;
LLT DefDstTy = MRI.getType(DefDstReg);
Register StoreSrcReg = I.getOperand(0).getReg();
Register StoreSrcReg = StoreMI.getValueReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);

// If we get something strange like a physical register, then we shouldn't
Expand All @@ -2118,12 +2140,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
return false;

// Is this store a truncating one?
if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits())
return false;

if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
return false;

// We have a cross-bank copy, which is entering a store. Let's fold it.
I.getOperand(0).setReg(DefDstReg);
StoreMI.getOperand(0).setReg(DefDstReg);
return true;
}

Expand Down Expand Up @@ -2702,29 +2728,29 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());

if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}

uint64_t MemSizeInBytes = LdSt.getMemSize();
unsigned MemSizeInBits = LdSt.getMemSizeInBits();
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
auto &MemOp = **I.memoperands_begin();
uint64_t MemSizeInBytes = MemOp.getSize();
unsigned MemSizeInBits = MemSizeInBytes * 8;
AtomicOrdering Order = MemOp.getSuccessOrdering();

// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
assert(!isa<GZExtLoad>(LdSt));
assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
if (MemSizeInBytes > 64)
return false;

if (isa<GLoad>(LdSt)) {
if (I.getOpcode() == TargetOpcode::G_LOAD) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
Expand All @@ -2738,7 +2764,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}

#ifndef NDEBUG
const Register PtrReg = LdSt.getPointerReg();
const Register PtrReg = I.getOperand(1).getReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
Expand All @@ -2747,31 +2773,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
"Load/Store pointer operand isn't a pointer");
#endif

const Register ValReg = LdSt.getReg(0);
const LLT ValTy = MRI.getType(ValReg);
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

// The code below doesn't support truncating stores, so we need to split it
// again.
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;

// Generate a subreg copy.
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
.addReg(ValReg, 0, SubReg)
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
}

// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
bool IsStore = isa<GStore>(I);
bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
Expand All @@ -2788,8 +2796,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {

// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
Register CurValReg = I.getOperand(0).getReg();
IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
Expand Down
69 changes: 40 additions & 29 deletions llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s

--- |
define void @contract_s64_gpr(i64* %addr) { ret void }
define void @contract_s32_gpr(i32* %addr) { ret void }
define void @contract_s64_fpr(i64* %addr) { ret void }
define void @contract_s32_fpr(i32* %addr) { ret void }
define void @contract_s16_fpr(i16* %addr) { ret void }
define void @contract_g_unmerge_values_first(i128* %addr) { ret void }
define void @contract_g_unmerge_values_second(i128* %addr) { ret void }
...
---
name: contract_s64_gpr
legalized: true
Expand All @@ -20,11 +11,11 @@ body: |
; CHECK-LABEL: name: contract_s64_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:gpr(s64) = COPY $x1
%2:fpr(s64) = COPY %1
G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %2:fpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_gpr
Expand All @@ -36,11 +27,11 @@ body: |
; CHECK-LABEL: name: contract_s32_gpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:gpr(s32) = COPY $w1
%2:fpr(s32) = COPY %1
G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr)
G_STORE %2:fpr(s32), %0 :: (store (s32))
...
---
name: contract_s64_fpr
Expand All @@ -52,11 +43,11 @@ body: |
; CHECK-LABEL: name: contract_s64_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(s64) = COPY $d1
%2:gpr(s64) = COPY %1
G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %2:gpr(s64), %0 :: (store (s64))
...
---
name: contract_s32_fpr
Expand All @@ -68,11 +59,11 @@ body: |
; CHECK-LABEL: name: contract_s32_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32))
%0:gpr(p0) = COPY $x0
%1:fpr(s32) = COPY $s1
%2:gpr(s32) = COPY %1
G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr)
G_STORE %2:gpr(s32), %0 :: (store (s32))
...
---
name: contract_s16_fpr
Expand All @@ -84,11 +75,11 @@ body: |
; CHECK-LABEL: name: contract_s16_fpr
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr)
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16))
%0:gpr(p0) = COPY $x0
%1:fpr(s16) = COPY $h1
%2:gpr(s16) = COPY %1
G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr)
G_STORE %2:gpr(s16), %0 :: (store (s16))
...
---
name: contract_g_unmerge_values_first
Expand All @@ -99,15 +90,16 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_first
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %4:gpr(s64), %0 :: (store (s64))
...
---
name: contract_g_unmerge_values_second
Expand All @@ -118,12 +110,31 @@ body: |
liveins: $x0, $x1
; CHECK-LABEL: name: contract_g_unmerge_values_second
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64))
%0:gpr(p0) = COPY $x0
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
%4:gpr(s64) = COPY %2
%5:gpr(s64) = COPY %3
G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr)
G_STORE %5:gpr(s64), %0 :: (store (s64))
...
---
name: contract_s16_truncstore
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0, $s1
; CHECK-LABEL: name: contract_s16_truncstore
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16))
%0:gpr(p0) = COPY $x0
%1:fpr(s32) = COPY $s1
%2:gpr(s32) = COPY %1
G_STORE %2:gpr(s32), %0 :: (store (s16))
...
Loading