Skip to content

[AArch64][GlobalISel] Fix incorrect handling of fp truncating stores. #127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 31 additions & 11 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
Expand Down Expand Up @@ -2701,29 +2702,29 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
LLT PtrTy = MRI.getType(LdSt.getPointerReg());

if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}

auto &MemOp = **I.memoperands_begin();
uint64_t MemSizeInBytes = MemOp.getSize();
unsigned MemSizeInBits = MemSizeInBytes * 8;
AtomicOrdering Order = MemOp.getSuccessOrdering();
uint64_t MemSizeInBytes = LdSt.getMemSize();
unsigned MemSizeInBits = LdSt.getMemSizeInBits();
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();

// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;

if (I.getOpcode() == TargetOpcode::G_LOAD) {
if (isa<GLoad>(LdSt)) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
Expand All @@ -2737,7 +2738,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}

#ifndef NDEBUG
const Register PtrReg = I.getOperand(1).getReg();
const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
Expand All @@ -2746,13 +2747,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
"Load/Store pointer operand isn't a pointer");
#endif

const Register ValReg = I.getOperand(0).getReg();
const Register ValReg = LdSt.getReg(0);
const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

// The code below doesn't support truncating stores, so we need to split it
// again.
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;

// Generate a subreg copy.
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
.addReg(ValReg, 0, SubReg)
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
}

// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
Expand All @@ -2769,7 +2788,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {

// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
Register CurValReg = I.getOperand(0).getReg();
IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
Expand Down
116 changes: 116 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-store-truncating-float.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
; IR companions for the MIR functions below.  Each function bitcasts a
; double to i64, truncates the i64 to a narrower integer, and stores the
; narrow value into a stack slot; the MIR bodies reference these allocas
; via their %ir.alloca memory operands.
define void @truncating_f32(double %x) {
%alloca = alloca i32, align 4
%bitcast = bitcast double %x to i64
%trunc = trunc i64 %bitcast to i32
store i32 %trunc, i32* %alloca, align 4
ret void
}

; Same pattern, storing only the low 16 bits.
define void @truncating_f16(double %x) {
%alloca = alloca i16, align 2
%bitcast = bitcast double %x to i64
%trunc = trunc i64 %bitcast to i16
store i16 %trunc, i16* %alloca, align 2
ret void
}

; Same pattern, storing only the low 8 bits.
define void @truncating_f8(double %x) {
%alloca = alloca i8, align 1
%bitcast = bitcast double %x to i64
%trunc = trunc i64 %bitcast to i8
store i8 %trunc, i8* %alloca, align 1
ret void
}

...
---
# A G_STORE of an FPR s64 with a narrower (s32) memory operand is a
# truncating store.  The selector must emit a .ssub subregister copy to
# fpr32 and store that with STRSui, instead of storing the whole 64-bit
# register.
name: truncating_f32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
frameInfo:
maxAlignment: 4
stack:
- { id: 0, name: alloca, size: 4, alignment: 4 }
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $d0

; CHECK-LABEL: name: truncating_f32
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca)
; CHECK: RET_ReallyLR
; The value type (s64) is wider than the memory type (s32), so this is a
; truncating store.
%0:fpr(s64) = COPY $d0
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca)
RET_ReallyLR

...
---
# Truncating store of an FPR s64 into a 16-bit slot: expect an .hsub
# subregister copy to fpr16 followed by STRHui.
name: truncating_f16
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
frameInfo:
maxAlignment: 2
stack:
- { id: 0, name: alloca, size: 2, alignment: 2 }
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $d0

; CHECK-LABEL: name: truncating_f16
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca)
; CHECK: RET_ReallyLR
; s64 value, (s16) memory operand -> truncating store.
%0:fpr(s64) = COPY $d0
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca)
RET_ReallyLR

...
---
# Truncating store of an FPR s64 into an 8-bit slot.  Per the CHECK lines,
# selection goes through an .hsub copy to fpr16 and then a COPY to fpr8
# before STRBui (no direct byte-sized subregister copy from fpr64 is
# emitted here).
name: truncating_f8
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
frameInfo:
maxAlignment: 1
stack:
- { id: 0, name: alloca, size: 1, alignment: 1 }
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.0):
liveins: $d0

; CHECK-LABEL: name: truncating_f8
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
; CHECK: [[COPY2:%[0-9]+]]:fpr8 = COPY [[COPY1]]
; CHECK: STRBui [[COPY2]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca)
; CHECK: RET_ReallyLR
; s64 value, (s8) memory operand -> truncating store.
%0:fpr(s64) = COPY $d0
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca)
RET_ReallyLR

...
Original file line number Diff line number Diff line change
Expand Up @@ -278,13 +278,13 @@ body: |
; CHECK-LABEL: name: test_rule96_id2146_at_idx8070
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1))
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7
; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
%2:gpr(p0) = COPY $x0
%0:fpr(s1) = G_LOAD %2(p0) :: (load (s1))
%1:gpr(s32) = G_ZEXT %0(s1)
%0:fpr(s8) = G_LOAD %2(p0) :: (load (s8))
%1:gpr(s32) = G_ZEXT %0(s8)
$noreg = PATCHABLE_RET %1(s32)

...
Expand Down