Skip to content

Commit 04fb9b7

Browse files
committed
[AArch64][GlobalISel] Fix incorrect handling of fp truncating stores.
When the tablegen patterns fail to select a truncating scalar FPR store, our manual selection code also failed to handle it silently, trying to generate an invalid copy. Fix this by adding support in the manual code to generate a proper subreg copy before selecting a non-truncating store.
1 parent f653bee commit 04fb9b7

File tree

3 files changed

+149
-15
lines changed

3 files changed

+149
-15
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

+30-11
Original file line numberDiff line numberDiff line change
@@ -2723,29 +2723,29 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27232723
case TargetOpcode::G_ZEXTLOAD:
27242724
case TargetOpcode::G_LOAD:
27252725
case TargetOpcode::G_STORE: {
2726+
GLoadStore &LdSt = cast<GLoadStore>(I);
27262727
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2727-
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2728+
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
27282729

27292730
if (PtrTy != LLT::pointer(0, 64)) {
27302731
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
27312732
<< ", expected: " << LLT::pointer(0, 64) << '\n');
27322733
return false;
27332734
}
27342735

2735-
auto &MemOp = **I.memoperands_begin();
2736-
uint64_t MemSizeInBytes = MemOp.getSize();
2737-
unsigned MemSizeInBits = MemSizeInBytes * 8;
2738-
AtomicOrdering Order = MemOp.getSuccessOrdering();
2736+
uint64_t MemSizeInBytes = LdSt.getMemSize();
2737+
unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2738+
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
27392739

27402740
// Need special instructions for atomics that affect ordering.
27412741
if (Order != AtomicOrdering::NotAtomic &&
27422742
Order != AtomicOrdering::Unordered &&
27432743
Order != AtomicOrdering::Monotonic) {
2744-
assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
2744+
assert(!isa<GZExtLoad>(LdSt));
27452745
if (MemSizeInBytes > 64)
27462746
return false;
27472747

2748-
if (I.getOpcode() == TargetOpcode::G_LOAD) {
2748+
if (isa<GLoad>(LdSt)) {
27492749
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
27502750
AArch64::LDARW, AArch64::LDARX};
27512751
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
@@ -2759,7 +2759,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27592759
}
27602760

27612761
#ifndef NDEBUG
2762-
const Register PtrReg = I.getOperand(1).getReg();
2762+
const Register PtrReg = LdSt.getPointerReg();
27632763
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
27642764
// Sanity-check the pointer register.
27652765
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
@@ -2768,13 +2768,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27682768
"Load/Store pointer operand isn't a pointer");
27692769
#endif
27702770

2771-
const Register ValReg = I.getOperand(0).getReg();
2771+
const Register ValReg = LdSt.getReg(0);
2772+
const LLT ValTy = MRI.getType(ValReg);
27722773
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
27732774

2775+
// The code below doesn't support truncating stores, so we need to split it
2776+
// again.
2777+
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2778+
unsigned SubReg;
2779+
LLT MemTy = LdSt.getMMO().getMemoryType();
2780+
auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2781+
if (!getSubRegForClass(RC, TRI, SubReg))
2782+
return false;
2783+
2784+
// Generate a subreg copy.
2785+
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2786+
.addReg(ValReg, 0, SubReg)
2787+
.getReg(0);
2788+
RBI.constrainGenericRegister(Copy, *RC, MRI);
2789+
LdSt.getOperand(0).setReg(Copy);
2790+
}
2791+
27742792
// Helper lambda for partially selecting I. Either returns the original
27752793
// instruction with an updated opcode, or a new instruction.
27762794
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2777-
bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2795+
bool IsStore = isa<GStore>(I);
27782796
const unsigned NewOpc =
27792797
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
27802798
if (NewOpc == I.getOpcode())
@@ -2791,7 +2809,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27912809

27922810
// Folded something. Create a new instruction and return it.
27932811
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2794-
IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2812+
Register CurValReg = I.getOperand(0).getReg();
2813+
IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
27952814
NewInst.cloneMemRefs(I);
27962815
for (auto &Fn : *AddrModeFns)
27972816
Fn(NewInst);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
--- |
4+
define void @truncating_f32(double %x) {
5+
%alloca = alloca i32, align 4
6+
%bitcast = bitcast double %x to i64
7+
%trunc = trunc i64 %bitcast to i32
8+
store i32 %trunc, i32* %alloca, align 4
9+
ret void
10+
}
11+
12+
define void @truncating_f16(double %x) {
13+
%alloca = alloca i16, align 2
14+
%bitcast = bitcast double %x to i64
15+
%trunc = trunc i64 %bitcast to i16
16+
store i16 %trunc, i16* %alloca, align 2
17+
ret void
18+
}
19+
20+
define void @truncating_f8(double %x) {
21+
%alloca = alloca i8, align 1
22+
%bitcast = bitcast double %x to i64
23+
%trunc = trunc i64 %bitcast to i8
24+
store i8 %trunc, i8* %alloca, align 1
25+
ret void
26+
}
27+
28+
...
29+
---
30+
name: truncating_f32
31+
alignment: 4
32+
legalized: true
33+
regBankSelected: true
34+
tracksRegLiveness: true
35+
liveins:
36+
- { reg: '$d0' }
37+
frameInfo:
38+
maxAlignment: 4
39+
stack:
40+
- { id: 0, name: alloca, size: 4, alignment: 4 }
41+
machineFunctionInfo: {}
42+
body: |
43+
bb.1 (%ir-block.0):
44+
liveins: $d0
45+
46+
; CHECK-LABEL: name: truncating_f32
47+
; CHECK: liveins: $d0
48+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
49+
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
50+
; CHECK: STRSui [[COPY1]], %stack.0.alloca, 0 :: (store (s32) into %ir.alloca)
51+
; CHECK: RET_ReallyLR
52+
%0:fpr(s64) = COPY $d0
53+
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
54+
G_STORE %0(s64), %1(p0) :: (store (s32) into %ir.alloca)
55+
RET_ReallyLR
56+
57+
...
58+
---
59+
name: truncating_f16
60+
alignment: 4
61+
legalized: true
62+
regBankSelected: true
63+
tracksRegLiveness: true
64+
liveins:
65+
- { reg: '$d0' }
66+
frameInfo:
67+
maxAlignment: 2
68+
stack:
69+
- { id: 0, name: alloca, size: 2, alignment: 2 }
70+
machineFunctionInfo: {}
71+
body: |
72+
bb.1 (%ir-block.0):
73+
liveins: $d0
74+
75+
; CHECK-LABEL: name: truncating_f16
76+
; CHECK: liveins: $d0
77+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
78+
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[COPY]].hsub
79+
; CHECK: STRHui [[COPY1]], %stack.0.alloca, 0 :: (store (s16) into %ir.alloca)
80+
; CHECK: RET_ReallyLR
81+
%0:fpr(s64) = COPY $d0
82+
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
83+
G_STORE %0(s64), %1(p0) :: (store (s16) into %ir.alloca)
84+
RET_ReallyLR
85+
86+
...
87+
---
88+
name: truncating_f8
89+
alignment: 4
90+
legalized: true
91+
regBankSelected: true
92+
tracksRegLiveness: true
93+
liveins:
94+
- { reg: '$d0' }
95+
frameInfo:
96+
maxAlignment: 1
97+
stack:
98+
- { id: 0, name: alloca, size: 1, alignment: 1 }
99+
machineFunctionInfo: {}
100+
body: |
101+
bb.1 (%ir-block.0):
102+
liveins: $d0
103+
104+
; CHECK-LABEL: name: truncating_f8
105+
; CHECK: liveins: $d0
106+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
107+
; CHECK: [[COPY1:%[0-9]+]]:fpr8 = COPY [[COPY]].bsub
108+
; CHECK: STRBui [[COPY1]], %stack.0.alloca, 0 :: (store (s8) into %ir.alloca)
109+
; CHECK: RET_ReallyLR
110+
%0:fpr(s64) = COPY $d0
111+
%1:gpr(p0) = G_FRAME_INDEX %stack.0.alloca
112+
G_STORE %0(s64), %1(p0) :: (store (s8) into %ir.alloca)
113+
RET_ReallyLR
114+
115+
...

llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir

+4-4
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,13 @@ body: |
278278
; CHECK-LABEL: name: test_rule96_id2146_at_idx8070
279279
; CHECK: liveins: $x0
280280
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
281-
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1))
281+
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
282282
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
283-
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
283+
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7
284284
; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
285285
%2:gpr(p0) = COPY $x0
286-
%0:fpr(s1) = G_LOAD %2(p0) :: (load (s1))
287-
%1:gpr(s32) = G_ZEXT %0(s1)
286+
%0:fpr(s8) = G_LOAD %2(p0) :: (load (s8))
287+
%1:gpr(s32) = G_ZEXT %0(s8)
288288
$noreg = PATCHABLE_RET %1(s32)
289289
290290
...

0 commit comments

Comments
 (0)