Skip to content

Commit 67bf3ac

Browse files
committed
[AArch64][GlobalISel] Don't contract cross-bank copies into truncating stores.
Truncating stores with GPR-bank sources shouldn't be mutated to use FPR-bank sources, since truncating stores from the FPR bank aren't supported. Ideally this would be a selection failure in the TableGen patterns, but for now we avoid generating them.
1 parent 2bd7c30 commit 67bf3ac

File tree

2 files changed

+75
-37
lines changed

2 files changed

+75
-37
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

+35-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "MCTargetDesc/AArch64AddressingModes.h"
2323
#include "MCTargetDesc/AArch64MCTargetDesc.h"
2424
#include "llvm/ADT/Optional.h"
25+
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
2526
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
2627
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
2728
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -104,7 +105,7 @@ class AArch64InstructionSelector : public InstructionSelector {
104105
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
105106

106107
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
107-
bool contractCrossBankCopyIntoStore(MachineInstr &I,
108+
bool contractCrossBankCopyIntoStore(GStore &I,
108109
MachineRegisterInfo &MRI);
109110

110111
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -1934,8 +1935,9 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
19341935
return true;
19351936
}
19361937
case TargetOpcode::G_STORE: {
1937-
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1938-
MachineOperand &SrcOp = I.getOperand(0);
1938+
auto &StoreMI = cast<GStore>(I);
1939+
bool Changed = contractCrossBankCopyIntoStore(StoreMI, MRI);
1940+
MachineOperand &SrcOp = StoreMI.getOperand(0);
19391941
if (MRI.getType(SrcOp.getReg()).isPointer()) {
19401942
// Allow matching with imported patterns for stores of pointers. Unlike
19411943
// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
@@ -1946,6 +1948,28 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
19461948
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
19471949
Changed = true;
19481950
}
1951+
#if 0
1952+
// Now look for truncating stores to the FPR bank. We don't support these,
1953+
// but since truncating store formation happens before RBS, we can only
1954+
// split them up again here. We don't want to assign truncstores to GPR only
1955+
// since that would have a perf impact due to extra moves.
1956+
LLT SrcTy = MRI.getType(SrcReg);
1957+
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1958+
if (SrcTy.isScalar() &&
1959+
SrcTy.getSizeInBits() > StoreMI.getMemSizeInBits()) {
1960+
// Generate an explicit truncate and make this into a non-truncating
1961+
// store.
1962+
auto Trunc =
1963+
MIB.buildTrunc(LLT::scalar(StoreMI.getMemSizeInBits()), SrcReg);
1964+
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1965+
if (!select(*Trunc)) {
1966+
return false;
1967+
}
1968+
SrcOp.setReg(Trunc.getReg(0));
1969+
return true;
1970+
}
1971+
}
1972+
#endif
19491973
return Changed;
19501974
}
19511975
case TargetOpcode::G_PTR_ADD:
@@ -2081,8 +2105,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
20812105
}
20822106

20832107
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2084-
MachineInstr &I, MachineRegisterInfo &MRI) {
2085-
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2108+
GStore &StoreMI, MachineRegisterInfo &MRI) {
20862109
// If we're storing a scalar, it doesn't matter what register bank that
20872110
// scalar is on. All that matters is the size.
20882111
//
@@ -2097,11 +2120,11 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
20972120
// G_STORE %x:gpr(s32)
20982121
//
20992122
// And then continue the selection process normally.
2100-
Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2123+
Register DefDstReg = getSrcRegIgnoringCopies(StoreMI.getValueReg(), MRI);
21012124
if (!DefDstReg.isValid())
21022125
return false;
21032126
LLT DefDstTy = MRI.getType(DefDstReg);
2104-
Register StoreSrcReg = I.getOperand(0).getReg();
2127+
Register StoreSrcReg = StoreMI.getValueReg();
21052128
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
21062129

21072130
// If we get something strange like a physical register, then we shouldn't
@@ -2113,12 +2136,16 @@ bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
21132136
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
21142137
return false;
21152138

2139+
// Is this store a truncating one?
2140+
if (StoreSrcTy.getSizeInBits() != StoreMI.getMemSizeInBits())
2141+
return false;
2142+
21162143
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
21172144
RBI.getRegBank(DefDstReg, MRI, TRI))
21182145
return false;
21192146

21202147
// We have a cross-bank copy, which is entering a store. Let's fold it.
2121-
I.getOperand(0).setReg(DefDstReg);
2148+
StoreMI.getOperand(0).setReg(DefDstReg);
21222149
return true;
21232150
}
21242151

Original file line numberDiff line numberDiff line change
@@ -1,15 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
33

4-
--- |
5-
define void @contract_s64_gpr(i64* %addr) { ret void }
6-
define void @contract_s32_gpr(i32* %addr) { ret void }
7-
define void @contract_s64_fpr(i64* %addr) { ret void }
8-
define void @contract_s32_fpr(i32* %addr) { ret void }
9-
define void @contract_s16_fpr(i16* %addr) { ret void }
10-
define void @contract_g_unmerge_values_first(i128* %addr) { ret void }
11-
define void @contract_g_unmerge_values_second(i128* %addr) { ret void }
12-
...
134
---
145
name: contract_s64_gpr
156
legalized: true
@@ -20,11 +11,11 @@ body: |
2011
; CHECK-LABEL: name: contract_s64_gpr
2112
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
2213
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
23-
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
14+
; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64))
2415
%0:gpr(p0) = COPY $x0
2516
%1:gpr(s64) = COPY $x1
2617
%2:fpr(s64) = COPY %1
27-
G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr)
18+
G_STORE %2:fpr(s64), %0 :: (store (s64))
2819
...
2920
---
3021
name: contract_s32_gpr
@@ -36,11 +27,11 @@ body: |
3627
; CHECK-LABEL: name: contract_s32_gpr
3728
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
3829
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
39-
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
30+
; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32))
4031
%0:gpr(p0) = COPY $x0
4132
%1:gpr(s32) = COPY $w1
4233
%2:fpr(s32) = COPY %1
43-
G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr)
34+
G_STORE %2:fpr(s32), %0 :: (store (s32))
4435
...
4536
---
4637
name: contract_s64_fpr
@@ -52,11 +43,11 @@ body: |
5243
; CHECK-LABEL: name: contract_s64_fpr
5344
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
5445
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
55-
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
46+
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
5647
%0:gpr(p0) = COPY $x0
5748
%1:fpr(s64) = COPY $d1
5849
%2:gpr(s64) = COPY %1
59-
G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr)
50+
G_STORE %2:gpr(s64), %0 :: (store (s64))
6051
...
6152
---
6253
name: contract_s32_fpr
@@ -68,11 +59,11 @@ body: |
6859
; CHECK-LABEL: name: contract_s32_fpr
6960
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
7061
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
71-
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr)
62+
; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32))
7263
%0:gpr(p0) = COPY $x0
7364
%1:fpr(s32) = COPY $s1
7465
%2:gpr(s32) = COPY %1
75-
G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr)
66+
G_STORE %2:gpr(s32), %0 :: (store (s32))
7667
...
7768
---
7869
name: contract_s16_fpr
@@ -84,11 +75,11 @@ body: |
8475
; CHECK-LABEL: name: contract_s16_fpr
8576
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
8677
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
87-
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr)
78+
; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16))
8879
%0:gpr(p0) = COPY $x0
8980
%1:fpr(s16) = COPY $h1
9081
%2:gpr(s16) = COPY %1
91-
G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr)
82+
G_STORE %2:gpr(s16), %0 :: (store (s16))
9283
...
9384
---
9485
name: contract_g_unmerge_values_first
@@ -99,15 +90,16 @@ body: |
9990
liveins: $x0, $x1
10091
; CHECK-LABEL: name: contract_g_unmerge_values_first
10192
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
102-
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
103-
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub
104-
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
93+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
94+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
95+
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
96+
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64))
10597
%0:gpr(p0) = COPY $x0
106-
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
98+
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
10799
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
108100
%4:gpr(s64) = COPY %2
109101
%5:gpr(s64) = COPY %3
110-
G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr)
102+
G_STORE %4:gpr(s64), %0 :: (store (s64))
111103
...
112104
---
113105
name: contract_g_unmerge_values_second
@@ -118,12 +110,31 @@ body: |
118110
liveins: $x0, $x1
119111
; CHECK-LABEL: name: contract_g_unmerge_values_second
120112
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
121-
; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0
122-
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1
123-
; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr)
113+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (dereferenceable load (<2 x s64>))
114+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LDRQui]].dsub
115+
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[LDRQui]], 1
116+
; CHECK: STRDui [[CPYi64_]], [[COPY]], 0 :: (store (s64))
124117
%0:gpr(p0) = COPY $x0
125-
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr)
118+
%1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>))
126119
%2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>)
127120
%4:gpr(s64) = COPY %2
128121
%5:gpr(s64) = COPY %3
129-
G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr)
122+
G_STORE %5:gpr(s64), %0 :: (store (s64))
123+
...
124+
---
125+
name: contract_s16_truncstore
126+
legalized: true
127+
regBankSelected: true
128+
body: |
129+
bb.0:
130+
liveins: $x0, $s1
131+
; CHECK-LABEL: name: contract_s16_truncstore
132+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
133+
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
134+
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
135+
; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store (s16))
136+
%0:gpr(p0) = COPY $x0
137+
%1:fpr(s32) = COPY $s1
138+
%2:gpr(s32) = COPY %1
139+
G_STORE %2:gpr(s32), %0 :: (store (s16))
140+
...

0 commit comments

Comments
 (0)