Skip to content

Commit f9c9088

Browse files
authored
[RISCV] Split TuneShiftedZExtFusion (#76032)
We split `TuneShiftedZExtFusion` into three fusions to make them reusable and to match the GCC implementation[1]. The zexth/zextw fusions can be reused by XiangShan[2] and other commercial processors, whereas shifted zero extension is less common. `macro-fusions-veyron-v1.mir` is renamed to `macro-fusions.mir` so that it is no longer tied to a specific processor. References: [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637303.html [2] https://xiangshan-doc.readthedocs.io/zh_CN/latest/frontend/decode
1 parent 90f816e commit f9c9088

File tree

5 files changed

+108
-27
lines changed

5 files changed

+108
-27
lines changed

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -977,9 +977,19 @@ def TuneLUIADDIFusion
977977
def TuneAUIPCADDIFusion
978978
: SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
979979
"true", "Enable AUIPC+ADDI macrofusion">;
980-
def TuneShiftedZExtFusion
981-
: SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
982-
"true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
980+
981+
def TuneZExtHFusion
982+
: SubtargetFeature<"zexth-fusion", "HasZExtHFusion",
983+
"true", "Enable SLLI+SRLI to be fused to zero extension of halfword">;
984+
985+
def TuneZExtWFusion
986+
: SubtargetFeature<"zextw-fusion", "HasZExtWFusion",
987+
"true", "Enable SLLI+SRLI to be fused to zero extension of word">;
988+
989+
def TuneShiftedZExtWFusion
990+
: SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
991+
"true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">;
992+
983993
def TuneLDADDFusion
984994
: SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
985995
"true", "Enable LD+ADD macrofusion.">;

llvm/lib/Target/RISCV/RISCVMacroFusion.cpp

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,27 +58,74 @@ static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
5858
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
5959
}
6060

61-
// Fuse these patterns:
62-
//
63-
// slli rd, rs1, 32
64-
// srli rd, rd, x
65-
// where 0 <= x <= 32
66-
//
67-
// and
68-
//
61+
// Fuse zero extension of halfword:
6962
// slli rd, rs1, 48
63+
// srli rd, rd, 48
64+
static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
65+
if (SecondMI.getOpcode() != RISCV::SRLI)
66+
return false;
67+
68+
if (!SecondMI.getOperand(2).isImm())
69+
return false;
70+
71+
if (SecondMI.getOperand(2).getImm() != 48)
72+
return false;
73+
74+
// Given SecondMI, when FirstMI is unspecified, we must return
75+
// whether SecondMI may be part of a fused pair at all.
76+
if (!FirstMI)
77+
return true;
78+
79+
if (FirstMI->getOpcode() != RISCV::SLLI)
80+
return false;
81+
82+
if (FirstMI->getOperand(2).getImm() != 48)
83+
return false;
84+
85+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
86+
}
87+
88+
// Fuse zero extension of word:
89+
// slli rd, rs1, 32
90+
// srli rd, rd, 32
91+
static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
92+
if (SecondMI.getOpcode() != RISCV::SRLI)
93+
return false;
94+
95+
if (!SecondMI.getOperand(2).isImm())
96+
return false;
97+
98+
if (SecondMI.getOperand(2).getImm() != 32)
99+
return false;
100+
101+
// Given SecondMI, when FirstMI is unspecified, we must return
102+
// whether SecondMI may be part of a fused pair at all.
103+
if (!FirstMI)
104+
return true;
105+
106+
if (FirstMI->getOpcode() != RISCV::SLLI)
107+
return false;
108+
109+
if (FirstMI->getOperand(2).getImm() != 32)
110+
return false;
111+
112+
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
113+
}
114+
115+
// Fuse shifted zero extension of word:
116+
// slli rd, rs1, 32
70117
// srli rd, rd, x
71-
static bool isShiftedZExt(const MachineInstr *FirstMI,
72-
const MachineInstr &SecondMI) {
118+
// where 0 <= x < 32
119+
static bool isShiftedZExtW(const MachineInstr *FirstMI,
120+
const MachineInstr &SecondMI) {
73121
if (SecondMI.getOpcode() != RISCV::SRLI)
74122
return false;
75123

76124
if (!SecondMI.getOperand(2).isImm())
77125
return false;
78126

79127
unsigned SRLIImm = SecondMI.getOperand(2).getImm();
80-
bool IsShiftBy48 = SRLIImm == 48;
81-
if (SRLIImm > 32 && !IsShiftBy48)
128+
if (SRLIImm >= 32)
82129
return false;
83130

84131
// Given SecondMI, when FirstMI is unspecified, we must return
@@ -89,8 +136,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI,
89136
if (FirstMI->getOpcode() != RISCV::SLLI)
90137
return false;
91138

92-
unsigned SLLIImm = FirstMI->getOperand(2).getImm();
93-
if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
139+
if (FirstMI->getOperand(2).getImm() != 32)
94140
return false;
95141

96142
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
@@ -144,7 +190,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
144190
if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
145191
return true;
146192

147-
if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
193+
if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI))
194+
return true;
195+
196+
if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI))
197+
return true;
198+
199+
if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI))
148200
return true;
149201

150202
if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,9 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
276276
[TuneVentanaVeyron,
277277
TuneLUIADDIFusion,
278278
TuneAUIPCADDIFusion,
279-
TuneShiftedZExtFusion,
279+
TuneZExtHFusion,
280+
TuneZExtWFusion,
281+
TuneShiftedZExtWFusion,
280282
TuneLDADDFusion]>;
281283

282284
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
190190
}
191191

192192
bool hasMacroFusion() const {
193-
return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
194-
hasShiftedZExtFusion() || hasLDADDFusion();
193+
return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() ||
194+
hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion();
195195
}
196196

197197
// Vector codegen related methods.

llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir renamed to llvm/test/CodeGen/RISCV/macro-fusions.mir

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# REQUIRES: asserts
2-
# RUN: llc -mtriple=riscv64-linux-gnu -mcpu=veyron-v1 -x=mir < %s \
2+
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
33
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
4-
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+shifted-zext-fusion,+ld-add-fusion \
4+
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
55
# RUN: | FileCheck %s
66

77
# CHECK: lui_addi:%bb.0
@@ -38,10 +38,10 @@ body: |
3838
PseudoRET
3939
...
4040

41-
# CHECK: slli_srli
41+
# CHECK: slli_srli_shifted_zext
4242
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
4343
---
44-
name: slli_srli
44+
name: slli_srli_shifted_zext
4545
tracksRegLiveness: true
4646
body: |
4747
bb.0.entry:
@@ -55,10 +55,10 @@ body: |
5555
PseudoRET
5656
...
5757

58-
# CHECK: slli_srli_48
58+
# CHECK: slli_srli_zexth
5959
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
6060
---
61-
name: slli_srli_48
61+
name: slli_srli_zexth
6262
tracksRegLiveness: true
6363
body: |
6464
bb.0.entry:
@@ -72,6 +72,23 @@ body: |
7272
PseudoRET
7373
...
7474

75+
# CHECK: slli_srli_zextw
76+
# CHECK: Macro fuse: {{.*}}SLLI - SRLI
77+
---
78+
name: slli_srli_zextw
79+
tracksRegLiveness: true
80+
body: |
81+
bb.0.entry:
82+
liveins: $x10
83+
%1:gpr = COPY $x10
84+
%2:gpr = SLLI %1, 32
85+
%3:gpr = XORI %1, 3
86+
%4:gpr = SRLI %2, 32
87+
$x10 = COPY %3
88+
$x11 = COPY %4
89+
PseudoRET
90+
...
91+
7592
# CHECK: slli_srli_no_fusion_0
7693
# CHECK-NOT: Macro fuse: {{.*}}SLLI - SRLI
7794
---

0 commit comments

Comments
 (0)