Skip to content

Commit eaf87d3

Browse files
committed
[LoongArch] Optimize for immediate value materialization using BSTRINS_D instruction
Reviewed By: heiher, SixWeining Pull Request: #106332
1 parent 5b77e25 commit eaf87d3

File tree

11 files changed

+161
-69
lines changed

11 files changed

+161
-69
lines changed

llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,14 +1291,32 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc,
12911291
Imm = SignExtend64<32>(Imm);
12921292

12931293
for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
1294-
unsigned Opc = Inst.Opc;
1295-
if (Opc == LoongArch::LU12I_W)
1296-
Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addImm(Inst.Imm),
1297-
getSTI());
1298-
else
1294+
switch (Inst.Opc) {
1295+
case LoongArch::LU12I_W:
12991296
Out.emitInstruction(
1300-
MCInstBuilder(Opc).addReg(DestReg).addReg(SrcReg).addImm(Inst.Imm),
1297+
MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm), getSTI());
1298+
break;
1299+
case LoongArch::ADDI_W:
1300+
case LoongArch::ORI:
1301+
case LoongArch::LU32I_D:
1302+
case LoongArch::LU52I_D:
1303+
Out.emitInstruction(
1304+
MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
1305+
Inst.Imm),
13011306
getSTI());
1307+
break;
1308+
case LoongArch::BSTRINS_D:
1309+
Out.emitInstruction(MCInstBuilder(Inst.Opc)
1310+
.addReg(DestReg)
1311+
.addReg(SrcReg)
1312+
.addReg(SrcReg)
1313+
.addImm(Inst.Imm >> 32)
1314+
.addImm(Inst.Imm & 0xFF),
1315+
getSTI());
1316+
break;
1317+
default:
1318+
llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
1319+
}
13021320
SrcReg = DestReg;
13031321
}
13041322
}

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,26 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
6262
// The instructions in the sequence are handled here.
6363
for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
6464
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT);
65-
if (Inst.Opc == LoongArch::LU12I_W)
66-
Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm);
67-
else
65+
switch (Inst.Opc) {
66+
case LoongArch::LU12I_W:
67+
Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SDImm);
68+
break;
69+
case LoongArch::ADDI_W:
70+
case LoongArch::ORI:
71+
case LoongArch::LU32I_D:
72+
case LoongArch::LU52I_D:
6873
Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm);
74+
break;
75+
case LoongArch::BSTRINS_D:
76+
Result = CurDAG->getMachineNode(
77+
Inst.Opc, DL, GRLenVT,
78+
{SrcReg, SrcReg,
79+
CurDAG->getTargetConstant(Inst.Imm >> 32, DL, GRLenVT),
80+
CurDAG->getTargetConstant(Inst.Imm & 0xFF, DL, GRLenVT)});
81+
break;
82+
default:
83+
llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
84+
}
6985
SrcReg = SDValue(Result, 0);
7086
}
7187

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,14 @@ void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB,
210210
.addImm(Inst.Imm)
211211
.setMIFlag(Flag);
212212
break;
213+
case LoongArch::BSTRINS_D:
214+
BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
215+
.addReg(SrcReg, RegState::Kill)
216+
.addReg(SrcReg, RegState::Kill)
217+
.addImm(Inst.Imm >> 32)
218+
.addImm(Inst.Imm & 0xFF)
219+
.setMIFlag(Flag);
220+
break;
213221
default:
214222
assert(false && "Unknown insn emitted by LoongArchMatInt");
215223
}

llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
2626
const int64_t Lo12 = Val & 0xFFF;
2727
InstSeq Insts;
2828

29+
// A single LU52I_D suffices when Bits[63:52] are non-zero and Bits[51:0] are all zero.
2930
if (Highest12 != 0 && SignExtend64<52>(Val) == 0) {
3031
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
3132
return Insts;
3233
}
3334

35+
// lo32
3436
if (Hi20 == 0)
3537
Insts.push_back(Inst(LoongArch::ORI, Lo12));
3638
else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20))
@@ -41,11 +43,82 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
4143
Insts.push_back(Inst(LoongArch::ORI, Lo12));
4244
}
4345

46+
// hi32
47+
// Higher20
4448
if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
4549
Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));
4650

51+
// Highest12
4752
if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
4853
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
4954

55+
size_t N = Insts.size();
56+
if (N < 3)
57+
return Insts;
58+
59+
// When the sequence contains more than 2 instructions, we have the
60+
// opportunity to optimize using the BSTRINS_D instruction. The scenarios are as
61+
// follows:
62+
//
63+
// N of Insts = 3
64+
// 1. ORI + LU32I_D + LU52I_D => ORI + BSTRINS_D, TmpVal = ORI
65+
// 2. ADDI_W + LU32I_D + LU52I_D => ADDI_W + BSTRINS_D, TmpVal = ADDI_W
66+
// 3. LU12I_W + ORI + LU32I_D => ORI + BSTRINS_D, TmpVal = ORI
67+
// 4. LU12I_W + LU32I_D + LU52I_D => LU12I_W + BSTRINS_D, TmpVal = LU12I_W
68+
//
69+
// N of Insts = 4
70+
// 5. LU12I_W + ORI + LU32I_D + LU52I_D => LU12I_W + ORI + BSTRINS_D
71+
// => ORI + LU52I_D + BSTRINS_D
72+
// TmpVal = (LU12I_W | ORI) or (ORI | LU52I_D)
73+
// The BSTRINS_D instruction will use the `TmpVal` to construct the `Val`.
74+
uint64_t TmpVal1 = 0;
75+
uint64_t TmpVal2 = 0;
76+
switch (Insts[0].Opc) {
77+
default:
78+
llvm_unreachable("unexpected opcode");
79+
break;
80+
case LoongArch::LU12I_W:
81+
if (Insts[1].Opc == LoongArch::ORI) {
82+
TmpVal1 = Insts[1].Imm;
83+
if (N == 3)
84+
break;
85+
TmpVal2 = Insts[3].Imm << 52 | TmpVal1;
86+
}
87+
TmpVal1 |= Insts[0].Imm << 12;
88+
break;
89+
case LoongArch::ORI:
90+
case LoongArch::ADDI_W:
91+
TmpVal1 = Insts[0].Imm;
92+
break;
93+
}
94+
95+
for (uint64_t Msb = 32; Msb < 64; ++Msb) {
96+
uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1);
97+
for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) {
98+
uint64_t LowMask = (1ULL << Lsb) - 1;
99+
uint64_t Mask = HighMask | LowMask;
100+
uint64_t LsbToZero = TmpVal1 & ((1UL << (Msb - Lsb + 1)) - 1);
101+
uint64_t MsbToLsb = LsbToZero << Lsb;
102+
if ((MsbToLsb | (TmpVal1 & Mask)) == (uint64_t)Val) {
103+
if (Insts[1].Opc == LoongArch::ORI && N == 3)
104+
Insts[0] = Insts[1];
105+
Insts.pop_back_n(2);
106+
Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
107+
return Insts;
108+
}
109+
if (TmpVal2 != 0) {
110+
LsbToZero = TmpVal2 & ((1UL << (Msb - Lsb + 1)) - 1);
111+
MsbToLsb = LsbToZero << Lsb;
112+
if ((MsbToLsb | (TmpVal2 & Mask)) == (uint64_t)Val) {
113+
Insts[0] = Insts[1];
114+
Insts[1] = Insts[3];
115+
Insts.pop_back_n(2);
116+
Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
117+
return Insts;
118+
}
119+
}
120+
}
121+
}
122+
50123
return Insts;
51124
}

llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ namespace llvm {
1616
namespace LoongArchMatInt {
1717
struct Inst {
1818
unsigned Opc;
19+
// Imm: Opc's immediate operand. If Opc == BSTRINS_D, Imm = (MSB << 32) | LSB.
1920
int64_t Imm;
2021
Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
2122
};

llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
338338
; LA64-NEXT: srli.d $a1, $a0, 1
339339
; LA64-NEXT: lu12i.w $a2, 349525
340340
; LA64-NEXT: ori $a2, $a2, 1365
341-
; LA64-NEXT: lu32i.d $a2, 349525
342-
; LA64-NEXT: lu52i.d $a2, $a2, 1365
341+
; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
343342
; LA64-NEXT: and $a1, $a1, $a2
344343
; LA64-NEXT: sub.d $a0, $a0, $a1
345344
; LA64-NEXT: lu12i.w $a1, 209715
346345
; LA64-NEXT: ori $a1, $a1, 819
347-
; LA64-NEXT: lu32i.d $a1, 209715
348-
; LA64-NEXT: lu52i.d $a1, $a1, 819
346+
; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
349347
; LA64-NEXT: and $a2, $a0, $a1
350348
; LA64-NEXT: srli.d $a0, $a0, 2
351349
; LA64-NEXT: and $a0, $a0, $a1
@@ -354,13 +352,11 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
354352
; LA64-NEXT: add.d $a0, $a0, $a1
355353
; LA64-NEXT: lu12i.w $a1, 61680
356354
; LA64-NEXT: ori $a1, $a1, 3855
357-
; LA64-NEXT: lu32i.d $a1, -61681
358-
; LA64-NEXT: lu52i.d $a1, $a1, 240
355+
; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
359356
; LA64-NEXT: and $a0, $a0, $a1
360357
; LA64-NEXT: lu12i.w $a1, 4112
361358
; LA64-NEXT: ori $a1, $a1, 257
362-
; LA64-NEXT: lu32i.d $a1, 65793
363-
; LA64-NEXT: lu52i.d $a1, $a1, 16
359+
; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
364360
; LA64-NEXT: mul.d $a0, $a0, $a1
365361
; LA64-NEXT: srli.d $a0, $a0, 56
366362
; LA64-NEXT: ret

llvm/test/CodeGen/LoongArch/imm.ll

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@ define i64 @imm0008000000000fff() {
4747
; CHECK-LABEL: imm0008000000000fff:
4848
; CHECK: # %bb.0:
4949
; CHECK-NEXT: ori $a0, $zero, 4095
50-
; CHECK-NEXT: lu32i.d $a0, -524288
51-
; CHECK-NEXT: lu52i.d $a0, $a0, 0
50+
; CHECK-NEXT: bstrins.d $a0, $a0, 51, 51
5251
; CHECK-NEXT: ret
5352
ret i64 2251799813689343
5453
}
@@ -168,9 +167,8 @@ define i64 @imm0008000080000800() {
168167
define i64 @imm14000000a() {
169168
; CHECK-LABEL: imm14000000a:
170169
; CHECK: # %bb.0:
171-
; CHECK-NEXT: lu12i.w $a0, 262144
172-
; CHECK-NEXT: ori $a0, $a0, 10
173-
; CHECK-NEXT: lu32i.d $a0, 1
170+
; CHECK-NEXT: ori $a0, $zero, 10
171+
; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
174172
; CHECK-NEXT: ret
175173
ret i64 5368709130
176174
}
@@ -179,8 +177,7 @@ define i64 @imm0fff000000000fff() {
179177
; CHECK-LABEL: imm0fff000000000fff:
180178
; CHECK: # %bb.0:
181179
; CHECK-NEXT: ori $a0, $zero, 4095
182-
; CHECK-NEXT: lu32i.d $a0, -65536
183-
; CHECK-NEXT: lu52i.d $a0, $a0, 255
180+
; CHECK-NEXT: bstrins.d $a0, $a0, 59, 48
184181
; CHECK-NEXT: ret
185182
ret i64 1152640029630140415
186183
}
@@ -189,8 +186,7 @@ define i64 @immffecffffffffffec() {
189186
; CHECK-LABEL: immffecffffffffffec:
190187
; CHECK: # %bb.0:
191188
; CHECK-NEXT: addi.w $a0, $zero, -20
192-
; CHECK-NEXT: lu32i.d $a0, -196609
193-
; CHECK-NEXT: lu52i.d $a0, $a0, -2
189+
; CHECK-NEXT: bstrins.d $a0, $a0, 52, 48
194190
; CHECK-NEXT: ret
195191
ret i64 -5348024557502484
196192
}
@@ -199,8 +195,7 @@ define i64 @imm1c000000700000() {
199195
; CHECK-LABEL: imm1c000000700000:
200196
; CHECK: # %bb.0:
201197
; CHECK-NEXT: lu12i.w $a0, 1792
202-
; CHECK-NEXT: lu32i.d $a0, -262144
203-
; CHECK-NEXT: lu52i.d $a0, $a0, 1
198+
; CHECK-NEXT: bstrins.d $a0, $a0, 52, 30
204199
; CHECK-NEXT: ret
205200
ret i64 7881299355238400
206201
}
@@ -210,19 +205,17 @@ define i64 @immf0f0f0f0f0f0f0f0() {
210205
; CHECK: # %bb.0:
211206
; CHECK-NEXT: lu12i.w $a0, -61681
212207
; CHECK-NEXT: ori $a0, $a0, 240
213-
; CHECK-NEXT: lu32i.d $a0, 61680
214-
; CHECK-NEXT: lu52i.d $a0, $a0, -241
208+
; CHECK-NEXT: bstrins.d $a0, $a0, 59, 32
215209
; CHECK-NEXT: ret
216210
ret i64 -1085102592571150096
217211
}
218212

219213
define i64 @imm110000014000000a() {
220214
; CHECK-LABEL: imm110000014000000a:
221215
; CHECK: # %bb.0:
222-
; CHECK-NEXT: lu12i.w $a0, 262144
223-
; CHECK-NEXT: ori $a0, $a0, 10
224-
; CHECK-NEXT: lu32i.d $a0, 1
216+
; CHECK-NEXT: ori $a0, $zero, 10
225217
; CHECK-NEXT: lu52i.d $a0, $a0, 272
218+
; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
226219
; CHECK-NEXT: ret
227220
ret i64 1224979104013484042
228221
}

llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -973,9 +973,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
973973
; LA64NOPIC-LABEL: ld_sd_constant:
974974
; LA64NOPIC: # %bb.0:
975975
; LA64NOPIC-NEXT: lu12i.w $a1, -136485
976-
; LA64NOPIC-NEXT: ori $a1, $a1, 3823
977-
; LA64NOPIC-NEXT: lu32i.d $a1, -147729
978-
; LA64NOPIC-NEXT: lu52i.d $a2, $a1, -534
976+
; LA64NOPIC-NEXT: ori $a2, $a1, 3823
977+
; LA64NOPIC-NEXT: bstrins.d $a2, $a2, 61, 32
979978
; LA64NOPIC-NEXT: ld.d $a1, $a2, 0
980979
; LA64NOPIC-NEXT: st.d $a0, $a2, 0
981980
; LA64NOPIC-NEXT: move $a0, $a1
@@ -984,9 +983,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
984983
; LA64PIC-LABEL: ld_sd_constant:
985984
; LA64PIC: # %bb.0:
986985
; LA64PIC-NEXT: lu12i.w $a1, -136485
987-
; LA64PIC-NEXT: ori $a1, $a1, 3823
988-
; LA64PIC-NEXT: lu32i.d $a1, -147729
989-
; LA64PIC-NEXT: lu52i.d $a2, $a1, -534
986+
; LA64PIC-NEXT: ori $a2, $a1, 3823
987+
; LA64PIC-NEXT: bstrins.d $a2, $a2, 61, 32
990988
; LA64PIC-NEXT: ld.d $a1, $a2, 0
991989
; LA64PIC-NEXT: st.d $a0, $a2, 0
992990
; LA64PIC-NEXT: move $a0, $a1

llvm/test/CodeGen/LoongArch/merge-base-offset.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,8 +1128,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
11281128
; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_a64)
11291129
; LA64-NEXT: lu12i.w $a1, 279556
11301130
; LA64-NEXT: ori $a1, $a1, 1088
1131-
; LA64-NEXT: lu32i.d $a1, 17472
1132-
; LA64-NEXT: lu52i.d $a1, $a1, 1092
1131+
; LA64-NEXT: bstrins.d $a1, $a1, 62, 32
11331132
; LA64-NEXT: add.d $a0, $a0, $a1
11341133
; LA64-NEXT: ret
11351134
;
@@ -1142,8 +1141,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
11421141
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
11431142
; LA64-LARGE-NEXT: lu12i.w $a1, 279556
11441143
; LA64-LARGE-NEXT: ori $a1, $a1, 1088
1145-
; LA64-LARGE-NEXT: lu32i.d $a1, 17472
1146-
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
1144+
; LA64-LARGE-NEXT: bstrins.d $a1, $a1, 62, 32
11471145
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
11481146
; LA64-LARGE-NEXT: ret
11491147
entry:

0 commit comments

Comments
 (0)