Skip to content

[BOLT] Optimize the codegen of createLoadImmediate for AArch64. #137413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 83 additions & 8 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MCSymbolizer.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
Expand Down Expand Up @@ -2173,14 +2174,88 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {

/// Build the instruction sequence that materializes the constant \p Imm in
/// the register \p Dest.
///
/// The sequence is chosen by AArch64_IMM::expandMOVImm; each ImmInsnModel it
/// returns (Opcode, Op1, Op2) is lowered to one MCInst whose destination is
/// \p Dest.
///
/// \param Dest register receiving the value. Registers in the GPR32all class
///             are expanded as 32-bit values; any other register is treated
///             as 64 bits wide.
/// \param Imm  constant to materialize.
/// \returns the instructions, in the order they must be executed.
InstructionListType createLoadImmediate(const MCPhysReg Dest,
                                        uint64_t Imm) const override {
  // MCRegisterInfo::getRegClass() is indexed by register-class ID, not by
  // physical register, so the width is derived from class membership.
  const bool Is32Bit =
      RegInfo->getRegClass(AArch64::GPR32allRegClassID).contains(Dest);
  const unsigned BitSize = Is32Bit ? 32 : 64;
  const MCPhysReg ZeroReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  SmallVector<AArch64_IMM::ImmInsnModel, 4> IIMs;
  AArch64_IMM::expandMOVImm(Imm, BitSize, IIMs);
  assert(!IIMs.empty() && "expandMOVImm produced no instructions");

  InstructionListType Insts;
  for (const AArch64_IMM::ImmInsnModel &IIM : IIMs) {
    MCInst Inst;
    Inst.setOpcode(IIM.Opcode);
    // Every supported opcode writes its result to Dest.
    Inst.addOperand(MCOperand::createReg(Dest));

    switch (IIM.Opcode) {
    default:
      llvm_unreachable("unhandled!");

    case AArch64::ORRWri:
    case AArch64::ORRXri:
    case AArch64::ANDXri:
    case AArch64::EORXri: {
      // Logical immediate: Op1 == 0 selects the zero register as the source
      // operand, otherwise the instruction reads Dest. Op2 is passed through
      // as the immediate operand.
      const MCPhysReg SrcReg = IIM.Op1 == 0 ? ZeroReg : Dest;
      Inst.addOperand(MCOperand::createReg(SrcReg));
      Inst.addOperand(MCOperand::createImm(IIM.Op2));
      break;
    }

    case AArch64::ORRWrs:
    case AArch64::ORRXrs:
      // Shifted-register ORR: both source registers are Dest, and Op2 is
      // passed through as the last immediate operand.
      Inst.addOperand(MCOperand::createReg(Dest));
      Inst.addOperand(MCOperand::createReg(Dest));
      Inst.addOperand(MCOperand::createImm(IIM.Op2));
      break;

    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi:
      // MOVN/MOVZ: Op1 and Op2 are the two immediate operands.
      Inst.addOperand(MCOperand::createImm(IIM.Op1));
      Inst.addOperand(MCOperand::createImm(IIM.Op2));
      break;

    case AArch64::MOVKWi:
    case AArch64::MOVKXi:
      // MOVK takes Dest as a source operand as well, followed by the same
      // two immediates.
      Inst.addOperand(MCOperand::createReg(Dest));
      Inst.addOperand(MCOperand::createImm(IIM.Op1));
      Inst.addOperand(MCOperand::createImm(IIM.Op2));
      break;
    }

    Insts.push_back(Inst);
  }
  return Insts;
}
Expand Down
105 changes: 105 additions & 0 deletions bolt/unittests/Core/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//

#ifdef AARCH64_AVAILABLE
#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#endif // AARCH64_AVAILABLE

Expand Down Expand Up @@ -167,6 +169,109 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
ASSERT_EQ(Label, BB->getLabel());
}

// createLoadImmediate(X0, 0) must yield exactly one instruction:
//   mov x0, #0
TEST_P(MCPlusBuilderTester, AArch64_LoadZero) {
  if (GetParam() != Triple::aarch64)
    GTEST_SKIP();

  BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
  std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();

  InstructionListType Seq = BC->MIB->createLoadImmediate(AArch64::X0, 0);
  Block->addInstructions(Seq.begin(), Seq.end());
  ASSERT_EQ(Block->size(), 1);

  // The single instruction is MOVZ with immediate 0 and shift 0.
  const auto &Mov = *Block->begin();
  ASSERT_EQ(Mov.getOpcode(), AArch64::MOVZXi);
  ASSERT_EQ(Mov.getOperand(0).getReg(), AArch64::X0);
  ASSERT_EQ(Mov.getOperand(1).getImm(), 0);
  ASSERT_EQ(Mov.getOperand(2).getImm(), 0);
}

// createLoadImmediate(X0, 2) must yield exactly one instruction:
//   mov x0, #2
TEST_P(MCPlusBuilderTester, AArch64_LoadImm16) {
  if (GetParam() != Triple::aarch64)
    GTEST_SKIP();

  BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
  std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();

  InstructionListType Seq = BC->MIB->createLoadImmediate(AArch64::X0, 2);
  Block->addInstructions(Seq.begin(), Seq.end());
  ASSERT_EQ(Block->size(), 1);

  // The single instruction is MOVZ with immediate 2 and shift 0.
  const auto &Mov = *Block->begin();
  ASSERT_EQ(Mov.getOpcode(), AArch64::MOVZXi);
  ASSERT_EQ(Mov.getOperand(0).getReg(), AArch64::X0);
  ASSERT_EQ(Mov.getOperand(1).getImm(), 2);
  ASSERT_EQ(Mov.getOperand(2).getImm(), 0);
}

// A constant with four distinct non-zero 16-bit chunks must be built as:
//   mov  x0, #1
//   movk x0, #2, lsl #16
//   movk x0, #3, lsl #32
//   movk x0, #4, lsl #48
TEST_P(MCPlusBuilderTester, AArch64_LoadImm64) {
  if (GetParam() != Triple::aarch64)
    GTEST_SKIP();

  BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
  std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();

  const int64_t Imm =
      (((uint64_t)4) << 48) | (((uint64_t)3) << 32) | (2 << 16) | 1;
  InstructionListType Seq = BC->MIB->createLoadImmediate(AArch64::X0, Imm);
  Block->addInstructions(Seq.begin(), Seq.end());
  ASSERT_EQ(Block->size(), 4);

  // First instruction: MOVZ places the lowest chunk.
  auto It = Block->begin();
  ASSERT_EQ(It->getOpcode(), AArch64::MOVZXi);
  ASSERT_EQ(It->getOperand(0).getReg(), AArch64::X0);
  ASSERT_EQ(It->getOperand(1).getImm(), 1);
  ASSERT_EQ(It->getOperand(2).getImm(), 0);

  // Remaining instructions: one MOVK per chunk, value Chunk + 1 shifted left
  // by 16 * Chunk bits.
  for (int Chunk = 1; Chunk < 4; ++Chunk) {
    It++;
    ASSERT_EQ(It->getOpcode(), AArch64::MOVKXi);
    ASSERT_EQ(It->getOperand(0).getReg(), AArch64::X0);
    ASSERT_EQ(It->getOperand(1).getReg(), AArch64::X0);
    ASSERT_EQ(It->getOperand(2).getImm(), Chunk + 1);
    ASSERT_EQ(It->getOperand(3).getImm(), 16 * Chunk);
  }
}

// A constant with only two non-zero chunks must be built from two logical
// immediates:
//   orr x0, xzr, #0x20000
//   orr x0, x0,  #0x4000000000000
TEST_P(MCPlusBuilderTester, AArch64_LoadImm64Partial) {
  if (GetParam() != Triple::aarch64)
    GTEST_SKIP();

  BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
  std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();

  const int64_t Imm = (((uint64_t)4) << 48) | (2 << 16);
  InstructionListType Seq = BC->MIB->createLoadImmediate(AArch64::X0, Imm);
  Block->addInstructions(Seq.begin(), Seq.end());
  ASSERT_EQ(Block->size(), 2);

  // First ORR reads the zero register and sets the low chunk.
  auto It = Block->begin();
  ASSERT_EQ(It->getOpcode(), AArch64::ORRXri);
  ASSERT_EQ(It->getOperand(0).getReg(), AArch64::X0);
  ASSERT_EQ(It->getOperand(1).getReg(), AArch64::XZR);
  ASSERT_EQ(It->getOperand(2).getImm(),
            AArch64_AM::encodeLogicalImmediate(2 << 16, 64));

  // Second ORR reads X0 and adds the high chunk.
  It++;
  ASSERT_EQ(It->getOpcode(), AArch64::ORRXri);
  ASSERT_EQ(It->getOperand(0).getReg(), AArch64::X0);
  ASSERT_EQ(It->getOperand(1).getReg(), AArch64::X0);
  ASSERT_EQ(It->getOperand(2).getImm(),
            AArch64_AM::encodeLogicalImmediate(((uint64_t)4) << 48, 64));
}

TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitDef) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();
Expand Down
Loading