Skip to content

Commit fc6e97c

Browse files
authored
[MC][X86] Avoid copying MCInst in emitInstrEnd (#94947)
Copying an MCInst isn't cheap (copies all operands) and the whole instruction is only used for the Intel erratum mitigation, which is off by default. In all other cases, the opcode alone suffices. This slightly pessimizes code that uses moves to segment registers -- but that's uncommon and not performance-sensitive anyway. As a related change, also call canPadInst() only when the result is actually used, which is typically only the case in emitInstrEnd. This gives a minor performance improvement.
1 parent 214ff50 commit fc6e97c

File tree

1 file changed

+34
-19
lines changed

1 file changed

+34
-19
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp

+34-19
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,10 @@ class X86AsmBackend : public MCAsmBackend {
125125
unsigned TargetPrefixMax = 0;
126126

127127
MCInst PrevInst;
128+
unsigned PrevInstOpcode = 0;
128129
MCBoundaryAlignFragment *PendingBA = nullptr;
129130
std::pair<MCFragment *, size_t> PrevInstPosition;
130-
bool CanPadInst = false;
131+
bool IsRightAfterData = false;
131132

132133
uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133134
bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
@@ -267,8 +268,8 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
267268
}
268269

269270
/// Check if the instruction is a prefix.
270-
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
271-
return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
271+
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
272+
return X86II::isPrefix(MCII.get(Opcode).TSFlags);
272273
}
273274

274275
/// Check if the instruction is valid as the first instruction in macro fusion.
@@ -382,9 +383,9 @@ bool X86AsmBackend::allowEnhancedRelaxation() const {
382383

383384
/// X86 has certain instructions which enable interrupts exactly one
384385
/// instruction *after* the instruction which stores to SS. Return true if the
385-
/// given instruction has such an interrupt delay slot.
386-
static bool hasInterruptDelaySlot(const MCInst &Inst) {
387-
switch (Inst.getOpcode()) {
386+
/// given instruction may have such an interrupt delay slot.
387+
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
388+
switch (InstOpcode) {
388389
case X86::POPSS16:
389390
case X86::POPSS32:
390391
case X86::STI:
@@ -394,9 +395,9 @@ static bool hasInterruptDelaySlot(const MCInst &Inst) {
394395
case X86::MOV32sr:
395396
case X86::MOV64sr:
396397
case X86::MOV16sm:
397-
if (Inst.getOperand(0).getReg() == X86::SS)
398-
return true;
399-
break;
398+
// In fact, this is only the case if the first operand is SS. However, as
399+
// segment moves occur extremely rarely, this is just a minor pessimization.
400+
return true;
400401
}
401402
return false;
402403
}
@@ -450,22 +451,22 @@ bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
450451
// TLSCALL).
451452
return false;
452453

453-
if (hasInterruptDelaySlot(PrevInst))
454+
if (mayHaveInterruptDelaySlot(PrevInstOpcode))
454455
// If this instruction follows an interrupt enabling instruction with a one
455456
// instruction delay, inserting a nop would change behavior.
456457
return false;
457458

458-
if (isPrefix(PrevInst, *MCII))
459+
if (isPrefix(PrevInstOpcode, *MCII))
459460
// If this instruction follows a prefix, inserting a nop/prefix would change
460461
// semantic.
461462
return false;
462463

463-
if (isPrefix(Inst, *MCII))
464+
if (isPrefix(Inst.getOpcode(), *MCII))
464465
// If this instruction is a prefix, inserting a prefix would change
465466
// semantic.
466467
return false;
467468

468-
if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
469+
if (IsRightAfterData)
469470
// If this instruction follows any data, there is no clear
470471
// instruction boundary, inserting a nop/prefix would change semantic.
471472
return false;
@@ -509,16 +510,24 @@ bool X86AsmBackend::needAlign(const MCInst &Inst) const {
509510
/// Insert BoundaryAlignFragment before instructions to align branches.
510511
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511512
const MCInst &Inst, const MCSubtargetInfo &STI) {
512-
CanPadInst = canPadInst(Inst, OS);
513+
// Used by canPadInst. Done here, because in emitInstructionEnd, the current
514+
// fragment will have changed.
515+
IsRightAfterData =
516+
isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);
513517

514518
if (!canPadBranches(OS))
515519
return;
516520

521+
// NB: PrevInst only valid if canPadBranches is true.
517522
if (!isMacroFused(PrevInst, Inst))
518523
// Macro fusion doesn't happen indeed, clear the pending.
519524
PendingBA = nullptr;
520525

521-
if (!CanPadInst)
526+
// When branch padding is enabled (basically the skx102 erratum => unlikely),
527+
// we call canPadInst (not cheap) twice. However, in the common case, we can
528+
// avoid unnecessary calls to that, as this is otherwise only used for
529+
// relaxable fragments.
530+
if (!canPadInst(Inst, OS))
522531
return;
523532

524533
if (PendingBA && PendingBA->getNextNode() == OS.getCurrentFragment()) {
@@ -552,16 +561,22 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
552561
}
553562

554563
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
555-
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
556-
PrevInst = Inst;
564+
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
565+
const MCInst &Inst) {
557566
MCFragment *CF = OS.getCurrentFragment();
558-
PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
559567
if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
560-
F->setAllowAutoPadding(CanPadInst);
568+
F->setAllowAutoPadding(canPadInst(Inst, OS));
569+
570+
// Update PrevInstOpcode here, canPadInst() reads that.
571+
PrevInstOpcode = Inst.getOpcode();
572+
PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
561573

562574
if (!canPadBranches(OS))
563575
return;
564576

577+
// PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
578+
PrevInst = Inst;
579+
565580
if (!needAlign(Inst) || !PendingBA)
566581
return;
567582

0 commit comments

Comments
 (0)