@@ -223,8 +223,8 @@ namespace {
223
223
void HoistPostRA (MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
224
224
MachineBasicBlock *CurPreheader);
225
225
226
- void ProcessMI (MachineInstr *MI, BitVector &PhysRegDefs ,
227
- BitVector &PhysRegClobbers, SmallSet<int , 32 > &StoredFIs,
226
+ void ProcessMI (MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers ,
227
+ SmallSet<int , 32 > &StoredFIs,
228
228
SmallVectorImpl<CandidateInfo> &Candidates,
229
229
MachineLoop *CurLoop);
230
230
@@ -423,10 +423,47 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
423
423
return false ;
424
424
}
425
425
426
+ static void applyBitsNotInRegMaskToRegUnitsMask (const TargetRegisterInfo &TRI,
427
+ BitVector &RUs,
428
+ const uint32_t *Mask) {
429
+ // Iterate over the RegMask raw to avoid constructing a BitVector, which is
430
+ // expensive as it implies dynamically allocating memory.
431
+ //
432
+ // We also work backwards.
433
+ const unsigned NumRegs = TRI.getNumRegs ();
434
+ const unsigned MaskWords = (NumRegs + 31 ) / 32 ;
435
+ for (unsigned K = 0 ; K < MaskWords; ++K) {
436
+ // We want to set the bits that aren't in RegMask, so flip it.
437
+ uint32_t Word = ~Mask[K];
438
+
439
+ // Iterate all set bits, starting from the right.
440
+ while (Word) {
441
+ const unsigned SetBitIdx = countr_zero (Word);
442
+
443
+ // The bits are numbered from the LSB in each word.
444
+ const unsigned PhysReg = (K * 32 ) + SetBitIdx;
445
+
446
+ // Clear the bit at SetBitIdx. Doing it this way appears to generate less
447
+ // instructions on x86. This works because negating a number will flip all
448
+ // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but
449
+ // faster.
450
+ Word ^= Word & -Word;
451
+
452
+ if (PhysReg == NumRegs)
453
+ return ;
454
+
455
+ if (PhysReg) {
456
+ for (MCRegUnitIterator RUI (PhysReg, &TRI); RUI.isValid (); ++RUI)
457
+ RUs.set (*RUI);
458
+ }
459
+ }
460
+ }
461
+ }
462
+
426
463
// / Examine the instruction for potential LICM candidate. Also
427
464
// / gather register def and frame object update information.
428
- void MachineLICMBase::ProcessMI (MachineInstr *MI, BitVector &PhysRegDefs ,
429
- BitVector &PhysRegClobbers ,
465
+ void MachineLICMBase::ProcessMI (MachineInstr *MI, BitVector &RUDefs ,
466
+ BitVector &RUClobbers ,
430
467
SmallSet<int , 32 > &StoredFIs,
431
468
SmallVectorImpl<CandidateInfo> &Candidates,
432
469
MachineLoop *CurLoop) {
@@ -448,7 +485,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
448
485
// We can't hoist an instruction defining a physreg that is clobbered in
449
486
// the loop.
450
487
if (MO.isRegMask ()) {
451
- PhysRegClobbers. setBitsNotInMask ( MO.getRegMask ());
488
+ applyBitsNotInRegMaskToRegUnitsMask (*TRI, RUClobbers, MO.getRegMask ());
452
489
continue ;
453
490
}
454
491
@@ -460,16 +497,22 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
460
497
assert (Reg.isPhysical () && " Not expecting virtual register!" );
461
498
462
499
if (!MO.isDef ()) {
463
- if (Reg && (PhysRegDefs.test (Reg) || PhysRegClobbers.test (Reg)))
464
- // If it's using a non-loop-invariant register, then it's obviously not
465
- // safe to hoist.
466
- HasNonInvariantUse = true ;
500
+ if (!HasNonInvariantUse) {
501
+ for (MCRegUnitIterator RUI (Reg, TRI); RUI.isValid (); ++RUI) {
502
+ // If it's using a non-loop-invariant register, then it's obviously
503
+ // not safe to hoist.
504
+ if (RUDefs.test (*RUI) || RUClobbers.test (*RUI)) {
505
+ HasNonInvariantUse = true ;
506
+ break ;
507
+ }
508
+ }
509
+ }
467
510
continue ;
468
511
}
469
512
470
513
if (MO.isImplicit ()) {
471
- for (MCRegAliasIterator AI (Reg, TRI, true ); AI .isValid (); ++AI )
472
- PhysRegClobbers .set (*AI );
514
+ for (MCRegUnitIterator RUI (Reg, TRI); RUI .isValid (); ++RUI )
515
+ RUClobbers .set (*RUI );
473
516
if (!MO.isDead ())
474
517
// Non-dead implicit def? This cannot be hoisted.
475
518
RuledOut = true ;
@@ -488,19 +531,18 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
488
531
// If we have already seen another instruction that defines the same
489
532
// register, then this is not safe. Two defs is indicated by setting a
490
533
// RUClobbers bit.
491
- for (MCRegAliasIterator AS (Reg, TRI, true ); AS.isValid (); ++AS) {
492
- if (PhysRegDefs.test (*AS))
493
- PhysRegClobbers.set (*AS);
534
+ for (MCRegUnitIterator RUI (Reg, TRI); RUI.isValid (); ++RUI) {
535
+ if (RUDefs.test (*RUI)) {
536
+ RUClobbers.set (*RUI);
537
+ RuledOut = true ;
538
+ } else if (RUClobbers.test (*RUI)) {
539
+ // MI defined register is seen defined by another instruction in
540
+ // the loop, it cannot be a LICM candidate.
541
+ RuledOut = true ;
542
+ }
543
+
544
+ RUDefs.set (*RUI);
494
545
}
495
- // Need a second loop because MCRegAliasIterator can visit the same
496
- // register twice.
497
- for (MCRegAliasIterator AS (Reg, TRI, true ); AS.isValid (); ++AS)
498
- PhysRegDefs.set (*AS);
499
-
500
- if (PhysRegClobbers.test (Reg))
501
- // MI defined register is seen defined by another instruction in
502
- // the loop, it cannot be a LICM candidate.
503
- RuledOut = true ;
504
546
}
505
547
506
548
// Only consider reloads for now and remats which do not have register
@@ -521,9 +563,9 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
521
563
if (!Preheader)
522
564
return ;
523
565
524
- unsigned NumRegs = TRI->getNumRegs ();
525
- BitVector PhysRegDefs (NumRegs ); // Regs defined once in the loop.
526
- BitVector PhysRegClobbers (NumRegs ); // Regs defined more than once.
566
+ unsigned NumRegUnits = TRI->getNumRegUnits ();
567
+ BitVector RUDefs (NumRegUnits ); // RUs defined once in the loop.
568
+ BitVector RUClobbers (NumRegUnits ); // RUs defined more than once.
527
569
528
570
SmallVector<CandidateInfo, 32 > Candidates;
529
571
SmallSet<int , 32 > StoredFIs;
@@ -540,22 +582,21 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
540
582
// FIXME: That means a reload that is reused in successor block(s) will not
541
583
// be LICM'ed.
542
584
for (const auto &LI : BB->liveins ()) {
543
- for (MCRegAliasIterator AI (LI.PhysReg , TRI, true ); AI .isValid (); ++AI )
544
- PhysRegDefs .set (*AI );
585
+ for (MCRegUnitIterator RUI (LI.PhysReg , TRI); RUI .isValid (); ++RUI )
586
+ RUDefs .set (*RUI );
545
587
}
546
588
547
589
// Funclet entry blocks will clobber all registers
548
590
if (const uint32_t *Mask = BB->getBeginClobberMask (TRI))
549
- PhysRegClobbers. setBitsNotInMask ( Mask);
591
+ applyBitsNotInRegMaskToRegUnitsMask (*TRI, RUClobbers, Mask);
550
592
551
593
SpeculationState = SpeculateUnknown;
552
594
for (MachineInstr &MI : *BB)
553
- ProcessMI (&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
554
- CurLoop);
595
+ ProcessMI (&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
555
596
}
556
597
557
598
// Gather the registers read / clobbered by the terminator.
558
- BitVector TermRegs (NumRegs );
599
+ BitVector TermRUs (NumRegUnits );
559
600
MachineBasicBlock::iterator TI = Preheader->getFirstTerminator ();
560
601
if (TI != Preheader->end ()) {
561
602
for (const MachineOperand &MO : TI->operands ()) {
@@ -564,8 +605,8 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
564
605
Register Reg = MO.getReg ();
565
606
if (!Reg)
566
607
continue ;
567
- for (MCRegAliasIterator AI (Reg, TRI, true ); AI .isValid (); ++AI )
568
- TermRegs .set (*AI );
608
+ for (MCRegUnitIterator RUI (Reg, TRI); RUI .isValid (); ++RUI )
609
+ TermRUs .set (*RUI );
569
610
}
570
611
}
571
612
@@ -583,24 +624,36 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
583
624
continue ;
584
625
585
626
unsigned Def = Candidate.Def ;
586
- if (!PhysRegClobbers.test (Def) && !TermRegs.test (Def)) {
587
- bool Safe = true ;
588
- MachineInstr *MI = Candidate.MI ;
589
- for (const MachineOperand &MO : MI->all_uses ()) {
590
- if (!MO.getReg ())
591
- continue ;
592
- Register Reg = MO.getReg ();
593
- if (PhysRegDefs.test (Reg) ||
594
- PhysRegClobbers.test (Reg)) {
627
+ bool Safe = true ;
628
+ for (MCRegUnitIterator RUI (Def, TRI); RUI.isValid (); ++RUI) {
629
+ if (RUClobbers.test (*RUI) || TermRUs.test (*RUI)) {
630
+ Safe = false ;
631
+ break ;
632
+ }
633
+ }
634
+
635
+ if (!Safe)
636
+ continue ;
637
+
638
+ MachineInstr *MI = Candidate.MI ;
639
+ for (const MachineOperand &MO : MI->all_uses ()) {
640
+ if (!MO.getReg ())
641
+ continue ;
642
+ for (MCRegUnitIterator RUI (MO.getReg (), TRI); RUI.isValid (); ++RUI) {
643
+ if (RUDefs.test (*RUI) || RUClobbers.test (*RUI)) {
595
644
// If it's using a non-loop-invariant register, then it's obviously
596
645
// not safe to hoist.
597
646
Safe = false ;
598
647
break ;
599
648
}
600
649
}
601
- if (Safe)
602
- HoistPostRA (MI, Candidate.Def , CurLoop, CurPreheader);
650
+
651
+ if (!Safe)
652
+ break ;
603
653
}
654
+
655
+ if (Safe)
656
+ HoistPostRA (MI, Candidate.Def , CurLoop, CurPreheader);
604
657
}
605
658
}
606
659
0 commit comments