@@ -65,6 +65,7 @@ STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
65
65
STATISTIC (NumSetCCsInserted, " Number of setCC instructions inserted" );
66
66
STATISTIC (NumTestsInserted, " Number of test instructions inserted" );
67
67
STATISTIC (NumAddsInserted, " Number of adds instructions inserted" );
68
+ STATISTIC (NumNFsConvertedTo, " Number of NF instructions converted to" );
68
69
69
70
namespace {
70
71
@@ -235,6 +236,19 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
235
236
return NewMBB;
236
237
}
237
238
239
+ enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
240
+
241
+ static EFLAGSClobber getClobberType (const MachineInstr &MI) {
242
+ const MachineOperand *FlagDef =
243
+ MI.findRegisterDefOperand (X86::EFLAGS, /* TRI=*/ nullptr );
244
+ if (!FlagDef)
245
+ return NoClobber;
246
+ if (FlagDef->isDead () && X86::getNFVariant (MI.getOpcode ()))
247
+ return EvitableClobber;
248
+
249
+ return InevitableClobber;
250
+ }
251
+
238
252
bool X86FlagsCopyLoweringPass::runOnMachineFunction (MachineFunction &MF) {
239
253
LLVM_DEBUG (dbgs () << " ********** " << getPassName () << " : " << MF.getName ()
240
254
<< " **********\n " );
@@ -254,14 +268,107 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
254
268
// turn copied again we visit the first one first. This ensures we can find
255
269
// viable locations for testing the original EFLAGS that dominate all the
256
270
// uses across complex CFGs.
257
- SmallVector <MachineInstr *, 4 > Copies;
271
+ SmallSetVector <MachineInstr *, 4 > Copies;
258
272
ReversePostOrderTraversal<MachineFunction *> RPOT (&MF);
259
273
for (MachineBasicBlock *MBB : RPOT)
260
274
for (MachineInstr &MI : *MBB)
261
275
if (MI.getOpcode () == TargetOpcode::COPY &&
262
276
MI.getOperand (0 ).getReg () == X86::EFLAGS)
263
- Copies.push_back (&MI);
277
+ Copies.insert (&MI);
278
+
279
+ // Try to eliminate the copies by transforming the instructions between copy and
280
+ // copydef to the NF (no flags update) variants, e.g.
281
+ //
282
+ // %1:gr64 = COPY $eflags
283
+ // OP1 implicit-def dead $eflags
284
+ // $eflags = COPY %1
285
+ // OP2 cc, implicit $eflags
286
+ //
287
+ // ->
288
+ //
289
+ // OP1_NF
290
+ // OP2 implicit $eflags
291
+ if (Subtarget->hasNF ()) {
292
+ SmallSetVector<MachineInstr *, 4 > RemovedCopies;
293
+ // CopyIIt may be invalidated by removing copies.
294
+ auto CopyIIt = Copies.begin (), CopyIEnd = Copies.end ();
295
+ while (CopyIIt != CopyIEnd) {
296
+ auto NCopyIIt = std::next (CopyIIt);
297
+ SmallSetVector<MachineInstr *, 4 > EvitableClobbers;
298
+ MachineInstr *CopyI = *CopyIIt;
299
+ MachineOperand &VOp = CopyI->getOperand (1 );
300
+ MachineInstr *CopyDefI = MRI->getVRegDef (VOp.getReg ());
301
+ MachineBasicBlock *CopyIMBB = CopyI->getParent ();
302
+ MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent ();
303
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
304
+ // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
305
+ // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
306
+ // all execution paths, instructions from CopyDefI to CopyI (exclusive)
307
+ // have to be NF-convertible if they clobber flags.
308
+ for (auto BI = idf_begin (CopyIMBB), BE = idf_end (CopyDefIMBB); BI != BE;
309
+ ++BI) {
310
+ MachineBasicBlock *MBB = *BI;
311
+ for (auto I = (MBB != CopyDefIMBB)
312
+ ? MBB->begin ()
313
+ : std::next (MachineBasicBlock::iterator (CopyDefI)),
314
+ E = (MBB != CopyIMBB) ? MBB->end ()
315
+ : MachineBasicBlock::iterator (CopyI);
316
+ I != E; ++I) {
317
+ MachineInstr &MI = *I;
318
+ EFLAGSClobber ClobberType = getClobberType (MI);
319
+ if (ClobberType == NoClobber)
320
+ continue ;
321
+
322
+ if (ClobberType == InevitableClobber)
323
+ goto ProcessNextCopyI;
324
+
325
+ assert (ClobberType == EvitableClobber && " unexpected workflow" );
326
+ EvitableClobbers.insert (&MI);
327
+ }
328
+ }
329
+ // Convert evitable clobbers into NF variants and remove the copies.
330
+ RemovedCopies.insert (CopyI);
331
+ CopyI->eraseFromParent ();
332
+ if (MRI->use_nodbg_empty (CopyDefI->getOperand (0 ).getReg ())) {
333
+ RemovedCopies.insert (CopyDefI);
334
+ CopyDefI->eraseFromParent ();
335
+ }
336
+ ++NumCopiesEliminated;
337
+ for (auto *Clobber : EvitableClobbers) {
338
+ unsigned NewOpc = X86::getNFVariant (Clobber->getOpcode ());
339
+ assert (NewOpc && " evitable clobber must have a NF variant" );
340
+ Clobber->setDesc (TII->get (NewOpc));
341
+ Clobber->removeOperand (
342
+ Clobber->findRegisterDefOperand (X86::EFLAGS, /* TRI=*/ nullptr )
343
+ ->getOperandNo ());
344
+ ++NumNFsConvertedTo;
345
+ }
346
+ // Update liveins for basic blocks in the path
347
+ for (auto BI = idf_begin (CopyIMBB), BE = idf_end (CopyDefIMBB); BI != BE;
348
+ ++BI)
349
+ if (*BI != CopyDefIMBB)
350
+ BI->addLiveIn (X86::EFLAGS);
351
+ ProcessNextCopyI:
352
+ CopyIIt = NCopyIIt;
353
+ }
354
+ Copies.set_subtract (RemovedCopies);
355
+ }
264
356
357
+ // For the rest of copies that cannot be eliminated by NF transform, we use
358
+ // setcc to preserve the flags in GPR32 before OP1, and recheck its value
359
+ // before using the flags, e.g.
360
+ //
361
+ // %1:gr64 = COPY $eflags
362
+ // OP1 implicit-def dead $eflags
363
+ // $eflags = COPY %1
364
+ // OP2 cc, implicit $eflags
365
+ //
366
+ // ->
367
+ //
368
+ // %1:gr8 = SETCCr cc, implicit $eflags
369
+ // OP1 implicit-def dead $eflags
370
+ // TEST8rr %1, %1, implicit-def $eflags
371
+ // OP2 ne, implicit $eflags
265
372
for (MachineInstr *CopyI : Copies) {
266
373
MachineBasicBlock &MBB = *CopyI->getParent ();
267
374
0 commit comments