@@ -68,6 +68,8 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     unsigned BitSize);

+  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI);
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                       unsigned ExtendImm, unsigned ZeroReg,
@@ -344,6 +346,176 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
   return true;
 }

+/// \brief Expand Pseudos to Instructions with destructive operands.
+///
+/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
+/// or for fixing relaxed register allocation conditions to comply with
+/// the instructions' register constraints. The latter case may be cheaper
+/// than setting the register constraints in the register allocator,
+/// since that will insert regular MOV instructions rather than MOVPRFX.
+///
+/// Example (after register allocation):
+///
+///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
+///
+/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
+/// * We cannot map directly to FSUB_ZPmZ_B because the register
+///   constraints of the instruction are not met.
+/// * Also the _ZERO specifies that the false lanes need to be zeroed.
+///
+/// We first check whether the destructive operand == result operand;
+/// if not, we try to swap the operands, e.g.
+///
+///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
+///
+/// But because FSUB_ZPmZ is not commutative, this is semantically
+/// different, so we need a reverse instruction:
+///
+///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
+///
+/// Then we implement the zeroing of the false lanes of Z0 by adding
+/// a zeroing MOVPRFX instruction:
+///
+///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
+///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
+///
+/// Note that this can only be done for _ZERO or _UNDEF variants where
+/// we can guarantee the false lanes to be zeroed (by implementing this)
+/// or that they are undef (don't care / not used); otherwise the
+/// swapping of operands is illegal because the operation is not
+/// (or cannot be emulated to be) fully commutative.
+bool AArch64ExpandPseudo::expand_DestructiveOp(
+    MachineInstr &MI,
+    MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI) {
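+  // Map the pseudo onto the real instruction it stands for, then decode from
+  // the TSFlags how its destructive operand and false lanes must be handled.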
+  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
+  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
+  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
+  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
+
+  unsigned DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+
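+  // Plain DestructiveBinary has no reverse form to fall back on, so the
+  // register allocator must already have kept Dst clear of the second source.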
+  if (DType == AArch64::DestructiveBinary)
+    assert(DstReg != MI.getOperand(3).getReg());
+
+  bool UseRev = false;
+  unsigned PredIdx, DOPIdx, SrcIdx;
+  switch (DType) {
+  case AArch64::DestructiveBinaryComm:
+  case AArch64::DestructiveBinaryCommWithRev:
+    if (DstReg == MI.getOperand(3).getReg()) {
+      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
+      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
+      UseRev = true;
+      break;
+    }
+    LLVM_FALLTHROUGH;
+  case AArch64::DestructiveBinary:
+    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
+    break;
+  default:
+    llvm_unreachable("Unsupported Destructive Operand type");
+  }
+
+#ifndef NDEBUG
+  // MOVPRFX can only be used if the destination operand
+  // is the destructive operand, not as any other operand,
+  // so the Destructive Operand must be unique.
+  bool DOPRegIsUnique = false;
+  switch (DType) {
+  case AArch64::DestructiveBinaryComm:
+  case AArch64::DestructiveBinaryCommWithRev:
+    DOPRegIsUnique =
+        DstReg != MI.getOperand(DOPIdx).getReg() ||
+        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
+    break;
+  }
+
+  assert(DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+
+  // Resolve the reverse opcode
+  if (UseRev) {
+    if (AArch64::getSVERevInstr(Opcode) != -1)
+      Opcode = AArch64::getSVERevInstr(Opcode);
+    else if (AArch64::getSVEOrigInstr(Opcode) != -1)
+      Opcode = AArch64::getSVEOrigInstr(Opcode);
+  }
+
+  // Get the right MOVPRFX
+  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
+  unsigned MovPrfx, MovPrfxZero;
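+  // The unpredicated MOVPRFX is element-size agnostic; only the predicated,
+  // zeroing form must match the element size of the instruction it prefixes.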
+  switch (ElementSize) {
+  case AArch64::ElementSizeNone:
+  case AArch64::ElementSizeB:
+    MovPrfx = AArch64::MOVPRFX_ZZ;
+    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
+    break;
+  case AArch64::ElementSizeH:
+    MovPrfx = AArch64::MOVPRFX_ZZ;
+    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
+    break;
+  case AArch64::ElementSizeS:
+    MovPrfx = AArch64::MOVPRFX_ZZ;
+    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
+    break;
+  case AArch64::ElementSizeD:
+    MovPrfx = AArch64::MOVPRFX_ZZ;
+    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
+    break;
+  default:
+    llvm_unreachable("Unsupported ElementSize");
+  }
+
+  //
+  // Create the destructive operation (if required)
+  //
+  MachineInstrBuilder PRFX, DOP;
+  if (FalseZero) {
+    assert(ElementSize != AArch64::ElementSizeNone &&
+           "This instruction is unpredicated");
+
+    // Merge source operand into destination register
+    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
+               .addReg(DstReg, RegState::Define)
+               .addReg(MI.getOperand(PredIdx).getReg())
+               .addReg(MI.getOperand(DOPIdx).getReg());
+
+    // After the movprfx, the destructive operand is the same as Dst
+    DOPIdx = 0;
+  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
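+    // Dst is not the destructive operand, so copy that operand into Dst with
+    // an unpredicated movprfx to satisfy the tied-register constraint.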
+    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+               .addReg(DstReg, RegState::Define)
+               .addReg(MI.getOperand(DOPIdx).getReg());
+    DOPIdx = 0;
+  }
+
+  //
+  // Create the destructive operation
+  //
+  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+
+  switch (DType) {
+  case AArch64::DestructiveBinaryComm:
+  case AArch64::DestructiveBinaryCommWithRev:
+    DOP.add(MI.getOperand(PredIdx))
+        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+        .add(MI.getOperand(SrcIdx));
+    break;
+  }
+
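+  // A movprfx must immediately precede the instruction it prefixes, so the
+  // pair is kept together in a bundle.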
+  if (PRFX) {
+    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
+    transferImpOps(MI, PRFX, DOP);
+  } else
+    transferImpOps(MI, DOP, DOP);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64ExpandPseudo::expandSetTagLoop(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI) {
@@ -425,6 +597,17 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
+
+  // Check if we can expand the destructive op
+  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
+  if (OrigInstr != -1) {
+    auto &Orig = TII->get(OrigInstr);
+    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
+        AArch64::NotDestructive) {
+      return expand_DestructiveOp(MI, MBB, MBBI);
+    }
+  }
+
   switch (Opcode) {
   default:
     break;