@@ -2260,23 +2260,24 @@ static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
2260
2260
MachineInstr *X86InstrInfo::commuteInstructionImpl (MachineInstr &MI, bool NewMI,
2261
2261
unsigned OpIdx1,
2262
2262
unsigned OpIdx2) const {
2263
- auto cloneIfNew = [NewMI ](MachineInstr &MI) -> MachineInstr & {
2264
- if (NewMI)
2265
- return * MI.getParent ()->getParent ()->CloneMachineInstr (&MI);
2266
- return MI;
2263
+ auto CloneIfNew = [& ](MachineInstr &MI) {
2264
+ return std::exchange (NewMI, false )
2265
+ ? MI.getParent ()->getParent ()->CloneMachineInstr (&MI)
2266
+ : & MI;
2267
2267
};
2268
+ MachineInstr *WorkingMI = nullptr ;
2269
+ unsigned Opc = MI.getOpcode ();
2268
2270
2269
- switch (MI.getOpcode ()) {
2270
- case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
2271
- case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
2272
- case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
2273
- case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
2274
- case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
2275
- case X86::SHLD64rri8: { // A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B,
2276
- // (64-I)
2277
- unsigned Opc;
2271
+ switch (Opc) {
2272
+ // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
2273
+ case X86::SHRD16rri8:
2274
+ case X86::SHLD16rri8:
2275
+ case X86::SHRD32rri8:
2276
+ case X86::SHLD32rri8:
2277
+ case X86::SHRD64rri8:
2278
+ case X86::SHLD64rri8: {
2278
2279
unsigned Size ;
2279
- switch (MI. getOpcode () ) {
2280
+ switch (Opc ) {
2280
2281
default :
2281
2282
llvm_unreachable (" Unreachable!" );
2282
2283
case X86::SHRD16rri8:
@@ -2304,32 +2305,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2304
2305
Opc = X86::SHRD64rri8;
2305
2306
break ;
2306
2307
}
2307
- unsigned Amt = MI.getOperand (3 ).getImm ();
2308
- auto &WorkingMI = cloneIfNew (MI);
2309
- WorkingMI.setDesc (get (Opc));
2310
- WorkingMI.getOperand (3 ).setImm (Size - Amt);
2311
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2312
- OpIdx1, OpIdx2);
2308
+ WorkingMI = CloneIfNew (MI);
2309
+ WorkingMI->setDesc (get (Opc));
2310
+ WorkingMI->getOperand (3 ).setImm (Size - MI.getOperand (3 ).getImm ());
2311
+ break ;
2313
2312
}
2314
2313
case X86::PFSUBrr:
2315
- case X86::PFSUBRrr: {
2314
+ case X86::PFSUBRrr:
2316
2315
// PFSUB x, y: x = x - y
2317
2316
// PFSUBR x, y: x = y - x
2318
- unsigned Opc =
2319
- (X86::PFSUBRrr == MI.getOpcode () ? X86::PFSUBrr : X86::PFSUBRrr);
2320
- auto &WorkingMI = cloneIfNew (MI);
2321
- WorkingMI.setDesc (get (Opc));
2322
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2323
- OpIdx1, OpIdx2);
2324
- }
2317
+ WorkingMI = CloneIfNew (MI);
2318
+ WorkingMI->setDesc (
2319
+ get (X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
2320
+ break ;
2325
2321
case X86::BLENDPDrri:
2326
2322
case X86::BLENDPSrri:
2327
2323
case X86::VBLENDPDrri:
2328
2324
case X86::VBLENDPSrri:
2329
2325
// If we're optimizing for size, try to use MOVSD/MOVSS.
2330
2326
if (MI.getParent ()->getParent ()->getFunction ().hasOptSize ()) {
2331
- unsigned Mask, Opc ;
2332
- switch (MI. getOpcode () ) {
2327
+ unsigned Mask;
2328
+ switch (Opc ) {
2333
2329
default :
2334
2330
llvm_unreachable (" Unreachable!" );
2335
2331
case X86::BLENDPDrri:
@@ -2350,12 +2346,10 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2350
2346
break ;
2351
2347
}
2352
2348
if ((MI.getOperand (3 ).getImm () ^ Mask) == 1 ) {
2353
- auto &WorkingMI = cloneIfNew (MI);
2354
- WorkingMI.setDesc (get (Opc));
2355
- WorkingMI.removeOperand (3 );
2356
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI,
2357
- /* NewMI=*/ false , OpIdx1,
2358
- OpIdx2);
2349
+ WorkingMI = CloneIfNew (MI);
2350
+ WorkingMI->setDesc (get (Opc));
2351
+ WorkingMI->removeOperand (3 );
2352
+ break ;
2359
2353
}
2360
2354
}
2361
2355
[[fallthrough]];
@@ -2367,7 +2361,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2367
2361
case X86::VPBLENDDYrri:
2368
2362
case X86::VPBLENDWYrri: {
2369
2363
int8_t Mask;
2370
- switch (MI. getOpcode () ) {
2364
+ switch (Opc ) {
2371
2365
default :
2372
2366
llvm_unreachable (" Unreachable!" );
2373
2367
case X86::BLENDPDrri:
@@ -2408,10 +2402,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2408
2402
// Using int8_t to ensure it will be sign extended to the int64_t that
2409
2403
// setImm takes in order to match isel behavior.
2410
2404
int8_t Imm = MI.getOperand (3 ).getImm () & Mask;
2411
- auto &WorkingMI = cloneIfNew (MI);
2412
- WorkingMI.getOperand (3 ).setImm (Mask ^ Imm);
2413
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2414
- OpIdx1, OpIdx2);
2405
+ WorkingMI = CloneIfNew (MI);
2406
+ WorkingMI->getOperand (3 ).setImm (Mask ^ Imm);
2407
+ break ;
2415
2408
}
2416
2409
case X86::INSERTPSrr:
2417
2410
case X86::VINSERTPSrr:
@@ -2428,10 +2421,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2428
2421
unsigned AltIdx = llvm::countr_zero ((ZMask | (1 << DstIdx)) ^ 15 );
2429
2422
assert (AltIdx < 4 && " Illegal insertion index" );
2430
2423
unsigned AltImm = (AltIdx << 6 ) | (AltIdx << 4 ) | ZMask;
2431
- auto &WorkingMI = cloneIfNew (MI);
2432
- WorkingMI.getOperand (MI.getNumOperands () - 1 ).setImm (AltImm);
2433
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2434
- OpIdx1, OpIdx2);
2424
+ WorkingMI = CloneIfNew (MI);
2425
+ WorkingMI->getOperand (MI.getNumOperands () - 1 ).setImm (AltImm);
2426
+ break ;
2435
2427
}
2436
2428
return nullptr ;
2437
2429
}
@@ -2441,8 +2433,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2441
2433
case X86::VMOVSSrr: {
2442
2434
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
2443
2435
if (Subtarget.hasSSE41 ()) {
2444
- unsigned Mask, Opc ;
2445
- switch (MI. getOpcode () ) {
2436
+ unsigned Mask;
2437
+ switch (Opc ) {
2446
2438
default :
2447
2439
llvm_unreachable (" Unreachable!" );
2448
2440
case X86::MOVSDrr:
@@ -2463,31 +2455,24 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2463
2455
break ;
2464
2456
}
2465
2457
2466
- auto &WorkingMI = cloneIfNew (MI);
2467
- WorkingMI.setDesc (get (Opc));
2468
- WorkingMI.addOperand (MachineOperand::CreateImm (Mask));
2469
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2470
- OpIdx1, OpIdx2);
2458
+ WorkingMI = CloneIfNew (MI);
2459
+ WorkingMI->setDesc (get (Opc));
2460
+ WorkingMI->addOperand (MachineOperand::CreateImm (Mask));
2461
+ break ;
2471
2462
}
2472
2463
2473
- // Convert to SHUFPD.
2474
- assert (MI.getOpcode () == X86::MOVSDrr &&
2475
- " Can only commute MOVSDrr without SSE4.1" );
2476
-
2477
- auto &WorkingMI = cloneIfNew (MI);
2478
- WorkingMI.setDesc (get (X86::SHUFPDrri));
2479
- WorkingMI.addOperand (MachineOperand::CreateImm (0x02 ));
2480
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2481
- OpIdx1, OpIdx2);
2464
+ WorkingMI = CloneIfNew (MI);
2465
+ WorkingMI->setDesc (get (X86::SHUFPDrri));
2466
+ WorkingMI->addOperand (MachineOperand::CreateImm (0x02 ));
2467
+ break ;
2482
2468
}
2483
2469
case X86::SHUFPDrri: {
2484
2470
// Commute to MOVSD.
2485
2471
assert (MI.getOperand (3 ).getImm () == 0x02 && " Unexpected immediate!" );
2486
- auto &WorkingMI = cloneIfNew (MI);
2487
- WorkingMI.setDesc (get (X86::MOVSDrr));
2488
- WorkingMI.removeOperand (3 );
2489
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2490
- OpIdx1, OpIdx2);
2472
+ WorkingMI = CloneIfNew (MI);
2473
+ WorkingMI->setDesc (get (X86::MOVSDrr));
2474
+ WorkingMI->removeOperand (3 );
2475
+ break ;
2491
2476
}
2492
2477
case X86::PCLMULQDQrr:
2493
2478
case X86::VPCLMULQDQrr:
@@ -2500,10 +2485,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2500
2485
unsigned Imm = MI.getOperand (3 ).getImm ();
2501
2486
unsigned Src1Hi = Imm & 0x01 ;
2502
2487
unsigned Src2Hi = Imm & 0x10 ;
2503
- auto &WorkingMI = cloneIfNew (MI);
2504
- WorkingMI.getOperand (3 ).setImm ((Src1Hi << 4 ) | (Src2Hi >> 4 ));
2505
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2506
- OpIdx1, OpIdx2);
2488
+ WorkingMI = CloneIfNew (MI);
2489
+ WorkingMI->getOperand (3 ).setImm ((Src1Hi << 4 ) | (Src2Hi >> 4 ));
2490
+ break ;
2507
2491
}
2508
2492
case X86::VPCMPBZ128rri:
2509
2493
case X86::VPCMPUBZ128rri:
@@ -2552,31 +2536,26 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2552
2536
case X86::VPCMPWZ256rrik:
2553
2537
case X86::VPCMPUWZ256rrik:
2554
2538
case X86::VPCMPWZrrik:
2555
- case X86::VPCMPUWZrrik: {
2539
+ case X86::VPCMPUWZrrik:
2540
+ WorkingMI = CloneIfNew (MI);
2556
2541
// Flip comparison mode immediate (if necessary).
2557
- unsigned Imm = MI.getOperand (MI.getNumOperands () - 1 ).getImm () & 0x7 ;
2558
- Imm = X86::getSwappedVPCMPImm (Imm);
2559
- auto &WorkingMI = cloneIfNew (MI);
2560
- WorkingMI.getOperand (MI.getNumOperands () - 1 ).setImm (Imm);
2561
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2562
- OpIdx1, OpIdx2);
2563
- }
2542
+ WorkingMI->getOperand (MI.getNumOperands () - 1 )
2543
+ .setImm (X86::getSwappedVPCMPImm (
2544
+ MI.getOperand (MI.getNumOperands () - 1 ).getImm () & 0x7 ));
2545
+ break ;
2564
2546
case X86::VPCOMBri:
2565
2547
case X86::VPCOMUBri:
2566
2548
case X86::VPCOMDri:
2567
2549
case X86::VPCOMUDri:
2568
2550
case X86::VPCOMQri:
2569
2551
case X86::VPCOMUQri:
2570
2552
case X86::VPCOMWri:
2571
- case X86::VPCOMUWri: {
2553
+ case X86::VPCOMUWri:
2554
+ WorkingMI = CloneIfNew (MI);
2572
2555
// Flip comparison mode immediate (if necessary).
2573
- unsigned Imm = MI.getOperand (3 ).getImm () & 0x7 ;
2574
- Imm = X86::getSwappedVPCOMImm (Imm);
2575
- auto &WorkingMI = cloneIfNew (MI);
2576
- WorkingMI.getOperand (3 ).setImm (Imm);
2577
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2578
- OpIdx1, OpIdx2);
2579
- }
2556
+ WorkingMI->getOperand (3 ).setImm (
2557
+ X86::getSwappedVPCOMImm (MI.getOperand (3 ).getImm () & 0x7 ));
2558
+ break ;
2580
2559
case X86::VCMPSDZrr:
2581
2560
case X86::VCMPSSZrr:
2582
2561
case X86::VCMPPDZrri:
@@ -2594,35 +2573,28 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2594
2573
case X86::VCMPPDZ128rrik:
2595
2574
case X86::VCMPPSZ128rrik:
2596
2575
case X86::VCMPPDZ256rrik:
2597
- case X86::VCMPPSZ256rrik: {
2598
- unsigned Imm =
2599
- MI.getOperand (MI.getNumExplicitOperands () - 1 ).getImm () & 0x1f ;
2600
- Imm = X86::getSwappedVCMPImm (Imm);
2601
- auto &WorkingMI = cloneIfNew (MI);
2602
- WorkingMI.getOperand (MI.getNumExplicitOperands () - 1 ).setImm (Imm);
2603
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2604
- OpIdx1, OpIdx2);
2605
- }
2576
+ case X86::VCMPPSZ256rrik:
2577
+ WorkingMI = CloneIfNew (MI);
2578
+ WorkingMI->getOperand (MI.getNumExplicitOperands () - 1 )
2579
+ .setImm (X86::getSwappedVCMPImm (
2580
+ MI.getOperand (MI.getNumExplicitOperands () - 1 ).getImm () & 0x1f ));
2581
+ break ;
2606
2582
case X86::VPERM2F128rr:
2607
- case X86::VPERM2I128rr: {
2583
+ case X86::VPERM2I128rr:
2608
2584
// Flip permute source immediate.
2609
2585
// Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
2610
2586
// Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2611
- int8_t Imm = MI.getOperand (3 ).getImm () & 0xFF ;
2612
- auto &WorkingMI = cloneIfNew (MI);
2613
- WorkingMI.getOperand (3 ).setImm (Imm ^ 0x22 );
2614
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2615
- OpIdx1, OpIdx2);
2616
- }
2587
+ WorkingMI = CloneIfNew (MI);
2588
+ WorkingMI->getOperand (3 ).setImm ((MI.getOperand (3 ).getImm () & 0xFF ) ^ 0x22 );
2589
+ break ;
2617
2590
case X86::MOVHLPSrr:
2618
2591
case X86::UNPCKHPDrr:
2619
2592
case X86::VMOVHLPSrr:
2620
2593
case X86::VUNPCKHPDrr:
2621
2594
case X86::VMOVHLPSZrr:
2622
- case X86::VUNPCKHPDZ128rr: {
2595
+ case X86::VUNPCKHPDZ128rr:
2623
2596
assert (Subtarget.hasSSE2 () && " Commuting MOVHLP/UNPCKHPD requires SSE2!" );
2624
2597
2625
- unsigned Opc = MI.getOpcode ();
2626
2598
switch (Opc) {
2627
2599
default :
2628
2600
llvm_unreachable (" Unreachable!" );
@@ -2645,20 +2617,17 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2645
2617
Opc = X86::VMOVHLPSZrr;
2646
2618
break ;
2647
2619
}
2648
- auto &WorkingMI = cloneIfNew (MI);
2649
- WorkingMI.setDesc (get (Opc));
2650
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2651
- OpIdx1, OpIdx2);
2652
- }
2620
+ WorkingMI = CloneIfNew (MI);
2621
+ WorkingMI->setDesc (get (Opc));
2622
+ break ;
2653
2623
case X86::CMOV16rr:
2654
2624
case X86::CMOV32rr:
2655
2625
case X86::CMOV64rr: {
2656
- auto & WorkingMI = cloneIfNew (MI);
2626
+ WorkingMI = CloneIfNew (MI);
2657
2627
unsigned OpNo = MI.getDesc ().getNumOperands () - 1 ;
2658
2628
X86::CondCode CC = static_cast <X86::CondCode>(MI.getOperand (OpNo).getImm ());
2659
- WorkingMI.getOperand (OpNo).setImm (X86::GetOppositeBranchCondition (CC));
2660
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2661
- OpIdx1, OpIdx2);
2629
+ WorkingMI->getOperand (OpNo).setImm (X86::GetOppositeBranchCondition (CC));
2630
+ break ;
2662
2631
}
2663
2632
case X86::VPTERNLOGDZrri:
2664
2633
case X86::VPTERNLOGDZrmi:
@@ -2702,34 +2671,25 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2702
2671
case X86::VPTERNLOGQZ128rmbikz:
2703
2672
case X86::VPTERNLOGQZ256rmbikz:
2704
2673
case X86::VPTERNLOGQZrmbikz: {
2705
- auto &WorkingMI = cloneIfNew (MI);
2706
- commuteVPTERNLOG (WorkingMI, OpIdx1, OpIdx2);
2707
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2708
- OpIdx1, OpIdx2);
2674
+ WorkingMI = CloneIfNew (MI);
2675
+ commuteVPTERNLOG (*WorkingMI, OpIdx1, OpIdx2);
2676
+ break ;
2709
2677
}
2710
- default : {
2711
- if (isCommutableVPERMV3Instruction (MI.getOpcode ())) {
2712
- unsigned Opc = getCommutedVPERMV3Opcode (MI.getOpcode ());
2713
- auto &WorkingMI = cloneIfNew (MI);
2714
- WorkingMI.setDesc (get (Opc));
2715
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2716
- OpIdx1, OpIdx2);
2678
+ default :
2679
+ if (isCommutableVPERMV3Instruction (Opc)) {
2680
+ WorkingMI = CloneIfNew (MI);
2681
+ WorkingMI->setDesc (get (getCommutedVPERMV3Opcode (Opc)));
2682
+ break ;
2717
2683
}
2718
2684
2719
- const X86InstrFMA3Group *FMA3Group =
2720
- getFMA3Group (MI.getOpcode (), MI.getDesc ().TSFlags );
2721
- if (FMA3Group) {
2722
- unsigned Opc =
2723
- getFMA3OpcodeToCommuteOperands (MI, OpIdx1, OpIdx2, *FMA3Group);
2724
- auto &WorkingMI = cloneIfNew (MI);
2725
- WorkingMI.setDesc (get (Opc));
2726
- return TargetInstrInfo::commuteInstructionImpl (WorkingMI, /* NewMI=*/ false ,
2727
- OpIdx1, OpIdx2);
2685
+ if (auto *FMA3Group = getFMA3Group (Opc, MI.getDesc ().TSFlags )) {
2686
+ WorkingMI = CloneIfNew (MI);
2687
+ WorkingMI->setDesc (
2688
+ get (getFMA3OpcodeToCommuteOperands (MI, OpIdx1, OpIdx2, *FMA3Group)));
2689
+ break ;
2728
2690
}
2729
-
2730
- return TargetInstrInfo::commuteInstructionImpl (MI, NewMI, OpIdx1, OpIdx2);
2731
- }
2732
2691
}
2692
+ return TargetInstrInfo::commuteInstructionImpl (WorkingMI ? *WorkingMI : MI, NewMI, OpIdx1, OpIdx2); // Commute the instruction prepared above (possibly a clone); WorkingMI is null only when no special case matched.
2733
2693
}
2734
2694
2735
2695
bool X86InstrInfo::findThreeSrcCommutedOpIndices (const MachineInstr &MI,
0 commit comments