@@ -2445,299 +2445,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     MI->eraseFromParent();
     return true;
   }
-  case AMDGPU::V_ADD_U32_e32:
-  case AMDGPU::V_ADD_U32_e64:
-  case AMDGPU::V_ADD_CO_U32_e32:
-  case AMDGPU::V_ADD_CO_U32_e64: {
-    // TODO: Handle sub, and, or.
-    unsigned NumDefs = MI->getNumExplicitDefs();
-    unsigned Src0Idx = NumDefs;
-
-    bool HasClamp = false;
-    MachineOperand *VCCOp = nullptr;
-
-    switch (MI->getOpcode()) {
-    case AMDGPU::V_ADD_U32_e32:
-      break;
-    case AMDGPU::V_ADD_U32_e64:
-      HasClamp = MI->getOperand(3).getImm();
-      break;
-    case AMDGPU::V_ADD_CO_U32_e32:
-      VCCOp = &MI->getOperand(3);
-      break;
-    case AMDGPU::V_ADD_CO_U32_e64:
-      VCCOp = &MI->getOperand(1);
-      HasClamp = MI->getOperand(4).getImm();
-      break;
-    default:
-      break;
-    }
-    bool DeadVCC = !VCCOp || VCCOp->isDead();
-    MachineOperand &DstOp = MI->getOperand(0);
-    Register DstReg = DstOp.getReg();
-
-    unsigned OtherOpIdx =
-        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
-    MachineOperand *OtherOp = &MI->getOperand(OtherOpIdx);
-
-    unsigned Src1Idx = Src0Idx + 1;
-    Register MaterializedReg = FrameReg;
-    Register ScavengedVGPR;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // We should just do an in-place update of the result register. However,
-      // the value there may also be used by the add, in which case we need a
-      // temporary register.
-      //
-      // FIXME: The scavenger is not finding the result register in the
-      // common case where the add does not read the register.
-
-      ScavengedVGPR = RS->scavengeRegisterBackwards(
-          AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
-
-      // TODO: If we have a free SGPR, it's sometimes better to use a scalar
-      // shift.
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64))
-          .addDef(ScavengedVGPR, RegState::Renamable)
-          .addImm(ST.getWavefrontSizeLog2())
-          .addReg(FrameReg);
-      MaterializedReg = ScavengedVGPR;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp->isImm()) {
-      OtherOp->setImm(OtherOp->getImm() + Offset);
-      Offset = 0;
-    }
-
-    if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
-      if (ST.enableFlatScratch() &&
-          !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
-        // We didn't need the shift above, so we have an SGPR for the frame
-        // register, but may have a VGPR only operand.
-        //
-        // TODO: On gfx10+, we can easily change the opcode to the e64 version
-        // and use the higher constant bus restriction to avoid this copy.
-
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .addReg(MaterializedReg,
-                    MaterializedReg != FrameReg ? RegState::Kill : 0);
-        MaterializedReg = ScavengedVGPR;
-      }
-
-      // TODO: In the flat scratch case, if this is an add of an SGPR, and SCC
-      // is not live, we could use a scalar add + vector add instead of 2
-      // vector adds.
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(MI->getOpcode()))
-                        .addDef(DstReg, RegState::Renamable);
-      if (NumDefs == 2)
-        AddI32.add(MI->getOperand(1));
-
-      unsigned MaterializedRegFlags =
-          MaterializedReg != FrameReg ? RegState::Kill : 0;
-
-      if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
-        // If we know we have a VGPR already, it's more likely the other
-        // operand is a legal vsrc0.
-        AddI32
-            .add(*OtherOp)
-            .addReg(MaterializedReg, MaterializedRegFlags);
-      } else {
-        // Commute operands to avoid violating VOP2 restrictions. This will
-        // typically happen when using scratch.
-        AddI32
-            .addReg(MaterializedReg, MaterializedRegFlags)
-            .add(*OtherOp);
-      }
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
-          MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
-        AddI32.addImm(0); // clamp
-
-      if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
-        AddI32.setOperandDead(3); // Dead vcc
-
-      MaterializedReg = DstReg;
-
-      OtherOp->ChangeToRegister(MaterializedReg, false);
-      OtherOp->setIsKill(true);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else if (Offset != 0) {
-      assert(!MaterializedReg);
-      FIOp->ChangeToImmediate(Offset);
-      Offset = 0;
-    } else {
-      if (DeadVCC && !HasClamp) {
-        assert(Offset == 0);
-
-        // TODO: Losing kills and implicit operands. Just mutate to copy and
-        // let lowerCopy deal with it?
-        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
-          // Folded to an identity copy.
-          MI->eraseFromParent();
-          return true;
-        }
-
-        // The immediate value should be in OtherOp
-        MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        MI->removeOperand(FIOperandNum);
-
-        unsigned NumOps = MI->getNumOperands();
-        for (unsigned I = NumOps - 2; I >= 2; --I)
-          MI->removeOperand(I);
-
-        if (NumDefs == 2)
-          MI->removeOperand(1);
-
-        // The code below can't deal with a mov.
-        return true;
-      }
-
-      // This folded to a constant, but we have to keep the add around for
-      // pointless implicit defs or clamp modifier.
-      FIOp->ChangeToImmediate(0);
-    }
-
-    // Try to improve legality by commuting.
-    if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
-      std::swap(FIOp, OtherOp);
-      std::swap(FIOperandNum, OtherOpIdx);
-    }
-
-    for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
-      // Depending on operand constraints we may need to insert another copy.
-      if (!TII->isOperandLegal(*MI, SrcIdx)) {
-        // If commuting didn't make the operands legal, we need to materialize
-        // in a register.
-        // TODO: Can use SGPR on gfx10+ in some cases.
-        if (!ScavengedVGPR) {
-          ScavengedVGPR = RS->scavengeRegisterBackwards(
-              AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false,
-              /*SPAdj=*/0);
-        }
-
-        assert(ScavengedVGPR != DstReg);
-
-        MachineOperand &Src = MI->getOperand(SrcIdx);
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), ScavengedVGPR)
-            .add(Src);
-
-        Src.ChangeToRegister(ScavengedVGPR, false);
-        Src.setIsKill(true);
-      }
-    }
-
-    // Fold out add of 0 case that can appear in kernels.
-    if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
-      if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
-        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::COPY), DstReg).add(*OtherOp);
-      }
-
-      MI->eraseFromParent();
-    }
-
-    return true;
-  }
-  case AMDGPU::S_ADD_I32: {
-    // TODO: Handle s_or_b32, s_and_b32.
-    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
-    MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
-
-    assert(FrameReg || MFI->isBottomOfStack());
-
-    MachineOperand &DstOp = MI->getOperand(0);
-    const DebugLoc &DL = MI->getDebugLoc();
-    Register MaterializedReg = FrameReg;
-
-    // Defend against live scc, which should never happen in practice.
-    bool DeadSCC = MI->getOperand(3).isDead();
-
-    Register TmpReg;
-
-    if (FrameReg && !ST.enableFlatScratch()) {
-      // FIXME: In the common case where the add does not also read its result
-      // (i.e. this isn't a reg += fi), it's not finding the dest reg as
-      // available.
-      TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
-                                             false, 0);
-      BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
-          .addDef(TmpReg, RegState::Renamable)
-          .addReg(FrameReg)
-          .addImm(ST.getWavefrontSizeLog2())
-          .setOperandDead(3); // Set SCC dead
-      MaterializedReg = TmpReg;
-    }
-
-    int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-    // For the non-immediate case, we could fall through to the default
-    // handling, but we do an in-place update of the result register here to
-    // avoid scavenging another register.
-    if (OtherOp.isImm()) {
-      OtherOp.setImm(OtherOp.getImm() + Offset);
-      Offset = 0;
-
-      if (MaterializedReg)
-        FIOp->ChangeToRegister(MaterializedReg, false);
-      else
-        FIOp->ChangeToImmediate(0);
-    } else if (MaterializedReg) {
-      // If we can't fold the other operand, do another increment.
-      Register DstReg = DstOp.getReg();
-
-      if (!TmpReg && MaterializedReg == FrameReg) {
-        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                               MI, false, 0);
-        DstReg = TmpReg;
-      }
-
-      auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
-                        .addDef(DstReg, RegState::Renamable)
-                        .addReg(MaterializedReg, RegState::Kill)
-                        .add(OtherOp);
-      if (DeadSCC)
-        AddI32.setOperandDead(3);
-
-      MaterializedReg = DstReg;
-
-      OtherOp.ChangeToRegister(MaterializedReg, false);
-      OtherOp.setIsKill(true);
-      OtherOp.setIsRenamable(true);
-      FIOp->ChangeToImmediate(Offset);
-    } else {
-      // If we don't have any other offset to apply, we can just directly
-      // interpret the frame index as the offset.
-      FIOp->ChangeToImmediate(Offset);
-    }
-
-    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(OtherOpIdx);
-      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
-      assert(Offset == 0);
-      MI->removeOperand(3);
-      MI->removeOperand(FIOperandNum);
-      MI->setDesc(
-          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
-    }
-
-    assert(!FIOp->isFI());
-    return true;
-  }
   default: {
     // Other access to frame index
     const DebugLoc &DL = MI->getDebugLoc();
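Context for the removed hunk: both deleted cases implement the same folding idea when eliminateFrameIndex meets an add whose source is a frame index. The frame register is materialized (shifted right by log2 of the wavefront size in the non-flat-scratch path), and the object's static offset is folded in place into an existing immediate operand whenever possible, so no extra add is needed. The following is a minimal stand-alone C++ sketch of just that offset-folding arithmetic; it is not LLVM API code, and the Operand type and foldFrameIndexOffset helper are hypothetical stand-ins.

// Stand-alone sketch (not LLVM API): Operand and foldFrameIndexOffset are
// hypothetical, used only to illustrate the folding arithmetic above.
#include <cassert>
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for an operand that is either an immediate or a
// register (Reg == 0 means "no register").
struct Operand {
  bool IsImm = false;
  int64_t Imm = 0;  // valid when IsImm
  unsigned Reg = 0; // valid when !IsImm
};

// Resolve "dst = add Other, <frame index>" given that the frame index lowers
// to FrameReg + ObjectOffset. If Other is an immediate, fold the offset into
// it in place; the frame-index operand then becomes the bare frame register
// (or a plain immediate when there is no frame register). Returns any
// leftover offset that would still require an extra add.
int64_t foldFrameIndexOffset(Operand &Other, Operand &FIOp, unsigned FrameReg,
                             int64_t ObjectOffset) {
  int64_t Offset = ObjectOffset;
  if (Other.IsImm) {
    Other.Imm += Offset; // in-place fold into the existing immediate
    Offset = 0;
  }
  if (FrameReg) {
    FIOp = {/*IsImm=*/false, /*Imm=*/0, /*Reg=*/FrameReg};
    return Offset; // nonzero only when Other was not an immediate
  }
  FIOp = {/*IsImm=*/true, /*Imm=*/Offset, /*Reg=*/0};
  return 0; // frame index folded to a plain immediate
}

int main() {
  Operand Other{/*IsImm=*/true, /*Imm=*/16, /*Reg=*/0};
  Operand FI; // the frame-index operand before elimination
  int64_t Leftover =
      foldFrameIndexOffset(Other, FI, /*FrameReg=*/1, /*ObjectOffset=*/64);
  assert(Leftover == 0 && Other.Imm == 80 && FI.Reg == 1);
  std::cout << "folded immediate: " << Other.Imm << "\n";
}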