Skip to content

[InitUndef] Enable the InitUndef pass on non-AMDGPU targets #108353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions llvm/include/llvm/CodeGen/TargetRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1203,19 +1203,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const {
return false;
}

/// Returns if the architecture being targeted has the required Pseudo
/// Instructions for initializing the register. By default this returns false,
but where it is overridden for an architecture, the behaviour will be
/// different. This can either be a check to ensure the Register Class is
/// present, or to return true as an indication the architecture supports the
/// pass. If using the method that does not check for the Register Class, it
/// is imperative to ensure all required Pseudo Instructions are implemented,
/// otherwise compilation may fail with an `Unexpected register class` error.
virtual bool
doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const {
return false;
}
};

//===----------------------------------------------------------------------===//
Expand Down
12 changes: 6 additions & 6 deletions llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,13 +333,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
/// Get the list of MacroFusion predicates.
virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; };

/// supportsInitUndef is used to determine if an architecture supports
/// the Init Undef Pass. By default, it is assumed that it will not support
/// the pass, with architecture specific overrides providing the information
/// where they are implemented.
virtual bool supportsInitUndef() const { return false; }
/// Whether the target has instructions where an early-clobber result
/// operand cannot overlap with an undef input operand.
virtual bool requiresDisjointEarlyClobberAndUndef() const {
// Conservatively assume such instructions exist by default.
return true;
}
};

} // end namespace llvm

#endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H
14 changes: 3 additions & 11 deletions llvm/lib/CodeGen/InitUndef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ bool InitUndef::handleReg(MachineInstr *MI) {
continue;
if (!UseMO.getReg().isVirtual())
continue;
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
continue;

if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
Changed |= fixupIllOperand(MI, UseMO);
Expand All @@ -140,8 +138,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
if (UseMO.isTied())
continue;
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
continue;

Register Reg = UseMO.getReg();
if (NewRegs.count(Reg))
Expand Down Expand Up @@ -246,13 +242,9 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
ST = &MF.getSubtarget();

// supportsInitUndef is implemented to reflect if an architecture has support
// for the InitUndef pass. Support comes from having the relevant Pseudo
// instructions that can be used to initialize the register. The function
// returns false by default so requires an implementation per architecture.
// Support can be added by overriding the function in a way that best fits
// the architecture.
if (!ST->supportsInitUndef())
// The pass is only needed if early-clobber defs and undef ops cannot be
// allocated to the same register.
if (!ST->requiresDisjointEarlyClobberAndUndef())
return false;

MRI = &MF.getRegInfo();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// the nop.
return true;
}

bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
return false;
}
};

class GCNUserSGPRUsageInfo {
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/R600Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ class R600Subtarget final : public R600GenSubtargetInfo,
unsigned getMinWavesPerEU() const override {
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
}

bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
return false;
}
};

} // end namespace llvm
Expand Down
14 changes: 0 additions & 14 deletions llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
unsigned SrcSubReg) const override;

int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }

bool doesRegClassHavePseudoInitUndef(
const TargetRegisterClass *RC) const override {
(void)RC;
// For the ARM Architecture we want to always return true because all
// required PseudoInitUndef types have been added. If compilation fails due
// to `Unexpected register class`, this is likely to be because the specific
register being used is not supported by Init Undef and needs the Pseudo
// Instruction adding to ARMInstrInfo.td. If this is implemented as a
// conditional check, this could create a false positive where Init Undef is
// not running, skipping the instruction and moving to the next. This could
// lead to illegal instructions being generated by the register allocator.
return true;
}
};

} // end namespace llvm
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return &InstrInfo->getRegisterInfo();
}

/// The correct instructions have been implemented to initialize undef
/// registers, therefore the ARM Architecture is supported by the Init Undef
/// Pass. This will return true as the pass needs to be supported for all
/// types of instructions. The pass will then perform more checks to ensure it
/// should be applying the Pseudo Instructions.
bool supportsInitUndef() const override { return true; }

const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;

bool doesRegClassHavePseudoInitUndef(
const TargetRegisterClass *RC) const override {
return isVRRegClass(RC);
}

static bool isVRRegClass(const TargetRegisterClass *RC) {
return RISCVRI::isVRegClass(RC->TSFlags) &&
RISCVRI::getNF(RC->TSFlags) == 1;
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getTailDupAggressiveThreshold() const {
return TuneInfo->TailDupAggressiveThreshold;
}

bool supportsInitUndef() const override { return hasVInstructions(); }
};
} // End llvm namespace

Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,10 @@ define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) {
}

; The stxp result cannot be allocated to the same register as the inputs.
; FIXME: This is a miscompile.
define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: test_stxp_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: stxp w8, x8, x1, [x0]
; CHECK-NEXT: stxp w8, x9, x1, [x0]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use an additional MIR test for something so specific

Copy link
Contributor Author

@nikic nikic Sep 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pass itself already has MIR tests. I don't think adding another MIR test for this case would be useful, because we care about the end-to-end behavior (from the interaction of three different passes) here.

What would be quite nice is if the old code would emit an error, as it's hard to spot the issue otherwise. Currently it only generates an error when parsed back as inline assembly. Not sure whether there is a good way to share that validation, I'm not familiar with this part of the backend...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AArch64InstrInfo::verifyInstruction is the place for target-specific verification of an instruction... but currently it only gets called with -verify-machineinstrs. (It looks like AMDGPUAsmPrinter::emitInstruction calls verifyInstruction, but other targets currently don't do that.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only see 3 places using run-pass=init-undef, all in RISCV. You could just dump the MIR before init-undef, extract the one instruction, and make sure the pass isn't a no-op on AArch64

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, if all we want to test is "the pass does something", then a MIR test makes sense. I've added one.

; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%res = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p)
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AArch64/init-undef.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s

---
name: test_stxp_undef
body: |
bb.0:
liveins: $x0, $x1

; CHECK-LABEL: name: test_stxp_undef
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:gpr64 = INIT_UNDEF
; CHECK-NEXT: early-clobber %3:gpr32 = STXPX killed [[INIT_UNDEF]], [[COPY]], [[COPY1]] :: (volatile store (s128))
; CHECK-NEXT: $w0 = COPY %3
; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:gpr64 = COPY $x1
%0:gpr64common = COPY $x0
%3:gpr64 = IMPLICIT_DEF
early-clobber %2:gpr32 = STXPX killed %3, %1, %0 :: (volatile store (s128))
$w0 = COPY %2
RET_ReallyLR implicit $w0

...
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; CHECK-NEXT: li a0, 55
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
; CHECK-NEXT: vloxseg2ei32.v v16, (a1), v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
Expand Down Expand Up @@ -81,7 +81,7 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; SUBREGLIVENESS-NEXT: li a0, 55
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a0), v8
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a1), v8
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
Expand Down
Loading