Skip to content

Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" #134408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: users/sdesmalen-arm/srlt-commute-implicit-def
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 70 additions & 16 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,11 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
///
/// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
/// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
bool IsSubregToReg);

/// If the given machine operand reads only undefined lanes add an undef
/// flag.
Expand Down Expand Up @@ -1444,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,

// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
Expand All @@ -1458,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
if (MO.isDef() && MO.getReg().isVirtual() &&
MRI->shouldTrackSubRegLiveness(DstReg))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}

Expand Down Expand Up @@ -1500,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());

// We're only expecting another def of the main output, so the range
// should get updated with the regular output range.
//
// FIXME: The range updating below probably needs updating to look at
// the super register if subranges are tracked.
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
"subrange update for implicit-def of super register may not be "
"properly handled");
// If lanemasks need to be tracked, compile the lanemask of the NewMI
// implicit def operands to avoid subranges for the super-regs from
// being removed by code later on in this function.
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
Expand All @@ -1531,7 +1536,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);

// Update machine operands and add flags.
updateRegDefsUses(DstReg, DstReg, DstIdx);
updateRegDefsUses(DstReg, DstReg, DstIdx, false);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefsUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
Expand Down Expand Up @@ -1607,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
if ((SR.LaneMask & DstMask).none() &&
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
Expand Down Expand Up @@ -1872,7 +1878,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}

void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
unsigned SubIdx, bool IsSubregToReg) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);

Expand All @@ -1892,6 +1898,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}

// If DstInt already has a subrange for the unused lanes, then we shouldn't
// create duplicate subranges when we update the interval for unused lanes.
LaneBitmask DefinedLanes;
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
for (LiveInterval::SubRange &SR : DstInt->subranges())
DefinedLanes |= SR.LaneMask;
}

SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
Expand All @@ -1915,15 +1929,21 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));

bool FullDef = true;
bool DeadDef = false;

// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);

// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
if (SubIdx && MO.isDef()) {
MO.setIsUndef(!Reads);
FullDef = false;
DeadDef = MO.isDead();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think dead flags are required to be accurate; it might be safer to check whether LiveIntervals thinks it's really dead.

}

// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
Expand Down Expand Up @@ -1956,6 +1976,35 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}

if (IsSubregToReg && !FullDef && !DeadDef) {
// If the coalesced instruction doesn't fully define the register, we need
// to preserve the original super register liveness for SUBREG_TO_REG.
//
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
// but it introduces liveness for other subregisters. Downstream users may
// have been relying on those bits, so we need to ensure their liveness is
// captured with a def of other lanes.
//
// The implicit-def only needs adding if we track subregister liveness
// for this register, otherwise there is no point.

if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
assert(DstInt->hasSubRanges() &&
"SUBREG_TO_REG should have resulted in subrange");
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
if ((UnusedLanes).any()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
DefinedLanes |= UnusedLanes;
}

MachineInstrBuilder MIB(*MF, UseMI);
MIB.addReg(DstReg, RegState::ImplicitDefine);
}
}

LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
Expand Down Expand Up @@ -2157,6 +2206,8 @@ bool RegisterCoalescer::joinCopy(
});
}

const bool IsSubregToReg = CopyMI->isSubregToReg();

ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;

Expand Down Expand Up @@ -2226,9 +2277,12 @@ bool RegisterCoalescer::joinCopy(

// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
if (CP.getDstIdx())
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
if (CP.getDstIdx()) {
assert(!IsSubregToReg && "can this happen?");
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
}
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
IsSubregToReg);

// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s

; Check there's no assert in spilling from implicit-def operands on an
; IMPLICIT_DEF.
Expand Down Expand Up @@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl _fprintf
; CHECK-NEXT: brk #0x1
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: sub sp, sp, #208
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: mov w9, #9 ; =0x9
; CHECK-NEXT: mov w10, #8 ; =0x8
; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: stp x9, x8, [sp, #24]
; CHECK-NEXT: mov w8, #7 ; =0x7
; CHECK-NEXT: mov w8, #8 ; =0x8
; CHECK-NEXT: mov w9, #6 ; =0x6
; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: str x8, [sp, #16]
; CHECK-NEXT: mov w8, #7 ; =0x7
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
Expand All @@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
; CHECK-NEXT: stp x8, x10, [sp, #8]
; CHECK-NEXT: str x9, [sp]
; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: bl _callee
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
Expand Down
51 changes: 51 additions & 0 deletions llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -enable-subreg-liveness=false < %s | FileCheck %s
; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s
;
; Reduced reproducer; the function name is the mangled llvm::APInt::divide.
; Both RUN lines use the same (default) CHECK prefix, so the generated code is
; expected to be identical with subregister liveness tracking disabled and
; enabled.
;
; The i32 value %add7 is zero-extended to i64 on both paths out of the entry
; block (%idxprom feeds the tail call, %idxprom12 feeds the volatile load).
; NOTE(review): presumably this is the pattern that exercises SUBREG_TO_REG
; coalescing on AArch64 - confirm against the accompanying change.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) {
; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w9, w0, #1
; CHECK-NEXT: mov w10, #1 // =0x1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: sub w9, w9, w1, lsl #1
; CHECK-NEXT: bfi w0, w8, #1, #31
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: bfi w10, w9, #2, #30
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: b.hs .LBB0_2
; CHECK-NEXT: // %bb.1: // %if.then15
; CHECK-NEXT: lsl x8, x0, #2
; CHECK-NEXT: ldr xzr, [x8]
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: b _Znam
%mul = shl i32 %rhsWords, 1
%mul1 = shl i32 %lhsWords, 1
%sub = sub i32 %mul1, %mul
%add7 = or i32 %mul1, 1
%idxprom = zext i32 %add7 to i64
%mul3 = shl i32 %sub, 1
%add4 = or i32 %mul3, 1
%1 = icmp ult i32 %add4, 0
br i1 %1, label %if.then15, label %3

common.ret: ; preds = %3, %if.then15
ret void

if.then15: ; preds = %0
%idxprom12 = zext i32 %add7 to i64
%arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12
%2 = load volatile ptr, ptr %arrayidx13, align 8
br label %common.ret

3: ; preds = %0
%call = tail call ptr @_Znam(i64 %idxprom)
br label %common.ret
}

declare ptr @_Znam(i64)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s --check-prefix=NOSRLT
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s --check-prefix=SRLT
#
# The 32-bit ORRWrr result is widened with SUBREG_TO_REG and copied to $x1.
# After the register coalescer and the virtual register rewriter have run, the
# ORRWrr def of $w1 must carry an implicit-def of the full 64-bit $x1.
#
# NOSRLT: subregister liveness tracking disabled.
# SRLT:   subregister liveness tracking enabled; the implicit-def is printed
#         as renamable in this configuration.
---
name: test
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $x1
    ; NOSRLT-LABEL: name: test
    ; NOSRLT: liveins: $x1
    ; NOSRLT-NEXT: {{ $}}
    ; NOSRLT-NEXT: renamable $x0 = COPY $x1
    ; NOSRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def $x1
    ; NOSRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
    ;
    ; SRLT-LABEL: name: test
    ; SRLT: liveins: $x1
    ; SRLT-NEXT: {{ $}}
    ; SRLT-NEXT: renamable $x0 = COPY $x1
    ; SRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
    ; SRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
    %190:gpr64 = COPY killed $x1
    %191:gpr32 = COPY %190.sub_32:gpr64
    %192:gpr32 = ORRWrr $wzr, killed %191:gpr32
    %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
    $x0 = COPY killed %190:gpr64
    $x1 = COPY killed %193:gpr64all
    RET_ReallyLR implicit $x1, implicit $x0
...
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# CHECK-DBG: ********** JOINING INTERVALS ***********
# CHECK-DBG: ********** INTERVALS **********
# CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00
---
name: test
Expand Down Expand Up @@ -43,7 +43,7 @@ body: |
# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r
# CHECK-DBG-SAME: weight:0.000000e+00
# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi
# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
# CHECK-DBG-SAME: weight:0.000000e+00
---
Expand Down Expand Up @@ -127,3 +127,55 @@ body: |
B %bb.1

...
# Test that the subrange `L0000000000000080 [112r,112d:1)` is not removed
# when removing undefined subranges.
#
# CHECK-DBG: ********** REGISTER COALESCER **********
# CHECK-DBG: ********** Function: reproducer3
# CHECK-DBG: ********** JOINING INTERVALS ***********
# CHECK-DBG: ********** INTERVALS **********
# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r
# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi
# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00
# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00
# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00
# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00
# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00
# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
# CHECK-DBG-SAME: weight:0.000000e+00
# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00
# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00
---
name: reproducer3
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1

%0:gpr32 = COPY killed $w1
%1:gpr32 = COPY killed $w0
%3:gpr32 = UBFMWri %1, 31, 30
%4:gpr32 = SUBWrs killed %3, killed %0, 1
%5:gpr32 = UBFMWri killed %4, 1, 31
%6:gpr32 = MOVi32imm 1
%7:gpr32 = COPY %6
%7:gpr32 = BFMWri %7, killed %1, 31, 30
%8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
%9:gpr32common = COPY killed %6
%9:gpr32common = BFMWri %9, killed %5, 30, 29
dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
Bcc 2, %bb.2, implicit killed $nzcv
B %bb.1

bb.1:
%10:gpr64common = UBFMXri killed %8, 62, 61
dead $xzr = LDRXui killed %10, 0
RET_ReallyLR

bb.2:
$x0 = COPY killed %8
RET_ReallyLR implicit killed $x0

...
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() {
; GFX11-TRUE16: ; %bb.0: ; %bb
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off
; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@broxigarchen I noticed these tests changed, but I couldn't really tell whether these changes are functionally equivalent.

Copy link
Contributor

@broxigarchen broxigarchen Apr 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Sander, these changes seem good to me. Since I am not familiar with this pass, I will leave the approval to the other reviewers.

; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
Expand Down
Loading