
Commit b548254

arsenm authored and jph-13 committed
AMDGPU: Try to perform copy to agpr from reg_sequence at the copy (llvm#129463)
SIFoldOperands is frustratingly written in a def-folds-into-use iteration pattern, with a few random cases starting at the uses. We were handling this case by looking at the reg_sequence and finding the copy. That did not work for the most basic pattern of materializing a vector constant that started in SGPRs; it just happens there is an optimization bug in SelectionDAG that produced the expected pattern.

Perform an additional attempt at the fold rooted at the copy. This mostly shows test improvements, but there were some tricky updates to perform. remaining-virtual-register-operands.ll managed to stop failing the allocator, so it needed to be tricked into failing again. I also do not understand what schedule-xdl-resource.ll is trying to test, so this changes it to match some output that does exist in the debug output.
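To illustrate the shape of the transformation, here is a schematic MIR sketch (register names and classes are invented for illustration, not taken from the tests). Instead of starting at the reg_sequence and searching for the copy, the fold now rewrites the copy itself into a REG_SEQUENCE of AGPR initializers, rematerializing inline immediates with v_accvgpr_write:

    ; Before: a constant materialized in SGPRs reaches an AGPR copy.
    %c:sreg_32 = S_MOV_B32 0
    %seq:sreg_64 = REG_SEQUENCE %c, %subreg.sub0, %c, %subreg.sub1
    %acc:areg_64 = COPY %seq

    ; After the fold rooted at the COPY: the inline immediate is written
    ; directly into AGPRs and the COPY becomes a REG_SEQUENCE.
    %a0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
    %a1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
    %acc:areg_64 = REG_SEQUENCE %a0, %subreg.sub0, %a1, %subreg.sub1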
1 parent 568995e commit b548254

15 files changed: +2051, -2458 lines

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 106 additions & 47 deletions
@@ -1510,76 +1510,128 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
   // only accept VGPR or inline immediate. Recreate a reg_sequence with its
   // initializers right here, so we will rematerialize immediates and avoid
   // copies via different reg classes.
-  if (!TRI->isAGPR(*MRI, CopyMI->getOperand(0).getReg()))
+  const TargetRegisterClass *DefRC =
+      MRI->getRegClass(CopyMI->getOperand(0).getReg());
+  if (!TRI->isAGPRClass(DefRC))
     return false;
+
   Register UseReg = CopyMI->getOperand(1).getReg();
-  SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
-  if (!getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32))
+  MachineInstr *RegSeq = MRI->getVRegDef(UseReg);
+  if (!RegSeq || !RegSeq->isRegSequence())
     return false;
 
   const DebugLoc &DL = CopyMI->getDebugLoc();
   MachineBasicBlock &MBB = *CopyMI->getParent();
 
+  MachineInstrBuilder B(*MBB.getParent(), CopyMI);
+  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
+  SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenInputs;
+
+  const TargetRegisterClass *UseRC =
+      MRI->getRegClass(CopyMI->getOperand(1).getReg());
+
+  // Value, subregindex for new REG_SEQUENCE
+  SmallVector<std::pair<MachineOperand *, unsigned>, 32> NewDefs;
+
+  unsigned NumRegSeqOperands = RegSeq->getNumOperands();
+  unsigned NumFoldable = 0;
+
+  for (unsigned I = 1; I != NumRegSeqOperands; I += 2) {
+    MachineOperand &RegOp = RegSeq->getOperand(I);
+    unsigned SubRegIdx = RegSeq->getOperand(I + 1).getImm();
+
+    if (RegOp.getSubReg()) {
+      // TODO: Handle subregister compose
+      NewDefs.emplace_back(&RegOp, SubRegIdx);
+      continue;
+    }
+
+    MachineOperand *Lookup = lookUpCopyChain(*TII, *MRI, RegOp.getReg());
+    if (!Lookup)
+      Lookup = &RegOp;
+
+    if (Lookup->isImm()) {
+      // Check if this is an agpr_32 subregister.
+      const TargetRegisterClass *DestSuperRC = TRI->getMatchingSuperRegClass(
+          DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);
+      if (DestSuperRC &&
+          TII->isInlineConstant(*Lookup, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+        ++NumFoldable;
+        NewDefs.emplace_back(Lookup, SubRegIdx);
+        continue;
+      }
+    }
+
+    const TargetRegisterClass *InputRC =
+        Lookup->isReg() ? MRI->getRegClass(Lookup->getReg())
+                        : MRI->getRegClass(RegOp.getReg());
+
+    // TODO: Account for Lookup->getSubReg()
+
+    // If we can't find a matching super class, this is an SGPR->AGPR or
+    // VGPR->AGPR subreg copy (or something constant-like we have to
+    // materialize in the AGPR). We can't directly copy from SGPR to AGPR on
+    // gfx908, so we want to rewrite to copy to an intermediate VGPR class.
+    const TargetRegisterClass *MatchRC =
+        TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);
+    if (!MatchRC) {
+      ++NumFoldable;
+      NewDefs.emplace_back(&RegOp, SubRegIdx);
+      continue;
+    }
+
+    NewDefs.emplace_back(&RegOp, SubRegIdx);
+  }
+
+  // Do not clone a reg_sequence and merely change the result register class.
+  if (NumFoldable == 0)
+    return false;
+
   CopyMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
   for (unsigned I = CopyMI->getNumOperands() - 1; I > 0; --I)
     CopyMI->removeOperand(I);
 
-  MachineInstrBuilder B(*MBB.getParent(), CopyMI);
-  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
-  SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
-  for (unsigned I = 0, NumElts = Defs.size(); I != NumElts; ++I) {
-    MachineOperand *Def = Defs[I].first;
-    TargetInstrInfo::RegSubRegPair CopyToVGPR;
-    if (Def->isImm() &&
-        TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
-      int64_t Imm = Def->getImm();
-
-      auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+  for (auto [Def, DestSubIdx] : NewDefs) {
+    if (!Def->isReg()) {
+      // TODO: Should we use single write for each repeated value like in
+      // register case?
+      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
       BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)
-          .addImm(Imm);
+          .add(*Def);
       B.addReg(Tmp);
-    } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
-      auto Src = getRegSubRegPair(*Def);
+    } else {
+      TargetInstrInfo::RegSubRegPair Src = getRegSubRegPair(*Def);
       Def->setIsKill(false);
-      if (!SeenAGPRs.insert(Src)) {
+
+      Register &VGPRCopy = VGPRCopies[Src];
+      if (!VGPRCopy) {
+        const TargetRegisterClass *VGPRUseSubRC =
+            TRI->getSubRegisterClass(UseRC, DestSubIdx);
+
         // We cannot build a reg_sequence out of the same registers, they
         // must be copied. Better do it here before copyPhysReg() created
         // several reads to do the AGPR->VGPR->AGPR copy.
-        CopyToVGPR = Src;
-      } else {
-        B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0, Src.SubReg);
-      }
-    } else {
-      assert(Def->isReg());
-      Def->setIsKill(false);
-      auto Src = getRegSubRegPair(*Def);
 
-      // Direct copy from SGPR to AGPR is not possible. To avoid creation
-      // of exploded copies SGPR->VGPR->AGPR in the copyPhysReg() later,
-      // create a copy here and track if we already have such a copy.
-      if (TRI->isSGPRReg(*MRI, Src.Reg)) {
-        CopyToVGPR = Src;
+        // Direct copy from SGPR to AGPR is not possible on gfx908. To avoid
+        // creation of exploded copies SGPR->VGPR->AGPR in the copyPhysReg()
+        // later, create a copy here and track if we already have such a copy.
+        if (TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg) !=
+            VGPRUseSubRC) {
+          VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);
+          BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), VGPRCopy).add(*Def);
+          B.addReg(VGPRCopy);
+        } else {
+          // If it is already a VGPR, do not copy the register.
+          B.add(*Def);
+        }
       } else {
-        auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
-        BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
-        B.addReg(Tmp);
+        B.addReg(VGPRCopy);
       }
     }
 
-    if (CopyToVGPR.Reg) {
-      auto [It, Inserted] = VGPRCopies.try_emplace(CopyToVGPR);
-      Register &Vgpr = It->second;
-      if (Inserted) {
-        Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-        BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
-      }
-      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
-      BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr);
-      B.addReg(Tmp);
-    }
-
-    B.addImm(Defs[I].second);
+    B.addImm(DestSubIdx);
   }
+
   LLVM_DEBUG(dbgs() << "Folded " << *CopyMI);
   return true;
 }
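A hedged sketch of the non-immediate path above (again schematic MIR with invented names): on gfx908 an SGPR element cannot be copied directly into an AGPR, so the rewrite routes it through an intermediate VGPR of the matching subregister class, and the VGPRCopies map reuses that one copy when the same source repeats across reg_sequence elements:

    ; One VGPR copy per distinct input, reused for repeated elements:
    %v:vgpr_32 = COPY %s            ; %s is an sgpr_32 input
    %acc:areg_64 = REG_SEQUENCE %v, %subreg.sub0, %v, %subreg.sub1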
@@ -1634,6 +1686,13 @@ bool SIFoldOperandsImpl::tryFoldFoldableCopy(
       foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI))
     return true;
 
+  // Fold copy to AGPR through reg_sequence
+  // TODO: Handle with subregister extract
+  if (OpToFold.isReg() && MI.isCopy() && !MI.getOperand(1).getSubReg()) {
+    if (foldCopyToAGPRRegSequence(&MI))
+      return true;
+  }
+
   bool Changed = foldInstOperand(MI, OpToFold);
 
   // If we managed to fold all uses of this copy then we might as well
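Schematically (invented MIR), the new call site fires only on a full-register copy whose source is a reg_sequence result; a copy of a subregister of the sequence is left for the TODO above:

    %seq:vreg_64 = REG_SEQUENCE %v0, %subreg.sub0, %v1, %subreg.sub1
    %acc:areg_64 = COPY %seq        ; handled: operand 1 has no subregister
    %lo:agpr_32 = COPY %seq.sub0    ; not handled yet (subregister extract)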

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 31 additions & 35 deletions
@@ -16,13 +16,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
 ; GCN-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
 ; GCN-NEXT: s_mov_b64 s[36:37], 1
 ; GCN-NEXT: v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
-; GCN-NEXT: s_mov_b32 s36, 2
-; GCN-NEXT: v_pk_mov_b32 v[2:3], s[36:37], s[36:37] op_sel:[0,1]
+; GCN-NEXT: s_mov_b32 s38, 2
+; GCN-NEXT: s_mov_b32 s39, s37
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x0
 ; GCN-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x40
+; GCN-NEXT: v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: v_accvgpr_write_b32 a0, s0
+; GCN-NEXT: v_accvgpr_write_b32 a16, s16
 ; GCN-NEXT: v_accvgpr_write_b32 a1, s1
 ; GCN-NEXT: v_accvgpr_write_b32 a2, s2
 ; GCN-NEXT: v_accvgpr_write_b32 a3, s3
@@ -38,7 +40,6 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
 ; GCN-NEXT: v_accvgpr_write_b32 a13, s13
 ; GCN-NEXT: v_accvgpr_write_b32 a14, s14
 ; GCN-NEXT: v_accvgpr_write_b32 a15, s15
-; GCN-NEXT: v_accvgpr_write_b32 a16, s16
 ; GCN-NEXT: v_accvgpr_write_b32 a17, s17
 ; GCN-NEXT: v_accvgpr_write_b32 a18, s18
 ; GCN-NEXT: v_accvgpr_write_b32 a19, s19
@@ -317,31 +318,29 @@ bb:
 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
 ; GCN-LABEL: test_mfma_f64_16x16x4f64_imm:
 ; GCN: ; %bb.0: ; %bb
-; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
-; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
-; GCN-NEXT: s_mov_b64 s[0:1], 0
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: s_load_dwordx2 s[10:11], s[4:5], 0x34
 ; GCN-NEXT: s_mov_b64 s[6:7], 1.0
-; GCN-NEXT: s_mov_b64 s[2:3], s[0:1]
+; GCN-NEXT: s_mov_b64 s[8:9], 0
+; GCN-NEXT: v_accvgpr_write_b32 a0, s8
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1]
-; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s0
-; GCN-NEXT: v_accvgpr_write_b32 a1, s1
-; GCN-NEXT: v_accvgpr_write_b32 a2, s2
-; GCN-NEXT: v_accvgpr_write_b32 a3, s3
-; GCN-NEXT: v_accvgpr_write_b32 a4, s4
-; GCN-NEXT: v_accvgpr_write_b32 a5, s5
+; GCN-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; GCN-NEXT: v_accvgpr_write_b32 a2, s8
+; GCN-NEXT: v_accvgpr_write_b32 a4, s8
 ; GCN-NEXT: v_accvgpr_write_b32 a6, s6
+; GCN-NEXT: v_accvgpr_write_b32 a1, s9
+; GCN-NEXT: v_accvgpr_write_b32 a3, s9
+; GCN-NEXT: v_accvgpr_write_b32 a5, s9
 ; GCN-NEXT: v_accvgpr_write_b32 a7, s7
-; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
+; GCN-NEXT: v_pk_mov_b32 v[2:3], s[10:11], s[10:11] op_sel:[0,1]
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: s_nop 7
 ; GCN-NEXT: s_nop 7
 ; GCN-NEXT: s_nop 0
-; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[8:9]
-; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16
+; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
 ; GCN-NEXT: s_endpgm
 bb:
 %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 0.0, double 0.0, double 0.0, double 1.0>, i32 0, i32 0, i32 0)
@@ -352,32 +351,29 @@ bb:
 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 {
 ; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_lit:
 ; GCN: ; %bb.0: ; %bb
-; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
-; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
-; GCN-NEXT: s_mov_b32 s0, 0
-; GCN-NEXT: s_mov_b32 s1, 0x405ec000
-; GCN-NEXT: s_mov_b64 s[2:3], s[0:1]
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s7, 0x405ec000
+; GCN-NEXT: v_accvgpr_write_b32 a0, s6
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_pk_mov_b32 v[0:1], s[10:11], s[10:11] op_sel:[0,1]
-; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GCN-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s0
-; GCN-NEXT: v_accvgpr_write_b32 a1, s1
-; GCN-NEXT: v_accvgpr_write_b32 a2, s2
-; GCN-NEXT: v_accvgpr_write_b32 a3, s3
-; GCN-NEXT: v_accvgpr_write_b32 a4, s4
-; GCN-NEXT: v_accvgpr_write_b32 a5, s5
+; GCN-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; GCN-NEXT: v_accvgpr_write_b32 a2, s6
+; GCN-NEXT: v_accvgpr_write_b32 a4, s6
 ; GCN-NEXT: v_accvgpr_write_b32 a6, s6
+; GCN-NEXT: v_accvgpr_write_b32 a1, s7
+; GCN-NEXT: v_accvgpr_write_b32 a3, s7
+; GCN-NEXT: v_accvgpr_write_b32 a5, s7
 ; GCN-NEXT: v_accvgpr_write_b32 a7, s7
-; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
+; GCN-NEXT: v_pk_mov_b32 v[2:3], s[8:9], s[8:9] op_sel:[0,1]
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: s_nop 7
 ; GCN-NEXT: s_nop 7
 ; GCN-NEXT: s_nop 0
-; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[8:9]
-; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16
+; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
 ; GCN-NEXT: s_endpgm
 bb:
 %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)

llvm/test/CodeGen/AMDGPU/acc-ldst.ll

Lines changed: 7 additions & 4 deletions
@@ -190,11 +190,11 @@ bb:
 
 ; NB: for atomics both vdata and vdst shall be either VGPR or AGPR
 ; GCN-LABEL: {{^}}test_atomic_mfma_4xi32_atomic_store:
+; GCN: v_accvgpr_write_b32 [[A_ZERO:a[0-9]+]], 0
 ; GCN: global_atomic_sub [[IN:v[0-9]+]], v{{[0-9:]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}] glc
+; GCN-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, [[A_ZERO]]
+; GCN-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, [[A_ZERO]]
 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[IN]]
-; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: v_mfma_i32_4x4x4i8 a[[[N:[0-9]+]]:
 ; GCN: v_accvgpr_read_b32 [[V:v[0-9]+]], a[[N]]{{$}}
 ; GCN: global_atomic_add v{{[0-9]+}}, v{{[0-9:]+}}, [[V]], s[{{[0-9:]+}}] glc
@@ -217,7 +217,10 @@ bb:
 
 ; GCN-LABEL: {{^}}test_atomic_mfma_4xi32_atomic64_store:
 ; GCN: global_atomic_sub_x2 v[{{[0-9:]+}}], v{{[0-9:]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] glc
-; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_accvgpr_write_b32 [[A_ZERO:a[0-9]+]], 0
+; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, [[A_ZERO]]
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: v_mfma_i32_4x4x4i8 a[[[N:[0-9]+]]:
 ; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
 ; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
