-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Do not remat instructions with PhysReg uses #124366
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Change-Id: Ibb1f27bf559d641e26f5a4ae80d3561516c7a5e0
@llvm/pr-subscribers-backend-amdgpu Author: Jeffrey Byrnes (jrbyrnes) Changes: This blocks rematerialization during scheduling if the instruction has a non-accepted PhysReg use, i.e. a physical-register use that is neither a constant physical register nor an ignorable use (such as the implicit exec use on VALU instructions). Currently, there aren't any checks like this in place, and we may create invalid code: https://godbolt.org/z/xjPjdcorf Patch is 39.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124366.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b00105ae9bd528..6e693066de10b6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1842,15 +1842,23 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
return true;
}
-// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
- for (const MachineOperand &MO : MI.all_uses())
+ for (const MachineOperand &MO : MI.all_uses()) {
if (MO.getReg().isVirtual())
return false;
+ // We can't remat physreg uses, unless it is a constant or an ignorable
+ // use (e.g. implicit exec use on VALU instructions)
+ if (MO.getReg().isPhysical()) {
+ if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
+ continue;
+ return false;
+ }
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 9f264de531950b..7662abc0aaf858 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -84,13 +84,11 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
S_NOP 0
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -191,14 +189,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -300,7 +296,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -308,7 +303,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -408,7 +402,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -416,7 +409,6 @@ body: |
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -529,7 +521,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -537,14 +528,12 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -552,7 +541,6 @@ body: |
S_NOP 0, implicit %25
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -666,7 +654,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -674,7 +661,6 @@ body: |
S_NOP 0, implicit %23, implicit %22
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
@@ -682,7 +668,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
@@ -690,7 +675,6 @@ body: |
S_NOP 0, implicit %25, implicit %26
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -949,14 +933,12 @@ body: |
undef %23.sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23.sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1053,7 +1035,6 @@ body: |
undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
@@ -1062,7 +1043,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1581,7 +1561,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -1589,7 +1568,6 @@ body: |
S_NOP 0, implicit %24, implicit %25
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -2528,14 +2506,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
S_NOP 0, implicit %23
@@ -2543,7 +2519,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -2551,7 +2526,6 @@ body: |
S_NOP 0, implicit %26, implicit %27
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %25
S_NOP 0, implicit %0, implicit %1
@@ -2650,7 +2624,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -2658,7 +2631,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -2759,7 +2731,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -2767,7 +2738,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5030,7 +5000,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5038,7 +5007,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5137,14 +5105,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5242,7 +5208,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5250,7 +5215,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5348,7 +5312,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5357,7 +5320,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5456,7 +5418,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5466,7 +5427,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5562,14 +5522,12 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5669,14 +5627,12 @@ body: |
undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23.sub1
S_NOP 0, implicit %0, implicit %1
@@ -5779,14 +5735,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
DBG_VALUE %23, 0, 0
S_NOP 0, implicit %23
@@ -5889,14 +5843,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -5914,3 +5866,411 @@ body: |
S_ENDPGM 0
...
+---
+name: test_occ_8_physreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: test_occ_8_physreg_use
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-NEXT: $vgpr9 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-NEXT: S_BRANCH %bb.1
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-NEXT:...
[truncated]
|
if (MO.getReg().isVirtual()) | ||
return false; | ||
|
||
// We can't remat physreg uses, unless it is a constant or an ignorable | ||
// use (e.g. implicit exec use on VALU instructions) | ||
if (MO.getReg().isPhysical()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You already know it's not virtual because of the "if" above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed by #124327
This blocks rematerialization during scheduling if the instruction has a non-accepted PhysReg use, i.e. a physical-register use that is neither a constant physical register nor an ignorable use.
Currently, there aren't any checks like this in place, and we may create invalid code: https://godbolt.org/z/xjPjdcorf