Skip to content

Commit e31c3cc

Browse files
petar-avramovic authored and michaelselehov committed
AMDGPU: Fix temporal divergence introduced by machine-sink (llvm#67456)
Temporal divergence that was present in the input or introduced by IR transforms, such as code sinking or LICM, is handled in SIFixSGPRCopies by changing the SGPR source instruction to a VGPR instruction. After 5b657f5, which moved LICM after AMDGPUCodeGenPrepare, machine-sinking can introduce temporal divergence by sinking instructions outside of the cycle. Add an isSafeToSink callback in TargetInstrInfo.

Change-Id: I753744d3807cc05dfca687ed359fdceb9afa2fdc
1 parent 4d82d54 commit e31c3cc

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
167167
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s59
168168
; CHECK-NEXT: s_add_i32 s58, s58, 4
169169
; CHECK-NEXT: s_add_i32 s4, s55, s58
170+
; CHECK-NEXT: v_add_nc_u32_e32 v0, s58, v57
170171
; CHECK-NEXT: s_add_i32 s5, s4, 5
171172
; CHECK-NEXT: s_add_i32 s4, s4, 1
172173
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42
@@ -267,7 +268,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
267268
; CHECK-NEXT: .LBB0_16: ; %Flow43
268269
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
269270
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
270-
; CHECK-NEXT: v_add_nc_u32_e32 v57, s58, v57
271+
; CHECK-NEXT: v_mov_b32_e32 v57, v0
271272
; CHECK-NEXT: .LBB0_17: ; %Flow44
272273
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
273274
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s56
@@ -869,6 +870,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
869870
; CHECK-NEXT: s_add_i32 s7, s7, 4
870871
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
871872
; CHECK-NEXT: s_add_i32 s8, s4, s7
873+
; CHECK-NEXT: v_add_nc_u32_e32 v0, s7, v47
872874
; CHECK-NEXT: s_add_i32 s9, s8, 5
873875
; CHECK-NEXT: s_add_i32 s8, s8, 1
874876
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s9, v41
@@ -879,7 +881,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
879881
; CHECK-NEXT: ; %bb.4: ; %Flow3
880882
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
881883
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
882-
; CHECK-NEXT: v_add_nc_u32_e32 v47, s7, v47
884+
; CHECK-NEXT: v_mov_b32_e32 v47, v0
883885
; CHECK-NEXT: .LBB1_5: ; %Flow4
884886
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
885887
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5

llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ body: |
2222
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %6, %bb.1
2323
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %8, %bb.1
2424
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PHI1]], [[S_MOV_B32_2]], implicit-def dead $scc
25+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[S_ADD_I32_]], 0, implicit $exec
2526
; CHECK-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[S_ADD_I32_]], 0, 0, implicit $mode, implicit $exec
2627
; CHECK-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_GT_F32_e64 0, killed [[V_CVT_F32_U32_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
2728
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[V_CMP_GT_F32_e64_]], [[PHI]], implicit-def dead $scc
@@ -30,7 +31,6 @@ body: |
3031
; CHECK-NEXT: {{ $}}
3132
; CHECK-NEXT: bb.2:
3233
; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
33-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[S_ADD_I32_]], 0, implicit $exec
3434
; CHECK-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
3535
; CHECK-NEXT: SI_RETURN
3636
bb.0:
@@ -83,6 +83,9 @@ body: |
8383
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %6, %bb.1
8484
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %8, %bb.1
8585
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PHI1]], [[S_MOV_B32_2]], implicit-def dead $scc
86+
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[S_MOV_B32_2]], implicit-def dead $scc
87+
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_1]], [[S_MOV_B32_2]], implicit-def dead $scc
88+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_2]], [[S_ADD_I32_2]], 0, implicit $exec
8689
; CHECK-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[S_ADD_I32_]], 0, 0, implicit $mode, implicit $exec
8790
; CHECK-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_GT_F32_e64 0, killed [[V_CVT_F32_U32_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
8891
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[V_CMP_GT_F32_e64_]], [[PHI]], implicit-def dead $scc
@@ -91,9 +94,6 @@ body: |
9194
; CHECK-NEXT: {{ $}}
9295
; CHECK-NEXT: bb.2:
9396
; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
94-
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[S_MOV_B32_2]], implicit-def dead $scc
95-
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_1]], [[S_MOV_B32_2]], implicit-def dead $scc
96-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_2]], [[S_ADD_I32_2]], 0, implicit $exec
9797
; CHECK-NEXT: FLAT_STORE_DWORD [[COPY1]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
9898
; CHECK-NEXT: SI_RETURN
9999
bb.0:

0 commit comments

Comments
 (0)