Skip to content

Commit 4d82d54

Browse files
petar-avramovic authored and michaelselehov committed
Cherry-picking fix for 413775
AMDGPU: Add test for temporal divergence introduced by machine-sink. The problem was introduced by 5b657f5, which moved LICM after AMDGPUCodeGenPrepare. Some instructions are no longer sunk during IR optimizations but in machine-sinking instead. If a VGPR instruction that uses an SGPR defined inside the cycle is sunk outside of the cycle, we end up with an unhandled case of temporal divergence. Add a test for the theoretical case where a SALU instruction (which represents a uniform value) is sunk outside of the cycle. Add a test where a SALU instruction can be sunk if it edits the lane mask. Change-Id: I18623ff7f99c06cb3f8982861c38ce0fd017f4bc
1 parent 288a298 commit 4d82d54

File tree

3 files changed

+248
-107
lines changed

3 files changed

+248
-107
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s
3+
# Test function `multi_else_break`. The body below has two parts: the
# autogenerated `; CHECK` lines describing the expected output, followed by
# the input MIR (bb.0 through bb.6).
#
# Control flow of the input, as given by the `successors:` lists:
#   bb.0 -> bb.1          entry; copies $vgpr4/$vgpr5 and defines constants
#   bb.1 -> bb.2          PHIs fed from bb.0 and bb.6
#   bb.2 -> bb.4 | bb.5   PHIs fed from bb.1 and bb.5; SI_IF on the V_CMP_LT result
#   bb.4 -> bb.5          increments %3 and recomputes the mask values
#   bb.5 -> bb.6 | bb.2   SI_LOOP %11 targets bb.2
#   bb.6 -> bb.3 | bb.1   SI_LOOP %12 targets bb.1
#   bb.3                  SI_END_CF %12, then S_ENDPGM
#
# What the CHECK lines pin down: in the input, `%12 = SI_IF_BREAK %8, %0`
# sits in bb.5, whereas in the expected output the corresponding instruction
# ([[SI_IF_BREAK1]], with operands [[PHI7]] and [[PHI]]) appears in bb.6.
# All other instructions are expected in the same block and order as in the
# input.
# NOTE(review): presumably the point of the test is that %12 is moved out of
# the bb.2..bb.5 cycle by the pass -- confirm intent against the change
# description.
4+
---
5+
name: multi_else_break
6+
tracksRegLiveness: true
7+
body: |
8+
; CHECK-LABEL: name: multi_else_break
9+
; CHECK: bb.0:
10+
; CHECK-NEXT: successors: %bb.1(0x80000000)
11+
; CHECK-NEXT: liveins: $vgpr4, $vgpr5
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
15+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
16+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
17+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
18+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
19+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
20+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: bb.1:
23+
; CHECK-NEXT: successors: %bb.2(0x80000000)
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6
26+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: bb.2:
29+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5
32+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5
33+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5
34+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5
35+
; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec
36+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
37+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc
38+
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc
39+
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
40+
; CHECK-NEXT: S_BRANCH %bb.4
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: bb.3:
43+
; CHECK-NEXT: SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
44+
; CHECK-NEXT: S_ENDPGM 0
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: bb.4:
47+
; CHECK-NEXT: successors: %bb.5(0x80000000)
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec
50+
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec
51+
; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
52+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
53+
; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc
54+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc
55+
; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.5:
58+
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.2(0x7c000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4
61+
; CHECK-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4
62+
; CHECK-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4
63+
; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
64+
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc
65+
; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
66+
; CHECK-NEXT: S_BRANCH %bb.6
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: bb.6:
69+
; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5
72+
; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
73+
; CHECK-NEXT: [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc
74+
; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
75+
; CHECK-NEXT: S_BRANCH %bb.3
76+
bb.0:
77+
successors: %bb.1(0x80000000)
78+
liveins: $vgpr4, $vgpr5
79+
80+
%21:vgpr_32 = COPY $vgpr5
81+
%20:vgpr_32 = COPY $vgpr4
82+
%23:sreg_32 = S_MOV_B32 0
83+
%33:vgpr_32 = COPY %23, implicit $exec
84+
%38:sreg_32 = IMPLICIT_DEF
85+
%44:sreg_32 = IMPLICIT_DEF
86+
%26:sreg_32 = IMPLICIT_DEF
87+
%29:sreg_32 = S_MOV_B32 1
88+
89+
bb.1:
90+
successors: %bb.2(0x80000000)
91+
92+
%0:sreg_32 = PHI %23, %bb.0, %12, %bb.6
93+
%1:vgpr_32 = PHI %33, %bb.0, %13, %bb.6
94+
95+
bb.2:
96+
successors: %bb.4(0x40000000), %bb.5(0x40000000)
97+
98+
%48:sreg_32 = PHI %44, %bb.1, %10, %bb.5
99+
%42:sreg_32 = PHI %38, %bb.1, %8, %bb.5
100+
%2:sreg_32 = PHI %23, %bb.1, %11, %bb.5
101+
%3:vgpr_32 = PHI %1, %bb.1, %9, %bb.5
102+
%27:sreg_32 = V_CMP_LT_I32_e64 %3, %20, implicit $exec
103+
%36:vgpr_32 = COPY %26
104+
%39:sreg_32 = S_OR_B32 %42, $exec_lo, implicit-def $scc
105+
%45:sreg_32 = S_OR_B32 %48, $exec_lo, implicit-def $scc
106+
%4:sreg_32 = SI_IF killed %27, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
107+
S_BRANCH %bb.4
108+
109+
bb.3:
110+
SI_END_CF %12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
111+
S_ENDPGM 0
112+
113+
bb.4:
114+
successors: %bb.5(0x80000000)
115+
116+
%6:vgpr_32 = V_ADD_U32_e64 %3, %29, 0, implicit $exec
117+
%30:sreg_32 = V_CMP_NE_U32_e64 %21, %6, implicit $exec
118+
%43:sreg_32 = S_ANDN2_B32 %39, $exec_lo, implicit-def $scc
119+
%40:sreg_32 = COPY %43
120+
%49:sreg_32 = S_ANDN2_B32 %45, $exec_lo, implicit-def $scc
121+
%50:sreg_32 = S_AND_B32 %30, $exec_lo, implicit-def $scc
122+
%46:sreg_32 = S_OR_B32 %49, %50, implicit-def $scc
123+
124+
bb.5:
125+
successors: %bb.6(0x04000000), %bb.2(0x7c000000)
126+
127+
%10:sreg_32 = PHI %45, %bb.2, %46, %bb.4
128+
%8:sreg_32 = PHI %39, %bb.2, %40, %bb.4
129+
%9:vgpr_32 = PHI %36, %bb.2, %6, %bb.4
130+
SI_END_CF %4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
131+
%11:sreg_32 = SI_IF_BREAK %10, %2, implicit-def dead $scc
132+
%12:sreg_32 = SI_IF_BREAK %8, %0, implicit-def dead $scc
133+
SI_LOOP %11, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
134+
S_BRANCH %bb.6
135+
136+
bb.6:
137+
successors: %bb.3(0x04000000), %bb.1(0x7c000000)
138+
139+
%13:vgpr_32 = PHI %9, %bb.5
140+
SI_END_CF %11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
141+
SI_LOOP %12, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
142+
S_BRANCH %bb.3
143+
...

0 commit comments

Comments
 (0)