|
1 |
| -; RUN: llc -amdgpu-conditional-discard-transformations=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s |
| 1 | +; RUN: llc -amdgpu-conditional-discard-transformations=1 --march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,KILL %s |
| 2 | +; RUN: llc -amdgpu-conditional-discard-transformations=1 -amdgpu-transform-discard-to-demote --march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DEMOTE %s |
2 | 3 |
|
3 | 4 | ; Check that the branch is removed by the discard opt.
|
4 | 5 |
|
5 | 6 | ; GCN-LABEL: {{^}}if_with_kill_true_cond:
|
6 | 7 | ; GCN: v_cmp_ne_u32_e32 vcc,
|
7 | 8 | ; GCN-NEXT: s_and_b64 exec, exec, vcc
|
8 |
| -; GCN-NOT: branch |
9 | 9 | define amdgpu_ps void @if_with_kill_true_cond(i32 %arg) {
|
10 | 10 | .entry:
|
11 | 11 | %cmp = icmp eq i32 %arg, 32
|
|
24 | 24 | ; GCN-LABEL: {{^}}if_with_kill_false_cond:
|
25 | 25 | ; GCN: v_cmp_eq_u32_e32 vcc,
|
26 | 26 | ; GCN-NEXT: s_and_b64 exec, exec, vcc
|
27 |
| -; GCN-NOT: branch |
28 | 27 | define amdgpu_ps void @if_with_kill_false_cond(i32 %arg) {
|
29 | 28 | .entry:
|
30 | 29 | %cmp = icmp eq i32 %arg, 32
|
@@ -127,8 +126,73 @@ endif:
|
127 | 126 | ret void
|
128 | 127 | }
|
129 | 128 |
|
| 129 | + |
| 130 | +; GCN-LABEL: {{^}}wqm_kill_to_demote1: |
| 131 | +; GCN-NEXT: ; %.entry |
| 132 | +; GCN: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec |
| 133 | +; GCN: s_wqm_b64 exec, exec |
| 134 | +; DEMOTE: s_and_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], [[ORIG]], vcc |
| 135 | +; GCN: image_sample |
| 136 | +; GCN: v_add_f32_e32 |
| 137 | +; DEMOTE: s_and_b64 exec, exec, [[LIVE]] |
| 138 | +; KILL: s_and_b64 exec, exec, [[ORIG]] |
| 139 | +; GCN: image_sample |
; Pixel-shader (amdgpu_ps) test case where the kill sits on one side of a
; branch and both image samples sit on the other side.
;   .entry    - compares %z against 0.0 (ordered less-than); branches to
;               .continue when true and to .kill otherwise.
;   .kill     - calls llvm.amdgcn.kill with a constant false argument and
;               jumps straight to .export without sampling.
;   .continue - samples at %coord, adds two extracted components, then
;               samples again using that sum as the coordinate.
;   .export   - merges both paths with a phi and returns the result.
; The arguments %idx, %data and %coord2 are not referenced in the body.
| 140 | +define amdgpu_ps <4 x float> @wqm_kill_to_demote1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) { |
| 141 | +.entry: |
| 142 | + %z.cmp = fcmp olt float %z, 0.0 |
| 143 | + br i1 %z.cmp, label %.continue, label %.kill |
| 144 | + |
| 145 | +.kill: |
; Presumably discards every lane that reaches this block, since the argument
; is the constant false - confirm against the intrinsic's documentation.
| 146 | + call void @llvm.amdgcn.kill(i1 false) |
| 147 | + br label %.export |
| 148 | + |
| 149 | +.continue: |
| 150 | + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 |
| 151 | + %tex0 = extractelement <4 x float> %tex, i32 0 |
; NOTE(review): %tex1 extracts element 0, the same element as %tex0 - confirm
; this is intended rather than a different component.
| 152 | + %tex1 = extractelement <4 x float> %tex, i32 0 |
; The second sample's coordinate depends on the first sample's result.
| 153 | + %coord1 = fadd float %tex0, %tex1 |
| 154 | + %rtex.src = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 |
| 155 | + br label %.export |
| 156 | + |
| 157 | +.export: |
; The .kill path contributes undef; only .continue supplies a sampled value.
| 158 | + %rtex = phi <4 x float> [ undef, %.kill ], [ %rtex.src, %.continue ] |
| 159 | + ret <4 x float> %rtex |
| 160 | +} |
| 161 | + |
| 162 | + |
| 163 | +; GCN-LABEL: {{^}}wqm_kill_to_demote2: |
| 164 | +; GCN-NEXT: ; %.entry |
| 165 | +; GCN: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec |
| 166 | +; GCN: s_wqm_b64 exec, exec |
| 167 | +; GCN: image_sample |
| 168 | +; DEMOTE: s_and_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], [[ORIG]], vcc |
| 169 | +; GCN: v_add_f32_e32 |
| 170 | +; DEMOTE: s_and_b64 exec, exec, [[LIVE]] |
| 171 | +; KILL: s_and_b64 exec, exec, [[ORIG]] |
| 172 | +; GCN: image_sample |
; Pixel-shader (amdgpu_ps) test case where the first image sample happens
; before the kill decision and the second one happens after it.
;   .entry    - samples at %coord, extracts two components, compares the
;               first against 0.0 (ordered less-than); branches to .continue
;               when true and to .kill otherwise.
;   .kill     - calls llvm.amdgcn.kill with a constant false argument, then
;               branches to .continue, so the second sample is reached from
;               both paths.
;   .continue - adds the two extracted components, samples again using the
;               sum as the coordinate, and returns that sample.
; The arguments %idx, %data, %coord2 and %z are not referenced in the body.
| 173 | +define amdgpu_ps <4 x float> @wqm_kill_to_demote2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) { |
| 174 | +.entry: |
| 175 | + %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 |
| 176 | + %tex0 = extractelement <4 x float> %tex, i32 0 |
; NOTE(review): %tex1 extracts element 0, the same element as %tex0 - confirm
; this is intended rather than a different component.
| 177 | + %tex1 = extractelement <4 x float> %tex, i32 0 |
; The branch condition comes from the sampled value, not from the %z argument.
| 178 | + %z.cmp = fcmp olt float %tex0, 0.0 |
| 179 | + br i1 %z.cmp, label %.continue, label %.kill |
| 180 | + |
| 181 | +.kill: |
; Presumably discards every lane that reaches this block, since the argument
; is the constant false - confirm against the intrinsic's documentation.
| 182 | + call void @llvm.amdgcn.kill(i1 false) |
| 183 | + br label %.continue |
| 184 | + |
| 185 | +.continue: |
| 186 | + %coord1 = fadd float %tex0, %tex1 |
| 187 | + %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0 |
| 188 | + |
| 189 | + ret <4 x float> %rtex |
| 190 | +} |
| 191 | + |
; Attribute groups and intrinsic declarations referenced by the tests.
; Group #0 (nounwind) is attached to the exp and kill declarations below.
130 | 192 | attributes #0 = { nounwind }
|
; Group #1 (nounwind readnone) is attached to the image sample declaration.
| 193 | +attributes #1 = { nounwind readnone } |
131 | 194 |
|
132 | 195 | declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
|
133 | 196 | declare void @llvm.amdgcn.kill(i1) #0
|
; NOTE(review): the declaration carries #1, while the visible call sites of
; this intrinsic pass #0 - confirm the difference is intentional.
| 197 | +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
134 | 198 |
|
0 commit comments