Skip to content

Commit 26c8789

Browse files
piotrAMD authored and Gerrit Code Review committed
Merge "[AMDGPU] Reorder target-specific passes" into amd-gfx
2 parents da5f3ad + 779cb6d commit 26c8789

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -857,6 +857,8 @@ bool GCNPassConfig::addPreISel() {
857857
// FIXME: We need to run a pass to propagate the attributes when calls are
858858
// supported.
859859

860+
addPass(createSinkingPass());
861+
860862
if (EnableConditionalDiscardTransformations)
861863
addPass(createAMDGPUConditionalDiscardPass());
862864

@@ -866,7 +868,6 @@ bool GCNPassConfig::addPreISel() {
866868
if (!LateCFGStructurize) {
867869
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
868870
}
869-
addPass(createSinkingPass());
870871

871872
// This is a temporary fix for the issue of dealing with in loop uniform values
872873
// where the uses out of the loop are non-uniform. LCSSA creates a PHI at the

llvm/test/CodeGen/AMDGPU/discard-optimization.ll

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,35 @@ define amdgpu_ps <4 x float> @wqm_kill_to_demote2(<8 x i32> inreg %rsrc, <4 x i3
189189
ret <4 x float> %rtex
190190
}
191191

192+
193+
; GCN-LABEL: {{^}}sinking_image_sample:
194+
; GCN-NEXT: ; %.entry
195+
; GCN-NOT: image_sample
196+
; GCN: s_cbranch_exec
197+
; GCN: image_sample
198+
define amdgpu_ps void @sinking_image_sample(float %arg0, <8 x i32> inreg %arg1, <4 x i32> inreg %arg2, float %arg3) {
199+
.entry:
200+
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %arg0, <8 x i32> %arg1, <4 x i32> %arg2, i1 false, i32 0, i32 0)
201+
%tmp1 = fcmp olt float %arg3, 0.000000e+00
202+
br i1 %tmp1, label %kill_br, label %next
203+
204+
kill_br:
205+
call void @llvm.amdgcn.kill(i1 false)
206+
br label %exit
207+
208+
next:
209+
%tmp2 = extractelement <4 x float> %tmp0, i32 2
210+
%tmp3 = extractelement <4 x float> %tmp0, i32 3
211+
%tmp4 = fadd reassoc nnan nsz arcp contract float %tmp2, %tmp3
212+
br label %exit
213+
214+
exit: ; preds = %next, %kill_br
215+
%outp = phi float [ %tmp4, %next ], [ undef, %kill_br]
216+
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %outp, float %outp, float %outp, float %outp, i1 immarg true, i1 immarg true)
217+
ret void
218+
}
219+
220+
192221
attributes #0 = { nounwind }
193222
attributes #1 = { nounwind readnone }
194223

llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@
66
; with exec.
77

88
; GCN-LABEL: {{^}}needs_and:
9-
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
10-
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
11-
; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
12-
; GCN: s_andn2_b64 exec, exec, [[REG3]]
139

10+
; GCN: s_or_b64 exec, exec, [[REG1:[^ ,]*]]
11+
; GCN: s_andn2_b64 exec, exec, [[REG2:[^ ,]*]]
12+
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1:[^ ,]*]], [[REG2:[^ ,]*]]
13+
; GCN: s_or_b64 exec, exec, [[REG2:[^ ,]*]]
1414
define void @needs_and(i32 %arg) {
1515
entry:
1616
br label %loop

0 commit comments

Comments
 (0)