Skip to content

Commit 9adc99b

Browse files
kmitropoulou (Konstantina Mitropoulou) and Konstantina Mitropoulou authored
[AMDGPU] Always emit SI_KILL_I1_PSEUDO for uniform floating point branches. (#124028)
- **[NFC] Use GCNPat instead of Pat.**
- **[AMDGPU] Always emit SI_KILL_I1_PSEUDO for uniform floating point branches.**

---------

Co-authored-by: Konstantina Mitropoulou <[email protected]>
1 parent e0054e9 commit 9adc99b

File tree

3 files changed

+52
-7
lines changed

3 files changed

+52
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2498,6 +2498,9 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
24982498
def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
24992499
AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
25002500

2501+
def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">,
2502+
AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>;
2503+
25012504
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
25022505
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
25032506

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,39 +1053,40 @@ def : GCNPat<
10531053
(SI_ELSE $src, $target)
10541054
>;
10551055

1056-
def : Pat <
1056+
def : GCNPat <
10571057
(int_amdgcn_kill i1:$src),
10581058
(SI_KILL_I1_PSEUDO SCSrc_i1:$src, 0)
10591059
>;
10601060

1061-
def : Pat <
1061+
def : GCNPat <
10621062
(int_amdgcn_kill (i1 (not i1:$src))),
10631063
(SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1)
10641064
>;
10651065

1066-
def : Pat <
1066+
let SubtargetPredicate = NotHasSALUFloatInsts in
1067+
def : GCNPat <
10671068
(int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))),
10681069
(SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
10691070
>;
10701071

1071-
def : Pat <
1072+
def : GCNPat <
10721073
(int_amdgcn_wqm_demote i1:$src),
10731074
(SI_DEMOTE_I1 SCSrc_i1:$src, 0)
10741075
>;
10751076

1076-
def : Pat <
1077+
def : GCNPat <
10771078
(int_amdgcn_wqm_demote (i1 (not i1:$src))),
10781079
(SI_DEMOTE_I1 SCSrc_i1:$src, -1)
10791080
>;
10801081

10811082
// TODO: we could add more variants for other types of conditionals
10821083

1083-
def : Pat <
1084+
def : GCNPat <
10841085
(i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
10851086
(COPY $src) // Return the SGPRs representing i1 src
10861087
>;
10871088

1088-
def : Pat <
1089+
def : GCNPat <
10891090
(i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
10901091
(COPY $src) // Return the SGPRs representing i1 src
10911092
>;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s
3+
4+
define amdgpu_ps void @_amdgpu_ps_main() {
5+
; CHECK-LABEL: name: _amdgpu_ps_main
6+
; CHECK: bb.0.entry:
7+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
10+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
11+
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s32))
12+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
13+
; CHECK-NEXT: nofpexcept S_CMP_NLT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
15+
; CHECK-NEXT: SI_KILL_I1_PSEUDO killed [[COPY]], 0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
16+
; CHECK-NEXT: nofpexcept S_CMP_LT_F32 [[S_BUFFER_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], implicit-def $scc, implicit $mode
17+
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
18+
; CHECK-NEXT: S_BRANCH %bb.1
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: bb.1.bb1:
21+
; CHECK-NEXT: successors: %bb.2(0x80000000)
22+
; CHECK-NEXT: {{ $}}
23+
; CHECK-NEXT: bb.2.bb2:
24+
; CHECK-NEXT: S_ENDPGM 0
25+
entry:
26+
%i = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0)
27+
%i1 = bitcast i32 %i to float
28+
%i2 = fcmp uge float %i1, 0.000000e+00
29+
call void @llvm.amdgcn.kill(i1 %i2)
30+
br i1 %i2, label %bb1, label %bb2
31+
32+
bb1: ; preds = %entry
33+
%i3 = call i64 @llvm.amdgcn.s.getpc()
34+
%i4 = and i64 %i3, 1
35+
%i5 = inttoptr i64 %i4 to ptr addrspace(4)
36+
%i6 = getelementptr i8, ptr addrspace(4) %i5, i64 32
37+
br label %bb2
38+
39+
bb2: ; preds = %bb1, %entry
40+
ret void
41+
}

0 commit comments

Comments
 (0)