Skip to content

Commit 317c41b

Browse files
MachineUniformityAnalysis: Improve isConstantOrUndefValuePhi
Change existing code to match what LLVM-IR version is doing via PHINode::hasConstantOrUndefValue. Most notably this improves number of values that can be allocated to sgpr in AMDGPU's RBSelect. Common case here are phis that appear in structurize-cfg lowering for cycles with multiple exits: Undef incoming value is coming from block that reached cycle exit condition, if other incoming is uniform keep the phi uniform despite the fact it is joining values from pair of blocks that are entered via divergent condition branch.
1 parent f354d30 commit 317c41b

File tree

9 files changed

+188
-176
lines changed

9 files changed

+188
-176
lines changed

llvm/lib/CodeGen/MachineSSAContext.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,28 @@ const MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
5454
return F->getRegInfo().getVRegDef(value)->getParent();
5555
}
5656

57+
static bool isUndef(const MachineInstr &MI) {
58+
return MI.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
59+
MI.getOpcode() == TargetOpcode::IMPLICIT_DEF;
60+
}
61+
62+
/// MachineInstr equivalent of PHINode::hasConstantOrUndefValue()
5763
template <>
58-
bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
59-
return Phi.isConstantValuePHI();
64+
bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &MI) {
65+
if (!MI.isPHI())
66+
return false;
67+
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
68+
Register This = MI.getOperand(0).getReg();
69+
Register ConstantValue;
70+
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
71+
Register Incoming = MI.getOperand(i).getReg();
72+
if (Incoming != This && !isUndef(*MRI.getVRegDef(Incoming))) {
73+
if (ConstantValue && ConstantValue != Incoming)
74+
return false;
75+
ConstantValue = Incoming;
76+
}
77+
}
78+
return true;
6079
}
6180

6281
template <>

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
22
# CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge
33
# CHECK-LABEL: BLOCK bb.0
4-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
5-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
6-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
7-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
8-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
9-
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
10-
# CHECK: DIVERGENT: G_BR %bb.2
4+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
5+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
6+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
7+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
8+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
9+
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
10+
# CHECK: DIVERGENT: G_BR %bb.2
1111
# CHECK-LABEL: BLOCK bb.1
1212
# CHECK-LABEL: BLOCK bb.2
13-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
14-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
15-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
16-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
17-
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
18-
# CHECK: DIVERGENT: G_BR %bb.4
13+
# CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
14+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
15+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
16+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
17+
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
18+
# CHECK: DIVERGENT: G_BR %bb.4
1919
# CHECK-LABEL: BLOCK bb.3
2020
# CHECK-LABEL: BLOCK bb.4
21-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.2, %{{[0-9]*}}:_(s32), %bb.3
21+
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.2, %{{[0-9]*}}:_(s32), %bb.3
2222

2323
---
2424
name: hidden_diverge

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-loop-diverge.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
# CHECK-NOT: DIVERGENT: G_BR %bb.5
2323

2424
# CHECK-LABEL: BLOCK bb.4
25-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.2
25+
# CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.2
2626

2727
# CHECK-LABEL: BLOCK bb.5
28-
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.4
28+
# CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.4
2929

3030
---
3131
name: hidden_loop_diverge

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ body: |
4040
4141
bb.5:
4242
successors: %bb.6(0x04000000), %bb.2(0x7c000000)
43-
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
44-
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
45-
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
46-
; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
43+
; CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI
44+
; CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI
45+
; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI
46+
; CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
4747
%19:_(s32) = G_PHI %18(s32), %bb.7, %25(s32), %bb.4
4848
%20:_(s32) = G_PHI %6(s32), %bb.7, %25(s32), %bb.4
4949
%21:_(s1) = G_PHI %34(s1), %bb.7, %33(s1), %bb.4

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -971,12 +971,10 @@ body: |
971971
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
972972
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
973973
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
974-
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
975974
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
976975
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
977976
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
978-
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
979-
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
977+
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
980978
; GFX10-NEXT: G_BR %bb.7
981979
; GFX10-NEXT: {{ $}}
982980
; GFX10-NEXT: bb.1:
@@ -989,19 +987,18 @@ body: |
989987
; GFX10-NEXT: bb.2:
990988
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
991989
; GFX10-NEXT: {{ $}}
992-
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %67(s1), %bb.6, %71(s1), %bb.7
993-
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %49(s1), %bb.6, %48(s1), %bb.7
994-
; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7
990+
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %53(s1), %bb.6, %57(s1), %bb.7
991+
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7
992+
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
993+
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
995994
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
996995
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
997-
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
998-
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY10]](s1)
999996
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
1000997
; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), %17(s32)
1001998
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
1002-
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
999+
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
10031000
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
1004-
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
1001+
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
10051002
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
10061003
; GFX10-NEXT: G_BR %bb.4
10071004
; GFX10-NEXT: {{ $}}
@@ -1018,28 +1015,27 @@ body: |
10181015
; GFX10-NEXT: {{ $}}
10191016
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
10201017
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
1021-
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
1018+
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
10221019
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
1023-
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
1020+
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
10241021
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
10251022
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
10261023
; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32)
1027-
; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
1028-
; GFX10-NEXT: [[DEF5:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
1029-
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %63(s1), $exec_lo, implicit-def $scc
1030-
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
1024+
; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
1025+
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %49(s1), $exec_lo, implicit-def $scc
1026+
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
10311027
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
1032-
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
1033-
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
1028+
; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
1029+
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
10341030
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
10351031
; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
10361032
; GFX10-NEXT: G_BR %bb.5
10371033
; GFX10-NEXT: {{ $}}
10381034
; GFX10-NEXT: bb.5:
10391035
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4
1040-
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
1036+
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
10411037
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
1042-
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[COPY3]], [[COPY2]]
1038+
; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[COPY3]], [[COPY2]]
10431039
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
10441040
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
10451041
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1049,42 +1045,32 @@ body: |
10491045
; GFX10-NEXT: {{ $}}
10501046
; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT1]](s32), %bb.3
10511047
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
1052-
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
1053-
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
1048+
; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
10541049
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
10551050
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc
1056-
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
1051+
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
10571052
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
1058-
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %56(s1), $exec_lo, implicit-def $scc
1059-
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
1060-
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
1061-
; GFX10-NEXT: [[DEF6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
1053+
; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
10621054
; GFX10-NEXT: G_BR %bb.2
10631055
; GFX10-NEXT: {{ $}}
10641056
; GFX10-NEXT: bb.7:
10651057
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
10661058
; GFX10-NEXT: {{ $}}
1067-
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
1068-
; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
1069-
; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF5]](s1), %bb.4
1070-
; GFX10-NEXT: [[PHI9:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI2]](s1), %bb.2, [[DEF4]](s1), %bb.4
1071-
; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI10]](s32), %bb.2, [[C]](s32), %bb.0
1072-
; GFX10-NEXT: [[PHI11:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
1073-
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
1074-
; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
1075-
; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1)
1076-
; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI9]](s1)
1059+
; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
1060+
; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
1061+
; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4
1062+
; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI9]](s32), %bb.2, [[C]](s32), %bb.0
1063+
; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
1064+
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
1065+
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
1066+
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1)
10771067
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
1078-
; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
1079-
; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc
1080-
; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc
1081-
; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
1082-
; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_5]](s1)
1083-
; GFX10-NEXT: [[S_ANDN2_B32_6:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY20]](s1), $exec_lo, implicit-def $scc
1084-
; GFX10-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY22]](s1), implicit-def $scc
1085-
; GFX10-NEXT: [[S_OR_B32_6:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_6]](s1), [[S_AND_B32_6]](s1), implicit-def $scc
1086-
; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_6]](s1)
1087-
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
1068+
; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
1069+
; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY17]](s1), $exec_lo, implicit-def $scc
1070+
; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
1071+
; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
1072+
; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
1073+
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
10881074
; GFX10-NEXT: G_BR %bb.1
10891075
bb.0:
10901076
successors: %bb.7(0x80000000)

0 commit comments

Comments
 (0)