Skip to content

Commit 2bada41

Browse files
authored
StructurizeCFG: Use poison instead of undef (#130459)
There are a surprising number of codegen changes from this.
1 parent 81ca350 commit 2bada41

26 files changed

+591
-622
lines changed

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -859,10 +859,10 @@ void StructurizeCFG::setPhiValues() {
859859
PhiMap &Map = It->second;
860860
SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
861861
for (const auto &[Phi, Incoming] : Map) {
862-
Value *Undef = UndefValue::get(Phi->getType());
862+
Value *Poison = PoisonValue::get(Phi->getType());
863863
Updater.Initialize(Phi->getType(), "");
864-
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
865-
Updater.AddAvailableValue(To, Undef);
864+
Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
865+
Updater.AddAvailableValue(To, Poison);
866866

867867
// Use leader phi's incoming if there is.
868868
auto LeaderIt = PhiClasses.findLeader(Phi);
@@ -891,7 +891,7 @@ void StructurizeCFG::setPhiValues() {
891891
if (Updater.HasValueForBlock(UB))
892892
continue;
893893

894-
Updater.AddAvailableValue(UB, Undef);
894+
Updater.AddAvailableValue(UB, Poison);
895895
}
896896

897897
for (BasicBlock *FI : From)
@@ -1182,9 +1182,9 @@ void StructurizeCFG::rebuildSSA() {
11821182
continue;
11831183

11841184
if (!Initialized) {
1185-
Value *Undef = UndefValue::get(I.getType());
1185+
Value *Poison = PoisonValue::get(I.getType());
11861186
Updater.Initialize(I.getType(), "");
1187-
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1187+
Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
11881188
Updater.AddAvailableValue(BB, &I);
11891189
Initialized = true;
11901190
}

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

Lines changed: 16 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -199,33 +199,31 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
199199
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
200200
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
201201
; CHECK-NEXT: s_mov_b64 s[0:1], 0
202-
; CHECK-NEXT: s_branch .LBB5_3
203-
; CHECK-NEXT: .LBB5_1: ; %bb4
204-
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
205-
; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
206-
; CHECK-NEXT: s_waitcnt vmcnt(0)
207-
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
208-
; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
209-
; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
210-
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
211-
; CHECK-NEXT: .LBB5_2: ; %Flow
212-
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
202+
; CHECK-NEXT: s_branch .LBB5_2
203+
; CHECK-NEXT: .LBB5_1: ; %Flow
204+
; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
213205
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[2:3]
214206
; CHECK-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
215207
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
216-
; CHECK-NEXT: s_cbranch_execz .LBB5_5
217-
; CHECK-NEXT: .LBB5_3: ; %bb1
208+
; CHECK-NEXT: s_cbranch_execz .LBB5_4
209+
; CHECK-NEXT: .LBB5_2: ; %bb1
218210
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
219211
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
220212
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
221213
; CHECK-NEXT: s_and_b64 s[4:5], exec, -1
222214
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
223215
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
224-
; CHECK-NEXT: s_cbranch_vccz .LBB5_1
225-
; CHECK-NEXT: ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
226-
; CHECK-NEXT: ; implicit-def: $vgpr1
227-
; CHECK-NEXT: s_branch .LBB5_2
228-
; CHECK-NEXT: .LBB5_5: ; %bb9
216+
; CHECK-NEXT: s_cbranch_vccnz .LBB5_1
217+
; CHECK-NEXT: ; %bb.3: ; %bb4
218+
; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
219+
; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
220+
; CHECK-NEXT: s_waitcnt vmcnt(0)
221+
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
222+
; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
223+
; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
224+
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
225+
; CHECK-NEXT: s_branch .LBB5_1
226+
; CHECK-NEXT: .LBB5_4: ; %bb9
229227
; CHECK-NEXT: s_endpgm
230228
bb:
231229
%id = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -646,13 +646,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
646646
; GFX908-NEXT: v_add_f32_e32 v9, v9, v15
647647
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
648648
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
649-
; GFX908-NEXT: s_mov_b64 s[20:21], -1
650649
; GFX908-NEXT: s_branch .LBB3_4
651650
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
652651
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
653652
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
654653
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
655654
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
655+
; GFX908-NEXT: s_mov_b64 s[20:21], -1
656656
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
657657
; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19
658658
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
@@ -798,13 +798,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
798798
; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27]
799799
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
800800
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
801-
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
802801
; GFX90A-NEXT: s_branch .LBB3_4
803802
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
804803
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
805804
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
806805
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
807806
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
807+
; GFX90A-NEXT: s_mov_b64 s[20:21], -1
808808
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
809809
; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19
810810
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
1515
; CHECK-NEXT: s_mov_b32 s12, 0
1616
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1717
; CHECK-NEXT: s_cmp_lg_u32 s52, 0
18-
; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
18+
; CHECK-NEXT: s_cbranch_scc1 .LBB0_9
1919
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
2020
; CHECK-NEXT: s_cmp_eq_u32 s54, 0
2121
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
@@ -26,36 +26,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
2626
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
2727
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
2828
; CHECK-NEXT: ; %bb.3:
29-
; CHECK-NEXT: s_mov_b32 s48, 0
30-
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
31-
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
32-
; CHECK-NEXT: s_branch .LBB0_7
29+
; CHECK-NEXT: s_mov_b32 s18, 0
30+
; CHECK-NEXT: s_branch .LBB0_6
3331
; CHECK-NEXT: .LBB0_4:
3432
; CHECK-NEXT: s_mov_b32 s14, s12
3533
; CHECK-NEXT: s_mov_b32 s15, s12
3634
; CHECK-NEXT: s_mov_b32 s13, s12
3735
; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15]
3836
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
39-
; CHECK-NEXT: s_branch .LBB0_7
37+
; CHECK-NEXT: s_branch .LBB0_8
4038
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
4139
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
42-
; CHECK-NEXT: s_mov_b32 s48, 1.0
40+
; CHECK-NEXT: s_mov_b32 s18, 1.0
4341
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
42+
; CHECK-NEXT: .LBB0_6: ; %Flow
43+
; CHECK-NEXT: s_mov_b32 s48, 1.0
44+
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
4445
; CHECK-NEXT: s_mov_b32 s49, s48
4546
; CHECK-NEXT: s_mov_b32 s50, s48
4647
; CHECK-NEXT: s_mov_b32 s51, s48
47-
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
48-
; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
49-
; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
48+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
49+
; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
5050
; CHECK-NEXT: s_add_u32 s12, s8, 40
5151
; CHECK-NEXT: s_addc_u32 s13, s9, 0
52-
; CHECK-NEXT: s_getpc_b64 s[18:19]
53-
; CHECK-NEXT: s_add_u32 s18, s18, _Z3dotDv3_fS_@gotpcrel32@lo+4
54-
; CHECK-NEXT: s_addc_u32 s19, s19, _Z3dotDv3_fS_@gotpcrel32@hi+12
52+
; CHECK-NEXT: s_getpc_b64 s[20:21]
53+
; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4
54+
; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12
5555
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
56-
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
56+
; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
5757
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
58-
; CHECK-NEXT: v_add_f32_e64 v1, s17, s48
58+
; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
5959
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
6060
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
6161
; CHECK-NEXT: s_mov_b32 s12, s14
@@ -67,18 +67,18 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
6767
; CHECK-NEXT: s_mov_b32 s14, s16
6868
; CHECK-NEXT: s_mov_b32 s48, 0
6969
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
70-
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
70+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
7171
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
7272
; CHECK-NEXT: s_mov_b32 s49, s48
7373
; CHECK-NEXT: s_mov_b32 s50, s48
7474
; CHECK-NEXT: s_mov_b32 s51, s48
75-
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
75+
; CHECK-NEXT: .LBB0_8: ; %if.end294.i.i
7676
; CHECK-NEXT: v_mov_b32_e32 v0, 0
7777
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
7878
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
7979
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
8080
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
81-
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
81+
; CHECK-NEXT: .LBB0_9: ; %kernel_direct_lighting.exit
8282
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
8383
; CHECK-NEXT: v_mov_b32_e32 v0, s48
8484
; CHECK-NEXT: v_mov_b32_e32 v4, 0

0 commit comments

Comments
 (0)