Skip to content

Commit 1f1f820

Browse files
authored
AMDGPU: Switch simplifydemandedbits-recursion.ll to generated checks (#131317)
Previously this test only checked for s_endpgm. Generate full checks with update_llc_test_checks.py, and replace the undef operands with concrete values.
1 parent a6ae965 commit 1f1f820

File tree

1 file changed

+71
-8
lines changed

1 file changed

+71
-8
lines changed

llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
23

34
; Check we can compile this bugpoint-reduced test without an
@@ -9,17 +10,79 @@
910

1011
@0 = external unnamed_addr addrspace(3) global [462 x float], align 4
1112

12-
; Function Attrs: nounwind readnone speculatable
1313
declare i32 @llvm.amdgcn.workitem.id.y() #0
14-
15-
; Function Attrs: nounwind readnone speculatable
1614
declare i32 @llvm.amdgcn.workitem.id.x() #0
17-
18-
; Function Attrs: nounwind readnone speculatable
1915
declare float @llvm.fmuladd.f32(float, float, float) #0
2016

21-
; CHECK: s_endpgm
2217
define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
18+
; CHECK-LABEL: foo:
19+
; CHECK: ; %bb.0: ; %bb
20+
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10
21+
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
22+
; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11
23+
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0
24+
; CHECK-NEXT: s_movk_i32 s0, 0x54
25+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
26+
; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2
27+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
28+
; CHECK-NEXT: s_bitcmp1_b32 s6, 8
29+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
30+
; CHECK-NEXT: s_bitcmp1_b32 s6, 16
31+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
32+
; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
33+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2
34+
; CHECK-NEXT: s_xor_b64 s[4:5], s[4:5], -1
35+
; CHECK-NEXT: s_bitcmp1_b32 s2, 24
36+
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
37+
; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1
38+
; CHECK-NEXT: s_bitcmp1_b32 s3, 0
39+
; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
40+
; CHECK-NEXT: s_bitcmp1_b32 s10, 8
41+
; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0
42+
; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7]
43+
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5]
44+
; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11]
45+
; CHECK-NEXT: s_and_b64 s[8:9], exec, s[8:9]
46+
; CHECK-NEXT: s_mov_b32 m0, -1
47+
; CHECK-NEXT: .LBB0_1: ; %.loopexit145
48+
; CHECK-NEXT: ; =>This Loop Header: Depth=1
49+
; CHECK-NEXT: ; Child Loop BB0_3 Depth 2
50+
; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
51+
; CHECK-NEXT: ; Child Loop BB0_5 Depth 2
52+
; CHECK-NEXT: v_mov_b32_e32 v2, v1
53+
; CHECK-NEXT: s_branch .LBB0_3
54+
; CHECK-NEXT: .LBB0_2: ; %.loopexit
55+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=2
56+
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x540, v2
57+
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
58+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_5
59+
; CHECK-NEXT: .LBB0_3: ; %bb13
60+
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
61+
; CHECK-NEXT: ; => This Loop Header: Depth=2
62+
; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
63+
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
64+
; CHECK-NEXT: v_mov_b32_e32 v3, v2
65+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
66+
; CHECK-NEXT: .LBB0_4: ; %bb21
67+
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
68+
; CHECK-NEXT: ; Parent Loop BB0_3 Depth=2
69+
; CHECK-NEXT: ; => This Inner Loop Header: Depth=3
70+
; CHECK-NEXT: ds_write_b32 v3, v0
71+
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 32, v3
72+
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
73+
; CHECK-NEXT: s_cbranch_vccz .LBB0_4
74+
; CHECK-NEXT: s_branch .LBB0_2
75+
; CHECK-NEXT: .LBB0_5: ; %bb31
76+
; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
77+
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
78+
; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
79+
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
80+
; CHECK-NEXT: ; %bb.6: ; %bb30
81+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
82+
; CHECK-NEXT: s_mov_b64 vcc, s[8:9]
83+
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
84+
; CHECK-NEXT: ; %bb.7: ; %bb11
85+
; CHECK-NEXT: s_endpgm
2386
bb:
2487
%tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
2588
%tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -47,7 +110,7 @@ bb13: ; preds = %.loopexit, %.loopex
47110

48111
bb17: ; preds = %bb13
49112
%tmp18 = mul i32 %tmp15, 224
50-
%tmp19 = add i32 undef, %tmp18
113+
%tmp19 = add i32 0, %tmp18
51114
br label %bb21
52115

53116
.loopexit: ; preds = %bb21, %bb13
@@ -58,7 +121,7 @@ bb21: ; preds = %bb21, %bb17
58121
%tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
59122
%tmp23 = add i32 %tmp22, %tmp16
60123
%tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
61-
store float undef, ptr addrspace(3) %tmp24, align 4
124+
store float 0.0, ptr addrspace(3) %tmp24, align 4
62125
%tmp25 = add nuw i32 %tmp22, 8
63126
br i1 %c3, label %bb21, label %.loopexit
64127

0 commit comments

Comments
 (0)