1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
2
3
3
4
; Check we can compile this bugpoint-reduced test without an
9
10
10
11
@0 = external unnamed_addr addrspace (3 ) global [462 x float ], align 4
11
12
12
- ; Function Attrs: nounwind readnone speculatable
13
13
declare i32 @llvm.amdgcn.workitem.id.y () #0
14
-
15
- ; Function Attrs: nounwind readnone speculatable
16
14
declare i32 @llvm.amdgcn.workitem.id.x () #0
17
-
18
- ; Function Attrs: nounwind readnone speculatable
19
15
declare float @llvm.fmuladd.f32 (float , float , float ) #0
20
16
21
- ; CHECK: s_endpgm
22
17
define amdgpu_kernel void @foo (ptr addrspace (1 ) noalias nocapture readonly %arg , ptr addrspace (1 ) noalias nocapture readonly %arg1 , ptr addrspace (1 ) noalias nocapture %arg2 , float %arg3 , i1 %c0 , i1 %c1 , i1 %c2 , i1 %c3 , i1 %c4 , i1 %c5 ) local_unnamed_addr !reqd_work_group_size !0 {
18
+ ; CHECK-LABEL: foo:
19
+ ; CHECK: ; %bb.0: ; %bb
20
+ ; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10
21
+ ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
22
+ ; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11
23
+ ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0
24
+ ; CHECK-NEXT: s_movk_i32 s0, 0x54
25
+ ; CHECK-NEXT: v_mov_b32_e32 v0, 0
26
+ ; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2
27
+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
28
+ ; CHECK-NEXT: s_bitcmp1_b32 s6, 8
29
+ ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
30
+ ; CHECK-NEXT: s_bitcmp1_b32 s6, 16
31
+ ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
32
+ ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
33
+ ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2
34
+ ; CHECK-NEXT: s_xor_b64 s[4:5], s[4:5], -1
35
+ ; CHECK-NEXT: s_bitcmp1_b32 s2, 24
36
+ ; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
37
+ ; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1
38
+ ; CHECK-NEXT: s_bitcmp1_b32 s3, 0
39
+ ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
40
+ ; CHECK-NEXT: s_bitcmp1_b32 s10, 8
41
+ ; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0
42
+ ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7]
43
+ ; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5]
44
+ ; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11]
45
+ ; CHECK-NEXT: s_and_b64 s[8:9], exec, s[8:9]
46
+ ; CHECK-NEXT: s_mov_b32 m0, -1
47
+ ; CHECK-NEXT: .LBB0_1: ; %.loopexit145
48
+ ; CHECK-NEXT: ; =>This Loop Header: Depth=1
49
+ ; CHECK-NEXT: ; Child Loop BB0_3 Depth 2
50
+ ; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
51
+ ; CHECK-NEXT: ; Child Loop BB0_5 Depth 2
52
+ ; CHECK-NEXT: v_mov_b32_e32 v2, v1
53
+ ; CHECK-NEXT: s_branch .LBB0_3
54
+ ; CHECK-NEXT: .LBB0_2: ; %.loopexit
55
+ ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=2
56
+ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x540, v2
57
+ ; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
58
+ ; CHECK-NEXT: s_cbranch_vccnz .LBB0_5
59
+ ; CHECK-NEXT: .LBB0_3: ; %bb13
60
+ ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
61
+ ; CHECK-NEXT: ; => This Loop Header: Depth=2
62
+ ; CHECK-NEXT: ; Child Loop BB0_4 Depth 3
63
+ ; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
64
+ ; CHECK-NEXT: v_mov_b32_e32 v3, v2
65
+ ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
66
+ ; CHECK-NEXT: .LBB0_4: ; %bb21
67
+ ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
68
+ ; CHECK-NEXT: ; Parent Loop BB0_3 Depth=2
69
+ ; CHECK-NEXT: ; => This Inner Loop Header: Depth=3
70
+ ; CHECK-NEXT: ds_write_b32 v3, v0
71
+ ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 32, v3
72
+ ; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
73
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_4
74
+ ; CHECK-NEXT: s_branch .LBB0_2
75
+ ; CHECK-NEXT: .LBB0_5: ; %bb31
76
+ ; CHECK-NEXT: ; Parent Loop BB0_1 Depth=1
77
+ ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
78
+ ; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
79
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_5
80
+ ; CHECK-NEXT: ; %bb.6: ; %bb30
81
+ ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
82
+ ; CHECK-NEXT: s_mov_b64 vcc, s[8:9]
83
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_1
84
+ ; CHECK-NEXT: ; %bb.7: ; %bb11
85
+ ; CHECK-NEXT: s_endpgm
23
86
bb:
24
87
%tmp = tail call i32 @llvm.amdgcn.workitem.id.y ()
25
88
%tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -47,7 +110,7 @@ bb13: ; preds = %.loopexit, %.loopex
47
110
48
111
bb17: ; preds = %bb13
49
112
%tmp18 = mul i32 %tmp15 , 224
50
- %tmp19 = add i32 undef , %tmp18
113
+ %tmp19 = add i32 0 , %tmp18
51
114
br label %bb21
52
115
53
116
.loopexit: ; preds = %bb21, %bb13
@@ -58,7 +121,7 @@ bb21: ; preds = %bb21, %bb17
58
121
%tmp22 = phi i32 [ %tmp4 , %bb17 ], [ %tmp25 , %bb21 ]
59
122
%tmp23 = add i32 %tmp22 , %tmp16
60
123
%tmp24 = getelementptr inbounds float , ptr addrspace (3 ) @0 , i32 %tmp23
61
- store float undef , ptr addrspace (3 ) %tmp24 , align 4
124
+ store float 0.0 , ptr addrspace (3 ) %tmp24 , align 4
62
125
%tmp25 = add nuw i32 %tmp22 , 8
63
126
br i1 %c3 , label %bb21 , label %.loopexit
64
127
0 commit comments