1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
- ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3
2
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
4
3
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
5
4
@@ -48,9 +47,9 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
48
47
; GCN-NEXT: s_endpgm
49
48
; IR-LABEL: @reduced_nested_loop_conditions(
50
49
; IR-NEXT: bb:
51
- ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
50
+ ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
52
51
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]]
53
- ; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]]
52
+ ; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4
54
53
; IR-NEXT: br label [[BB5:%.*]]
55
54
; IR: bb3:
56
55
; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]]
@@ -84,7 +83,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
84
83
; IR: bb16:
85
84
; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1
86
85
; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]]
87
- ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]]
86
+ ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4
88
87
; IR-NEXT: br label [[BB20]]
89
88
; IR: bb20:
90
89
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ]
@@ -93,6 +92,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
93
92
; IR: bb23:
94
93
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
95
94
; IR-NEXT: ret void
95
+ ;
96
96
bb:
97
97
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x () #1
98
98
%my.tmp1 = getelementptr inbounds i64 , i64 addrspace (3 )* %arg , i32 %my.tmp
@@ -190,19 +190,19 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
190
190
; GCN-NEXT: s_endpgm
191
191
; IR-LABEL: @nested_loop_conditions(
192
192
; IR-NEXT: bb:
193
- ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
193
+ ; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]]
194
194
; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64
195
195
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]]
196
196
; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16
197
197
; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
198
198
; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0
199
- ; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef
199
+ ; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
200
200
; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9
201
201
; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]]
202
202
; IR: bb14.lr.ph:
203
203
; IR-NEXT: br label [[BB14:%.*]]
204
204
; IR: Flow3:
205
- ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21 :%.*]])
205
+ ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20 :%.*]])
206
206
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]])
207
207
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
208
208
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
@@ -244,7 +244,7 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
244
244
; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]])
245
245
; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]]
246
246
; IR: bb18:
247
- ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef
247
+ ; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
248
248
; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9
249
249
; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]]
250
250
; IR: bb21:
@@ -261,21 +261,22 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
261
261
; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]]
262
262
; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16
263
263
; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
264
- ; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef
264
+ ; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
265
265
; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
266
266
; IR-NEXT: br label [[FLOW1]]
267
267
; IR: Flow2:
268
268
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
269
- ; IR-NEXT: [[TMP19 :%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
270
- ; IR-NEXT: [[TMP20 :%.*]] = extractvalue { i1, i64 } [[TMP19 ]], 0
271
- ; IR-NEXT: [[TMP21 ]] = extractvalue { i1, i64 } [[TMP19 ]], 1
272
- ; IR-NEXT: br i1 [[TMP20 ]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
269
+ ; IR-NEXT: [[TMP18 :%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
270
+ ; IR-NEXT: [[TMP19 :%.*]] = extractvalue { i1, i64 } [[TMP18 ]], 0
271
+ ; IR-NEXT: [[TMP20 ]] = extractvalue { i1, i64 } [[TMP18 ]], 1
272
+ ; IR-NEXT: br i1 [[TMP19 ]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
273
273
; IR: bb31.loopexit:
274
274
; IR-NEXT: br label [[FLOW3]]
275
275
; IR: bb31:
276
276
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
277
- ; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
277
+ ; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4
278
278
; IR-NEXT: ret void
279
+ ;
279
280
bb:
280
281
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x () #1
281
282
%my.tmp1 = zext i32 %my.tmp to i64
0 commit comments