Commit 6d93280

[msan][NFC] Add tests for Arm NEON smaxv/sminv (#129741)
This patch precommits tests for the smaxv/sminv intrinsics, which are currently handled suboptimally by visitInstruction. These are the signed versions of umaxv/uminv (#129661). Future work will update MSan to apply handleVectorReduceIntrinsic.
1 parent 27a8501 commit 6d93280
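
For context, here is a minimal sketch, in LLVM IR, of how reduce-style handling could compute the result shadow for one of these intrinsics. It is not part of this commit: the function name, the use of llvm.vector.reduce.or, and the zext to the widened result type are illustrative assumptions about what handleVectorReduceIntrinsic-style instrumentation would produce, in contrast to the strict check-and-abort that visitInstruction emits today (visible in the CHECK lines below).

; Sketch only: result shadow for @llvm.aarch64.neon.smaxv.i32.v8i8, computed
; from the operand shadow. Any uninitialized bit in any lane poisons the
; scalar result, but no report is issued at the reduction itself.
define i32 @smaxv_result_shadow_sketch(<8 x i8> %a1_shadow) {
  %s = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a1_shadow)
  ; Widening with zext is an assumption of this sketch, not MSan's output.
  %res_shadow = zext i8 %s to i32
  ret i32 %res_shadow
}

declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)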

File tree

2 files changed: +646 -0 lines changed
Lines changed: 323 additions & 0 deletions
@@ -0,0 +1,323 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-smaxv.ll
;
; Handled suboptimally (visitInstruction):
; - llvm.aarch64.neon.smaxv
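;
; (Here "suboptimally" means the default strict handler: if any bit of the
; operand shadow is uninitialized, the instrumented code calls
; __msan_warning_noreturn and the result shadow is set to fully initialized,
; instead of propagating the operand shadow to the result.)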
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"

define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
; CHECK-LABEL: define signext i8 @test_vmaxv_s8(
; CHECK-SAME: <8 x i8> [[A1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
  %0 = trunc i32 %vmaxv.i to i8
  ret i8 %0
}

define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
; CHECK-LABEL: define signext i16 @test_vmaxv_s16(
; CHECK-SAME: <4 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a1)
  %0 = trunc i32 %vmaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
; CHECK-LABEL: define i32 @test_vmaxv_s32(
; CHECK-SAME: <2 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a1)
  ret i32 %vmaxv.i
}

define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
; CHECK-LABEL: define signext i8 @test_vmaxvq_s8(
; CHECK-SAME: <16 x i8> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a1)
  %0 = trunc i32 %vmaxv.i to i8
  ret i8 %0
}

define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
; CHECK-LABEL: define signext i16 @test_vmaxvq_s16(
; CHECK-SAME: <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a1)
  %0 = trunc i32 %vmaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
; CHECK-LABEL: define i32 @test_vmaxvq_s32(
; CHECK-SAME: <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
; CHECK: [[BB2]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB3]]:
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a1)
  ret i32 %vmaxv.i
}

define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
; CHECK-LABEL: define <8 x i8> @test_vmaxv_s8_used_by_laneop(
; CHECK-SAME: <8 x i8> [[A1:%.*]], <8 x i8> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP7]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
  ret <8 x i8> %2
}

define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0 {
; CHECK-LABEL: define <4 x i16> @test_vmaxv_s16_used_by_laneop(
; CHECK-SAME: <4 x i16> [[A1:%.*]], <4 x i16> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP7]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
  ret <4 x i16> %2
}

define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0 {
; CHECK-LABEL: define <2 x i32> @test_vmaxv_s32_used_by_laneop(
; CHECK-SAME: <2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP6]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a2)
  %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
  ret <2 x i32> %1
}

define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0 {
; CHECK-LABEL: define <16 x i8> @test_vmaxvq_s8_used_by_laneop(
; CHECK-SAME: <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP7]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
  ret <16 x i8> %2
}

define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #0 {
; CHECK-LABEL: define <8 x i16> @test_vmaxvq_s16_used_by_laneop(
; CHECK-SAME: <8 x i16> [[A1:%.*]], <8 x i16> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP7]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
  ret <8 x i16> %2
}

define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #0 {
; CHECK-LABEL: define <4 x i32> @test_vmaxvq_s32_used_by_laneop(
; CHECK-SAME: <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
; CHECK: [[BB3]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a2)
  %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
  ret <4 x i32> %1
}

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

attributes #0 = { sanitize_memory }
