Skip to content

Commit 2302e4c

Browse files
authored
Reland "VectorUtils: mark xrint as trivially vectorizable" (#71416)
With the recent change 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering), it is now possible for SLPVectorizer, LoopVectorize, and Scalarizer to operate on llvm.lrint and llvm.llrint, with vector codegen for the RISC-V target. Make a trivial change to VectorUtils, and update the corresponding tests. Two important fixes have landed since the original patch was landed and then reverted: 5e1d81a (LegalizeIntegerTypes: implement PromoteIntRes for xrint) and fd887a3 (LegalizeVectorTypes: fix bug in widening of vec result in xrint). With these in place, it is now safe to re-land the patch. See also #71399, which proves that lrint and llrint will indeed produce vector codegen on RISC-V. Fixes #55208.
1 parent c3851a9 commit 2302e4c

File tree

4 files changed

+48
-139
lines changed

4 files changed

+48
-139
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
9191
case Intrinsic::canonicalize:
9292
case Intrinsic::fptosi_sat:
9393
case Intrinsic::fptoui_sat:
94+
case Intrinsic::lrint:
95+
case Intrinsic::llrint:
9496
return true;
9597
default:
9698
return false;
@@ -122,6 +124,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
122124
switch (ID) {
123125
case Intrinsic::fptosi_sat:
124126
case Intrinsic::fptoui_sat:
127+
case Intrinsic::lrint:
128+
case Intrinsic::llrint:
125129
return OpdIdx == -1 || OpdIdx == 0;
126130
case Intrinsic::is_fpclass:
127131
return OpdIdx == 0;

llvm/test/Transforms/LoopVectorize/intrinsic.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ declare i32 @llvm.lrint.i32.f32(float)
16021602

16031603
define void @lrint_i32_f32(ptr %x, ptr %y, i64 %n) {
16041604
; CHECK-LABEL: @lrint_i32_f32(
1605-
; CHECK-NOT: llvm.lrint.v4i32.v4f32
1605+
; CHECK: llvm.lrint.v4i32.v4f32
16061606
; CHECK: ret void
16071607
;
16081608
entry:
@@ -1628,7 +1628,7 @@ declare i64 @llvm.llrint.i64.f32(float)
16281628

16291629
define void @llrint_i64_f32(ptr %x, ptr %y, i64 %n) {
16301630
; CHECK-LABEL: @llrint_i64_f32(
1631-
; CHECK-NOT: llvm.llrint.v4i32.v4f32
1631+
; CHECK: llvm.llrint.v4i64.v4f32
16321632
; CHECK: ret void
16331633
;
16341634
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/fround.ll

Lines changed: 30 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,8 @@ define <2 x i32> @lrint_v2i32f32(ptr %a) {
3434
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3535
; CHECK-NEXT: entry:
3636
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
37-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
38-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
39-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i32> undef, i32 [[TMP1]], i32 0
40-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
41-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
42-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i32> [[VECINS]], i32 [[TMP2]], i32 1
43-
; CHECK-NEXT: ret <2 x i32> [[VECINS_1]]
37+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> [[TMP0]])
38+
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
4439
;
4540
entry:
4641
%0 = load <2 x float>, ptr %a
@@ -58,19 +53,8 @@ define <4 x i32> @lrint_v4i32f32(ptr %a) {
5853
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5954
; CHECK-NEXT: entry:
6055
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
61-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
62-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
63-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
64-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
65-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
66-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i32> [[VECINS]], i32 [[TMP2]], i32 1
67-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
68-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
69-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
70-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
71-
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
72-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
73-
; CHECK-NEXT: ret <4 x i32> [[VECINS_3]]
56+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> [[TMP0]])
57+
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
7458
;
7559
entry:
7660
%0 = load <4 x float>, ptr %a
@@ -94,31 +78,8 @@ define <8 x i32> @lrint_v8i32f32(ptr %a) {
9478
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
9579
; CHECK-NEXT: entry:
9680
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
97-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
98-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT]])
99-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
100-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
101-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_1]])
102-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i32> [[VECINS]], i32 [[TMP2]], i32 1
103-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
104-
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_2]])
105-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i32> [[VECINS_1]], i32 [[TMP3]], i32 2
106-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
107-
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_3]])
108-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i32> [[VECINS_2]], i32 [[TMP4]], i32 3
109-
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
110-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_4]])
111-
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i32> [[VECINS_3]], i32 [[TMP5]], i32 4
112-
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
113-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_5]])
114-
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i32> [[VECINS_4]], i32 [[TMP6]], i32 5
115-
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
116-
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_6]])
117-
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i32> [[VECINS_5]], i32 [[TMP7]], i32 6
118-
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
119-
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.lrint.i32.f32(float [[VECEXT_7]])
120-
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i32> [[VECINS_6]], i32 [[TMP8]], i32 7
121-
; CHECK-NEXT: ret <8 x i32> [[VECINS_7]]
81+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> [[TMP0]])
82+
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
12283
;
12384
entry:
12485
%0 = load <8 x float>, ptr %a
@@ -154,13 +115,8 @@ define <2 x i64> @lrint_v2i64f32(ptr %a) {
154115
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
155116
; CHECK-NEXT: entry:
156117
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
157-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
158-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
159-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
160-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
161-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
162-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
163-
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
118+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> [[TMP0]])
119+
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
164120
;
165121
entry:
166122
%0 = load <2 x float>, ptr %a
@@ -178,19 +134,8 @@ define <4 x i64> @lrint_v4i64f32(ptr %a) {
178134
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
179135
; CHECK-NEXT: entry:
180136
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
181-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
182-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
183-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
184-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
185-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
186-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
187-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
188-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
189-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
190-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
191-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
192-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
193-
; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
137+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP0]])
138+
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
194139
;
195140
entry:
196141
%0 = load <4 x float>, ptr %a
@@ -214,31 +159,14 @@ define <8 x i64> @lrint_v8i64f32(ptr %a) {
214159
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
215160
; CHECK-NEXT: entry:
216161
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
217-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
218-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT]])
219-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
220-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
221-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_1]])
222-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
223-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
224-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_2]])
225-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
226-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
227-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_3]])
228-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
229-
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
230-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_4]])
231-
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
232-
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
233-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_5]])
234-
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
235-
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
236-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_6]])
237-
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
238-
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
239-
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.lrint.i64.f32(float [[VECEXT_7]])
240-
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
241-
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
162+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP1]])
164+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
165+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
166+
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> [[TMP4]])
167+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
168+
; CHECK-NEXT: [[VECINS_71:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
169+
; CHECK-NEXT: ret <8 x i64> [[VECINS_71]]
242170
;
243171
entry:
244172
%0 = load <8 x float>, ptr %a
@@ -274,13 +202,8 @@ define <2 x i64> @llrint_v2i64f32(ptr %a) {
274202
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
275203
; CHECK-NEXT: entry:
276204
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 8
277-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i64 0
278-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
279-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <2 x i64> undef, i64 [[TMP1]], i64 0
280-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i64 1
281-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
282-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <2 x i64> [[VECINS]], i64 [[TMP2]], i64 1
283-
; CHECK-NEXT: ret <2 x i64> [[VECINS_1]]
205+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> [[TMP0]])
206+
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
284207
;
285208
entry:
286209
%0 = load <2 x float>, ptr %a
@@ -298,19 +221,8 @@ define <4 x i64> @llrint_v4i64f32(ptr %a) {
298221
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
299222
; CHECK-NEXT: entry:
300223
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
301-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
302-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
303-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
304-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
305-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
306-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x i64> [[VECINS]], i64 [[TMP2]], i64 1
307-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
308-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
309-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
310-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
311-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
312-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
313-
; CHECK-NEXT: ret <4 x i64> [[VECINS_3]]
224+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP0]])
225+
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
314226
;
315227
entry:
316228
%0 = load <4 x float>, ptr %a
@@ -334,31 +246,14 @@ define <8 x i64> @llrint_v8i64f32(ptr %a) {
334246
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
335247
; CHECK-NEXT: entry:
336248
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A]], align 32
337-
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[TMP0]], i64 0
338-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT]])
339-
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <8 x i64> undef, i64 [[TMP1]], i64 0
340-
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <8 x float> [[TMP0]], i64 1
341-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_1]])
342-
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x i64> [[VECINS]], i64 [[TMP2]], i64 1
343-
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <8 x float> [[TMP0]], i64 2
344-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_2]])
345-
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x i64> [[VECINS_1]], i64 [[TMP3]], i64 2
346-
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <8 x float> [[TMP0]], i64 3
347-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_3]])
348-
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x i64> [[VECINS_2]], i64 [[TMP4]], i64 3
349-
; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <8 x float> [[TMP0]], i64 4
350-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_4]])
351-
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x i64> [[VECINS_3]], i64 [[TMP5]], i64 4
352-
; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <8 x float> [[TMP0]], i64 5
353-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_5]])
354-
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x i64> [[VECINS_4]], i64 [[TMP6]], i64 5
355-
; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <8 x float> [[TMP0]], i64 6
356-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_6]])
357-
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x i64> [[VECINS_5]], i64 [[TMP7]], i64 6
358-
; CHECK-NEXT: [[VECEXT_7:%.*]] = extractelement <8 x float> [[TMP0]], i64 7
359-
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.llrint.i64.f32(float [[VECEXT_7]])
360-
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x i64> [[VECINS_6]], i64 [[TMP8]], i64 7
361-
; CHECK-NEXT: ret <8 x i64> [[VECINS_7]]
249+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
250+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP1]])
251+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
252+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
253+
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> [[TMP4]])
254+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
255+
; CHECK-NEXT: [[VECINS_71:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
256+
; CHECK-NEXT: ret <8 x i64> [[VECINS_71]]
362257
;
363258
entry:
364259
%0 = load <8 x float>, ptr %a

llvm/test/Transforms/Scalarizer/intrinsics.ll

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,12 @@ define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 {
217217

218218
define <2 x i32> @scalarize_lrint(<2 x float> %x) #0 {
219219
; CHECK-LABEL: @scalarize_lrint(
220-
; CHECK-NEXT: [[RND:%.*]] = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> [[X:%.*]])
220+
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
221+
; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.lrint.i32.f32(float [[X_I0]])
222+
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
223+
; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.lrint.i32.f32(float [[X_I1]])
224+
; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
225+
; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
221226
; CHECK-NEXT: ret <2 x i32> [[RND]]
222227
;
223228
%rnd = call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %x)
@@ -226,7 +231,12 @@ define <2 x i32> @scalarize_lrint(<2 x float> %x) #0 {
226231

227232
define <2 x i32> @scalarize_llrint(<2 x float> %x) #0 {
228233
; CHECK-LABEL: @scalarize_llrint(
229-
; CHECK-NEXT: [[RND:%.*]] = call <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float> [[X:%.*]])
234+
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
235+
; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.llrint.i32.f32(float [[X_I0]])
236+
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
237+
; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.llrint.i32.f32(float [[X_I1]])
238+
; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
239+
; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
230240
; CHECK-NEXT: ret <2 x i32> [[RND]]
231241
;
232242
%rnd = call <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float> %x)

0 commit comments

Comments
 (0)