Skip to content

Commit 70a54bc

Browse files
[SLP]Improve/fix extracts calculations for non-power-of-2 elements.
One of the previous patches introduced initial support for non-power-of-2 number of elements but some parts of the SLP vectorizer still were not adjusted to handle the costs correctly. Patch fixes it by improving analysis of the non-power-of-2 number of elements and fixes in the cost of the extractelements instructions. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #93213
1 parent 0f26aa5 commit 70a54bc

File tree

8 files changed

+214
-308
lines changed

8 files changed

+214
-308
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 95 additions & 52 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,25 +56,12 @@ define half @reduction_half16(<16 x half> %vec16) {
5656
;
5757
; VI-LABEL: @reduction_half16(
5858
; VI-NEXT: entry:
59-
; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 8
60-
; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
61-
; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
62-
; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
63-
; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
64-
; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
65-
; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
66-
; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
67-
; VI-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
59+
; VI-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6860
; VI-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[TMP0]])
69-
; VI-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT8]]
70-
; VI-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT9]], [[ELT10]]
71-
; VI-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT11]], [[ELT12]]
72-
; VI-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[ELT13]], [[ELT14]]
73-
; VI-NEXT: [[OP_RDX4:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
74-
; VI-NEXT: [[OP_RDX5:%.*]] = fadd fast half [[OP_RDX2]], [[OP_RDX3]]
75-
; VI-NEXT: [[OP_RDX6:%.*]] = fadd fast half [[OP_RDX4]], [[OP_RDX5]]
76-
; VI-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX6]], [[ELT15]]
77-
; VI-NEXT: ret half [[OP_RDX7]]
61+
; VI-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
62+
; VI-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> [[TMP2]])
63+
; VI-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
64+
; VI-NEXT: ret half [[OP_RDX]]
7865
;
7966
entry:
8067
%elt0 = extractelement <16 x half> %vec16, i64 0

llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll

Lines changed: 60 additions & 84 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,23 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
5151
;
5252
; AVX-LABEL: @ceil_floor(
5353
; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
54-
; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1
55-
; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2
5654
; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
5755
; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
58-
; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]])
59-
; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]])
56+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
57+
; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
6058
; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
61-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
62-
; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]])
63-
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
64-
; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]])
59+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
60+
; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
61+
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
62+
; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
6563
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
66-
; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1
67-
; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2
68-
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3
69-
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70-
; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
71-
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
72-
; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
64+
; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
65+
; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
66+
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
67+
; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
68+
; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
69+
; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70+
; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
7371
; AVX-NEXT: ret <8 x float> [[R71]]
7472
;
7573
; AVX2-LABEL: @ceil_floor(

llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,23 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
5151
;
5252
; AVX-LABEL: @ceil_floor(
5353
; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
54-
; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1
55-
; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2
5654
; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
5755
; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
58-
; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]])
59-
; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]])
56+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
57+
; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
6058
; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
61-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
62-
; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]])
63-
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
64-
; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]])
59+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
60+
; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
61+
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
62+
; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
6563
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
66-
; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1
67-
; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2
68-
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3
69-
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70-
; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
71-
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
72-
; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
64+
; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
65+
; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
66+
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
67+
; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
68+
; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
69+
; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
70+
; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
7371
; AVX-NEXT: ret <8 x float> [[R71]]
7472
;
7573
; AVX2-LABEL: @ceil_floor(

llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll

Lines changed: 14 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE
33
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
4-
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5-
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
6-
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
7-
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
4+
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
5+
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
6+
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
7+
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX
88

99
;
1010
; 128-bit vectors
@@ -213,62 +213,16 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
213213

214214
; PR50392
215215
define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) {
216-
; SSE-LABEL: @test_v4f64_partial_swizzle(
217-
; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
218-
; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
219-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
220-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
221-
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
222-
; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
223-
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
224-
; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
225-
; SSE-NEXT: ret <4 x double> [[R03]]
226-
;
227-
; SLM-LABEL: @test_v4f64_partial_swizzle(
228-
; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
229-
; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
230-
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
231-
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
232-
; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
233-
; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
234-
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
235-
; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
236-
; SLM-NEXT: ret <4 x double> [[R03]]
237-
;
238-
; AVX1-LABEL: @test_v4f64_partial_swizzle(
239-
; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
240-
; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
241-
; AVX1-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
242-
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2>
243-
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
244-
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
245-
; AVX1-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0
246-
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
247-
; AVX1-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
248-
; AVX1-NEXT: ret <4 x double> [[R031]]
249-
;
250-
; AVX2-LABEL: @test_v4f64_partial_swizzle(
251-
; AVX2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
252-
; AVX2-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
253-
; AVX2-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]]
254-
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2>
255-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 0, i32 3>
256-
; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
257-
; AVX2-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0
258-
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
259-
; AVX2-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
260-
; AVX2-NEXT: ret <4 x double> [[R031]]
261-
;
262-
; AVX512-LABEL: @test_v4f64_partial_swizzle(
263-
; AVX512-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
264-
; AVX512-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
265-
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
266-
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
267-
; AVX512-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
268-
; AVX512-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
269-
; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
270-
; AVX512-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
271-
; AVX512-NEXT: ret <4 x double> [[R03]]
216+
; CHECK-LABEL: @test_v4f64_partial_swizzle(
217+
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
218+
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
219+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
220+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
221+
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
222+
; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
223+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
224+
; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
225+
; CHECK-NEXT: ret <4 x double> [[R03]]
272226
;
273227
%a0 = extractelement <4 x double> %a, i64 0
274228
%a1 = extractelement <4 x double> %a, i64 1

0 commit comments

Comments
 (0)