Skip to content

Commit a7f3d17

Browse files
authored
[GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator (#85199)
This patch adds support for the @llvm.experimental.vector.{interleave2, deinterleave2} intrinsics to IRTranslator for fixed-width vector types. They are lowered to vector shuffles, in roughly the same manner as SelectionDAG.
1 parent 41bdcaa commit a7f3d17

File tree

7 files changed

+266
-68
lines changed

7 files changed

+266
-68
lines changed

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,14 @@ class IRTranslator : public MachineFunctionPass {
243243
bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
244244
unsigned Opcode);
245245

246+
// Translate @llvm.experimental.vector.interleave2 and
247+
// @llvm.experimental.vector.deinterleave2 intrinsics for fixed-width vector
248+
// types into vector shuffles.
// \p CI is the intrinsic call being translated and \p MIRBuilder is the
// builder the shuffle instructions are emitted through. Each helper asserts
// that \p CI is the matching intrinsic; the fixed-width check itself is done
// by the caller before dispatching here.
249+
bool translateVectorInterleave2Intrinsic(const CallInst &CI,
250+
MachineIRBuilder &MIRBuilder);
251+
bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
252+
MachineIRBuilder &MIRBuilder);
253+
246254
void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
247255

248256
bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/Loads.h"
2222
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2323
#include "llvm/Analysis/ValueTracking.h"
24+
#include "llvm/Analysis/VectorUtils.h"
2425
#include "llvm/CodeGen/Analysis.h"
2526
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
2627
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -1770,6 +1771,41 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
17701771
return true;
17711772
}
17721773

1774+
// Lower a call to @llvm.experimental.vector.interleave2 into a single
// G_SHUFFLE_VECTOR of its two operands.
// \p CI must be the interleave2 intrinsic call (asserted below); the shuffle
// is emitted through \p MIRBuilder. Always returns true.
bool IRTranslator::translateVectorInterleave2Intrinsic(
1775+
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
1776+
assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2 &&
1777+
"This function can only be called on the interleave2 intrinsic!");
1778+
// Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
1779+
Register Op0 = getOrCreateVReg(*CI.getOperand(0));
1780+
Register Op1 = getOrCreateVReg(*CI.getOperand(1));
1781+
Register Res = getOrCreateVReg(CI);
1782+
1783+
// The mask is sized from the operand type, not the result type: for two
// <2 x s32> operands it is (0, 2, 1, 3), i.e. elements are taken alternately
// from Op0 and Op1 (see the irtranslator interleave2 MIR test).
LLT OpTy = MRI->getType(Op0);
1784+
MIRBuilder.buildShuffleVector(Res, Op0, Op1,
1785+
createInterleaveMask(OpTy.getNumElements(), 2));
1786+
1787+
return true;
1788+
}
1789+
1790+
// Lower a call to @llvm.experimental.vector.deinterleave2 into two
// G_SHUFFLE_VECTOR instructions, one per result vector.
// \p CI must be the deinterleave2 intrinsic call (asserted below); the
// instructions are emitted through \p MIRBuilder. Always returns true.
bool IRTranslator::translateVectorDeinterleave2Intrinsic(
1791+
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
1792+
assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2 &&
1793+
"This function can only be called on the deinterleave2 intrinsic!");
1794+
// Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
1795+
// SelectionDAG).
1796+
Register Op = getOrCreateVReg(*CI.getOperand(0));
1797+
// The shuffle takes two sources; the second is an undef value of the same
// type as Op (it shows up as G_IMPLICIT_DEF in the MIR test).
auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
1798+
// The call produces two values, so it maps to two virtual registers.
ArrayRef<Register> Res = getOrCreateVRegs(CI);
1799+
1800+
// Both masks have as many entries as one result vector. With a stride of 2,
// start 0 selects the even elements and start 1 the odd ones: for a
// <4 x s32> input the masks are (0, 2) and (1, 3).
LLT ResTy = MRI->getType(Res[0]);
1801+
MIRBuilder.buildShuffleVector(Res[0], Op, Undef,
1802+
createStrideMask(0, 2, ResTy.getNumElements()));
1803+
MIRBuilder.buildShuffleVector(Res[1], Op, Undef,
1804+
createStrideMask(1, 2, ResTy.getNumElements()));
1805+
1806+
return true;
1807+
}
1808+
17731809
void IRTranslator::getStackGuard(Register DstReg,
17741810
MachineIRBuilder &MIRBuilder) {
17751811
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -2474,6 +2510,21 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
24742510

24752511
return true;
24762512
}
2513+
2514+
// interleave2 / deinterleave2 share one guard: only fixed-width vector
// operands are lowered here, then the call is dispatched to the matching
// helper.
case Intrinsic::experimental_vector_interleave2:
2515+
case Intrinsic::experimental_vector_deinterleave2: {
2516+
// Both intrinsics have at least one operand.
2517+
Value *Op0 = CI.getOperand(0);
2518+
// NOTE(review): despite its name, ResTy is the LLT of operand 0, not of the
// call's result; it is only used for the fixed-vector check below.
LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout());
2519+
// Anything that is not a fixed-width vector is reported as not translated
// (presumably leaving it to the generic failure/fallback handling -- confirm
// against the caller).
if (!ResTy.isFixedVector())
2520+
return false;
2521+
2522+
if (CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
2523+
return translateVectorInterleave2Intrinsic(CI, MIRBuilder);
2524+
2525+
return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
2526+
}
2527+
24772528
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
24782529
case Intrinsic::INTRINSIC:
24792530
#include "llvm/IR/ConstrainedOps.def"
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
3+
4+
define void @vector_deinterleave2_v4i32(<4 x i32> %a) {
5+
; CHECK-LABEL: name: vector_deinterleave2_v4i32
6+
; CHECK: bb.1 (%ir-block.0):
7+
; CHECK-NEXT: liveins: $q0
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
10+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
11+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(0, 2)
12+
; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
13+
; CHECK-NEXT: RET_ReallyLR
14+
%res = call {<2 x i32>, <2 x i32>} @llvm.experimental.vector.deinterleave2.v4i32(<4 x i32> %a)
15+
ret void
16+
}
17+
18+
define void @vector_deinterleave2_v8f32(<8 x float> %a) {
19+
; CHECK-LABEL: name: vector_deinterleave2_v8f32
20+
; CHECK: bb.1 (%ir-block.0):
21+
; CHECK-NEXT: liveins: $q0, $q1
22+
; CHECK-NEXT: {{ $}}
23+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
24+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
25+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
26+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
27+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>)
28+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
29+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(0, 2, 4, 6)
30+
; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(1, 3, 5, 7)
31+
; CHECK-NEXT: RET_ReallyLR
32+
%res = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %a)
33+
ret void
34+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s
3+
4+
define void @vector_interleave2_v4i32(<2 x i32> %a, <2 x i32> %b) {
5+
; CHECK-LABEL: name: vector_interleave2_v4i32
6+
; CHECK: bb.1 (%ir-block.0):
7+
; CHECK-NEXT: liveins: $d0, $d1
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
11+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
12+
; CHECK-NEXT: RET_ReallyLR
13+
%res = call <4 x i32> @llvm.experimental.vector.interleave2.v4i32(<2 x i32> %a, <2 x i32> %b)
14+
ret void
15+
}
16+
17+
define void @vector_interleave2_v8f32(<4 x float> %a, <4 x float> %b) {
18+
; CHECK-LABEL: name: vector_interleave2_v8f32
19+
; CHECK: bb.1 (%ir-block.0):
20+
; CHECK-NEXT: liveins: $q0, $q1
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
23+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
24+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
25+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
26+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[BITCAST]](<4 x s32>), [[BITCAST1]], shufflemask(0, 4, 1, 5, 2, 6, 3, 7)
27+
; CHECK-NEXT: RET_ReallyLR
28+
%res = call <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
29+
ret void
30+
}

llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,42 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
3-
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
4-
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
2+
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
4+
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
5+
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
6+
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
7+
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
58

69
target triple = "aarch64"
710

11+
; CHECK-GI: warning: Instruction selection used fallback path for complex_add_v16f16
12+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
13+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic
14+
815
; Expected to not transform
916
define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
10-
; CHECK-LABEL: complex_add_v2f16:
11-
; CHECK: // %bb.0: // %entry
12-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
13-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
14-
; CHECK-NEXT: mov h2, v0.h[1]
15-
; CHECK-NEXT: mov h3, v1.h[1]
16-
; CHECK-NEXT: fsub h1, h1, h2
17-
; CHECK-NEXT: fadd h0, h3, h0
18-
; CHECK-NEXT: mov v1.h[1], v0.h[0]
19-
; CHECK-NEXT: fmov d0, d1
20-
; CHECK-NEXT: ret
17+
; CHECK-SD-LABEL: complex_add_v2f16:
18+
; CHECK-SD: // %bb.0: // %entry
19+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
20+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
21+
; CHECK-SD-NEXT: mov h2, v0.h[1]
22+
; CHECK-SD-NEXT: mov h3, v1.h[1]
23+
; CHECK-SD-NEXT: fsub h1, h1, h2
24+
; CHECK-SD-NEXT: fadd h0, h3, h0
25+
; CHECK-SD-NEXT: mov v1.h[1], v0.h[0]
26+
; CHECK-SD-NEXT: fmov d0, d1
27+
; CHECK-SD-NEXT: ret
28+
;
29+
; CHECK-GI-LABEL: complex_add_v2f16:
30+
; CHECK-GI: // %bb.0: // %entry
31+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
32+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
33+
; CHECK-GI-NEXT: mov h2, v0.h[1]
34+
; CHECK-GI-NEXT: mov h3, v1.h[1]
35+
; CHECK-GI-NEXT: fsub h1, h1, h2
36+
; CHECK-GI-NEXT: fadd h0, h3, h0
37+
; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
38+
; CHECK-GI-NEXT: fmov d0, d1
39+
; CHECK-GI-NEXT: ret
2140
entry:
2241
%a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
2342
%a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:
162181

163182
; Expected not to transform as it is integer
164183
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
165-
; CHECK-LABEL: complex_add_v16i16:
166-
; CHECK: // %bb.0: // %entry
167-
; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
168-
; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
169-
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
170-
; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
171-
; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
172-
; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
173-
; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
174-
; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
175-
; CHECK-NEXT: ret
184+
; CHECK-SD-LABEL: complex_add_v16i16:
185+
; CHECK-SD: // %bb.0: // %entry
186+
; CHECK-SD-NEXT: uzp1 v4.8h, v2.8h, v3.8h
187+
; CHECK-SD-NEXT: uzp1 v5.8h, v0.8h, v1.8h
188+
; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
189+
; CHECK-SD-NEXT: uzp2 v1.8h, v2.8h, v3.8h
190+
; CHECK-SD-NEXT: sub v2.8h, v4.8h, v0.8h
191+
; CHECK-SD-NEXT: add v1.8h, v1.8h, v5.8h
192+
; CHECK-SD-NEXT: zip1 v0.8h, v2.8h, v1.8h
193+
; CHECK-SD-NEXT: zip2 v1.8h, v2.8h, v1.8h
194+
; CHECK-SD-NEXT: ret
195+
;
196+
; CHECK-GI-LABEL: complex_add_v16i16:
197+
; CHECK-GI: // %bb.0: // %entry
198+
; CHECK-GI-NEXT: uzp1 v4.8h, v0.8h, v1.8h
199+
; CHECK-GI-NEXT: uzp2 v0.8h, v0.8h, v1.8h
200+
; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v3.8h
201+
; CHECK-GI-NEXT: uzp2 v2.8h, v2.8h, v3.8h
202+
; CHECK-GI-NEXT: sub v1.8h, v1.8h, v0.8h
203+
; CHECK-GI-NEXT: add v2.8h, v2.8h, v4.8h
204+
; CHECK-GI-NEXT: zip1 v0.8h, v1.8h, v2.8h
205+
; CHECK-GI-NEXT: zip2 v1.8h, v1.8h, v2.8h
206+
; CHECK-GI-NEXT: ret
176207
entry:
177208
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
178209
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,50 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
34

45
define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
5-
; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
6-
; CHECK: // %bb.0:
7-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
8-
; CHECK-NEXT: dup v2.2s, v0.s[1]
9-
; CHECK-NEXT: mov v1.16b, v2.16b
10-
; CHECK-NEXT: mov v1.h[0], v0.h[1]
11-
; CHECK-NEXT: mov v0.h[1], v2.h[0]
12-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
13-
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
14-
; CHECK-NEXT: ret
6+
; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
7+
; CHECK-SD: // %bb.0:
8+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
9+
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
10+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
11+
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
12+
; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
13+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
14+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
15+
; CHECK-SD-NEXT: ret
16+
;
17+
; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
18+
; CHECK-GI: // %bb.0:
19+
; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
20+
; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
21+
; CHECK-GI-NEXT: mov h0, v2.h[1]
22+
; CHECK-GI-NEXT: mov h3, v1.h[1]
23+
; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
24+
; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
25+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
26+
; CHECK-GI-NEXT: fmov d0, d2
27+
; CHECK-GI-NEXT: ret
1528
%retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
1629
ret {<2 x half>, <2 x half>} %retval
1730
}
1831

1932
define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
20-
; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
21-
; CHECK: // %bb.0:
22-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
23-
; CHECK-NEXT: uzp1 v2.4h, v0.4h, v1.4h
24-
; CHECK-NEXT: uzp2 v1.4h, v0.4h, v1.4h
25-
; CHECK-NEXT: fmov d0, d2
26-
; CHECK-NEXT: ret
33+
; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
34+
; CHECK-SD: // %bb.0:
35+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
36+
; CHECK-SD-NEXT: uzp1 v2.4h, v0.4h, v1.4h
37+
; CHECK-SD-NEXT: uzp2 v1.4h, v0.4h, v1.4h
38+
; CHECK-SD-NEXT: fmov d0, d2
39+
; CHECK-SD-NEXT: ret
40+
;
41+
; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
42+
; CHECK-GI: // %bb.0:
43+
; CHECK-GI-NEXT: uzp1 v2.8h, v0.8h, v0.8h
44+
; CHECK-GI-NEXT: uzp2 v1.8h, v0.8h, v0.8h
45+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
46+
; CHECK-GI-NEXT: fmov d0, d2
47+
; CHECK-GI-NEXT: ret
2748
%retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
2849
ret {<4 x half>, <4 x half>} %retval
2950
}
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
4061
}
4162

4263
define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
43-
; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
44-
; CHECK: // %bb.0:
45-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
46-
; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
47-
; CHECK-NEXT: zip2 v1.2s, v0.2s, v1.2s
48-
; CHECK-NEXT: fmov d0, d2
49-
; CHECK-NEXT: ret
64+
; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
65+
; CHECK-SD: // %bb.0:
66+
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
67+
; CHECK-SD-NEXT: zip1 v2.2s, v0.2s, v1.2s
68+
; CHECK-SD-NEXT: zip2 v1.2s, v0.2s, v1.2s
69+
; CHECK-SD-NEXT: fmov d0, d2
70+
; CHECK-SD-NEXT: ret
71+
;
72+
; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
73+
; CHECK-GI: // %bb.0:
74+
; CHECK-GI-NEXT: uzp1 v2.4s, v0.4s, v0.4s
75+
; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v0.4s
76+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
77+
; CHECK-GI-NEXT: fmov d0, d2
78+
; CHECK-GI-NEXT: ret
5079
%retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
5180
ret {<2 x float>, <2 x float>} %retval
5281
}

0 commit comments

Comments
 (0)