
Commit 1b622ff

[VP] IR expansion for inttoptr/ptrtoint

Add basic handling for VP ops that can expand to cast intrinsics.

Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D159478

1 parent: c7b25fa
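
In short, the pass now rewrites a vector-predicated cast into the equivalent unpredicated cast instruction. A minimal before/after sketch (illustrative IR, not taken from the commit; operand names follow the new test below):

  ; Before: a VP cast carrying a mask %m and an explicit vector length %evl.
  %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ; After: the mask and EVL are dropped and a plain cast remains.
  %v = inttoptr <4 x i32> %va to <4 x ptr>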

2 files changed: +148 -0 lines changed

llvm/lib/CodeGen/ExpandVectorPredication.cpp (29 additions, 0 deletions)

@@ -179,6 +179,10 @@ struct CachingVPExpander {
   Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                       VPReductionIntrinsic &PI);
 
+  /// Lower this VP cast operation to a non-VP intrinsic.
+  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+                                          VPIntrinsic &VPI);
+
   /// Lower this VP memory operation to a non-VP intrinsic.
   Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                             VPIntrinsic &VPI);

@@ -436,6 +440,27 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
   return Reduction;
 }
 
+Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+                                                           VPIntrinsic &VPI) {
+  // TODO: Add other cast intrinsics, e.g. VP_TRUNC/VP_ZEXT.
+  switch (VPI.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Not a VP cast intrinsic");
+  case Intrinsic::vp_inttoptr: {
+    Value *NewOp =
+        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    replaceOperation(*NewOp, VPI);
+    return NewOp;
+  }
+  case Intrinsic::vp_ptrtoint: {
+    Value *NewOp =
+        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    replaceOperation(*NewOp, VPI);
+    return NewOp;
+  }
+  }
+}
+
 Value *
 CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                       VPIntrinsic &VPI) {

@@ -598,6 +623,10 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
   if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
     return expandPredicationInComparison(Builder, *VPCmp);
 
+  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
+    return expandPredicationToCastIntrinsic(Builder, VPI);
+  }
+
   switch (VPI.getIntrinsicID()) {
   default:
     break;
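
The new isVPCast check in expandPredication routes every VP cast intrinsic to the helper before the main opcode switch, so both directions are handled symmetrically. For the pointer-to-int direction, the expansion looks like this (a sketch under the same assumptions as above):

  ; A masked VP ptrtoint call ...
  %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %p, <4 x i1> %m, i32 %evl)
  ; ... expands to an unpredicated cast. Discarding the predicate is sound for
  ; pure casts: they are speculatable and have no per-lane side effects.
  %v = ptrtoint <4 x ptr> %p to <4 x i32>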
New X86 llc test (119 additions, 0 deletions)

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32>, <4 x i1>, i32)

define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; X86-LABEL: inttoptr_v4p0_v4i32:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; SSE-LABEL: inttoptr_v4p0_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE-NEXT:    retq
;
; AVX1-LABEL: inttoptr_v4p0_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: inttoptr_v4p0_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: inttoptr_v4p0_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
  %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x ptr> %v
}

declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64>, <4 x i1>, i32)

define <4 x ptr> @inttoptr_v4p0_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; X86-LABEL: inttoptr_v4p0_v4i64:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; SSE-LABEL: inttoptr_v4p0_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: inttoptr_v4p0_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x ptr> %v
}

declare <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i32> @ptrtoint_v4i32_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
; X86-LABEL: ptrtoint_v4i32_v4p0:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; SSE-LABEL: ptrtoint_v4i32_v4p0:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: ptrtoint_v4i32_v4p0:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptrtoint_v4i32_v4p0:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ptrtoint_v4i32_v4p0:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i64> @ptrtoint_v4i64_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
; X86-LABEL: ptrtoint_v4i64_v4p0:
; X86:       # %bb.0:
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; SSE-LABEL: ptrtoint_v4i64_v4p0:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: ptrtoint_v4i64_v4p0:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %v = call <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}
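
A note on the checked output: in the v4i32-to-v4p0 cases, sequences like vpmovzxdq and unpcklps-with-zero are the backend zero-extending each 32-bit lane to a 64-bit pointer, consistent with the LangRef rule that inttoptr zero-extends when the source integer is narrower than the pointer. A sketch of the post-expansion IR the backend sees for those tests:

  %v = inttoptr <4 x i32> %va to <4 x ptr>  ; each lane zero-extended to 64 bits on x86-64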
