Commit 15e9a6c
Author: Francesco Petrogalli
[llvm][CodeGen] Do not scalarize llvm.masked.[gather|scatter] operating on scalable vectors.
This patch prevents the `llvm.masked.gather` and `llvm.masked.scatter` intrinsics from being scalarized when invoked on scalable vectors.

The change in `Function.cpp` is needed to prevent the warning that is raised when `getNumElements` is used in place of `getElementCount` on `VectorType` instances. The tests guard against regressions of this change.

The tests also make sure that calls to `llvm.masked.[gather|scatter]` are still scalarized when:

1. the intrinsics are operating on fixed-size vectors, and
2. the compiler is not targeting fixed-length SVE code generation.

Reviewed By: efriedma, sdesmalen

Differential Revision: https://reviews.llvm.org/D86249
Parent: 09c3424

File tree: 4 files changed (+133 lines, -2 lines)

4 files changed

+133
-2
lines changed

llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp

Lines changed: 6 additions & 0 deletions
@@ -865,6 +865,12 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                 bool &ModifiedDT) {
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
+    // The scalarization code below does not work for scalable vectors.
+    if (isa<ScalableVectorType>(II->getType()) ||
+        any_of(II->arg_operands(),
+               [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
+      return false;
+
     switch (II->getIntrinsicID()) {
     default:
       break;
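
The guard above makes the pass leave any call with a scalable-vector result or operand untouched. The reason is that the scalarized expansion is essentially a per-lane loop whose trip count must be a compile-time constant, while a scalable vector only guarantees a minimum lane count multiplied by the runtime quantity vscale. The standalone C++ sketch below illustrates the shape of that expansion for a fixed lane count; it is an illustration with invented names, not the code of the pass.

#include <array>
#include <cstddef>
#include <cstdio>

// Sketch of a fixed-width masked gather after scalarization: one conditional
// scalar load per lane. The lane count N is a compile-time constant, which is
// exactly what a <vscale x N x T> type cannot provide.
template <typename T, std::size_t N>
std::array<T, N> scalarizedGather(const std::array<const T *, N> &Ptrs,
                                  const std::array<bool, N> &Mask,
                                  const std::array<T, N> &PassThru) {
  std::array<T, N> Result = PassThru;
  for (std::size_t I = 0; I < N; ++I)
    if (Mask[I])
      Result[I] = *Ptrs[I];
  return Result;
}

int main() {
  int A = 1, B = 2;
  std::array<const int *, 2> Ptrs{&A, &B};
  std::array<bool, 2> Mask{true, false};
  std::array<int, 2> Pass{-1, -1};
  std::array<int, 2> R = scalarizedGather(Ptrs, Mask, Pass);
  std::printf("%d %d\n", R[0], R[1]); // prints "1 -1": masked-off lane keeps the pass-through value
  return 0;
}

With the guard in place, a gather or scatter on a scalable type flows through the pass unmodified, so the backend can later select native SVE gather/scatter instructions; only the fixed-width forms, exercised by the tests below, keep being expanded.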

llvm/lib/IR/Function.cpp

Lines changed: 1 addition & 2 deletions
@@ -1400,8 +1400,7 @@ static bool matchIntrinsicType(
     auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
     auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
     if (!ThisArgVecTy || !ReferenceType ||
-        (cast<FixedVectorType>(ReferenceType)->getNumElements() !=
-         cast<FixedVectorType>(ThisArgVecTy)->getNumElements()))
+        (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
       return true;
     PointerType *ThisArgEltTy =
         dyn_cast<PointerType>(ThisArgVecTy->getElementType());
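
For the intrinsic signature check, the previous code cast both vector types to FixedVectorType before comparing lane counts, which cannot accommodate scalable vector arguments; comparing getElementCount() handles fixed and scalable vectors uniformly. The small standalone struct below models the idea of such an element count (a minimum lane count plus a scalable flag, with equality over both fields); it is a sketch of the concept, not LLVM's actual ElementCount class.

#include <cassert>

// Illustrative element count: a known minimum number of lanes plus a flag
// saying whether that minimum is scaled by the runtime vscale.
struct EltCount {
  unsigned Min;  // 4 for both <4 x i32> and <vscale x 4 x i32>
  bool Scalable; // false for <4 x i32>, true for <vscale x 4 x i32>
};

inline bool operator==(EltCount A, EltCount B) {
  return A.Min == B.Min && A.Scalable == B.Scalable;
}

int main() {
  EltCount Fixed4{4, false};   // models <4 x i32>
  EltCount Scalable4{4, true}; // models <vscale x 4 x i32>
  // A fixed and a scalable count never compare equal, so the signature check
  // stays strict without ever casting to a fixed-width vector type.
  assert(!(Fixed4 == Scalable4));
  assert(Fixed4 == (EltCount{4, false}));
  return 0;
}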
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Testing that masked gathers operating on scalable vectors that are
; packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv4i32(
; CHECK: call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32
define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %res
}

; Testing that masked gathers operating on scalable vectors of FP data
; that is packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv2f64(
; CHECK: call <vscale x 2 x double> @llvm.masked.gather.nxv2f64
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru) {
  %res = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
  ret <vscale x 2 x double> %res
}

; Testing that masked gathers operating on scalable vectors of FP data
; that is unpacked in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv2f16(
; CHECK: call <vscale x 2 x half> @llvm.masked.gather.nxv2f16
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru) {
  %res = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
  ret <vscale x 2 x half> %res
}

; Testing that masked gathers operating on 64-bit fixed vectors are
; scalarized because NEON doesn't have support for masked gather
; instructions.

; CHECK-LABEL: @masked_gather_v2f32(
; CHECK-NOT: @llvm.masked.gather.v2f32(
define <2 x float> @masked_gather_v2f32(<2 x float*> %ld, <2 x i1> %masks, <2 x float> %passthru) {
  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthru)
  ret <2 x float> %res
}

; Testing that masked gathers operating on 128-bit fixed vectors are
; scalarized because NEON doesn't have support for masked gather
; instructions and because we are not targeting fixed width SVE.

; CHECK-LABEL: @masked_gather_v4i32(
; CHECK-NOT: @llvm.masked.gather.v4i32(
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
  ret <4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthru)
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Testing that masked scatters operating on scalable vectors that are
; packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_scatter_nxv4i32(
; CHECK: call void @llvm.masked.scatter.nxv4i32
define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

; Testing that masked scatters operating on scalable vectors of FP
; data that is packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_scatter_nxv2f64(
; CHECK: call void @llvm.masked.scatter.nxv2f64
define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) {
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

; Testing that masked scatters operating on scalable vectors of FP
; data that is unpacked in SVE registers are not scalarized.

; CHECK-LABEL: @masked_scatter_nxv2f16(
; CHECK: call void @llvm.masked.scatter.nxv2f16
define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) {
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

; Testing that masked scatters operating on 64-bit fixed vectors are
; scalarized because NEON doesn't have support for masked scatter
; instructions.

; CHECK-LABEL: @masked_scatter_v2f32(
; CHECK-NOT: @llvm.masked.scatter.v2f32(
define void @masked_scatter_v2f32(<2 x float> %data, <2 x float*> %ptrs, <2 x i1> %masks) {
  call void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 0, <2 x i1> %masks)
  ret void
}

; Testing that masked scatters operating on 128-bit fixed vectors are
; scalarized because NEON doesn't have support for masked scatter
; instructions and because we are not targeting fixed width SVE.

; CHECK-LABEL: @masked_scatter_v4i32(
; CHECK-NOT: @llvm.masked.scatter.v4i32(
define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
  call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
declare void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 %align, <2 x i1> %masks)
declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)
