; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails, please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Testing that masked gathers operating on scalable vectors that are
; packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv4i32(
; CHECK: call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32
define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %res
}

; Testing that masked gathers operating on scalable vectors of FP data
; that is packed in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv2f64(
; CHECK: call <vscale x 2 x double> @llvm.masked.gather.nxv2f64
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru) {
  %res = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
  ret <vscale x 2 x double> %res
}

; Testing that masked gathers operating on scalable vectors of FP data
; that is unpacked in SVE registers are not scalarized.

; CHECK-LABEL: @masked_gather_nxv2f16(
; CHECK: call <vscale x 2 x half> @llvm.masked.gather.nxv2f16
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru) {
  %res = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
  ret <vscale x 2 x half> %res
}

; Testing that masked gathers operating on 64-bit fixed vectors are
; scalarized because NEON does not support masked gather instructions
; (a rough sketch of the expected expansion follows the test below).

; CHECK-LABEL: @masked_gather_v2f32(
; CHECK-NOT: @llvm.masked.gather.v2f32(
define <2 x float> @masked_gather_v2f32(<2 x float*> %ld, <2 x i1> %masks, <2 x float> %passthru) {
  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthru)
  ret <2 x float> %res
}
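
; Illustrative only (not checked by this test): when the gather above is
; scalarized, the pass is expected to expand it into a per-lane sequence
; roughly of the form
;   %mask0 = extractelement <2 x i1> %masks, i64 0
;   br i1 %mask0, label %cond.load, label %else
; where the cond.load block extracts that lane's pointer, loads from it and
; inserts the loaded value into the result, and the else block merges with
; %passthru via a phi. The value and block names here are purely illustrative.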

; Testing that masked gathers operating on 128-bit fixed vectors are
; scalarized because NEON does not support masked gather instructions
; and because we are not targeting fixed-width SVE.

; CHECK-LABEL: @masked_gather_v4i32(
; CHECK-NOT: @llvm.masked.gather.v4i32(
define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
  ret <4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthru)
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)