|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: opt < %s -passes=instcombine -mtriple=arm -S | FileCheck %s
|
2 | 3 |
|
3 | 4 | ; The alignment arguments for NEON load/store intrinsics can be increased
|
4 | 5 | ; by instcombine. Check for this.
|
5 | 6 |
|
6 |
| -; CHECK: vld4.v2i32.p0({{.*}}, i32 32) |
7 |
| -; CHECK: vst4.p0.v2i32({{.*}}, i32 16) |
8 |
| - |
9 | 7 | @x = common global [8 x i32] zeroinitializer, align 32
|
10 | 8 | @y = common global [8 x i32] zeroinitializer, align 16
|
11 | 9 |
|
12 |
| -%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } |
13 |
| - |
14 |
| -define void @test() nounwind ssp { |
15 |
| - %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0(ptr @x, i32 1) |
16 |
| - %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 |
17 |
| - %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1 |
18 |
| - %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 |
19 |
| - %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3 |
| 10 | +define void @test() { |
| 11 | +; CHECK-LABEL: define void @test() { |
| 12 | +; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr nonnull @x, i32 32) |
| 13 | +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 0 |
| 14 | +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 1 |
| 15 | +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 2 |
| 16 | +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 3 |
| 17 | +; CHECK-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr nonnull @y, <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 16) |
| 18 | +; CHECK-NEXT: ret void |
| 19 | +; |
| 20 | + %tmp1 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr @x, i32 1) |
| 21 | + %tmp2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 0 |
| 22 | + %tmp3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 1 |
| 23 | + %tmp4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 2 |
| 24 | + %tmp5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 3 |
20 | 25 | call void @llvm.arm.neon.vst4.p0.v2i32(ptr @y, <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
|
21 | 26 | ret void
|
22 | 27 | }
|
23 | 28 |
|
24 |
| -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0(ptr, i32) nounwind readonly |
25 |
| -declare void @llvm.arm.neon.vst4.p0.v2i32(ptr, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind |
| 29 | +define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) { |
| 30 | +; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align( |
| 31 | +; CHECK-SAME: ptr align 16 [[A:%.*]]) { |
| 32 | +; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr [[A]]) |
| 33 | +; CHECK-NEXT: ret { <4 x i16>, <4 x i16> } [[TMP]] |
| 34 | +; |
| 35 | + %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a) |
| 36 | + ret { <4 x i16>, <4 x i16> } %tmp |
| 37 | +} |
| 38 | + |
| 39 | +define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(ptr align 16 %a) { |
| 40 | +; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align( |
| 41 | +; CHECK-SAME: ptr align 16 [[A:%.*]]) { |
| 42 | +; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 [[A]]) |
| 43 | +; CHECK-NEXT: ret { <4 x i16>, <4 x i16> } [[TMP]] |
| 44 | +; |
| 45 | + %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a) |
| 46 | + ret { <4 x i16>, <4 x i16> } %tmp |
| 47 | +} |
| 48 | + |
| 49 | +define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align(ptr align 8 %a) { |
| 50 | +; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align( |
| 51 | +; CHECK-SAME: ptr align 8 [[A:%.*]]) { |
| 52 | +; CHECK-NEXT: [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]]) |
| 53 | +; CHECK-NEXT: ret { <4 x i16>, <4 x i16> } [[TMP]] |
| 54 | +; |
| 55 | + %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 %a) |
| 56 | + ret { <4 x i16>, <4 x i16> } %tmp |
| 57 | +} |
| 58 | + |
| 59 | +define void @test_vst1x2_no_align(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1) { |
| 60 | +; CHECK-LABEL: define void @test_vst1x2_no_align( |
| 61 | +; CHECK-SAME: ptr align 16 [[A:%.*]], <4 x i16> [[B0:%.*]], <4 x i16> [[B1:%.*]]) { |
| 62 | +; CHECK-NEXT: call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]]) |
| 63 | +; CHECK-NEXT: ret void |
| 64 | +; |
| 65 | + call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> %b0, <4 x i16> %b1) |
| 66 | + ret void |
| 67 | +} |
0 commit comments