Skip to content

Commit 1ccee0e

Browse files
committed
[ARM,MVE] Make vqrshrun generate the right instruction.
Summary: A copy-paste error in `arm_mve.td` meant that the MVE `vqrshrun` intrinsic family was generating the `vqshrun` machine instruction, because in the IR intrinsic call, the rounding flag argument was set to 0 rather than 1. Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D72496
1 parent 870f691 commit 1ccee0e

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ let params = [s16, s32, u16, u32], pnt = PNT_NType in {
681681
}
682682
let params = [s16, s32], pnt = PNT_NType in {
683683
defm vqshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,0,1,0)>;
684-
defm vqrshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,0,1,0)>;
684+
defm vqrshrun : VSHRN<UHalfVector, imm_1toHalfN, (? 1,1,1,0)>;
685685
}
686686
let params = T.Int, pnt = PNT_NType in {
687687
defm vsli : DyadicImmShift<Vector, imm_0toNm1>;

clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,7 +1086,7 @@ uint16x8_t test_vqrshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
10861086

10871087
// CHECK-LABEL: @test_vqrshrunbq_n_s16(
10881088
// CHECK-NEXT: entry:
1089-
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, i32 1, i32 0, i32 0)
1089+
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 1, i32 0, i32 0)
10901090
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
10911091
//
10921092
uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b)
@@ -1100,7 +1100,7 @@ uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b)
11001100

11011101
// CHECK-LABEL: @test_vqrshrunbq_n_s32(
11021102
// CHECK-NEXT: entry:
1103-
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 0, i32 0)
1103+
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 0)
11041104
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
11051105
//
11061106
uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b)
@@ -1114,7 +1114,7 @@ uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b)
11141114

11151115
// CHECK-LABEL: @test_vqrshruntq_n_s16(
11161116
// CHECK-NEXT: entry:
1117-
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 0, i32 1)
1117+
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 1)
11181118
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
11191119
//
11201120
uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b)
@@ -1128,7 +1128,7 @@ uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b)
11281128

11291129
// CHECK-LABEL: @test_vqrshruntq_n_s32(
11301130
// CHECK-NEXT: entry:
1131-
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 0, i32 1)
1131+
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 1, i32 1, i32 0, i32 1)
11321132
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
11331133
//
11341134
uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b)
@@ -1144,7 +1144,7 @@ uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b)
11441144
// CHECK-NEXT: entry:
11451145
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
11461146
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1147-
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
1147+
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
11481148
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
11491149
//
11501150
uint8x16_t test_vqrshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
@@ -1160,7 +1160,7 @@ uint8x16_t test_vqrshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
11601160
// CHECK-NEXT: entry:
11611161
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
11621162
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1163-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 1, i32 0, i32 1, i32 0, i32 0, <4 x i1> [[TMP1]])
1163+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 10, i32 1, i32 1, i32 1, i32 0, i32 0, <4 x i1> [[TMP1]])
11641164
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
11651165
//
11661166
uint16x8_t test_vqrshrunbq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
@@ -1176,7 +1176,7 @@ uint16x8_t test_vqrshrunbq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)
11761176
// CHECK-NEXT: entry:
11771177
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
11781178
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1179-
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 0, i32 1, <8 x i1> [[TMP1]])
1179+
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 3, i32 1, i32 1, i32 1, i32 0, i32 1, <8 x i1> [[TMP1]])
11801180
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
11811181
//
11821182
uint8x16_t test_vqrshruntq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
@@ -1192,7 +1192,7 @@ uint8x16_t test_vqrshruntq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p)
11921192
// CHECK-NEXT: entry:
11931193
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
11941194
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1195-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> [[TMP1]])
1195+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 13, i32 1, i32 1, i32 1, i32 0, i32 1, <4 x i1> [[TMP1]])
11961196
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
11971197
//
11981198
uint16x8_t test_vqrshruntq_m_n_s32(uint16x8_t a, int32x4_t b, mve_pred16_t p)

0 commit comments

Comments
 (0)