[clang][AArch64] Correct return type of Neon vqmovun intrinsics

DavidSpickett · DavidSpickett · commit 349af8054218 · 2020-09-21T09:21:51.000+01:00
Neon intrinsics vqmovunh_s16, vqmovuns_s32, vqmovund_s64 should have unsigned return types. See https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics?search=vqmovun Fixes https://bugs.llvm.org/show_bug.cgi?id=46840 Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D85118
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
@@ -830,7 +830,7 @@ def XTN2 : SOpInst<"vmovn_high", "(<Q)<Q", "silUsUiUl", OP_XTN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Signed integer saturating extract and unsigned narrow to high
-def SQXTUN2 : SOpInst<"vqmovun_high", "(<U)(<q).", "HsHiHl", OP_SQXTUN>;
+def SQXTUN2 : SOpInst<"vqmovun_high", "(<U)(<Uq).", "HsHiHl", OP_SQXTUN>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Integer saturating extract and narrow to high
@@ -1498,7 +1498,7 @@ def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed Saturating Extract Unsigned Narrow
-def SCALAR_SQXTUN : SInst<"vqmovun", "(1<)1", "SsSiSl">;
+def SCALAR_SQXTUN : SInst<"vqmovun", "(U1<)1", "SsSiSl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Signed Saturating Extract Narrow
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -14094,24 +14094,24 @@ int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
 // CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
 // CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
 // CHECK:   ret i8 [[TMP1]]
-int8_t test_vqmovunh_s16(int16_t a) {
-  return (int8_t)vqmovunh_s16(a);
+uint8_t test_vqmovunh_s16(int16_t a) {
+  return (uint8_t)vqmovunh_s16(a);
 }
 
 // CHECK-LABEL: @test_vqmovuns_s32(
 // CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
 // CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
 // CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
 // CHECK:   ret i16 [[TMP1]]
-int16_t test_vqmovuns_s32(int32_t a) {
-  return (int16_t)vqmovuns_s32(a);
+uint16_t test_vqmovuns_s32(int32_t a) {
+  return (uint16_t)vqmovuns_s32(a);
 }
 
 // CHECK-LABEL: @test_vqmovund_s64(
 // CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
 // CHECK:   ret i32 [[VQMOVUND_S64_I]]
-int32_t test_vqmovund_s64(int64_t a) {
-  return (int32_t)vqmovund_s64(a);
+uint32_t test_vqmovund_s64(int64_t a) {
+  return (uint32_t)vqmovund_s64(a);
 }
 
 // CHECK-LABEL: @test_vqmovnh_s16(
diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -1908,7 +1908,7 @@ int32x2_t test_vqmovun_s64(int64x2_t a) {
 // CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b)
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
-int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) {
+uint8x16_t test_vqmovun_high_s16(uint8x8_t a, int16x8_t b) {
   return vqmovun_high_s16(a, b);
 }
 
@@ -1918,7 +1918,7 @@ int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) {
 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVUN_V1_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
-int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) {
+uint16x8_t test_vqmovun_high_s32(uint16x4_t a, int32x4_t b) {
   return vqmovun_high_s32(a, b);
 }
 
@@ -1928,7 +1928,7 @@ int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) {
 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8>
 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVUN_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
-int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) {
+uint32x4_t test_vqmovun_high_s64(uint32x2_t a, int64x2_t b) {
   return vqmovun_high_s64(a, b);
 }
 
diff --git a/clang/test/Sema/arm64-neon-header.c b/clang/test/Sema/arm64-neon-header.c
@@ -2,6 +2,6 @@
 
 #include <arm_neon.h>
 
-int16x8_t foo(int8x8_t p0, int16x8_t p1) {
+int16x8_t foo(uint8x8_t p0, int16x8_t p1) {
   return vqmovun_high_s16(p0, p1); // expected-warning {{incompatible vector types returning 'uint8x16_t'}}
 }

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,6 @@`
`2`	`2`
`3`	`3`	`#include <arm_neon.h>`
`4`	`4`
`5`		`-int16x8_t foo(int8x8_t p0, int16x8_t p1) {`
	`5`	`+int16x8_t foo(uint8x8_t p0, int16x8_t p1) {`
`6`	`6`	`return vqmovun_high_s16(p0, p1); // expected-warning {{incompatible vector types returning 'uint8x16_t'}}`
`7`	`7`	`}`