Skip to content

[AArch64] Implement NEON vamin/vamax intrinsics #99041

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion clang/include/clang/Basic/arm_neon.td
Original file line number Diff line number Diff line change
Expand Up @@ -2120,4 +2120,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
[ImmCheck<3, ImmCheck0_3>]>;
}
}
}

let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
}
17 changes: 17 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13573,6 +13573,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
}

case NEON::BI__builtin_neon_vamin_f16:
case NEON::BI__builtin_neon_vaminq_f16:
case NEON::BI__builtin_neon_vamin_f32:
case NEON::BI__builtin_neon_vaminq_f32:
case NEON::BI__builtin_neon_vaminq_f64: {
Int = Intrinsic::aarch64_neon_famin;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
}
case NEON::BI__builtin_neon_vamax_f16:
case NEON::BI__builtin_neon_vamaxq_f16:
case NEON::BI__builtin_neon_vamax_f32:
case NEON::BI__builtin_neon_vamaxq_f32:
case NEON::BI__builtin_neon_vamaxq_f64: {
Int = Intrinsic::aarch64_neon_famax;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
}
}
}

Expand Down
107 changes: 107 additions & 0 deletions clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
// CHECK-NEXT: ret <4 x half> [[FAMIN2_I]]
//
float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
return vamin_f16(vn, vm);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
// CHECK-NEXT: ret <8 x half> [[FAMIN2_I]]
//
float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
return vaminq_f16(vn, vm);
}

// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
// CHECK-NEXT: ret <2 x float> [[FAMIN2_I]]
//
float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
return vamin_f32(vn, vm);
}

// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
// CHECK-NEXT: ret <4 x float> [[FAMIN2_I]]
//
float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
return vaminq_f32(vn, vm);
}

// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
// CHECK-NEXT: ret <2 x double> [[FAMIN2_I]]
//
float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
return vaminq_f64(vn, vm);
}

// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
// CHECK-NEXT: ret <4 x half> [[FAMAX2_I]]
//
float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
return vamax_f16(vn, vm);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
// CHECK-NEXT: ret <8 x half> [[FAMAX2_I]]
//
float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
return vamaxq_f16(vn, vm);
}

// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
// CHECK-NEXT: ret <2 x float> [[FAMAX2_I]]
//
float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
return vamax_f32(vn, vm);
}

// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
// CHECK-NEXT: ret <4 x float> [[FAMAX2_I]]
//
float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
return vamaxq_f32(vn, vm);
}

// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
// CHECK-NEXT: ret <2 x double> [[FAMAX2_I]]
//
float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
return vamaxq_f64(vn, vm);
}
35 changes: 35 additions & 0 deletions clang/test/Sema/aarch64-neon-faminmax-no-faminmax.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -emit-llvm -verify %s -o /dev/null

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

float16x4_t a16x4, b16x4;
float16x8_t a16x8, b16x8;
float32x2_t a32x2, b32x2;
float32x4_t a32x4, b32x4;
float64x2_t a64x2, b64x2;


void test() {
(void) vamin_f16 (a16x4, b16x4);
// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'faminmax'}}
(void) vaminq_f16(a16x8, b16x8);
// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'faminmax'}}
(void) vamin_f32 (a32x2, b32x2);
// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'faminmax'}}
(void) vaminq_f32(a32x4, b32x4);
// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'faminmax'}}
(void) vaminq_f64(a64x2, b64x2);
// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'faminmax'}}
(void) vamax_f16 (a16x4, b16x4);
// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'faminmax'}}
(void) vamaxq_f16(a16x8, b16x8);
// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'faminmax'}}
(void) vamax_f32 (a32x2, b32x2);
// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'faminmax'}}
(void) vamaxq_f32(a32x4, b32x4);
// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'faminmax'}}
(void) vamaxq_f64(a64x2, b64x2);
// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'faminmax'}}
}
34 changes: 34 additions & 0 deletions clang/test/Sema/aarch64-neon-faminmax-no-neon.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +faminmax -emit-llvm -verify %s -o /dev/null

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

float16x4_t a16x4, b16x4;
float16x8_t a16x8, b16x8;
float32x2_t a32x2, b32x2;
float32x4_t a32x4, b32x4;
float64x2_t a64x2, b64x2;

void test () {
(void) vamin_f16 (a16x4, b16x4);
// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'neon'}}
(void) vaminq_f16(a16x8, b16x8);
// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'neon'}}
(void) vamin_f32 (a32x2, b32x2);
// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'neon'}}
(void) vaminq_f32(a32x4, b32x4);
// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'neon'}}
(void) vaminq_f64(a64x2, b64x2);
// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'neon'}}
(void) vamax_f16 (a16x4, b16x4);
// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'neon'}}
(void) vamaxq_f16(a16x8, b16x8);
// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'neon'}}
(void) vamax_f32 (a32x2, b32x2);
// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'neon'}}
(void) vamaxq_f32(a32x4, b32x4);
// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'neon'}}
(void) vamaxq_f64(a64x2, b64x2);
// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'neon'}}
}
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3801,3 +3801,6 @@ def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;

def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
// Neon absolute maximum and minimum
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -10134,13 +10134,15 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
// fminimum(abs(a), abs(b)) -> famin(a, b)
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
[(fminimum (fabs node:$Rn), (fabs node:$Rm)),
[(int_aarch64_neon_famin node:$Rn, node:$Rm),
(fminimum (fabs node:$Rn), (fabs node:$Rm)),
(fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;

// fmaximum(abs(a), abs(b)) -> famax(a, b)
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
[(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
[(int_aarch64_neon_famax node:$Rn, node:$Rm),
(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
(fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;

let Predicates = [HasNEON, HasFAMINMAX] in {
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/AArch64/neon-famin-famax.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-linux"

define <4 x half> @test_famin_f16(<4 x half> %vn, <4 x half> %vm) #0 {
; CHECK-LABEL: test_famin_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%res = call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> %vn, <4 x half> %vm)
ret <4 x half> %res
}

define <8 x half> @test_famin2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
; CHECK-LABEL: test_famin2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> %vn, <8 x half> %vm)
ret <8 x half> %res
}

define <2 x float> @test_famin_f32(<2 x float> %vn, <2 x float> %vm) #0 {
; CHECK-LABEL: test_famin_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%res = call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> %vn, <2 x float> %vm)
ret <2 x float> %res
}

define <4 x float> @test_famin2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
; CHECK-LABEL: test_famin2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%res = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %vn, <4 x float> %vm)
ret <4 x float> %res
}

define <2 x double> @test_famin_f64(<2 x double> %vn, <2 x double> %vm) #0 {
; CHECK-LABEL: test_famin_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> %vn, <2 x double> %vm)
ret <2 x double> %res
}

define <4 x half> @test_famax_f16(<4 x half> %vn, <4 x half> %vm) #0 {
; CHECK-LABEL: test_famax_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%res = call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> %vn, <4 x half> %vm)
ret <4 x half> %res
}

define <8 x half> @test_famax2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
; CHECK-LABEL: test_famax2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> %vn, <8 x half> %vm)
ret <8 x half> %res
}

define <2 x float> @test_famax_f32(<2 x float> %vn, <2 x float> %vm) #0 {
; CHECK-LABEL: test_famax_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%res = call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> %vn, <2 x float> %vm)
ret <2 x float> %res
}

define <4 x float> @test_famax2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
; CHECK-LABEL: test_famax2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%res = call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> %vn, <4 x float> %vm)
ret <4 x float> %res
}

define <2 x double> @test_famax_f64(<2 x double> %vn, <2 x double> %vm) #0 {
; CHECK-LABEL: test_famax_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> %vn, <2 x double> %vm)
ret <2 x double> %res
}

attributes #0 = { "target-features"="+neon,+faminmax" }
Loading