Skip to content

Commit 2f0308e

Browse files
authored
[arm64] Add tan intrinsic lowering (#94545)
This change implements part of the investigation in #87367 into supporting IEEE math operations as intrinsics, which was discussed in this RFC: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294. This PR covers only tan. Now that the x86 tan backend has landed (#90503), we can add other backends, since the shared pieces are in tree. Changes: - `llvm/include/llvm/Analysis/VecFuncs.def` - vectorization of tan for arm64 backends. - `llvm/lib/Target/AArch64/AArch64FastISel.cpp` - add tan to the libcall table. - `llvm/lib/Target/AArch64/AArch64ISelLowering.cpp` - add tan expansion for f128, f16, and vector/NEON operations. - `llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp` - define `G_FTAN` as a legal arm64 instruction. Resolves #94755.
1 parent ac02168 commit 2f0308e

16 files changed

+704
-41
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2), "_ZGV_LLVM_N2v")
9292
TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4), "_ZGV_LLVM_N4v")
9393
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4), "_ZGV_LLVM_N4v")
9494

95+
TLI_DEFINE_VECFUNC("tan", "_simd_tan_d2", FIXED(2), "_ZGV_LLVM_N2v")
96+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_simd_tan_d2", FIXED(2), "_ZGV_LLVM_N2v")
97+
TLI_DEFINE_VECFUNC("tanf", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v")
98+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_simd_tan_f4", FIXED(4), "_ZGV_LLVM_N4v")
99+
95100
// Floating-Point Arithmetic and Auxiliary Functions
96101
TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2), "_ZGV_LLVM_N2v")
97102
TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4), "_ZGV_LLVM_N4v")
@@ -584,6 +589,7 @@ TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v")
584589
TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
585590

586591
TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
592+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
587593

588594
TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v")
589595

@@ -681,6 +687,7 @@ TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v")
681687
TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
682688

683689
TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
690+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
684691

685692
TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v")
686693

@@ -828,6 +835,8 @@ TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED, "_ZGVsMxv")
828835

829836
TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv")
830837
TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv")
838+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv")
839+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv")
831840

832841
TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv")
833842
TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -1087,6 +1096,11 @@ TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
10871096
TLI_DEFINE_VECFUNC("tan", "armpl_svtan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
10881097
TLI_DEFINE_VECFUNC("tanf", "armpl_svtan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
10891098

1099+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "armpl_vtanq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1100+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "armpl_vtanq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1101+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "armpl_svtan_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
1102+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "armpl_svtan_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
1103+
10901104
TLI_DEFINE_VECFUNC("tanh", "armpl_vtanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
10911105
TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
10921106
TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")

llvm/lib/Target/AArch64/AArch64FastISel.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3534,6 +3534,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
35343534
}
35353535
case Intrinsic::sin:
35363536
case Intrinsic::cos:
3537+
case Intrinsic::tan:
35373538
case Intrinsic::pow: {
35383539
MVT RetVT;
35393540
if (!isTypeLegal(II->getType(), RetVT))
@@ -3542,11 +3543,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
35423543
if (RetVT != MVT::f32 && RetVT != MVT::f64)
35433544
return false;
35443545

3545-
static const RTLIB::Libcall LibCallTable[3][2] = {
3546-
{ RTLIB::SIN_F32, RTLIB::SIN_F64 },
3547-
{ RTLIB::COS_F32, RTLIB::COS_F64 },
3548-
{ RTLIB::POW_F32, RTLIB::POW_F64 }
3549-
};
3546+
static const RTLIB::Libcall LibCallTable[4][2] = {
3547+
{RTLIB::SIN_F32, RTLIB::SIN_F64},
3548+
{RTLIB::COS_F32, RTLIB::COS_F64},
3549+
{RTLIB::TAN_F32, RTLIB::TAN_F64},
3550+
{RTLIB::POW_F32, RTLIB::POW_F64}};
35503551
RTLIB::Libcall LC;
35513552
bool Is64Bit = RetVT == MVT::f64;
35523553
switch (II->getIntrinsicID()) {
@@ -3558,9 +3559,12 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
35583559
case Intrinsic::cos:
35593560
LC = LibCallTable[1][Is64Bit];
35603561
break;
3561-
case Intrinsic::pow:
3562+
case Intrinsic::tan:
35623563
LC = LibCallTable[2][Is64Bit];
35633564
break;
3565+
case Intrinsic::pow:
3566+
LC = LibCallTable[3][Is64Bit];
3567+
break;
35643568
}
35653569

35663570
ArgListTy Args;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
543543
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
544544
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
545545
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
546+
setOperationAction(ISD::FTAN, MVT::f128, Expand);
546547
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
547548
setOperationAction(ISD::SETCC, MVT::f128, Custom);
548549
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
@@ -727,14 +728,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
727728
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
728729
}
729730

730-
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
731-
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
732-
ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
733-
ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
734-
ISD::STRICT_FREM,
735-
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
736-
ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
737-
ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
731+
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
732+
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
733+
ISD::FTAN, ISD::FEXP, ISD::FEXP2,
734+
ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
735+
ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
736+
ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
737+
ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
738+
ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
738739
setOperationAction(Op, MVT::f16, Promote);
739740
setOperationAction(Op, MVT::v4f16, Expand);
740741
setOperationAction(Op, MVT::v8f16, Expand);
@@ -1171,26 +1172,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11711172
if (Subtarget->isNeonAvailable()) {
11721173
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
11731174
// silliness like this:
1175+
// clang-format off
11741176
for (auto Op :
1175-
{ISD::SELECT, ISD::SELECT_CC,
1176-
ISD::BR_CC, ISD::FADD, ISD::FSUB,
1177-
ISD::FMUL, ISD::FDIV, ISD::FMA,
1178-
ISD::FNEG, ISD::FABS, ISD::FCEIL,
1179-
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
1180-
ISD::FSIN, ISD::FCOS, ISD::FPOW,
1181-
ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
1182-
ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
1183-
ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
1184-
ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
1185-
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
1186-
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
1187-
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
1188-
ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
1189-
ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
1190-
ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
1191-
ISD::STRICT_FMAXIMUM})
1177+
{ISD::SELECT, ISD::SELECT_CC,
1178+
ISD::BR_CC, ISD::FADD, ISD::FSUB,
1179+
ISD::FMUL, ISD::FDIV, ISD::FMA,
1180+
ISD::FNEG, ISD::FABS, ISD::FCEIL,
1181+
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
1182+
ISD::FSIN, ISD::FCOS, ISD::FTAN,
1183+
ISD::FPOW, ISD::FLOG, ISD::FLOG2,
1184+
ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
1185+
ISD::FEXP10, ISD::FRINT, ISD::FROUND,
1186+
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM,
1187+
ISD::FMAXNUM, ISD::FMINIMUM, ISD::FMAXIMUM,
1188+
ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1189+
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL,
1190+
ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT,
1191+
ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC,
1192+
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
1193+
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM})
11921194
setOperationAction(Op, MVT::v1f64, Expand);
1193-
1195+
// clang-format on
11941196
for (auto Op :
11951197
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
11961198
ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
@@ -1622,6 +1624,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
16221624
setOperationAction(ISD::FCOS, VT, Expand);
16231625
setOperationAction(ISD::FSIN, VT, Expand);
16241626
setOperationAction(ISD::FSINCOS, VT, Expand);
1627+
setOperationAction(ISD::FTAN, VT, Expand);
16251628
setOperationAction(ISD::FEXP, VT, Expand);
16261629
setOperationAction(ISD::FEXP2, VT, Expand);
16271630
setOperationAction(ISD::FEXP10, VT, Expand);
@@ -1803,6 +1806,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
18031806
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
18041807
setOperationAction(ISD::FSIN, VT, Expand);
18051808
setOperationAction(ISD::FCOS, VT, Expand);
1809+
setOperationAction(ISD::FTAN, VT, Expand);
18061810
setOperationAction(ISD::FPOW, VT, Expand);
18071811
setOperationAction(ISD::FLOG, VT, Expand);
18081812
setOperationAction(ISD::FLOG2, VT, Expand);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
267267
.libcallFor({{s64, s128}})
268268
.minScalarOrElt(1, MinFPScalar);
269269

270-
getActionDefinitionsBuilder(
271-
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
272-
G_FEXP, G_FEXP2, G_FEXP10})
270+
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
271+
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10})
273272
// We need a call for these, so we always need to scalarize.
274273
.scalarize(0)
275274
// Regardless of FP16 support, widen 16-bit elements to 32-bits.

llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,6 +2313,14 @@ define float @test_sin_f32(float %x) {
23132313
ret float %y
23142314
}
23152315

2316+
declare float @llvm.tan.f32(float)
2317+
define float @test_tan_f32(float %x) {
2318+
; CHECK-LABEL: name: test_tan_f32
2319+
; CHECK: %{{[0-9]+}}:_(s32) = G_FTAN %{{[0-9]+}}
2320+
%y = call float @llvm.tan.f32(float %x)
2321+
ret float %y
2322+
}
2323+
23162324
declare float @llvm.sqrt.f32(float)
23172325
define float @test_sqrt_f32(float %x) {
23182326
; CHECK-LABEL: name: test_sqrt_f32

0 commit comments

Comments
 (0)