Skip to content

Commit bc94a99

Browse files
committed
Add amd_vrs16_tanhf support. Remove the sinh test declarations, since amdlibm does not support sinh; the f64 tanh declarations are dropped as well.
https://github.com/amd/aocl-libm-ose/blob/9c0b67293ba01e509a6308247d82a8f1adfbbc67/scripts/libalm.def#L224
1 parent 1cea067 commit bc94a99

File tree

2 files changed

+75
-7
lines changed

2 files changed

+75
-7
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,6 +1331,11 @@ TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LL
13311331

13321332
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13331333
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1334+
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1335+
1336+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1337+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1338+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13341339

13351340
TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13361341
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,7 @@ declare float @coshf(float) #0
3939
declare double @llvm.cosh.f64(double) #0
4040
declare float @llvm.cosh.f32(float) #0
4141

42-
declare double @sinh(double) #0
43-
declare float @sinhf(float) #0
44-
declare double @llvm.sinh.f64(double) #0
45-
declare float @llvm.sinh.f32(float) #0
46-
47-
declare double @tanh(double) #0
4842
declare float @tanhf(float) #0
49-
declare double @llvm.tanh.f64(double) #0
5043
declare float @llvm.tanh.f32(float) #0
5144

5245
declare double @pow(double, double) #0
@@ -303,6 +296,10 @@ define void @tan_f64(ptr nocapture %varray) {
303296
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
304297
; CHECK: ret void
305298
;
299+
; CHECK-AVX-VF2-LABEL: @tan_f64(
300+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
301+
; CHECK-AVX-VF2: ret void
302+
;
306303
; CHECK-AVX512-VF8-LABEL: @tan_f64(
307304
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
308305
; CHECK-AVX512-VF8: ret void
@@ -357,6 +354,10 @@ define void @tan_f64_intrinsic(ptr nocapture %varray) {
357354
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
358355
; CHECK: ret void
359356
;
357+
; CHECK-AVX-VF2-LABEL: @tan_f64_intrinsic(
358+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
359+
; CHECK-AVX-VF2: ret void
360+
;
360361
; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
361362
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
362363
; CHECK-AVX512-VF8: ret void
@@ -565,6 +566,10 @@ define void @atan_f64(ptr nocapture %varray) {
565566
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
566567
; CHECK: ret void
567568
;
569+
; CHECK-AVX-VF2-LABEL: @atan_f64(
570+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
571+
; CHECK-AVX-VF2: ret void
572+
;
568573
; CHECK-AVX512-VF8-LABEL: @atan_f64(
569574
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
570575
; CHECK-AVX512-VF8: ret void
@@ -619,6 +624,10 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) {
619624
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
620625
; CHECK: ret void
621626
;
627+
; CHECK-AVX-VF2-LABEL: @atan_f64_intrinsic(
628+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
629+
; CHECK-AVX-VF2: ret void
630+
;
622631
; CHECK-AVX512-VF8-LABEL: @atan_f64_intrinsic(
623632
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
624633
; CHECK-AVX512-VF8: ret void
@@ -760,6 +769,60 @@ for.end:
760769
ret void
761770
}
762771

772+
define void @tanh_f32(ptr nocapture %varray) {
773+
; CHECK-LABEL: @tanh_f32(
774+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
775+
; CHECK: ret void
776+
;
777+
; CHECK-AVX512-VF16-LABEL: @tanh_f32(
778+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
779+
; CHECK-AVX512-VF16: ret void
780+
;
781+
entry:
782+
br label %for.body
783+
784+
for.body:
785+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
786+
%tmp = trunc i64 %iv to i32
787+
%conv = sitofp i32 %tmp to float
788+
%call = tail call float @tanhf(float %conv)
789+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
790+
store float %call, ptr %arrayidx, align 4
791+
%iv.next = add nuw nsw i64 %iv, 1
792+
%exitcond = icmp eq i64 %iv.next, 1000
793+
br i1 %exitcond, label %for.end, label %for.body
794+
795+
for.end:
796+
ret void
797+
}
798+
799+
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
800+
; CHECK-LABEL: @tanh_f32_intrinsic(
801+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
802+
; CHECK: ret void
803+
;
804+
; CHECK-AVX512-VF16-LABEL: @tanh_f32_intrinsic(
805+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
806+
; CHECK-AVX512-VF16: ret void
807+
;
808+
entry:
809+
br label %for.body
810+
811+
for.body:
812+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
813+
%tmp = trunc i64 %iv to i32
814+
%conv = sitofp i32 %tmp to float
815+
%call = tail call float @llvm.tanh.f32(float %conv)
816+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
817+
store float %call, ptr %arrayidx, align 4
818+
%iv.next = add nuw nsw i64 %iv, 1
819+
%exitcond = icmp eq i64 %iv.next, 1000
820+
br i1 %exitcond, label %for.end, label %for.body
821+
822+
for.end:
823+
ret void
824+
}
825+
763826
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
764827
; CHECK-LABEL: @pow_f64(
765828
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])

0 commit comments

Comments
 (0)