Skip to content

Commit 1cea067

Browse files
committed

File tree

2 files changed

+319
-2
lines changed

2 files changed

+319
-2
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,10 +1292,16 @@ TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
12921292
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
12931293
TLI_DEFINE_VECFUNC("asinf", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
12941294

1295+
// llvm.asin.* intrinsic forms, mapped to the amd_vr* vector routines.
// Entries visible here: an 8-lane f64 variant and 4/8/16-lane f32 variants.
TLI_DEFINE_VECFUNC("llvm.asin.f64", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1296+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1297+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1298+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1299+
12951300

12961301
TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
12971302
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
12981303
TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1304+
12991305
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13001306
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13011307
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
@@ -1308,9 +1314,21 @@ TLI_DEFINE_VECFUNC("atanf", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13081314
TLI_DEFINE_VECFUNC("atanf", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13091315
TLI_DEFINE_VECFUNC("atanf", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13101316

1317+
// llvm.atan.* intrinsic forms, mapped to the amd_vr* vector routines:
// 2/4/8-lane f64 variants and 4/8/16-lane f32 variants.
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1318+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1319+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1320+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1321+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1322+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1323+
1324+
// cosh / coshf and their llvm.cosh.* intrinsic forms, mapped to the amd_vr*
// vector routines. Entries visible here: a 2-lane f64 variant and 4/8-lane
// f32 variants.
TLI_DEFINE_VECFUNC("cosh", "amd_vrd2_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13111325
TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13121326
TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13131327

1328+
TLI_DEFINE_VECFUNC("llvm.cosh.f64", "amd_vrd2_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1329+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1330+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1331+
13141332
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13151333
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13161334

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 301 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
2+
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefix=CHECK-AVX-VF2
23
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF8
34
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF16
45

@@ -20,9 +21,7 @@ declare float @tanf(float) #0
2021
declare double @llvm.tan.f64(double) #0
2122
declare float @llvm.tan.f32(float) #0
2223

23-
declare double @acos(double) #0
2424
declare float @acosf(float) #0
25-
declare double @llvm.acos.f64(double) #0
2625
declare float @llvm.acos.f32(float) #0
2726

2827
declare double @asin(double) #0
@@ -461,6 +460,306 @@ for.end:
461460
ret void
462461
}
463462

463+
define void @asin_f64(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; double, calls the scalar @asin and stores the result to %varray[%iv].
; Only the forced VF=8 AVX-512 run has expectations here (8-lane routine).
; NOTE(review): the double store uses align 4 - presumably copied from the
; f32 tests; confirm that align 4 (rather than 8) is intentional.
464+
; CHECK-AVX512-VF8-LABEL: @asin_f64(
465+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
466+
; CHECK-AVX512-VF8: ret void
467+
;
468+
entry:
469+
br label %for.body
470+
471+
for.body:
472+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
473+
%tmp = trunc i64 %iv to i32
474+
%conv = sitofp i32 %tmp to double
475+
%call = tail call double @asin(double %conv)
476+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
477+
store double %call, ptr %arrayidx, align 4
478+
%iv.next = add nuw nsw i64 %iv, 1
479+
%exitcond = icmp eq i64 %iv.next, 1000
480+
br i1 %exitcond, label %for.end, label %for.body
481+
482+
for.end:
483+
ret void
484+
}
485+
486+
define void @asin_f32(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; float, calls the scalar @asinf and stores the result to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=16 AVX-512 run.
487+
; CHECK-LABEL: @asin_f32(
488+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
489+
; CHECK: ret void
490+
;
491+
; CHECK-AVX512-VF16-LABEL: @asin_f32(
492+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
493+
; CHECK-AVX512-VF16: ret void
494+
;
495+
entry:
496+
br label %for.body
497+
498+
for.body:
499+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
500+
%tmp = trunc i64 %iv to i32
501+
%conv = sitofp i32 %tmp to float
502+
%call = tail call float @asinf(float %conv)
503+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
504+
store float %call, ptr %arrayidx, align 4
505+
%iv.next = add nuw nsw i64 %iv, 1
506+
%exitcond = icmp eq i64 %iv.next, 1000
507+
br i1 %exitcond, label %for.end, label %for.body
508+
509+
for.end:
510+
ret void
511+
}
512+
513+
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; Same loop shape as the libm-call variant, but the scalar call is the
; @llvm.asin.f64 intrinsic. Runs 1000 iterations, storing to %varray[%iv].
; Only the forced VF=8 AVX-512 run has expectations here (8-lane routine).
; NOTE(review): the double store uses align 4 - confirm that align 4
; (rather than 8) is intentional.
514+
; CHECK-AVX512-VF8-LABEL: @asin_f64_intrinsic(
515+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
516+
; CHECK-AVX512-VF8: ret void
517+
;
518+
entry:
519+
br label %for.body
520+
521+
for.body:
522+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
523+
%tmp = trunc i64 %iv to i32
524+
%conv = sitofp i32 %tmp to double
525+
%call = tail call double @llvm.asin.f64(double %conv)
526+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
527+
store double %call, ptr %arrayidx, align 4
528+
%iv.next = add nuw nsw i64 %iv, 1
529+
%exitcond = icmp eq i64 %iv.next, 1000
530+
br i1 %exitcond, label %for.end, label %for.body
531+
532+
for.end:
533+
ret void
534+
}
535+
536+
define void @asin_f32_intrinsic(ptr nocapture %varray) {
; Same loop shape as the libm-call variant, but the scalar call is the
; @llvm.asin.f32 intrinsic. Runs 1000 iterations, storing to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=16 AVX-512 run.
537+
; CHECK-LABEL: @asin_f32_intrinsic(
538+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
539+
; CHECK: ret void
540+
;
541+
; CHECK-AVX512-VF16-LABEL: @asin_f32_intrinsic(
542+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
543+
; CHECK-AVX512-VF16: ret void
544+
;
545+
entry:
546+
br label %for.body
547+
548+
for.body:
549+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
550+
%tmp = trunc i64 %iv to i32
551+
%conv = sitofp i32 %tmp to float
552+
%call = tail call float @llvm.asin.f32(float %conv)
553+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
554+
store float %call, ptr %arrayidx, align 4
555+
%iv.next = add nuw nsw i64 %iv, 1
556+
%exitcond = icmp eq i64 %iv.next, 1000
557+
br i1 %exitcond, label %for.end, label %for.body
558+
559+
for.end:
560+
ret void
561+
}
562+
563+
define void @atan_f64(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; double, calls the scalar @atan and stores the result to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=8 AVX-512 run.
; NOTE(review): the double store uses align 4 - confirm that align 4
; (rather than 8) is intentional.
564+
; CHECK-LABEL: @atan_f64(
565+
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
566+
; CHECK: ret void
567+
;
568+
; CHECK-AVX512-VF8-LABEL: @atan_f64(
569+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
570+
; CHECK-AVX512-VF8: ret void
571+
;
572+
entry:
573+
br label %for.body
574+
575+
for.body:
576+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
577+
%tmp = trunc i64 %iv to i32
578+
%conv = sitofp i32 %tmp to double
579+
%call = tail call double @atan(double %conv)
580+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
581+
store double %call, ptr %arrayidx, align 4
582+
%iv.next = add nuw nsw i64 %iv, 1
583+
%exitcond = icmp eq i64 %iv.next, 1000
584+
br i1 %exitcond, label %for.end, label %for.body
585+
586+
for.end:
587+
ret void
588+
}
589+
590+
define void @atan_f32(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; float, calls the scalar @atanf and stores the result to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=16 AVX-512 run.
591+
; CHECK-LABEL: @atan_f32(
592+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
593+
; CHECK: ret void
594+
;
595+
; CHECK-AVX512-VF16-LABEL: @atan_f32(
596+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
597+
; CHECK-AVX512-VF16: ret void
598+
;
599+
entry:
600+
br label %for.body
601+
602+
for.body:
603+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
604+
%tmp = trunc i64 %iv to i32
605+
%conv = sitofp i32 %tmp to float
606+
%call = tail call float @atanf(float %conv)
607+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
608+
store float %call, ptr %arrayidx, align 4
609+
%iv.next = add nuw nsw i64 %iv, 1
610+
%exitcond = icmp eq i64 %iv.next, 1000
611+
br i1 %exitcond, label %for.end, label %for.body
612+
613+
for.end:
614+
ret void
615+
}
616+
617+
define void @atan_f64_intrinsic(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; double, calls the @llvm.atan.f64 intrinsic and stores to %varray[%iv].
; Expectations cover the default (VF=4) run, the forced VF=8 AVX-512 run and
; the forced VF=2 AVX run, so that every lane count mapped for the intrinsic
; (2, 4 and 8) is exercised.
; NOTE(review): the double store uses align 4 - confirm that align 4
; (rather than 8) is intentional.
618+
; CHECK-LABEL: @atan_f64_intrinsic(
619+
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
620+
; CHECK: ret void
621+
;
622+
; CHECK-AVX512-VF8-LABEL: @atan_f64_intrinsic(
623+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
624+
; CHECK-AVX512-VF8: ret void
625+
;
; CHECK-AVX-VF2-LABEL: @atan_f64_intrinsic(
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-AVX-VF2: ret void
;
626+
entry:
627+
br label %for.body
628+
629+
for.body:
630+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
631+
%tmp = trunc i64 %iv to i32
632+
%conv = sitofp i32 %tmp to double
633+
%call = tail call double @llvm.atan.f64(double %conv)
634+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
635+
store double %call, ptr %arrayidx, align 4
636+
%iv.next = add nuw nsw i64 %iv, 1
637+
%exitcond = icmp eq i64 %iv.next, 1000
638+
br i1 %exitcond, label %for.end, label %for.body
639+
640+
for.end:
641+
ret void
642+
}
643+
644+
define void @atan_f32_intrinsic(ptr nocapture %varray) {
; Same loop shape as the libm-call variant, but the scalar call is the
; @llvm.atan.f32 intrinsic. Runs 1000 iterations, storing to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=16 AVX-512 run.
645+
; CHECK-LABEL: @atan_f32_intrinsic(
646+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
647+
; CHECK: ret void
648+
;
649+
; CHECK-AVX512-VF16-LABEL: @atan_f32_intrinsic(
650+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
651+
; CHECK-AVX512-VF16: ret void
652+
;
653+
entry:
654+
br label %for.body
655+
656+
for.body:
657+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
658+
%tmp = trunc i64 %iv to i32
659+
%conv = sitofp i32 %tmp to float
660+
%call = tail call float @llvm.atan.f32(float %conv)
661+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
662+
store float %call, ptr %arrayidx, align 4
663+
%iv.next = add nuw nsw i64 %iv, 1
664+
%exitcond = icmp eq i64 %iv.next, 1000
665+
br i1 %exitcond, label %for.end, label %for.body
666+
667+
for.end:
668+
ret void
669+
}
670+
671+
define void @cosh_f64(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; double, calls the scalar @cosh and stores the result to %varray[%iv].
; Only the forced VF=2 AVX run has expectations here (2-lane routine).
; NOTE(review): the double store uses align 4 - confirm that align 4
; (rather than 8) is intentional.
672+
; CHECK-AVX-VF2-LABEL: @cosh_f64(
673+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
674+
; CHECK-AVX-VF2: ret void
675+
;
676+
entry:
677+
br label %for.body
678+
679+
for.body:
680+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
681+
%tmp = trunc i64 %iv to i32
682+
%conv = sitofp i32 %tmp to double
683+
%call = tail call double @cosh(double %conv)
684+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
685+
store double %call, ptr %arrayidx, align 4
686+
%iv.next = add nuw nsw i64 %iv, 1
687+
%exitcond = icmp eq i64 %iv.next, 1000
688+
br i1 %exitcond, label %for.end, label %for.body
689+
690+
for.end:
691+
ret void
692+
}
693+
694+
define void @cosh_f32(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; float, calls the scalar @coshf and stores the result to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=8 AVX-512 run,
; so that both lane counts mapped for coshf (4 and 8) are exercised.
695+
; CHECK-LABEL: @cosh_f32(
696+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
697+
; CHECK: ret void
698+
;
; CHECK-AVX512-VF8-LABEL: @cosh_f32(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
699+
entry:
700+
br label %for.body
701+
702+
for.body:
703+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
704+
%tmp = trunc i64 %iv to i32
705+
%conv = sitofp i32 %tmp to float
706+
%call = tail call float @coshf(float %conv)
707+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
708+
store float %call, ptr %arrayidx, align 4
709+
%iv.next = add nuw nsw i64 %iv, 1
710+
%exitcond = icmp eq i64 %iv.next, 1000
711+
br i1 %exitcond, label %for.end, label %for.body
712+
713+
for.end:
714+
ret void
715+
}
716+
717+
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; Same loop shape as the libm-call variant, but the scalar call is the
; @llvm.cosh.f64 intrinsic. Runs 1000 iterations, storing to %varray[%iv].
; Only the forced VF=2 AVX run has expectations here (2-lane routine).
; NOTE(review): the double store uses align 4 - confirm that align 4
; (rather than 8) is intentional.
718+
; CHECK-AVX-VF2-LABEL: @cosh_f64_intrinsic(
719+
; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
720+
; CHECK-AVX-VF2: ret void
721+
;
722+
entry:
723+
br label %for.body
724+
725+
for.body:
726+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
727+
%tmp = trunc i64 %iv to i32
728+
%conv = sitofp i32 %tmp to double
729+
%call = tail call double @llvm.cosh.f64(double %conv)
730+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
731+
store double %call, ptr %arrayidx, align 4
732+
%iv.next = add nuw nsw i64 %iv, 1
733+
%exitcond = icmp eq i64 %iv.next, 1000
734+
br i1 %exitcond, label %for.end, label %for.body
735+
736+
for.end:
737+
ret void
738+
}
739+
740+
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; Test input: a 1000-iteration loop that converts the induction variable to
; float, calls the @llvm.cosh.f32 intrinsic and stores to %varray[%iv].
; Expectations cover the default (VF=4) run and the forced VF=8 AVX-512 run,
; so that both lane counts mapped for the intrinsic (4 and 8) are exercised.
741+
; CHECK-LABEL: @cosh_f32_intrinsic(
742+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
743+
; CHECK: ret void
744+
;
; CHECK-AVX512-VF8-LABEL: @cosh_f32_intrinsic(
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-AVX512-VF8: ret void
;
745+
entry:
746+
br label %for.body
747+
748+
for.body:
749+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
750+
%tmp = trunc i64 %iv to i32
751+
%conv = sitofp i32 %tmp to float
752+
%call = tail call float @llvm.cosh.f32(float %conv)
753+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
754+
store float %call, ptr %arrayidx, align 4
755+
%iv.next = add nuw nsw i64 %iv, 1
756+
%exitcond = icmp eq i64 %iv.next, 1000
757+
br i1 %exitcond, label %for.end, label %for.body
758+
759+
for.end:
760+
ret void
761+
}
762+
464763
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
465764
; CHECK-LABEL: @pow_f64(
466765
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])

0 commit comments

Comments
 (0)