; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
+ ; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefix=CHECK-AVX-VF2
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF8
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF16
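+ ; The RUN lines force VF=4 (default CHECK prefix) and VF=2 (CHECK-AVX-VF2) on AVX, and VF=8/VF=16 (CHECK-AVX512-VF8/-VF16) on AVX512.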
@@ -20,9 +21,7 @@ declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

- declare double @acos(double) #0
declare float @acosf(float) #0
- declare double @llvm.acos.f64(double) #0
declare float @llvm.acos.f32(float) #0

declare double @asin(double) #0
@@ -461,6 +460,306 @@ for.end:
ret void
}

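+ ; asin: at VF=8 with AVX512, the scalar @asin call should be widened to the AMD LibM variant @amd_vrd8_asin.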
+ define void @asin_f64(ptr nocapture %varray) {
+ ; CHECK-AVX512-VF8-LABEL: @asin_f64(
+ ; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF8: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @asin(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
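+ ; asinf: maps to @amd_vrs4_asinf at VF=4 and @amd_vrs16_asinf at VF=16.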
+ define void @asin_f32(ptr nocapture %varray) {
+ ; CHECK-LABEL: @asin_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF16-LABEL: @asin_f32(
+ ; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF16: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @asinf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
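+ ; The llvm.asin.f64 intrinsic should get the same @amd_vrd8_asin mapping as the libcall.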
+ define void @asin_f64_intrinsic(ptr nocapture %varray) {
+ ; CHECK-AVX512-VF8-LABEL: @asin_f64_intrinsic(
+ ; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF8: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.asin.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
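+ ; Likewise, llvm.asin.f32 maps to @amd_vrs4_asinf and @amd_vrs16_asinf.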
+ define void @asin_f32_intrinsic(ptr nocapture %varray) {
+ ; CHECK-LABEL: @asin_f32_intrinsic(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF16-LABEL: @asin_f32_intrinsic(
+ ; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF16: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.asin.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
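+ ; atan: maps to @amd_vrd4_atan at VF=4 and @amd_vrd8_atan at VF=8.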
+ define void @atan_f64(ptr nocapture %varray) {
+ ; CHECK-LABEL: @atan_f64(
+ ; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF8-LABEL: @atan_f64(
+ ; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF8: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @atan(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
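+ ; atanf: maps to @amd_vrs4_atanf at VF=4 and @amd_vrs16_atanf at VF=16.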
+ define void @atan_f32(ptr nocapture %varray) {
+ ; CHECK-LABEL: @atan_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF16-LABEL: @atan_f32(
+ ; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF16: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @atanf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
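+ ; llvm.atan.f64 should follow the same mappings as the atan libcall.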
+ define void @atan_f64_intrinsic(ptr nocapture %varray) {
+ ; CHECK-LABEL: @atan_f64_intrinsic(
+ ; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF8-LABEL: @atan_f64_intrinsic(
+ ; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF8: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.atan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
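+ ; llvm.atan.f32 should follow the same mappings as the atanf libcall.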
+ define void @atan_f32_intrinsic(ptr nocapture %varray) {
+ ; CHECK-LABEL: @atan_f32_intrinsic(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ ; CHECK-AVX512-VF16-LABEL: @atan_f32_intrinsic(
+ ; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
+ ; CHECK-AVX512-VF16: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.atan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
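+ ; cosh: checked at VF=2, where the call should map to the 2-lane variant @amd_vrd2_cosh.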
+ define void @cosh_f64(ptr nocapture %varray) {
+ ; CHECK-AVX-VF2-LABEL: @cosh_f64(
+ ; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+ ; CHECK-AVX-VF2: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @cosh(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
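+ ; coshf: maps to @amd_vrs4_coshf at VF=4.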
+ define void @cosh_f32(ptr nocapture %varray) {
+ ; CHECK-LABEL: @cosh_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @coshf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
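+ ; llvm.cosh.f64 should also map to @amd_vrd2_cosh at VF=2.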
+ define void @cosh_f64_intrinsic(ptr nocapture %varray) {
+ ; CHECK-AVX-VF2-LABEL: @cosh_f64_intrinsic(
+ ; CHECK-AVX-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+ ; CHECK-AVX-VF2: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.cosh.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
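+ ; llvm.cosh.f32 should also map to @amd_vrs4_coshf at VF=4.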
+ define void @cosh_f32_intrinsic(ptr nocapture %varray) {
+ ; CHECK-LABEL: @cosh_f32_intrinsic(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.cosh.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+ }
+
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])