; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc --mtriple=loongarch32 --mattr=+d --verify-machineinstrs < %s \
+ ; RUN: llc --mtriple=loongarch32 --mattr=+lasx --verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefix=LA32 %s
- ; RUN: llc --mtriple=loongarch64 --mattr=+d --verify-machineinstrs < %s \
+ ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
; RUN: | FileCheck --check-prefix=LA64 %s
- ; RUN: llc --mtriple=loongarch64 --mattr=+d --verify-machineinstrs \
+ ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs \
; RUN: --code-model=large < %s | FileCheck --check-prefix=LA64-LARGE %s

@g_i8 = dso_local global i8 0
@@ -405,10 +405,7 @@ define dso_local void @store_f64() nounwind {
; LA32-LABEL: store_f64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_f64)
- ; LA32-NEXT: addi.w $a1, $zero, 1
- ; LA32-NEXT: movgr2fr.w $fa0, $a1
- ; LA32-NEXT: ffint.s.w $fa0, $fa0
- ; LA32-NEXT: fcvt.d.s $fa0, $fa0
+ ; LA32-NEXT: vldi $vr0, -912
; LA32-NEXT: fst.d $fa0, $a0, %pc_lo12(g_f64)
; LA32-NEXT: ret
;
@@ -538,6 +535,184 @@ entry:
ret void
}

+ @g_i32x4_src = dso_local global [4 x i32] zeroinitializer, align 16
+ @g_i32x4_dst = dso_local global [4 x i32] zeroinitializer, align 16
+
+ define dso_local void @copy_i32x4() nounwind {
+ ; LA32-LABEL: copy_i32x4:
+ ; LA32: # %bb.0: # %entry
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src)
+ ; LA32-NEXT: vld $vr0, $a0, 0
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst)
+ ; LA32-NEXT: vst $vr0, $a0, 0
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: copy_i32x4:
+ ; LA64: # %bb.0: # %entry
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src)
+ ; LA64-NEXT: vld $vr0, $a0, 0
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst)
+ ; LA64-NEXT: vst $vr0, $a0, 0
+ ; LA64-NEXT: ret
+ ;
+ ; LA64-LARGE-LABEL: copy_i32x4:
+ ; LA64-LARGE: # %bb.0: # %entry
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: vld $vr0, $a0, 0
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: vst $vr0, $a0, 0
+ ; LA64-LARGE-NEXT: ret
+ entry:
+ %0 = load <4 x i32>, ptr @g_i32x4_src, align 16
+ store <4 x i32> %0, ptr @g_i32x4_dst, align 16
+ ret void
+ }
+
+ @g_i32x8_src = dso_local global [8 x i32] zeroinitializer, align 32
+ @g_i32x8_dst = dso_local global [8 x i32] zeroinitializer, align 32
+
+ define dso_local void @copy_i32x8() nounwind {
+ ; LA32-LABEL: copy_i32x8:
+ ; LA32: # %bb.0: # %entry
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_src)
+ ; LA32-NEXT: xvld $xr0, $a0, 0
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i32x4_dst)
+ ; LA32-NEXT: xvst $xr0, $a0, 0
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: copy_i32x8:
+ ; LA64: # %bb.0: # %entry
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_src)
+ ; LA64-NEXT: xvld $xr0, $a0, 0
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i32x4_dst)
+ ; LA64-NEXT: xvst $xr0, $a0, 0
+ ; LA64-NEXT: ret
+ ;
+ ; LA64-LARGE-LABEL: copy_i32x8:
+ ; LA64-LARGE: # %bb.0: # %entry
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: xvld $xr0, $a0, 0
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: xvst $xr0, $a0, 0
+ ; LA64-LARGE-NEXT: ret
+ entry:
+ %0 = load <8 x i32>, ptr @g_i32x4_src, align 32
+ store <8 x i32> %0, ptr @g_i32x4_dst, align 32
+ ret void
+ }
+
+ @g_i8x16 = dso_local global <16 x i8> zeroinitializer, align 16
+
+ define void @copy_i8_to_i8x16() {
+ ; LA32-LABEL: copy_i8_to_i8x16:
+ ; LA32: # %bb.0: # %entry
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8)
+ ; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x16)
+ ; LA32-NEXT: vst $vr0, $a0, 0
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: copy_i8_to_i8x16:
+ ; LA64: # %bb.0: # %entry
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8)
+ ; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x16)
+ ; LA64-NEXT: vst $vr0, $a0, 0
+ ; LA64-NEXT: ret
+ ;
+ ; LA64-LARGE-LABEL: copy_i8_to_i8x16:
+ ; LA64-LARGE: # %bb.0: # %entry
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: vldrepl.b $vr0, $a0, 0
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8x16)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x16)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x16)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x16)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: vst $vr0, $a0, 0
+ ; LA64-LARGE-NEXT: ret
+ entry:
+ %0 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr @g_i8, i32 0)
+ store <16 x i8> %0, ptr @g_i8x16, align 16
+ ret void
+ }
+
+ @g_i8x32 = dso_local global <32 x i8> zeroinitializer, align 32
+
+ define void @copy_i8_to_i8x32() {
+ ; LA32-LABEL: copy_i8_to_i8x32:
+ ; LA32: # %bb.0: # %entry
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8)
+ ; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+ ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32)
+ ; LA32-NEXT: addi.w $a0, $a0, %pc_lo12(g_i8x32)
+ ; LA32-NEXT: xvst $xr0, $a0, 0
+ ; LA32-NEXT: ret
+ ;
+ ; LA64-LABEL: copy_i8_to_i8x32:
+ ; LA64: # %bb.0: # %entry
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8)
+ ; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+ ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32)
+ ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_i8x32)
+ ; LA64-NEXT: xvst $xr0, $a0, 0
+ ; LA64-NEXT: ret
+ ;
+ ; LA64-LARGE-LABEL: copy_i8_to_i8x32:
+ ; LA64-LARGE: # %bb.0: # %entry
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: xvldrepl.b $xr0, $a0, 0
+ ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i8x32)
+ ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i8x32)
+ ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i8x32)
+ ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i8x32)
+ ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
+ ; LA64-LARGE-NEXT: xvst $xr0, $a0, 0
+ ; LA64-LARGE-NEXT: ret
+ entry:
+ %0 = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr @g_i8, i32 0)
+ store <32 x i8> %0, ptr @g_i8x32, align 32
+ ret void
+ }
+
@g_rmw = dso_local global i64 0

define dso_local void @rmw() nounwind {
@@ -659,23 +834,23 @@ define dso_local void @control_flow_with_mem_access() nounwind {
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4)
; LA32-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4)
; LA32-NEXT: ori $a2, $zero, 1
- ; LA32-NEXT: blt $a1, $a2, .LBB21_2
+ ; LA32-NEXT: blt $a1, $a2, .LBB25_2
; LA32-NEXT: # %bb.1: # %if.then
; LA32-NEXT: ori $a1, $zero, 10
; LA32-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4)
- ; LA32-NEXT: .LBB21_2: # %if.end
+ ; LA32-NEXT: .LBB25_2: # %if.end
; LA32-NEXT: ret
;
; LA64-LABEL: control_flow_with_mem_access:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4)
; LA64-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4)
; LA64-NEXT: ori $a2, $zero, 1
- ; LA64-NEXT: blt $a1, $a2, .LBB21_2
+ ; LA64-NEXT: blt $a1, $a2, .LBB25_2
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ori $a1, $zero, 10
; LA64-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4)
- ; LA64-NEXT: .LBB21_2: # %if.end
+ ; LA64-NEXT: .LBB25_2: # %if.end
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: control_flow_with_mem_access:
@@ -686,15 +861,15 @@ define dso_local void @control_flow_with_mem_access() nounwind {
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a32+4)
; LA64-LARGE-NEXT: ldx.w $a0, $a1, $a0
; LA64-LARGE-NEXT: ori $a1, $zero, 1
- ; LA64-LARGE-NEXT: blt $a0, $a1, .LBB21_2
+ ; LA64-LARGE-NEXT: blt $a0, $a1, .LBB25_2
; LA64-LARGE-NEXT: # %bb.1: # %if.then
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a32+4)
; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a32+4)
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a32+4)
; LA64-LARGE-NEXT: ori $a2, $zero, 10
; LA64-LARGE-NEXT: stx.w $a2, $a1, $a0
- ; LA64-LARGE-NEXT: .LBB21_2: # %if.end
+ ; LA64-LARGE-NEXT: .LBB25_2: # %if.end
; LA64-LARGE-NEXT: ret
entry:
%0 = load i32, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1), align 4