@@ -692,6 +692,104 @@ define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vsc
   ret <vscale x 2 x double> %res
 }
 
+define <vscale x 2 x bfloat> @splice_nxv2bf16_neg_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_neg_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -1)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_neg2_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_neg2_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -2)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_first_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_first_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 1)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_last_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) vscale_range(16,16) #0 {
+; CHECK-LABEL: splice_nxv2bf16_last_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 31)
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_neg_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_neg_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl1
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -1)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_neg3_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_neg3_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl3
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -3)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_first_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_first_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 1)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_last_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) vscale_range(16,16) #0 {
+; CHECK-LABEL: splice_nxv4bf16_last_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 63)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_first_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv8bf16_first_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 1)
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_last_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) vscale_range(16,16) #0 {
+; CHECK-LABEL: splice_nxv8bf16_last_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 127)
+  ret <vscale x 8 x bfloat> %res
+}
+
 ; Ensure predicate based splice is promoted to use ZPRs.
 define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
 ; CHECK-LABEL: splice_nxv2i1:
@@ -834,12 +932,14 @@ declare <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale
 declare <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
 declare <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
 declare <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
 declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
 declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
 declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
 declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
 declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
 declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
 declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
 declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
 declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
@@ -848,4 +948,8 @@ declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <
 declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
 declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
 
+declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
 attributes #0 = { nounwind "target-features"="+sve" }