Skip to content

Commit 5d1fe3f

Browse files
committed
[PowerPC] Implemented Vector Multiply Builtins
This patch implements the vector multiply builtins (the vmulxxd family of instructions) and adds the appropriate test cases for them. The builtins use the vector multiply instructions introduced with ISA 3.1. Differential Revision: https://reviews.llvm.org/D83955
1 parent 01700c4 commit 5d1fe3f

File tree

6 files changed

+175
-7
lines changed

6 files changed

+175
-7
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ BUILTIN(__builtin_altivec_vmulouh, "V4UiV8UsV8Us", "")
100100
BUILTIN(__builtin_altivec_vmulosh, "V4SiV8SsV8Ss", "")
101101
BUILTIN(__builtin_altivec_vmulouw, "V2ULLiV4UiV4Ui", "")
102102
BUILTIN(__builtin_altivec_vmulosw, "V2SLLiV4SiV4Si", "")
103+
BUILTIN(__builtin_altivec_vmuleud, "V1ULLLiV2ULLiV2ULLi", "")
104+
BUILTIN(__builtin_altivec_vmulesd, "V1SLLLiV2SLLiV2SLLi", "")
105+
BUILTIN(__builtin_altivec_vmuloud, "V1ULLLiV2ULLiV2ULLi", "")
106+
BUILTIN(__builtin_altivec_vmulosd, "V1SLLLiV2SLLiV2SLLi", "")
107+
BUILTIN(__builtin_altivec_vmsumcud, "V1ULLLiV2ULLiV2ULLiV1ULLLi", "")
103108

104109
BUILTIN(__builtin_altivec_vnmsubfp, "V4fV4fV4fV4f", "")
105110

clang/lib/Headers/altivec.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5487,6 +5487,16 @@ vec_msum(vector unsigned short __a, vector unsigned short __b,
54875487
return __builtin_altivec_vmsumuhm(__a, __b, __c);
54885488
}
54895489

5490+
/* vec_msumc */
5491+
5492+
#ifdef __POWER10_VECTOR__
5493+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
5494+
vec_msumc(vector unsigned long long __a, vector unsigned long long __b,
5495+
vector unsigned __int128 __c) {
5496+
return __builtin_altivec_vmsumcud(__a, __b, __c);
5497+
}
5498+
#endif
5499+
54905500
/* vec_vmsummbm */
54915501

54925502
static __inline__ vector int __attribute__((__always_inline__))
@@ -5713,6 +5723,26 @@ vec_mule(vector unsigned int __a, vector unsigned int __b) {
57135723
}
57145724
#endif
57155725

5726+
#ifdef __POWER10_VECTOR__
5727+
static __inline__ vector signed __int128 __ATTRS_o_ai
5728+
vec_mule(vector signed long long __a, vector signed long long __b) {
5729+
#ifdef __LITTLE_ENDIAN__
5730+
return __builtin_altivec_vmulosd(__a, __b);
5731+
#else
5732+
return __builtin_altivec_vmulesd(__a, __b);
5733+
#endif
5734+
}
5735+
5736+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
5737+
vec_mule(vector unsigned long long __a, vector unsigned long long __b) {
5738+
#ifdef __LITTLE_ENDIAN__
5739+
return __builtin_altivec_vmuloud(__a, __b);
5740+
#else
5741+
return __builtin_altivec_vmuleud(__a, __b);
5742+
#endif
5743+
}
5744+
#endif
5745+
57165746
/* vec_vmulesb */
57175747

57185748
static __inline__ vector short __attribute__((__always_inline__))
@@ -5839,6 +5869,26 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) {
58395869
}
58405870
#endif
58415871

5872+
#ifdef __POWER10_VECTOR__
5873+
static __inline__ vector signed __int128 __ATTRS_o_ai
5874+
vec_mulo(vector signed long long __a, vector signed long long __b) {
5875+
#ifdef __LITTLE_ENDIAN__
5876+
return __builtin_altivec_vmulesd(__a, __b);
5877+
#else
5878+
return __builtin_altivec_vmulosd(__a, __b);
5879+
#endif
5880+
}
5881+
5882+
static __inline__ vector unsigned __int128 __ATTRS_o_ai
5883+
vec_mulo(vector unsigned long long __a, vector unsigned long long __b) {
5884+
#ifdef __LITTLE_ENDIAN__
5885+
return __builtin_altivec_vmuleud(__a, __b);
5886+
#else
5887+
return __builtin_altivec_vmuloud(__a, __b);
5888+
#endif
5889+
}
5890+
#endif
5891+
58425892
/* vec_vmulosb */
58435893

58445894
static __inline__ vector short __attribute__((__always_inline__))

clang/test/CodeGen/builtins-ppc-p10vector.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,44 @@ int test_vec_test_lsbb_all_zeros(void) {
928928
return vec_test_lsbb_all_zeros(vuca);
929929
}
930930

931+
vector unsigned __int128 test_vec_mule_u128(void) {
932+
// CHECK-BE: @llvm.ppc.altivec.vmuleud(<2 x i64>
933+
// CHECK-BE-NEXT: ret <1 x i128>
934+
// CHECK-LE: @llvm.ppc.altivec.vmuloud(<2 x i64>
935+
// CHECK-LE-NEXT: ret <1 x i128>
936+
return vec_mule(vulla, vullb);
937+
}
938+
939+
vector signed __int128 test_vec_mule_s128(void) {
940+
// CHECK-BE: @llvm.ppc.altivec.vmulesd(<2 x i64>
941+
// CHECK-BE-NEXT: ret <1 x i128>
942+
// CHECK-LE: @llvm.ppc.altivec.vmulosd(<2 x i64>
943+
// CHECK-LE-NEXT: ret <1 x i128>
944+
return vec_mule(vslla, vsllb);
945+
}
946+
947+
vector unsigned __int128 test_vec_mulo_u128(void) {
948+
// CHECK-BE: @llvm.ppc.altivec.vmuloud(<2 x i64>
949+
// CHECK-BE-NEXT: ret <1 x i128>
950+
// CHECK-LE: @llvm.ppc.altivec.vmuleud(<2 x i64>
951+
// CHECK-LE-NEXT: ret <1 x i128>
952+
return vec_mulo(vulla, vullb);
953+
}
954+
955+
vector signed __int128 test_vec_mulo_s128(void) {
956+
// CHECK-BE: @llvm.ppc.altivec.vmulosd(<2 x i64>
957+
// CHECK-BE-NEXT: ret <1 x i128>
958+
// CHECK-LE: @llvm.ppc.altivec.vmulesd(<2 x i64>
959+
// CHECK-LE-NEXT: ret <1 x i128>
960+
return vec_mulo(vslla, vsllb);
961+
}
962+
963+
vector unsigned __int128 test_vec_msumc_u128(void) {
964+
// CHECK: @llvm.ppc.altivec.vmsumcud(<2 x i64>
965+
// CHECK-NEXT: ret <1 x i128>
966+
return vec_msumc(vulla, vullb, vui128a);
967+
}
968+
931969
vector signed __int128 test_vec_xl_sext_i8(void) {
932970
// CHECK: load i8
933971
// CHECK: sext i8

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,13 @@ class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix>
193193
[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
194194
[IntrNoMem]>;
195195

196+
/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
197+
/// vectors and returns one v1i128. These intrinsics have no side effects.
198+
class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix>
199+
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
200+
[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
201+
[IntrNoMem]>;
202+
196203
//===----------------------------------------------------------------------===//
197204
// PowerPC VSX Intrinsic Class Definitions.
198205
//
@@ -673,6 +680,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
673680
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
674681
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
675682
llvm_v4i32_ty], [IntrNoMem]>;
683+
def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
684+
Intrinsic<[llvm_v1i128_ty],
685+
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
676686

677687
// Vector Multiply Instructions.
678688
def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
@@ -684,6 +694,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
684694
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
685695
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
686696
[IntrNoMem]>;
697+
def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">;
687698
def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
688699
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
689700
[IntrNoMem]>;
@@ -693,6 +704,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
693704
def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
694705
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
695706
[IntrNoMem]>;
707+
def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">;
696708

697709
def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
698710
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -703,6 +715,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
703715
def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
704716
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
705717
[IntrNoMem]>;
718+
def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">;
706719
def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
707720
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
708721
[IntrNoMem]>;
@@ -712,6 +725,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
712725
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
713726
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
714727
[IntrNoMem]>;
728+
def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">;
715729

716730
// Vector Sum Instructions.
717731
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,16 +1256,25 @@ let Predicates = [IsISA3_1] in {
12561256
}
12571257

12581258
def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1259-
"vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
1259+
"vmulesd $vD, $vA, $vB", IIC_VecGeneral,
1260+
[(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
1261+
v2i64:$vB))]>;
12601262
def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1261-
"vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
1263+
"vmuleud $vD, $vA, $vB", IIC_VecGeneral,
1264+
[(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
1265+
v2i64:$vB))]>;
12621266
def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1263-
"vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
1267+
"vmulosd $vD, $vA, $vB", IIC_VecGeneral,
1268+
[(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
1269+
v2i64:$vB))]>;
12641270
def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1265-
"vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
1266-
def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
1267-
(ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
1268-
"vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
1271+
"vmuloud $vD, $vA, $vB", IIC_VecGeneral,
1272+
[(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
1273+
v2i64:$vB))]>;
1274+
def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
1275+
"vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
1276+
[(set v1i128:$vD, (int_ppc_altivec_vmsumcud
1277+
v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
12691278
def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
12701279
"vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
12711280
def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),

llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
; This includes the low order and high order versions of vector multiply.
1111
; The low order version operates on doublewords, whereas the high order version
1212
; operates on signed and unsigned words and doublewords.
13+
; This file also includes 128 bit vector multiply instructions.
1314

1415
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
1516
; CHECK-LABEL: test_vmulld:
@@ -122,3 +123,54 @@ entry:
122123
%mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
123124
ret <2 x i64> %mulh
124125
}
126+
127+
declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
128+
declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
129+
declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
130+
declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
131+
declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
132+
133+
define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
134+
; CHECK-LABEL: test_vmuleud:
135+
; CHECK: # %bb.0:
136+
; CHECK-NEXT: vmuleud v2, v2, v3
137+
; CHECK-NEXT: blr
138+
%tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
139+
ret <1 x i128> %tmp
140+
}
141+
142+
define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
143+
; CHECK-LABEL: test_vmuloud:
144+
; CHECK: # %bb.0:
145+
; CHECK-NEXT: vmuloud v2, v2, v3
146+
; CHECK-NEXT: blr
147+
%tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
148+
ret <1 x i128> %tmp
149+
}
150+
151+
define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
152+
; CHECK-LABEL: test_vmulesd:
153+
; CHECK: # %bb.0:
154+
; CHECK-NEXT: vmulesd v2, v2, v3
155+
; CHECK-NEXT: blr
156+
%tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
157+
ret <1 x i128> %tmp
158+
}
159+
160+
define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
161+
; CHECK-LABEL: test_vmulosd:
162+
; CHECK: # %bb.0:
163+
; CHECK-NEXT: vmulosd v2, v2, v3
164+
; CHECK-NEXT: blr
165+
%tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
166+
ret <1 x i128> %tmp
167+
}
168+
169+
define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
170+
; CHECK-LABEL: test_vmsumcud:
171+
; CHECK: # %bb.0:
172+
; CHECK-NEXT: vmsumcud v2, v2, v3, v4
173+
; CHECK-NEXT: blr
174+
%tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
175+
ret <1 x i128> %tmp
176+
}

0 commit comments

Comments
 (0)