Skip to content

Commit a00cbca

Browse files
RolandF77 authored and frederik-h committed
[PowerPC] Add intrinsics and tests for basic Dense Math enablement instructions (llvm#129913)
Add intrinsics and tests for Dense Math basic enablement instructions dmsetdmrz, dmmr, dmxor.
1 parent 9a03b5c commit a00cbca

File tree

3 files changed

+150
-3
lines changed

3 files changed

+150
-3
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,16 @@ let TargetPrefix = "ppc" in {
16441644
def int_ppc_mma_xxsetaccz :
16451645
DefaultAttrsIntrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
16461646

1647+
// Dense Math basic enablement intrinsics. All three produce a v1024i1 value
// and are marked IntrNoMem (no memory access).
//
// dmsetdmrz: takes no operands and returns a v1024i1 result.
// NOTE(review): by analogy with xxsetaccz above, presumably yields an
// all-zero DMR value -- confirm against the ISA description.
def int_ppc_mma_dmsetdmrz :
1648+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [], [IntrNoMem]>;
1649+
1650+
// dmmr: one v1024i1 operand, one v1024i1 result.
def int_ppc_mma_dmmr :
1651+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty], [IntrNoMem]>;
1652+
1653+
// dmxor: two v1024i1 operands, one v1024i1 result.
def int_ppc_mma_dmxor :
1654+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
1655+
llvm_v1024i1_ty], [IntrNoMem]>;
1656+
16471657
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
16481658
defm int_ppc_mma_xvi4ger8 :
16491659
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;

llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,15 @@ let Predicates = [IsISAFuture] in {
105105
"dmxxinstfdmr256 $AT, $XBp, $P", []>;
106106

107107
// DMMR: one dmr input ($AB), one dmr output ($AT). This change replaces the
// empty selection pattern with one that matches int_ppc_mma_dmmr on a
// v1024i1 operand, so the intrinsic is selected to this instruction.
def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB),
108-
"dmmr $AT, $AB", []>;
108+
"dmmr $AT, $AB",
109+
[(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>;
109110

110111
// DMXOR: two dmr inputs ($ATi, $AB), one dmr output ($AT). $ATi is tied to
// $AT via RegConstraint and excluded from the encoding via NoEncode, so the
// assembly string only names $AT and $AB. This change replaces the empty
// selection pattern with one that matches int_ppc_mma_dmxor, passing $ATi as
// the first intrinsic operand and $AB as the second.
def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB),
111-
"dmxor $AT, $AB", []>,
112+
"dmxor $AT, $AB",
113+
[(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, v1024i1:$AB))]>,
112114
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
113115

114116
// DMSETDMRZ: no inputs, one dmr output ($AT). This change replaces the empty
// selection pattern with one that matches the operand-less
// int_ppc_mma_dmsetdmrz intrinsic and defines a v1024i1 result in $AT.
def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins),
115-
"dmsetdmrz $AT", NoItinerary, []>;
117+
"dmsetdmrz $AT", NoItinerary,
118+
[(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
116119
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -mcpu=future -ppc-asm-full-reg-names \
4+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
6+
; RUN: -mcpu=future -ppc-asm-full-reg-names \
7+
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
8+
9+
; tdmrz: calls @llvm.ppc.mma.dmsetdmrz and stores the <1024 x i1> result to
; %resp (%vp1 is not used by the body). The expected code is a single
; `dmsetdmrz dmr0`, then two dmxxextfdmr512 extractions each followed by two
; stxvp stores. CHECK covers the powerpc64le-unknown-linux-gnu RUN line and
; CHECK-BE the powerpc64-ibm-aix RUN line.
define void @tdmrz(ptr nocapture readonly %vp1, ptr nocapture %resp) {
10+
; CHECK-LABEL: tdmrz:
11+
; CHECK: # %bb.0: # %entry
12+
; CHECK-NEXT: dmsetdmrz dmr0
13+
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
14+
; CHECK-NEXT: stxvp vsp34, 96(r4)
15+
; CHECK-NEXT: stxvp vsp36, 64(r4)
16+
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
17+
; CHECK-NEXT: stxvp vsp34, 32(r4)
18+
; CHECK-NEXT: stxvp vsp36, 0(r4)
19+
; CHECK-NEXT: blr
20+
;
21+
; CHECK-BE-LABEL: tdmrz:
22+
; CHECK-BE: # %bb.0: # %entry
23+
; CHECK-BE-NEXT: dmsetdmrz dmr0
24+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
25+
; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
26+
; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
27+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
28+
; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
29+
; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
30+
; CHECK-BE-NEXT: blr
31+
entry:
32+
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
33+
store <1024 x i1> %z, ptr %resp, align 32
34+
ret void
35+
}
36+
37+
; tdmmr: loads a <1024 x i1> value from %vp1, passes it through
; @llvm.ppc.mma.dmmr, and stores the result to %resp. The expected code
; loads the input with four lxvp and two dmxxinstfdmr512 instructions, emits
; `dmmr dmr0, dmr0`, then extracts with two dmxxextfdmr512 and stores with
; four stxvp. CHECK covers the powerpc64le-unknown-linux-gnu RUN line and
; CHECK-BE the powerpc64-ibm-aix RUN line.
define void @tdmmr(ptr nocapture readonly %vp1, ptr nocapture %resp) {
38+
; CHECK-LABEL: tdmmr:
39+
; CHECK: # %bb.0: # %entry
40+
; CHECK-NEXT: lxvp vsp34, 0(r3)
41+
; CHECK-NEXT: lxvp vsp36, 32(r3)
42+
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
43+
; CHECK-NEXT: lxvp vsp34, 64(r3)
44+
; CHECK-NEXT: lxvp vsp36, 96(r3)
45+
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
46+
; CHECK-NEXT: dmmr dmr0, dmr0
47+
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
48+
; CHECK-NEXT: stxvp vsp34, 96(r4)
49+
; CHECK-NEXT: stxvp vsp36, 64(r4)
50+
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
51+
; CHECK-NEXT: stxvp vsp34, 32(r4)
52+
; CHECK-NEXT: stxvp vsp36, 0(r4)
53+
; CHECK-NEXT: blr
54+
;
55+
; CHECK-BE-LABEL: tdmmr:
56+
; CHECK-BE: # %bb.0: # %entry
57+
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
58+
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
59+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
60+
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
61+
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
62+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
63+
; CHECK-BE-NEXT: dmmr dmr0, dmr0
64+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
65+
; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
66+
; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
67+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
68+
; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
69+
; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
70+
; CHECK-BE-NEXT: blr
71+
entry:
72+
%l = load <1024 x i1>, ptr %vp1, align 32
73+
%c = call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %l)
74+
store <1024 x i1> %c, ptr %resp, align 32
75+
ret void
76+
}
77+
78+
; tdmxor: loads two <1024 x i1> values from %vp1 and %vp2, combines them
; with @llvm.ppc.mma.dmxor (%vp1 value first, %vp2 value second), and stores
; the result to %resp. The expected code loads both inputs via lxvp and
; dmxxinstfdmr512 into dmr0 and dmr1, emits `dmxor dmr0, dmr1`, then extracts
; dmr0 with two dmxxextfdmr512 and stores with four stxvp. CHECK covers the
; powerpc64le-unknown-linux-gnu RUN line and CHECK-BE the powerpc64-ibm-aix
; RUN line.
define void @tdmxor(ptr nocapture readonly %vp1, ptr %vp2, ptr nocapture %resp) {
79+
; CHECK-LABEL: tdmxor:
80+
; CHECK: # %bb.0: # %entry
81+
; CHECK-NEXT: lxvp vsp34, 0(r3)
82+
; CHECK-NEXT: lxvp vsp36, 32(r3)
83+
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
84+
; CHECK-NEXT: lxvp vsp34, 64(r3)
85+
; CHECK-NEXT: lxvp vsp36, 96(r3)
86+
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
87+
; CHECK-NEXT: lxvp vsp34, 0(r4)
88+
; CHECK-NEXT: lxvp vsp36, 32(r4)
89+
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
90+
; CHECK-NEXT: lxvp vsp34, 64(r4)
91+
; CHECK-NEXT: lxvp vsp36, 96(r4)
92+
; CHECK-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
93+
; CHECK-NEXT: dmxor dmr0, dmr1
94+
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
95+
; CHECK-NEXT: stxvp vsp34, 96(r5)
96+
; CHECK-NEXT: stxvp vsp36, 64(r5)
97+
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
98+
; CHECK-NEXT: stxvp vsp34, 32(r5)
99+
; CHECK-NEXT: stxvp vsp36, 0(r5)
100+
; CHECK-NEXT: blr
101+
;
102+
; CHECK-BE-LABEL: tdmxor:
103+
; CHECK-BE: # %bb.0: # %entry
104+
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
105+
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
106+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
107+
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
108+
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
109+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
110+
; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
111+
; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
112+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1
113+
; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
114+
; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
115+
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0
116+
; CHECK-BE-NEXT: dmxor dmr0, dmr1
117+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
118+
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
119+
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
120+
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
121+
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
122+
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
123+
; CHECK-BE-NEXT: blr
124+
entry:
125+
%l = load <1024 x i1>, ptr %vp1, align 32
126+
%r = load <1024 x i1>, ptr %vp2, align 32
127+
%x = call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %l, <1024 x i1> %r)
128+
store <1024 x i1> %x, ptr %resp, align 32
129+
ret void
130+
}
131+
132+
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
133+
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
134+
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)

0 commit comments

Comments
 (0)