Skip to content

Commit 3ae9cea

Browse files
boomanaiden154 authored and hamphet committed
[X86] Complete AMD znver4 AVX512 zeroing idioms (llvm#108740)
This patch completes scheduling information for the AVX512 zeroing idioms according to the znver4 software optimization guide.
1 parent 5cd4db2 commit 3ae9cea

File tree

4 files changed

+445
-392
lines changed

4 files changed

+445
-392
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,35 +1839,59 @@ def Zn4WriteFZeroIdiom : SchedWriteVariant<[
18391839
]>;
18401840
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
18411841
def : InstRW<[Zn4WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
1842-
VANDNPSrr, VANDNPDrr)>;
1842+
VXORPSZ128rr,
1843+
VXORPDZ128rr,
1844+
VANDNPSrr, VANDNPDrr,
1845+
VANDNPSZ128rr,
1846+
VANDNPDZ128rr)>;
18431847

18441848
def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
18451849
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
18461850
SchedVar<NoSchedPred, [WriteFLogicY]>
18471851
]>;
18481852
def : InstRW<[Zn4WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
1849-
VANDNPSYrr, VANDNPDYrr)>;
1853+
VXORPSZ256rr,
1854+
VXORPDZ256rr,
1855+
VANDNPSYrr, VANDNPDYrr,
1856+
VANDNPSZ256rr,
1857+
VANDNPDZ256rr)>;
1858+
1859+
def Zn4WriteFZeroIdiomZ : SchedWriteVariant<[
1860+
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
1861+
SchedVar<NoSchedPred, [WriteFLogicZ]>
1862+
]>;
1863+
def : InstRW<[Zn4WriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr,
1864+
VANDNPSZrr, VANDNPDZrr)>;
18501865

18511866
def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
18521867
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
18531868
SchedVar<NoSchedPred, [WriteVecLogicX]>
18541869
]>;
18551870
// NOTE: PXORrr,PANDNrr are not zero-cycle!
1856-
def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
1871+
def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr,
1872+
VPXORDZ128rr,
1873+
VPXORQZ128rr,
1874+
VPANDNrr,
1875+
VPANDNDZ128rr,
1876+
VPANDNQZ128rr)>;
18571877

1858-
// TODO: This should be extended to incorporate all of the AVX512 zeroing
1859-
// idioms that can be executed by the renamer.
1860-
def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
1878+
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
18611879
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
1862-
SchedVar<NoSchedPred, [WriteVecLogicZ]>
1880+
SchedVar<NoSchedPred, [WriteVecLogicY]>
18631881
]>;
1864-
def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>;
1882+
def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr,
1883+
VPXORDZ256rr,
1884+
VPXORQZ256rr,
1885+
VPANDNYrr,
1886+
VPANDNDZ256rr,
1887+
VPANDNQZ256rr)>;
18651888

1866-
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
1889+
def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
18671890
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
1868-
SchedVar<NoSchedPred, [WriteVecLogicY]>
1891+
SchedVar<NoSchedPred, [WriteVecLogicZ]>
18691892
]>;
1870-
def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;
1893+
def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr,
1894+
VPANDNDZrr, VPANDNQZrr)>;
18711895

18721896
def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
18731897
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
@@ -1877,15 +1901,29 @@ def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
18771901
// PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
18781902
def : InstRW<[Zn4WriteVZeroIdiomALUX],
18791903
(instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
1880-
VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;
1904+
VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
1905+
VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
1906+
VPCMPGTBZ128rr, VPCMPGTWZ128rr,
1907+
VPCMPGTDZ128rr, VPCMPGTQZ128rr)>;
18811908

18821909
def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
18831910
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
18841911
SchedVar<NoSchedPred, [WriteVecALUY]>
18851912
]>;
18861913
def : InstRW<[Zn4WriteVZeroIdiomALUY],
18871914
(instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
1888-
VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;
1915+
VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
1916+
VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
1917+
VPCMPGTBZ256rr, VPCMPGTWZ256rr,
1918+
VPCMPGTDZ256rr, VPCMPGTQZ256rr)>;
1919+
1920+
def Zn4WriteVZeroIdiomALUZ : SchedWriteVariant<[
1921+
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
1922+
SchedVar<NoSchedPred, [WriteVecALUZ]>
1923+
]>;
1924+
def : InstRW<[Zn4WriteVZeroIdiomALUZ], // 512-bit (ZMM) forms: use the Z variant (WriteVecALUZ fallback), not the 256-bit Y one.
1925+
(instrs VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
1926+
VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr)>;
18891927

18901928
def : IsZeroIdiomFunction<[
18911929
// GPR Zero-idioms.
@@ -1940,9 +1978,24 @@ def : IsZeroIdiomFunction<[
19401978
], ZeroIdiomPredicate>,
19411979

19421980
// AVX ZMM Zero-idioms.
1943-
// TODO: This should be expanded to incorporate all AVX512 zeroing idioms.
19441981
DepBreakingClass<[
1945-
VPXORDZrr
1982+
// fp variants.
1983+
VXORPSZrr, VXORPDZrr,
1984+
VXORPSZ128rr, VXORPDZ128rr, VXORPSZ256rr, VXORPDZ256rr,
1985+
VANDNPSZrr, VANDNPDZrr,
1986+
VANDNPSZ128rr, VANDNPDZ128rr, VANDNPSZ256rr, VANDNPDZ256rr,
1987+
1988+
// int variants.
1989+
VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr,
1990+
VPCMPGTBZ128rr, VPCMPGTWZ128rr, VPCMPGTDZ128rr, VPCMPGTQZ128rr,
1991+
VPCMPGTBZ256rr, VPCMPGTWZ256rr, VPCMPGTDZ256rr, VPCMPGTQZ256rr,
1992+
VPANDNDZrr, VPANDNQZrr,
1993+
VPANDNDZ128rr, VPANDNQZ128rr, VPANDNDZ256rr, VPANDNQZ256rr,
1994+
VPXORDZrr, VPXORQZrr,
1995+
VPXORDZ128rr, VPXORQZ128rr, VPXORDZ256rr, VPXORQZ256rr,
1996+
VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
1997+
VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
1998+
VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
19461999
], ZeroIdiomPredicate>,
19472000
]>;
19482001

0 commit comments

Comments
 (0)