Skip to content

Commit e503227

Browse files
VigneshwarJ authored and tstellar committed
AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state change (#126132)
There are additional wait states for XDL write VALU WAW hazard in gfx950 compared to gfx940. (cherry picked from commit 1188b1f)
1 parent 6b57839 commit e503227

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2605,12 +2605,14 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
26052605
return NumPasses + 2;
26062606
}
26072607

2608-
static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
2609-
// 2 pass -> 5
2610-
// 4 pass -> 7
2611-
// 8 pass -> 11
2612-
// 16 pass -> 19
2613-
return NumPasses + 3;
2608+
static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
2609+
bool IsGFX950) {
2610+
// xdl def cycles | gfx940 | gfx950
2611+
// 2 pass         |   5        5
2612+
// 4 pass         |   7        8
2613+
// 8 pass         |   11       12
2614+
// 16 pass        |   19       20
2615+
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
26142616
}
26152617

26162618
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
@@ -2858,7 +2860,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
28582860
} else if (ST.hasGFX940Insts()) {
28592861
NeedWaitStates =
28602862
isXDL(ST, *MFMA)
2861-
? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses)
2863+
? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(
2864+
NumPasses, ST.hasGFX950Insts())
28622865
: GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
28632866
} else {
28642867
switch (NumPasses) {

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -958,7 +958,8 @@ body: |
958958
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write
959959
# GCN: V_MFMA
960960
# GCN-NEXT: S_NOP 7
961-
# GCN-NEXT: S_NOP 2
961+
# GFX940-NEXT: S_NOP 2
962+
# GFX950-NEXT: S_NOP 3
962963
# GCN-NEXT: V_MOV_B32
963964
name: xdl_smfma16x16_write_vgpr_valu_write
964965
body: |
@@ -970,7 +971,8 @@ body: |
970971
# GCN: V_MFMA
971972
# GCN-NEXT: S_NOP 7
972973
# GCN-NEXT: S_NOP 7
973-
# GCN-NEXT: S_NOP 2
974+
# GFX940-NEXT: S_NOP 2
975+
# GFX950-NEXT: S_NOP 3
974976
# GCN-NEXT: V_MOV_B32
975977
name: xdl_smfma32x32_write_vgpr_valu_write
976978
body: |
@@ -991,7 +993,8 @@ body: |
991993
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write
992994
# GCN: V_MFMA
993995
# GCN-NEXT: S_NOP 7
994-
# GCN-NEXT: S_NOP 2
996+
# GFX940-NEXT: S_NOP 2
997+
# GFX950-NEXT: S_NOP 3
995998
# GCN-NEXT: V_FMA_F16_e64
996999
name: xdl_smfma16x16_write_vgpr_valu_f16_write
9971000
body: |
@@ -1003,7 +1006,8 @@ body: |
10031006
# GCN: V_MFMA
10041007
# GCN-NEXT: S_NOP 7
10051008
# GCN-NEXT: S_NOP 7
1006-
# GCN-NEXT: S_NOP 2
1009+
# GFX940-NEXT: S_NOP 2
1010+
# GFX950-NEXT: S_NOP 3
10071011
# GCN-NEXT: V_FMA_F16_e64
10081012
name: xdl_smfma32x32_write_vgpr_valu_f16_write
10091013
body: |
@@ -1024,7 +1028,8 @@ body: |
10241028
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
10251029
# GCN: V_MFMA
10261030
# GCN-NEXT: S_NOP 7
1027-
# GCN-NEXT: S_NOP 2
1031+
# GFX940-NEXT: S_NOP 2
1032+
# GFX950-NEXT: S_NOP 3
10281033
# GCN-NEXT: V_MOV_B32_sdwa
10291034
name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
10301035
body: |
@@ -1761,7 +1766,8 @@ body: |
17611766
...
17621767
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
17631768
# GCN: V_MFMA
1764-
# GCN-NEXT: S_NOP 6
1769+
# GFX940-NEXT: S_NOP 6
1770+
# GFX950-NEXT: S_NOP 7
17651771
# GCN-NEXT: V_MOV_B32
17661772
name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
17671773
body: |
@@ -2072,7 +2078,8 @@ body: |
20722078
...
20732079
# GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
20742080
# GCN: V_SMFMAC
2075-
# GCN-NEXT: S_NOP 6
2081+
# GFX940-NEXT: S_NOP 6
2082+
# GFX950-NEXT: S_NOP 7
20762083
# GCN-NEXT: V_MOV_B32
20772084
name: smfmac16x16_read_vgpr_srcc_valu_write
20782085
body: |
@@ -2102,7 +2109,8 @@ body: |
21022109
# GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
21032110
# GCN: V_SMFMAC
21042111
# GCN-NEXT: S_NOP 7
2105-
# GCN-NEXT: S_NOP 2
2112+
# GFX940-NEXT: S_NOP 2
2113+
# GFX950-NEXT: S_NOP 3
21062114
# GCN-NEXT: V_MOV_B32
21072115
name: smfmac32x32_read_vgpr_srcc_valu_write
21082116
body: |

0 commit comments

Comments
 (0)