Skip to content

Commit d90707c

Browse files
arsenm authored and
pravinjagtap committed
AMDGPU: Refine gfx950 xdl-write-vgpr hazard cases (llvm#117285)
The case of a 2-pass XDL instruction writing a VGPR that is then read by a non-XDL SGEMM/DGEMM was overly conservative by 1 wait state. Previously, for gfx940, the XDL and non-XDL consumer cases happened to require the same number of cycles in all cases. Now, for gfx950, the XDL consumer case requires one additional wait state for 2-pass sources, while the non-XDL consumer case does not.
1 parent 28d45b1 commit d90707c

File tree

2 files changed

+23
-14
lines changed

2 files changed

+23
-14
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,8 +2228,8 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
22282228
}
22292229

22302230
static int
2231-
GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
2232-
bool IsGFX950) {
2231+
GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
2232+
bool IsGFX950) {
22332233
// xdl def cycles | gfx940 | gfx950
22342234
// 2 pass | 3 4
22352235
// 4 pass | 5 6
@@ -2238,6 +2238,17 @@ GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
22382238
return NumPasses + 1 + IsGFX950;
22392239
}
22402240

2241+
// Returns the number of wait states for the hazard named by this function:
// an XDL instruction that takes NumPasses passes writes a VGPR which overlaps
// SrcC of a following non-XDL SGEMM/DGEMM. The visible call site selects this
// helper only when the producer is XDL and the consumer is not.
//   NumPasses - pass count of the producing XDL instruction.
//   IsGFX950  - true when the subtarget has gfx950 instructions.
static int
2242+
GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
2243+
bool IsGFX950) {
2244+
// xdl def cycles | gfx940 | gfx950
2245+
// 2 pass | 3 3
2246+
// 4 pass | 5 6
2247+
// 8 pass | 9 10
2248+
// 16 pass | 17 18
// The base cost is NumPasses + 1. gfx950 adds one more wait state, except
// for 2-pass producers, which stay at 3 on both targets (see table above).
2249+
return NumPasses + 1 + (NumPasses != 2 && IsGFX950);
2250+
}
2251+
22412252
static int
22422253
GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
22432254
// 2 pass -> 2
@@ -2375,8 +2386,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23752386

23762387
NeedWaitStates =
23772388
isXDL(ST, *MI1)
2378-
? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
2379-
NumPasses, ST.hasGFX950Insts())
2389+
? (isXDL(ST, *MI)
2390+
? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
2391+
NumPasses, ST.hasGFX950Insts())
2392+
: GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
2393+
NumPasses, ST.hasGFX950Insts()))
23802394
: GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
23812395
NumPasses);
23822396
break;

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ body: |
156156
...
157157
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
158158
# GCN: V_MFMA
159-
# GFX940-NEXT: S_NOP 2
160-
# GFX950-NEXT: S_NOP 3
159+
# GCN-NEXT: S_NOP 2
161160
# GCN-NEXT: V_MFMA
162161
name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
163162
body: |
@@ -348,8 +347,7 @@ body: |
348347
...
349348
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
350349
# GCN: V_MFMA
351-
# GFX940-NEXT: S_NOP 2
352-
# GFX950-NEXT: S_NOP 3
350+
# GCN-NEXT: S_NOP 2
353351
# GCN-NEXT: V_MFMA
354352
name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
355353
body: |
@@ -1403,8 +1401,7 @@ body: |
14031401
...
14041402
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
14051403
# GCN: V_MFMA
1406-
# GFX940-NEXT: S_NOP 2
1407-
# GFX950-NEXT: S_NOP 3
1404+
# GCN-NEXT: S_NOP 2
14081405
# GCN-NEXT: V_MFMA
14091406
name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
14101407
body: |
@@ -1885,8 +1882,7 @@ body: |
18851882
...
18861883
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
18871884
# GCN: V_MFMA
1888-
# GFX940-NEXT: S_NOP 2
1889-
# GFX950-NEXT: S_NOP 3
1885+
# GCN-NEXT: S_NOP 2
18901886
# GCN-NEXT: V_MFMA
18911887
name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
18921888
body: |
@@ -2220,8 +2216,7 @@ body: |
22202216
# 2 pass source
22212217
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
22222218
# GCN: V_MFMA
2223-
# GFX940-NEXT: S_NOP 2
2224-
# GFX950-NEXT: S_NOP 3
2219+
# GCN-NEXT: S_NOP 2
22252220
# GCN-NEXT: V_MFMA
22262221
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
22272222
body: |

0 commit comments

Comments
 (0)