Skip to content

Commit 46b12a2

Browse files
committed
[AMDGPU] Fix SDWA commuting
SDWA insts miss reverse opcode, which causes them to be treated as commutable with default reverse opcode i.e. their own opcode. As a result, SWDA F16 sub A, B and Sub B, A are merged by machine CSE. The correct behavior is to merged sub A, B and subrev B, A instead of sub B, A. This issues caused failures in rocFFT tests. Another issue is that src0_sel and src1_sel are not swapped when SDWA insts are commuted. Verified that this fixes rocFFT tests failure.
1 parent 803ab28 commit 46b12a2

File tree

3 files changed

+67
-4
lines changed

3 files changed

+67
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2812,6 +2812,10 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
28122812
swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
28132813
Src1, AMDGPU::OpName::src1_modifiers);
28142814

2815+
if (isSDWA(MI))
2816+
swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_sel, Src1,
2817+
AMDGPU::OpName::src1_sel);
2818+
28152819
CommutedMI->setDesc(get(CommutedOpcode));
28162820
}
28172821

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,12 @@ multiclass VOP2Inst_e64<string opName,
174174

175175
multiclass VOP2Inst_sdwa<string opName,
176176
VOPProfile P,
177+
string revOp = opName,
177178
bit GFX9Renamed = 0> {
178179
let renamedInGFX9 = GFX9Renamed in {
179180
if P.HasExtSDWA then
180-
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
181+
def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
182+
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
181183
} // End renamedInGFX9 = GFX9Renamed
182184
}
183185

@@ -188,7 +190,7 @@ multiclass VOP2Inst<string opName,
188190
bit GFX9Renamed = 0> :
189191
VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
190192
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
191-
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
193+
VOP2Inst_sdwa<opName, P, revOp, GFX9Renamed> {
192194
let renamedInGFX9 = GFX9Renamed in {
193195
if P.HasExtDPP then
194196
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -237,7 +239,7 @@ multiclass VOP2Inst_VOPD<string opName,
237239
bit GFX9Renamed = 0> :
238240
VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
239241
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
240-
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
242+
VOP2Inst_sdwa<opName, P, revOp, GFX9Renamed> {
241243
let renamedInGFX9 = GFX9Renamed in {
242244
if P.HasExtDPP then
243245
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -259,7 +261,8 @@ multiclass VOP2bInst <string opName,
259261
}
260262

261263
if P.HasExtSDWA then
262-
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
264+
def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
265+
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
263266
let AsmMatchConverter = "cvtSdwaVOP2b";
264267
}
265268
if P.HasExtDPP then

llvm/test/CodeGen/AMDGPU/sdwa-cse.mir

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=machine-cse -verify-machineinstrs %s -o - 2>&1 | FileCheck --check-prefix=GCN %s
2+
3+
# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_no_merge
4+
# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
5+
# GCN: %3:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
6+
# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
7+
# GCN: %6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
8+
# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
9+
---
10+
name: test_machine_cse_subtraction_sdwa_f16_no_merge
11+
body: |
12+
bb.0:
13+
%0:vreg_64 = IMPLICIT_DEF
14+
%1:vreg_64 = IMPLICIT_DEF
15+
%2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
16+
%3:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
17+
%4:vgpr_32 = IMPLICIT_DEF
18+
%5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
19+
%6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
20+
DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
21+
...
22+
23+
# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_merge_same_src_sel
24+
# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
25+
# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
26+
# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %5, 0, 1, 0, implicit $exec
27+
---
28+
name: test_machine_cse_subtraction_sdwa_f16_merge_same_src_sel
29+
body: |
30+
bb.0:
31+
%0:vreg_64 = IMPLICIT_DEF
32+
%1:vreg_64 = IMPLICIT_DEF
33+
%2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
34+
%3:vgpr_32 = contract nofpexcept V_SUBREV_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 5, implicit $mode, implicit $exec
35+
%4:vgpr_32 = IMPLICIT_DEF
36+
%5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
37+
%6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
38+
DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec
39+
...
40+
41+
# GCN-LABEL: name: test_machine_cse_subtraction_sdwa_f16_merge_diff_src_sel
42+
# GCN: %2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 6, 5, implicit $mode, implicit $exec
43+
# GCN: %5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
44+
# GCN: DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %5, 0, 1, 0, implicit $exec
45+
---
46+
name: test_machine_cse_subtraction_sdwa_f16_merge_diff_src_sel
47+
body: |
48+
bb.0:
49+
%0:vreg_64 = IMPLICIT_DEF
50+
%1:vreg_64 = IMPLICIT_DEF
51+
%2:vgpr_32 = contract nofpexcept V_SUB_F16_sdwa 0, %0.sub0, 0, %1.sub0, 0, 0, 6, 0, 6, 5, implicit $mode, implicit $exec
52+
%3:vgpr_32 = contract nofpexcept V_SUBREV_F16_sdwa 0, %1.sub0, 0, %0.sub0, 0, 0, 6, 0, 5, 6, implicit $mode, implicit $exec
53+
%4:vgpr_32 = IMPLICIT_DEF
54+
%5:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %2, %4, implicit $mode, implicit $exec
55+
%6:vgpr_32 = contract nofpexcept V_ADD_F16_e32 %3, %4, implicit $mode, implicit $exec
56+
DS_WRITE2_B32_gfx9 undef %7:vgpr_32, %5, %6, 0, 1, 0, implicit $exec

0 commit comments

Comments
 (0)