Skip to content

Commit ab4b6cd

Browse files
committed
Use the isXNACKEnabled subtarget option in the PatFrag to optimize the selection.
1 parent 18445b3 commit ab4b6cd

File tree

227 files changed

+34517
-33674
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

227 files changed

+34517
-33674
lines changed

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 70 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -867,103 +867,60 @@ def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
867867
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
868868

869869
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
870+
// Ignore the alignment check if XNACK support is disabled.
871+
if (!Subtarget->isXNACKEnabled())
872+
return true;
873+
870874
// Returns true if it is a naturally aligned multi-dword load.
871875
LoadSDNode *Ld = cast<LoadSDNode>(N);
872876
unsigned Size = Ld->getMemoryVT().getStoreSize();
873-
return (Size <= 4) || (Ld->getAlign().value() >= PowerOf2Ceil(Size));
877+
return Size <= 4 || Ld->getAlign().value() >= Size;
874878
}]> {
875879
let GISelPredicateCode = [{
876-
auto &Ld = cast<GLoad>(MI);
877-
TypeSize Size = Ld.getMMO().getSize().getValue();
878-
return (Size <= 4) || (Ld.getMMO().getAlign().value() >= PowerOf2Ceil(Size));
880+
if (!Subtarget->isXNACKEnabled())
881+
return true;
882+
883+
auto &Ld = cast<GLoad>(MI);
884+
TypeSize Size = Ld.getMMO().getSize().getValue();
885+
return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
879886
}];
880887
}
881888

882889
class SMRDUnalignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
890+
// Only do the alignment check if XNACK support is enabled; otherwise never match as under-aligned.
891+
if (!Subtarget->isXNACKEnabled())
892+
return false;
893+
883894
// Returns true if it is an under aligned multi-dword load.
884895
LoadSDNode *Ld = cast<LoadSDNode>(N);
885896
unsigned Size = Ld->getMemoryVT().getStoreSize();
886-
return (Size > 4) && (Ld->getAlign().value() < PowerOf2Ceil(Size));
897+
return Size > 4 && (Ld->getAlign().value() < Size);
887898
}]> {
888899
let GISelPredicateCode = [{
889-
auto &Ld = cast<GLoad>(MI);
890-
TypeSize Size = Ld.getMMO().getSize().getValue();
891-
return (Size > 4) && (Ld.getMMO().getAlign().value() < PowerOf2Ceil(Size));
900+
if (!Subtarget->isXNACKEnabled())
901+
return false;
902+
903+
auto &Ld = cast<GLoad>(MI);
904+
TypeSize Size = Ld.getMMO().getSize().getValue();
905+
return Size > 4 && (Ld.getMMO().getAlign().value() < Size);
892906
}];
893907
}
894908

895-
def alignedmultidwordload : SMRDAlignedLoadPat<smrd_load>;
896-
def unalignedmultidwordload : SMRDUnalignedLoadPat<smrd_load>;
897-
898-
multiclass SMRD_Align_Pattern <string Instr, ValueType vt> {
899-
900-
// 1. IMM offset
901-
def : GCNPat <
902-
(alignedmultidwordload (SMRDImm i64:$sbase, i32:$offset)),
903-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))> {
904-
let OtherPredicates = [isGFX8Plus];
905-
}
906-
def : GCNPat <
907-
(unalignedmultidwordload (SMRDImm i64:$sbase, i32:$offset)),
908-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") $sbase, $offset, 0))> {
909-
let OtherPredicates = [isGFX8Plus];
910-
}
911-
912-
// 2. SGPR offset
913-
def : GCNPat <
914-
(alignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
915-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
916-
let OtherPredicates = [isGFX8Only];
917-
}
918-
def : GCNPat <
919-
(unalignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
920-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_ec") $sbase, $soffset, 0))> {
921-
let OtherPredicates = [isGFX8Only];
922-
}
923-
def : GCNPat <
924-
(alignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
925-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
926-
let OtherPredicates = [isGFX9Plus];
927-
}
928-
def : GCNPat <
929-
(unalignedmultidwordload (SMRDSgpr i64:$sbase, i32:$soffset)),
930-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, 0, 0))> {
931-
let OtherPredicates = [isGFX9Plus];
932-
}
933-
934-
// 3. SGPR+IMM offset
935-
def : GCNPat <
936-
(alignedmultidwordload (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
937-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
938-
let OtherPredicates = [isGFX9Plus];
939-
}
940-
def : GCNPat <
941-
(unalignedmultidwordload (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
942-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, $offset, 0))> {
943-
let OtherPredicates = [isGFX9Plus];
944-
}
945-
946-
// 4. No offset
947-
def : GCNPat <
948-
(vt (alignedmultidwordload (i64 SReg_64:$sbase))),
949-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))> {
950-
let OtherPredicates = [isGFX8Plus];
951-
}
952-
def : GCNPat <
953-
(vt (unalignedmultidwordload (i64 SReg_64:$sbase))),
954-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") i64:$sbase, 0, 0))> {
955-
let OtherPredicates = [isGFX8Plus];
956-
}
957-
}
909+
def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;
910+
def unaligned_smrd_load : SMRDUnalignedLoadPat<smrd_load>;
958911

959912
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
960913

961914
// 1. IMM offset
962915
def : GCNPat <
963-
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
964-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))> {
965-
let OtherPredicates = [isGFX6GFX7];
966-
}
916+
(aligned_smrd_load (SMRDImm i64:$sbase, i32:$offset)),
917+
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
918+
>;
919+
if !gt(vt.Size, 32) then
920+
def : GCNPat <
921+
(unaligned_smrd_load (SMRDImm i64:$sbase, i32:$offset)),
922+
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") $sbase, $offset, 0))
923+
>;
967924

968925
// 2. 32-bit IMM offset on CI
969926
if immci then def : GCNPat <
@@ -974,19 +931,49 @@ multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
974931

975932
// 3. SGPR offset
976933
def : GCNPat <
977-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
934+
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
978935
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
979-
let OtherPredicates = [isGFX6GFX7];
936+
let OtherPredicates = [isNotGFX9Plus];
980937
}
981-
982-
// 4. No offset
983938
def : GCNPat <
984-
(vt (smrd_load (i64 SReg_64:$sbase))),
985-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))> {
986-
let OtherPredicates = [isGFX6GFX7];
939+
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
940+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
941+
let OtherPredicates = [isGFX9Plus];
942+
}
943+
if !gt(vt.Size, 32) then {
944+
def : GCNPat <
945+
(unaligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
946+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_ec") $sbase, $soffset, 0))> {
947+
let OtherPredicates = [isNotGFX9Plus];
948+
}
949+
def : GCNPat <
950+
(unaligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
951+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, 0, 0))> {
952+
let OtherPredicates = [isGFX9Plus];
953+
}
987954
}
988955

989-
defm : SMRD_Align_Pattern<Instr, vt>;
956+
// 4. SGPR+IMM offset
957+
def : GCNPat <
958+
(aligned_smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
959+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
960+
let OtherPredicates = [isGFX9Plus];
961+
}
962+
if !gt(vt.Size, 32) then
963+
def : GCNPat <
964+
(unaligned_smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
965+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, $offset, 0))> {
966+
let OtherPredicates = [isGFX9Plus];
967+
}
968+
969+
// 5. No offset
970+
def : GCNPat <
971+
(vt (aligned_smrd_load (i64 SReg_64:$sbase))),
972+
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>;
973+
if !gt(vt.Size, 32) then
974+
def : GCNPat <
975+
(vt (unaligned_smrd_load (i64 SReg_64:$sbase))),
976+
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") i64:$sbase, 0, 0))>;
990977
}
991978

992979
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
99
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
10-
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
10+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
13-
; GFX11-NEXT: s_add_u32 s0, s6, s2
14-
; GFX11-NEXT: s_addc_u32 s1, s7, s3
13+
; GFX11-NEXT: s_add_u32 s0, s6, s0
14+
; GFX11-NEXT: s_addc_u32 s1, s7, s1
1515
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1616
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1717
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -23,10 +23,10 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
2323
; GFX12: ; %bb.0: ; %entry
2424
; GFX12-NEXT: s_clause 0x1
2525
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
26-
; GFX12-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
26+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
2727
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2828
; GFX12-NEXT: s_wait_kmcnt 0x0
29-
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[2:3]
29+
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
3030
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3131
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3232
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -59,11 +59,11 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5959
; GFX11: ; %bb.0: ; %entry
6060
; GFX11-NEXT: s_clause 0x1
6161
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
62-
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
62+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
6363
; GFX11-NEXT: v_mov_b32_e32 v2, 0
6464
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
65-
; GFX11-NEXT: s_sub_u32 s0, s6, s2
66-
; GFX11-NEXT: s_subb_u32 s1, s7, s3
65+
; GFX11-NEXT: s_sub_u32 s0, s6, s0
66+
; GFX11-NEXT: s_subb_u32 s1, s7, s1
6767
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6868
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
6969
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
@@ -75,10 +75,10 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
7575
; GFX12: ; %bb.0: ; %entry
7676
; GFX12-NEXT: s_clause 0x1
7777
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
78-
; GFX12-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
78+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
7979
; GFX12-NEXT: v_mov_b32_e32 v2, 0
8080
; GFX12-NEXT: s_wait_kmcnt 0x0
81-
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[2:3]
81+
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]
8282
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
8383
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
8484
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]

llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
131131
;
132132
; WAVE32-LABEL: brcond_sgpr_trunc_and:
133133
; WAVE32: ; %bb.0: ; %entry
134-
; WAVE32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
134+
; WAVE32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
135135
; WAVE32-NEXT: s_waitcnt lgkmcnt(0)
136-
; WAVE32-NEXT: s_and_b32 s0, s2, s3
136+
; WAVE32-NEXT: s_and_b32 s0, s0, s1
137137
; WAVE32-NEXT: s_xor_b32 s0, s0, 1
138138
; WAVE32-NEXT: s_and_b32 s0, s0, 1
139139
; WAVE32-NEXT: s_cmp_lg_u32 s0, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,20 +1401,20 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addr
14011401
;
14021402
; VI-LABEL: cvt_ubyte0_or_multiuse:
14031403
; VI: ; %bb.0: ; %bb
1404-
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1404+
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
14051405
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
14061406
; VI-NEXT: s_waitcnt lgkmcnt(0)
1407-
; VI-NEXT: v_mov_b32_e32 v0, s4
1408-
; VI-NEXT: v_mov_b32_e32 v1, s5
1407+
; VI-NEXT: v_mov_b32_e32 v0, s0
1408+
; VI-NEXT: v_mov_b32_e32 v1, s1
14091409
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
14101410
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
14111411
; VI-NEXT: flat_load_dword v0, v[0:1]
14121412
; VI-NEXT: s_waitcnt vmcnt(0)
14131413
; VI-NEXT: v_or_b32_e32 v0, 0x80000001, v0
14141414
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
14151415
; VI-NEXT: v_add_f32_e32 v2, v0, v1
1416-
; VI-NEXT: v_mov_b32_e32 v0, s6
1417-
; VI-NEXT: v_mov_b32_e32 v1, s7
1416+
; VI-NEXT: v_mov_b32_e32 v0, s2
1417+
; VI-NEXT: v_mov_b32_e32 v1, s3
14181418
; VI-NEXT: flat_store_dword v[0:1], v2
14191419
; VI-NEXT: s_endpgm
14201420
bb:

0 commit comments

Comments
 (0)