Skip to content

Commit 6dee075

Browse files
committed
Use OtherPredicates (HasXNACKEnabled) to simplify the SMRDAlignedLoadPat PatFrag and to optimize the SMRD load patterns.
1 parent 7dac5ee commit 6dee075

File tree

8 files changed

+67
-101
lines changed

8 files changed

+67
-101
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2026,6 +2026,8 @@ def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
20262026

20272027
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
20282028

2029+
def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;
2030+
20292031
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
20302032
AssemblerPredicate<(all_of Feature16BitInsts)>;
20312033

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 48 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -867,19 +867,12 @@ def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
867867
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
868868

869869
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
870-
// Ignore the alignment check if XNACK support is disabled.
871-
if (!Subtarget->isXNACKEnabled())
872-
return true;
873-
874870
// Returns true if it is a single dword load or naturally aligned multi-dword load.
875871
LoadSDNode *Ld = cast<LoadSDNode>(N);
876872
unsigned Size = Ld->getMemoryVT().getStoreSize();
877873
return Size <= 4 || Ld->getAlign().value() >= Size;
878874
}]> {
879875
let GISelPredicateCode = [{
880-
if (!Subtarget->isXNACKEnabled())
881-
return true;
882-
883876
auto &Ld = cast<GLoad>(MI);
884877
TypeSize Size = Ld.getMMO().getSize().getValue();
885878
return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
@@ -888,79 +881,59 @@ class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr),
888881

889882
def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;
890883

891-
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
884+
multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
885+
bit immci = true, string suffix = ""> {
886+
// 1. IMM offset
887+
def : GCNPat <
888+
(frag (SMRDImm i64:$sbase, i32:$offset)),
889+
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>;
890+
891+
// 2. 32-bit IMM offset on CI
892+
if immci then def : GCNPat <
893+
(frag (SMRDImm32 i64:$sbase, i32:$offset)),
894+
(vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> {
895+
let SubtargetPredicate = isGFX7Only;
896+
}
897+
898+
// 3. SGPR offset
899+
def : GCNPat <
900+
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
901+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> {
902+
let SubtargetPredicate = isNotGFX9Plus;
903+
}
904+
def : GCNPat <
905+
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
906+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> {
907+
let SubtargetPredicate = isGFX9Plus;
908+
}
892909

893-
let AddedComplexity = 101 in {
894-
// 1. IMM offset
895-
def : GCNPat <
896-
(aligned_smrd_load (SMRDImm i64:$sbase, i32:$offset)),
897-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))>;
898-
899-
// 2. 32-bit IMM offset on CI
900-
if immci then def : GCNPat <
901-
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
902-
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
903-
let SubtargetPredicate = isGFX7Only;
904-
}
905-
906-
// 3. SGPR offset
907-
def : GCNPat <
908-
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
909-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
910-
let SubtargetPredicate = isNotGFX9Plus;
911-
}
912-
def : GCNPat <
913-
(aligned_smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
914-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
915-
let SubtargetPredicate = isGFX9Plus;
916-
}
917-
918-
// 4. SGPR+IMM offset
919-
def : GCNPat <
920-
(aligned_smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
921-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
922-
let SubtargetPredicate = isGFX9Plus;
923-
}
924-
925-
// 5. No offset
926-
def : GCNPat <
927-
(vt (aligned_smrd_load (i64 SReg_64:$sbase))),
928-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>;
910+
// 4. SGPR+IMM offset
911+
def : GCNPat <
912+
(frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
913+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> {
914+
let SubtargetPredicate = isGFX9Plus;
929915
}
930916

931-
// The constrained multi-dword load equivalents.
917+
// 5. No offset
918+
def : GCNPat <
919+
(vt (frag (i64 SReg_64:$sbase))),
920+
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>;
921+
}
922+
923+
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
924+
// High priority when XNACK is enabled and the load was naturally aligned.
925+
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in
926+
defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>;
927+
928+
// XNACK is enabled and the load wasn't naturally aligned: select the constrained (_ec) sload variant.
932929
if !gt(vt.Size, 32) then {
933-
let AddedComplexity = 100 in {
934-
// 1. IMM offset
935-
def : GCNPat <
936-
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
937-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") $sbase, $offset, 0))>;
938-
939-
// 2. SGPR offset
940-
def : GCNPat <
941-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
942-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_ec") $sbase, $soffset, 0))> {
943-
let SubtargetPredicate = isNotGFX9Plus;
944-
}
945-
def : GCNPat <
946-
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
947-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, 0, 0))> {
948-
let SubtargetPredicate = isGFX9Plus;
949-
}
950-
951-
// 3. SGPR+IMM offset
952-
def : GCNPat <
953-
(smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
954-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM_ec") $sbase, $soffset, $offset, 0))> {
955-
let SubtargetPredicate = isGFX9Plus;
956-
}
957-
958-
// 4. No offset
959-
def : GCNPat <
960-
(vt (smrd_load (i64 SReg_64:$sbase))),
961-
(vt (!cast<SM_Pseudo>(Instr#"_IMM_ec") i64:$sbase, 0, 0))>;
962-
}
930+
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in
931+
defm: SMRD_Patterns <Instr, vt, smrd_load, /*immci=*/false, /*suffix=*/"_ec">;
963932
}
933+
934+
// XNACK is disabled.
935+
let AddedComplexity = 100 in
936+
defm: SMRD_Patterns <Instr, vt, smrd_load, immci>;
964937
}
965938

966939
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,8 +1102,8 @@ body: |
11021102
; GFX7: liveins: $sgpr0_sgpr1
11031103
; GFX7-NEXT: {{ $}}
11041104
; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
1105-
; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 255, 0 :: (load (s32), addrspace 4)
1106-
; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
1105+
; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4)
1106+
; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
11071107
;
11081108
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020
11091109
; GFX8: liveins: $sgpr0_sgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,11 @@ regBankSelected: true
2020
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
2121

2222
# Immediate offset:
23-
# SI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
24-
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1, 0
23+
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
2524
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
2625

2726
# Max immediate offset for SI
28-
# SI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
29-
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 255, 0
27+
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
3028
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
3129

3230
# Immediate overflow for SI
@@ -198,8 +196,7 @@ body: |
198196
# GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44
199197

200198
# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
201-
# SI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
202-
# CI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
199+
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
203200

204201
---
205202

llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -729,15 +729,14 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inre
729729
;
730730
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
731731
; G_GFX7: ; %bb.0: ; %main_body
732-
; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
733-
; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
734-
; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xf
732+
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
735733
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
736-
; G_GFX7-NEXT: v_mov_b32_e32 v0, s2
737-
; G_GFX7-NEXT: v_mov_b32_e32 v1, s3
738-
; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
734+
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
735+
; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
736+
; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
739737
; G_GFX7-NEXT: s_mov_b32 s2, -1
740738
; G_GFX7-NEXT: s_mov_b32 s3, 0xf000
739+
; G_GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
741740
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
742741
; G_GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
743742
; G_GFX7-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -658,15 +658,14 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f32_off4_slc(ptr addrsp
658658
;
659659
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
660660
; G_GFX7: ; %bb.0: ; %main_body
661-
; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
662-
; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
663-
; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xf
661+
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
664662
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
665-
; G_GFX7-NEXT: v_mov_b32_e32 v0, s2
666-
; G_GFX7-NEXT: v_mov_b32_e32 v1, s3
667-
; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
663+
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
664+
; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
665+
; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
668666
; G_GFX7-NEXT: s_mov_b32 s2, -1
669667
; G_GFX7-NEXT: s_mov_b32 s3, 0xf000
668+
; G_GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
670669
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
671670
; G_GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
672671
; G_GFX7-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,9 +492,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
492492
;
493493
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
494494
; G_GFX7: ; %bb.0: ; %main_body
495-
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
496-
; G_GFX7-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0xf
497-
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
495+
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
498496
; G_GFX7-NEXT: s_mov_b32 m0, -1
499497
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
500498
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4

llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,9 +492,7 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp
492492
;
493493
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
494494
; G_GFX7: ; %bb.0: ; %main_body
495-
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
496-
; G_GFX7-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0xf
497-
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
495+
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
498496
; G_GFX7-NEXT: s_mov_b32 m0, -1
499497
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
500498
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4

0 commit comments

Comments
 (0)