-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Reduce duplication in FLAT atomic definitions #85383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This simplifies the case where the tablegen name of the defm for the Real is the same as the name of the Pseudo.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) ChangesThis simplifies the case where the tablegen name of the defm for the Full diff: https://github.com/llvm/llvm-project/pull/85383.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index f42d4ae416bd76..3ed74354242664 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1787,45 +1787,46 @@ def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
-multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_ci <bits<7> op> {
+ defvar ps = !cast<FLAT_Pseudo>(NAME);
def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
-defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d>;
// CI Only flat instructions
-defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
-defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
-defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
-defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e>;
+defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f>;
+defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60>;
//===----------------------------------------------------------------------===//
@@ -1925,8 +1926,9 @@ def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
-multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
+multiclass FLAT_Real_Atomics_vi <bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+ defvar ps = !cast<FLAT_Pseudo>(NAME);
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}
@@ -1939,32 +1941,32 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
}
-defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c>;
defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
@@ -2060,9 +2062,9 @@ let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
}
let SubtargetPredicate = isGFX90AOnly in {
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, 0>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, 0>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, 0>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
@@ -2073,7 +2075,8 @@ multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
-multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
+ defvar ps = !cast<FLAT_Pseudo>(NAME);
def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
@@ -2089,15 +2092,15 @@ let SubtargetPredicate = isGFX940Plus in {
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus
|
kosarev
approved these changes
Mar 15, 2024
arsenm
approved these changes
Mar 15, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This simplifies the case where the tablegen name of the defm for the
Real is the same as the name of the Pseudo.