-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU/GlobalISel: add RegBankLegalize rules for select #132384
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Petar Avramovic (petar-avramovic) ChangesUniform condition S1 is AnyExtended to S32 and high bits are cleaned using Patch is 145.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132384.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 7301cba9e8ed3..0f5f3545ac8eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -243,6 +243,22 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
MI.eraseFromParent();
break;
}
+ case SplitTo32Sel: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32;
+ auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
+ auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
+ Register Cond = MI.getOperand(1).getReg();
+ auto Flags = MI.getFlags();
+ auto ResLo =
+ B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
+ auto ResHi =
+ B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
+
+ B.buildMergeLikeInstr(Dst, {ResLo, ResHi});
+ MI.eraseFromParent();
+ break;
+ }
case Div_BFE: {
Register Dst = MI.getOperand(0).getReg();
assert(MRI.getType(Dst) == LLT::scalar(64));
@@ -453,7 +469,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
case UniInVgprB64:
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) ||
- Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64))
+ Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) ||
+ Ty == LLT::pointer(999, 64))
return Ty;
return LLT();
case SgprB96:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index b4ef4ecc3fe28..96b0a7d634f7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -198,7 +198,7 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
return B32;
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) ||
- Ty == LLT::pointer(4, 64))
+ Ty == LLT::pointer(4, 64) || Ty == LLT::pointer(999, 64))
return B64;
if (Ty == LLT::fixed_vector(3, 32))
return B96;
@@ -485,8 +485,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
addRulesForGOpcs({G_SELECT}, StandardB)
+ .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
+ .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
.Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
- .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});
+ .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
+ .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Sel})
+ .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
addRulesForGOpcs({G_ANYEXT})
.Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index cdf70d99d4a9e..058e58c1a94ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -177,6 +177,7 @@ enum LoweringMethodID {
Div_BFE,
VgprToVccCopy,
SplitTo32,
+ SplitTo32Sel,
Ext32To64,
UniCstExt,
SplitLoad,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir
index 810724dab685d..762f7b9500367 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=FAST %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=GREEDY %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: select_s32_scc_ss
@@ -8,29 +7,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
- ; FAST-LABEL: name: select_s32_scc_ss
- ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]]
- ;
- ; GREEDY-LABEL: name: select_s32_scc_ss
- ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]]
+ ; CHECK-LABEL: name: select_s32_scc_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
@@ -45,31 +32,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; FAST-LABEL: name: select_s32_scc_sv
- ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]]
- ;
- ; GREEDY-LABEL: name: select_s32_scc_sv
- ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]]
+ ; CHECK-LABEL: name: select_s32_scc_sv
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[COPY4]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
@@ -85,31 +58,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; FAST-LABEL: name: select_s32_scc_vs
- ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]]
- ;
- ; GREEDY-LABEL: name: select_s32_scc_vs
- ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]]
+ ; CHECK-LABEL: name: select_s32_scc_vs
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[COPY3]], [[COPY4]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
@@ -124,29 +83,16 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; FAST-LABEL: name: select_s32_scc_vv
- ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]]
- ;
- ; GREEDY-LABEL: name: select_s32_scc_vv
- ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]]
+ ; CHECK-LABEL: name: select_s32_scc_vv
+ ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[COPY2]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
@@ -161,29 +107,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; FAST-LABEL: name: select_s32_vcc_ss
- ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]]
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]]
- ;
- ; GREEDY-LABEL: name: select_s32_vcc_ss
- ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]]
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]]
+ ; CHECK-LABEL: name: select_s32_vcc_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
@@ -198,27 +132,16 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; FAST-LABEL: name: select_s32_vcc_sv
- ; FAST: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]]
- ;
- ; GREEDY-LABEL: name: select_s32_vcc_sv
- ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]]
+ ; CHECK-LABEL: name: select_s32_vcc_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
@@ -233,27 +156,16 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; FAST-LABEL: name: select_s32_vcc_vs
- ; FAST: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
- ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]]
- ;
- ; GREEDY-LABEL: name: select_s32_vcc_vs
- ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ; GREEDY-NEXT: {{ $}}
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
- ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]]
+ ; CHECK-LABEL: name: select_s32_vcc_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
@@ -268,25 +180,15 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; FAST-LABEL: name: select_s32_vcc_vv
- ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; FAST-NEXT: {{ $}}
- ; FAST-NEXT: [[COPY:%[0...
[truncated]
|
2ac46e4
to
fdca6f3
Compare
6560c53
to
6d31707
Compare
fdca6f3
to
55cb190
Compare
6d31707
to
58d5945
Compare
@@ -492,7 +511,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { | |||
case UniInVgprB64: | |||
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || | |||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) || | |||
Ty == LLT::pointer(999, 64)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should have special case handling for address space 999. The tests using that AS surely just use it to represent "any other address space"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't know much about the topic, would all pointer types with addr space greater then 6 all be accepted and be 64 bits?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
55cb190
to
c57e522
Compare
58d5945
to
2bb763f
Compare
2bb763f
to
ea57c82
Compare
c57e522
to
61c28d2
Compare
@@ -485,7 +504,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { | |||
case UniInVgprB64: | |||
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || | |||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) || | |||
(Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What case is this trying to handle? Is it tested?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should try to recognize any unhandled address spaces as global aliases, there's a p999 test
@@ -198,7 +198,8 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) { | |||
return B32; | |||
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || | |||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) || | |||
Ty == LLT::pointer(4, 64)) | |||
Ty == LLT::pointer(4, 64) || | |||
(Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same question here
@@ -286,6 +287,22 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) { | |||
MI.eraseFromParent(); | |||
} | |||
|
|||
void RegBankLegalizeHelper::lowerSplitTo32Sel(MachineInstr &MI) { | |||
Register Dst = MI.getOperand(0).getReg(); | |||
LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32; | |
LLT Ty = (MRI.getType(Dst) == V4S16 ? V2S16 : S32); |
nit for clarity
- if you expect the type to be something else when it isn't
V4S16
, add an assert?
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] | ||
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 | ||
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C]] | ||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Already existing test. As suggested by Fabian, this is check for any other address space not p999 specifically.
ea57c82
to
da0dd13
Compare
61c28d2
to
915bee3
Compare
915bee3
to
649adb1
Compare
da0dd13
to
2c7c01d
Compare
LLT DstTy = MRI.getType(Dst); | ||
assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 || | ||
(DstTy.isPointer() && DstTy.getSizeInBits() == 64)); | ||
LLT Ty = (DstTy == V4S16 ? V2S16 : S32); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLT Ty = (DstTy == V4S16 ? V2S16 : S32); | |
LLT Ty = DstTy == V4S16 ? V2S16 : S32; |
@@ -286,6 +287,25 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) { | |||
MI.eraseFromParent(); | |||
} | |||
|
|||
void RegBankLegalizeHelper::lowerSplitTo32Sel(MachineInstr &MI) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spell out Select
@@ -485,7 +504,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { | |||
case UniInVgprB64: | |||
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || | |||
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) | |||
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) || | |||
(Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should try to recognize any unhandled address spaces as global aliases, there's a p999 test
@@ -372,6 +392,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, | |||
return lowerS_BFE(MI); | |||
case SplitTo32: | |||
return lowerSplitTo32(MI); | |||
case SplitTo32Sel: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spell out select
649adb1
to
ffc8507
Compare
2c7c01d
to
a2bb5ea
Compare
a2bb5ea
to
b550564
Compare
5083994
to
9716154
Compare
b550564
to
9990bbd
Compare
Merge activity
|
9716154
to
dc8b75b
Compare
Uniform condition S1 is AnyExtended to S32 and high bits are cleaned using AND with 1. Divergent S1 uses VCC. Using B32/B64 rules to cover scalars vector and pointer types. Divergent B64 is split to S32.
9990bbd
to
68528d5
Compare
Uniform condition S1 is AnyExtended to S32 and high bits are cleaned using
AND with 1. Divergent S1 uses VCC.
Using B32/B64 rules to cover scalars vector and pointer types.
Divergent B64 is split to S32.