Skip to content

AMDGPU/GlobalISel: add RegBankLegalize rules for bitfield extract #132381

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 109 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

Expand All @@ -27,7 +31,8 @@ using namespace AMDGPU;
RegBankLegalizeHelper::RegBankLegalizeHelper(
MachineIRBuilder &B, const MachineUniformityInfo &MUI,
const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
: B(B), MRI(*B.getMRI()), MUI(MUI), RBI(RBI), RBLRules(RBLRules),
: ST(B.getMF().getSubtarget<GCNSubtarget>()), B(B), MRI(*B.getMRI()),
MUI(MUI), RBI(RBI), RBLRules(RBLRules),
SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
Expand Down Expand Up @@ -127,6 +132,105 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
MI.eraseFromParent();
}

static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));

return MI.getOpcode() == AMDGPU::G_SBFX;
}

void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
assert(MRI.getType(Dst) == LLT::scalar(64));
bool Signed = isSignedBFE(MI);
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
// Extract bitfield from Src, LSBit is the least-significant bit for the
// extraction (field offset) and Width is size of bitfield.
Register Src = MI.getOperand(FirstOpnd).getReg();
Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
Register Width = MI.getOperand(FirstOpnd + 2).getReg();
// Comments are for signed bitfield extract, similar for unsigned. x is sign
// bit. s is sign, l is LSB and y are remaining bits of bitfield to extract.

// Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});

auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);

// Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
// << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
// >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
if (!ConstWidth) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this functionally the same as the intrinsic case?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume so, looks like intrinsics were introduced first, then the G_ opcodes

auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
MI.eraseFromParent();
return;
}

uint64_t WidthImm = ConstWidth->Value.getZExtValue();
auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
auto Zero = B.buildConstant({VgprRB, S32}, 0);
unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;

if (WidthImm <= 32) {
// SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
MachineInstrBuilder Hi;
if (Signed) {
// SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
} else {
// SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
Hi = Zero;
}
B.buildMergeLikeInstr(Dst, {Lo, Hi});
} else {
auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
// SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
}

MI.eraseFromParent();
}

void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(DstReg);
bool Signed = isSignedBFE(MI);
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
Register Src = MI.getOperand(FirstOpnd).getReg();
Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
Register Width = MI.getOperand(FirstOpnd + 2).getReg();
// For uniform bit field extract there are 4 available instructions, but
// LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
// field offset in low and size in high 16 bits.

// Src1 Hi16|Lo16 = Size|FieldOffset
auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
Comment on lines +214 to +215
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there's a buildZextInReg helper

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buildZextInReg builds multiple instructions and does not set reg banks, this way we set all register banks

auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
Comment on lines +218 to +219
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about this harming the ability of downstream transforms and analyses to do anything with the operation. We should still be able to handle known and demanded bits. Also don't want to break any patterns that read BFE inputs. Except for the intrinsic with variable inputs case, can't you just use the G_* opcodes?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no tablegen patterns for S_BFE, known bits for GlobalISel are missing support for many generic opcodes not to mention target intrinsics. Can we leave that for another patch?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no tablegen patterns for S_BFE

Since the two operands are merged into one operand, I think you need to manually select it. Should add a fixme to stop directly selecting it

unsigned Opc = Ty == S32 ? Opc32 : Opc64;

// Select machine instruction, because of reg class constraining, insert
// copies from reg class to reg bank.
auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
{B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
*ST.getRegisterInfo(), RBI))
llvm_unreachable("failed to constrain BFE");

B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
MI.eraseFromParent();
}

void RegBankLegalizeHelper::lower(MachineInstr &MI,
const RegBankLLTMapping &Mapping,
SmallSet<Register, 4> &WaterfallSgprs) {
Expand Down Expand Up @@ -225,6 +329,10 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
MI.eraseFromParent();
break;
}
case V_BFE:
return lowerV_BFE(MI);
case S_BFE:
return lowerS_BFE(MI);
case SplitLoad: {
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = DstTy.getSizeInBits();
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace AMDGPU {
// to replace instruction. In other case InstApplyMethod will create new
// instruction(s).
class RegBankLegalizeHelper {
const GCNSubtarget &ST;
MachineIRBuilder &B;
MachineRegisterInfo &MRI;
const MachineUniformityInfo &MUI;
Expand Down Expand Up @@ -108,6 +109,9 @@ class RegBankLegalizeHelper {

void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
SmallSet<Register, 4> &SgprWaterfallOperandRegs);

void lowerV_BFE(MachineInstr &MI);
void lowerS_BFE(MachineInstr &MI);
};

} // end namespace AMDGPU
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}});

addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
// and G_FREEZE here, rest is trivially regbankselected earlier
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
Expand Down Expand Up @@ -628,6 +636,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

addRulesForIOpcs({amdgcn_readfirstlane})
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
// this should not exist in the first place, it is from call lowering
// readfirstlaning just in case register is not in sgpr.
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ enum LoweringMethodID {
DoNotLower,
VccExtToSel,
UniExtToSel,
S_BFE,
V_BFE,
VgprToVccCopy,
SplitTo32,
Ext32To64,
Expand Down
66 changes: 36 additions & 30 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s

...

Expand Down Expand Up @@ -96,12 +95,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
Expand All @@ -124,12 +122,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
Expand Down Expand Up @@ -216,12 +213,11 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
%0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
Expand Down Expand Up @@ -266,16 +262,19 @@ body: |
; CHECK-LABEL: name: test_sbfx_s32_sss
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY3]](s32), [[COPY4]](s32), implicit-def $scc
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
Expand All @@ -294,16 +293,18 @@ body: |
; CHECK-LABEL: name: test_sbfx_s32_sii
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY1]](s32), [[COPY2]](s32), implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 10
Expand All @@ -324,16 +325,19 @@ body: |
; CHECK-LABEL: name: test_sbfx_s64_sss
; CHECK: liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64)
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY3]](s64), [[COPY4]](s32), implicit-def $scc
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY5]](s64)
%0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
Expand All @@ -352,15 +356,17 @@ body: |
; CHECK-LABEL: name: test_sbfx_s64_sii
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY1]](s64), [[COPY2]](s32), implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
%0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 10
Expand Down
Loading
Loading