Skip to content

Commit 38cec04

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for bitfield extract (#132381)
A divergent S32 instruction is available; for S64 we need to lower to S32. Uniform instructions are available for both S32 and S64, but the bitfield offset and the size of the bitfield need to be packed into a single S32. The uniform instruction is selected directly, since there is no available isel pattern.
1 parent e6b43bd commit 38cec04

8 files changed

+206
-69
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@
1515
#include "AMDGPUGlobalISelUtils.h"
1616
#include "AMDGPUInstrInfo.h"
1717
#include "AMDGPURegisterBankInfo.h"
18+
#include "GCNSubtarget.h"
1819
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20+
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1921
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
22+
#include "llvm/CodeGen/MachineInstr.h"
2023
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
24+
#include "llvm/IR/IntrinsicsAMDGPU.h"
2125

2226
#define DEBUG_TYPE "amdgpu-regbanklegalize"
2327

@@ -27,7 +31,8 @@ using namespace AMDGPU;
2731
RegBankLegalizeHelper::RegBankLegalizeHelper(
2832
MachineIRBuilder &B, const MachineUniformityInfo &MUI,
2933
const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
30-
: B(B), MRI(*B.getMRI()), MUI(MUI), RBI(RBI), RBLRules(RBLRules),
34+
: ST(B.getMF().getSubtarget<GCNSubtarget>()), B(B), MRI(*B.getMRI()),
35+
MUI(MUI), RBI(RBI), RBLRules(RBLRules),
3136
SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
3237
VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
3338
VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
@@ -127,6 +132,105 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
127132
MI.eraseFromParent();
128133
}
129134

135+
// Returns true when MI is a signed bitfield extract. If MI is a GIntrinsic the
// answer is whether it is Intrinsic::amdgcn_sbfe (the opcode is not consulted
// in that case); otherwise the answer is whether the opcode is G_SBFX.
static bool isSignedBFE(MachineInstr &MI) {
136+
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
137+
return (GI->is(Intrinsic::amdgcn_sbfe));
138+
139+
// Not an intrinsic: decide from the generic opcode.
return MI.getOpcode() == AMDGPU::G_SBFX;
140+
}
141+
142+
// Lowers a bitfield extract whose destination is asserted to be s64 into
// operations built on the VGPR bank, then erases MI.
//
// The source is first shifted right by the field offset. After that:
//  - if the width is not a known constant, the field is isolated with a 64-bit
//    shl / shr pair by (64 - Width);
//  - if the width is a constant <= 32, a 32-bit G_SBFX/G_UBFX is applied to the
//    low half and the high half is sign-fill (signed) or zero (unsigned);
//  - if the width is a constant > 32, the low half is passed through and a
//    32-bit G_SBFX/G_UBFX of (Width - 32) bits is applied to the high half.
void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
143+
Register Dst = MI.getOperand(0).getReg();
144+
assert(MRI.getType(Dst) == LLT::scalar(64));
145+
bool Signed = isSignedBFE(MI);
146+
// Source/offset/width start at operand 2 for a GIntrinsic and at operand 1
// otherwise (presumably operand 1 of the intrinsic form is the intrinsic ID
// -- confirm against the GIntrinsic operand layout).
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
147+
// Extract bitfield from Src, LSBit is the least-significant bit for the
148+
// extraction (field offset) and Width is size of bitfield.
149+
Register Src = MI.getOperand(FirstOpnd).getReg();
150+
Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
151+
Register Width = MI.getOperand(FirstOpnd + 2).getReg();
152+
// Comments are for signed bitfield extract, similar for unsigned. x is sign
153+
// bit. s is sign, l is LSB and y are remaining bits of bitfield to extract.
154+
155+
// Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
156+
// Arithmetic shift for the signed form, logical shift for the unsigned form.
unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
157+
auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
158+
159+
// Empty when Width cannot be resolved to a constant; that case takes the
// generic shift-pair path below.
auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
160+
161+
// Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
162+
// << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
163+
// >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
164+
if (!ConstWidth) {
165+
// The constant 64 is built on the SGPR bank while the subtraction result is
// on the VGPR bank; the same Amt feeds both the left and the right shift.
auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
166+
auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
167+
// Final right shift writes Dst directly, using the same signed/unsigned
// shift opcode as the initial shift.
B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
168+
MI.eraseFromParent();
169+
return;
170+
}
171+
172+
// Constant width: split the shifted source into 32-bit halves and use a
// single 32-bit bitfield extract on the half that needs it.
uint64_t WidthImm = ConstWidth->Value.getZExtValue();
173+
auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
174+
Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
175+
Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
176+
// Zero is used both as the extract offset and as the unsigned high half.
auto Zero = B.buildConstant({VgprRB, S32}, 0);
177+
unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
178+
179+
if (WidthImm <= 32) {
180+
// SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
181+
// Extract from bit 0 of the low half, reusing the original Width register.
auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
182+
MachineInstrBuilder Hi;
183+
if (Signed) {
184+
// SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
185+
Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
186+
} else {
187+
// SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
188+
Hi = Zero;
189+
}
190+
B.buildMergeLikeInstr(Dst, {Lo, Hi});
191+
} else {
192+
// Width spans into the high half: the low half is kept as is and only the
// remaining (WidthImm - 32) bits are extracted from the high half.
auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
193+
// SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
194+
auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
195+
B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
196+
}
197+
198+
MI.eraseFromParent();
199+
}
200+
201+
// Lowers a uniform bitfield extract by building the target machine instruction
// S_BFE_{I,U}{32,64} directly, then erases MI.
//
// The field offset and width are packed into one s32 second source operand as
// (LSBit & 0x3f) | (Width << 16). The signed/unsigned opcode comes from
// isSignedBFE(MI); the 32-bit opcode is used when the destination type is S32
// and the 64-bit opcode otherwise.
void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
202+
Register DstReg = MI.getOperand(0).getReg();
203+
LLT Ty = MRI.getType(DstReg);
204+
bool Signed = isSignedBFE(MI);
205+
// Source/offset/width start at operand 2 for a GIntrinsic and at operand 1
// otherwise.
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
206+
Register Src = MI.getOperand(FirstOpnd).getReg();
207+
Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
208+
Register Width = MI.getOperand(FirstOpnd + 2).getReg();
209+
// For uniform bit field extract there are 4 available instructions, but
210+
// LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
211+
// field offset in low and size in high 16 bits.
212+
213+
// Src1 Hi16|Lo16 = Size|FieldOffset
214+
// Only the low 6 bits of the offset are kept (mask value 63).
auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
215+
auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
216+
auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
217+
auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
218+
unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
219+
unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
220+
unsigned Opc = Ty == S32 ? Opc32 : Opc64;
221+
222+
// Select machine instruction, because of reg class constraining, insert
223+
// copies from reg class to reg bank.
224+
// Both inputs go through fresh COPYs so that constraining the S_BFE operands
// applies to the copies rather than to Src and Src1 themselves.
auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
225+
{B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
226+
// Failure to constrain the operands is treated as unreachable.
if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
227+
*ST.getRegisterInfo(), RBI))
228+
llvm_unreachable("failed to constrain BFE");
229+
230+
// Copy the S_BFE result into the original destination register.
B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
231+
MI.eraseFromParent();
232+
}
233+
130234
void RegBankLegalizeHelper::lower(MachineInstr &MI,
131235
const RegBankLLTMapping &Mapping,
132236
SmallSet<Register, 4> &WaterfallSgprs) {
@@ -225,6 +329,10 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
225329
MI.eraseFromParent();
226330
break;
227331
}
332+
case V_BFE:
333+
return lowerV_BFE(MI);
334+
case S_BFE:
335+
return lowerS_BFE(MI);
228336
case SplitLoad: {
229337
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
230338
unsigned Size = DstTy.getSizeInBits();

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ namespace AMDGPU {
2626
// to replace instruction. In other case InstApplyMethod will create new
2727
// instruction(s).
2828
class RegBankLegalizeHelper {
29+
const GCNSubtarget &ST;
2930
MachineIRBuilder &B;
3031
MachineRegisterInfo &MRI;
3132
const MachineUniformityInfo &MUI;
@@ -108,6 +109,9 @@ class RegBankLegalizeHelper {
108109

109110
void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
110111
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
112+
113+
void lowerV_BFE(MachineInstr &MI);
114+
void lowerS_BFE(MachineInstr &MI);
111115
};
112116

113117
} // end namespace AMDGPU

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
450450
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
451451
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
452452

453+
addRulesForGOpcs({G_LSHR}, Standard).Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}});
454+
455+
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
456+
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
457+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
458+
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
459+
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
460+
453461
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
454462
// and G_FREEZE here, rest is trivially regbankselected earlier
455463
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
@@ -628,6 +636,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
628636
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
629637

630638
addRulesForIOpcs({amdgcn_readfirstlane})
631-
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
639+
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
640+
// this should not exist in the first place, it is from call lowering
641+
// readfirstlaning just in case register is not in sgpr.
642+
.Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
632643

633644
} // end initialize rules

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ enum LoweringMethodID {
168168
DoNotLower,
169169
VccExtToSel,
170170
UniExtToSel,
171+
S_BFE,
172+
V_BFE,
171173
VgprToVccCopy,
172174
SplitTo32,
173175
Ext32To64,

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s
43

54
...
65

@@ -96,12 +95,11 @@ body: |
9695
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
9796
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
9897
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
99-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
100-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
98+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
10199
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
102100
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
103101
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
104-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
102+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
105103
%0:_(s64) = COPY $vgpr0_vgpr1
106104
%1:_(s32) = COPY $vgpr2
107105
%2:_(s32) = COPY $vgpr3
@@ -124,12 +122,11 @@ body: |
124122
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
125123
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
126124
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
127-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
128-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
125+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
129126
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
130127
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
131128
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
132-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
129+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
133130
%0:_(s64) = COPY $vgpr0_vgpr1
134131
%1:_(s32) = COPY $vgpr0
135132
%2:_(s32) = COPY $vgpr1
@@ -216,12 +213,11 @@ body: |
216213
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
217214
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
218215
; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32)
219-
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
220-
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64
216+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 64
221217
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]]
222218
; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32)
223219
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32)
224-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %3:vgpr(s64)
220+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR1]](s64)
225221
%0:_(s64) = COPY $sgpr0_sgpr1
226222
%1:_(s32) = COPY $vgpr0
227223
%2:_(s32) = COPY $vgpr1
@@ -266,16 +262,19 @@ body: |
266262
; CHECK-LABEL: name: test_sbfx_s32_sss
267263
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr3
268264
; CHECK-NEXT: {{ $}}
269-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
265+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
270266
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
271267
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
272268
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
273269
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
274270
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
275271
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
276-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
277-
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
278-
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
272+
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
273+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
274+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
275+
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY3]](s32), [[COPY4]](s32), implicit-def $scc
276+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
277+
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]](s32)
279278
%0:_(s32) = COPY $sgpr0
280279
%1:_(s32) = COPY $sgpr1
281280
%2:_(s32) = COPY $sgpr2
@@ -294,16 +293,18 @@ body: |
294293
; CHECK-LABEL: name: test_sbfx_s32_sii
295294
; CHECK: liveins: $sgpr0
296295
; CHECK-NEXT: {{ $}}
297-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0
296+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
298297
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
299298
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
300299
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
301-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
302300
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
303-
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
304-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
305-
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc
306-
; CHECK-NEXT: $sgpr0 = COPY [[S_BFE_I32_]](s32)
301+
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
302+
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
303+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[COPY]](s32)
304+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
305+
; CHECK-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY1]](s32), [[COPY2]](s32), implicit-def $scc
306+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[S_BFE_I32_]](s32)
307+
; CHECK-NEXT: $sgpr0 = COPY [[COPY3]](s32)
307308
%0:_(s32) = COPY $sgpr0
308309
%1:_(s32) = G_CONSTANT i32 1
309310
%2:_(s32) = G_CONSTANT i32 10
@@ -324,16 +325,19 @@ body: |
324325
; CHECK-LABEL: name: test_sbfx_s64_sss
325326
; CHECK: liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1
326327
; CHECK-NEXT: {{ $}}
327-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
328+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
328329
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
329330
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
330331
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
331332
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]]
332333
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
333334
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32)
334-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
335-
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
336-
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64)
335+
; CHECK-NEXT: [[OR:%[0-9]+]]:sgpr(s32) = G_OR [[AND]], [[SHL]]
336+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
337+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s32) = COPY [[OR]](s32)
338+
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY3]](s64), [[COPY4]](s32), implicit-def $scc
339+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
340+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[COPY5]](s64)
337341
%0:_(s64) = COPY $sgpr0_sgpr1
338342
%1:_(s32) = COPY $sgpr0
339343
%2:_(s32) = COPY $sgpr1
@@ -352,15 +356,17 @@ body: |
352356
; CHECK-LABEL: name: test_sbfx_s64_sii
353357
; CHECK: liveins: $sgpr0_sgpr1
354358
; CHECK-NEXT: {{ $}}
355-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1
359+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
356360
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
357361
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
358362
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
359-
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]]
360363
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
361-
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32)
362-
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]]
363-
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc
364+
; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655360
365+
; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 655361
366+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s64) = COPY [[COPY]](s64)
367+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s32) = COPY [[C5]](s32)
368+
; CHECK-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY1]](s64), [[COPY2]](s32), implicit-def $scc
369+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s64) = COPY [[S_BFE_I64_]](s64)
364370
%0:_(s64) = COPY $sgpr0_sgpr1
365371
%1:_(s32) = G_CONSTANT i32 1
366372
%2:_(s32) = G_CONSTANT i32 10

0 commit comments

Comments
 (0)