Skip to content

Commit 4e63e04

Browse files
authored
[AMDGPU] Canonicalize G_ZEXT of the shift amount in RegBankCombiner (#131792)
Canonicalize a G_ZEXT of the shift amount to a G_AND (of a G_ANYEXT) instead, so that ISel patterns can match the mask and ignore it, since the shift instructions only read the low bits of the amount. In most cases the G_ZEXT would be lowered to a v_and/s_and anyway. I'm also looking at making a DAG version of this in a separate patch.
1 parent d441d28 commit 4e63e04

File tree

5 files changed

+216
-76
lines changed

5 files changed

+216
-76
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,22 @@ def combine_fmul_with_select_to_fldexp : GICombineRule<
134134
[{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
135135
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
136136

137+
// (shift x, (zext amt)) -> (shift x, (and (anyext amt), mask))
138+
//
139+
// The pattern is longer, but is better for matching during ISel.
140+
class canonicalize_zext_shift_amt<Instruction opc> : GICombineRule<
141+
(defs root:$dst),
142+
(match (G_ZEXT $amt, $amtsrc):$zext,
143+
(opc $dst, $src, $amt):$shift),
144+
(apply [{ applyCanonicalizeZextShiftAmt(*${shift}, *${zext}); }])>;
145+
146+
def canonicalize_zext_lshr : canonicalize_zext_shift_amt<G_LSHR>;
147+
def canonicalize_zext_ashr : canonicalize_zext_shift_amt<G_ASHR>;
148+
def canonicalize_zext_shl : canonicalize_zext_shift_amt<G_SHL>;
149+
150+
def zext_of_shift_amount_combines : GICombineGroup<[
151+
canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl
152+
]>;
137153

138154
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
139155
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
@@ -182,5 +198,5 @@ def AMDGPURegBankCombiner : GICombiner<
182198
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
183199
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
184200
identity_combines, redundant_and, constant_fold_cast_op,
185-
cast_of_cast_combines, sext_trunc]> {
201+
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines]> {
186202
}

llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class AMDGPURegBankCombinerImpl : public Combiner {
8787
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
8888
void applyClamp(MachineInstr &MI, Register &Reg) const;
8989

90+
void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;
91+
9092
private:
9193
SIModeRegisterDefaults getMode() const;
9294
bool getIEEE() const;
@@ -362,6 +364,34 @@ void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
362364
MI.eraseFromParent();
363365
}
364366

367+
void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
368+
MachineInstr &MI, MachineInstr &Ext) const {
369+
unsigned ShOpc = MI.getOpcode();
370+
assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
371+
ShOpc == AMDGPU::G_ASHR);
372+
assert(Ext.getOpcode() == AMDGPU::G_ZEXT);
373+
374+
Register AmtReg = Ext.getOperand(1).getReg();
375+
Register ShDst = MI.getOperand(0).getReg();
376+
Register ShSrc = MI.getOperand(1).getReg();
377+
378+
LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
379+
LLT AmtTy = MRI.getType(AmtReg);
380+
381+
auto &RB = *MRI.getRegBank(AmtReg);
382+
383+
auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
384+
auto Mask = B.buildConstant(
385+
ExtAmtTy, maskTrailingOnes<uint64_t>(AmtTy.getScalarSizeInBits()));
386+
auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
387+
B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});
388+
389+
MRI.setRegBank(NewExt.getReg(0), RB);
390+
MRI.setRegBank(Mask.getReg(0), RB);
391+
MRI.setRegBank(And.getReg(0), RB);
392+
MI.eraseFromParent();
393+
}
394+
365395
SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
366396
return MF.getInfo<SIMachineFunctionInfo>()->getMode();
367397
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: lshr_zext_i16
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0, $sgpr1
10+
11+
; CHECK-LABEL: name: lshr_zext_i16
12+
; CHECK: liveins: $sgpr0, $sgpr1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
15+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
16+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
17+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
18+
; CHECK-NEXT: %res:sgpr(s32) = G_LSHR %src, [[AND]](s32)
19+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
20+
%src:sgpr(s32) = COPY $sgpr0
21+
%regamt:sgpr(s32) = COPY $sgpr1
22+
%amt:sgpr(s16) = G_TRUNC %regamt
23+
%zextamt:sgpr(s32) = G_ZEXT %amt
24+
%res:sgpr(s32) = G_LSHR %src, %zextamt
25+
$sgpr0 = COPY %res
26+
...
27+
28+
---
29+
name: ashr_zext_i16
30+
tracksRegLiveness: true
31+
body: |
32+
bb.0:
33+
liveins: $sgpr0, $sgpr1
34+
35+
; CHECK-LABEL: name: ashr_zext_i16
36+
; CHECK: liveins: $sgpr0, $sgpr1
37+
; CHECK-NEXT: {{ $}}
38+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
39+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
40+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
41+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
42+
; CHECK-NEXT: %res:sgpr(s32) = G_ASHR %src, [[AND]](s32)
43+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
44+
%src:sgpr(s32) = COPY $sgpr0
45+
%regamt:sgpr(s32) = COPY $sgpr1
46+
%amt:sgpr(s16) = G_TRUNC %regamt
47+
%zextamt:sgpr(s32) = G_ZEXT %amt
48+
%res:sgpr(s32) = G_ASHR %src, %zextamt
49+
$sgpr0 = COPY %res
50+
...
51+
52+
---
53+
name: shl_zext_i16
54+
tracksRegLiveness: true
55+
body: |
56+
bb.0:
57+
liveins: $sgpr0, $sgpr1
58+
59+
; CHECK-LABEL: name: shl_zext_i16
60+
; CHECK: liveins: $sgpr0, $sgpr1
61+
; CHECK-NEXT: {{ $}}
62+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
63+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
64+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
65+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
66+
; CHECK-NEXT: %res:sgpr(s32) = G_SHL %src, [[AND]](s32)
67+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
68+
%src:sgpr(s32) = COPY $sgpr0
69+
%regamt:sgpr(s32) = COPY $sgpr1
70+
%amt:sgpr(s16) = G_TRUNC %regamt
71+
%zextamt:sgpr(s32) = G_ZEXT %amt
72+
%res:sgpr(s32) = G_SHL %src, %zextamt
73+
$sgpr0 = COPY %res
74+
...
75+
76+
---
77+
name: lshr_zext_i8
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
liveins: $sgpr0, $sgpr1
82+
83+
; CHECK-LABEL: name: lshr_zext_i8
84+
; CHECK: liveins: $sgpr0, $sgpr1
85+
; CHECK-NEXT: {{ $}}
86+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
87+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
88+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
89+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
90+
; CHECK-NEXT: %res:sgpr(s32) = G_LSHR %src, [[AND]](s32)
91+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
92+
%src:sgpr(s32) = COPY $sgpr0
93+
%regamt:sgpr(s32) = COPY $sgpr1
94+
%amt:sgpr(s8) = G_TRUNC %regamt
95+
%zextamt:sgpr(s32) = G_ZEXT %amt
96+
%res:sgpr(s32) = G_LSHR %src, %zextamt
97+
$sgpr0 = COPY %res
98+
...
99+
100+
---
101+
name: ashr_zext_i8
102+
tracksRegLiveness: true
103+
body: |
104+
bb.0:
105+
liveins: $sgpr0, $sgpr1
106+
107+
; CHECK-LABEL: name: ashr_zext_i8
108+
; CHECK: liveins: $sgpr0, $sgpr1
109+
; CHECK-NEXT: {{ $}}
110+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
111+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
112+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
113+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
114+
; CHECK-NEXT: %res:sgpr(s32) = G_ASHR %src, [[AND]](s32)
115+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
116+
%src:sgpr(s32) = COPY $sgpr0
117+
%regamt:sgpr(s32) = COPY $sgpr1
118+
%amt:sgpr(s8) = G_TRUNC %regamt
119+
%zextamt:sgpr(s32) = G_ZEXT %amt
120+
%res:sgpr(s32) = G_ASHR %src, %zextamt
121+
$sgpr0 = COPY %res
122+
...
123+
124+
---
125+
name: shl_zext_i8
126+
tracksRegLiveness: true
127+
body: |
128+
bb.0:
129+
liveins: $sgpr0, $sgpr1
130+
131+
; CHECK-LABEL: name: shl_zext_i8
132+
; CHECK: liveins: $sgpr0, $sgpr1
133+
; CHECK-NEXT: {{ $}}
134+
; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
135+
; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
136+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
137+
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
138+
; CHECK-NEXT: %res:sgpr(s32) = G_SHL %src, [[AND]](s32)
139+
; CHECK-NEXT: $sgpr0 = COPY %res(s32)
140+
%src:sgpr(s32) = COPY $sgpr0
141+
%regamt:sgpr(s32) = COPY $sgpr1
142+
%amt:sgpr(s8) = G_TRUNC %regamt
143+
%zextamt:sgpr(s32) = G_ZEXT %amt
144+
%res:sgpr(s32) = G_SHL %src, %zextamt
145+
$sgpr0 = COPY %res
146+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3329,9 +3329,7 @@ define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) {
33293329
; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
33303330
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
33313331
; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3332-
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
33333332
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
3334-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
33353333
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
33363334
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
33373335
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
@@ -3486,10 +3484,8 @@ define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt)
34863484
; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
34873485
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
34883486
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3489-
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
34903487
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
34913488
; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
3492-
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
34933489
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
34943490
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
34953491
; GFX6-NEXT: ; return to shader part epilog
@@ -3793,20 +3789,16 @@ define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
37933789
; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
37943790
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
37953791
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3796-
; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
37973792
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3798-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
37993793
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
38003794
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
38013795
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
38023796
; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
38033797
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
38043798
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3805-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
38063799
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
38073800
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3808-
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3809-
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3801+
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
38103802
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
38113803
; GFX6-NEXT: s_setpc_b64 s[30:31]
38123804
;
@@ -3942,18 +3934,14 @@ define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
39423934
; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
39433935
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
39443936
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3945-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
39463937
; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
39473938
; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3948-
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
39493939
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
39503940
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
39513941
; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
39523942
; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
39533943
; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3954-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
39553944
; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3956-
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
39573945
; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
39583946
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
39593947
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
@@ -4450,28 +4438,22 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
44504438
; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
44514439
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
44524440
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4453-
; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
44544441
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4455-
; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
44564442
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
44574443
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
44584444
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
44594445
; GFX6-NEXT: v_and_b32_e32 v3, 15, v7
44604446
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
44614447
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4462-
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
44634448
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
44644449
; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4465-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4466-
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4450+
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
44674451
; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
44684452
; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
44694453
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
44704454
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4471-
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
44724455
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2
44734456
; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4474-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
44754457
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
44764458
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
44774459
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -4790,37 +4772,29 @@ define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
47904772
; GFX6-NEXT: v_and_b32_e32 v12, 15, v8
47914773
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
47924774
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4793-
; GFX6-NEXT: v_and_b32_e32 v12, 0xffff, v12
47944775
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4795-
; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
47964776
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0
47974777
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
47984778
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
47994779
; GFX6-NEXT: v_and_b32_e32 v4, 15, v9
48004780
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9
48014781
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4802-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
48034782
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
48044783
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4805-
; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
4806-
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4784+
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
48074785
; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
48084786
; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
48094787
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
48104788
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
4811-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
48124789
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
48134790
; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
4814-
; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
48154791
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
48164792
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
48174793
; GFX6-NEXT: v_and_b32_e32 v4, 15, v11
48184794
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
48194795
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
4820-
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
48214796
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
48224797
; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
4823-
; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
48244798
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
48254799
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
48264800
; GFX6-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)