Skip to content

Commit ba4bcce

Browse files
author
Thorsten Schütt
authored
[GlobalIsel] Combine trunc of binop (#107721)
trunc (binop X, C) --> binop (trunc X, trunc C) --> binop (trunc X, C'). Try to narrow the width of math or bitwise logic instructions by pulling a truncate ahead of binary operators. Vx and Nx cores consider 32-bit and 64-bit basic arithmetic equal in cost.
1 parent 44fc987 commit ba4bcce

18 files changed

+3396
-3033
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,12 @@ class CombinerHelper {
831831
/// Combine ors.
832832
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);
833833

834+
/// Match trunc (binop X, C) --> binop (trunc X, trunc C).
/// \p TruncMI is the truncate and \p BinopMI is the binary operation whose
/// result it truncates. On success \p MatchInfo receives the function that
/// builds the narrowed instructions.
835+
bool matchNarrowBinop(const MachineInstr &TruncMI,
836+
const MachineInstr &BinopMI, BuildFnTy &MatchInfo);
837+
838+
/// Match a cast whose source is an integer constant and compute the folded
/// constant. Only G_TRUNC is handled; on success \p MatchInfo holds the
/// constant truncated to the destination scalar width.
bool matchCastOfInteger(const MachineInstr &CastMI, APInt &MatchInfo);
839+
834840
/// Combine addos.
835841
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
836842

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1867,6 +1867,33 @@ class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
18671867

18681868
def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;
18691869

1870+
// narrow binop.
1871+
// trunc (binop X, C) --> binop (trunc X, trunc C)
1872+
// Rule template, parameterized over the binary opcode to narrow.
// The pattern only matches when the constant is the second operand of the
// binop and the binop result feeds the G_TRUNC that is the root.
class narrow_binop_opcode<Instruction binopOpcode> : GICombineRule <
1873+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1874+
(match (G_CONSTANT $const, $imm),
1875+
(binopOpcode $binop, $x, $const):$Binop,
1876+
(G_TRUNC $root, $binop):$Trunc,
1877+
// The C++ matcher performs the use-count and legality checks and fills in
// the build function stored in $matchinfo.
[{ return Helper.matchNarrowBinop(*${Trunc}, *${Binop}, ${matchinfo}); }]),
1878+
(apply [{ Helper.applyBuildFn(*${Trunc}, ${matchinfo}); }])>;
1879+
1880+
def narrow_binop_add : narrow_binop_opcode<G_ADD>;
1881+
def narrow_binop_sub : narrow_binop_opcode<G_SUB>;
1882+
def narrow_binop_mul : narrow_binop_opcode<G_MUL>;
1883+
def narrow_binop_and : narrow_binop_opcode<G_AND>;
1884+
def narrow_binop_or : narrow_binop_opcode<G_OR>;
1885+
def narrow_binop_xor : narrow_binop_opcode<G_XOR>;
1886+
1887+
// Cast of integer.
1888+
// Rule template: fold a cast (castOpcode) whose source is a G_CONSTANT into a
// single constant of the destination type. The folded value is computed by
// Helper.matchCastOfInteger and carried in the APInt $matchinfo.
class integer_of_opcode<Instruction castOpcode> : GICombineRule <
1889+
(defs root:$root, apint_matchinfo:$matchinfo),
1890+
(match (G_CONSTANT $int, $imm),
1891+
(castOpcode $root, $int):$Cast,
1892+
[{ return Helper.matchCastOfInteger(*${Cast}, ${matchinfo}); }]),
1893+
(apply [{ Helper.replaceInstWithConstant(*${Cast}, ${matchinfo}); }])>;
1894+
1895+
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
1896+
18701897
def cast_combines: GICombineGroup<[
18711898
truncate_of_zext,
18721899
truncate_of_sext,
@@ -1881,7 +1908,14 @@ def cast_combines: GICombineGroup<[
18811908
anyext_of_anyext,
18821909
anyext_of_zext,
18831910
anyext_of_sext,
1884-
buildvector_of_truncate
1911+
buildvector_of_truncate,
1912+
narrow_binop_add,
1913+
narrow_binop_sub,
1914+
narrow_binop_mul,
1915+
narrow_binop_and,
1916+
narrow_binop_or,
1917+
narrow_binop_xor,
1918+
integer_of_truncate
18851919
]>;
18861920

18871921

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,49 @@ bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,
313313

314314
return true;
315315
}
316+
317+
// Match trunc (binop X, C) and prepare the rewrite to
// binop (trunc X, trunc C).
//
// TruncMI   - the G_TRUNC instruction (must be a GTrunc).
// BinopMI   - the binary operation producing the truncated value (must be a
//             GBinOp).
// MatchInfo - on success, set to a function that builds the narrowed
//             instructions.
//
// Returns false when the binop result has other non-debug users or when
// isLegalOrBeforeLegalizer rejects the binop at the truncated type.
bool CombinerHelper::matchNarrowBinop(const MachineInstr &TruncMI,
318+
const MachineInstr &BinopMI,
319+
BuildFnTy &MatchInfo) {
320+
const GTrunc *Trunc = cast<GTrunc>(&TruncMI);
321+
const GBinOp *BinOp = cast<GBinOp>(&BinopMI);
322+
323+
// Only narrow when the truncate is the sole non-debug user of the wide
// result; otherwise the wide binop would have to stay as well.
if (!MRI.hasOneNonDBGUse(BinOp->getReg(0)))
324+
return false;
325+
326+
Register Dst = Trunc->getReg(0);
327+
LLT DstTy = MRI.getType(Dst);
328+
329+
// Is narrow binop legal?
330+
if (!isLegalOrBeforeLegalizer({BinOp->getOpcode(), {DstTy}}))
331+
return false;
332+
333+
// The closure copies Dst, DstTy and the BinOp pointer, so it has to be run
// while BinopMI is still alive.
MatchInfo = [=](MachineIRBuilder &B) {
334+
// Truncate both operands to the destination type, then redo the same
// operation at that type, defining the original truncate's result register.
auto LHS = B.buildTrunc(DstTy, BinOp->getLHSReg());
335+
auto RHS = B.buildTrunc(DstTy, BinOp->getRHSReg());
336+
B.buildInstr(BinOp->getOpcode(), {Dst}, {LHS, RHS});
337+
};
338+
339+
return true;
340+
}
341+
342+
// Fold a cast of an integer constant into a constant of the destination type.
//
// CastMI    - the cast instruction (must be a GExtOrTruncOp).
// MatchInfo - on success, set to the folded constant value.
//
// Returns false when isConstantLegalOrBeforeLegalizer rejects the destination
// type or when the cast opcode is anything other than G_TRUNC.
bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
343+
APInt &MatchInfo) {
344+
const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
345+
346+
// The constant is read before any check.
// NOTE(review): presumably relies on the combine rule having matched a
// G_CONSTANT as the cast source - confirm against getIConstantFromReg.
APInt Input = getIConstantFromReg(Cast->getSrcReg(), MRI);
347+
348+
LLT DstTy = MRI.getType(Cast->getReg(0));
349+
350+
if (!isConstantLegalOrBeforeLegalizer(DstTy))
351+
return false;
352+
353+
switch (Cast->getOpcode()) {
354+
case TargetOpcode::G_TRUNC: {
355+
// Keep the low bits that fit the destination scalar width.
MatchInfo = Input.trunc(DstTy.getScalarSizeInBits());
356+
return true;
357+
}
358+
// Other cast opcodes are not folded here.
default:
359+
return false;
360+
}
361+
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
3+
4+
---
5+
name: test_combine_trunc_xor_i64
6+
body: |
7+
bb.1:
8+
; CHECK-LABEL: name: test_combine_trunc_xor_i64
9+
; CHECK: %lhs:_(s64) = COPY $x0
10+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
11+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
12+
; CHECK-NEXT: %small:_(s32) = G_XOR [[TRUNC]], [[C]]
13+
; CHECK-NEXT: $w0 = COPY %small(s32)
14+
%lhs:_(s64) = COPY $x0
15+
%rhs:_(s64) = G_CONSTANT i64 5
16+
%res:_(s64) = G_XOR %lhs, %rhs
17+
%small:_(s32) = G_TRUNC %res(s64)
18+
$w0 = COPY %small(s32)
19+
...
20+
---
21+
name: test_combine_trunc_add_i64
22+
body: |
23+
bb.1:
24+
; CHECK-LABEL: name: test_combine_trunc_add_i64
25+
; CHECK: %lhs:_(s64) = COPY $x0
26+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
27+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
28+
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
29+
; CHECK-NEXT: $w0 = COPY %small(s32)
30+
%lhs:_(s64) = COPY $x0
31+
%rhs:_(s64) = G_CONSTANT i64 5
32+
%res:_(s64) = G_ADD %lhs, %rhs
33+
%small:_(s32) = G_TRUNC %res(s64)
34+
$w0 = COPY %small(s32)
35+
...
36+
---
37+
name: test_combine_trunc_mul_i64
38+
body: |
39+
bb.1:
40+
; CHECK-LABEL: name: test_combine_trunc_mul_i64
41+
; CHECK: %lhs:_(s64) = COPY $x0
42+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
43+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
44+
; CHECK-NEXT: %small:_(s32) = G_MUL [[TRUNC]], [[C]]
45+
; CHECK-NEXT: $w0 = COPY %small(s32)
46+
%lhs:_(s64) = COPY $x0
47+
%rhs:_(s64) = G_CONSTANT i64 5
48+
%res:_(s64) = G_MUL %lhs, %rhs
49+
%small:_(s32) = G_TRUNC %res(s64)
50+
$w0 = COPY %small(s32)
51+
...
52+
---
53+
name: test_combine_trunc_and_i64
54+
body: |
55+
bb.1:
56+
; CHECK-LABEL: name: test_combine_trunc_and_i64
57+
; CHECK: %lhs:_(s64) = COPY $x0
58+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
59+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
60+
; CHECK-NEXT: %small:_(s32) = G_AND [[TRUNC]], [[C]]
61+
; CHECK-NEXT: $w0 = COPY %small(s32)
62+
%lhs:_(s64) = COPY $x0
63+
%rhs:_(s64) = G_CONSTANT i64 5
64+
%res:_(s64) = G_AND %lhs, %rhs
65+
%small:_(s32) = G_TRUNC %res(s64)
66+
$w0 = COPY %small(s32)
67+
...
68+
---
69+
name: test_combine_trunc_or_i64
70+
body: |
71+
bb.1:
72+
; CHECK-LABEL: name: test_combine_trunc_or_i64
73+
; CHECK: %lhs:_(s64) = COPY $x0
74+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
75+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
76+
; CHECK-NEXT: %small:_(s32) = G_OR [[TRUNC]], [[C]]
77+
; CHECK-NEXT: $w0 = COPY %small(s32)
78+
%lhs:_(s64) = COPY $x0
79+
%rhs:_(s64) = G_CONSTANT i64 5
80+
%res:_(s64) = G_OR %lhs, %rhs
81+
%small:_(s32) = G_TRUNC %res(s64)
82+
$w0 = COPY %small(s32)
83+
...
84+
---
85+
name: test_combine_trunc_sub_i128
86+
body: |
87+
bb.1:
88+
; CHECK-LABEL: name: test_combine_trunc_sub_i128
89+
; CHECK: %lhs:_(s128) = COPY $q0
90+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
91+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
92+
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
93+
; CHECK-NEXT: $w0 = COPY %small(s32)
94+
%lhs:_(s128) = COPY $q0
95+
%rhs:_(s128) = G_CONSTANT i128 5
96+
%res:_(s128) = G_SUB %lhs, %rhs
97+
%small:_(s32) = G_TRUNC %res(s128)
98+
$w0 = COPY %small(s32)
99+
...
100+
---
101+
name: test_combine_trunc_sub_i128_multi_use
102+
body: |
103+
bb.1:
104+
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
105+
; CHECK: %lhs:_(s128) = COPY $q0
106+
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
107+
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
108+
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
109+
; CHECK-NEXT: $q0 = COPY %res(s128)
110+
; CHECK-NEXT: $w0 = COPY %small(s32)
111+
%lhs:_(s128) = COPY $q0
112+
%rhs:_(s128) = G_CONSTANT i128 5
113+
%res:_(s128) = G_SUB %lhs, %rhs
114+
%small:_(s32) = G_TRUNC %res(s128)
115+
$q0 = COPY %res(s128)
116+
$w0 = COPY %small(s32)
117+
...
118+
---
119+
name: test_combine_trunc_xor_vector_pattern_did_not_match
120+
body: |
121+
bb.1:
122+
; CHECK-LABEL: name: test_combine_trunc_xor_vector_pattern_did_not_match
123+
; CHECK: %arg1:_(s64) = COPY $x0
124+
; CHECK-NEXT: %arg2:_(s64) = COPY $x0
125+
; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
126+
; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
127+
; CHECK-NEXT: %res:_(<2 x s64>) = G_XOR %lhs, %rhs
128+
; CHECK-NEXT: %small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
129+
; CHECK-NEXT: $w0 = COPY %small(<2 x s16>)
130+
%arg1:_(s64) = COPY $x0
131+
%arg2:_(s64) = COPY $x0
132+
%lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
133+
%rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
134+
%res:_(<2 x s64>) = G_XOR %lhs, %rhs
135+
%small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
136+
$w0 = COPY %small(<2 x s16>)

llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,10 @@ body: |
224224
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
225225
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
226226
; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
227-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
228-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
229-
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
230-
; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
227+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 16448
228+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
229+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
230+
; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
231231
; CHECK-NEXT: RET_ReallyLR
232232
%0:_(p0) = COPY $x0
233233
%1:_(s8) = G_CONSTANT i8 64

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ tracksRegLiveness: true
88
body: |
99
bb.1:
1010
; CHECK-LABEL: name: test
11-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
12-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
13-
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
11+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
12+
; CHECK-NEXT: $w0 = COPY [[C]](s32)
1413
; CHECK-NEXT: RET_ReallyLR implicit $w0
1514
%0:_(s16) = G_CONSTANT i16 0
1615
%2:_(s1) = G_CONSTANT i1 true
@@ -41,9 +40,7 @@ body: |
4140
bb.1:
4241
; CHECK-LABEL: name: test_inverted_div_rem
4342
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
44-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
45-
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
46-
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
43+
; CHECK-NEXT: $w0 = COPY [[C]](s32)
4744
; CHECK-NEXT: RET_ReallyLR implicit $w0
4845
%0:_(s16) = G_CONSTANT i16 0
4946
%2:_(s1) = G_CONSTANT i1 true

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,10 @@ body: |
193193
; CHECK: liveins: $vgpr0_vgpr1
194194
; CHECK-NEXT: {{ $}}
195195
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
196-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
197-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
198-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
199-
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[TRUNC]]
196+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
197+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
198+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
199+
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
200200
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
201201
%0:_(s64) = COPY $vgpr0_vgpr1
202202
%1:_(s64) = G_CONSTANT i64 255
@@ -216,10 +216,10 @@ body: |
216216
; CHECK: liveins: $vgpr0_vgpr1
217217
; CHECK-NEXT: {{ $}}
218218
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
219-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
220-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
221-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
222-
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[TRUNC]]
219+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
220+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
221+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
222+
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
223223
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
224224
%0:_(s64) = COPY $vgpr0_vgpr1
225225
%1:_(s64) = G_CONSTANT i64 255

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ body: |
1212
; GCN: liveins: $vgpr0
1313
; GCN-NEXT: {{ $}}
1414
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
15-
; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383
16-
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %c3FFF
17-
; GCN-NEXT: $vgpr0 = COPY %low_bits(s32)
15+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
16+
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
17+
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
18+
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
19+
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
1820
%var:_(s32) = COPY $vgpr0
1921
%c3FFF:_(s32) = G_CONSTANT i32 16383
2022
%low_bits:_(s32) = G_AND %var, %c3FFF
@@ -34,10 +36,8 @@ body: |
3436
; GCN: liveins: $vgpr0
3537
; GCN-NEXT: {{ $}}
3638
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
37-
; GCN-NEXT: %cFFFFF:_(s32) = G_CONSTANT i32 1048575
38-
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %cFFFFF
39-
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s32)
40-
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
39+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
40+
; GCN-NEXT: %zext:_(s32) = G_ZEXT [[TRUNC]](s16)
4141
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
4242
%var:_(s32) = COPY $vgpr0
4343
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
@@ -58,9 +58,9 @@ body: |
5858
; GCN: liveins: $vgpr0_vgpr1
5959
; GCN-NEXT: {{ $}}
6060
; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1
61-
; GCN-NEXT: %c3FFF:_(s64) = G_CONSTANT i64 16383
62-
; GCN-NEXT: %low_bits:_(s64) = G_AND %var, %c3FFF
63-
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s64)
61+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s64)
62+
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
63+
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
6464
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
6565
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
6666
%var:_(s64) = COPY $vgpr0_vgpr1
@@ -82,9 +82,9 @@ body: |
8282
; GCN: liveins: $vgpr0
8383
; GCN-NEXT: {{ $}}
8484
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
85-
; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383
86-
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %c3FFF
87-
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s32)
85+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
86+
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
87+
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
8888
; GCN-NEXT: %zext:_(s64) = G_ZEXT %trunc(s16)
8989
; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(s64)
9090
%var:_(s32) = COPY $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,13 +238,12 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
238238
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0
239239
; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
240240
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
241-
; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
242-
; GFX10-NEXT: v_xor_b32_e32 v3, 1, v3
241+
; GFX10-NEXT: v_xor_b32_e32 v3, 1, v1
243242
; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
244-
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
245-
; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen
246243
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
247244
; GFX10-NEXT: ; implicit-def: $vgpr3
245+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
246+
; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen
248247
; GFX10-NEXT: s_waitcnt vmcnt(0)
249248
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
250249
; GFX10-NEXT: s_cbranch_vccnz .LBB4_4

0 commit comments

Comments
 (0)