Skip to content

Commit db8c84f

Browse files
authored
[GlobalIsel] Push cast through select. (llvm#100539)
1 parent a55df23 commit db8c84f

File tree

12 files changed

+263
-33
lines changed

12 files changed

+263
-33
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ class CombinerHelper {
129129

130130
const TargetLowering &getTargetLowering() const;
131131

132+
/// \returns the MachineFunction reported by the helper's MachineIRBuilder.
const MachineFunction &getMachineFunction() const;
133+
134+
/// \returns the DataLayout of the MachineFunction being combined.
const DataLayout &getDataLayout() const;
135+
136+
/// \returns the LLVMContext reported by the helper's MachineIRBuilder.
LLVMContext &getContext() const;
137+
132138
/// \returns true if the combiner is running pre-legalization.
133139
bool isPreLegalize() const;
134140

@@ -884,6 +890,9 @@ class CombinerHelper {
884890
bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI,
885891
BuildFnTy &MatchInfo);
886892

893+
/// Match cast(select(cond, t, f)) so that it can be rebuilt as
/// select(cond, cast(t), cast(f)). \p Cast is the extend/truncate
/// instruction and \p SelectMI the G_SELECT that feeds it; on success
/// \p MatchInfo receives the function that builds the replacement.
bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI,
894+
BuildFnTy &MatchInfo);
895+
887896
private:
888897
/// Checks for legality of an indexed variant of \p LdSt.
889898
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -996,6 +1005,8 @@ class CombinerHelper {
9961005

9971006
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
9981007
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
1008+
1009+
/// \returns true if the target considers a cast with opcode \p Opcode from
/// \p FromTy to \p ToTy to be free. Note the parameter order: the
/// destination type \p ToTy comes before the source type \p FromTy.
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
9991010
};
10001011
} // namespace llvm
10011012

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,22 @@ class GExtOp : public GCastOp {
934934
};
935935
};
936936

937+
/// Represents an integer-like extending or truncating operation, i.e. one of
/// G_SEXT, G_ZEXT, G_ANYEXT or G_TRUNC.
938+
class GExtOrTruncOp : public GCastOp {
939+
public:
940+
/// Opcode-based type test used by isa<>/cast<>/dyn_cast<>.
/// \returns true iff \p MI has one of the four opcodes listed below.
static bool classof(const MachineInstr *MI) {
941+
switch (MI->getOpcode()) {
942+
case TargetOpcode::G_SEXT:
943+
case TargetOpcode::G_ZEXT:
944+
case TargetOpcode::G_ANYEXT:
945+
case TargetOpcode::G_TRUNC:
946+
return true;
947+
default:
948+
return false;
949+
}
950+
};
951+
};
952+
937953
} // namespace llvm
938954

939955
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1771,10 +1771,25 @@ def truncate_of_zext : truncate_of_opcode<G_ZEXT>;
17711771
def truncate_of_sext : truncate_of_opcode<G_SEXT>;
17721772
def truncate_of_anyext : truncate_of_opcode<G_ANYEXT>;
17731773

1774+
// Push cast through select.
// Matches a cast with opcode `castOpcode` whose source operand is the result
// of a G_SELECT. The decision whether to fire, and the code that builds the
// replacement, both live in CombinerHelper::matchCastOfSelect; the apply step
// only runs the build function stored in $matchinfo.
// NOTE(review): despite the name, the matched pattern is "opcode of select".
1775+
class select_of_opcode<Instruction castOpcode> : GICombineRule <
1776+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1777+
(match (G_SELECT $select, $cond, $true, $false):$Select,
1778+
(castOpcode $root, $select):$Cast,
1779+
[{ return Helper.matchCastOfSelect(*${Cast}, *${Select}, ${matchinfo}); }]),
1780+
(apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
1781+
1782+
// Instantiations for the cast opcodes this rule is enabled for. There is no
// G_SEXT instantiation.
def select_of_zext : select_of_opcode<G_ZEXT>;
1783+
def select_of_anyext : select_of_opcode<G_ANYEXT>;
1784+
def select_of_truncate : select_of_opcode<G_TRUNC>;
1785+
17741786
def cast_combines: GICombineGroup<[
17751787
truncate_of_zext,
17761788
truncate_of_sext,
1777-
truncate_of_anyext
1789+
truncate_of_anyext,
1790+
select_of_zext,
1791+
select_of_anyext,
1792+
select_of_truncate
17781793
]>;
17791794

17801795

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,16 @@ const TargetLowering &CombinerHelper::getTargetLowering() const {
6868
return *Builder.getMF().getSubtarget().getTargetLowering();
6969
}
7070

71+
/// \returns the MachineFunction reported by Builder.
const MachineFunction &CombinerHelper::getMachineFunction() const {
72+
return Builder.getMF();
73+
}
74+
75+
/// \returns the DataLayout of the function returned by getMachineFunction().
const DataLayout &CombinerHelper::getDataLayout() const {
76+
return getMachineFunction().getDataLayout();
77+
}
78+
79+
/// \returns the LLVMContext reported by Builder.
LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
80+
7181
/// \returns The little endian in-memory byte position of byte \p I in a
7282
/// \p ByteWidth bytes wide type.
7383
///

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,51 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root,
161161

162162
return false;
163163
}
164+
165+
/// \returns true if the target lowering reports that a cast with opcode
/// \p Opcode from \p FromTy to \p ToTy is free.
///
/// G_ANYEXT and G_ZEXT are both answered by TargetLowering::isZExtFree and
/// G_TRUNC by TargetLowering::isTruncateFree. Every other opcode, G_SEXT
/// included, is reported as not free.
///
/// Note that this function takes the destination type first, whereas the
/// TargetLowering hooks called below take the source type first.
bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const {
166+
const TargetLowering &TLI = getTargetLowering();
167+
const DataLayout &DL = getDataLayout();
168+
LLVMContext &Ctx = getContext();
169+
170+
switch (Opcode) {
171+
case TargetOpcode::G_ANYEXT:
172+
case TargetOpcode::G_ZEXT:
173+
// G_ANYEXT shares the zero-extension query; there is no separate hook
// call for it here.
return TLI.isZExtFree(FromTy, ToTy, DL, Ctx);
174+
case TargetOpcode::G_TRUNC:
175+
return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx);
176+
default:
177+
return false;
178+
}
179+
}
180+
181+
/// Match `cast (select cond, t, f)` and prepare the rewrite to
/// `select cond, (cast t), (cast f)`.
///
/// \p CastMI is cast<> to GExtOrTruncOp and \p SelectMI to GSelect without a
/// preceding isa<> check, so callers must pass instructions of those kinds.
/// On success \p MatchInfo is set to a callback that builds the two new casts
/// and a select that defines the original cast's destination register.
///
/// \returns false, leaving \p MatchInfo unassigned, when the select result has
/// more than one non-debug use, when isLegalOrBeforeLegalizer rejects a
/// G_SELECT on {DstTy, CondTy}, or when isCastFree reports the cast as not
/// free; true otherwise.
bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI,
182+
const MachineInstr &SelectMI,
183+
BuildFnTy &MatchInfo) {
184+
const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
185+
const GSelect *Select = cast<GSelect>(&SelectMI);
186+
187+
// Only fire when the select result has a single non-debug use.
if (!MRI.hasOneNonDBGUse(Select->getReg(0)))
188+
return false;
189+
190+
Register Dst = Cast->getReg(0);
191+
LLT DstTy = MRI.getType(Dst);
192+
LLT CondTy = MRI.getType(Select->getCondReg());
193+
Register TrueReg = Select->getTrueReg();
194+
Register FalseReg = Select->getFalseReg();
195+
// The source type is read from the true operand only.
LLT SrcTy = MRI.getType(TrueReg);
196+
Register Cond = Select->getCondReg();
197+
198+
// The rebuilt select produces DstTy, so the query uses the cast's
// destination type rather than the original select's type.
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {DstTy, CondTy}}))
199+
return false;
200+
201+
// The rewrite replaces one cast with two, so it is limited to casts the
// target reports as free.
if (!isCastFree(Cast->getOpcode(), DstTy, SrcTy))
202+
return false;
203+
204+
// [=] copies the registers, the destination type and the Cast pointer into
// the callback; Cast is dereferenced for its opcode when the callback runs.
// NOTE(review): this relies on CastMI still being alive at apply time --
// confirm against the combiner's match/apply sequencing.
MatchInfo = [=](MachineIRBuilder &B) {
205+
auto True = B.buildInstr(Cast->getOpcode(), {DstTy}, {TrueReg});
206+
auto False = B.buildInstr(Cast->getOpcode(), {DstTy}, {FalseReg});
207+
B.buildSelect(Dst, Cond, True, False);
208+
};
209+
210+
return true;
211+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
3+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST
4+
5+
---
6+
name: test_combine_trunc_select
7+
legalized: true
8+
body: |
9+
bb.1:
10+
; CHECK-PRE-LABEL: name: test_combine_trunc_select
11+
; CHECK-PRE: %cond:_(s32) = COPY $w0
12+
; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
13+
; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
14+
; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
15+
; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
16+
; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
17+
; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
18+
;
19+
; CHECK-POST-LABEL: name: test_combine_trunc_select
20+
; CHECK-POST: %cond:_(s32) = COPY $w0
21+
; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
22+
; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
23+
; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
24+
; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
25+
; CHECK-POST-NEXT: $w0 = COPY %small(s32)
26+
%cond:_(s32) = COPY $w0
27+
%lhs:_(s64) = COPY $x0
28+
%rhs:_(s64) = COPY $x0
29+
%res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
30+
%small:_(s32) = G_TRUNC %res(s64)
31+
$w0 = COPY %small(s32)
32+
...
33+
---
34+
name: test_combine_zext_select
35+
legalized: true
36+
body: |
37+
bb.1:
38+
; CHECK-PRE-LABEL: name: test_combine_zext_select
39+
; CHECK-PRE: %cond:_(s32) = COPY $w0
40+
; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
41+
; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
42+
; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
43+
; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
44+
; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
45+
; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
46+
;
47+
; CHECK-POST-LABEL: name: test_combine_zext_select
48+
; CHECK-POST: %cond:_(s32) = COPY $w0
49+
; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
50+
; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
51+
; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
52+
; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
53+
; CHECK-POST-NEXT: $x0 = COPY %big(s64)
54+
%cond:_(s32) = COPY $w0
55+
%lhs:_(s32) = COPY $w0
56+
%rhs:_(s32) = COPY $w0
57+
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
58+
%big:_(s64) = G_ZEXT %res(s32)
59+
$x0 = COPY %big(s64)
60+
...
61+
---
62+
name: test_combine_anyzext_select
63+
legalized: true
64+
body: |
65+
bb.1:
66+
; CHECK-PRE-LABEL: name: test_combine_anyzext_select
67+
; CHECK-PRE: %cond:_(s32) = COPY $w0
68+
; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
69+
; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
70+
; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
71+
; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
72+
; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
73+
; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
74+
;
75+
; CHECK-POST-LABEL: name: test_combine_anyzext_select
76+
; CHECK-POST: %cond:_(s32) = COPY $w0
77+
; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
78+
; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
79+
; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
80+
; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
81+
; CHECK-POST-NEXT: $x0 = COPY %big(s64)
82+
%cond:_(s32) = COPY $w0
83+
%lhs:_(s32) = COPY $w0
84+
%rhs:_(s32) = COPY $w0
85+
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
86+
%big:_(s64) = G_ANYEXT %res(s32)
87+
$x0 = COPY %big(s64)
88+
...
89+
---
90+
name: test_combine_anyzext_select_multi_use
91+
legalized: true
92+
body: |
93+
bb.1:
94+
; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
95+
; CHECK: %cond:_(s32) = COPY $w0
96+
; CHECK-NEXT: %lhs:_(s32) = COPY $w0
97+
; CHECK-NEXT: %rhs:_(s32) = COPY $w0
98+
; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
99+
; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
100+
; CHECK-NEXT: $x0 = COPY %big(s64)
101+
; CHECK-NEXT: $w0 = COPY %res(s32)
102+
%cond:_(s32) = COPY $w0
103+
%lhs:_(s32) = COPY $w0
104+
%rhs:_(s32) = COPY $w0
105+
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
106+
%big:_(s64) = G_ANYEXT %res(s32)
107+
$x0 = COPY %big(s64)
108+
$w0 = COPY %res(s32)
109+
...
110+
---
111+
name: test_combine_trunc_select_vector_out_of_budget
112+
legalized: true
113+
body: |
114+
bb.1:
115+
; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
116+
; CHECK: %cond:_(<2 x s32>) = COPY $x0
117+
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
118+
; CHECK-NEXT: %arg2:_(s64) = COPY $x0
119+
; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
120+
; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
121+
; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
122+
; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
123+
; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
124+
%cond:_(<2 x s32>) = COPY $x0
125+
%arg1:_(s64) = COPY $x0
126+
%arg2:_(s64) = COPY $x0
127+
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
128+
%bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
129+
%res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
130+
%small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
131+
$x0 = COPY %small(<2 x s32>)

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
18451845
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
18461846
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
18471847
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1848-
; GCN-NEXT: s_ashr_i32 s8, s5, 31
1848+
; GCN-NEXT: s_ashr_i32 s7, s5, 31
18491849
; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
18501850
; GCN-NEXT: s_cmp_lg_u32 s11, 0
18511851
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
18521852
; GCN-NEXT: s_cmp_lg_u32 s12, 0
1853-
; GCN-NEXT: s_mov_b32 s9, s8
18541853
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
18551854
; GCN-NEXT: s_cmp_lg_u32 s11, 0
1856-
; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
1855+
; GCN-NEXT: s_cselect_b32 s2, s6, s7
18571856
; GCN-NEXT: ; return to shader part epilog
18581857
;
18591858
; GFX10PLUS-LABEL: s_ashr_i65:
18601859
; GFX10PLUS: ; %bb.0:
18611860
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1862-
; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
1863-
; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
1861+
; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1862+
; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
18641863
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
1865-
; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
1864+
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
18661865
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
1867-
; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
1868-
; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
1869-
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1870-
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1871-
; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
1872-
; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1873-
; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
1874-
; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
1875-
; GFX10PLUS-NEXT: s_mov_b32 s11, s10
1876-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1877-
; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
1878-
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1879-
; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
1880-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
1866+
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1867+
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
1868+
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
1869+
; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3
1870+
; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1871+
; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31
1872+
; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
1873+
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1874+
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
1875+
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1876+
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
1877+
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1878+
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
18811879
; GFX10PLUS-NEXT: ; return to shader part epilog
18821880
%result = ashr i65 %value, %amount
18831881
ret i65 %result

llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17661766
; GCN-NEXT: s_cmp_lg_u32 s12, 0
17671767
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
17681768
; GCN-NEXT: s_cmp_lg_u32 s11, 0
1769-
; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
1769+
; GCN-NEXT: s_cselect_b32 s2, s6, 0
17701770
; GCN-NEXT: ; return to shader part epilog
17711771
;
17721772
; GFX10PLUS-LABEL: s_lshr_i65:
@@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17881788
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
17891789
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
17901790
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1791-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
1791+
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
17921792
; GFX10PLUS-NEXT: ; return to shader part epilog
17931793
%result = lshr i65 %value, %amount
17941794
ret i65 %result

llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
17331733
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
17341734
; GCN-NEXT: s_cmp_lg_u32 s11, 0
17351735
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
1736-
; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
1736+
; GCN-NEXT: s_cselect_b32 s3, s6, s8
17371737
; GCN-NEXT: s_cmp_lg_u32 s12, 0
1738-
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1738+
; GCN-NEXT: s_cselect_b32 s2, s2, s3
17391739
; GCN-NEXT: ; return to shader part epilog
17401740
;
17411741
; GFX10PLUS-LABEL: s_shl_i65:
@@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
17531753
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
17541754
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
17551755
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
1756-
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
1756+
; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
17571757
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1758-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
1758+
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
17591759
; GFX10PLUS-NEXT: ; return to shader part epilog
17601760
%result = shl i65 %value, %amount
17611761
ret i65 %result

llvm/test/CodeGen/AMDGPU/ctlz.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
15931593
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
15941594
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
15951595
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
1596-
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
1596+
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
15971597
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
15981598
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
15991599
; GFX10-GISEL-NEXT: s_endpgm

0 commit comments

Comments
 (0)