Skip to content

Commit 66f2d09

Browse files
committed
[DAGCombiner] Transform (zext (select c, load1, load2)) -> (select c, zextload1, zextload2)
If extload is legal, the following transform (zext (select c, load1, load2)) -> (select c, zextload1, zextload2) can save one ext instruction. Differential Revision: https://reviews.llvm.org/D95086
1 parent ea2ff54 commit 66f2d09

File tree

2 files changed

+105
-34
lines changed

2 files changed

+105
-34
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10029,6 +10029,77 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
1002910029
return SDValue();
1003010030
}
1003110031

10032+
/// Check if N satisfies:
10033+
/// N is used once.
10034+
/// N is a Load.
10035+
/// The load is compatible with ExtOpcode. It means
10036+
/// If load has explicit zero/sign extension, ExpOpcode must have the same
10037+
/// extension.
10038+
/// Otherwise returns true.
10039+
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10040+
if (!N.hasOneUse())
10041+
return false;
10042+
10043+
if (!isa<LoadSDNode>(N))
10044+
return false;
10045+
10046+
LoadSDNode *Load = cast<LoadSDNode>(N);
10047+
ISD::LoadExtType LoadExt = Load->getExtensionType();
10048+
if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10049+
return true;
10050+
10051+
// Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10052+
// extension.
10053+
if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10054+
(LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10055+
return false;
10056+
10057+
return true;
10058+
}
10059+
10060+
/// Fold
10061+
/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10062+
/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10063+
/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10064+
/// This function is called by the DAGCombiner when visiting sext/zext/aext
10065+
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10066+
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10067+
SelectionDAG &DAG) {
10068+
unsigned Opcode = N->getOpcode();
10069+
SDValue N0 = N->getOperand(0);
10070+
EVT VT = N->getValueType(0);
10071+
SDLoc DL(N);
10072+
10073+
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10074+
Opcode == ISD::ANY_EXTEND) &&
10075+
"Expected EXTEND dag node in input!");
10076+
10077+
if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10078+
!N0.hasOneUse())
10079+
return SDValue();
10080+
10081+
SDValue Op1 = N0->getOperand(1);
10082+
SDValue Op2 = N0->getOperand(2);
10083+
if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10084+
return SDValue();
10085+
10086+
auto ExtLoadOpcode = ISD::EXTLOAD;
10087+
if (Opcode == ISD::SIGN_EXTEND)
10088+
ExtLoadOpcode = ISD::SEXTLOAD;
10089+
else if (Opcode == ISD::ZERO_EXTEND)
10090+
ExtLoadOpcode = ISD::ZEXTLOAD;
10091+
10092+
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10093+
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10094+
if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10095+
!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10096+
return SDValue();
10097+
10098+
SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10099+
SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10100+
return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10101+
}
10102+
1003210103
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
1003310104
/// a build_vector of constants.
1003410105
/// This function is called by the DAGCombiner when visiting sext/zext/aext
@@ -10813,6 +10884,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
1081310884
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
1081410885
}
1081510886

10887+
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
10888+
return Res;
10889+
1081610890
return SDValue();
1081710891
}
1081810892

@@ -11125,6 +11199,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1112511199
if (SDValue NewCtPop = widenCtPop(N, DAG))
1112611200
return NewCtPop;
1112711201

11202+
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11203+
return Res;
11204+
1112811205
return SDValue();
1112911206
}
1113011207

@@ -11277,6 +11354,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
1127711354
if (SDValue NewCtPop = widenCtPop(N, DAG))
1127811355
return NewCtPop;
1127911356

11357+
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11358+
return Res;
11359+
1128011360
return SDValue();
1128111361
}
1128211362

llvm/test/CodeGen/X86/select-ext.ll

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s
33

4-
; TODO: (zext(select c, load1, load2)) -> (select c, zextload1, zextload2)
4+
; (zext(select c, load1, load2)) -> (select c, zextload1, zextload2)
55
define i64 @zext_scalar(i8* %p, i1 zeroext %c) {
66
; CHECK-LABEL: zext_scalar:
77
; CHECK: # %bb.0:
8-
; CHECK-NEXT: movzbl (%rdi), %eax
9-
; CHECK-NEXT: movzbl 1(%rdi), %ecx
8+
; CHECK-NEXT: movzbl (%rdi), %ecx
9+
; CHECK-NEXT: movzbl 1(%rdi), %eax
1010
; CHECK-NEXT: testl %esi, %esi
11-
; CHECK-NEXT: cmovel %eax, %ecx
12-
; CHECK-NEXT: movzbl %cl, %eax
11+
; CHECK-NEXT: cmoveq %rcx, %rax
1312
; CHECK-NEXT: retq
1413
%ld1 = load volatile i8, i8* %p
1514
%arrayidx1 = getelementptr inbounds i8, i8* %p, i64 1
@@ -22,13 +21,10 @@ define i64 @zext_scalar(i8* %p, i1 zeroext %c) {
2221
define i64 @zext_scalar2(i8* %p, i16* %q, i1 zeroext %c) {
2322
; CHECK-LABEL: zext_scalar2:
2423
; CHECK: # %bb.0:
25-
; CHECK-NEXT: movzbl (%rdi), %eax
26-
; CHECK-NEXT: testl %edx, %edx
27-
; CHECK-NEXT: je .LBB1_2
28-
; CHECK-NEXT: # %bb.1:
24+
; CHECK-NEXT: movzbl (%rdi), %ecx
2925
; CHECK-NEXT: movzwl (%rsi), %eax
30-
; CHECK-NEXT: .LBB1_2:
31-
; CHECK-NEXT: movzwl %ax, %eax
26+
; CHECK-NEXT: testl %edx, %edx
27+
; CHECK-NEXT: cmoveq %rcx, %rax
3228
; CHECK-NEXT: retq
3329
%ld1 = load volatile i8, i8* %p
3430
%ext_ld1 = zext i8 %ld1 to i16
@@ -58,15 +54,14 @@ define i64 @zext_scalar_neg(i8* %p, i16* %q, i1 zeroext %c) {
5854
ret i64 %cond
5955
}
6056

61-
; TODO: (sext(select c, load1, load2)) -> (select c, sextload1, sextload2)
57+
; (sext(select c, load1, load2)) -> (select c, sextload1, sextload2)
6258
define i64 @sext_scalar(i8* %p, i1 zeroext %c) {
6359
; CHECK-LABEL: sext_scalar:
6460
; CHECK: # %bb.0:
65-
; CHECK-NEXT: movzbl (%rdi), %eax
66-
; CHECK-NEXT: movzbl 1(%rdi), %ecx
61+
; CHECK-NEXT: movsbq (%rdi), %rcx
62+
; CHECK-NEXT: movsbq 1(%rdi), %rax
6763
; CHECK-NEXT: testl %esi, %esi
68-
; CHECK-NEXT: cmovel %eax, %ecx
69-
; CHECK-NEXT: movsbq %cl, %rax
64+
; CHECK-NEXT: cmoveq %rcx, %rax
7065
; CHECK-NEXT: retq
7166
%ld1 = load volatile i8, i8* %p
7267
%arrayidx1 = getelementptr inbounds i8, i8* %p, i64 1
@@ -80,14 +75,13 @@ define i64 @sext_scalar(i8* %p, i1 zeroext %c) {
8075
define <2 x i64> @zext_vector_i1(<2 x i32>* %p, i1 zeroext %c) {
8176
; CHECK-LABEL: zext_vector_i1:
8277
; CHECK: # %bb.0:
83-
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
84-
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
78+
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
79+
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
8580
; CHECK-NEXT: testl %esi, %esi
8681
; CHECK-NEXT: jne .LBB4_2
8782
; CHECK-NEXT: # %bb.1:
8883
; CHECK-NEXT: movdqa %xmm1, %xmm0
8984
; CHECK-NEXT: .LBB4_2:
90-
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9185
; CHECK-NEXT: retq
9286
%ld1 = load volatile <2 x i32>, <2 x i32>* %p
9387
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i64 1
@@ -100,12 +94,11 @@ define <2 x i64> @zext_vector_i1(<2 x i32>* %p, i1 zeroext %c) {
10094
define <2 x i64> @zext_vector_v2i1(<2 x i32>* %p, <2 x i1> %c) {
10195
; CHECK-LABEL: zext_vector_v2i1:
10296
; CHECK: # %bb.0:
103-
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
104-
; CHECK-NEXT: pslld $31, %xmm0
105-
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
106-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
107-
; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1
108-
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
97+
; CHECK-NEXT: psllq $63, %xmm0
98+
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
99+
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
100+
; CHECK-NEXT: blendvpd %xmm0, %xmm2, %xmm1
101+
; CHECK-NEXT: movapd %xmm1, %xmm0
109102
; CHECK-NEXT: retq
110103
%ld1 = load volatile <2 x i32>, <2 x i32>* %p
111104
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i64 1
@@ -119,14 +112,13 @@ define <2 x i64> @zext_vector_v2i1(<2 x i32>* %p, <2 x i1> %c) {
119112
define <2 x i64> @sext_vector_i1(<2 x i32>* %p, i1 zeroext %c) {
120113
; CHECK-LABEL: sext_vector_i1:
121114
; CHECK: # %bb.0:
122-
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
123-
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
115+
; CHECK-NEXT: pmovsxdq (%rdi), %xmm1
116+
; CHECK-NEXT: pmovsxdq 8(%rdi), %xmm0
124117
; CHECK-NEXT: testl %esi, %esi
125118
; CHECK-NEXT: jne .LBB6_2
126119
; CHECK-NEXT: # %bb.1:
127120
; CHECK-NEXT: movdqa %xmm1, %xmm0
128121
; CHECK-NEXT: .LBB6_2:
129-
; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
130122
; CHECK-NEXT: retq
131123
%ld1 = load volatile <2 x i32>, <2 x i32>* %p
132124
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i64 1
@@ -139,12 +131,11 @@ define <2 x i64> @sext_vector_i1(<2 x i32>* %p, i1 zeroext %c) {
139131
define <2 x i64> @sext_vector_v2i1(<2 x i32>* %p, <2 x i1> %c) {
140132
; CHECK-LABEL: sext_vector_v2i1:
141133
; CHECK: # %bb.0:
142-
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
143-
; CHECK-NEXT: pslld $31, %xmm0
144-
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
145-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
146-
; CHECK-NEXT: blendvps %xmm0, %xmm2, %xmm1
147-
; CHECK-NEXT: pmovsxdq %xmm1, %xmm0
134+
; CHECK-NEXT: psllq $63, %xmm0
135+
; CHECK-NEXT: pmovsxdq (%rdi), %xmm1
136+
; CHECK-NEXT: pmovsxdq 8(%rdi), %xmm2
137+
; CHECK-NEXT: blendvpd %xmm0, %xmm2, %xmm1
138+
; CHECK-NEXT: movapd %xmm1, %xmm0
148139
; CHECK-NEXT: retq
149140
%ld1 = load volatile <2 x i32>, <2 x i32>* %p
150141
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i64 1

0 commit comments

Comments
 (0)