Skip to content

Commit f0460fa

Browse files
committed
[AArch64] Improve target hook function to decide folding (mul (add x, c1), c2)
Prevent the folding if it leads to worse code. Reviewed By: dmgreen, kda Differential Revision: https://reviews.llvm.org/D108871
1 parent 9d7d34c commit f0460fa

File tree

4 files changed

+49
-25
lines changed

4 files changed

+49
-25
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12190,6 +12190,33 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
1219012190
return IsLegal;
1219112191
}
1219212192

12193+
// Return false to prevent folding
12194+
// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
12195+
// if the folding leads to worse code.
12196+
bool AArch64TargetLowering::isMulAddWithConstProfitable(
12197+
const SDValue &AddNode, const SDValue &ConstNode) const {
12198+
// Let the DAGCombiner decide for vector types and large types.
12199+
const EVT VT = AddNode.getValueType();
12200+
if (VT.isVector() || VT.getScalarSizeInBits() > 64)
12201+
return true;
12202+
12203+
// It is worse if c1 is legal add immediate, while c1*c2 is not
12204+
// and has to be composed by at least two instructions.
12205+
const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
12206+
const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
12207+
const int64_t C1 = C1Node->getSExtValue();
12208+
const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
12209+
if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
12210+
return true;
12211+
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
12212+
AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
12213+
if (Insn.size() > 1)
12214+
return false;
12215+
12216+
// Default to true and let the DAGCombiner decide.
12217+
return true;
12218+
}
12219+
1219312220
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
1219412221
// immediates is the same as for an add or a sub.
1219512222
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,9 @@ class AArch64TargetLowering : public TargetLowering {
595595
bool isLegalAddImmediate(int64_t) const override;
596596
bool isLegalICmpImmediate(int64_t) const override;
597597

598+
bool isMulAddWithConstProfitable(const SDValue &AddNode,
599+
const SDValue &ConstNode) const override;
600+
598601
bool shouldConsiderGEPOffsetSplit() const override;
599602

600603
EVT getOptimalMemOpType(const MemOp &Op,

llvm/test/CodeGen/AArch64/addimm-mulimm.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,9 @@ define signext i32 @addimm_mulimm_accept_13(i32 signext %a) {
104104
define i64 @addimm_mulimm_reject_00(i64 %a) {
105105
; CHECK-LABEL: addimm_mulimm_reject_00:
106106
; CHECK: // %bb.0:
107-
; CHECK-NEXT: mov w9, #1200
108-
; CHECK-NEXT: mov w8, #3700
109-
; CHECK-NEXT: movk w9, #175, lsl #16
110-
; CHECK-NEXT: madd x0, x0, x8, x9
107+
; CHECK-NEXT: add x8, x0, #3100
108+
; CHECK-NEXT: mov w9, #3700
109+
; CHECK-NEXT: mul x0, x8, x9
111110
; CHECK-NEXT: ret
112111
%tmp0 = add i64 %a, 3100
113112
%tmp1 = mul i64 %tmp0, 3700
@@ -117,10 +116,9 @@ define i64 @addimm_mulimm_reject_00(i64 %a) {
117116
define i64 @addimm_mulimm_reject_01(i64 %a) {
118117
; CHECK-LABEL: addimm_mulimm_reject_01:
119118
; CHECK: // %bb.0:
120-
; CHECK-NEXT: mov x9, #-1200
121-
; CHECK-NEXT: mov w8, #3700
122-
; CHECK-NEXT: movk x9, #65360, lsl #16
123-
; CHECK-NEXT: madd x0, x0, x8, x9
119+
; CHECK-NEXT: sub x8, x0, #3100
120+
; CHECK-NEXT: mov w9, #3700
121+
; CHECK-NEXT: mul x0, x8, x9
124122
; CHECK-NEXT: ret
125123
%tmp0 = add i64 %a, -3100
126124
%tmp1 = mul i64 %tmp0, 3700
@@ -130,10 +128,9 @@ define i64 @addimm_mulimm_reject_01(i64 %a) {
130128
define signext i32 @addimm_mulimm_reject_02(i32 signext %a) {
131129
; CHECK-LABEL: addimm_mulimm_reject_02:
132130
; CHECK: // %bb.0:
133-
; CHECK-NEXT: mov w9, #1200
134-
; CHECK-NEXT: mov w8, #3700
135-
; CHECK-NEXT: movk w9, #175, lsl #16
136-
; CHECK-NEXT: madd w0, w0, w8, w9
131+
; CHECK-NEXT: add w8, w0, #3100
132+
; CHECK-NEXT: mov w9, #3700
133+
; CHECK-NEXT: mul w0, w8, w9
137134
; CHECK-NEXT: ret
138135
%tmp0 = add i32 %a, 3100
139136
%tmp1 = mul i32 %tmp0, 3700
@@ -143,10 +140,9 @@ define signext i32 @addimm_mulimm_reject_02(i32 signext %a) {
143140
define signext i32 @addimm_mulimm_reject_03(i32 signext %a) {
144141
; CHECK-LABEL: addimm_mulimm_reject_03:
145142
; CHECK: // %bb.0:
146-
; CHECK-NEXT: mov w9, #64336
147-
; CHECK-NEXT: mov w8, #3700
148-
; CHECK-NEXT: movk w9, #65360, lsl #16
149-
; CHECK-NEXT: madd w0, w0, w8, w9
143+
; CHECK-NEXT: sub w8, w0, #3100
144+
; CHECK-NEXT: mov w9, #3700
145+
; CHECK-NEXT: mul w0, w8, w9
150146
; CHECK-NEXT: ret
151147
%tmp0 = add i32 %a, -3100
152148
%tmp1 = mul i32 %tmp0, 3700

llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,10 @@ define i1 @t32_6_3(i32 %X) nounwind {
154154
define i1 @t32_6_4(i32 %X) nounwind {
155155
; CHECK-LABEL: t32_6_4:
156156
; CHECK: // %bb.0:
157-
; CHECK-NEXT: mov w8, #43691
158-
; CHECK-NEXT: mov w9, #21844
159-
; CHECK-NEXT: movk w8, #43690, lsl #16
160-
; CHECK-NEXT: movk w9, #21845, lsl #16
161-
; CHECK-NEXT: madd w8, w0, w8, w9
157+
; CHECK-NEXT: mov w9, #43691
158+
; CHECK-NEXT: sub w8, w0, #4
159+
; CHECK-NEXT: movk w9, #43690, lsl #16
160+
; CHECK-NEXT: mul w8, w8, w9
162161
; CHECK-NEXT: mov w9, #43690
163162
; CHECK-NEXT: ror w8, w8, #1
164163
; CHECK-NEXT: movk w9, #10922, lsl #16
@@ -173,11 +172,10 @@ define i1 @t32_6_4(i32 %X) nounwind {
173172
define i1 @t32_6_5(i32 %X) nounwind {
174173
; CHECK-LABEL: t32_6_5:
175174
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov w8, #43691
177-
; CHECK-NEXT: mov w9, #43689
178-
; CHECK-NEXT: movk w8, #43690, lsl #16
175+
; CHECK-NEXT: mov w9, #43691
176+
; CHECK-NEXT: sub w8, w0, #5
179177
; CHECK-NEXT: movk w9, #43690, lsl #16
180-
; CHECK-NEXT: madd w8, w0, w8, w9
178+
; CHECK-NEXT: mul w8, w8, w9
181179
; CHECK-NEXT: mov w9, #43690
182180
; CHECK-NEXT: ror w8, w8, #1
183181
; CHECK-NEXT: movk w9, #10922, lsl #16

0 commit comments

Comments
 (0)