-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: None (chuongg3) Changes: (no description provided) Full diff: https://github.com/llvm/llvm-project/pull/92915.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..bee9b07b9d230 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
>;
+// Combines G_MUL(G_AND(G_LSHR(X, 15), 0x10001), 0xffff) (32-bit lane example) into CMLTz; rooted at G_MUL, X is passed to the apply step as match info.
+def combine_mul_cmlt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -295,5 +303,6 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs,
+ combine_mul_cmlt]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..82f2904ad8d43 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,60 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &SrcReg) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ // Matches MI = G_MUL(G_AND(G_LSHR(X, C3), C2), C1); returns true and sets SrcReg to X on success. Only these vector result types are accepted.
+ if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
+ DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
+ DstTy != LLT::fixed_vector(8, 16))
+ return false;
+ // NOTE(review): AndMI and LShrMI are dereferenced without a null check; confirm getDefIgnoringCopies cannot return null for these operands.
+ auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ if (AndMI->getOpcode() != TargetOpcode::G_AND)
+ return false;
+ auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
+ if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
+ return false;
+ // Operand 2 of the mul (V1), the and (V2) and the shift (V3) must each be a constant or a constant splat.
+ // With HalfSize = half the element width: V1 is a HalfSize-bit mask, V2 == (1 | 1 << HalfSize), V3 == HalfSize - 1.
+ auto V1 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+ auto V2 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
+ auto V3 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
+ if (!V1.has_value() || !V2.has_value() || !V3.has_value())
+ return false;
+ unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
+ if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
+ V3 != (HalfSize - 1))
+ return false;
+ // The value being shifted (operand 1 of the G_LSHR) is the register handed to applyCombineMulCMLT.
+ SrcReg = LShrMI->getOperand(1).getReg();
+
+ return true;
+}
+
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, Register &SrcReg) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT HalfTy =
+ DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
+ .changeElementSize(DstTy.getScalarSizeInBits() / 2);
+ // HalfTy has twice the elements of DstTy at half the width. Bitcast SrcReg to HalfTy and build a signed "< 0" compare against a zero vector.
+ Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
+ Register CastReg =
+ B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
+ Register CMLTReg =
+ B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
+ .getReg(0);
+ // Bitcast the compare result into DstReg (the G_MUL's original destination), then erase the G_MUL.
+ B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
+ MI.eraseFromParent();
+}
+
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 5c216b8550080..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,11 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <1 x i64> @v1i64(<1 x i64> %a) {
-; CHECK-LABEL: v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: lsr x8, x8, #31
+; CHECK-GI-NEXT: and x8, x8, #0x100000001
+; CHECK-GI-NEXT: lsl x9, x8, #32
+; CHECK-GI-NEXT: sub x8, x9, x8
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%b = lshr <1 x i64> %a, <i64 31>
%c = and <1 x i64> %b, <i64 4294967297>
%d = mul nuw <1 x i64> %c, <i64 4294967295>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be good to say this is the equivalent of https://reviews.llvm.org/D130874 in the commit message.
llvm/test/CodeGen/AArch64/mulcmle.ll
Outdated
; CHECK-GI-NEXT: lsl x9, x8, #32
; CHECK-GI-NEXT: sub x8, x9, x8
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you planning to optimize this sequence in some further patch? I see this is sub-optimal compared to SDAG.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In GlobalISel, all v1 types are treated as scalar values and we do not necessarily want the same optimization for scalar types.
d858a53
to
2cf47f0
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz
This patch mirrors the following SelectionDAG patch for GlobalISel: https://reviews.llvm.org/D130874
2cf47f0
to
3d354d0
Compare
No description provided.