Skip to content

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 29, 2024

Conversation

chuongg3
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented May 21, 2024

@llvm/pr-subscribers-backend-aarch64

Author: None (chuongg3)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/92915.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+10-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp (+54)
  • (modified) llvm/test/CodeGen/AArch64/mulcmle.ll (+16-5)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..bee9b07b9d230 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
   (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
+def combine_mul_cmlt : GICombineRule<
+  (defs root:$root, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_MUL):$root,
+        [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -295,5 +303,6 @@ def AArch64PostLegalizerCombiner
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
                         select_to_minmax, or_to_bsp, combine_concat_vector,
-                        commute_constant_to_rhs]> {
+                        commute_constant_to_rhs,
+                        combine_mul_cmlt]> {
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..82f2904ad8d43 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,60 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
   MI.eraseFromParent();
 }
 
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         Register &SrcReg) {
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
+      DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
+      DstTy != LLT::fixed_vector(8, 16))
+    return false;
+
+  auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (AndMI->getOpcode() != TargetOpcode::G_AND)
+    return false;
+  auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
+  if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
+    return false;
+
+  // Check the constant splat values
+  auto V1 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+  auto V2 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
+  auto V3 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
+  if (!V1.has_value() || !V2.has_value() || !V3.has_value())
+    return false;
+  unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
+  if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
+      V3 != (HalfSize - 1))
+    return false;
+
+  SrcReg = LShrMI->getOperand(1).getReg();
+
+  return true;
+}
+
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B, Register &SrcReg) {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT HalfTy =
+      DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
+          .changeElementSize(DstTy.getScalarSizeInBits() / 2);
+
+  Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
+  Register CastReg =
+      B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
+  Register CMLTReg =
+      B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
+          .getReg(0);
+
+  B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
+  MI.eraseFromParent();
+}
+
 class AArch64PostLegalizerCombinerImpl : public Combiner {
 protected:
   // TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 5c216b8550080..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,11 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <1 x i64> @v1i64(<1 x i64> %a) {
-; CHECK-LABEL: v1i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmlt v0.2s, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    lsr x8, x8, #31
+; CHECK-GI-NEXT:    and x8, x8, #0x100000001
+; CHECK-GI-NEXT:    lsl x9, x8, #32
+; CHECK-GI-NEXT:    sub x8, x9, x8
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
   %b = lshr <1 x i64> %a, <i64 31>
   %c = and <1 x i64> %b, <i64 4294967297>
   %d = mul nuw <1 x i64> %c, <i64 4294967295>

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be good to say this is the equivalent of https://reviews.llvm.org/D130874 in the commit message.

; CHECK-GI-NEXT: lsl x9, x8, #32
; CHECK-GI-NEXT: sub x8, x9, x8
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you planning to optimize this sequence in some further patch? I see this is sub-optimal compared to SDAG.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In GlobalISel, all v1 types are treated as scalar values and we do not necessarily want the same optimization for scalar types.

@chuongg3 chuongg3 force-pushed the GlobalISel_Combine_Mul_CMLT branch from d858a53 to 2cf47f0 Compare May 28, 2024 16:37
Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks

…to CMLTz

This patch mirrors the following SelectionDAG patch for GlobalISel:
https://reviews.llvm.org/D130874
@chuongg3 chuongg3 force-pushed the GlobalISel_Combine_Mul_CMLT branch from 2cf47f0 to 3d354d0 Compare May 29, 2024 12:47
@chuongg3 chuongg3 merged commit 23366d4 into llvm:main May 29, 2024
4 of 7 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants