[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

chuongg3 · 2024-05-21T13:28:13Z

No description provided.

llvmbot · 2024-05-21T13:28:45Z

@llvm/pr-subscribers-backend-aarch64

Author: None (chuongg3)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/92915.diff

3 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64Combine.td (+10-1)
(modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp (+54)
(modified) llvm/test/CodeGen/AArch64/mulcmle.ll (+16-5)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..bee9b07b9d230 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
   (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
+// The constants shown are the 32-bit-element case; the matcher derives them
+// from half the element width (shift by Half - 1, AND with 1 | 1 << Half,
+// multiply by a mask of the low Half bits).
+// The rule is rooted at a G_MUL: matchCombineMulCMLT decides whether the
+// pattern is present and records a register in $matchinfo, which
+// applyCombineMulCMLT then uses to build the replacement.
+def combine_mul_cmlt : GICombineRule<
+  (defs root:$root, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_MUL):$root,
+        [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -295,5 +303,6 @@ def AArch64PostLegalizerCombiner
                         ptr_add_immed_chain, overlapping_and,
                         split_store_zero_128, undef_combines,
                         select_to_minmax, or_to_bsp, combine_concat_vector,
-                        commute_constant_to_rhs]> {
+                        commute_constant_to_rhs,
+                        combine_mul_cmlt]> {
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..82f2904ad8d43 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,60 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
   MI.eraseFromParent();
 }
 
+// Match a G_MUL that is the root of the pattern
+//   G_MUL(G_AND(G_LSHR(X, Half - 1), 1 | (1 << Half)), mask(Half))
+// where Half is half of the destination's scalar width in bits. For 32-bit
+// elements this is MUL(AND(LSHR(X, 15), 0x10001), 0xffff).
+//
+// MI     - the candidate G_MUL instruction.
+// MRI    - register info used to look up types and defining instructions.
+// SrcReg - set to X (the value being shifted) when the match succeeds; left
+//          untouched otherwise.
+// Returns true only if every part of the pattern was found.
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         Register &SrcReg) {
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // Only these fixed-width vector result types are accepted.
+  if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
+      DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
+      DstTy != LLT::fixed_vector(8, 16))
+    return false;
+
+  // The G_AND and G_LSHR are only looked for as operand 1 of their user
+  // (looking through copies); the constants are read from operand 2 below.
+  auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  if (AndMI->getOpcode() != TargetOpcode::G_AND)
+    return false;
+  auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
+  if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
+    return false;
+
+  // Check the constant splat values
+  // V1 = multiplier, V2 = AND mask, V3 = shift amount.
+  auto V1 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+  auto V2 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
+  auto V3 = isConstantOrConstantSplatVector(
+      *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
+  if (!V1.has_value() || !V2.has_value() || !V3.has_value())
+    return false;
+  // Each element is treated as two halves of HalfSize bits.
+  unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
+  // Required: multiplier is a mask of the low HalfSize bits, the AND mask has
+  // exactly bit 0 and bit HalfSize set, and the shift amount is HalfSize - 1.
+  if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
+      V3 != (HalfSize - 1))
+    return false;
+
+  // Hand the shifted value X to the apply step.
+  SrcReg = LShrMI->getOperand(1).getReg();
+
+  return true;
+}
+
+// Replace the matched G_MUL with a signed "less than zero" compare carried
+// out on a vector that has twice as many elements of half the width.
+//
+// MI     - the matched G_MUL; erased after the replacement is built.
+// MRI    - register info used to query the destination type.
+// B      - builder used to emit the new instructions.
+// SrcReg - the source register recorded by matchCombineMulCMLT.
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B, Register &SrcReg) {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  // Same total size as DstTy: double the element count, halve the element
+  // width.
+  LLT HalfTy =
+      DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
+          .changeElementSize(DstTy.getScalarSizeInBits() / 2);
+
+  // Reinterpret the source as HalfTy and compare every element against zero
+  // with a signed less-than.
+  Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
+  Register CastReg =
+      B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
+  Register CMLTReg =
+      B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
+          .getReg(0);
+
+  // Bitcast the compare result straight into the original destination
+  // register, then drop the old multiply.
+  B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
+  MI.eraseFromParent();
+}
+
 class AArch64PostLegalizerCombinerImpl : public Combiner {
 protected:
   // TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 5c216b8550080..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,11 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <1 x i64> @v1i64(<1 x i64> %a) {
-; CHECK-LABEL: v1i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmlt v0.2s, v0.2s, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    lsr x8, x8, #31
+; CHECK-GI-NEXT:    and x8, x8, #0x100000001
+; CHECK-GI-NEXT:    lsl x9, x8, #32
+; CHECK-GI-NEXT:    sub x8, x9, x8
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
   %b = lshr <1 x i64> %a, <i64 31>
   %c = and <1 x i64> %b, <i64 4294967297>
   %d = mul nuw <1 x i64> %c, <i64 4294967295>

davemgreen

It might be good to say this is the equivalent of https://reviews.llvm.org/D130874 in the commit message.

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

llvm/test/CodeGen/AArch64/mulcmle.ll

madhur13490 · 2024-05-21T15:55:44Z

llvm/test/CodeGen/AArch64/mulcmle.ll

+; CHECK-GI-NEXT:    lsl x9, x8, #32
+; CHECK-GI-NEXT:    sub x8, x9, x8
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret


Are you planning to optimize this sequence in some further patch? I see this is sub-optimal compared to SDAG.

In GlobalISel, all v1 types are treated as scalar values, and we do not necessarily want the same optimization for scalar types.

davemgreen

LGTM, thanks

…92915)

…to CMLTz This patch mirrors the following SelectionDAG patch for GlobalISel: https://reviews.llvm.org/D130874

chuongg3 requested review from aemerson and davemgreen May 21, 2024 13:28

llvmbot added the backend:AArch64 label May 21, 2024

davemgreen reviewed May 21, 2024

View reviewed changes

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp Show resolved Hide resolved

llvm/test/CodeGen/AArch64/mulcmle.ll Outdated Show resolved Hide resolved

madhur13490 reviewed May 21, 2024

View reviewed changes

chuongg3 force-pushed the GlobalISel_Combine_Mul_CMLT branch from d858a53 to 2cf47f0 Compare May 28, 2024 16:37

davemgreen approved these changes May 29, 2024

View reviewed changes

chuongg3 added a commit that referenced this pull request May 29, 2024

[AArch64][NFC] Pre-commit Test for Combine MUL(AND(LSHR)) to CMLTz (#92915)

3ce9b86

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz

3d354d0

This patch mirrors the following SelectionDAG patch for GlobalISel: https://reviews.llvm.org/D130874

chuongg3 force-pushed the GlobalISel_Combine_Mul_CMLT branch from 2cf47f0 to 3d354d0 Compare May 29, 2024 12:47

chuongg3 merged commit 23366d4 into llvm:main May 29, 2024
4 of 7 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

Uh oh!

chuongg3 commented May 21, 2024

Uh oh!

llvmbot commented May 21, 2024

Uh oh!

davemgreen left a comment

Uh oh!

Uh oh!

Uh oh!

madhur13490 May 21, 2024

Uh oh!

chuongg3 May 28, 2024

Uh oh!

davemgreen left a comment

Uh oh!

Uh oh!

Uh oh!

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

[AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz #92915

Uh oh!

Conversation

chuongg3 commented May 21, 2024

Uh oh!

llvmbot commented May 21, 2024

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

madhur13490 May 21, 2024

Choose a reason for hiding this comment

Uh oh!

chuongg3 May 28, 2024

Choose a reason for hiding this comment

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!