Skip to content

[LoongArch] Enable interleaved vectorization #92629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 21, 2024
Merged

[LoongArch] Enable interleaved vectorization #92629

merged 1 commit into from
May 21, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented May 18, 2024

This PR enables interleaved vectorization for LoongArch, with a default interleaving factor of 2.

@llvmbot
Copy link
Member

llvmbot commented May 18, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

This PR enables interleaved vectorization for LoongArch, with a default interleaving factor of 2.


Full diff: https://github.com/llvm/llvm-project/pull/92629.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchSubtarget.h (+2)
  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp (+4)
  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h (+1)
  • (modified) llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll (+14-8)
  • (added) llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll (+39)
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index b87ea6e2ec32b..390ce45552db5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -37,6 +37,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
 #include "LoongArchGenSubtargetInfo.inc"
 
   unsigned GRLen = 32;
+  uint8_t MaxInterleaveFactor = 2;
   MVT GRLenVT = MVT::i32;
   LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
   LoongArchFrameLowering FrameLowering;
@@ -99,6 +100,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
   unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
+  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   bool enableMachineScheduler() const override { return true; }
 };
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index add1c60d89d21..710650acba304 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -69,6 +69,10 @@ unsigned LoongArchTTIImpl::getRegisterClassForType(bool Vector,
   return LoongArchRegisterClass::GPRRC;
 }
 
+unsigned LoongArchTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
+  return ST->getMaxInterleaveFactor();
+}
+
 const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const {
   switch (ClassID) {
   case LoongArchRegisterClass::GPRRC:
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 34c18163bbdb6..06a03d29931d1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -43,6 +43,7 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
   unsigned getNumberOfRegisters(unsigned ClassID) const;
   unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+  unsigned getMaxInterleaveFactor(ElementCount VF);
   const char *getRegisterClassName(unsigned ClassID) const;
 
   // TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
index 7172f0907e77e..28c1eef84e257 100644
--- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
@@ -22,14 +22,20 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll
new file mode 100644
index 0000000000000..be9b170491b9c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll
@@ -0,0 +1,39 @@
+; REQUIRES: asserts
+; RUN: opt --passes=loop-vectorize,dce,instcombine --mtriple loongarch64 \
+; RUN:   -S < %s 2>&1 | FileCheck %s
+
+; CHECK-LABEL: foo
+; CHECK: %{{.*}} = add {{.*}}, 2
+
+; Function Attrs: nofree norecurse nosync nounwind writeonly
+define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp sgt i32 %n, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, ptr %arrayidx, align 4, !tbaa !4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !8
+}
+
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = distinct !{!8, !9}
+!9 = !{!"llvm.loop.mustprogress"}

@heiher heiher merged commit 1e86e92 into llvm:main May 21, 2024
3 of 4 checks passed
@heiher heiher deleted the iv branch May 21, 2024 07:31
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants