Skip to content

[LoongArch] Enable interleaved vectorization #92629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
#include "LoongArchGenSubtargetInfo.inc"

unsigned GRLen = 32;
// TODO: The default value is empirical and conservative. Override the
// default in initializeProperties once we support optimizing for more
// uarches.
uint8_t MaxInterleaveFactor = 2;
MVT GRLenVT = MVT::i32;
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
LoongArchFrameLowering FrameLowering;
Expand Down Expand Up @@ -99,6 +103,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
bool enableMachineScheduler() const override { return true; }
};
} // end namespace llvm
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ unsigned LoongArchTTIImpl::getRegisterClassForType(bool Vector,
return LoongArchRegisterClass::GPRRC;
}

unsigned LoongArchTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
return ST->getMaxInterleaveFactor();
}

const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const {
switch (ClassID) {
case LoongArchRegisterClass::GPRRC:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
unsigned getMaxInterleaveFactor(ElementCount VF);
const char *getRegisterClassName(unsigned ClassID) const;

// TODO: Implement more hooks to provide TTI machinery for LoongArch.
Expand Down
22 changes: 14 additions & 8 deletions llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; REQUIRES: asserts
; RUN: opt --passes=loop-vectorize,dce,instcombine --mtriple loongarch64 \
; RUN: -S < %s 2>&1 | FileCheck %s

; CHECK-LABEL: foo
; CHECK: %{{.*}} = add {{.*}}, 2

; Function Attrs: nofree norecurse nosync nounwind writeonly
define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {
entry:
%cmp5 = icmp sgt i32 %n, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %n to i64
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void

for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, ptr %arrayidx, align 4, !tbaa !4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !8
}

!4 = !{!5, !5, i64 0}
!5 = !{!"int", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}
!8 = distinct !{!8, !9}
!9 = !{!"llvm.loop.mustprogress"}
Loading