Skip to content

Commit 1e86e92

Browse files
authored
[LoongArch] Enable interleaved vectorization (llvm#92629)
This PR enables interleaved vectorization for LoongArch, with a default interleaving factor of `2`.
1 parent d094bb6 commit 1e86e92

File tree

5 files changed

+63
-8
lines changed

llvm/lib/Target/LoongArch/LoongArchSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
3737
#include "LoongArchGenSubtargetInfo.inc"
3838

3939
unsigned GRLen = 32;
40+
// TODO: The default value is empirical and conservative. Override the
41+
// default in initializeProperties once we support optimizing for more
42+
// uarches.
43+
uint8_t MaxInterleaveFactor = 2;
4044
MVT GRLenVT = MVT::i32;
4145
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
4246
LoongArchFrameLowering FrameLowering;
@@ -99,6 +103,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
99103
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
100104
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
101105
unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
106+
// Returns the value of the MaxInterleaveFactor member (stored as uint8_t,
// returned as unsigned).
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
102107
bool enableMachineScheduler() const override { return true; }
103108
};
104109
} // end namespace llvm

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ unsigned LoongArchTTIImpl::getRegisterClassForType(bool Vector,
6969
return LoongArchRegisterClass::GPRRC;
7070
}
7171

72+
// Reports the maximum interleave factor by forwarding to
// ST->getMaxInterleaveFactor() (ST is presumably the LoongArchSubtarget).
// The VF argument is not consulted, so the result is the same for every VF.
unsigned LoongArchTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
73+
return ST->getMaxInterleaveFactor();
74+
}
75+
7276
const char *LoongArchTTIImpl::getRegisterClassName(unsigned ClassID) const {
7377
switch (ClassID) {
7478
case LoongArchRegisterClass::GPRRC:

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
4343
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
4444
unsigned getNumberOfRegisters(unsigned ClassID) const;
4545
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
46+
unsigned getMaxInterleaveFactor(ElementCount VF);
4647
const char *getRegisterClassName(unsigned ClassID) const;
4748

4849
// TODO: Implement more hooks to provide TTI machinery for LoongArch.

llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,20 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
2222
; CHECK: vector.body:
2323
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2424
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
25-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
26-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
27-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
28-
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
29-
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
30-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
31-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
32-
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
26+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
27+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
28+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
29+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
30+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
31+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
32+
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
33+
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
34+
; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
35+
; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
36+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
37+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
38+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3339
; CHECK: middle.block:
3440
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
3541
; CHECK: scalar.ph:
(New file; its path is missing from this capture.)

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; REQUIRES: asserts
2+
; RUN: opt --passes=loop-vectorize,dce,instcombine --mtriple loongarch64 \
3+
; RUN: -S < %s 2>&1 | FileCheck %s
4+
5+
; CHECK-LABEL: foo
6+
; CHECK: %{{.*}} = add {{.*}}, 2
7+
8+
; Function Attrs: nofree norecurse nosync nounwind writeonly
9+
define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {
10+
entry:
11+
%cmp5 = icmp sgt i32 %n, 0
12+
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
13+
14+
for.body.preheader: ; preds = %entry
15+
%wide.trip.count = zext i32 %n to i64
16+
br label %for.body
17+
18+
for.cond.cleanup.loopexit: ; preds = %for.body
19+
br label %for.cond.cleanup
20+
21+
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
22+
ret void
23+
24+
for.body: ; preds = %for.body.preheader, %for.body
25+
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
26+
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
27+
%0 = trunc i64 %indvars.iv to i32
28+
store i32 %0, ptr %arrayidx, align 4, !tbaa !4
29+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
30+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
31+
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !8
32+
}
33+
34+
!4 = !{!5, !5, i64 0}
35+
!5 = !{!"int", !6, i64 0}
36+
!6 = !{!"omnipotent char", !7, i64 0}
37+
!7 = !{!"Simple C/C++ TBAA"}
38+
!8 = distinct !{!8, !9}
39+
!9 = !{!"llvm.loop.mustprogress"}

0 commit comments

Comments
 (0)