Skip to content

Commit 910098e

Browse files
authored
[RISCV] Match strided vector bases in RISCVGatherScatterLowering (#93972)
Currently we only match GEPs with a scalar base pointer, but a common pattern that's emitted from the loop vectorizer is a strided vector base plus some sort of scalar offset:

    %base = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
    %gep = getelementptr i64, <vscale x 1 x ptr> %base, i64 %offset

This is common for accesses into a struct, e.g. f[i].b below:

    struct F { int a; char b; };

    void foo(struct F *f) {
      for (int i = 0; i < 1024; i += 2) {
        f[i].a++;
        f[i].b++;
      }
    }

This patch handles this case in RISCVGatherScatterLowering by recursing on the base pointer if it's a vector.

With this we can convert roughly 80% of the indexed loads and stores emitted to strided loads and stores on SPEC CPU 2017, -O3 -march=rva22u64_v.
1 parent bf4eaec commit 910098e

File tree

2 files changed

+24
-9
lines changed

2 files changed

+24
-9
lines changed

llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,8 +349,27 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
349349

350350
SmallVector<Value *, 2> Ops(GEP->operands());
351351

352+
// If the base pointer is a vector, check if it's strided.
353+
Value *Base = GEP->getPointerOperand();
354+
if (auto *BaseInst = dyn_cast<Instruction>(Base);
355+
BaseInst && BaseInst->getType()->isVectorTy()) {
356+
// If GEP's offset is scalar then we can add it to the base pointer's base.
357+
auto IsScalar = [](Value *Idx) { return !Idx->getType()->isVectorTy(); };
358+
if (all_of(GEP->indices(), IsScalar)) {
359+
auto [BaseBase, Stride] = determineBaseAndStride(BaseInst, Builder);
360+
if (BaseBase) {
361+
Builder.SetInsertPoint(GEP);
362+
SmallVector<Value *> Indices(GEP->indices());
363+
Value *OffsetBase =
364+
Builder.CreateGEP(GEP->getSourceElementType(), BaseBase, Indices,
365+
GEP->getName() + "offset", GEP->isInBounds());
366+
return {OffsetBase, Stride};
367+
}
368+
}
369+
}
370+
352371
// Base pointer needs to be a scalar.
353-
Value *ScalarBase = Ops[0];
372+
Value *ScalarBase = Base;
354373
if (ScalarBase->getType()->isVectorTy()) {
355374
ScalarBase = getSplatValue(ScalarBase);
356375
if (!ScalarBase)

llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,10 +301,8 @@ define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
301301

302302
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
303303
; CHECK-LABEL: @vector_base_scalar_offset(
304-
; CHECK-NEXT: [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
305-
; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]]
306-
; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]]
307-
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
304+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
305+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
308306
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
309307
;
310308
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -321,10 +319,8 @@ define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
321319

322320
define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
323321
; CHECK-LABEL: @splat_base_scalar_offset(
324-
; CHECK-NEXT: [[HEAD:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[P:%.*]], i32 0
325-
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[HEAD]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
326-
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[SPLAT]], i64 [[OFFSET:%.*]]
327-
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
322+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
323+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 0, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
328324
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
329325
;
330326
%head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0

0 commit comments

Comments
 (0)