Skip to content

Commit 1e828f8

Browse files
committed
[SROA]: Only defer trying partial sized ptr or ptr vector types
Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab
1 parent a8cb9db commit 1e828f8

File tree

2 files changed

+115
-27
lines changed

2 files changed

+115
-27
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
22572257
return nullptr;
22582258
}
22592259

2260+
/// Form additional candidate vector types from \p OtherTys and then run
/// checkVectorTypesForPromotion on \p CandidateTys.
///
/// For each type in \p OtherTys that is a valid vector element type, and for
/// each vector type in \p CandidateTysCopy, a new vector type with the same
/// total bit size but with that type as its element is built and handed to
/// \p CheckCandidateType. This happens only when the type's size differs from
/// both the vector's total size and its current element size, and evenly
/// divides the vector's total size.
///
/// \param OtherTys            Types to try as replacement element types.
/// \param CandidateTysCopy    Snapshot of the candidate vectors to iterate
///                            over; it is only read here.
/// \param CheckCandidateType  Callback invoked with each newly formed vector
///                            type. It is defined by the caller and not shown
///                            here; presumably it records the type in
///                            \p CandidateTys.
/// \param P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
///        HaveCommonVecPtrTy, CommonVecPtrTy
///                            Forwarded as-is to checkVectorTypesForPromotion;
///                            \p DL is also used for the size queries.
/// \returns the result of checkVectorTypesForPromotion.
static VectorType *createAndCheckVectorTypesForPromotion(
2261+
SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2262+
function_ref<void(Type *)> CheckCandidateType, Partition &P,
2263+
const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2264+
bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2265+
bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
2266+
// Remember the first element of the copy (or nullptr if it is empty). It is
// only referenced by the assert in the inner loop, hence [[maybe_unused]].
[[maybe_unused]] VectorType *OriginalElt =
2267+
CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2268+
// Consider additional vector types where the element type size is a
2269+
// multiple of load/store element size.
2270+
for (Type *Ty : OtherTys) {
2271+
if (!VectorType::isValidElementType(Ty))
2272+
continue;
2273+
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2274+
// Iterate over the caller-supplied copy rather than CandidateTys, because
2275+
// CheckCandidateType might append to CandidateTys in the loop.
2276+
for (VectorType *const VTy : CandidateTysCopy) {
2277+
// The elements in the copy should remain invariant throughout the loop
2278+
assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2279+
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2280+
unsigned ElementSize =
2281+
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2282+
// Skip sizes that match the whole vector or its current element type, and
// sizes that do not evenly divide the vector.
if (TypeSize != VectorSize && TypeSize != ElementSize &&
2283+
VectorSize % TypeSize == 0) {
2284+
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2285+
CheckCandidateType(NewVTy);
2286+
}
2287+
}
2288+
}
2289+
2290+
// Evaluate the (possibly extended) candidate list.
return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2291+
CommonEltTy, HaveVecPtrTy,
2292+
HaveCommonVecPtrTy, CommonVecPtrTy);
2293+
}
2294+
22602295
/// Test whether the given alloca partitioning and range of slices can be
22612296
/// promoted to a vector.
22622297
///
@@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22712306
// we have different element types.
22722307
SmallVector<VectorType *, 4> CandidateTys;
22732308
SetVector<Type *> LoadStoreTys;
2309+
SetVector<Type *> DeferredTys;
22742310
Type *CommonEltTy = nullptr;
22752311
VectorType *CommonVecPtrTy = nullptr;
22762312
bool HaveVecPtrTy = false;
@@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
23142350
Ty = SI->getValueOperand()->getType();
23152351
else
23162352
continue;
2353+
2354+
auto CandTy = Ty->getScalarType();
2355+
if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2356+
S.endOffset() != P.endOffset())) {
2357+
DeferredTys.insert(Ty);
2358+
continue;
2359+
}
2360+
23172361
LoadStoreTys.insert(Ty);
23182362
// Consider any loads or stores that are the exact size of the slice.
23192363
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
23202364
CheckCandidateType(Ty);
23212365
}
23222366

2323-
if (auto *VTy = checkVectorTypesForPromotion(
2324-
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2367+
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2368+
if (auto *VTy = createAndCheckVectorTypesForPromotion(
2369+
LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2370+
CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
23252371
HaveCommonVecPtrTy, CommonVecPtrTy))
23262372
return VTy;
23272373

2328-
// Consider additional vector types where the element type size is a
2329-
// multiple of load/store element size.
2330-
for (Type *Ty : LoadStoreTys) {
2331-
if (!VectorType::isValidElementType(Ty))
2332-
continue;
2333-
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2334-
// Make a copy of CandidateTys and iterate through it, because we might
2335-
// append to CandidateTys in the loop.
2336-
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2337-
CandidateTys.clear();
2338-
for (VectorType *&VTy : CandidateTysCopy) {
2339-
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2340-
unsigned ElementSize =
2341-
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2342-
if (TypeSize != VectorSize && TypeSize != ElementSize &&
2343-
VectorSize % TypeSize == 0) {
2344-
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2345-
CheckCandidateType(NewVTy);
2346-
}
2347-
}
2348-
}
2349-
2350-
return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2351-
CommonEltTy, HaveVecPtrTy,
2352-
HaveCommonVecPtrTy, CommonVecPtrTy);
2374+
CandidateTys.clear();
2375+
return createAndCheckVectorTypesForPromotion(
2376+
DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2377+
HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2378+
CommonVecPtrTy);
23532379
}
23542380

23552381
/// Test whether a slice of an alloca is valid for integer widening.

llvm/test/Transforms/SROA/vector-promotion.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,68 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
13921392
ret <4 x ptr> %sroaval
13931393
}
13941394

1395+
; Stores a [2 x i64] argument over a <4 x i32> alloca and loads it back as
; <4 x i32>; both accesses start at the alloca base. The CHECK lines below
; expect the alloca to be gone, replaced by <2 x i64> insertelements and a
; bitcast to <4 x i32>.
define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
1396+
; CHECK-LABEL: @validLoadStoreTy(
1397+
; CHECK-NEXT: entry:
1398+
; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
1399+
; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
1400+
; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
1401+
; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
1402+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
1403+
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
1404+
;
1405+
; DEBUG-LABEL: @validLoadStoreTy(
1406+
; DEBUG-NEXT: entry:
1407+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
1408+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
1409+
; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
1410+
; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
1411+
; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
1412+
; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
1413+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
1414+
; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
1415+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
1416+
; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
1417+
;
1418+
entry:
1419+
%cond = alloca <4 x i32>, align 8
1420+
%coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
1421+
store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
1422+
%m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
1423+
%0 = load <4 x i32>, ptr %m5, align 8
1424+
ret <4 x i32> %0
1425+
}
1426+
1427+
; The following test should not crash the compiler
1428+
; (calls to CheckCandidateType from createAndCheckVectorTypesForPromotion may reallocate the storage that CandidateTys.data() points to)
1429+
; Crash-only regression test: this function has no CHECK lines of its own and
; never returns (every block ends in a branch).
define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
1430+
entry:
1431+
%alloca = alloca <4x i32>, align 16
1432+
store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %alloca, align 16
1433+
br label %bb.1
1434+
1435+
bb.1:
1436+
br label %bb.1
1437+
1438+
; bb.2 and bb.4 have no predecessors: entry branches to bb.1, which only loops
; back to itself. Their loads and stores still use %alloca.
bb.2:
1439+
%Load0 = load <4 x i32>, ptr %alloca, align 16
1440+
store <4 x i32> zeroinitializer, ptr %alloca, align 16
1441+
%Load1 = load <4 x i32>, ptr %alloca, align 16
1442+
br label %bb.3
1443+
1444+
bb.3:
1445+
br label %bb.3
1446+
1447+
bb.4:
1448+
; The i64 load reads only the first 8 bytes of the 16-byte alloca, so it is a
; partial-width access mixed in with the full-width <4 x i32> accesses.
%Load2 = load i64, ptr %alloca, align 16
1449+
%Load3 = load <4 x i32>, ptr %alloca, align 16
1450+
store <4 x i32> zeroinitializer, ptr %alloca, align 16
1451+
br label %bb.5
1452+
1453+
bb.5:
1454+
br label %bb.5
1455+
}
1456+
13951457
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
13961458
declare void @llvm.lifetime.end.p0(i64, ptr)
13971459
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

0 commit comments

Comments
 (0)