Skip to content

Commit a062a9f

Browse files
committed
[clang][RISCV] Enable struct of homogeneous scalable vector as function argument
Currently LLVM supports structs as function inputs, so a RISCV tuple type can simply be passed as a struct of homogeneous scalable vectors instead of being flattened.
1 parent 45d2d77 commit a062a9f

File tree

511 files changed

+242841
-378198
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

511 files changed

+242841
-378198
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 75 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -3216,6 +3216,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
32163216
}
32173217
}
32183218

3219+
llvm::StructType *STy =
3220+
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
3221+
llvm::TypeSize StructSize;
3222+
llvm::TypeSize PtrElementSize;
3223+
if (ArgI.isDirect() && !ArgI.getCanBeFlattened() && STy &&
3224+
STy->getNumElements() > 1) {
3225+
StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
3226+
PtrElementSize =
3227+
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
3228+
if (STy->containsHomogeneousScalableVectorTypes()) {
3229+
assert(StructSize == PtrElementSize &&
3230+
"Only allow non-fractional movement of structure with"
3231+
"homogeneous scalable vector type");
3232+
3233+
ArgVals.push_back(ParamValue::forDirect(AI));
3234+
break;
3235+
}
3236+
}
3237+
32193238
Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
32203239
Arg->getName());
32213240

@@ -3224,53 +3243,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
32243243

32253244
// Fast-isel and the optimizer generally like scalar values better than
32263245
// FCAs, so we flatten them if this is safe to do for this argument.
3227-
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
32283246
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
32293247
STy->getNumElements() > 1) {
3230-
llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
3231-
llvm::TypeSize PtrElementSize =
3232-
CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
3233-
if (StructSize.isScalable()) {
3234-
assert(STy->containsHomogeneousScalableVectorTypes() &&
3235-
"ABI only supports structure with homogeneous scalable vector "
3236-
"type");
3237-
assert(StructSize == PtrElementSize &&
3238-
"Only allow non-fractional movement of structure with"
3239-
"homogeneous scalable vector type");
3240-
assert(STy->getNumElements() == NumIRArgs);
3241-
3242-
llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
3243-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3244-
auto *AI = Fn->getArg(FirstIRArg + i);
3245-
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3246-
LoadedStructValue =
3247-
Builder.CreateInsertValue(LoadedStructValue, AI, i);
3248-
}
3248+
uint64_t SrcSize = StructSize.getFixedValue();
3249+
uint64_t DstSize = PtrElementSize.getFixedValue();
32493250

3250-
Builder.CreateStore(LoadedStructValue, Ptr);
3251+
Address AddrToStoreInto = Address::invalid();
3252+
if (SrcSize <= DstSize) {
3253+
AddrToStoreInto = Ptr.withElementType(STy);
32513254
} else {
3252-
uint64_t SrcSize = StructSize.getFixedValue();
3253-
uint64_t DstSize = PtrElementSize.getFixedValue();
3254-
3255-
Address AddrToStoreInto = Address::invalid();
3256-
if (SrcSize <= DstSize) {
3257-
AddrToStoreInto = Ptr.withElementType(STy);
3258-
} else {
3259-
AddrToStoreInto =
3260-
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
3261-
}
3255+
AddrToStoreInto =
3256+
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
3257+
}
32623258

3263-
assert(STy->getNumElements() == NumIRArgs);
3264-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3265-
auto AI = Fn->getArg(FirstIRArg + i);
3266-
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3267-
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
3268-
Builder.CreateStore(AI, EltPtr);
3269-
}
3259+
assert(STy->getNumElements() == NumIRArgs);
3260+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3261+
auto AI = Fn->getArg(FirstIRArg + i);
3262+
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3263+
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
3264+
Builder.CreateStore(AI, EltPtr);
3265+
}
32703266

3271-
if (SrcSize > DstSize) {
3272-
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
3273-
}
3267+
if (SrcSize > DstSize) {
3268+
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
32743269
}
32753270
} else {
32763271
// Simple case, just do a coerced store of the argument into the alloca.
@@ -5287,6 +5282,24 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52875282
break;
52885283
}
52895284

5285+
llvm::StructType *STy =
5286+
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
5287+
llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
5288+
llvm::TypeSize SrcTypeSize;
5289+
llvm::TypeSize DstTypeSize;
5290+
if (STy && ArgInfo.isDirect() && !ArgInfo.getCanBeFlattened()) {
5291+
SrcTypeSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
5292+
DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
5293+
if (STy->containsHomogeneousScalableVectorTypes()) {
5294+
assert(SrcTypeSize == DstTypeSize &&
5295+
"Only allow non-fractional movement of structure with "
5296+
"homogeneous scalable vector type");
5297+
5298+
IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal();
5299+
break;
5300+
}
5301+
}
5302+
52905303
// FIXME: Avoid the conversion through memory if possible.
52915304
Address Src = Address::invalid();
52925305
if (!I->isAggregate()) {
@@ -5302,54 +5315,30 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
53025315

53035316
// Fast-isel and the optimizer generally like scalar values better than
53045317
// FCAs, so we flatten them if this is safe to do for this argument.
5305-
llvm::StructType *STy =
5306-
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
53075318
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
5308-
llvm::Type *SrcTy = Src.getElementType();
5309-
llvm::TypeSize SrcTypeSize =
5310-
CGM.getDataLayout().getTypeAllocSize(SrcTy);
5311-
llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
5312-
if (SrcTypeSize.isScalable()) {
5313-
assert(STy->containsHomogeneousScalableVectorTypes() &&
5314-
"ABI only supports structure with homogeneous scalable vector "
5315-
"type");
5316-
assert(SrcTypeSize == DstTypeSize &&
5317-
"Only allow non-fractional movement of structure with "
5318-
"homogeneous scalable vector type");
5319-
assert(NumIRArgs == STy->getNumElements());
5320-
5321-
llvm::Value *StoredStructValue =
5322-
Builder.CreateLoad(Src, Src.getName() + ".tuple");
5323-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5324-
llvm::Value *Extract = Builder.CreateExtractValue(
5325-
StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
5326-
IRCallArgs[FirstIRArg + i] = Extract;
5327-
}
5319+
uint64_t SrcSize = SrcTypeSize.getFixedValue();
5320+
uint64_t DstSize = DstTypeSize.getFixedValue();
5321+
5322+
// If the source type is smaller than the destination type of the
5323+
// coerce-to logic, copy the source value into a temp alloca the size
5324+
// of the destination type to allow loading all of it. The bits past
5325+
// the source value are left undef.
5326+
if (SrcSize < DstSize) {
5327+
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
5328+
Src.getName() + ".coerce");
5329+
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5330+
Src = TempAlloca;
53285331
} else {
5329-
uint64_t SrcSize = SrcTypeSize.getFixedValue();
5330-
uint64_t DstSize = DstTypeSize.getFixedValue();
5331-
5332-
// If the source type is smaller than the destination type of the
5333-
// coerce-to logic, copy the source value into a temp alloca the size
5334-
// of the destination type to allow loading all of it. The bits past
5335-
// the source value are left undef.
5336-
if (SrcSize < DstSize) {
5337-
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
5338-
Src.getName() + ".coerce");
5339-
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5340-
Src = TempAlloca;
5341-
} else {
5342-
Src = Src.withElementType(STy);
5343-
}
5332+
Src = Src.withElementType(STy);
5333+
}
53445334

5345-
assert(NumIRArgs == STy->getNumElements());
5346-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5347-
Address EltPtr = Builder.CreateStructGEP(Src, i);
5348-
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5349-
if (ArgHasMaybeUndefAttr)
5350-
LI = Builder.CreateFreeze(LI);
5351-
IRCallArgs[FirstIRArg + i] = LI;
5352-
}
5335+
assert(NumIRArgs == STy->getNumElements());
5336+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5337+
Address EltPtr = Builder.CreateStructGEP(Src, i);
5338+
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5339+
if (ArgHasMaybeUndefAttr)
5340+
LI = Builder.CreateFreeze(LI);
5341+
IRCallArgs[FirstIRArg + i] = LI;
53535342
}
53545343
} else {
53555344
// In the simple case, just pass the coerced loaded value.

clang/lib/CodeGen/Targets/RISCV.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,13 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
433433
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
434434
}
435435

436-
return ABIArgInfo::getDirect();
436+
ABIArgInfo Info = ABIArgInfo::getDirect();
437+
438+
// If it is a tuple type, it can't be flattened.
439+
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)))
440+
Info.setCanBeFlattened(!STy->containsHomogeneousScalableVectorTypes());
441+
442+
return Info;
437443
}
438444

439445
if (const VectorType *VT = Ty->getAs<VectorType>())

0 commit comments

Comments
 (0)