-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Clang][counted_by] Refactor __builtin_dynamic_object_size on FAMs #122198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Refactoring of how __builtin_dynamic_object_size() is calculated for flexible array members (in preparation for adding support for the 'counted_by' attribute on pointers in structs). The only functionality change is that we use the already emitted Expr code to build our calculations off of rather than re-emitting the Expr. That allows the 'StructFieldAccess' visitor to sift through all casts and ArraySubscriptExprs to find the first MemberExpr. We build our GEPs and calculate offsets based off of relative distances from that MemberExpr. The testcase passes execution tests. There are four categories to support: struct p; struct s { /* ... */ int count; struct p *array[] __attribute__((counted_by(count))); }; 1) Object size of full flexible array member's size 'ptr->array': size_t count = (size_t) ptr->count; size_t flexible_array_member_base_size = sizeof (*ptr->array); size_t flexible_array_member_size = count * flexible_array_member_base_size; return flexible_array_member_size; 2) Object size from partial flexible array member '&ptr->array[idx]': size_t count = (size_t) ptr->count; size_t index = (size_t) idx; size_t flexible_array_member_base_size = sizeof (*ptr->array); size_t flexible_array_member_size = count * flexible_array_member_base_size; size_t index_size = index * flexible_array_member_base_size; return flexible_array_member_size - index_size; 3) Object size from '&ptr->field': size_t count = (size_t) ptr->count; size_t sizeof_struct = sizeof (struct s); size_t flexible_array_member_base_size = sizeof (*ptr->array); size_t flexible_array_member_size = count * flexible_array_member_base_size; size_t field_offset = offsetof (struct s, field); size_t offset_diff = sizeof_struct - field_offset; return flexible_array_member_size + offset_diff; 4) Object size from '&ptr->field[idx]': size_t count = (size_t) ptr->count; size_t index = (size_t) idx; size_t sizeof_struct = sizeof (struct s); size_t field_base_size = sizeof (*ptr->field); size_t flexible_array_member_base_size = sizeof (*ptr->array); size_t flexible_array_member_size = count * flexible_array_member_base_size; size_t field_offset = offsetof (struct s, field) + index * field_base_size; size_t offset_diff = sizeof_struct - field_offset; return offset_diff + flexible_array_member_size; Signed-off-by: Bill Wendling <[email protected]>
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Bill Wendling (bwendling) ChangesRefactoring of how __builtin_dynamic_object_size() is calculated for flexible array members (in preparation for adding support for the 'counted_by' attribute on pointers in structs). The only functionality change is that we use the already emitted Expr code to build our calculations off of rather than re-emitting the Expr. That allows the 'StructFieldAccess' visitor to sift through all casts and ArraySubscriptExprs to find the first MemberExpr. We build our GEPs and calculate offsets based off of relative distances from that MemberExpr. The testcase passes execution tests. There are four categories to support:
Patch is 127.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122198.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index dcea32969fb990..61450bcb5f9af8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -29,6 +29,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/AST/Type.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
@@ -1060,33 +1061,82 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}
-const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
- ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
- uint64_t &Offset) {
+namespace {
+
+/// StructFieldAccess is a simple visitor class to grab the first MemberExpr
+/// from an Expr. It records any ArraySubscriptExpr we meet along the way.
+struct StructFieldAccess
+ : public ConstStmtVisitor<StructFieldAccess, const MemberExpr *> {
+ const ArraySubscriptExpr *ASE = nullptr;
+
+ const MemberExpr *VisitMemberExpr(const MemberExpr *E) { return E; }
+
+ const MemberExpr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+ ASE = E;
+ return Visit(E->getBase());
+ }
+ const MemberExpr *VisitCastExpr(const CastExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const MemberExpr *VisitParenExpr(const ParenExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+ const MemberExpr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+ const MemberExpr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+ return Visit(E->getSubExpr());
+ }
+};
+
+} // end anonymous namespace
+
+/// Find a struct's flexible array member. It may be embedded inside multiple
+/// sub-structs, but must still be the last field.
+static const FieldDecl *FindFlexibleArrayMemberField(CodeGenFunction &CGF,
+ ASTContext &Ctx,
+ const RecordDecl *RD) {
const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
- getLangOpts().getStrictFlexArraysLevel();
- uint32_t FieldNo = 0;
+ CGF.getLangOpts().getStrictFlexArraysLevel();
if (RD->isImplicit())
return nullptr;
for (const FieldDecl *FD : RD->fields()) {
- if ((!FAMDecl || FD == FAMDecl) &&
- Decl::isFlexibleArrayMemberLike(
+ if (Decl::isFlexibleArrayMemberLike(
Ctx, FD, FD->getType(), StrictFlexArraysLevel,
- /*IgnoreTemplateOrMacroSubstitution=*/true)) {
- const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
- Offset += Layout.getFieldOffset(FieldNo);
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
return FD;
+
+ if (auto RT = FD->getType()->getAs<RecordType>())
+ if (const FieldDecl *FD =
+ FindFlexibleArrayMemberField(CGF, Ctx, RT->getAsRecordDecl()))
+ return FD;
+ }
+
+ return nullptr;
+}
+
+/// Calculate the offset of a struct field. It may be embedded inside multiple
+/// sub-structs.
+static bool GetFieldOffset(ASTContext &Ctx, const RecordDecl *RD,
+ const FieldDecl *FD, int64_t &Offset) {
+ if (RD->isImplicit())
+ return false;
+
+ const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+ uint32_t FieldNo = 0;
+
+ for (const FieldDecl *Field : RD->fields()) {
+ if (Field == FD) {
+ Offset += Layout.getFieldOffset(FieldNo);
+ return true;
}
- QualType Ty = FD->getType();
- if (Ty->isRecordType()) {
- if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
- Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
- const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+ if (auto RT = Field->getType()->getAs<RecordType>()) {
+ if (GetFieldOffset(Ctx, RT->getAsRecordDecl(), FD, Offset)) {
Offset += Layout.getFieldOffset(FieldNo);
- return Field;
+ return true;
}
}
@@ -1094,204 +1144,248 @@ const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
++FieldNo;
}
- return nullptr;
+ return false;
}
-static unsigned CountCountedByAttrs(const RecordDecl *RD) {
- unsigned Num = 0;
+llvm::Value *
+CodeGenFunction::emitCountedByMemberSize(const Expr *E, llvm::Value *EmittedE,
+ unsigned Type,
+ llvm::IntegerType *ResType) {
+ ASTContext &Ctx = getContext();
- for (const FieldDecl *FD : RD->fields()) {
- if (FD->getType()->isCountAttributedType())
- return ++Num;
+ // Note: If the whole struct is specificed in the __bdos (i.e. Visitor
+ // returns a DeclRefExpr). The calculation of the whole size of the structure
+ // with a flexible array member can be done in two ways:
+ //
+ // 1) sizeof(struct S) + count * sizeof(typeof(fam))
+ // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
+ //
+ // The first will add additional padding after the end of the array
+ // allocation while the second method is more precise, but not quite expected
+ // from programmers. See
+ // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a discussion
+ // of the topic.
+ //
+ // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
+ // structure. Therefore, because of the above issue, we choose to match what
+ // GCC does for consistency's sake.
- QualType Ty = FD->getType();
- if (Ty->isRecordType())
- Num += CountCountedByAttrs(Ty->getAsRecordDecl());
- }
+ StructFieldAccess Visitor;
+ const MemberExpr *ME = Visitor.Visit(E);
+ if (!ME)
+ return nullptr;
- return Num;
-}
+ const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ if (!FD)
+ return nullptr;
-llvm::Value *
-CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
- llvm::IntegerType *ResType) {
- // The code generated here calculates the size of a struct with a flexible
- // array member that uses the counted_by attribute. There are two instances
- // we handle:
+ const RecordDecl *RD = FD->getDeclContext()->getOuterLexicalRecordContext();
+ const FieldDecl *FlexibleArrayMemberFD = nullptr;
+
+ if (Decl::isFlexibleArrayMemberLike(
+ Ctx, FD, FD->getType(), getLangOpts().getStrictFlexArraysLevel(),
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
+ FlexibleArrayMemberFD = FD;
+ else
+ FlexibleArrayMemberFD = FindFlexibleArrayMemberField(*this, Ctx, RD);
+
+ if (!FlexibleArrayMemberFD ||
+ !FlexibleArrayMemberFD->getType()->isCountAttributedType())
+ return nullptr;
+
+ const FieldDecl *CountFD = FlexibleArrayMemberFD->findCountedByField();
+ if (!CountFD)
+ // Can't find the field referenced by the "counted_by" attribute.
+ return nullptr;
+
+ const Expr *Idx = nullptr;
+ if (Visitor.ASE) {
+ Idx = Visitor.ASE->getIdx();
+
+ if (Idx->HasSideEffects(Ctx))
+ // We can't have side-effects.
+ return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+ if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
+ int64_t Val = IL->getValue().getSExtValue();
+ if (Val < 0)
+ return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+ // The index is 0, so we don't need to take it into account.
+ if (Val == 0)
+ Idx = nullptr;
+ }
+ }
+
+ // Calculate the flexible array member's object size using these formulae
+ // (note: if the calculation is negative, we return 0.):
//
- // struct s {
- // unsigned long flags;
- // int count;
- // int array[] __attribute__((counted_by(count)));
- // }
+ // struct p;
+ // struct s {
+ // /* ... */
+ // int count;
+ // struct p *array[] __attribute__((counted_by(count)));
+ // };
//
- // 1) bdos of the flexible array itself:
+ // 1) 'ptr->array':
//
- // __builtin_dynamic_object_size(p->array, 1) ==
- // p->count * sizeof(*p->array)
+ // size_t count = (size_t) ptr->count;
//
- // 2) bdos of a pointer into the flexible array:
+ // size_t flexible_array_member_base_size = sizeof (*ptr->array);
+ // size_t flexible_array_member_size =
+ // count * flexible_array_member_base_size;
//
- // __builtin_dynamic_object_size(&p->array[42], 1) ==
- // (p->count - 42) * sizeof(*p->array)
+ // return flexible_array_member_size;
//
- // 2) bdos of the whole struct, including the flexible array:
+ // 2) '&ptr->array[idx]':
//
- // __builtin_dynamic_object_size(p, 1) ==
- // max(sizeof(struct s),
- // offsetof(struct s, array) + p->count * sizeof(*p->array))
+ // size_t count = (size_t) ptr->count;
+ // size_t index = (size_t) idx;
//
- ASTContext &Ctx = getContext();
- const Expr *Base = E->IgnoreParenImpCasts();
- const Expr *Idx = nullptr;
-
- if (const auto *UO = dyn_cast<UnaryOperator>(Base);
- UO && UO->getOpcode() == UO_AddrOf) {
- Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
- if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
- Base = ASE->getBase()->IgnoreParenImpCasts();
- Idx = ASE->getIdx()->IgnoreParenImpCasts();
-
- if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
- int64_t Val = IL->getValue().getSExtValue();
- if (Val < 0)
- return getDefaultBuiltinObjectSizeResult(Type, ResType);
-
- if (Val == 0)
- // The index is 0, so we don't need to take it into account.
- Idx = nullptr;
- }
- } else {
- // Potential pointer to another element in the struct.
- Base = SubExpr;
- }
- }
+ // size_t flexible_array_member_base_size = sizeof (*ptr->array);
+ // size_t flexible_array_member_size =
+ // count * flexible_array_member_base_size;
+ //
+ // size_t index_size = index * flexible_array_member_base_size;
+ //
+ // return flexible_array_member_size - index_size;
+ //
+ // 3) '&ptr->field':
+ //
+ // size_t count = (size_t) ptr->count;
+ // size_t sizeof_struct = sizeof (struct s);
+ //
+ // size_t flexible_array_member_base_size = sizeof (*ptr->array);
+ // size_t flexible_array_member_size =
+ // count * flexible_array_member_base_size;
+ //
+ // size_t field_offset = offsetof (struct s, field);
+ // size_t offset_diff = struct_size - field_offset;
+ //
+ // return flexible_array_member_size + offset_diff;
+ //
+ // 4) '&ptr->field_array[idx]':
+ //
+ // size_t count = (size_t) ptr->count;
+ // size_t index = (size_t) idx;
+ // size_t sizeof_struct = sizeof (struct s);
+ //
+ // size_t flexible_array_member_base_size = sizeof (*ptr->array);
+ // size_t flexible_array_member_size =
+ // count * flexible_array_member_base_size;
+ //
+ // size_t field_base_size = sizeof (*ptr->field_array);
+ // size_t field_offset = offsetof (struct s, field)
+ // field_offset += index * field_base_size;
+ //
+ // size_t offset_diff = sizeof_struct - field_offset;
+ //
+ // return offset_diff + flexible_array_member_size;
- // Get the flexible array member Decl.
- const RecordDecl *OuterRD = nullptr;
- const FieldDecl *FAMDecl = nullptr;
- if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
- // Check if \p Base is referencing the FAM itself.
- const ValueDecl *VD = ME->getMemberDecl();
- OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
- FAMDecl = dyn_cast<FieldDecl>(VD);
- if (!FAMDecl)
- return nullptr;
- } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
- // Check if we're pointing to the whole struct.
- QualType Ty = DRE->getDecl()->getType();
- if (Ty->isPointerType())
- Ty = Ty->getPointeeType();
- OuterRD = Ty->getAsRecordDecl();
-
- // If we have a situation like this:
- //
- // struct union_of_fams {
- // int flags;
- // union {
- // signed char normal_field;
- // struct {
- // int count1;
- // int arr1[] __counted_by(count1);
- // };
- // struct {
- // signed char count2;
- // int arr2[] __counted_by(count2);
- // };
- // };
- // };
- //
- // We don't know which 'count' to use in this scenario:
- //
- // size_t get_size(struct union_of_fams *p) {
- // return __builtin_dynamic_object_size(p, 1);
- // }
- //
- // Instead of calculating a wrong number, we give up.
- if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
- return nullptr;
- }
+ QualType CountTy = CountFD->getType();
+ bool IsSigned = CountTy->isSignedIntegerType();
- if (!OuterRD)
- return nullptr;
+ QualType FlexibleArrayMemberTy = FlexibleArrayMemberFD->getType();
+ QualType FieldTy = FD->getType();
- // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
- // get its offset.
- uint64_t Offset = 0;
- FAMDecl =
- FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
- Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
+ // Explicit cast because otherwise the CharWidth will promote an i32's into
+ // u64's leading to overflows..
+ int64_t CharWidth = static_cast<int64_t>(CGM.getContext().getCharWidth());
- if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
- // No flexible array member found or it doesn't have the "counted_by"
- // attribute.
+ // size_t count = (size_t) ptr->count;
+ Value *Count = EmitLoadOfCountedByField(ME, FlexibleArrayMemberFD, CountFD);
+ if (!Count)
return nullptr;
+ Count = Builder.CreateIntCast(Count, ResType, IsSigned, "count");
- const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
- if (!CountedByFD)
- // Can't find the field referenced by the "counted_by" attribute.
- return nullptr;
-
- if (isa<DeclRefExpr>(Base))
- // The whole struct is specificed in the __bdos. The calculation of the
- // whole size of the structure can be done in two ways:
- //
- // 1) sizeof(struct S) + count * sizeof(typeof(fam))
- // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
- //
- // The first will add additional padding after the end of the array,
- // allocation while the second method is more precise, but not quite
- // expected from programmers. See
- // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
- // discussion of the topic.
- //
- // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
- // structure. Therefore, because of the above issue, we'll choose to match
- // what GCC does for consistency's sake.
- return nullptr;
+ // size_t index = (size_t) ptr->index;
+ Value *Index = nullptr;
+ if (Idx) {
+ bool IdxSigned = Idx->getType()->isSignedIntegerType();
+ Index = EmitAnyExprToTemp(Idx).getScalarVal();
+ Index = Builder.CreateIntCast(Index, ResType, IdxSigned, "index");
+ }
+
+ // size_t sizeof_struct = sizeof (struct s);
+ llvm::StructType *StructTy = getTypes().getCGRecordLayout(RD).getLLVMType();
+ const llvm::DataLayout &Layout = Builder.GetInsertBlock()->getDataLayout();
+ TypeSize Size = Layout.getTypeSizeInBits(StructTy);
+ Value *SizeofStruct =
+ llvm::ConstantInt::get(ResType, Size.getKnownMinValue() / CharWidth);
+
+ // size_t field_offset = offsetof (struct s, field);
+ Value *FieldOffset = nullptr;
+ if (FlexibleArrayMemberFD != FD) {
+ int64_t Offset = 0;
+ GetFieldOffset(Ctx, RD, FD, Offset);
+ FieldOffset = llvm::ConstantInt::get(ResType, Offset / CharWidth, IsSigned);
+ }
+
+ // size_t flexible_array_member_base_size = sizeof (*ptr->array);
+ const ArrayType *ArrayTy = Ctx.getAsArrayType(FlexibleArrayMemberTy);
+ CharUnits BaseSize = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
+ auto *FlexibleArrayMemberBaseSize =
+ llvm::ConstantInt::get(ResType, BaseSize.getQuantity(), IsSigned);
+
+ // size_t flexible_array_member_size =
+ // count * flexible_array_member_base_size;
+ Value *FlexibleArrayMemberSize =
+ Builder.CreateMul(Count, FlexibleArrayMemberBaseSize,
+ "flexible_array_member_size", !IsSigned, IsSigned);
+
+ auto CheckForNegative = [&](Value *Res) {
+ Value *Cmp = Builder.CreateIsNotNeg(Res);
+ if (Index)
+ Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(Index), Cmp);
+ return Builder.CreateSelect(Cmp, Res,
+ ConstantInt::get(ResType, 0, IsSigned));
+ };
- // Build a load of the counted_by field.
- bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
- Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
- if (!CountedByInst)
- return getDefaultBuiltinObjectSizeResult(Type, ResType);
+ if (FlexibleArrayMemberFD == FD) {
+ if (Idx) { // Option (2) '&ptr->array[idx]'
+ // size_t index_size = index * flexible_array_member_base_size;
+ Value *IndexSize = Builder.CreateMul(FlexibleArrayMemberBaseSize, Index,
+ "index_size", !IsSigned, IsSigned);
- CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
+ // return flexible_array_member_size - index_size;
+ return CheckForNegative(Builder.CreateSub(
+ FlexibleArrayMemberSize, IndexSize, "result", !IsSigned, IsSigned));
+ }
+ } else {
+ if (Idx) { // Option (4) '&ptr->field_array[idx]'
+ // size_t field_base_size = sizeof (*ptr->field_array);
+ const ArrayType *ArrayTy = Ctx.getAsArrayType(FieldTy);
+ CharUnits BaseSize = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
+ auto *FieldBaseSize =
+ llvm::ConstantInt::get(ResType, BaseSize.getQuantity(), IsSigned);
+
+ // field_offset += index * field_base_size;
+ Value *Mul = Builder.CreateMul(Index, FieldBaseSize, "field_offset",
+ !IsSigned, IsSigned);
+ FieldOffset = Builder.CreateAdd(FieldOffset, Mul);
+
+ // size_t offset_diff = flexible_array_member_offset - field_offset;
+ Value *OffsetDiff = Builder.CreateSub(SizeofStruct, FieldOffset,
+ "offset_diff", !IsSigned, IsSigned);
+
+ // return offset_diff + flexible_array_member_size;
+ return CheckForNegative(
+ Builder.CreateAdd(FlexibleArrayMemberSize, OffsetDiff, "result"));
+ } else { // Option (3) '&ptr->field'
+ // size_t offset_diff = flexible_array_member_offset - field_offset;
+ Value *OffsetDiff = Builder.CreateSub(SizeofStruct, FieldOffset,
+ "offset_diff", !IsSigned, IsSigned);
+
+ // return flexible_array_member_size + offset_diff;
+ return CheckForNegative(
+ Builder.CreateAdd(FlexibleArrayMemberSize, OffsetDiff, "result"));
+ }
+ }
- // Build a load of the index and subtract it from the count.
- Value *IdxInst = nullptr;
- if (Idx) {
- if (Idx->HasSideEffects(getContext()))
- // We can't have side-effects.
- return getDefaultBuiltinObjectSizeResult(Type, ResType);
+ // Option (1) 'ptr->array'
- bool IdxSigned = Idx->getType()->isSignedIntegerType();
- IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
- IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
-
- // We go ahead with the calculation here. If the index turns out to be
- // negative, we'll catch it at the end.
- CountedByInst =
- Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
- }
-
- // Calculate how large the flexible array member is in bytes.
- const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
- CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
- llvm::Constant *ElemSize =
- llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
- Value *Res =
- Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
- Res = Builder.CreateIntCast(Res, ResType, IsSigned);
-
- // A negative \p IdxInst or \p CountedByInst me...
[truncated]
|
Howdy! This is essentially a complete rewrite of 00b6d03. The original was overly complex and fragile. The main difference with this PR is basing the calculations off of the already emitted The testcase changes are pretty large, but I visually reviewed the changes and ran the resulting code and it produced the expected results. (It even uncovered a bug! :-D) Please take a look and let me know any issues you have. It's mostly an NFC patch, but it's so large and invasive that I can't really mark it as such. |
…mitting scalar expressions and getting the DataLayout.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for addressing comments! LGTM. You may still need to wait on Clang code owners review though.
// GCC isn't (currently) able to calculate __bdos on a pointer to the whole | ||
// structure. Therefore, because of the above issue, we choose to match what | ||
// GCC does for consistency's sake. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to imply a bug in gcc. What if that changes in the future?
Is there a bug on file in their issue tracker you could link to from here? If not, does it make sense to file one, then link to it from here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe there's a bug report related to this issue. IIRC, GCC isn't able to do this because they're not able to distinguish between a pointer to a single object or an array of objects (waves hands around). So asking the __bdos
of a pointer is more ambiguous for them. If what I said is true, then it's a systemic issue in GCC and not easily fixable for a single feature. So I don't know if filing a bug would gain much traction. We're also in close contact with the GCC maintainer of this feature so we're likely to catch any changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Speaking as a distro downstream, we've generally been pretty happy with the increased rigor of clang's __bdos
behaviour, after cleaning up the initial fallout (which has been fixed upstream in most cases).
Losing this would be a shame.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! We removed the original feature here because it was asserting in the Linux kernel due to calculations being off. If this is a regression for you, please file a bug and we'll discuss the best way to move forward with support for it. (Note that this has already been removed in the main trunk, so it wasn't omitted for this patch.)
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
|
||
auto CheckForNegative = [&](Value *Res) { | ||
Value *Cmp = Builder.CreateIsNotNeg(Res); | ||
if (Index) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that this is true when Idx
is truthy. I wonder if we could reuse the same expression in both places to get CSE?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how CSE works inside functors, but I can use Idx
here and give it a fighting chance. :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hopefully inlined, maybe not given the number of callers. But I have a suggestion for just inlining it if each branch produces a single Value*
scoped outside of themselves, so that there's only one call to CheckForNegative
, at which point, you might as well just manually inline the lambda.
…or. Make non-public ivar non-public.
} | ||
|
||
/// Calculate the offset of a struct field. It may be embedded inside multiple | ||
/// sub-structs. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps worth noting in the comment that you're searching for the offset of FD
in RD
(IIUC)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?? That's what the comment says...
Num += CountCountedByAttrs(Ty->getAsRecordDecl()); | ||
} | ||
if (GetFieldOffset(Ctx, RD, FD, Offset)) | ||
return std::optional<int64_t>(Offset); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can do:
return std::optional<int64_t>(Offset); | |
return {Offset}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's keep the original way. It's clearer, at least to me. :-)
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
// size_t field_offset = offsetof (struct s, field); | ||
Value *FieldOffset = nullptr; | ||
if (FlexibleArrayMemberFD != FD) { | ||
std::optional<int64_t> Offset = GetFieldOffset(Ctx, RD, FD); | ||
if (!Offset) | ||
return nullptr; | ||
FieldOffset = | ||
llvm::ConstantInt::get(ResType, *Offset / CharWidth, IsSigned); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, now FieldOffset
can get downscoped into the else
branch of the if (FlexibleArrayMemberFD == FD)
comparison on L1361. Seeing the conditional if (FlexibleArrayMemberFD != FD)
on L1456 is a tell.
Same for CharWidth
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want this to happen before any code is emitted, which happens before the if statement. This is fine here. :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As for the other constants, I'm trying to follow the pseudo-code above. I don't think it adds to the complexity (or generates code) to create the constants first.
|
||
const MemberExpr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { | ||
AddrOfSeen = false; // '&ptr->array[idx]' is okay. | ||
ASE = E; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens if you have &ptr->array2d[idx1][idx2]
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like GCC returns -1 in this situation. I'll have us do the same.
return Visit(E->getBase()); | ||
} | ||
const MemberExpr *VisitCastExpr(const CastExpr *E) { | ||
return Visit(E->getSubExpr()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like I've mentioned before, I generally prefer explicitly classifying casts by CastKind, instead of ignoring the casts, then trying to infer what casts happened later. In particular, you do not want to look through lvalue-to-rvalue conversions in this context.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, so what do I do with an lvalue-to-rvalue cast when I encounter it?
As I explained in the comment, I'm using the already emitted Expr
to calculate the offset to the 'count' object. To get that, all I need is the MemberExpr
of the flexible array member.
Edit: Specifically, the only part of the original Expr that might be emitted is any index in the ArraySubscriptExpr. So it's not like the visitor in CGExpr.cpp where we use the lvalue-to-rvalue cast during the emission.
…ere and default to llvm.objectsize().
Are there anymore comments? |
Friendly Ping. |
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
// size_t flexible_array_member_size = | ||
// count * flexible_array_member_base_size; | ||
// | ||
// if (flexible_array_member_size < 0) | ||
// return 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to concern ourselves with overflow; what if count
is SIZE_MAX
? Note: size_t
is unsigned, so the comparison is a tautology. Are the comments incorrect in that flexible_array_member_base_size
is interpreted as a ssize_t
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The pseudo-code is only an approximation of what we're calculating. The result type of __builtin{_dynamic}_object_size
is size_t
. I try my best to catch any negative results in the code.
At the moment, I can't do anything here because GitHub fucked up a merge and now it seems like I can't revert it back to something sensible. >:-(
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The result type of __builtin{_dynamic}_object_size is size_t.
Right, but I'm more curious about the type of the count
field. Can the count
be negative? FWICT, this isn't checked (we check the result AFTER multiplying it by the flexible_array_member_base_size
, but could that calculation have overflowed?); the tests indicate it's just zero extended, but not checked until after the multiplication. (Perhaps we don't need to solve all of these in this PR, but perhaps we should file issues/TODOs with a list of issues to fix). I'm worried that at runtime the count
is somehow set to -1
, and the array access is allowed because it's interpreted as a large positive number, which then overflows when multiplied by flexible_array_member_base_size
and passes the "is negative" check, allowing access when access should be denied.
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
// size_t flexible_array_member_size = | ||
// count * flexible_array_member_base_size; | ||
// | ||
// if (flexible_array_member_size < 0) | ||
// return 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The result type of __builtin{_dynamic}_object_size is size_t.
Right, but I'm more curious about the type of the count
field. Can the count
be negative? FWICT, this isn't checked (we check the result AFTER multiplying it by the flexible_array_member_base_size
, but could that calculation have overflowed?); the tests indicate it's just zero extended, but not checked until after the multiplication. (Perhaps we don't need to solve all of these in this PR, but perhaps we should file issues/TODOs with a list of issues to fix). I'm worried that at runtime the count
is somehow set to -1
, and the array access is allowed because it's interpreted as a large positive number, which then overflows when multiplied by flexible_array_member_base_size
and passes the "is negative" check, allowing access when access should be denied.
@nickdesaulniers I understand what you're saying, and it was addressed in the original patch. I do my best to carry the signed-ness of the variables through to the end so that the checks work out (e.g. the checks for negatives aren't removed after optimizations, which they would be if the results were unsigned). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I appreciate the comments about common usage patterns; they help make it easier to understand (subjectively) the 4 cases you handle in code. I still think we may need to triple check how this extension behaves with count fields that do not get extended AND are negative at runtime (if we don't already have tests for that), but perhaps is orthogonal to this initial refactoring. You may want to get another +1 before landing (perhaps @efriedma-quic or @rjmccall ).
I'll look into test cases with negative values. Thanks! |
Friendly ping for any further comments. |
This breaks
I can reduce up something shortly. |
I'll look into it. |
Refactoring of how __builtin_dynamic_object_size() is calculated for flexible array members (in preparation for adding support for the 'counted_by' attribute on pointers in structs).
The only functionality change is that we use the already emitted Expr code to build our calculations off of rather than re-emitting the Expr. That allows the 'StructFieldAccess' visitor to sift through all casts and ArraySubscriptExprs to find the first MemberExpr. We build our GEPs and calculate offsets based off of relative distances from that MemberExpr.
The testcase passes execution tests.
Calculate the flexible array member's object size using these formulae
(note: if the calculation is negative, we return 0.):
'ptr->array':
count = ptr->count;
flexible_array_member_base_size = sizeof (*ptr->array);
flexible_array_member_size =
count * flexible_array_member_base_size;
if (flexible_array_member_size < 0)
return 0;
return flexible_array_member_size;
'&ptr->array[idx]':
count = ptr->count;
index = idx;
flexible_array_member_base_size = sizeof (*ptr->array);
flexible_array_member_size =
count * flexible_array_member_base_size;
index_size = index * flexible_array_member_base_size;
if (flexible_array_member_size < 0 || index < 0)
return 0;
return flexible_array_member_size - index_size;
'&ptr->field':
count = ptr->count;
sizeof_struct = sizeof (struct s);
flexible_array_member_base_size = sizeof (*ptr->array);
flexible_array_member_size =
count * flexible_array_member_base_size;
field_offset = offsetof (struct s, field);
offset_diff = sizeof_struct - field_offset;
if (flexible_array_member_size < 0)
return 0;
return offset_diff + flexible_array_member_size;
'&ptr->field_array[idx]':
count = ptr->count;
index = idx;
sizeof_struct = sizeof (struct s);
flexible_array_member_base_size = sizeof (*ptr->array);
flexible_array_member_size =
count * flexible_array_member_base_size;
field_base_size = sizeof (*ptr->field_array);
field_offset = offsetof (struct s, field)
field_offset += index * field_base_size;
offset_diff = sizeof_struct - field_offset;
if (flexible_array_member_size < 0 || index < 0)
return 0;
return offset_diff + flexible_array_member_size;