-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Don't rely on undefined behavior to store how a User
object's allocation is laid out
#105714
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-ir Author: Daniel Paoliello (dpaoliello) ChangesIn We discovered this issue when trying to build LLVM using MSVC's The fix for this is to move the fields that are set during Full diff: https://github.com/llvm/llvm-project/pull/105714.diff 7 Files Affected:
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index afae564bf022d2..58717f50408201 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1256,7 +1256,7 @@ class CallBase : public Instruction {
using Instruction::Instruction;
- bool hasDescriptor() const { return Value::HasDescriptor; }
+ bool hasDescriptor() const { return getHeader().HasDescriptor; }
unsigned getNumSubclassExtraOperands() const {
switch (getOpcode()) {
diff --git a/llvm/include/llvm/IR/OperandTraits.h b/llvm/include/llvm/IR/OperandTraits.h
index ffece6324aab02..306f5f97e9f2bf 100644
--- a/llvm/include/llvm/IR/OperandTraits.h
+++ b/llvm/include/llvm/IR/OperandTraits.h
@@ -32,10 +32,10 @@ struct FixedNumOperandTraits {
static_assert(
!std::is_polymorphic<SubClass>::value,
"adding virtual methods to subclasses of User breaks use lists");
- return reinterpret_cast<Use*>(U) - ARITY;
+ return static_cast<User *>(U)->getIntrusiveOperands();
}
static Use *op_end(SubClass* U) {
- return reinterpret_cast<Use*>(U);
+ return static_cast<User *>(U)->getIntrusiveOperands() + ARITY;
}
static unsigned operands(const User*) {
return ARITY;
@@ -70,10 +70,11 @@ struct VariadicOperandTraits {
static_assert(
!std::is_polymorphic<SubClass>::value,
"adding virtual methods to subclasses of User breaks use lists");
- return reinterpret_cast<Use*>(U) - static_cast<User*>(U)->getNumOperands();
+ return static_cast<User *>(U)->getIntrusiveOperands();
}
static Use *op_end(SubClass* U) {
- return reinterpret_cast<Use*>(U);
+ return static_cast<User *>(U)->getIntrusiveOperands() +
+ static_cast<User *>(U)->getNumOperands();
}
static unsigned operands(const User *U) {
return U->getNumOperands();
diff --git a/llvm/include/llvm/IR/User.h b/llvm/include/llvm/IR/User.h
index a9cf60151e5dc6..4436454499f9f2 100644
--- a/llvm/include/llvm/IR/User.h
+++ b/llvm/include/llvm/IR/User.h
@@ -44,11 +44,50 @@ struct OperandTraits;
class User : public Value {
template <unsigned>
friend struct HungoffOperandTraits;
+ template <typename SubClass, unsigned MINARITY>
+ friend struct VariadicOperandTraits;
+ template <typename SubClass, unsigned ARITY>
+ friend struct FixedNumOperandTraits;
LLVM_ATTRIBUTE_ALWAYS_INLINE static void *
allocateFixedOperandUser(size_t, unsigned, unsigned);
protected:
+ union Header {
+ // A `User` object, its `Header`, operands and descriptor are all allocated
+ // together (see `operator new`). Since the `User` object is after the other
+ // objects, we use the padding here to force `Header` to be the same size as
+ // a pointer so that the `User` object is aligned correctly.
+ std::size_t _padding;
+ struct Contents {
+ /// The number of operands in the subclass.
+ ///
+ /// This member is defined by this class, but not used for anything.
+ /// Subclasses can use it to store their number of operands, if they have
+ /// any.
+ ///
+ /// This is stored here to save space in User on 64-bit hosts. Since most
+ /// instances of Value have operands, 32-bit hosts aren't significantly
+ /// affected.
+ ///
+ /// Note, this should *NOT* be used directly by any class other than User.
+ /// User uses this value to find the Use list.
+ enum : unsigned { NumUserOperandsBits = 27 };
+ unsigned NumUserOperands : NumUserOperandsBits;
+ unsigned HasHungOffUses : 1;
+ unsigned HasDescriptor : 1;
+ } contents;
+ };
+ static_assert(sizeof(Header) == sizeof(void *),
+ "Header should be the same size as a pointer");
+
+ Header::Contents &getHeader() {
+ return (reinterpret_cast<Header *>(this) - 1)->contents;
+ }
+
+ const Header::Contents &getHeader() const {
+ return (reinterpret_cast<const Header *>(this) - 1)->contents;
+ }
/// Allocate a User with an operand pointer co-allocated.
///
/// This is used for subclasses which need to allocate a variable number
@@ -72,11 +111,12 @@ class User : public Value {
User(Type *ty, unsigned vty, Use *, unsigned NumOps)
: Value(ty, vty) {
- assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
- NumUserOperands = NumOps;
+ assert(NumOps < (1u << Header::Contents::NumUserOperandsBits) &&
+ "Too many operands");
+ getHeader().NumUserOperands = NumOps;
// If we have hung off uses, then the operand list should initially be
// null.
- assert((!HasHungOffUses || !getOperandList()) &&
+ assert((!getHeader().HasHungOffUses || !getOperandList()) &&
"Error in initializing hung off uses for User");
}
@@ -139,40 +179,48 @@ class User : public Value {
private:
const Use *getHungOffOperands() const {
- return *(reinterpret_cast<const Use *const *>(this) - 1);
+ return *(reinterpret_cast<const Use *const *>(&getHeader()) - 1);
}
- Use *&getHungOffOperands() { return *(reinterpret_cast<Use **>(this) - 1); }
+ Use *&getHungOffOperands() {
+ return *(reinterpret_cast<Use **>(&getHeader()) - 1);
+ }
const Use *getIntrusiveOperands() const {
- return reinterpret_cast<const Use *>(this) - NumUserOperands;
+ auto &operandInfo = getHeader();
+ return reinterpret_cast<const Use *>(&operandInfo) -
+ operandInfo.NumUserOperands;
}
Use *getIntrusiveOperands() {
- return reinterpret_cast<Use *>(this) - NumUserOperands;
+ auto &operandInfo = getHeader();
+ return reinterpret_cast<Use *>(&operandInfo) - operandInfo.NumUserOperands;
}
void setOperandList(Use *NewList) {
- assert(HasHungOffUses &&
+ assert(getHeader().HasHungOffUses &&
"Setting operand list only required for hung off uses");
getHungOffOperands() = NewList;
}
public:
const Use *getOperandList() const {
- return HasHungOffUses ? getHungOffOperands() : getIntrusiveOperands();
+ return getHeader().HasHungOffUses ? getHungOffOperands()
+ : getIntrusiveOperands();
}
Use *getOperandList() {
return const_cast<Use *>(static_cast<const User *>(this)->getOperandList());
}
+ unsigned getNumOperands() const { return getHeader().NumUserOperands; }
+
Value *getOperand(unsigned i) const {
- assert(i < NumUserOperands && "getOperand() out of range!");
+ assert(i < getNumOperands() && "getOperand() out of range!");
return getOperandList()[i];
}
void setOperand(unsigned i, Value *Val) {
- assert(i < NumUserOperands && "setOperand() out of range!");
+ assert(i < getNumOperands() && "setOperand() out of range!");
assert((!isa<Constant>((const Value*)this) ||
isa<GlobalValue>((const Value*)this)) &&
"Cannot mutate a constant with setOperand!");
@@ -180,16 +228,14 @@ class User : public Value {
}
const Use &getOperandUse(unsigned i) const {
- assert(i < NumUserOperands && "getOperandUse() out of range!");
+ assert(i < getNumOperands() && "getOperandUse() out of range!");
return getOperandList()[i];
}
Use &getOperandUse(unsigned i) {
- assert(i < NumUserOperands && "getOperandUse() out of range!");
+ assert(i < getNumOperands() && "getOperandUse() out of range!");
return getOperandList()[i];
}
- unsigned getNumOperands() const { return NumUserOperands; }
-
/// Returns the descriptor co-allocated with this User instance.
ArrayRef<const uint8_t> getDescriptor() const;
@@ -206,16 +252,18 @@ class User : public Value {
/// 1 operand before delete.
void setGlobalVariableNumOperands(unsigned NumOps) {
assert(NumOps <= 1 && "GlobalVariable can only have 0 or 1 operands");
- NumUserOperands = NumOps;
+ getHeader().NumUserOperands = NumOps;
}
/// Subclasses with hung off uses need to manage the operand count
/// themselves. In these instances, the operand count isn't used to find the
/// OperandList, so there's no issue in having the operand count change.
void setNumHungOffUseOperands(unsigned NumOps) {
- assert(HasHungOffUses && "Must have hung off uses to use this method");
- assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands");
- NumUserOperands = NumOps;
+ assert(getHeader().HasHungOffUses &&
+ "Must have hung off uses to use this method");
+ assert(NumOps < (1u << Header::Contents::NumUserOperandsBits) &&
+ "Too many operands");
+ getHeader().NumUserOperands = NumOps;
}
/// A droppable user is a user for which uses can be dropped without affecting
@@ -233,11 +281,9 @@ class User : public Value {
op_iterator op_begin() { return getOperandList(); }
const_op_iterator op_begin() const { return getOperandList(); }
- op_iterator op_end() {
- return getOperandList() + NumUserOperands;
- }
+ op_iterator op_end() { return getOperandList() + getNumOperands(); }
const_op_iterator op_end() const {
- return getOperandList() + NumUserOperands;
+ return getOperandList() + getNumOperands();
}
op_range operands() {
return op_range(op_begin(), op_end());
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 945081b77e9536..43eca2f428d8ad 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -92,27 +92,10 @@ class Value {
unsigned short SubclassData;
protected:
- /// The number of operands in the subclass.
- ///
- /// This member is defined by this class, but not used for anything.
- /// Subclasses can use it to store their number of operands, if they have
- /// any.
- ///
- /// This is stored here to save space in User on 64-bit hosts. Since most
- /// instances of Value have operands, 32-bit hosts aren't significantly
- /// affected.
- ///
- /// Note, this should *NOT* be used directly by any class other than User.
- /// User uses this value to find the Use list.
- enum : unsigned { NumUserOperandsBits = 27 };
- unsigned NumUserOperands : NumUserOperandsBits;
-
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
unsigned HasMetadata : 1; // Has metadata attached to this?
- unsigned HasHungOffUses : 1;
- unsigned HasDescriptor : 1;
private:
Type *VTy;
diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index 637af7aaa24530..959447f95066a2 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -48,7 +48,7 @@ bool User::replaceUsesOfWith(Value *From, Value *To) {
//===----------------------------------------------------------------------===//
void User::allocHungoffUses(unsigned N, bool IsPhi) {
- assert(HasHungOffUses && "alloc must have hung off uses");
+ assert(getHeader().HasHungOffUses && "alloc must have hung off uses");
static_assert(alignof(Use) >= alignof(BasicBlock *),
"Alignment is insufficient for 'hung-off-uses' pieces");
@@ -65,7 +65,7 @@ void User::allocHungoffUses(unsigned N, bool IsPhi) {
}
void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) {
- assert(HasHungOffUses && "realloc must have hung off uses");
+ assert(getHeader().HasHungOffUses && "realloc must have hung off uses");
unsigned OldNumUses = getNumOperands();
@@ -102,8 +102,8 @@ ArrayRef<const uint8_t> User::getDescriptor() const {
}
MutableArrayRef<uint8_t> User::getDescriptor() {
- assert(HasDescriptor && "Don't call otherwise!");
- assert(!HasHungOffUses && "Invariant!");
+ assert(getHeader().HasDescriptor && "Don't call otherwise!");
+ assert(!getHeader().HasHungOffUses && "Invariant!");
auto *DI = reinterpret_cast<DescriptorInfo *>(getIntrusiveOperands()) - 1;
assert(DI->SizeInBytes != 0 && "Should not have had a descriptor otherwise!");
@@ -122,7 +122,8 @@ bool User::isDroppable() const {
void *User::allocateFixedOperandUser(size_t Size, unsigned Us,
unsigned DescBytes) {
- assert(Us < (1u << NumUserOperandsBits) && "Too many operands");
+ assert(Us < (1u << Header::Contents::NumUserOperandsBits) &&
+ "Too many operands");
static_assert(sizeof(DescriptorInfo) % sizeof(void *) == 0, "Required below");
@@ -131,14 +132,20 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned Us,
assert(DescBytesToAllocate % sizeof(void *) == 0 &&
"We need this to satisfy alignment constraints for Uses");
- uint8_t *Storage = static_cast<uint8_t *>(
- ::operator new(Size + sizeof(Use) * Us + DescBytesToAllocate));
+ uint8_t *Storage = static_cast<uint8_t *>(::operator new(
+ Size + sizeof(Header) + sizeof(Use) * Us + DescBytesToAllocate));
Use *Start = reinterpret_cast<Use *>(Storage + DescBytesToAllocate);
Use *End = Start + Us;
- User *Obj = reinterpret_cast<User*>(End);
- Obj->NumUserOperands = Us;
- Obj->HasHungOffUses = false;
- Obj->HasDescriptor = DescBytes != 0;
+ Header *OI = reinterpret_cast<Header *>(End);
+ User *Obj = reinterpret_cast<User *>(OI + 1);
+ Obj->getHeader().NumUserOperands = Us;
+ Obj->getHeader().HasHungOffUses = false;
+ Obj->getHeader().HasDescriptor = DescBytes != 0;
+ assert(&(OI->contents) == &Obj->getHeader() &&
+ "getHeader() is returning the wrong location");
+ assert(Start == Obj->getIntrusiveOperands() &&
+ "getIntrusiveOperands() is returning the wrong location");
+
for (; Start != End; Start++)
new (Start) Use(Obj);
@@ -160,12 +167,17 @@ void *User::operator new(size_t Size, unsigned Us, unsigned DescBytes) {
void *User::operator new(size_t Size) {
// Allocate space for a single Use*
- void *Storage = ::operator new(Size + sizeof(Use *));
+ void *Storage = ::operator new(Size + sizeof(Header) + sizeof(Use *));
Use **HungOffOperandList = static_cast<Use **>(Storage);
- User *Obj = reinterpret_cast<User *>(HungOffOperandList + 1);
- Obj->NumUserOperands = 0;
- Obj->HasHungOffUses = true;
- Obj->HasDescriptor = false;
+ Header *OI = reinterpret_cast<Header *>(HungOffOperandList + 1);
+ User *Obj = reinterpret_cast<User *>(OI + 1);
+ Obj->getHeader().NumUserOperands = 0;
+ Obj->getHeader().HasHungOffUses = true;
+ Obj->getHeader().HasDescriptor = false;
+ assert(&(OI->contents) == &Obj->getHeader() &&
+ "getHeader() is returning the wrong location");
+ assert(HungOffOperandList == &Obj->getHungOffOperands() &&
+ "getHungOffOperands() is returning the wrong location");
*HungOffOperandList = nullptr;
return Obj;
}
@@ -180,24 +192,24 @@ LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void User::operator delete(void *Usr) {
// Hung off uses use a single Use* before the User, while other subclasses
// use a Use[] allocated prior to the user.
User *Obj = static_cast<User *>(Usr);
- if (Obj->HasHungOffUses) {
- assert(!Obj->HasDescriptor && "not supported!");
+ if (Obj->getHeader().HasHungOffUses) {
+ assert(!Obj->getHeader().HasDescriptor && "not supported!");
- Use **HungOffOperandList = static_cast<Use **>(Usr) - 1;
+ Use **HungOffOperandList = &(Obj->getHungOffOperands());
// drop the hung off uses.
- Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands,
+ Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->getNumOperands(),
/* Delete */ true);
::operator delete(HungOffOperandList);
- } else if (Obj->HasDescriptor) {
- Use *UseBegin = static_cast<Use *>(Usr) - Obj->NumUserOperands;
- Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false);
+ } else if (Obj->getHeader().HasDescriptor) {
+ Use *UseBegin = Obj->getIntrusiveOperands();
+ Use::zap(UseBegin, UseBegin + Obj->getNumOperands(), /* Delete */ false);
auto *DI = reinterpret_cast<DescriptorInfo *>(UseBegin) - 1;
uint8_t *Storage = reinterpret_cast<uint8_t *>(DI) - DI->SizeInBytes;
::operator delete(Storage);
} else {
- Use *Storage = static_cast<Use *>(Usr) - Obj->NumUserOperands;
- Use::zap(Storage, Storage + Obj->NumUserOperands,
+ Use *Storage = Obj->getIntrusiveOperands();
+ Use::zap(Storage, Storage + Obj->getNumOperands(),
/* Delete */ false);
::operator delete(Storage);
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index b2ee75811fbb7d..b8259b75a8efa0 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -52,8 +52,8 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
: SubclassID(scid), HasValueHandle(0), SubclassOptionalData(0),
- SubclassData(0), NumUserOperands(0), IsUsedByMD(false), HasName(false),
- HasMetadata(false), VTy(checkType(ty)), UseList(nullptr) {
+ SubclassData(0), IsUsedByMD(false), HasName(false), HasMetadata(false),
+ VTy(checkType(ty)), UseList(nullptr) {
static_assert(ConstantFirstVal == 0, "!(SubclassID < ConstantFirstVal)");
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index d83ae8013c51e2..a968eb8f7773a4 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -329,8 +329,7 @@ For later versions of Visual Studio, no setup is required.
<DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
<DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
<Expand>
- <Item Name="[Inst]" Condition="SubclassID > InstructionVal">(Instruction*)this</Item>
- <Item Name="Operands">(User*)this</Item>
+ <Item Name="[Inst]" Condition="SubclassID > InstructionVal">(llvm::Instruction*)this</Item>
<LinkedListItems>
<HeadPointer>UseList</HeadPointer>
<NextPointer>Next</NextPointer>
@@ -357,15 +356,15 @@ For later versions of Visual Studio, no setup is required.
<DisplayString Condition="HasName">$(Type) {*VTy} {this->getName()} {SubclassData}</DisplayString>
<DisplayString Condition="!HasName">$(Type) {*VTy} anon {SubclassData}</DisplayString>
<Expand>
- <Item Name="[Value]">(Value*)this,nd</Item>
+ <Item Name="[Value]">(llvm::User*)this,nd</Item>
<Item Name="[Type]">*VTy</Item>
- <ArrayItems Condition="!HasHungOffUses">
- <Size>NumUserOperands</Size>
- <ValuePointer>(llvm::Use*)this - NumUserOperands</ValuePointer>
+ <ArrayItems Condition="!((llvm::User::Header*)this - 1)->contents.HasHungOffUses">
+ <Size>((llvm::User::Header*)this - 1)->contents.NumUserOperands</Size>
+ <ValuePointer>(llvm::Use*)((llvm::User::Header*)this - 1) - ((llvm::User::Header*)this - 1)->contents.NumUserOperands</ValuePointer>
</ArrayItems>
- <ArrayItems Condition="HasHungOffUses">
- <Size>NumUserOperands</Size>
- <ValuePointer>*((llvm::Use**)this - 1)</ValuePointer>
+ <ArrayItems Condition="((llvm::User::Header*)this - 1)->contents.HasHungOffUses">
+ <Size>((llvm::User::Header*)this - 1)->contents.NumUserOperands</Size>
+ <ValuePointer>*((llvm::Use**)((llvm::User::Header*)this - 1) - 1)</ValuePointer>
</ArrayItems>
</Expand>
</Type>
@@ -373,7 +372,7 @@ For later versions of Visual Studio, no setup is required.
<Type Name="llvm::Instruction">
<DisplayString>{getOpcodeName(SubclassID - InstructionVal)}</DisplayString>
<Expand>
- <Item Name="[User]">(User*)this,nd</Item>
+ <Item Name="[User]">(llvm::User*)this,nd</Item>
</Expand>
</Type>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for working on this, it is past time that we did something about it. I remember bumping into this years ago.
I'm not sure this is the best memory layout, but it's going to take some time to think about that.
In the meantime, I started browsing MLIR code to try to figure out how they store the number of operands, presumably without this same UB. I didn't get an answer. @joker-eph , who is the best PoC for that?
For MLIR, here is the code to create and setup an Operation: https://github.com/llvm/llvm-project/blob/main/mlir/lib/IR/Operation.cpp#L114-L120 |
Yeah, that's one other option: instead of splitting this logic between |
I've been reviewing the Use system code, and we already pass in a dead All the fixed operand call sites can pass the false / zero / no-op-flag values to an updated User constructor, and while we're at it, we can drop the dead Use operand list pointer. |
Done - please let me know if you like the approach, I was trying to make it easy to get "correct" (i.e., avoiding folks from having to know exactly what specific arguments to pass to the constructor). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I like this explicit approach. I spent like 15minutes prototyping a version of this with two booleans, and it was awful, unreadable, easy to confuse, hard to refactor, etc etc. I was going to try again with a flags enum, but I like your approach with the bitfield.
llvm/include/llvm/IR/User.h
Outdated
/// Information about how a User object was allocated, to be passed into the | ||
/// User constructor. | ||
/// | ||
/// DO NOT USE DIRECTLY. Use one of the `AllocMarker` structs instead. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it feasible to enforce this guidance to avoid direct use through explicit private constructors and friend class declarations, or is that too difficult? It seems like it might not be worth it, you'd have to make the constructors private, then friend the marker classes, and then maybe explicitly default the copy constructor and make it public, so it can be freely passed around.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, turned out to be easier than that: I moved the conversion into AllocInfo
as constructors, so now the only way to create one is to have an AllocMarker
.
llvm/lib/IR/Globals.cpp
Outdated
@@ -454,6 +453,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, | |||
assert(InitVal->getType() == Ty && | |||
"Initializer should be the same type as the GlobalVariable!"); | |||
Op<0>() = InitVal; | |||
} else { | |||
// HACK: This is incorrect, we should have allocated less memory. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see this corresponds to the GlobalVariable operator delete comments that set it back to 1. Would it make more sense to reset NumUserOperands back to 1 in the destructor, just for symmetry?
This also raises a broader question of whether it is well-defined to read NumOperands in User::operator delete
after the destructor has run. We still do that, and that would interfere with tools like MSan that poison memory in destructors.
Concretely, please update this comment to explain what's going on here in more detail. It's not incorrect as written. All GlobalVariables allocate one Use, and for variable declarations that lack an initializer, we reduce the operand list size to zero. That has to be undone before deletion, or deallocation will fail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I didn't see that operator delete
fixed this!
I moved the fixup into the destructor, and moved setGlobalVariableNumOperands
into GlobalVariable
and made it private (as it's only used in GlobalVariable
).
I also removed my comment in the ctor as I'm now calling setGlobalVariableNumOperands
and that (and the dtor) have comments to explain what's happening.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for doing this! I think this looks good.
In
User::operator new
a single allocation is created to store theUser
object itself, "intrusive" operands or a pointer for "hung off" operands, and the descriptor. After allocation, details about the layout (number of operands, how the operands are stored, if there is a descriptor) are stored in theUser
object by settings its fields. TheValue
andUser
constructors are then very careful not to initialize these fields so that the values set during allocation can be subsequently read. However, when theUser
object is returned fromoperator new
its value is technically "indeterminate" and so reading a field without first initializing it is undefined behavior (and will be erroneous in C++26).We discovered this issue when trying to build LLVM using MSVC's
/sdl
flag which clears class fields after allocation (the docs say that this feature shouldn't be turned on for custom allocators and should only clear pointers, but that doesn't seem to match the implementation). MSVC's behavior both with and without the/sdl
flag is standards conforming since a program is supposed to initialize storage before reading from it, thus the compiler implementation changing any values will never be observed in a well-formed program. The standard also provides no provisions for making storage bytes not indeterminate by setting them during allocation oroperator new
.The fix for this is to create a set of types that encode the layout and provide these to both
operator new
and the constructor:AllocMarker
types are used to select whichoperator new
to use.AllocMarker
can then be implicitly converted to aAllocInfo
which tells the constructor how the type was laid out.