-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Remove type-punning in LazyOffsetPtr. #112806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: Richard Smith (zygoloid) ChangesDon't try to read the Fixes #111993 Full diff: https://github.com/llvm/llvm-project/pull/112806.diff 1 Files Affected:
diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 385c32edbae0fd..7603ed24afacbc 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -18,6 +18,7 @@
#include "clang/AST/DeclBase.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/bit.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/PointerUnion.h"
@@ -321,50 +322,87 @@ class ExternalASTSource : public RefCountedBase<ExternalASTSource> {
/// external AST source itself.
template<typename T, typename OffsT, T* (ExternalASTSource::*Get)(OffsT Offset)>
struct LazyOffsetPtr {
- /// Either a pointer to an AST node or the offset within the
- /// external AST source where the AST node can be found.
- ///
- /// If the low bit is clear, a pointer to the AST node. If the low
- /// bit is set, the upper 63 bits are the offset.
- mutable uint64_t Ptr = 0;
+ /// Storage for a pointer or an offset, for the case where pointers are 64
+ /// bits wide. The least-significant bit is used as the discriminator. If the
+ /// bit is clear, a pointer to the AST node. If the bit is set, the upper 63
+ /// bits are the offset.
+ union StorageType64 {
+ StorageType64(uint64_t Offset) : ShiftedOffset((Offset << 1) | 1) {
+ assert(isOffset());
+ }
+ StorageType64(T *Ptr) : Pointer(Ptr) { assert(!isOffset()); }
+
+ bool isOffset() { return llvm::bit_cast<uint64_t>(*this) & 1; }
+ uint64_t offset() { return ShiftedOffset >> 1; }
+ T *&pointer() { return Pointer; }
+
+ uint64_t ShiftedOffset;
+ T *Pointer;
+ };
+
+ /// Storage for a pointer or an offset, for the case where pointers are not 64
+ /// bits wide.
+ union StorageType32 {
+ StorageType32(uint64_t Off) : Offset{true, Off} {}
+ StorageType32(T *Ptr) : Pointer{false, Ptr} {}
+
+ bool isOffset() { return Offset.IsOffset; }
+ uint64_t offset() { return Offset.Offset; }
+ T *&pointer() { return Pointer.Pointer; }
+
+ struct OffsetType {
+ uint64_t IsOffset : 1;
+ uint64_t Offset : 63;
+ } Offset;
+ struct PointerType {
+ uint64_t IsOffset : 1;
+ T *Pointer;
+ } Pointer;
+ };
+
+ mutable std::conditional_t<sizeof(uint64_t) == sizeof(T *), StorageType64,
+ StorageType32>
+ Storage;
public:
- LazyOffsetPtr() = default;
- explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+ LazyOffsetPtr() : LazyOffsetPtr(nullptr) {}
+ explicit LazyOffsetPtr(T *Ptr) : Storage(Ptr) {}
- explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
- assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
+ explicit LazyOffsetPtr(uint64_t Offset) : Storage(Offset) {
+ assert(Offset == Storage.offset() && "Offsets must require < 63 bits");
if (Offset == 0)
- Ptr = 0;
+ Storage = nullptr;
}
LazyOffsetPtr &operator=(T *Ptr) {
- this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+ Storage = Ptr;
return *this;
}
LazyOffsetPtr &operator=(uint64_t Offset) {
- assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
if (Offset == 0)
- Ptr = 0;
- else
- Ptr = (Offset << 1) | 0x01;
-
+ Storage = nullptr;
+ else {
+ Storage = Offset;
+ assert(Offset == Storage.offset() && "Offsets must require < 63 bits");
+ }
return *this;
}
/// Whether this pointer is non-NULL.
///
/// This operation does not require the AST node to be deserialized.
- explicit operator bool() const { return Ptr != 0; }
+ explicit operator bool() const {
+ return Storage.isOffset() || Storage.pointer();
+ }
/// Whether this pointer is non-NULL.
///
/// This operation does not require the AST node to be deserialized.
- bool isValid() const { return Ptr != 0; }
+ bool isValid() const { return operator bool(); }
/// Whether this pointer is currently stored as an offset.
- bool isOffset() const { return Ptr & 0x01; }
+ bool isOffset() const { return Storage.isOffset(); }
/// Retrieve the pointer to the AST node that this lazy pointer points to.
///
@@ -375,17 +413,18 @@ struct LazyOffsetPtr {
if (isOffset()) {
assert(Source &&
"Cannot deserialize a lazy pointer without an AST source");
- Ptr = reinterpret_cast<uint64_t>((Source->*Get)(OffsT(Ptr >> 1)));
+ Storage = (Source->*Get)(static_cast<OffsT>(Storage.offset()));
}
- return reinterpret_cast<T*>(Ptr);
+ return Storage.pointer();
}
/// Retrieve the address of the AST node pointer. Deserializes the pointee if
- /// necessary.
+ /// necessary. This function should not exist, and we would like to remove it.
+ /// Do not add new calls to it.
T **getAddressOfPointer(ExternalASTSource *Source) const {
// Ensure the integer is in pointer form.
(void)get(Source);
- return reinterpret_cast<T**>(&Ptr);
+ return &Storage.pointer();
}
};
|
You can test this locally with the following command:git-clang-format --diff 90767bc41bd69fb4b9ac01a8420ef58bbbaeab7c 992f6457be13418c4c1d8490502b66896125a296 --extensions h -- clang/include/clang/AST/ExternalASTSource.h View the diff from clang-format here.diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 7603ed24af..db97b71321 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -18,12 +18,12 @@
#include "clang/AST/DeclBase.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/bit.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/bit.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cassert>
|
}; | ||
|
||
mutable std::conditional_t<sizeof(uint64_t) == sizeof(T *), StorageType64, | ||
StorageType32> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Systems where sizeof(void *) > 8
end up using StorageType32 which is highly inefficient, as PointerType will be two pointers in size rather than just using a single pointer as storage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true. How much should we worry about that? Such systems aren't going to be efficient in general due to the large numbers of pointers in the AST.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, CHERI is such a system. LLVM isn't ported to it today, but I'd rather not see the problem get worse if it can be avoided. And whilst, yes, more memory is needed, quadrupling the size of a structure when the pointer size doubles makes the problem far worse than it need be.
Your sizes aren't correct in the description here. This issue occurs on big endian 32-bit systems, and the pointer is stored in the lower bits, which are lost because of using address-of when turning the 64-bit number into a pointer. i.e. storing 0x12345678 yields:
If you just cast down to a |
Yeah, sorry, just a typo. Fixed. |
Closing in favour of #112927. |
Don't try to read the
uint64_t
stored inLazyOffsetPtr
as aT*
viagetAddressOfPointer
. This violates aliasing rules and doesn't work at all onbig-endian 32-bit systems where the pointer is stored in the second four bytes
of the
uint64_t
.Fixes #111993