-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[lld-macho][ObjC] Implement category merging into base class #92448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -379,12 +379,21 @@ class ObjcCategoryMerger { | |
InfoWriteSection catPtrListInfo; | ||
}; | ||
|
||
// Information about a pointer list in the original categories (method lists, | ||
// protocol lists, etc) | ||
// Information about a pointer list in the original categories or class (method | ||
// lists, protocol lists, etc) | ||
struct PointerListInfo { | ||
PointerListInfo() = default; | ||
PointerListInfo(const PointerListInfo &) = default; | ||
PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) | ||
: categoryPrefix(_categoryPrefix), | ||
pointersPerStruct(_pointersPerStruct) {} | ||
|
||
inline bool operator==(const PointerListInfo &cmp) { | ||
return pointersPerStruct == cmp.pointersPerStruct && | ||
structSize == cmp.structSize && structCount == cmp.structCount && | ||
allPtrs == cmp.allPtrs; | ||
} | ||
|
||
const char *categoryPrefix; | ||
|
||
uint32_t pointersPerStruct = 0; | ||
|
@@ -395,9 +404,9 @@ class ObjcCategoryMerger { | |
std::vector<Symbol *> allPtrs; | ||
}; | ||
|
||
// Full information about all the categories that extend a class. This will | ||
// include all the additional methods, protocols, and properties that are | ||
// contained in all the categories that extend a particular class. | ||
// Full information describing an ObjC class. This will include all the | ||
// additional methods, protocols, and properties that are contained in the | ||
// class and all the categories that extend a particular class. | ||
struct ClassExtensionInfo { | ||
ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; | ||
|
||
|
@@ -449,16 +458,19 @@ class ObjcCategoryMerger { | |
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, | ||
PointerListInfo &ptrList); | ||
|
||
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, | ||
uint32_t secOffset); | ||
|
||
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, | ||
PointerListInfo &ptrList); | ||
|
||
void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, | ||
const ClassExtensionInfo &extInfo, | ||
const PointerListInfo &ptrList); | ||
|
||
void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, | ||
const ClassExtensionInfo &extInfo, | ||
const PointerListInfo &ptrList); | ||
Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, | ||
const ClassExtensionInfo &extInfo, | ||
const PointerListInfo &ptrList); | ||
|
||
Defined *emitCategory(const ClassExtensionInfo &extInfo); | ||
Defined *emitCatListEntrySec(const std::string &forCategoryName, | ||
|
@@ -474,6 +486,10 @@ class ObjcCategoryMerger { | |
uint32_t offset); | ||
Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, | ||
uint32_t offset); | ||
Defined *getClassRo(const Defined *classSym, bool getMetaRo); | ||
void mergeCategoriesIntoBaseClass(const Defined *baseClass, | ||
std::vector<InfoInputCategory> &categories); | ||
void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); | ||
void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, | ||
uint32_t offset); | ||
|
||
|
@@ -552,6 +568,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, | |
return dyn_cast_or_null<Defined>(sym); | ||
} | ||
|
||
// Get the class's ro_data symbol. If getMetaRo is true, then we will return | ||
// the meta-class's ro_data symbol. Otherwise, we will return the class | ||
// (instance) ro_data symbol. | ||
// | ||
// Returns nullptr when classSym's section is not a ConcatInputSection, or | ||
// (for getMetaRo) when no Defined metaclass symbol is found at | ||
// classLayout.metaClassOffset relative to the class symbol. | ||
Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, | ||
bool getMetaRo) { | ||
ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); | ||
if (!isec) | ||
return nullptr; | ||
|
||
// Class (instance) case: the lookup offset is relative to the start of the | ||
// section, so the class symbol's own offset (classSym->value) is added. | ||
if (!getMetaRo) | ||
return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + | ||
classSym->value); | ||
|
||
// Metaclass case: first resolve the metaclass symbol from the class body. | ||
Defined *metaClass = tryGetDefinedAtIsecOffset( | ||
isec, classLayout.metaClassOffset + classSym->value); | ||
if (!metaClass) | ||
return nullptr; | ||
|
||
// NOTE(review): the dyn_cast result below is passed on without a null check, | ||
// and unlike the class path above, metaClass->value is not added to the | ||
// offset - confirm tryGetDefinedAtIsecOffset tolerates a null isec and that | ||
// the metaclass symbol always sits at offset 0 of its section. | ||
return tryGetDefinedAtIsecOffset( | ||
dyn_cast<ConcatInputSection>(metaClass->isec()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can it be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
classLayout.roDataOffset); | ||
} | ||
|
||
// Given a ConcatInputSection or CStringInputSection and an offset, if there is | ||
// a symbol(Defined) at that offset, then erase the symbol (mark it not live) | ||
void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( | ||
|
@@ -663,6 +702,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, | |
"Protocol list end offset does not match expected size"); | ||
} | ||
|
||
// Parse a protocol list and return the PointerListInfo for it | ||
// | ||
// Convenience overload: runs the three-argument parseProtocolListInfo on a | ||
// default-constructed PointerListInfo and returns the result by value. | ||
// Note that the defaulted constructor does not set categoryPrefix, so callers | ||
// should not read that field from the returned object. | ||
ObjcCategoryMerger::PointerListInfo | ||
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, | ||
uint32_t secOffset) { | ||
PointerListInfo ptrList; | ||
parseProtocolListInfo(isec, secOffset, ptrList); | ||
return ptrList; | ||
} | ||
|
||
// Parse a pointer list that might be linked to ConcatInputSection at a given | ||
// offset. This can be used for instance methods, class methods, instance props | ||
// and class props since they have the same format. | ||
|
@@ -769,11 +817,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, | |
|
||
// Generate a protocol list (including header) and link it into the parent at | ||
// the specified offset. | ||
void ObjcCategoryMerger::emitAndLinkProtocolList( | ||
Defined *ObjcCategoryMerger::emitAndLinkProtocolList( | ||
Defined *parentSym, uint32_t linkAtOffset, | ||
const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { | ||
if (ptrList.allPtrs.empty()) | ||
return; | ||
return nullptr; | ||
|
||
assert(ptrList.allPtrs.size() == ptrList.structCount); | ||
|
||
|
@@ -820,6 +868,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList( | |
infoCategoryWriter.catPtrListInfo.relocTemplate); | ||
offset += target->wordSize; | ||
} | ||
|
||
return ptrListSym; | ||
} | ||
|
||
// Generate a pointer list (including header) and link it into the parent at the | ||
|
@@ -1265,10 +1315,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() { | |
void ObjcCategoryMerger::doMerge() { | ||
collectAndValidateCategoriesData(); | ||
|
||
for (auto &entry : categoryMap) | ||
if (entry.second.size() > 1) | ||
for (auto &[baseClass, catInfos] : categoryMap) { | ||
if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { | ||
// Merge all categories into the base class | ||
mergeCategoriesIntoBaseClass(baseClassDef, catInfos); | ||
} else if (catInfos.size() > 1) { | ||
// Merge all categories into a new, single category | ||
mergeCategoriesIntoSingleCategory(entry.second); | ||
mergeCategoriesIntoSingleCategory(catInfos); | ||
} | ||
} | ||
|
||
// Erase all categories that were merged | ||
eraseMergedCategories(); | ||
|
@@ -1302,3 +1357,101 @@ void objc::mergeCategories() { | |
} | ||
|
||
// Free function in the objc namespace that simply forwards to | ||
// ObjcCategoryMerger::doCleanup(). | ||
void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } | ||
|
||
// Merge the given categories directly into their base class instead of into a | ||
// new category. The steps are: | ||
//   1. gather the lists from every category into a ClassExtensionInfo, | ||
//   2. gather the protocol/method/property lists already attached to the | ||
//      class's class-RO and metaclass-RO data into the same structure, | ||
//   3. erase the old lists from the class, | ||
//   4. emit the merged lists and link them into class-RO / metaclass-RO, | ||
//   5. flag every input category as merged so it can be erased later. | ||
// baseClass is the class symbol being extended; categories must be non-empty. | ||
void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( | ||
const Defined *baseClass, std::vector<InfoInputCategory> &categories) { | ||
assert(categories.size() >= 1 && "Expected at least one category to merge"); | ||
|
||
// Collect all the info from the categories | ||
ClassExtensionInfo extInfo(catLayout); | ||
for (auto &catInfo : categories) { | ||
parseCatInfoToExtInfo(catInfo, extInfo); | ||
} | ||
|
||
// Get metadata for the base class | ||
// NOTE(review): getClassRo can return nullptr, and neither metaRo nor | ||
// classRo is checked before ->isec() is called; the dyn_cast results are | ||
// likewise used unchecked - confirm the caller guarantees a well-formed class. | ||
Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); | ||
ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); | ||
Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); | ||
ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); | ||
|
||
// Now collect the info from the base class from the various lists in the | ||
// class metadata | ||
// (The category lists were parsed first, above; presumably the class's own | ||
// entries end up after them in each merged list - TODO confirm against the | ||
// parse helpers.) | ||
|
||
// Protocol lists are a special case - the same protocol list is in classRo | ||
// and metaRo, so we only need to parse it once | ||
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, | ||
alx32 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
extInfo.protocols); | ||
|
||
// Check that the classRo and metaRo protocol lists are identical | ||
// Both parse calls live inside the assert expression, so they are only | ||
// evaluated in builds where assertions are enabled. | ||
assert( | ||
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) == | ||
parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) && | ||
"Category merger expects classRo and metaRo to have the same protocol " | ||
"list"); | ||
|
||
// Class-level (metaRo) lists feed the class* fields, instance-level | ||
// (classRo) lists feed the instance* fields. | ||
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, | ||
extInfo.classMethods); | ||
parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, | ||
extInfo.instanceMethods); | ||
|
||
parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, | ||
extInfo.classProps); | ||
parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, | ||
extInfo.instanceProps); | ||
|
||
// Erase the old lists - these will be generated and replaced | ||
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); | ||
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); | ||
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); | ||
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); | ||
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); | ||
eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); | ||
|
||
// Emit the newly merged lists - first into the meta RO then into the class RO | ||
// First we emit and link the protocol list into the meta RO. Then we link it | ||
// in the classRo as well (they're supposed to be identical) | ||
// emitAndLinkProtocolList returns nullptr for an empty protocol list, in | ||
// which case no reference is added to classRo either. | ||
if (Defined *protoListSym = | ||
emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, | ||
extInfo, extInfo.protocols)) { | ||
createSymbolReference(classRo, protoListSym, | ||
roClassLayout.baseProtocolsOffset, | ||
infoCategoryWriter.catBodyInfo.relocTemplate); | ||
} | ||
|
||
emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, | ||
extInfo.classMethods); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code pattern seems unfortunate from the previous/existing work. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
How should the enumeration look like ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it's too disruptive at this moment in this change. I'm just saying with this |
||
emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, | ||
extInfo.instanceMethods); | ||
|
||
emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, | ||
extInfo.classProps); | ||
|
||
emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, | ||
extInfo.instanceProps); | ||
|
||
// Mark all the categories as merged - this will be used to erase them later | ||
for (auto &catInfo : categories) | ||
catInfo.wasMerged = true; | ||
} | ||
|
||
// Erase the symbol at a given offset in an InputSection | ||
// | ||
// Looks up a Defined via tryGetDefinedAtIsecOffset(isec, offset); if there is | ||
// none, this is a no-op. Otherwise the symbol is removed from isec->symbols, | ||
// every reloc in isec located exactly at `offset` is dropped, and the | ||
// symbol's own section is erased when the symbol spans all of its data. | ||
void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, | ||
uint32_t offset) { | ||
Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); | ||
if (!sym) | ||
return; | ||
|
||
// Remove the symbol from isec->symbols | ||
// sym is already a non-null Defined*, so this assert is purely documentary. | ||
// NOTE(review): the symbol is erased from `isec`, while the final check | ||
// below uses sym->isec() - confirm whether these can be different sections, | ||
// in which case this erase would not find the symbol. | ||
assert(isa<Defined>(sym) && "Can only erase a Defined"); | ||
llvm::erase(isec->symbols, sym); | ||
|
||
// Remove the relocs that refer to this symbol | ||
// The match is on the reloc's offset within isec only, not on its target. | ||
auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; | ||
llvm::erase_if(isec->relocs, removeAtOff); | ||
|
||
// Now, if the symbol fully occupies a ConcatInputSection, we can also erase | ||
// the whole ConcatInputSection | ||
if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) | ||
if (cisec->data.size() == sym->size) | ||
eraseISec(cisec); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see this is only used by an assertion. Is it okay to check only the sizes and not the pointer contents? I don't have a better suggestion, but it seems unnatural to define an equality operator that considers only part of the contents.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`allPtrs == cmp.allPtrs` on line 394 compares the pointer list contents - i.e. it checks that the symbols in the array are identical (as pointer values) and are in the same order. I am not sure what you mean here - do you mean that comparing the symbols by pointer value is not OK? This should be OK, as the symbols are unique pointers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I missed that part. By the way, what about `categoryPrefix`? Doesn't it matter? Anyhow, overall I don't feel we need this operator overload just for an assertion, and I wonder if we can inline the comparison into the assertion somehow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`categoryPrefix` shouldn't matter either way. Yes, operator overloading seems necessary, but I'm not sure how else to easily achieve the result. I guess we could do something like `parseProtocolListInfo(a).structSize == parseProtocolListInfo(b).structSize && parseProtocolListInfo(a). ...`, which would mean calling `parseProtocolListInfo` for each field, which looks worse than the current approach.