Skip to content

Commit b963931

Browse files
authored
[lld-macho][ObjC] Implement category merging into base class (#92448)
Currently category merging only supports merging multiple categories into one. With this commit we add the ability to fully merge categories into the base class, if the base class is included in the current module. This is the optimal approach for defined classes.
1 parent 16a5fd3 commit b963931

File tree

3 files changed

+500
-14
lines changed

3 files changed

+500
-14
lines changed

lld/MachO/ObjC.cpp

Lines changed: 166 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -379,12 +379,21 @@ class ObjcCategoryMerger {
379379
InfoWriteSection catPtrListInfo;
380380
};
381381

382-
// Information about a pointer list in the original categories (method lists,
383-
// protocol lists, etc)
382+
// Information about a pointer list in the original categories or class (method
383+
// lists, protocol lists, etc)
384384
struct PointerListInfo {
385+
PointerListInfo() = default;
386+
PointerListInfo(const PointerListInfo &) = default;
385387
PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
386388
: categoryPrefix(_categoryPrefix),
387389
pointersPerStruct(_pointersPerStruct) {}
390+
391+
inline bool operator==(const PointerListInfo &cmp) {
392+
return pointersPerStruct == cmp.pointersPerStruct &&
393+
structSize == cmp.structSize && structCount == cmp.structCount &&
394+
allPtrs == cmp.allPtrs;
395+
}
396+
388397
const char *categoryPrefix;
389398

390399
uint32_t pointersPerStruct = 0;
@@ -395,9 +404,9 @@ class ObjcCategoryMerger {
395404
std::vector<Symbol *> allPtrs;
396405
};
397406

398-
// Full information about all the categories that extend a class. This will
399-
// include all the additional methods, protocols, and properties that are
400-
// contained in all the categories that extend a particular class.
407+
// Full information describing an ObjC class. This will include all the
408+
// additional methods, protocols, and properties that are contained in the
409+
// class and all the categories that extend a particular class.
401410
struct ClassExtensionInfo {
402411
ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
403412

@@ -449,16 +458,19 @@ class ObjcCategoryMerger {
449458
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
450459
PointerListInfo &ptrList);
451460

461+
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
462+
uint32_t secOffset);
463+
452464
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
453465
PointerListInfo &ptrList);
454466

455467
void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
456468
const ClassExtensionInfo &extInfo,
457469
const PointerListInfo &ptrList);
458470

459-
void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
460-
const ClassExtensionInfo &extInfo,
461-
const PointerListInfo &ptrList);
471+
Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
472+
const ClassExtensionInfo &extInfo,
473+
const PointerListInfo &ptrList);
462474

463475
Defined *emitCategory(const ClassExtensionInfo &extInfo);
464476
Defined *emitCatListEntrySec(const std::string &forCategoryName,
@@ -474,6 +486,10 @@ class ObjcCategoryMerger {
474486
uint32_t offset);
475487
Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
476488
uint32_t offset);
489+
Defined *getClassRo(const Defined *classSym, bool getMetaRo);
490+
void mergeCategoriesIntoBaseClass(const Defined *baseClass,
491+
std::vector<InfoInputCategory> &categories);
492+
void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
477493
void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
478494
uint32_t offset);
479495

@@ -552,6 +568,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
552568
return dyn_cast_or_null<Defined>(sym);
553569
}
554570

571+
// Get the class's ro_data symbol. If getMetaRo is true, then we will return
572+
// the meta-class's ro_data symbol. Otherwise, we will return the class
573+
// (instance) ro_data symbol.
574+
Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
575+
bool getMetaRo) {
576+
ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
577+
if (!isec)
578+
return nullptr;
579+
580+
if (!getMetaRo)
581+
return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
582+
classSym->value);
583+
584+
Defined *metaClass = tryGetDefinedAtIsecOffset(
585+
isec, classLayout.metaClassOffset + classSym->value);
586+
if (!metaClass)
587+
return nullptr;
588+
589+
return tryGetDefinedAtIsecOffset(
590+
dyn_cast<ConcatInputSection>(metaClass->isec()),
591+
classLayout.roDataOffset);
592+
}
593+
555594
// Given a ConcatInputSection or CStringInputSection and an offset, if there is
556595
// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
557596
void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
@@ -663,6 +702,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
663702
"Protocol list end offset does not match expected size");
664703
}
665704

705+
// Parse a protocol list and return the PointerListInfo for it
706+
ObjcCategoryMerger::PointerListInfo
707+
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
708+
uint32_t secOffset) {
709+
PointerListInfo ptrList;
710+
parseProtocolListInfo(isec, secOffset, ptrList);
711+
return ptrList;
712+
}
713+
666714
// Parse a pointer list that might be linked to ConcatInputSection at a given
667715
// offset. This can be used for instance methods, class methods, instance props
668716
// and class props since they have the same format.
@@ -769,11 +817,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
769817

770818
// Generate a protocol list (including header) and link it into the parent at
771819
// the specified offset.
772-
void ObjcCategoryMerger::emitAndLinkProtocolList(
820+
Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
773821
Defined *parentSym, uint32_t linkAtOffset,
774822
const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
775823
if (ptrList.allPtrs.empty())
776-
return;
824+
return nullptr;
777825

778826
assert(ptrList.allPtrs.size() == ptrList.structCount);
779827

@@ -820,6 +868,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
820868
infoCategoryWriter.catPtrListInfo.relocTemplate);
821869
offset += target->wordSize;
822870
}
871+
872+
return ptrListSym;
823873
}
824874

825875
// Generate a pointer list (including header) and link it into the parent at the
@@ -1265,10 +1315,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
12651315
void ObjcCategoryMerger::doMerge() {
12661316
collectAndValidateCategoriesData();
12671317

1268-
for (auto &entry : categoryMap)
1269-
if (entry.second.size() > 1)
1318+
for (auto &[baseClass, catInfos] : categoryMap) {
1319+
if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1320+
// Merge all categories into the base class
1321+
mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1322+
} else if (catInfos.size() > 1) {
12701323
// Merge all categories into a new, single category
1271-
mergeCategoriesIntoSingleCategory(entry.second);
1324+
mergeCategoriesIntoSingleCategory(catInfos);
1325+
}
1326+
}
12721327

12731328
// Erase all categories that were merged
12741329
eraseMergedCategories();
@@ -1302,3 +1357,101 @@ void objc::mergeCategories() {
13021357
}
13031358

13041359
// Namespace-level entry point that simply forwards to
// ObjcCategoryMerger::doCleanup().
void objc::doCleanup() {
  ObjcCategoryMerger::doCleanup();
}
1360+
1361+
void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1362+
const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1363+
assert(categories.size() >= 1 && "Expected at least one category to merge");
1364+
1365+
// Collect all the info from the categories
1366+
ClassExtensionInfo extInfo(catLayout);
1367+
for (auto &catInfo : categories) {
1368+
parseCatInfoToExtInfo(catInfo, extInfo);
1369+
}
1370+
1371+
// Get metadata for the base class
1372+
Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1373+
ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1374+
Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1375+
ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1376+
1377+
// Now collect the info from the base class from the various lists in the
1378+
// class metadata
1379+
1380+
// Protocol lists are a special case - the same protocol list is in classRo
1381+
// and metaRo, so we only need to parse it once
1382+
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1383+
extInfo.protocols);
1384+
1385+
// Check that the classRo and metaRo protocol lists are identical
1386+
assert(
1387+
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
1388+
parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
1389+
"Category merger expects classRo and metaRo to have the same protocol "
1390+
"list");
1391+
1392+
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1393+
extInfo.classMethods);
1394+
parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1395+
extInfo.instanceMethods);
1396+
1397+
parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1398+
extInfo.classProps);
1399+
parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1400+
extInfo.instanceProps);
1401+
1402+
// Erase the old lists - these will be generated and replaced
1403+
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1404+
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1405+
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1406+
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1407+
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1408+
eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1409+
1410+
// Emit the newly merged lists - first into the meta RO then into the class RO
1411+
// First we emit and link the protocol list into the meta RO. Then we link it
1412+
// in the classRo as well (they're supposed to be identical)
1413+
if (Defined *protoListSym =
1414+
emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1415+
extInfo, extInfo.protocols)) {
1416+
createSymbolReference(classRo, protoListSym,
1417+
roClassLayout.baseProtocolsOffset,
1418+
infoCategoryWriter.catBodyInfo.relocTemplate);
1419+
}
1420+
1421+
emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1422+
extInfo.classMethods);
1423+
emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1424+
extInfo.instanceMethods);
1425+
1426+
emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1427+
extInfo.classProps);
1428+
1429+
emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1430+
extInfo.instanceProps);
1431+
1432+
// Mark all the categories as merged - this will be used to erase them later
1433+
for (auto &catInfo : categories)
1434+
catInfo.wasMerged = true;
1435+
}
1436+
1437+
// Erase the symbol at a given offset in an InputSection
1438+
void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1439+
uint32_t offset) {
1440+
Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1441+
if (!sym)
1442+
return;
1443+
1444+
// Remove the symbol from isec->symbols
1445+
assert(isa<Defined>(sym) && "Can only erase a Defined");
1446+
llvm::erase(isec->symbols, sym);
1447+
1448+
// Remove the relocs that refer to this symbol
1449+
auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1450+
llvm::erase_if(isec->relocs, removeAtOff);
1451+
1452+
// Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1453+
// the whole ConcatInputSection
1454+
if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1455+
if (cisec->data.size() == sym->size)
1456+
eraseISec(cisec);
1457+
}

0 commit comments

Comments
 (0)