Skip to content

Commit d68eb5b

Browse files
authored
[lld-macho][NFC] Track category merger input data source language for better verification (#95473)
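This change adds tracking of the source language (Objective-C or Swift) of the various input structs used by the category merger. The language is identified from the expected symbol-name prefixes of the category and base-class symbols. It also tightens the existing checks so that, wherever the source language is known, the data is verified against what is expected for that language (for example, the protocol-list size assertion now requires the size to match the list's source language).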
1 parent 2c2f490 commit d68eb5b

File tree

2 files changed: +51 additions, -22 deletions

lld/MachO/ObjC.cpp

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -349,11 +349,15 @@ void objc::checkCategories() {
349349
namespace {
350350

351351
class ObjcCategoryMerger {
352+
// In which language was a particular construct originally defined
353+
enum SourceLanguage { Unknown, ObjC, Swift };
354+
352355
// Information about an input category
353356
struct InfoInputCategory {
354357
ConcatInputSection *catListIsec;
355358
ConcatInputSection *catBodyIsec;
356359
uint32_t offCatListIsec = 0;
360+
SourceLanguage sourceLanguage = SourceLanguage::Unknown;
357361

358362
bool wasMerged = false;
359363
};
@@ -413,7 +417,9 @@ class ObjcCategoryMerger {
413417
// Merged names of containers. Ex: base|firstCategory|secondCategory|...
414418
std::string mergedContainerName;
415419
std::string baseClassName;
416-
Symbol *baseClass = nullptr;
420+
const Symbol *baseClass = nullptr;
421+
SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
422+
417423
CategoryLayout &catLayout;
418424

419425
// In case we generate new data, mark the new data as belonging to this file
@@ -456,10 +462,12 @@ class ObjcCategoryMerger {
456462
ClassExtensionInfo &extInfo);
457463

458464
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
459-
PointerListInfo &ptrList);
465+
PointerListInfo &ptrList,
466+
SourceLanguage sourceLang);
460467

461468
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
462-
uint32_t secOffset);
469+
uint32_t secOffset,
470+
SourceLanguage sourceLang);
463471

464472
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
465473
PointerListInfo &ptrList);
@@ -653,9 +661,9 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
653661
// Parse a protocol list that might be linked to ConcatInputSection at a given
654662
// offset. The format of the protocol list is different than other lists (prop
655663
// lists, method lists) so we need to parse it differently
656-
void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
657-
uint32_t secOffset,
658-
PointerListInfo &ptrList) {
664+
void ObjcCategoryMerger::parseProtocolListInfo(
665+
const ConcatInputSection *isec, uint32_t secOffset,
666+
PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
659667
assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
660668
"Tried to read pointer list beyond protocol section end");
661669

@@ -684,8 +692,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
684692
[[maybe_unused]] uint32_t expectedListSizeSwift =
685693
expectedListSize - target->wordSize;
686694

687-
assert((expectedListSize == ptrListSym->isec()->data.size() ||
688-
expectedListSizeSwift == ptrListSym->isec()->data.size()) &&
695+
assert(((expectedListSize == ptrListSym->isec()->data.size() &&
696+
sourceLang == SourceLanguage::ObjC) ||
697+
(expectedListSizeSwift == ptrListSym->isec()->data.size() &&
698+
sourceLang == SourceLanguage::Swift)) &&
689699
"Protocol list does not match expected size");
690700

691701
uint32_t off = protocolListHeaderLayout.totalSize;
@@ -708,9 +718,10 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
708718
// Parse a protocol list and return the PointerListInfo for it
709719
ObjcCategoryMerger::PointerListInfo
710720
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
711-
uint32_t secOffset) {
721+
uint32_t secOffset,
722+
SourceLanguage sourceLang) {
712723
PointerListInfo ptrList;
713-
parseProtocolListInfo(isec, secOffset, ptrList);
724+
parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
714725
return ptrList;
715726
}
716727

@@ -809,7 +820,7 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
809820
extInfo.classMethods);
810821

811822
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
812-
extInfo.protocols);
823+
extInfo.protocols, catInfo.sourceLanguage);
813824

814825
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
815826
extInfo.instanceProps);
@@ -1151,22 +1162,28 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
11511162
if (nlCategories.count(categorySym))
11521163
continue;
11531164

1154-
assert(categorySym->getName().starts_with(objc::symbol_names::category) ||
1155-
categorySym->getName().starts_with(
1156-
objc::symbol_names::swift_objc_category));
1157-
11581165
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
11591166
assert(catBodyIsec &&
11601167
"Category data section is not an ConcatInputSection");
11611168

1169+
SourceLanguage eLang = SourceLanguage::Unknown;
1170+
if (categorySym->getName().starts_with(objc::symbol_names::category))
1171+
eLang = SourceLanguage::ObjC;
1172+
else if (categorySym->getName().starts_with(
1173+
objc::symbol_names::swift_objc_category))
1174+
eLang = SourceLanguage::Swift;
1175+
else
1176+
llvm_unreachable("Unexpected category symbol name");
1177+
1178+
InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1179+
11621180
// Check that the category has a reloc at 'klassOffset' (which is
11631181
// a pointer to the class symbol)
11641182

11651183
Symbol *classSym =
11661184
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
11671185
assert(classSym && "Category does not have a valid base class");
11681186

1169-
InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
11701187
categoryMap[classSym].push_back(catInputInfo);
11711188

11721189
collectCategoryWriterInfoFromCategory(catInputInfo);
@@ -1366,6 +1383,16 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
13661383

13671384
// Collect all the info from the categories
13681385
ClassExtensionInfo extInfo(catLayout);
1386+
extInfo.baseClass = baseClass;
1387+
1388+
if (baseClass->getName().starts_with(objc::symbol_names::klass))
1389+
extInfo.baseClassSourceLanguage = SourceLanguage::ObjC;
1390+
else if (baseClass->getName().starts_with(
1391+
objc::symbol_names::swift_objc_klass))
1392+
extInfo.baseClassSourceLanguage = SourceLanguage::Swift;
1393+
else
1394+
llvm_unreachable("Unexpected base class symbol name");
1395+
13691396
for (auto &catInfo : categories) {
13701397
parseCatInfoToExtInfo(catInfo, extInfo);
13711398
}
@@ -1382,14 +1409,15 @@ void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
13821409
// Protocol lists are a special case - the same protocol list is in classRo
13831410
// and metaRo, so we only need to parse it once
13841411
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1385-
extInfo.protocols);
1412+
extInfo.protocols, extInfo.baseClassSourceLanguage);
13861413

13871414
// Check that the classRo and metaRo protocol lists are identical
1388-
assert(
1389-
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) ==
1390-
parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) &&
1391-
"Category merger expects classRo and metaRo to have the same protocol "
1392-
"list");
1415+
assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1416+
extInfo.baseClassSourceLanguage) ==
1417+
parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1418+
extInfo.baseClassSourceLanguage) &&
1419+
"Category merger expects classRo and metaRo to have the same protocol "
1420+
"list");
13931421

13941422
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
13951423
extInfo.classMethods);

lld/MachO/ObjC.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ constexpr const char categoryClassMethods[] =
3434
constexpr const char categoryProtocols[] = "__OBJC_CATEGORY_PROTOCOLS_$_";
3535

3636
constexpr const char swift_objc_category[] = "__CATEGORY_";
37+
constexpr const char swift_objc_klass[] = "_$s";
3738
} // namespace symbol_names
3839

3940
// Check for duplicate method names within related categories / classes.

0 commit comments

Comments
 (0)