Skip to content

[lld-macho][ObjC] Implement category merging into base class #92448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 140 additions & 13 deletions lld/MachO/ObjC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,8 @@ class ObjcCategoryMerger {
InfoWriteSection catPtrListInfo;
};

// Information about a pointer list in the original categories (method lists,
// protocol lists, etc)
// Information about a pointer list in the original categories or class(method
// lists, protocol lists, etc)
struct PointerListInfo {
PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
: categoryPrefix(_categoryPrefix),
Expand All @@ -395,9 +395,9 @@ class ObjcCategoryMerger {
std::vector<Symbol *> allPtrs;
};

// Full information about all the categories that extend a class. This will
// include all the additional methods, protocols, and properties that are
// contained in all the categories that extend a particular class.
// Full information describing an ObjC class . This will include all the
// additional methods, protocols, and properties that are contained in the
// class and all the categories that extend a particular class.
struct ClassExtensionInfo {
ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};

Expand Down Expand Up @@ -456,9 +456,9 @@ class ObjcCategoryMerger {
const ClassExtensionInfo &extInfo,
const PointerListInfo &ptrList);

void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
const ClassExtensionInfo &extInfo,
const PointerListInfo &ptrList);
Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
const ClassExtensionInfo &extInfo,
const PointerListInfo &ptrList);

Defined *emitCategory(const ClassExtensionInfo &extInfo);
Defined *emitCatListEntrySec(const std::string &forCategoryName,
Expand All @@ -474,6 +474,10 @@ class ObjcCategoryMerger {
uint32_t offset);
Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
uint32_t offset);
Defined *getClassRo(const Defined *classSym, bool getMetaRo);
void mergeCategoriesIntoBaseClass(const Defined *baseClass,
std::vector<InfoInputCategory> &categories);
void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
uint32_t offset);

Expand Down Expand Up @@ -552,6 +556,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
return dyn_cast_or_null<Defined>(sym);
}

// Get the class's ro_data symbol. If getMetaRo is true, then we will return
// the meta-class's ro_data symbol. Otherwise, we will return the class
// (instance) ro_data symbol.
Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
bool getMetaRo) {
ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
if (!isec)
return nullptr;

if (!getMetaRo)
return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
classSym->value);

Defined *metaClass = tryGetDefinedAtIsecOffset(
isec, classLayout.metaClassOffset + classSym->value);
if (!metaClass)
return nullptr;

return tryGetDefinedAtIsecOffset(
dyn_cast<ConcatInputSection>(metaClass->isec()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can it be null? Then tryGetDefinedAtIsecOffset didn't seem to check it.
Otherwise, can we make it a static case, or adding an assertion?

Copy link
Contributor Author

@alx32 alx32 May 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tryGetDefinedAtIsecOffset does check it for null. It does tryGetSymbolAtIsecOffset(which checks for null) + dyn_cast_or_null (also checks for null).

classLayout.roDataOffset);
}

// Given an ConcatInputSection or CStringInputSection and an offset, if there is
// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
Expand Down Expand Up @@ -769,11 +796,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,

// Generate a protocol list (including header) and link it into the parent at
// the specified offset.
void ObjcCategoryMerger::emitAndLinkProtocolList(
Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
Defined *parentSym, uint32_t linkAtOffset,
const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
if (ptrList.allPtrs.empty())
return;
return nullptr;

assert(ptrList.allPtrs.size() == ptrList.structCount);

Expand Down Expand Up @@ -820,6 +847,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList(
infoCategoryWriter.catPtrListInfo.relocTemplate);
offset += target->wordSize;
}

return ptrListSym;
}

// Generate a pointer list (including header) and link it into the parent at the
Expand Down Expand Up @@ -1265,10 +1294,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() {
void ObjcCategoryMerger::doMerge() {
collectAndValidateCategoriesData();

for (auto &entry : categoryMap)
if (entry.second.size() > 1)
for (auto &[baseClass, catInfos] : categoryMap) {
if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
// Merge all categories into the base class
mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
} else if (catInfos.size() > 1) {
// Merge all categories into a new, single category
mergeCategoriesIntoSingleCategory(entry.second);
mergeCategoriesIntoSingleCategory(catInfos);
}
}

// Erase all categories that were merged
eraseMergedCategories();
Expand Down Expand Up @@ -1302,3 +1336,96 @@ void objc::mergeCategories() {
}

void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }

void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
assert(categories.size() >= 1 && "Expected at least one category to merge");

// Collect all the info from the categories
ClassExtensionInfo extInfo(catLayout);
for (auto &catInfo : categories) {
parseCatInfoToExtInfo(catInfo, extInfo);
}

// Get metadata for the base class
Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());

// Now collect the info from the base class from the various lists in the
// class metadata
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
extInfo.protocols);

parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
extInfo.classMethods);

parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
extInfo.classProps);

parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
extInfo.instanceMethods);

parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
extInfo.instanceProps);

// Erase the old lists - these will be generated and replaced
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);

// Emit the newly merged lists - first into the meta RO then into the class RO
emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
extInfo.classMethods);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code pattern seems unfortunate from the previous/existing work.
All these repetitive operations are related to extInfo and its fields classMethods, ...
I think the right abstraction is to do these operations in ClassExtensionInfo so that we can call extInfo.emitAndLinkPointerList(metaRO, roClassLayout) here.
I guess we might do the similar thing for parsePointerListInfo as it's about collecting info into ClassExtensInfo. Basically, it'd be much cleaner by implementing reader/writer parts within it and enumerate each fields.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and enumerate each fields.

How should the enumeration look like ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's too disruptive at this moment in this change. I'm just saying with this mergeCategoriesIntoBaseClass introduction, I'm seeing a lot of repetition from the previous mergeCategoriesIntoSingleCategory case. At a high-level, I'd like to see more structured data structure -- this main merger class has two components that are (1) a reader/parser which produces extInfo and (2) a writer which writes extInfo. Currently it appears all data structures are flattened, and accessed across different fields.


// Protocols are a special case - the single list is referenced by both the
// class RO and meta RO. Here we emit it and link it into the meta RO
Defined *protoListSym = emitAndLinkProtocolList(
metaRo, roClassLayout.baseProtocolsOffset, extInfo, extInfo.protocols);

emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
extInfo.classProps);

emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
extInfo.instanceMethods);

// If we emitted a new protocol list, link it to the class RO also
if (protoListSym) {
createSymbolReference(classRo, protoListSym,
roClassLayout.baseProtocolsOffset,
infoCategoryWriter.catBodyInfo.relocTemplate);
}

emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
extInfo.instanceProps);

// Mark all the categories as merged - this will be used to erase them later
for (auto &catInfo : categories)
catInfo.wasMerged = true;
}

// Erase the symbol at a given offset in an InputSection
void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
uint32_t offset) {
Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
if (!sym)
return;

// Remove the symbol from isec->symbols
assert(isa<Defined>(sym) && "Can only erase a Defined");
llvm::erase(isec->symbols, sym);

// Remove the relocs that refer to this symbol
auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
llvm::erase_if(isec->relocs, removeAtOff);

// Now, if the symbol fully occupies a ConcatInputSection, we can also erase
// the whole ConcatInputSection
if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
if (cisec->data.size() == sym->size)
eraseISec(cisec);
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# REQUIRES: aarch64
# RUN: rm -rf %t; split-file %s %t && cd %t

## Create a dylib with a fake base class to link against
############ Test merging multiple categories into a single category ############
## Create a dylib with a fake base class to link against in when merging between categories
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s
# RUN: %lld -arch arm64 a64_fakedylib.o -o a64_fakedylib.dylib -dylib

Expand All @@ -14,6 +15,15 @@
# RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS
# RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS

############ Test merging multiple categories into the base class ############
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s
# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o

# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE
# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE


#### Check merge categories enabled ###
# Check that the original categories are not there
MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
Expand Down Expand Up @@ -44,6 +54,28 @@ NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02


#### Check merge cateogires into base class is disabled ####
NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02

#### Check merge cateogires into base class is enabled and categories are merged into base class ####
YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02

YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass
YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass
YES_MERGE_INTO_BASE: baseMethods
YES_MERGE_INTO_BASE-NEXT: entsize 24
YES_MERGE_INTO_BASE-NEXT: count 3
YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod
YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod]
YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod
YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod]
YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod
YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod]

#--- a64_fakedylib.s

Expand Down Expand Up @@ -156,3 +188,94 @@ L_OBJC_IMAGE_INFO:

.addrsig
.addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01

#--- merge_base_class_minimal.s
; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s
; ================== Generated from ObjC: ==================
; __attribute__((objc_root_class))
; @interface MyBaseClass
; - (void)baseInstanceMethod;
; @end
;
; @implementation MyBaseClass
; - (void)baseInstanceMethod {}
; @end
; ================== Generated from ObjC ==================
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 11, 0
.p2align 2
"-[MyBaseClass baseInstanceMethod]":
.cfi_startproc
; %bb.0:
ret
.cfi_endproc
.section __DATA,__objc_data
.globl _OBJC_CLASS_$_MyBaseClass
.p2align 3, 0x0
_OBJC_CLASS_$_MyBaseClass:
.quad _OBJC_METACLASS_$_MyBaseClass
.quad 0
.quad 0
.quad 0
.quad __OBJC_CLASS_RO_$_MyBaseClass
.globl _OBJC_METACLASS_$_MyBaseClass
.p2align 3, 0x0
_OBJC_METACLASS_$_MyBaseClass:
.quad _OBJC_METACLASS_$_MyBaseClass
.quad _OBJC_CLASS_$_MyBaseClass
.quad 0
.quad 0
.quad __OBJC_METACLASS_RO_$_MyBaseClass
.section __TEXT,__objc_classname,cstring_literals
l_OBJC_CLASS_NAME_:
.asciz "MyBaseClass"
.section __DATA,__objc_const
.p2align 3, 0x0
__OBJC_METACLASS_RO_$_MyBaseClass:
.long 3
.long 40
.long 40
.space 4
.quad 0
.quad l_OBJC_CLASS_NAME_
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.section __TEXT,__objc_methname,cstring_literals
l_OBJC_METH_VAR_NAME_:
.asciz "baseInstanceMethod"
.section __TEXT,__objc_methtype,cstring_literals
l_OBJC_METH_VAR_TYPE_:
.asciz "v16@0:8"
.section __DATA,__objc_const
.p2align 3, 0x0
__OBJC_$_INSTANCE_METHODS_MyBaseClass:
.long 24
.long 1
.quad l_OBJC_METH_VAR_NAME_
.quad l_OBJC_METH_VAR_TYPE_
.quad "-[MyBaseClass baseInstanceMethod]"
.p2align 3, 0x0
__OBJC_CLASS_RO_$_MyBaseClass:
.long 2
.long 0
.long 0
.space 4
.quad 0
.quad l_OBJC_CLASS_NAME_
.quad __OBJC_$_INSTANCE_METHODS_MyBaseClass
.quad 0
.quad 0
.quad 0
.quad 0
.section __DATA,__objc_classlist,regular,no_dead_strip
.p2align 3, 0x0
l_OBJC_LABEL_CLASS_$:
.quad _OBJC_CLASS_$_MyBaseClass
.section __DATA,__objc_imageinfo,regular,no_dead_strip
L_OBJC_IMAGE_INFO:
.long 0
.long 64
.subsections_via_symbols
Loading