Skip to content

[lld-macho] icf objc stubs #79730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 1, 2024
Merged

[lld-macho] icf objc stubs #79730

merged 4 commits into from
Feb 1, 2024

Conversation

kyulee-com
Copy link
Contributor

This supports icf for objc stubs.

@llvmbot
Copy link
Member

llvmbot commented Jan 28, 2024

@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-macho

Author: Kyungwoo Lee (kyulee-com)

Changes

This supports icf for objc stubs.


Full diff: https://github.com/llvm/llvm-project/pull/79730.diff

7 Files Affected:

  • (modified) lld/MachO/Driver.cpp (+2-3)
  • (modified) lld/MachO/SyntheticSections.cpp (+71-11)
  • (modified) lld/MachO/SyntheticSections.h (+9-1)
  • (modified) lld/MachO/Writer.cpp (+10-1)
  • (added) lld/test/MachO/arm64-objc-stubs-dead.s (+27)
  • (modified) lld/test/MachO/objc-selrefs.s (-3)
  • (modified) lld/test/MachO/x86-64-objc-stubs.s (+10)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 411fbcfcf233eb8..519bd483dacb688 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1275,11 +1275,10 @@ static void foldIdenticalLiterals() {
 static void addSynthenticMethnames() {
   std::string &data = *make<std::string>();
   llvm::raw_string_ostream os(data);
-  const int prefixLength = ObjCStubsSection::symbolPrefix.size();
   for (Symbol *sym : symtab->getSymbols())
     if (isa<Undefined>(sym))
-      if (sym->getName().starts_with(ObjCStubsSection::symbolPrefix))
-        os << sym->getName().drop_front(prefixLength) << '\0';
+      if (ObjCStubsSection::isObjCStubSymbol(sym))
+        os << ObjCStubsSection::getMethName(sym) << '\0';
 
   if (data.empty())
     return;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 53220ad04b842c1..d3674a2b2c5dcc5 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -814,11 +814,54 @@ ObjCStubsSection::ObjCStubsSection()
               : target->objcStubsSmallAlignment;
 }
 
+bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
+  return sym->getName().starts_with(symbolPrefix);
+}
+
+StringRef ObjCStubsSection::getMethName(Symbol *sym) {
+  assert(isObjCStubSymbol(sym) && "not an objc stub");
+  auto name = sym->getName();
+  StringRef methname = name.drop_front(symbolPrefix.size());
+  return methname;
+}
+
+void ObjCStubsSection::initialize() {
+  // Do not fold selrefs without ICF.
+  if (config->icfLevel == ICFLevel::none)
+    return;
+
+  // Search methnames already referenced in __objc_selrefs
+  // Map the name to the corresponding selref entry
+  // which we will reuse when creating objc stubs.
+  for (ConcatInputSection *isec : inputSections) {
+    if (isec->shouldOmitFromOutput())
+      continue;
+    if (isec->getName() != "__objc_selrefs")
+      continue;
+    // We expect a single relocation per selref entry to __objc_methname that
+    // might be aggregated.
+    assert(isec->relocs.size() == 1);
+    auto Reloc = isec->relocs[0];
+    if (const auto *sym = Reloc.referent.dyn_cast<Symbol *>()) {
+      if (const auto *d = dyn_cast<Defined>(sym)) {
+        auto *cisec = cast<CStringInputSection>(d->isec);
+        auto methname = cisec->getStringRefAtOffset(d->value);
+        methnameToselref[methname] = isec;
+      }
+    }
+  }
+}
+
 void ObjCStubsSection::addEntry(Symbol *sym) {
-  assert(sym->getName().starts_with(symbolPrefix) && "not an objc stub");
-  StringRef methname = sym->getName().drop_front(symbolPrefix.size());
-  offsets.push_back(
-      in.objcMethnameSection->getStringOffset(methname).outSecOff);
+  StringRef methname = getMethName(sym);
+  // We will create a selref entry for each unique methname.
+  if (!methnameToselref.count(methname) &&
+      !methnameToidxOffsetPair.count(methname)) {
+    auto methnameOffset =
+        in.objcMethnameSection->getStringOffset(methname).outSecOff;
+    auto selIndex = methnameToidxOffsetPair.size();
+    methnameToidxOffsetPair[methname] = {selIndex, methnameOffset};
+  }
 
   auto stubSize = config->objcStubsMode == ObjCStubsMode::fast
                       ? target->objcStubsFastSize
@@ -853,10 +896,12 @@ void ObjCStubsSection::setUp() {
       in.stubs->addEntry(objcMsgSend);
   }
 
-  size_t size = offsets.size() * target->wordSize;
+  size_t size = methnameToidxOffsetPair.size() * target->wordSize;
   uint8_t *selrefsData = bAlloc().Allocate<uint8_t>(size);
-  for (size_t i = 0, n = offsets.size(); i < n; ++i)
-    write64le(&selrefsData[i * target->wordSize], offsets[i]);
+  for (const auto &[methname, idxOffsetPair] : methnameToidxOffsetPair) {
+    const auto &[idx, offset] = idxOffsetPair;
+    write64le(&selrefsData[idx * target->wordSize], offset);
+  }
 
   in.objcSelrefs =
       makeSyntheticInputSection(segment_names::data, section_names::objcSelrefs,
@@ -865,12 +910,13 @@ void ObjCStubsSection::setUp() {
                                 /*align=*/target->wordSize);
   in.objcSelrefs->live = true;
 
-  for (size_t i = 0, n = offsets.size(); i < n; ++i) {
+  for (const auto &[methname, idxOffsetPair] : methnameToidxOffsetPair) {
+    const auto &[idx, offset] = idxOffsetPair;
     in.objcSelrefs->relocs.push_back(
         {/*type=*/target->unsignedRelocType,
          /*pcrel=*/false, /*length=*/3,
-         /*offset=*/static_cast<uint32_t>(i * target->wordSize),
-         /*addend=*/offsets[i] * in.objcMethnameSection->align,
+         /*offset=*/static_cast<uint32_t>(idx * target->wordSize),
+         /*addend=*/offset * in.objcMethnameSection->align,
          /*referent=*/in.objcMethnameSection->isec});
   }
 
@@ -894,8 +940,22 @@ void ObjCStubsSection::writeTo(uint8_t *buf) const {
   uint64_t stubOffset = 0;
   for (size_t i = 0, n = symbols.size(); i < n; ++i) {
     Defined *sym = symbols[i];
+    uint64_t selBaseAddr;
+    uint64_t selIndex;
+
+    auto methname = getMethName(sym);
+    auto j = methnameToselref.find(methname);
+    if (j != methnameToselref.end()) {
+      selBaseAddr = j->second->getVA(0);
+      selIndex = 0;
+    } else {
+      auto k = methnameToidxOffsetPair.find(methname);
+      assert(k != methnameToIdxOffsetPair.end());
+      selBaseAddr = in.objcSelrefs->getVA();
+      selIndex = k->second.first;
+    }
     target->writeObjCMsgSendStub(buf + stubOffset, sym, in.objcStubs->addr,
-                                 stubOffset, in.objcSelrefs->getVA(), i,
+                                 stubOffset, selBaseAddr, selIndex,
                                  objcMsgSend);
   }
 }
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5fb7b6e09e8e63e..00944ebfaee5eed 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -324,6 +324,7 @@ class StubHelperSection final : public SyntheticSection {
 class ObjCStubsSection final : public SyntheticSection {
 public:
   ObjCStubsSection();
+  void initialize();
   void addEntry(Symbol *sym);
   uint64_t getSize() const override;
   bool isNeeded() const override { return !symbols.empty(); }
@@ -332,10 +333,17 @@ class ObjCStubsSection final : public SyntheticSection {
   void setUp();
 
   static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
+  static bool isObjCStubSymbol(Symbol *sym);
+  static StringRef getMethName(Symbol *sym);
 
 private:
   std::vector<Defined *> symbols;
-  std::vector<uint32_t> offsets;
+  // Existing mapping from methname to selref (0 index is assumed).
+  llvm::StringMap<InputSection *> methnameToselref;
+  // Newly created mapping from methname to the pair of index (selref) and offset
+  // (methname).
+  llvm::MapVector<StringRef, std::pair<uint32_t, uint32_t>>
+      methnameToidxOffsetPair;
   Symbol *objcMsgSend = nullptr;
 };
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 1b0e64abe843adf..65b598d1d7c422a 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -720,6 +720,7 @@ static void addNonWeakDefinition(const Defined *defined) {
 
 void Writer::scanSymbols() {
   TimeTraceScope timeScope("Scan symbols");
+  in.objcStubs->initialize();
   for (Symbol *sym : symtab->getSymbols()) {
     if (auto *defined = dyn_cast<Defined>(sym)) {
       if (!defined->isLive())
@@ -736,8 +737,16 @@ void Writer::scanSymbols() {
       dysym->getFile()->refState =
           std::max(dysym->getFile()->refState, dysym->getRefState());
     } else if (isa<Undefined>(sym)) {
-      if (sym->getName().starts_with(ObjCStubsSection::symbolPrefix))
+      if (ObjCStubsSection::isObjCStubSymbol(sym)) {
+        // When -dead_strip is enabled, we don't want to emit any dead stubs.
+        // Although this stub symbol is yet undefined, addSym() was called
+        // during MarkLive.
+        if (config->deadStrip) {
+          if (!sym->isLive())
+            continue;
+        }
         in.objcStubs->addEntry(sym);
+      }
     }
   }
 
diff --git a/lld/test/MachO/arm64-objc-stubs-dead.s b/lld/test/MachO/arm64-objc-stubs-dead.s
new file mode 100644
index 000000000000000..5dcb171c17eac58
--- /dev/null
+++ b/lld/test/MachO/arm64-objc-stubs-dead.s
@@ -0,0 +1,27 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+
+# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o
+# RUN: llvm-nm %t.out | FileCheck %s
+# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -dead_strip -o %t.out %t.o
+# RUN: llvm-nm %t.out | FileCheck %s --check-prefix=DEAD
+
+# CHECK: _foo
+# CHECK: _objc_msgSend$length
+
+# DEAD-NOT: _foo
+# DEAD-NOT: _objc_msgSend$length
+
+.section __TEXT,__text
+
+.globl _foo
+_foo:
+  bl  _objc_msgSend$length
+  ret
+
+.globl _main
+_main:
+  ret
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/objc-selrefs.s b/lld/test/MachO/objc-selrefs.s
index 9dff440727ac34b..6d144f4938b4542 100644
--- a/lld/test/MachO/objc-selrefs.s
+++ b/lld/test/MachO/objc-selrefs.s
@@ -24,7 +24,6 @@
 # SELREFS-NEXT: __TEXT:__objc_methname:length
 # SELREFS-EMPTY:
 
-## We don't yet support dedup'ing implicitly-defined selrefs.
 # RUN: %lld -dylib -arch arm64 -lSystem --icf=all -o %t/explicit-and-implicit \
 # RUN:   %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o %t/implicit-selrefs.o
 # RUN: llvm-otool -vs __DATA __objc_selrefs %t/explicit-and-implicit \
@@ -43,8 +42,6 @@
 # EXPLICIT-AND-IMPLICIT:      Contents of (__DATA,__objc_selrefs) section
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:bar
-# NOTE: Ideally this wouldn't exist, but while it does it needs to point to the deduplicated string
-# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo
 # EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:length
 
 #--- explicit-selrefs-1.s
diff --git a/lld/test/MachO/x86-64-objc-stubs.s b/lld/test/MachO/x86-64-objc-stubs.s
index 2dd8d559377156b..1001d94b8d2cd12 100644
--- a/lld/test/MachO/x86-64-objc-stubs.s
+++ b/lld/test/MachO/x86-64-objc-stubs.s
@@ -10,6 +10,9 @@
 
 # WARNING: warning: -objc_stubs_small is not yet implemented, defaulting to -objc_stubs_fast
 
+# RUN: %lld -arch x86_64 -lSystem -o %t-icf.out --icf=all %t.o
+# RUN: llvm-otool -vs __DATA __objc_selrefs %t-icf.out | FileCheck %s --check-prefix=ICF
+
 # CHECK: Sections:
 # CHECK: __got            {{[0-9a-f]*}} [[#%x, GOTSTART:]] DATA
 # CHECK: __objc_selrefs   {{[0-9a-f]*}} [[#%x, SELSTART:]] DATA
@@ -21,6 +24,13 @@
 # CHECK-NEXT: [[#%x, FOOSELREF:]]  __TEXT:__objc_methname:foo
 # CHECK-NEXT: [[#%x, LENGTHSELREF:]]  __TEXT:__objc_methname:length
 
+# ICF: Contents of (__DATA,__objc_selrefs) section
+
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:foo
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:bar
+# ICF-NEXT: {{[0-9a-f]*}}  __TEXT:__objc_methname:length
+# ICF-EMPTY:
+
 # CHECK: Contents of (__TEXT,__objc_stubs) section
 
 # CHECK-NEXT: _objc_msgSend$foo:

Copy link

github-actions bot commented Jan 28, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@kyulee-com
Copy link
Contributor Author

kyulee-com commented Jan 28, 2024

The first commit it depends on is actually #79726.
I want to send separate PRs, but not sure how to stack them.

@kyulee-com kyulee-com force-pushed the icfstub branch 2 times, most recently from b7d9c36 to e1a8f42 Compare January 28, 2024 07:11
@kyulee-com
Copy link
Contributor Author

@int3 Can you take a look again, please?

@int3
Copy link
Contributor

int3 commented Feb 1, 2024

lgtm!

@kyulee-com kyulee-com merged commit 3913931 into llvm:main Feb 1, 2024
agozillon pushed a commit to agozillon/llvm-project that referenced this pull request Feb 5, 2024
This supports icf for objc stubs.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants