Skip to content

Commit b88ea0a

Browse files
dpaoliello authored and nikic committed
[llvm][aarch64] Fix Arm64EC name mangling algorithm (llvm#115567)
Arm64EC uses a special name mangling mode that adds `$$h` between the symbol name and its type. In MSVC's name mangling, `@` is used to separate the name and type, but it is also used for other purposes, such as the separator between paths in a fully qualified name. The original algorithm was quite fragile and made assumptions that didn't hold true for all MSVC mangled symbols, so instead of trying to improve that algorithm we now use the demangler to indicate where the insertion point should be (i.e., to parse the fully-qualified name and return the current string offset). Also fixed `isArm64ECMangledFunctionName` to search for `@$$h`, since the `$$h` must always come after a `@`. Fixes llvm#115231.
1 parent 08374c2 commit b88ea0a

File tree

6 files changed

+126
-20
lines changed

6 files changed

+126
-20
lines changed

llvm/include/llvm/Demangle/Demangle.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_DEMANGLE_DEMANGLE_H
1111

1212
#include <cstddef>
13+
#include <optional>
1314
#include <string>
1415
#include <string_view>
1516

@@ -54,6 +55,9 @@ enum MSDemangleFlags {
5455
char *microsoftDemangle(std::string_view mangled_name, size_t *n_read,
5556
int *status, MSDemangleFlags Flags = MSDF_None);
5657

58+
std::optional<size_t>
59+
getArm64ECInsertionPointInMangledName(std::string_view MangledName);
60+
5761
// Demangles a Rust v0 mangled symbol.
5862
char *rustDemangle(std::string_view MangledName);
5963

llvm/include/llvm/Demangle/MicrosoftDemangle.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1010
#define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1111

12+
#include "llvm/Demangle/Demangle.h"
1213
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
1314

1415
#include <cassert>
@@ -141,6 +142,9 @@ enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
141142
// It has a set of functions to parse mangled symbols into Type instances.
142143
// It also has a set of functions to convert Type instances to strings.
143144
class Demangler {
145+
friend std::optional<size_t>
146+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName);
147+
144148
public:
145149
Demangler() = default;
146150
virtual ~Demangler() = default;

llvm/include/llvm/IR/Mangler.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ void emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
5656
std::optional<std::string> getArm64ECMangledFunctionName(StringRef Name);
5757
std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);
5858

59+
/// Check if an ARM64EC function name is mangled.
60+
bool inline isArm64ECMangledFunctionName(StringRef Name) {
61+
return Name[0] == '#' ||
62+
(Name[0] == '?' && Name.find("@$$h") != StringRef::npos);
63+
}
64+
5965
} // End llvm namespace
6066

6167
#endif

llvm/lib/Demangle/MicrosoftDemangle.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <array>
2525
#include <cctype>
2626
#include <cstdio>
27+
#include <optional>
2728
#include <string_view>
2829
#include <tuple>
2930

@@ -2424,6 +2425,24 @@ void Demangler::dumpBackReferences() {
24242425
std::printf("\n");
24252426
}
24262427

2428+
std::optional<size_t>
2429+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
2430+
std::string_view ProcessedName{MangledName};
2431+
2432+
// We only support this for MSVC-style C++ symbols.
2433+
if (!consumeFront(ProcessedName, '?'))
2434+
return std::nullopt;
2435+
2436+
// The insertion point is just after the name of the symbol, so parse that to
2437+
// remove it from the processed name.
2438+
Demangler D;
2439+
D.demangleFullyQualifiedSymbolName(ProcessedName);
2440+
if (D.Error)
2441+
return std::nullopt;
2442+
2443+
return MangledName.length() - ProcessedName.length();
2444+
}
2445+
24272446
char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
24282447
int *Status, MSDemangleFlags Flags) {
24292448
Demangler D;

llvm/lib/IR/Mangler.cpp

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallString.h"
1515
#include "llvm/ADT/StringExtras.h"
1616
#include "llvm/ADT/Twine.h"
17+
#include "llvm/Demangle/Demangle.h"
1718
#include "llvm/IR/DataLayout.h"
1819
#include "llvm/IR/DerivedTypes.h"
1920
#include "llvm/IR/Function.h"
@@ -291,30 +292,25 @@ void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
291292
}
292293

293294
std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
294-
bool IsCppFn = Name[0] == '?';
295-
if (IsCppFn && Name.contains("$$h"))
296-
return std::nullopt;
297-
if (!IsCppFn && Name[0] == '#')
295+
if (Name[0] != '?') {
296+
// For non-C++ symbols, prefix the name with "#" unless it's already
297+
// mangled.
298+
if (Name[0] == '#')
299+
return std::nullopt;
300+
return std::optional<std::string>(("#" + Name).str());
301+
}
302+
303+
// If the name contains $$h, then it is already mangled.
304+
if (Name.contains("$$h"))
298305
return std::nullopt;
299306

300-
StringRef Prefix = "$$h";
301-
size_t InsertIdx = 0;
302-
if (IsCppFn) {
303-
InsertIdx = Name.find("@@");
304-
size_t ThreeAtSignsIdx = Name.find("@@@");
305-
if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) {
306-
InsertIdx += 2;
307-
} else {
308-
InsertIdx = Name.find("@");
309-
if (InsertIdx != std::string::npos)
310-
InsertIdx++;
311-
}
312-
} else {
313-
Prefix = "#";
314-
}
307+
// Ask the demangler where we should insert "$$h".
308+
auto InsertIdx = getArm64ECInsertionPointInMangledName(Name);
309+
if (!InsertIdx)
310+
return std::nullopt;
315311

316312
return std::optional<std::string>(
317-
(Name.substr(0, InsertIdx) + Prefix + Name.substr(InsertIdx)).str());
313+
(Name.substr(0, *InsertIdx) + "$$h" + Name.substr(*InsertIdx)).str());
318314
}
319315

320316
std::optional<std::string>

llvm/unittests/IR/ManglerTest.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,4 +174,81 @@ TEST(ManglerTest, GOFF) {
174174
"L#foo");
175175
}
176176

177+
// Exercises the Arm64EC mangling helpers on a table of already-mangled names:
// each name must be recognized as mangled, must be refused by the mangler,
// must demangle to a name that is not mangled, and must round-trip back to the
// original string when mangled again.
TEST(ManglerTest, Arm64EC) {
  constexpr std::string_view Arm64ECNames[] = {
      // Basic C name.
      "#Foo",

      // Basic C++ name.
      "?foo@@$$hYAHXZ",

      // Regression test: https://github.com/llvm/llvm-project/issues/115231
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHXZ",

      // Symbols from:
      // ```
      // namespace A::B::C::D {
      // struct Base {
      //   virtual int f() { return 0; }
      // };
      // }
      // struct Derived : public A::B::C::D::Base {
      //   virtual int f() override { return 1; }
      // };
      // A::B::C::D::Base* MakeObj() { return new Derived(); }
      // ```
      // void * __cdecl operator new(unsigned __int64)
      "??2@$$hYAPEAX_K@Z",
      // public: virtual int __cdecl A::B::C::D::Base::f(void)
      "?f@Base@D@C@B@A@@$$hUEAAHXZ",
      // public: __cdecl A::B::C::D::Base::Base(void)
      "??0Base@D@C@B@A@@$$hQEAA@XZ",
      // public: virtual int __cdecl Derived::f(void)
      "?f@Derived@@$$hUEAAHXZ",
      // public: __cdecl Derived::Derived(void)
      "??0Derived@@$$hQEAA@XZ",
      // struct A::B::C::D::Base * __cdecl MakeObj(void)
      "?MakeObj@@$$hYAPEAUBase@D@C@B@A@@XZ",

      // Symbols from:
      // ```
      // template <typename T> struct WW { struct Z{}; };
      // template <typename X> struct Wrapper {
      //   int GetValue(typename WW<X>::Z) const;
      // };
      // struct A { };
      // template <typename X> int Wrapper<X>::GetValue(typename WW<X>::Z) const
      // { return 3; }
      // template class Wrapper<A>;
      // ```
      // public: int __cdecl Wrapper<struct A>::GetValue(struct WW<struct
      // A>::Z)const
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHUZ@?$WW@UA@@@@@Z",
  };

  for (const auto &Arm64ECName : Arm64ECNames) {
    // Check that this is a mangled name.
    EXPECT_TRUE(isArm64ECMangledFunctionName(Arm64ECName))
        << "Test case: " << Arm64ECName;
    // Refuse to mangle it again.
    EXPECT_FALSE(getArm64ECMangledFunctionName(Arm64ECName).has_value())
        << "Test case: " << Arm64ECName;

    // Demangle.
    auto Arm64Name = getArm64ECDemangledFunctionName(Arm64ECName);
    EXPECT_TRUE(Arm64Name.has_value()) << "Test case: " << Arm64ECName;
    // The remaining checks dereference Arm64Name, so skip them for this case
    // if demangling failed; the failure has already been recorded above and
    // the other test cases still get to run.
    if (!Arm64Name.has_value())
      continue;

    // Check that it is not mangled.
    EXPECT_FALSE(isArm64ECMangledFunctionName(*Arm64Name))
        << "Test case: " << Arm64ECName;
    // Refuse to demangle it again.
    EXPECT_FALSE(getArm64ECDemangledFunctionName(*Arm64Name).has_value())
        << "Test case: " << Arm64ECName;

    // Round-trip.
    auto RoundTripArm64ECName = getArm64ECMangledFunctionName(*Arm64Name);
    EXPECT_EQ(RoundTripArm64ECName, Arm64ECName);
  }
}
253+
177254
} // end anonymous namespace

0 commit comments

Comments
 (0)