Skip to content

Commit e0bcc78

Browse files
committed
Fix Arm64EC name mangling algorithm
1 parent a6d299d commit e0bcc78

File tree

6 files changed

+113
-12
lines changed

6 files changed

+113
-12
lines changed

llvm/include/llvm/Demangle/Demangle.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_DEMANGLE_DEMANGLE_H
1111

1212
#include <cstddef>
13+
#include <optional>
1314
#include <string>
1415
#include <string_view>
1516

@@ -54,6 +55,9 @@ enum MSDemangleFlags {
5455
char *microsoftDemangle(std::string_view mangled_name, size_t *n_read,
5556
int *status, MSDemangleFlags Flags = MSDF_None);
5657

58+
/// Finds where the ARM64EC "$$h" tag should be inserted into a mangled name.
///
/// Only MSVC-style C++ symbols (names beginning with '?') are supported.
///
/// \param MangledName the mangled symbol name to inspect.
/// \returns the offset into \p MangledName just past the symbol's fully
/// qualified name, or std::nullopt if the name does not begin with '?' or
/// its qualified name cannot be parsed.
std::optional<size_t>
59+
getArm64ECInsertionPointInMangledName(std::string_view MangledName);
60+
5761
// Demangles a Rust v0 mangled symbol.
5862
char *rustDemangle(std::string_view MangledName);
5963

llvm/include/llvm/Demangle/MicrosoftDemangle.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1010
#define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1111

12+
#include "llvm/Demangle/Demangle.h"
1213
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
1314

1415
#include <cassert>
@@ -141,6 +142,9 @@ enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
141142
// It has a set of functions to parse mangled symbols into Type instances.
142143
// It also has a set of functions to convert Type instances to strings.
143144
class Demangler {
145+
friend std::optional<size_t>
146+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName);
147+
144148
public:
145149
Demangler() = default;
146150
virtual ~Demangler() = default;

llvm/include/llvm/IR/Mangler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);
6464
/// Check if an ARM64EC function name is mangled.
6565
bool inline isArm64ECMangledFunctionName(StringRef Name) {
6666
return Name[0] == '#' ||
67-
(Name[0] == '?' && Name.find("$$h") != StringRef::npos);
67+
(Name[0] == '?' && Name.find("@$$h") != StringRef::npos);
6868
}
6969

7070
} // End llvm namespace

llvm/lib/Demangle/MicrosoftDemangle.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <array>
2525
#include <cctype>
2626
#include <cstdio>
27+
#include <optional>
2728
#include <string_view>
2829
#include <tuple>
2930

@@ -2428,6 +2429,24 @@ void Demangler::dumpBackReferences() {
24282429
std::printf("\n");
24292430
}
24302431

2432+
std::optional<size_t>
2433+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
2434+
std::string_view ProcessedName{MangledName};
2435+
2436+
// We only support this for MSVC-style C++ symbols.
2437+
if (!consumeFront(ProcessedName, '?'))
2438+
return std::nullopt;
2439+
2440+
// The insertion point is just after the name of the symbol, so parse that to
2441+
// remove it from the processed name.
2442+
Demangler D;
2443+
D.demangleFullyQualifiedSymbolName(ProcessedName);
2444+
if (D.Error)
2445+
return std::nullopt;
2446+
2447+
return MangledName.length() - ProcessedName.length();
2448+
}
2449+
24312450
char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
24322451
int *Status, MSDemangleFlags Flags) {
24332452
Demangler D;

llvm/lib/IR/Mangler.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallString.h"
1515
#include "llvm/ADT/StringExtras.h"
1616
#include "llvm/ADT/Twine.h"
17+
#include "llvm/Demangle/Demangle.h"
1718
#include "llvm/IR/DataLayout.h"
1819
#include "llvm/IR/DerivedTypes.h"
1920
#include "llvm/IR/Function.h"
@@ -299,21 +300,17 @@ std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
299300
return std::optional<std::string>(("#" + Name).str());
300301
}
301302

302-
// Insert the ARM64EC "$$h" tag after the mangled function name.
303+
// If the name contains $$h, then it is already mangled.
303304
if (Name.contains("$$h"))
304305
return std::nullopt;
305-
size_t InsertIdx = Name.find("@@");
306-
size_t ThreeAtSignsIdx = Name.find("@@@");
307-
if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) {
308-
InsertIdx += 2;
309-
} else {
310-
InsertIdx = Name.find("@");
311-
if (InsertIdx != std::string::npos)
312-
InsertIdx++;
313-
}
306+
307+
// Ask the demangler where we should insert "$$h".
308+
auto InsertIdx = getArm64ECInsertionPointInMangledName(Name);
309+
if (!InsertIdx)
310+
return std::nullopt;
314311

315312
return std::optional<std::string>(
316-
(Name.substr(0, InsertIdx) + "$$h" + Name.substr(InsertIdx)).str());
313+
(Name.substr(0, *InsertIdx) + "$$h" + Name.substr(*InsertIdx)).str());
317314
}
318315

319316
std::optional<std::string>

llvm/unittests/IR/ManglerTest.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,81 @@ TEST(ManglerTest, GOFF) {
172172
"L#foo");
173173
}
174174

175+
// Exercises the ARM64EC mangling helpers on names that are already mangled.
// Each name must be recognised as mangled, must be refused for re-mangling,
// must demangle to an unmangled name, and must mangle back to the original.
TEST(ManglerTest, Arm64EC) {
  constexpr std::string_view Arm64ECNames[] = {
      // Basic C name.
      "#Foo",

      // Basic C++ name.
      "?foo@@$$hYAHXZ",

      // Regression test: https://github.com/llvm/llvm-project/issues/115231
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHXZ",

      // Symbols from:
      // ```
      // namespace A::B::C::D {
      // struct Base {
      //   virtual int f() { return 0; }
      // };
      // }
      // struct Derived : public A::B::C::D::Base {
      //   virtual int f() override { return 1; }
      // };
      // A::B::C::D::Base* MakeObj() { return new Derived(); }
      // ```
      // void * __cdecl operator new(unsigned __int64)
      "??2@$$hYAPEAX_K@Z",
      // public: virtual int __cdecl A::B::C::D::Base::f(void)
      "?f@Base@D@C@B@A@@$$hUEAAHXZ",
      // public: __cdecl A::B::C::D::Base::Base(void)
      "??0Base@D@C@B@A@@$$hQEAA@XZ",
      // public: virtual int __cdecl Derived::f(void)
      "?f@Derived@@$$hUEAAHXZ",
      // public: __cdecl Derived::Derived(void)
      "??0Derived@@$$hQEAA@XZ",
      // struct A::B::C::D::Base * __cdecl MakeObj(void)
      "?MakeObj@@$$hYAPEAUBase@D@C@B@A@@XZ",

      // Symbols from:
      // ```
      // template <typename T> struct WW { struct Z{}; };
      // template <typename X> struct Wrapper {
      //   int GetValue(typename WW<X>::Z) const;
      // };
      // struct A { };
      // template <typename X> int Wrapper<X>::GetValue(typename WW<X>::Z) const
      // { return 3; }
      // template class Wrapper<A>;
      // ```
      // public: int __cdecl Wrapper<struct A>::GetValue(struct WW<struct
      // A>::Z)const
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHUZ@?$WW@UA@@@@@Z",
  };

  for (const auto &Arm64ECName : Arm64ECNames) {
    // Check that this is a mangled name.
    EXPECT_TRUE(isArm64ECMangledFunctionName(Arm64ECName))
        << "Test case: " << Arm64ECName;
    // Refuse to mangle it again.
    EXPECT_FALSE(getArm64ECMangledFunctionName(Arm64ECName).has_value())
        << "Test case: " << Arm64ECName;

    // Demangle. This must be a fatal assertion: the checks below call
    // value() on the result, which is invalid if demangling failed.
    auto Arm64Name = getArm64ECDemangledFunctionName(Arm64ECName);
    ASSERT_TRUE(Arm64Name.has_value()) << "Test case: " << Arm64ECName;
    // Check that it is not mangled.
    EXPECT_FALSE(isArm64ECMangledFunctionName(Arm64Name.value()))
        << "Test case: " << Arm64ECName;
    // Refuse to demangle it again.
    EXPECT_FALSE(getArm64ECDemangledFunctionName(Arm64Name.value()).has_value())
        << "Test case: " << Arm64ECName;

    // Round-trip.
    auto RoundTripArm64ECName =
        getArm64ECMangledFunctionName(Arm64Name.value());
    EXPECT_EQ(RoundTripArm64ECName, Arm64ECName)
        << "Test case: " << Arm64ECName;
  }
}
251+
175252
} // end anonymous namespace

0 commit comments

Comments
 (0)