xref: /freebsd-src/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
//===--- DLangDemangle.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines a demangler for the D programming language as specified
/// in the ABI specification, available at:
/// https://dlang.org/spec/abi.html#name_mangling
///
//===----------------------------------------------------------------------===//
15349cc55cSDimitry Andric 
16349cc55cSDimitry Andric #include "llvm/Demangle/Demangle.h"
174824e7fdSDimitry Andric #include "llvm/Demangle/StringView.h"
18349cc55cSDimitry Andric #include "llvm/Demangle/Utility.h"
19349cc55cSDimitry Andric 
204824e7fdSDimitry Andric #include <cctype>
21349cc55cSDimitry Andric #include <cstring>
224824e7fdSDimitry Andric #include <limits>
23349cc55cSDimitry Andric 
24349cc55cSDimitry Andric using namespace llvm;
25349cc55cSDimitry Andric using llvm::itanium_demangle::OutputBuffer;
264824e7fdSDimitry Andric using llvm::itanium_demangle::StringView;
274824e7fdSDimitry Andric 
284824e7fdSDimitry Andric namespace {
294824e7fdSDimitry Andric 
304824e7fdSDimitry Andric /// Demangle information structure.
314824e7fdSDimitry Andric struct Demangler {
324824e7fdSDimitry Andric   /// Initialize the information structure we use to pass around information.
334824e7fdSDimitry Andric   ///
344824e7fdSDimitry Andric   /// \param Mangled String to demangle.
354824e7fdSDimitry Andric   Demangler(const char *Mangled);
364824e7fdSDimitry Andric 
374824e7fdSDimitry Andric   /// Extract and demangle the mangled symbol and append it to the output
384824e7fdSDimitry Andric   /// string.
394824e7fdSDimitry Andric   ///
404824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
414824e7fdSDimitry Andric   ///
424824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
434824e7fdSDimitry Andric   ///
444824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#name_mangling .
454824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#MangledName .
464824e7fdSDimitry Andric   const char *parseMangle(OutputBuffer *Demangled);
474824e7fdSDimitry Andric 
484824e7fdSDimitry Andric private:
494824e7fdSDimitry Andric   /// Extract and demangle a given mangled symbol and append it to the output
504824e7fdSDimitry Andric   /// string.
514824e7fdSDimitry Andric   ///
524824e7fdSDimitry Andric   /// \param Demangled output buffer to write the demangled name.
534824e7fdSDimitry Andric   /// \param Mangled mangled symbol to be demangled.
544824e7fdSDimitry Andric   ///
554824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
564824e7fdSDimitry Andric   ///
574824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#name_mangling .
584824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#MangledName .
594824e7fdSDimitry Andric   const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
604824e7fdSDimitry Andric 
614824e7fdSDimitry Andric   /// Extract the number from a given string.
624824e7fdSDimitry Andric   ///
634824e7fdSDimitry Andric   /// \param Mangled string to extract the number.
644824e7fdSDimitry Andric   /// \param Ret assigned result value.
654824e7fdSDimitry Andric   ///
664824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
674824e7fdSDimitry Andric   ///
684824e7fdSDimitry Andric   /// \note A result larger than UINT_MAX is considered a failure.
694824e7fdSDimitry Andric   ///
704824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#Number .
714824e7fdSDimitry Andric   const char *decodeNumber(const char *Mangled, unsigned long *Ret);
724824e7fdSDimitry Andric 
734824e7fdSDimitry Andric   /// Check whether it is the beginning of a symbol name.
744824e7fdSDimitry Andric   ///
754824e7fdSDimitry Andric   /// \param Mangled string to extract the symbol name.
764824e7fdSDimitry Andric   ///
774824e7fdSDimitry Andric   /// \return true on success, false otherwise.
784824e7fdSDimitry Andric   ///
794824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#SymbolName .
804824e7fdSDimitry Andric   bool isSymbolName(const char *Mangled);
814824e7fdSDimitry Andric 
824824e7fdSDimitry Andric   /// Extract and demangle an identifier from a given mangled symbol append it
834824e7fdSDimitry Andric   /// to the output string.
844824e7fdSDimitry Andric   ///
854824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
864824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
874824e7fdSDimitry Andric   ///
884824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
894824e7fdSDimitry Andric   ///
904824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#SymbolName .
914824e7fdSDimitry Andric   const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
924824e7fdSDimitry Andric 
934824e7fdSDimitry Andric   /// Extract and demangle the plain identifier from a given mangled symbol and
944824e7fdSDimitry Andric   /// prepend/append it to the output string, with a special treatment for some
954824e7fdSDimitry Andric   /// magic compiler generated symbols.
964824e7fdSDimitry Andric   ///
974824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
984824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
994824e7fdSDimitry Andric   /// \param Len Length of the mangled symbol name.
1004824e7fdSDimitry Andric   ///
1014824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
1024824e7fdSDimitry Andric   ///
1034824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#LName .
1044824e7fdSDimitry Andric   const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
1054824e7fdSDimitry Andric                          unsigned long Len);
1064824e7fdSDimitry Andric 
1074824e7fdSDimitry Andric   /// Extract and demangle the qualified symbol from a given mangled symbol
1084824e7fdSDimitry Andric   /// append it to the output string.
1094824e7fdSDimitry Andric   ///
1104824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
1114824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
1124824e7fdSDimitry Andric   ///
1134824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
1144824e7fdSDimitry Andric   ///
1154824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#QualifiedName .
1164824e7fdSDimitry Andric   const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
1174824e7fdSDimitry Andric 
1184824e7fdSDimitry Andric   /// The string we are demangling.
1194824e7fdSDimitry Andric   const char *Str;
1204824e7fdSDimitry Andric };
1214824e7fdSDimitry Andric 
1224824e7fdSDimitry Andric } // namespace
1234824e7fdSDimitry Andric 
1244824e7fdSDimitry Andric const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
1254824e7fdSDimitry Andric   // Return nullptr if trying to extract something that isn't a digit.
1264824e7fdSDimitry Andric   if (Mangled == nullptr || !std::isdigit(*Mangled))
1274824e7fdSDimitry Andric     return nullptr;
1284824e7fdSDimitry Andric 
1294824e7fdSDimitry Andric   unsigned long Val = 0;
1304824e7fdSDimitry Andric 
1314824e7fdSDimitry Andric   do {
1324824e7fdSDimitry Andric     unsigned long Digit = Mangled[0] - '0';
1334824e7fdSDimitry Andric 
1344824e7fdSDimitry Andric     // Check for overflow.
1354824e7fdSDimitry Andric     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
1364824e7fdSDimitry Andric       return nullptr;
1374824e7fdSDimitry Andric 
1384824e7fdSDimitry Andric     Val = Val * 10 + Digit;
1394824e7fdSDimitry Andric     ++Mangled;
1404824e7fdSDimitry Andric   } while (std::isdigit(*Mangled));
1414824e7fdSDimitry Andric 
1424824e7fdSDimitry Andric   if (*Mangled == '\0')
1434824e7fdSDimitry Andric     return nullptr;
1444824e7fdSDimitry Andric 
1454824e7fdSDimitry Andric   *Ret = Val;
1464824e7fdSDimitry Andric   return Mangled;
1474824e7fdSDimitry Andric }
1484824e7fdSDimitry Andric 
1494824e7fdSDimitry Andric bool Demangler::isSymbolName(const char *Mangled) {
1504824e7fdSDimitry Andric   if (std::isdigit(*Mangled))
1514824e7fdSDimitry Andric     return true;
1524824e7fdSDimitry Andric 
1534824e7fdSDimitry Andric   // TODO: Handle symbol back references and template instances.
1544824e7fdSDimitry Andric   return false;
1554824e7fdSDimitry Andric }
1564824e7fdSDimitry Andric 
1574824e7fdSDimitry Andric const char *Demangler::parseMangle(OutputBuffer *Demangled,
1584824e7fdSDimitry Andric                                    const char *Mangled) {
1594824e7fdSDimitry Andric   // A D mangled symbol is comprised of both scope and type information.
1604824e7fdSDimitry Andric   //    MangleName:
1614824e7fdSDimitry Andric   //        _D QualifiedName Type
1624824e7fdSDimitry Andric   //        _D QualifiedName Z
1634824e7fdSDimitry Andric   //        ^
1644824e7fdSDimitry Andric   // The caller should have guaranteed that the start pointer is at the
1654824e7fdSDimitry Andric   // above location.
1664824e7fdSDimitry Andric   // Note that type is never a function type, but only the return type of
1674824e7fdSDimitry Andric   // a function or the type of a variable.
1684824e7fdSDimitry Andric   Mangled += 2;
1694824e7fdSDimitry Andric 
1704824e7fdSDimitry Andric   Mangled = parseQualified(Demangled, Mangled);
1714824e7fdSDimitry Andric 
1724824e7fdSDimitry Andric   if (Mangled != nullptr) {
1734824e7fdSDimitry Andric     // Artificial symbols end with 'Z' and have no type.
1744824e7fdSDimitry Andric     if (*Mangled == 'Z')
1754824e7fdSDimitry Andric       ++Mangled;
1764824e7fdSDimitry Andric     else {
1774824e7fdSDimitry Andric       // TODO: Implement symbols with types.
1784824e7fdSDimitry Andric       return nullptr;
1794824e7fdSDimitry Andric     }
1804824e7fdSDimitry Andric   }
1814824e7fdSDimitry Andric 
1824824e7fdSDimitry Andric   return Mangled;
1834824e7fdSDimitry Andric }
1844824e7fdSDimitry Andric 
1854824e7fdSDimitry Andric const char *Demangler::parseQualified(OutputBuffer *Demangled,
1864824e7fdSDimitry Andric                                       const char *Mangled) {
1874824e7fdSDimitry Andric   // Qualified names are identifiers separated by their encoded length.
1884824e7fdSDimitry Andric   // Nested functions also encode their argument types without specifying
1894824e7fdSDimitry Andric   // what they return.
1904824e7fdSDimitry Andric   //    QualifiedName:
1914824e7fdSDimitry Andric   //        SymbolFunctionName
1924824e7fdSDimitry Andric   //        SymbolFunctionName QualifiedName
1934824e7fdSDimitry Andric   //        ^
1944824e7fdSDimitry Andric   //    SymbolFunctionName:
1954824e7fdSDimitry Andric   //        SymbolName
1964824e7fdSDimitry Andric   //        SymbolName TypeFunctionNoReturn
1974824e7fdSDimitry Andric   //        SymbolName M TypeFunctionNoReturn
1984824e7fdSDimitry Andric   //        SymbolName M TypeModifiers TypeFunctionNoReturn
1994824e7fdSDimitry Andric   // The start pointer should be at the above location.
2004824e7fdSDimitry Andric 
2014824e7fdSDimitry Andric   // Whether it has more than one symbol
2024824e7fdSDimitry Andric   size_t NotFirst = false;
2034824e7fdSDimitry Andric   do {
2044824e7fdSDimitry Andric     // Skip over anonymous symbols.
2054824e7fdSDimitry Andric     if (*Mangled == '0') {
2064824e7fdSDimitry Andric       do
2074824e7fdSDimitry Andric         ++Mangled;
2084824e7fdSDimitry Andric       while (*Mangled == '0');
2094824e7fdSDimitry Andric 
2104824e7fdSDimitry Andric       continue;
2114824e7fdSDimitry Andric     }
2124824e7fdSDimitry Andric 
2134824e7fdSDimitry Andric     if (NotFirst)
2144824e7fdSDimitry Andric       *Demangled << '.';
2154824e7fdSDimitry Andric     NotFirst = true;
2164824e7fdSDimitry Andric 
2174824e7fdSDimitry Andric     Mangled = parseIdentifier(Demangled, Mangled);
2184824e7fdSDimitry Andric 
2194824e7fdSDimitry Andric   } while (Mangled && isSymbolName(Mangled));
2204824e7fdSDimitry Andric 
2214824e7fdSDimitry Andric   return Mangled;
2224824e7fdSDimitry Andric }
2234824e7fdSDimitry Andric 
2244824e7fdSDimitry Andric const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
2254824e7fdSDimitry Andric                                        const char *Mangled) {
2264824e7fdSDimitry Andric   unsigned long Len;
2274824e7fdSDimitry Andric 
2284824e7fdSDimitry Andric   if (Mangled == nullptr || *Mangled == '\0')
2294824e7fdSDimitry Andric     return nullptr;
2304824e7fdSDimitry Andric 
2314824e7fdSDimitry Andric   // TODO: Parse back references and lengthless template instances.
2324824e7fdSDimitry Andric 
2334824e7fdSDimitry Andric   const char *Endptr = decodeNumber(Mangled, &Len);
2344824e7fdSDimitry Andric 
2354824e7fdSDimitry Andric   if (Endptr == nullptr || Len == 0)
2364824e7fdSDimitry Andric     return nullptr;
2374824e7fdSDimitry Andric 
2384824e7fdSDimitry Andric   if (strlen(Endptr) < Len)
2394824e7fdSDimitry Andric     return nullptr;
2404824e7fdSDimitry Andric 
2414824e7fdSDimitry Andric   Mangled = Endptr;
2424824e7fdSDimitry Andric 
2434824e7fdSDimitry Andric   // TODO: Parse template instances with a length prefix.
2444824e7fdSDimitry Andric 
245*0eae32dcSDimitry Andric   // There can be multiple different declarations in the same function that
246*0eae32dcSDimitry Andric   // have the same mangled name.  To make the mangled names unique, a fake
247*0eae32dcSDimitry Andric   // parent in the form `__Sddd' is added to the symbol.
248*0eae32dcSDimitry Andric   if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
249*0eae32dcSDimitry Andric     const char *NumPtr = Mangled + 3;
250*0eae32dcSDimitry Andric     while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
251*0eae32dcSDimitry Andric       ++NumPtr;
252*0eae32dcSDimitry Andric 
253*0eae32dcSDimitry Andric     if (Mangled + Len == NumPtr) {
254*0eae32dcSDimitry Andric       // Skip over the fake parent.
255*0eae32dcSDimitry Andric       Mangled += Len;
256*0eae32dcSDimitry Andric       return parseIdentifier(Demangled, Mangled);
257*0eae32dcSDimitry Andric     }
258*0eae32dcSDimitry Andric 
259*0eae32dcSDimitry Andric     // Else demangle it as a plain identifier.
260*0eae32dcSDimitry Andric   }
261*0eae32dcSDimitry Andric 
2624824e7fdSDimitry Andric   return parseLName(Demangled, Mangled, Len);
2634824e7fdSDimitry Andric }
2644824e7fdSDimitry Andric 
2654824e7fdSDimitry Andric const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
2664824e7fdSDimitry Andric                                   unsigned long Len) {
267*0eae32dcSDimitry Andric   switch (Len) {
268*0eae32dcSDimitry Andric   case 6:
269*0eae32dcSDimitry Andric     if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
270*0eae32dcSDimitry Andric       // The static initializer for a given symbol.
271*0eae32dcSDimitry Andric       Demangled->prepend("initializer for ");
272*0eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
273*0eae32dcSDimitry Andric       Mangled += Len;
274*0eae32dcSDimitry Andric       return Mangled;
275*0eae32dcSDimitry Andric     }
276*0eae32dcSDimitry Andric     if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
277*0eae32dcSDimitry Andric       // The vtable symbol for a given class.
278*0eae32dcSDimitry Andric       Demangled->prepend("vtable for ");
279*0eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
280*0eae32dcSDimitry Andric       Mangled += Len;
281*0eae32dcSDimitry Andric       return Mangled;
282*0eae32dcSDimitry Andric     }
283*0eae32dcSDimitry Andric     break;
284*0eae32dcSDimitry Andric 
285*0eae32dcSDimitry Andric   case 7:
286*0eae32dcSDimitry Andric     if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
287*0eae32dcSDimitry Andric       // The classinfo symbol for a given class.
288*0eae32dcSDimitry Andric       Demangled->prepend("ClassInfo for ");
289*0eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
290*0eae32dcSDimitry Andric       Mangled += Len;
291*0eae32dcSDimitry Andric       return Mangled;
292*0eae32dcSDimitry Andric     }
293*0eae32dcSDimitry Andric     break;
294*0eae32dcSDimitry Andric 
295*0eae32dcSDimitry Andric   case 11:
296*0eae32dcSDimitry Andric     if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
297*0eae32dcSDimitry Andric       // The interface symbol for a given class.
298*0eae32dcSDimitry Andric       Demangled->prepend("Interface for ");
299*0eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
300*0eae32dcSDimitry Andric       Mangled += Len;
301*0eae32dcSDimitry Andric       return Mangled;
302*0eae32dcSDimitry Andric     }
303*0eae32dcSDimitry Andric     break;
304*0eae32dcSDimitry Andric 
305*0eae32dcSDimitry Andric   case 12:
306*0eae32dcSDimitry Andric     if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
307*0eae32dcSDimitry Andric       // The ModuleInfo symbol for a given module.
308*0eae32dcSDimitry Andric       Demangled->prepend("ModuleInfo for ");
309*0eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
310*0eae32dcSDimitry Andric       Mangled += Len;
311*0eae32dcSDimitry Andric       return Mangled;
312*0eae32dcSDimitry Andric     }
313*0eae32dcSDimitry Andric     break;
314*0eae32dcSDimitry Andric   }
315*0eae32dcSDimitry Andric 
3164824e7fdSDimitry Andric   *Demangled << StringView(Mangled, Len);
3174824e7fdSDimitry Andric   Mangled += Len;
3184824e7fdSDimitry Andric 
3194824e7fdSDimitry Andric   return Mangled;
3204824e7fdSDimitry Andric }
3214824e7fdSDimitry Andric 
3224824e7fdSDimitry Andric Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
3234824e7fdSDimitry Andric 
3244824e7fdSDimitry Andric const char *Demangler::parseMangle(OutputBuffer *Demangled) {
3254824e7fdSDimitry Andric   return parseMangle(Demangled, this->Str);
3264824e7fdSDimitry Andric }
327349cc55cSDimitry Andric 
328349cc55cSDimitry Andric char *llvm::dlangDemangle(const char *MangledName) {
329349cc55cSDimitry Andric   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
330349cc55cSDimitry Andric     return nullptr;
331349cc55cSDimitry Andric 
332349cc55cSDimitry Andric   OutputBuffer Demangled;
333349cc55cSDimitry Andric   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
334349cc55cSDimitry Andric     return nullptr;
335349cc55cSDimitry Andric 
3364824e7fdSDimitry Andric   if (strcmp(MangledName, "_Dmain") == 0) {
337349cc55cSDimitry Andric     Demangled << "D main";
3384824e7fdSDimitry Andric   } else {
3394824e7fdSDimitry Andric 
3404824e7fdSDimitry Andric     Demangler D = Demangler(MangledName);
3414824e7fdSDimitry Andric     MangledName = D.parseMangle(&Demangled);
3424824e7fdSDimitry Andric 
3434824e7fdSDimitry Andric     // Check that the entire symbol was successfully demangled.
3444824e7fdSDimitry Andric     if (MangledName == nullptr || *MangledName != '\0') {
3454824e7fdSDimitry Andric       std::free(Demangled.getBuffer());
3464824e7fdSDimitry Andric       return nullptr;
3474824e7fdSDimitry Andric     }
3484824e7fdSDimitry Andric   }
349349cc55cSDimitry Andric 
350349cc55cSDimitry Andric   // OutputBuffer's internal buffer is not null terminated and therefore we need
351349cc55cSDimitry Andric   // to add it to comply with C null terminated strings.
352349cc55cSDimitry Andric   if (Demangled.getCurrentPosition() > 0) {
353349cc55cSDimitry Andric     Demangled << '\0';
354349cc55cSDimitry Andric     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
355349cc55cSDimitry Andric     return Demangled.getBuffer();
356349cc55cSDimitry Andric   }
357349cc55cSDimitry Andric 
3584824e7fdSDimitry Andric   std::free(Demangled.getBuffer());
359349cc55cSDimitry Andric   return nullptr;
360349cc55cSDimitry Andric }
361