1349cc55cSDimitry Andric //===--- DLangDemangle.cpp ------------------------------------------------===// 2349cc55cSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric // 7349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8349cc55cSDimitry Andric /// 9349cc55cSDimitry Andric /// \file 10349cc55cSDimitry Andric /// This file defines a demangler for the D programming language as specified 11349cc55cSDimitry Andric /// in the ABI specification, available at: 12349cc55cSDimitry Andric /// https://dlang.org/spec/abi.html#name_mangling 13349cc55cSDimitry Andric /// 14349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 15349cc55cSDimitry Andric 16349cc55cSDimitry Andric #include "llvm/Demangle/Demangle.h" 174824e7fdSDimitry Andric #include "llvm/Demangle/StringView.h" 18349cc55cSDimitry Andric #include "llvm/Demangle/Utility.h" 19349cc55cSDimitry Andric 204824e7fdSDimitry Andric #include <cctype> 21349cc55cSDimitry Andric #include <cstring> 224824e7fdSDimitry Andric #include <limits> 23349cc55cSDimitry Andric 24349cc55cSDimitry Andric using namespace llvm; 25349cc55cSDimitry Andric using llvm::itanium_demangle::OutputBuffer; 264824e7fdSDimitry Andric using llvm::itanium_demangle::StringView; 274824e7fdSDimitry Andric 284824e7fdSDimitry Andric namespace { 294824e7fdSDimitry Andric 304824e7fdSDimitry Andric /// Demangle information structure. 314824e7fdSDimitry Andric struct Demangler { 324824e7fdSDimitry Andric /// Initialize the information structure we use to pass around information. 334824e7fdSDimitry Andric /// 344824e7fdSDimitry Andric /// \param Mangled String to demangle. 354824e7fdSDimitry Andric Demangler(const char *Mangled); 364824e7fdSDimitry Andric 374824e7fdSDimitry Andric /// Extract and demangle the mangled symbol and append it to the output 384824e7fdSDimitry Andric /// string. 394824e7fdSDimitry Andric /// 404824e7fdSDimitry Andric /// \param Demangled Output buffer to write the demangled name. 414824e7fdSDimitry Andric /// 424824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 434824e7fdSDimitry Andric /// 444824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#name_mangling . 454824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#MangledName . 464824e7fdSDimitry Andric const char *parseMangle(OutputBuffer *Demangled); 474824e7fdSDimitry Andric 484824e7fdSDimitry Andric private: 494824e7fdSDimitry Andric /// Extract and demangle a given mangled symbol and append it to the output 504824e7fdSDimitry Andric /// string. 514824e7fdSDimitry Andric /// 524824e7fdSDimitry Andric /// \param Demangled output buffer to write the demangled name. 534824e7fdSDimitry Andric /// \param Mangled mangled symbol to be demangled. 544824e7fdSDimitry Andric /// 554824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 564824e7fdSDimitry Andric /// 574824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#name_mangling . 584824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#MangledName . 594824e7fdSDimitry Andric const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); 604824e7fdSDimitry Andric 614824e7fdSDimitry Andric /// Extract the number from a given string. 624824e7fdSDimitry Andric /// 634824e7fdSDimitry Andric /// \param Mangled string to extract the number. 644824e7fdSDimitry Andric /// \param Ret assigned result value. 654824e7fdSDimitry Andric /// 664824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 674824e7fdSDimitry Andric /// 684824e7fdSDimitry Andric /// \note A result larger than UINT_MAX is considered a failure. 694824e7fdSDimitry Andric /// 704824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#Number . 714824e7fdSDimitry Andric const char *decodeNumber(const char *Mangled, unsigned long *Ret); 724824e7fdSDimitry Andric 734824e7fdSDimitry Andric /// Check whether it is the beginning of a symbol name. 744824e7fdSDimitry Andric /// 754824e7fdSDimitry Andric /// \param Mangled string to extract the symbol name. 764824e7fdSDimitry Andric /// 774824e7fdSDimitry Andric /// \return true on success, false otherwise. 784824e7fdSDimitry Andric /// 794824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#SymbolName . 804824e7fdSDimitry Andric bool isSymbolName(const char *Mangled); 814824e7fdSDimitry Andric 824824e7fdSDimitry Andric /// Extract and demangle an identifier from a given mangled symbol append it 834824e7fdSDimitry Andric /// to the output string. 844824e7fdSDimitry Andric /// 854824e7fdSDimitry Andric /// \param Demangled Output buffer to write the demangled name. 864824e7fdSDimitry Andric /// \param Mangled Mangled symbol to be demangled. 874824e7fdSDimitry Andric /// 884824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 894824e7fdSDimitry Andric /// 904824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#SymbolName . 914824e7fdSDimitry Andric const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); 924824e7fdSDimitry Andric 934824e7fdSDimitry Andric /// Extract and demangle the plain identifier from a given mangled symbol and 944824e7fdSDimitry Andric /// prepend/append it to the output string, with a special treatment for some 954824e7fdSDimitry Andric /// magic compiler generated symbols. 964824e7fdSDimitry Andric /// 974824e7fdSDimitry Andric /// \param Demangled Output buffer to write the demangled name. 984824e7fdSDimitry Andric /// \param Mangled Mangled symbol to be demangled. 994824e7fdSDimitry Andric /// \param Len Length of the mangled symbol name. 1004824e7fdSDimitry Andric /// 1014824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 1024824e7fdSDimitry Andric /// 1034824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#LName . 1044824e7fdSDimitry Andric const char *parseLName(OutputBuffer *Demangled, const char *Mangled, 1054824e7fdSDimitry Andric unsigned long Len); 1064824e7fdSDimitry Andric 1074824e7fdSDimitry Andric /// Extract and demangle the qualified symbol from a given mangled symbol 1084824e7fdSDimitry Andric /// append it to the output string. 1094824e7fdSDimitry Andric /// 1104824e7fdSDimitry Andric /// \param Demangled Output buffer to write the demangled name. 1114824e7fdSDimitry Andric /// \param Mangled Mangled symbol to be demangled. 1124824e7fdSDimitry Andric /// 1134824e7fdSDimitry Andric /// \return The remaining string on success or nullptr on failure. 1144824e7fdSDimitry Andric /// 1154824e7fdSDimitry Andric /// \see https://dlang.org/spec/abi.html#QualifiedName . 1164824e7fdSDimitry Andric const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); 1174824e7fdSDimitry Andric 1184824e7fdSDimitry Andric /// The string we are demangling. 1194824e7fdSDimitry Andric const char *Str; 1204824e7fdSDimitry Andric }; 1214824e7fdSDimitry Andric 1224824e7fdSDimitry Andric } // namespace 1234824e7fdSDimitry Andric 1244824e7fdSDimitry Andric const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { 1254824e7fdSDimitry Andric // Return nullptr if trying to extract something that isn't a digit. 1264824e7fdSDimitry Andric if (Mangled == nullptr || !std::isdigit(*Mangled)) 1274824e7fdSDimitry Andric return nullptr; 1284824e7fdSDimitry Andric 1294824e7fdSDimitry Andric unsigned long Val = 0; 1304824e7fdSDimitry Andric 1314824e7fdSDimitry Andric do { 1324824e7fdSDimitry Andric unsigned long Digit = Mangled[0] - '0'; 1334824e7fdSDimitry Andric 1344824e7fdSDimitry Andric // Check for overflow. 1354824e7fdSDimitry Andric if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) 1364824e7fdSDimitry Andric return nullptr; 1374824e7fdSDimitry Andric 1384824e7fdSDimitry Andric Val = Val * 10 + Digit; 1394824e7fdSDimitry Andric ++Mangled; 1404824e7fdSDimitry Andric } while (std::isdigit(*Mangled)); 1414824e7fdSDimitry Andric 1424824e7fdSDimitry Andric if (*Mangled == '\0') 1434824e7fdSDimitry Andric return nullptr; 1444824e7fdSDimitry Andric 1454824e7fdSDimitry Andric *Ret = Val; 1464824e7fdSDimitry Andric return Mangled; 1474824e7fdSDimitry Andric } 1484824e7fdSDimitry Andric 1494824e7fdSDimitry Andric bool Demangler::isSymbolName(const char *Mangled) { 1504824e7fdSDimitry Andric if (std::isdigit(*Mangled)) 1514824e7fdSDimitry Andric return true; 1524824e7fdSDimitry Andric 1534824e7fdSDimitry Andric // TODO: Handle symbol back references and template instances. 1544824e7fdSDimitry Andric return false; 1554824e7fdSDimitry Andric } 1564824e7fdSDimitry Andric 1574824e7fdSDimitry Andric const char *Demangler::parseMangle(OutputBuffer *Demangled, 1584824e7fdSDimitry Andric const char *Mangled) { 1594824e7fdSDimitry Andric // A D mangled symbol is comprised of both scope and type information. 1604824e7fdSDimitry Andric // MangleName: 1614824e7fdSDimitry Andric // _D QualifiedName Type 1624824e7fdSDimitry Andric // _D QualifiedName Z 1634824e7fdSDimitry Andric // ^ 1644824e7fdSDimitry Andric // The caller should have guaranteed that the start pointer is at the 1654824e7fdSDimitry Andric // above location. 1664824e7fdSDimitry Andric // Note that type is never a function type, but only the return type of 1674824e7fdSDimitry Andric // a function or the type of a variable. 1684824e7fdSDimitry Andric Mangled += 2; 1694824e7fdSDimitry Andric 1704824e7fdSDimitry Andric Mangled = parseQualified(Demangled, Mangled); 1714824e7fdSDimitry Andric 1724824e7fdSDimitry Andric if (Mangled != nullptr) { 1734824e7fdSDimitry Andric // Artificial symbols end with 'Z' and have no type. 1744824e7fdSDimitry Andric if (*Mangled == 'Z') 1754824e7fdSDimitry Andric ++Mangled; 1764824e7fdSDimitry Andric else { 1774824e7fdSDimitry Andric // TODO: Implement symbols with types. 1784824e7fdSDimitry Andric return nullptr; 1794824e7fdSDimitry Andric } 1804824e7fdSDimitry Andric } 1814824e7fdSDimitry Andric 1824824e7fdSDimitry Andric return Mangled; 1834824e7fdSDimitry Andric } 1844824e7fdSDimitry Andric 1854824e7fdSDimitry Andric const char *Demangler::parseQualified(OutputBuffer *Demangled, 1864824e7fdSDimitry Andric const char *Mangled) { 1874824e7fdSDimitry Andric // Qualified names are identifiers separated by their encoded length. 1884824e7fdSDimitry Andric // Nested functions also encode their argument types without specifying 1894824e7fdSDimitry Andric // what they return. 1904824e7fdSDimitry Andric // QualifiedName: 1914824e7fdSDimitry Andric // SymbolFunctionName 1924824e7fdSDimitry Andric // SymbolFunctionName QualifiedName 1934824e7fdSDimitry Andric // ^ 1944824e7fdSDimitry Andric // SymbolFunctionName: 1954824e7fdSDimitry Andric // SymbolName 1964824e7fdSDimitry Andric // SymbolName TypeFunctionNoReturn 1974824e7fdSDimitry Andric // SymbolName M TypeFunctionNoReturn 1984824e7fdSDimitry Andric // SymbolName M TypeModifiers TypeFunctionNoReturn 1994824e7fdSDimitry Andric // The start pointer should be at the above location. 2004824e7fdSDimitry Andric 2014824e7fdSDimitry Andric // Whether it has more than one symbol 2024824e7fdSDimitry Andric size_t NotFirst = false; 2034824e7fdSDimitry Andric do { 2044824e7fdSDimitry Andric // Skip over anonymous symbols. 2054824e7fdSDimitry Andric if (*Mangled == '0') { 2064824e7fdSDimitry Andric do 2074824e7fdSDimitry Andric ++Mangled; 2084824e7fdSDimitry Andric while (*Mangled == '0'); 2094824e7fdSDimitry Andric 2104824e7fdSDimitry Andric continue; 2114824e7fdSDimitry Andric } 2124824e7fdSDimitry Andric 2134824e7fdSDimitry Andric if (NotFirst) 2144824e7fdSDimitry Andric *Demangled << '.'; 2154824e7fdSDimitry Andric NotFirst = true; 2164824e7fdSDimitry Andric 2174824e7fdSDimitry Andric Mangled = parseIdentifier(Demangled, Mangled); 2184824e7fdSDimitry Andric 2194824e7fdSDimitry Andric } while (Mangled && isSymbolName(Mangled)); 2204824e7fdSDimitry Andric 2214824e7fdSDimitry Andric return Mangled; 2224824e7fdSDimitry Andric } 2234824e7fdSDimitry Andric 2244824e7fdSDimitry Andric const char *Demangler::parseIdentifier(OutputBuffer *Demangled, 2254824e7fdSDimitry Andric const char *Mangled) { 2264824e7fdSDimitry Andric unsigned long Len; 2274824e7fdSDimitry Andric 2284824e7fdSDimitry Andric if (Mangled == nullptr || *Mangled == '\0') 2294824e7fdSDimitry Andric return nullptr; 2304824e7fdSDimitry Andric 2314824e7fdSDimitry Andric // TODO: Parse back references and lengthless template instances. 2324824e7fdSDimitry Andric 2334824e7fdSDimitry Andric const char *Endptr = decodeNumber(Mangled, &Len); 2344824e7fdSDimitry Andric 2354824e7fdSDimitry Andric if (Endptr == nullptr || Len == 0) 2364824e7fdSDimitry Andric return nullptr; 2374824e7fdSDimitry Andric 2384824e7fdSDimitry Andric if (strlen(Endptr) < Len) 2394824e7fdSDimitry Andric return nullptr; 2404824e7fdSDimitry Andric 2414824e7fdSDimitry Andric Mangled = Endptr; 2424824e7fdSDimitry Andric 2434824e7fdSDimitry Andric // TODO: Parse template instances with a length prefix. 2444824e7fdSDimitry Andric 245*0eae32dcSDimitry Andric // There can be multiple different declarations in the same function that 246*0eae32dcSDimitry Andric // have the same mangled name. To make the mangled names unique, a fake 247*0eae32dcSDimitry Andric // parent in the form `__Sddd' is added to the symbol. 248*0eae32dcSDimitry Andric if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { 249*0eae32dcSDimitry Andric const char *NumPtr = Mangled + 3; 250*0eae32dcSDimitry Andric while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) 251*0eae32dcSDimitry Andric ++NumPtr; 252*0eae32dcSDimitry Andric 253*0eae32dcSDimitry Andric if (Mangled + Len == NumPtr) { 254*0eae32dcSDimitry Andric // Skip over the fake parent. 255*0eae32dcSDimitry Andric Mangled += Len; 256*0eae32dcSDimitry Andric return parseIdentifier(Demangled, Mangled); 257*0eae32dcSDimitry Andric } 258*0eae32dcSDimitry Andric 259*0eae32dcSDimitry Andric // Else demangle it as a plain identifier. 260*0eae32dcSDimitry Andric } 261*0eae32dcSDimitry Andric 2624824e7fdSDimitry Andric return parseLName(Demangled, Mangled, Len); 2634824e7fdSDimitry Andric } 2644824e7fdSDimitry Andric 2654824e7fdSDimitry Andric const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, 2664824e7fdSDimitry Andric unsigned long Len) { 267*0eae32dcSDimitry Andric switch (Len) { 268*0eae32dcSDimitry Andric case 6: 269*0eae32dcSDimitry Andric if (strncmp(Mangled, "__initZ", Len + 1) == 0) { 270*0eae32dcSDimitry Andric // The static initializer for a given symbol. 271*0eae32dcSDimitry Andric Demangled->prepend("initializer for "); 272*0eae32dcSDimitry Andric Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 273*0eae32dcSDimitry Andric Mangled += Len; 274*0eae32dcSDimitry Andric return Mangled; 275*0eae32dcSDimitry Andric } 276*0eae32dcSDimitry Andric if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) { 277*0eae32dcSDimitry Andric // The vtable symbol for a given class. 278*0eae32dcSDimitry Andric Demangled->prepend("vtable for "); 279*0eae32dcSDimitry Andric Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 280*0eae32dcSDimitry Andric Mangled += Len; 281*0eae32dcSDimitry Andric return Mangled; 282*0eae32dcSDimitry Andric } 283*0eae32dcSDimitry Andric break; 284*0eae32dcSDimitry Andric 285*0eae32dcSDimitry Andric case 7: 286*0eae32dcSDimitry Andric if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) { 287*0eae32dcSDimitry Andric // The classinfo symbol for a given class. 288*0eae32dcSDimitry Andric Demangled->prepend("ClassInfo for "); 289*0eae32dcSDimitry Andric Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 290*0eae32dcSDimitry Andric Mangled += Len; 291*0eae32dcSDimitry Andric return Mangled; 292*0eae32dcSDimitry Andric } 293*0eae32dcSDimitry Andric break; 294*0eae32dcSDimitry Andric 295*0eae32dcSDimitry Andric case 11: 296*0eae32dcSDimitry Andric if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { 297*0eae32dcSDimitry Andric // The interface symbol for a given class. 298*0eae32dcSDimitry Andric Demangled->prepend("Interface for "); 299*0eae32dcSDimitry Andric Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 300*0eae32dcSDimitry Andric Mangled += Len; 301*0eae32dcSDimitry Andric return Mangled; 302*0eae32dcSDimitry Andric } 303*0eae32dcSDimitry Andric break; 304*0eae32dcSDimitry Andric 305*0eae32dcSDimitry Andric case 12: 306*0eae32dcSDimitry Andric if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) { 307*0eae32dcSDimitry Andric // The ModuleInfo symbol for a given module. 308*0eae32dcSDimitry Andric Demangled->prepend("ModuleInfo for "); 309*0eae32dcSDimitry Andric Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 310*0eae32dcSDimitry Andric Mangled += Len; 311*0eae32dcSDimitry Andric return Mangled; 312*0eae32dcSDimitry Andric } 313*0eae32dcSDimitry Andric break; 314*0eae32dcSDimitry Andric } 315*0eae32dcSDimitry Andric 3164824e7fdSDimitry Andric *Demangled << StringView(Mangled, Len); 3174824e7fdSDimitry Andric Mangled += Len; 3184824e7fdSDimitry Andric 3194824e7fdSDimitry Andric return Mangled; 3204824e7fdSDimitry Andric } 3214824e7fdSDimitry Andric 3224824e7fdSDimitry Andric Demangler::Demangler(const char *Mangled) : Str(Mangled) {} 3234824e7fdSDimitry Andric 3244824e7fdSDimitry Andric const char *Demangler::parseMangle(OutputBuffer *Demangled) { 3254824e7fdSDimitry Andric return parseMangle(Demangled, this->Str); 3264824e7fdSDimitry Andric } 327349cc55cSDimitry Andric 328349cc55cSDimitry Andric char *llvm::dlangDemangle(const char *MangledName) { 329349cc55cSDimitry Andric if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) 330349cc55cSDimitry Andric return nullptr; 331349cc55cSDimitry Andric 332349cc55cSDimitry Andric OutputBuffer Demangled; 333349cc55cSDimitry Andric if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) 334349cc55cSDimitry Andric return nullptr; 335349cc55cSDimitry Andric 3364824e7fdSDimitry Andric if (strcmp(MangledName, "_Dmain") == 0) { 337349cc55cSDimitry Andric Demangled << "D main"; 3384824e7fdSDimitry Andric } else { 3394824e7fdSDimitry Andric 3404824e7fdSDimitry Andric Demangler D = Demangler(MangledName); 3414824e7fdSDimitry Andric MangledName = D.parseMangle(&Demangled); 3424824e7fdSDimitry Andric 3434824e7fdSDimitry Andric // Check that the entire symbol was successfully demangled. 3444824e7fdSDimitry Andric if (MangledName == nullptr || *MangledName != '\0') { 3454824e7fdSDimitry Andric std::free(Demangled.getBuffer()); 3464824e7fdSDimitry Andric return nullptr; 3474824e7fdSDimitry Andric } 3484824e7fdSDimitry Andric } 349349cc55cSDimitry Andric 350349cc55cSDimitry Andric // OutputBuffer's internal buffer is not null terminated and therefore we need 351349cc55cSDimitry Andric // to add it to comply with C null terminated strings. 352349cc55cSDimitry Andric if (Demangled.getCurrentPosition() > 0) { 353349cc55cSDimitry Andric Demangled << '\0'; 354349cc55cSDimitry Andric Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); 355349cc55cSDimitry Andric return Demangled.getBuffer(); 356349cc55cSDimitry Andric } 357349cc55cSDimitry Andric 3584824e7fdSDimitry Andric std::free(Demangled.getBuffer()); 359349cc55cSDimitry Andric return nullptr; 360349cc55cSDimitry Andric } 361