1 //===--- DLangDemangle.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines a demangler for the D programming language as specified 11 /// in the ABI specification, available at: 12 /// https://dlang.org/spec/abi.html#name_mangling 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/Demangle.h" 17 #include "llvm/Demangle/StringView.h" 18 #include "llvm/Demangle/Utility.h" 19 20 #include <cctype> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 using llvm::itanium_demangle::OutputBuffer; 26 using llvm::itanium_demangle::StringView; 27 28 namespace { 29 30 /// Demangle information structure. 31 struct Demangler { 32 /// Initialize the information structure we use to pass around information. 33 /// 34 /// \param Mangled String to demangle. 35 Demangler(const char *Mangled); 36 37 /// Extract and demangle the mangled symbol and append it to the output 38 /// string. 39 /// 40 /// \param Demangled Output buffer to write the demangled name. 41 /// 42 /// \return The remaining string on success or nullptr on failure. 43 /// 44 /// \see https://dlang.org/spec/abi.html#name_mangling . 45 /// \see https://dlang.org/spec/abi.html#MangledName . 46 const char *parseMangle(OutputBuffer *Demangled); 47 48 private: 49 /// Extract and demangle a given mangled symbol and append it to the output 50 /// string. 51 /// 52 /// \param Demangled output buffer to write the demangled name. 53 /// \param Mangled mangled symbol to be demangled. 54 /// 55 /// \return The remaining string on success or nullptr on failure. 56 /// 57 /// \see https://dlang.org/spec/abi.html#name_mangling . 58 /// \see https://dlang.org/spec/abi.html#MangledName . 59 const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); 60 61 /// Extract the number from a given string. 62 /// 63 /// \param Mangled string to extract the number. 64 /// \param Ret assigned result value. 65 /// 66 /// \return The remaining string on success or nullptr on failure. 67 /// 68 /// \note A result larger than UINT_MAX is considered a failure. 69 /// 70 /// \see https://dlang.org/spec/abi.html#Number . 71 const char *decodeNumber(const char *Mangled, unsigned long *Ret); 72 73 /// Check whether it is the beginning of a symbol name. 74 /// 75 /// \param Mangled string to extract the symbol name. 76 /// 77 /// \return true on success, false otherwise. 78 /// 79 /// \see https://dlang.org/spec/abi.html#SymbolName . 80 bool isSymbolName(const char *Mangled); 81 82 /// Extract and demangle an identifier from a given mangled symbol append it 83 /// to the output string. 84 /// 85 /// \param Demangled Output buffer to write the demangled name. 86 /// \param Mangled Mangled symbol to be demangled. 87 /// 88 /// \return The remaining string on success or nullptr on failure. 89 /// 90 /// \see https://dlang.org/spec/abi.html#SymbolName . 91 const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); 92 93 /// Extract and demangle the plain identifier from a given mangled symbol and 94 /// prepend/append it to the output string, with a special treatment for some 95 /// magic compiler generated symbols. 96 /// 97 /// \param Demangled Output buffer to write the demangled name. 98 /// \param Mangled Mangled symbol to be demangled. 99 /// \param Len Length of the mangled symbol name. 100 /// 101 /// \return The remaining string on success or nullptr on failure. 102 /// 103 /// \see https://dlang.org/spec/abi.html#LName . 104 const char *parseLName(OutputBuffer *Demangled, const char *Mangled, 105 unsigned long Len); 106 107 /// Extract and demangle the qualified symbol from a given mangled symbol 108 /// append it to the output string. 109 /// 110 /// \param Demangled Output buffer to write the demangled name. 111 /// \param Mangled Mangled symbol to be demangled. 112 /// 113 /// \return The remaining string on success or nullptr on failure. 114 /// 115 /// \see https://dlang.org/spec/abi.html#QualifiedName . 116 const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); 117 118 /// The string we are demangling. 119 const char *Str; 120 }; 121 122 } // namespace 123 124 const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { 125 // Return nullptr if trying to extract something that isn't a digit. 126 if (Mangled == nullptr || !std::isdigit(*Mangled)) 127 return nullptr; 128 129 unsigned long Val = 0; 130 131 do { 132 unsigned long Digit = Mangled[0] - '0'; 133 134 // Check for overflow. 135 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) 136 return nullptr; 137 138 Val = Val * 10 + Digit; 139 ++Mangled; 140 } while (std::isdigit(*Mangled)); 141 142 if (*Mangled == '\0') 143 return nullptr; 144 145 *Ret = Val; 146 return Mangled; 147 } 148 149 bool Demangler::isSymbolName(const char *Mangled) { 150 if (std::isdigit(*Mangled)) 151 return true; 152 153 // TODO: Handle symbol back references and template instances. 154 return false; 155 } 156 157 const char *Demangler::parseMangle(OutputBuffer *Demangled, 158 const char *Mangled) { 159 // A D mangled symbol is comprised of both scope and type information. 160 // MangleName: 161 // _D QualifiedName Type 162 // _D QualifiedName Z 163 // ^ 164 // The caller should have guaranteed that the start pointer is at the 165 // above location. 166 // Note that type is never a function type, but only the return type of 167 // a function or the type of a variable. 168 Mangled += 2; 169 170 Mangled = parseQualified(Demangled, Mangled); 171 172 if (Mangled != nullptr) { 173 // Artificial symbols end with 'Z' and have no type. 174 if (*Mangled == 'Z') 175 ++Mangled; 176 else { 177 // TODO: Implement symbols with types. 178 return nullptr; 179 } 180 } 181 182 return Mangled; 183 } 184 185 const char *Demangler::parseQualified(OutputBuffer *Demangled, 186 const char *Mangled) { 187 // Qualified names are identifiers separated by their encoded length. 188 // Nested functions also encode their argument types without specifying 189 // what they return. 190 // QualifiedName: 191 // SymbolFunctionName 192 // SymbolFunctionName QualifiedName 193 // ^ 194 // SymbolFunctionName: 195 // SymbolName 196 // SymbolName TypeFunctionNoReturn 197 // SymbolName M TypeFunctionNoReturn 198 // SymbolName M TypeModifiers TypeFunctionNoReturn 199 // The start pointer should be at the above location. 200 201 // Whether it has more than one symbol 202 size_t NotFirst = false; 203 do { 204 // Skip over anonymous symbols. 205 if (*Mangled == '0') { 206 do 207 ++Mangled; 208 while (*Mangled == '0'); 209 210 continue; 211 } 212 213 if (NotFirst) 214 *Demangled << '.'; 215 NotFirst = true; 216 217 Mangled = parseIdentifier(Demangled, Mangled); 218 219 } while (Mangled && isSymbolName(Mangled)); 220 221 return Mangled; 222 } 223 224 const char *Demangler::parseIdentifier(OutputBuffer *Demangled, 225 const char *Mangled) { 226 unsigned long Len; 227 228 if (Mangled == nullptr || *Mangled == '\0') 229 return nullptr; 230 231 // TODO: Parse back references and lengthless template instances. 232 233 const char *Endptr = decodeNumber(Mangled, &Len); 234 235 if (Endptr == nullptr || Len == 0) 236 return nullptr; 237 238 if (strlen(Endptr) < Len) 239 return nullptr; 240 241 Mangled = Endptr; 242 243 // TODO: Parse template instances with a length prefix. 244 245 return parseLName(Demangled, Mangled, Len); 246 } 247 248 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, 249 unsigned long Len) { 250 *Demangled << StringView(Mangled, Len); 251 Mangled += Len; 252 253 return Mangled; 254 } 255 256 Demangler::Demangler(const char *Mangled) : Str(Mangled) {} 257 258 const char *Demangler::parseMangle(OutputBuffer *Demangled) { 259 return parseMangle(Demangled, this->Str); 260 } 261 262 char *llvm::dlangDemangle(const char *MangledName) { 263 if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) 264 return nullptr; 265 266 OutputBuffer Demangled; 267 if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) 268 return nullptr; 269 270 if (strcmp(MangledName, "_Dmain") == 0) { 271 Demangled << "D main"; 272 } else { 273 274 Demangler D = Demangler(MangledName); 275 MangledName = D.parseMangle(&Demangled); 276 277 // Check that the entire symbol was successfully demangled. 278 if (MangledName == nullptr || *MangledName != '\0') { 279 std::free(Demangled.getBuffer()); 280 return nullptr; 281 } 282 } 283 284 // OutputBuffer's internal buffer is not null terminated and therefore we need 285 // to add it to comply with C null terminated strings. 286 if (Demangled.getCurrentPosition() > 0) { 287 Demangled << '\0'; 288 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); 289 return Demangled.getBuffer(); 290 } 291 292 std::free(Demangled.getBuffer()); 293 return nullptr; 294 } 295