1 //===--- DLangDemangle.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines a demangler for the D programming language as specified 11 /// in the ABI specification, available at: 12 /// https://dlang.org/spec/abi.html#name_mangling 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/Demangle.h" 17 #include "llvm/Demangle/StringView.h" 18 #include "llvm/Demangle/Utility.h" 19 20 #include <cctype> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 using llvm::itanium_demangle::OutputBuffer; 26 using llvm::itanium_demangle::StringView; 27 28 namespace { 29 30 /// Demangle information structure. 31 struct Demangler { 32 /// Initialize the information structure we use to pass around information. 33 /// 34 /// \param Mangled String to demangle. 35 Demangler(const char *Mangled); 36 37 /// Extract and demangle the mangled symbol and append it to the output 38 /// string. 39 /// 40 /// \param Demangled Output buffer to write the demangled name. 41 /// 42 /// \return The remaining string on success or nullptr on failure. 43 /// 44 /// \see https://dlang.org/spec/abi.html#name_mangling . 45 /// \see https://dlang.org/spec/abi.html#MangledName . 46 const char *parseMangle(OutputBuffer *Demangled); 47 48 private: 49 /// Extract and demangle a given mangled symbol and append it to the output 50 /// string. 51 /// 52 /// \param Demangled output buffer to write the demangled name. 53 /// \param Mangled mangled symbol to be demangled. 54 /// 55 /// \return The remaining string on success or nullptr on failure. 56 /// 57 /// \see https://dlang.org/spec/abi.html#name_mangling . 58 /// \see https://dlang.org/spec/abi.html#MangledName . 59 const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); 60 61 /// Extract the number from a given string. 62 /// 63 /// \param Mangled string to extract the number. 64 /// \param Ret assigned result value. 65 /// 66 /// \return The remaining string on success or nullptr on failure. 67 /// 68 /// \note A result larger than UINT_MAX is considered a failure. 69 /// 70 /// \see https://dlang.org/spec/abi.html#Number . 71 const char *decodeNumber(const char *Mangled, unsigned long *Ret); 72 73 /// Check whether it is the beginning of a symbol name. 74 /// 75 /// \param Mangled string to extract the symbol name. 76 /// 77 /// \return true on success, false otherwise. 78 /// 79 /// \see https://dlang.org/spec/abi.html#SymbolName . 80 bool isSymbolName(const char *Mangled); 81 82 /// Extract and demangle an identifier from a given mangled symbol append it 83 /// to the output string. 84 /// 85 /// \param Demangled Output buffer to write the demangled name. 86 /// \param Mangled Mangled symbol to be demangled. 87 /// 88 /// \return The remaining string on success or nullptr on failure. 89 /// 90 /// \see https://dlang.org/spec/abi.html#SymbolName . 91 const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); 92 93 /// Extract and demangle the plain identifier from a given mangled symbol and 94 /// prepend/append it to the output string, with a special treatment for some 95 /// magic compiler generated symbols. 96 /// 97 /// \param Demangled Output buffer to write the demangled name. 98 /// \param Mangled Mangled symbol to be demangled. 99 /// \param Len Length of the mangled symbol name. 100 /// 101 /// \return The remaining string on success or nullptr on failure. 102 /// 103 /// \see https://dlang.org/spec/abi.html#LName . 104 const char *parseLName(OutputBuffer *Demangled, const char *Mangled, 105 unsigned long Len); 106 107 /// Extract and demangle the qualified symbol from a given mangled symbol 108 /// append it to the output string. 109 /// 110 /// \param Demangled Output buffer to write the demangled name. 111 /// \param Mangled Mangled symbol to be demangled. 112 /// 113 /// \return The remaining string on success or nullptr on failure. 114 /// 115 /// \see https://dlang.org/spec/abi.html#QualifiedName . 116 const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); 117 118 /// The string we are demangling. 119 const char *Str; 120 }; 121 122 } // namespace 123 124 const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { 125 // Return nullptr if trying to extract something that isn't a digit. 126 if (Mangled == nullptr || !std::isdigit(*Mangled)) 127 return nullptr; 128 129 unsigned long Val = 0; 130 131 do { 132 unsigned long Digit = Mangled[0] - '0'; 133 134 // Check for overflow. 135 if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) 136 return nullptr; 137 138 Val = Val * 10 + Digit; 139 ++Mangled; 140 } while (std::isdigit(*Mangled)); 141 142 if (*Mangled == '\0') 143 return nullptr; 144 145 *Ret = Val; 146 return Mangled; 147 } 148 149 bool Demangler::isSymbolName(const char *Mangled) { 150 if (std::isdigit(*Mangled)) 151 return true; 152 153 // TODO: Handle symbol back references and template instances. 154 return false; 155 } 156 157 const char *Demangler::parseMangle(OutputBuffer *Demangled, 158 const char *Mangled) { 159 // A D mangled symbol is comprised of both scope and type information. 160 // MangleName: 161 // _D QualifiedName Type 162 // _D QualifiedName Z 163 // ^ 164 // The caller should have guaranteed that the start pointer is at the 165 // above location. 166 // Note that type is never a function type, but only the return type of 167 // a function or the type of a variable. 168 Mangled += 2; 169 170 Mangled = parseQualified(Demangled, Mangled); 171 172 if (Mangled != nullptr) { 173 // Artificial symbols end with 'Z' and have no type. 174 if (*Mangled == 'Z') 175 ++Mangled; 176 else { 177 // TODO: Implement symbols with types. 178 return nullptr; 179 } 180 } 181 182 return Mangled; 183 } 184 185 const char *Demangler::parseQualified(OutputBuffer *Demangled, 186 const char *Mangled) { 187 // Qualified names are identifiers separated by their encoded length. 188 // Nested functions also encode their argument types without specifying 189 // what they return. 190 // QualifiedName: 191 // SymbolFunctionName 192 // SymbolFunctionName QualifiedName 193 // ^ 194 // SymbolFunctionName: 195 // SymbolName 196 // SymbolName TypeFunctionNoReturn 197 // SymbolName M TypeFunctionNoReturn 198 // SymbolName M TypeModifiers TypeFunctionNoReturn 199 // The start pointer should be at the above location. 200 201 // Whether it has more than one symbol 202 size_t NotFirst = false; 203 do { 204 // Skip over anonymous symbols. 205 if (*Mangled == '0') { 206 do 207 ++Mangled; 208 while (*Mangled == '0'); 209 210 continue; 211 } 212 213 if (NotFirst) 214 *Demangled << '.'; 215 NotFirst = true; 216 217 Mangled = parseIdentifier(Demangled, Mangled); 218 219 } while (Mangled && isSymbolName(Mangled)); 220 221 return Mangled; 222 } 223 224 const char *Demangler::parseIdentifier(OutputBuffer *Demangled, 225 const char *Mangled) { 226 unsigned long Len; 227 228 if (Mangled == nullptr || *Mangled == '\0') 229 return nullptr; 230 231 // TODO: Parse back references and lengthless template instances. 232 233 const char *Endptr = decodeNumber(Mangled, &Len); 234 235 if (Endptr == nullptr || Len == 0) 236 return nullptr; 237 238 if (strlen(Endptr) < Len) 239 return nullptr; 240 241 Mangled = Endptr; 242 243 // TODO: Parse template instances with a length prefix. 244 245 // There can be multiple different declarations in the same function that 246 // have the same mangled name. To make the mangled names unique, a fake 247 // parent in the form `__Sddd' is added to the symbol. 248 if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') { 249 const char *NumPtr = Mangled + 3; 250 while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr)) 251 ++NumPtr; 252 253 if (Mangled + Len == NumPtr) { 254 // Skip over the fake parent. 255 Mangled += Len; 256 return parseIdentifier(Demangled, Mangled); 257 } 258 259 // Else demangle it as a plain identifier. 260 } 261 262 return parseLName(Demangled, Mangled, Len); 263 } 264 265 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, 266 unsigned long Len) { 267 switch (Len) { 268 case 6: 269 if (strncmp(Mangled, "__initZ", Len + 1) == 0) { 270 // The static initializer for a given symbol. 271 Demangled->prepend("initializer for "); 272 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 273 Mangled += Len; 274 return Mangled; 275 } 276 if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) { 277 // The vtable symbol for a given class. 278 Demangled->prepend("vtable for "); 279 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 280 Mangled += Len; 281 return Mangled; 282 } 283 break; 284 285 case 7: 286 if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) { 287 // The classinfo symbol for a given class. 288 Demangled->prepend("ClassInfo for "); 289 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 290 Mangled += Len; 291 return Mangled; 292 } 293 break; 294 295 case 11: 296 if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { 297 // The interface symbol for a given class. 298 Demangled->prepend("Interface for "); 299 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 300 Mangled += Len; 301 return Mangled; 302 } 303 break; 304 305 case 12: 306 if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) { 307 // The ModuleInfo symbol for a given module. 308 Demangled->prepend("ModuleInfo for "); 309 Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); 310 Mangled += Len; 311 return Mangled; 312 } 313 break; 314 } 315 316 *Demangled << StringView(Mangled, Len); 317 Mangled += Len; 318 319 return Mangled; 320 } 321 322 Demangler::Demangler(const char *Mangled) : Str(Mangled) {} 323 324 const char *Demangler::parseMangle(OutputBuffer *Demangled) { 325 return parseMangle(Demangled, this->Str); 326 } 327 328 char *llvm::dlangDemangle(const char *MangledName) { 329 if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) 330 return nullptr; 331 332 OutputBuffer Demangled; 333 if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) 334 return nullptr; 335 336 if (strcmp(MangledName, "_Dmain") == 0) { 337 Demangled << "D main"; 338 } else { 339 340 Demangler D = Demangler(MangledName); 341 MangledName = D.parseMangle(&Demangled); 342 343 // Check that the entire symbol was successfully demangled. 344 if (MangledName == nullptr || *MangledName != '\0') { 345 std::free(Demangled.getBuffer()); 346 return nullptr; 347 } 348 } 349 350 // OutputBuffer's internal buffer is not null terminated and therefore we need 351 // to add it to comply with C null terminated strings. 352 if (Demangled.getCurrentPosition() > 0) { 353 Demangled << '\0'; 354 Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); 355 return Demangled.getBuffer(); 356 } 357 358 std::free(Demangled.getBuffer()); 359 return nullptr; 360 } 361