xref: /freebsd-src/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===--- DLangDemangle.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/Demangle.h"
17 #include "llvm/Demangle/StringView.h"
18 #include "llvm/Demangle/Utility.h"
19 
20 #include <cctype>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 using llvm::itanium_demangle::OutputBuffer;
26 using llvm::itanium_demangle::StringView;
27 
28 namespace {
29 
30 /// Demangle information structure.
31 struct Demangler {
32   /// Initialize the information structure we use to pass around information.
33   ///
34   /// \param Mangled String to demangle.
35   Demangler(const char *Mangled);
36 
37   /// Extract and demangle the mangled symbol and append it to the output
38   /// string.
39   ///
40   /// \param Demangled Output buffer to write the demangled name.
41   ///
42   /// \return The remaining string on success or nullptr on failure.
43   ///
44   /// \see https://dlang.org/spec/abi.html#name_mangling .
45   /// \see https://dlang.org/spec/abi.html#MangledName .
46   const char *parseMangle(OutputBuffer *Demangled);
47 
48 private:
49   /// Extract and demangle a given mangled symbol and append it to the output
50   /// string.
51   ///
52   /// \param Demangled output buffer to write the demangled name.
53   /// \param Mangled mangled symbol to be demangled.
54   ///
55   /// \return The remaining string on success or nullptr on failure.
56   ///
57   /// \see https://dlang.org/spec/abi.html#name_mangling .
58   /// \see https://dlang.org/spec/abi.html#MangledName .
59   const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
60 
61   /// Extract the number from a given string.
62   ///
63   /// \param Mangled string to extract the number.
64   /// \param Ret assigned result value.
65   ///
66   /// \return The remaining string on success or nullptr on failure.
67   ///
68   /// \note A result larger than UINT_MAX is considered a failure.
69   ///
70   /// \see https://dlang.org/spec/abi.html#Number .
71   const char *decodeNumber(const char *Mangled, unsigned long *Ret);
72 
73   /// Check whether it is the beginning of a symbol name.
74   ///
75   /// \param Mangled string to extract the symbol name.
76   ///
77   /// \return true on success, false otherwise.
78   ///
79   /// \see https://dlang.org/spec/abi.html#SymbolName .
80   bool isSymbolName(const char *Mangled);
81 
82   /// Extract and demangle an identifier from a given mangled symbol append it
83   /// to the output string.
84   ///
85   /// \param Demangled Output buffer to write the demangled name.
86   /// \param Mangled Mangled symbol to be demangled.
87   ///
88   /// \return The remaining string on success or nullptr on failure.
89   ///
90   /// \see https://dlang.org/spec/abi.html#SymbolName .
91   const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
92 
93   /// Extract and demangle the plain identifier from a given mangled symbol and
94   /// prepend/append it to the output string, with a special treatment for some
95   /// magic compiler generated symbols.
96   ///
97   /// \param Demangled Output buffer to write the demangled name.
98   /// \param Mangled Mangled symbol to be demangled.
99   /// \param Len Length of the mangled symbol name.
100   ///
101   /// \return The remaining string on success or nullptr on failure.
102   ///
103   /// \see https://dlang.org/spec/abi.html#LName .
104   const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
105                          unsigned long Len);
106 
107   /// Extract and demangle the qualified symbol from a given mangled symbol
108   /// append it to the output string.
109   ///
110   /// \param Demangled Output buffer to write the demangled name.
111   /// \param Mangled Mangled symbol to be demangled.
112   ///
113   /// \return The remaining string on success or nullptr on failure.
114   ///
115   /// \see https://dlang.org/spec/abi.html#QualifiedName .
116   const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
117 
118   /// The string we are demangling.
119   const char *Str;
120 };
121 
122 } // namespace
123 
124 const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
125   // Return nullptr if trying to extract something that isn't a digit.
126   if (Mangled == nullptr || !std::isdigit(*Mangled))
127     return nullptr;
128 
129   unsigned long Val = 0;
130 
131   do {
132     unsigned long Digit = Mangled[0] - '0';
133 
134     // Check for overflow.
135     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
136       return nullptr;
137 
138     Val = Val * 10 + Digit;
139     ++Mangled;
140   } while (std::isdigit(*Mangled));
141 
142   if (*Mangled == '\0')
143     return nullptr;
144 
145   *Ret = Val;
146   return Mangled;
147 }
148 
149 bool Demangler::isSymbolName(const char *Mangled) {
150   if (std::isdigit(*Mangled))
151     return true;
152 
153   // TODO: Handle symbol back references and template instances.
154   return false;
155 }
156 
157 const char *Demangler::parseMangle(OutputBuffer *Demangled,
158                                    const char *Mangled) {
159   // A D mangled symbol is comprised of both scope and type information.
160   //    MangleName:
161   //        _D QualifiedName Type
162   //        _D QualifiedName Z
163   //        ^
164   // The caller should have guaranteed that the start pointer is at the
165   // above location.
166   // Note that type is never a function type, but only the return type of
167   // a function or the type of a variable.
168   Mangled += 2;
169 
170   Mangled = parseQualified(Demangled, Mangled);
171 
172   if (Mangled != nullptr) {
173     // Artificial symbols end with 'Z' and have no type.
174     if (*Mangled == 'Z')
175       ++Mangled;
176     else {
177       // TODO: Implement symbols with types.
178       return nullptr;
179     }
180   }
181 
182   return Mangled;
183 }
184 
185 const char *Demangler::parseQualified(OutputBuffer *Demangled,
186                                       const char *Mangled) {
187   // Qualified names are identifiers separated by their encoded length.
188   // Nested functions also encode their argument types without specifying
189   // what they return.
190   //    QualifiedName:
191   //        SymbolFunctionName
192   //        SymbolFunctionName QualifiedName
193   //        ^
194   //    SymbolFunctionName:
195   //        SymbolName
196   //        SymbolName TypeFunctionNoReturn
197   //        SymbolName M TypeFunctionNoReturn
198   //        SymbolName M TypeModifiers TypeFunctionNoReturn
199   // The start pointer should be at the above location.
200 
201   // Whether it has more than one symbol
202   size_t NotFirst = false;
203   do {
204     // Skip over anonymous symbols.
205     if (*Mangled == '0') {
206       do
207         ++Mangled;
208       while (*Mangled == '0');
209 
210       continue;
211     }
212 
213     if (NotFirst)
214       *Demangled << '.';
215     NotFirst = true;
216 
217     Mangled = parseIdentifier(Demangled, Mangled);
218 
219   } while (Mangled && isSymbolName(Mangled));
220 
221   return Mangled;
222 }
223 
224 const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
225                                        const char *Mangled) {
226   unsigned long Len;
227 
228   if (Mangled == nullptr || *Mangled == '\0')
229     return nullptr;
230 
231   // TODO: Parse back references and lengthless template instances.
232 
233   const char *Endptr = decodeNumber(Mangled, &Len);
234 
235   if (Endptr == nullptr || Len == 0)
236     return nullptr;
237 
238   if (strlen(Endptr) < Len)
239     return nullptr;
240 
241   Mangled = Endptr;
242 
243   // TODO: Parse template instances with a length prefix.
244 
245   // There can be multiple different declarations in the same function that
246   // have the same mangled name.  To make the mangled names unique, a fake
247   // parent in the form `__Sddd' is added to the symbol.
248   if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
249     const char *NumPtr = Mangled + 3;
250     while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
251       ++NumPtr;
252 
253     if (Mangled + Len == NumPtr) {
254       // Skip over the fake parent.
255       Mangled += Len;
256       return parseIdentifier(Demangled, Mangled);
257     }
258 
259     // Else demangle it as a plain identifier.
260   }
261 
262   return parseLName(Demangled, Mangled, Len);
263 }
264 
265 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
266                                   unsigned long Len) {
267   switch (Len) {
268   case 6:
269     if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
270       // The static initializer for a given symbol.
271       Demangled->prepend("initializer for ");
272       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
273       Mangled += Len;
274       return Mangled;
275     }
276     if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
277       // The vtable symbol for a given class.
278       Demangled->prepend("vtable for ");
279       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
280       Mangled += Len;
281       return Mangled;
282     }
283     break;
284 
285   case 7:
286     if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
287       // The classinfo symbol for a given class.
288       Demangled->prepend("ClassInfo for ");
289       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
290       Mangled += Len;
291       return Mangled;
292     }
293     break;
294 
295   case 11:
296     if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
297       // The interface symbol for a given class.
298       Demangled->prepend("Interface for ");
299       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
300       Mangled += Len;
301       return Mangled;
302     }
303     break;
304 
305   case 12:
306     if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
307       // The ModuleInfo symbol for a given module.
308       Demangled->prepend("ModuleInfo for ");
309       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
310       Mangled += Len;
311       return Mangled;
312     }
313     break;
314   }
315 
316   *Demangled << StringView(Mangled, Len);
317   Mangled += Len;
318 
319   return Mangled;
320 }
321 
322 Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
323 
324 const char *Demangler::parseMangle(OutputBuffer *Demangled) {
325   return parseMangle(Demangled, this->Str);
326 }
327 
328 char *llvm::dlangDemangle(const char *MangledName) {
329   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
330     return nullptr;
331 
332   OutputBuffer Demangled;
333   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
334     return nullptr;
335 
336   if (strcmp(MangledName, "_Dmain") == 0) {
337     Demangled << "D main";
338   } else {
339 
340     Demangler D = Demangler(MangledName);
341     MangledName = D.parseMangle(&Demangled);
342 
343     // Check that the entire symbol was successfully demangled.
344     if (MangledName == nullptr || *MangledName != '\0') {
345       std::free(Demangled.getBuffer());
346       return nullptr;
347     }
348   }
349 
350   // OutputBuffer's internal buffer is not null terminated and therefore we need
351   // to add it to comply with C null terminated strings.
352   if (Demangled.getCurrentPosition() > 0) {
353     Demangled << '\0';
354     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
355     return Demangled.getBuffer();
356   }
357 
358   std::free(Demangled.getBuffer());
359   return nullptr;
360 }
361