xref: /freebsd-src/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
1 //===--- DLangDemangle.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/Demangle.h"
17 #include "llvm/Demangle/StringView.h"
18 #include "llvm/Demangle/Utility.h"
19 
20 #include <cctype>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 using llvm::itanium_demangle::OutputBuffer;
26 using llvm::itanium_demangle::StringView;
27 
28 namespace {
29 
30 /// Demangle information structure.
31 struct Demangler {
32   /// Initialize the information structure we use to pass around information.
33   ///
34   /// \param Mangled String to demangle.
35   Demangler(const char *Mangled);
36 
37   /// Extract and demangle the mangled symbol and append it to the output
38   /// string.
39   ///
40   /// \param Demangled Output buffer to write the demangled name.
41   ///
42   /// \return The remaining string on success or nullptr on failure.
43   ///
44   /// \see https://dlang.org/spec/abi.html#name_mangling .
45   /// \see https://dlang.org/spec/abi.html#MangledName .
46   const char *parseMangle(OutputBuffer *Demangled);
47 
48 private:
49   /// Extract and demangle a given mangled symbol and append it to the output
50   /// string.
51   ///
52   /// \param Demangled output buffer to write the demangled name.
53   /// \param Mangled mangled symbol to be demangled.
54   ///
55   /// \return The remaining string on success or nullptr on failure.
56   ///
57   /// \see https://dlang.org/spec/abi.html#name_mangling .
58   /// \see https://dlang.org/spec/abi.html#MangledName .
59   const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
60 
61   /// Extract the number from a given string.
62   ///
63   /// \param Mangled string to extract the number.
64   /// \param Ret assigned result value.
65   ///
66   /// \return The remaining string on success or nullptr on failure.
67   ///
68   /// \note A result larger than UINT_MAX is considered a failure.
69   ///
70   /// \see https://dlang.org/spec/abi.html#Number .
71   const char *decodeNumber(const char *Mangled, unsigned long *Ret);
72 
73   /// Check whether it is the beginning of a symbol name.
74   ///
75   /// \param Mangled string to extract the symbol name.
76   ///
77   /// \return true on success, false otherwise.
78   ///
79   /// \see https://dlang.org/spec/abi.html#SymbolName .
80   bool isSymbolName(const char *Mangled);
81 
82   /// Extract and demangle an identifier from a given mangled symbol append it
83   /// to the output string.
84   ///
85   /// \param Demangled Output buffer to write the demangled name.
86   /// \param Mangled Mangled symbol to be demangled.
87   ///
88   /// \return The remaining string on success or nullptr on failure.
89   ///
90   /// \see https://dlang.org/spec/abi.html#SymbolName .
91   const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
92 
93   /// Extract and demangle the plain identifier from a given mangled symbol and
94   /// prepend/append it to the output string, with a special treatment for some
95   /// magic compiler generated symbols.
96   ///
97   /// \param Demangled Output buffer to write the demangled name.
98   /// \param Mangled Mangled symbol to be demangled.
99   /// \param Len Length of the mangled symbol name.
100   ///
101   /// \return The remaining string on success or nullptr on failure.
102   ///
103   /// \see https://dlang.org/spec/abi.html#LName .
104   const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
105                          unsigned long Len);
106 
107   /// Extract and demangle the qualified symbol from a given mangled symbol
108   /// append it to the output string.
109   ///
110   /// \param Demangled Output buffer to write the demangled name.
111   /// \param Mangled Mangled symbol to be demangled.
112   ///
113   /// \return The remaining string on success or nullptr on failure.
114   ///
115   /// \see https://dlang.org/spec/abi.html#QualifiedName .
116   const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
117 
118   /// The string we are demangling.
119   const char *Str;
120 };
121 
122 } // namespace
123 
124 const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
125   // Return nullptr if trying to extract something that isn't a digit.
126   if (Mangled == nullptr || !std::isdigit(*Mangled))
127     return nullptr;
128 
129   unsigned long Val = 0;
130 
131   do {
132     unsigned long Digit = Mangled[0] - '0';
133 
134     // Check for overflow.
135     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
136       return nullptr;
137 
138     Val = Val * 10 + Digit;
139     ++Mangled;
140   } while (std::isdigit(*Mangled));
141 
142   if (*Mangled == '\0')
143     return nullptr;
144 
145   *Ret = Val;
146   return Mangled;
147 }
148 
149 bool Demangler::isSymbolName(const char *Mangled) {
150   if (std::isdigit(*Mangled))
151     return true;
152 
153   // TODO: Handle symbol back references and template instances.
154   return false;
155 }
156 
157 const char *Demangler::parseMangle(OutputBuffer *Demangled,
158                                    const char *Mangled) {
159   // A D mangled symbol is comprised of both scope and type information.
160   //    MangleName:
161   //        _D QualifiedName Type
162   //        _D QualifiedName Z
163   //        ^
164   // The caller should have guaranteed that the start pointer is at the
165   // above location.
166   // Note that type is never a function type, but only the return type of
167   // a function or the type of a variable.
168   Mangled += 2;
169 
170   Mangled = parseQualified(Demangled, Mangled);
171 
172   if (Mangled != nullptr) {
173     // Artificial symbols end with 'Z' and have no type.
174     if (*Mangled == 'Z')
175       ++Mangled;
176     else {
177       // TODO: Implement symbols with types.
178       return nullptr;
179     }
180   }
181 
182   return Mangled;
183 }
184 
185 const char *Demangler::parseQualified(OutputBuffer *Demangled,
186                                       const char *Mangled) {
187   // Qualified names are identifiers separated by their encoded length.
188   // Nested functions also encode their argument types without specifying
189   // what they return.
190   //    QualifiedName:
191   //        SymbolFunctionName
192   //        SymbolFunctionName QualifiedName
193   //        ^
194   //    SymbolFunctionName:
195   //        SymbolName
196   //        SymbolName TypeFunctionNoReturn
197   //        SymbolName M TypeFunctionNoReturn
198   //        SymbolName M TypeModifiers TypeFunctionNoReturn
199   // The start pointer should be at the above location.
200 
201   // Whether it has more than one symbol
202   size_t NotFirst = false;
203   do {
204     // Skip over anonymous symbols.
205     if (*Mangled == '0') {
206       do
207         ++Mangled;
208       while (*Mangled == '0');
209 
210       continue;
211     }
212 
213     if (NotFirst)
214       *Demangled << '.';
215     NotFirst = true;
216 
217     Mangled = parseIdentifier(Demangled, Mangled);
218 
219   } while (Mangled && isSymbolName(Mangled));
220 
221   return Mangled;
222 }
223 
224 const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
225                                        const char *Mangled) {
226   unsigned long Len;
227 
228   if (Mangled == nullptr || *Mangled == '\0')
229     return nullptr;
230 
231   // TODO: Parse back references and lengthless template instances.
232 
233   const char *Endptr = decodeNumber(Mangled, &Len);
234 
235   if (Endptr == nullptr || Len == 0)
236     return nullptr;
237 
238   if (strlen(Endptr) < Len)
239     return nullptr;
240 
241   Mangled = Endptr;
242 
243   // TODO: Parse template instances with a length prefix.
244 
245   return parseLName(Demangled, Mangled, Len);
246 }
247 
248 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
249                                   unsigned long Len) {
250   *Demangled << StringView(Mangled, Len);
251   Mangled += Len;
252 
253   return Mangled;
254 }
255 
256 Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
257 
258 const char *Demangler::parseMangle(OutputBuffer *Demangled) {
259   return parseMangle(Demangled, this->Str);
260 }
261 
262 char *llvm::dlangDemangle(const char *MangledName) {
263   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
264     return nullptr;
265 
266   OutputBuffer Demangled;
267   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
268     return nullptr;
269 
270   if (strcmp(MangledName, "_Dmain") == 0) {
271     Demangled << "D main";
272   } else {
273 
274     Demangler D = Demangler(MangledName);
275     MangledName = D.parseMangle(&Demangled);
276 
277     // Check that the entire symbol was successfully demangled.
278     if (MangledName == nullptr || *MangledName != '\0') {
279       std::free(Demangled.getBuffer());
280       return nullptr;
281     }
282   }
283 
284   // OutputBuffer's internal buffer is not null terminated and therefore we need
285   // to add it to comply with C null terminated strings.
286   if (Demangled.getCurrentPosition() > 0) {
287     Demangled << '\0';
288     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
289     return Demangled.getBuffer();
290   }
291 
292   std::free(Demangled.getBuffer());
293   return nullptr;
294 }
295