xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 78e949159d105b7947dbae973080ea343e8f9eda)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
/// Returns true if C is a decimal digit: <0-9>.
static inline bool isDigit(const char C) {
  // A single unsigned comparison covers both range bounds: values below '0'
  // wrap around to a large unsigned number.
  return static_cast<unsigned>(C - '0') <= static_cast<unsigned>('9' - '0');
}
82 
/// Returns true if C is a lowercase letter: <a-z>.
static inline bool isLower(const char C) {
  // A single unsigned comparison covers both range bounds: values below 'a'
  // wrap around to a large unsigned number.
  return static_cast<unsigned>(C - 'a') <= static_cast<unsigned>('z' - 'a');
}
84 
/// Returns true if C is an uppercase letter: <A-Z>.
static inline bool isUpper(const char C) {
  // A single unsigned comparison covers both range bounds: values below 'A'
  // wrap around to a large unsigned number.
  return static_cast<unsigned>(C - 'A') <= static_cast<unsigned>('Z' - 'A');
}
86 
87 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
88 static inline bool isValid(const char C) {
89   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
90 }
91 
92 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
93 // otherwise. The demangled symbol is stored in Output field. It is
94 // responsibility of the caller to free the memory behind the output stream.
95 //
96 // <symbol-name> = "_R" <path> [<instantiating-crate>]
97 bool Demangler::demangle(StringView Mangled) {
98   Position = 0;
99   Error = false;
100   RecursionLevel = 0;
101 
102   if (!Mangled.consumeFront("_R")) {
103     Error = true;
104     return false;
105   }
106   Input = Mangled;
107 
108   demanglePath();
109 
110   // FIXME parse optional <instantiating-crate>.
111 
112   if (Position != Input.size())
113     Error = true;
114 
115   return !Error;
116 }
117 
118 // <path> = "C" <identifier>               // crate root
119 //        | "M" <impl-path> <type>         // <T> (inherent impl)
120 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
121 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
122 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
123 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
124 //        | <backref>
125 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
126 // <ns> = "C"      // closure
127 //      | "S"      // shim
128 //      | <A-Z>    // other special namespaces
129 //      | <a-z>    // internal namespaces
130 void Demangler::demanglePath() {
131   if (Error || RecursionLevel >= MaxRecursionLevel) {
132     Error = true;
133     return;
134   }
135   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
136 
137   switch (consume()) {
138   case 'C': {
139     parseOptionalBase62Number('s');
140     Identifier Ident = parseIdentifier();
141     print(Ident.Name);
142     break;
143   }
144   case 'N': {
145     char NS = consume();
146     if (!isLower(NS) && !isUpper(NS)) {
147       Error = true;
148       break;
149     }
150     demanglePath();
151 
152     uint64_t Disambiguator = parseOptionalBase62Number('s');
153     Identifier Ident = parseIdentifier();
154 
155     if (isUpper(NS)) {
156       // Special namespaces
157       print("::{");
158       if (NS == 'C')
159         print("closure");
160       else if (NS == 'S')
161         print("shim");
162       else
163         print(NS);
164       if (!Ident.empty()) {
165         print(":");
166         print(Ident.Name);
167       }
168       print('#');
169       printDecimalNumber(Disambiguator);
170       print('}');
171     } else {
172       // Implementation internal namespaces.
173       if (!Ident.empty()) {
174         print("::");
175         print(Ident.Name);
176       }
177     }
178     break;
179   }
180   default:
181     // FIXME parse remaining productions.
182     Error = true;
183     break;
184   }
185 }
186 
187 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
188 Identifier Demangler::parseIdentifier() {
189   bool Punycode = consumeIf('u');
190   uint64_t Bytes = parseDecimalNumber();
191 
192   // Underscore resolves the ambiguity when identifier starts with a decimal
193   // digit or another underscore.
194   consumeIf('_');
195 
196   if (Error || Bytes > Input.size() - Position) {
197     Error = true;
198     return {};
199   }
200   StringView S = Input.substr(Position, Bytes);
201   Position += Bytes;
202 
203   if (!std::all_of(S.begin(), S.end(), isValid)) {
204     Error = true;
205     return {};
206   }
207 
208   return {S, Punycode};
209 }
210 
211 // Parses optional base 62 number. The presence of a number is determined using
212 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
213 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
214   if (!consumeIf(Tag))
215     return 0;
216 
217   uint64_t N = parseBase62Number();
218   if (Error || !addAssign(N, 1))
219     return 0;
220 
221   return N;
222 }
223 
224 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
225 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
226 // "1_" encodes 2, etc.
227 //
228 // <base-62-number> = {<0-9a-zA-Z>} "_"
229 uint64_t Demangler::parseBase62Number() {
230   if (consumeIf('_'))
231     return 0;
232 
233   uint64_t Value = 0;
234 
235   while (true) {
236     uint64_t Digit;
237     char C = consume();
238 
239     if (C == '_') {
240       break;
241     } else if (isDigit(C)) {
242       Digit = C - '0';
243     } else if (isLower(C)) {
244       Digit = 10 + (C - 'a');
245     } else if (isUpper(C)) {
246       Digit = 10 + 26 + (C - 'A');
247     } else {
248       Error = true;
249       return 0;
250     }
251 
252     if (!mulAssign(Value, 62))
253       return 0;
254 
255     if (!addAssign(Value, Digit))
256       return 0;
257   }
258 
259   if (!addAssign(Value, 1))
260     return 0;
261 
262   return Value;
263 }
264 
265 // Parses a decimal number that had been encoded without any leading zeros.
266 //
267 // <decimal-number> = "0"
268 //                  | <1-9> {<0-9>}
269 uint64_t Demangler::parseDecimalNumber() {
270   char C = look();
271   if (!isDigit(C)) {
272     Error = true;
273     return 0;
274   }
275 
276   if (C == '0') {
277     consume();
278     return 0;
279   }
280 
281   uint64_t Value = 0;
282 
283   while (isDigit(look())) {
284     if (!mulAssign(Value, 10)) {
285       Error = true;
286       return 0;
287     }
288 
289     uint64_t D = consume() - '0';
290     if (!addAssign(Value, D))
291       return 0;
292   }
293 
294   return Value;
295 }
296