xref: /netbsd-src/external/gpl3/gcc/dist/libiberty/rust-demangle.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Demangler for the Rust programming language
2    Copyright (C) 2016-2022 Free Software Foundation, Inc.
3    Written by David Tolnay (dtolnay@gmail.com).
4    Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
5 
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11 
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file.  (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
20 
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24 Library General Public License for more details.
25 
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>.  */
29 
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include "safe-ctype.h"
36 
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
50 
51 #include <demangle.h>
52 #include "libiberty.h"
53 
54 struct rust_demangler
55 {
56   const char *sym;
57   size_t sym_len;
58 
59   void *callback_opaque;
60   demangle_callbackref callback;
61 
62   /* Position of the next character to read from the symbol. */
63   size_t next;
64 
65   /* Non-zero if any error occurred. */
66   int errored;
67 
68   /* Non-zero if nothing should be printed. */
69   int skipping_printing;
70 
71   /* Non-zero if printing should be verbose (e.g. include hashes). */
72   int verbose;
73 
74   /* Rust mangling version, with legacy mangling being -1. */
75   int version;
76 
77   /* Recursion depth.  */
78   unsigned int recursion;
79   /* Maximum number of times demangle_path may be called recursively.  */
80 #define RUST_MAX_RECURSION_COUNT  1024
81 #define RUST_NO_RECURSION_LIMIT   ((unsigned int) -1)
82 
83   uint64_t bound_lifetime_depth;
84 };
85 
86 /* Parsing functions. */
87 
88 static char
peek(const struct rust_demangler * rdm)89 peek (const struct rust_demangler *rdm)
90 {
91   if (rdm->next < rdm->sym_len)
92     return rdm->sym[rdm->next];
93   return 0;
94 }
95 
96 static int
eat(struct rust_demangler * rdm,char c)97 eat (struct rust_demangler *rdm, char c)
98 {
99   if (peek (rdm) == c)
100     {
101       rdm->next++;
102       return 1;
103     }
104   else
105     return 0;
106 }
107 
108 static char
next(struct rust_demangler * rdm)109 next (struct rust_demangler *rdm)
110 {
111   char c = peek (rdm);
112   if (!c)
113     rdm->errored = 1;
114   else
115     rdm->next++;
116   return c;
117 }
118 
119 static uint64_t
parse_integer_62(struct rust_demangler * rdm)120 parse_integer_62 (struct rust_demangler *rdm)
121 {
122   char c;
123   uint64_t x;
124 
125   if (eat (rdm, '_'))
126     return 0;
127 
128   x = 0;
129   while (!eat (rdm, '_'))
130     {
131       c = next (rdm);
132       x *= 62;
133       if (ISDIGIT (c))
134         x += c - '0';
135       else if (ISLOWER (c))
136         x += 10 + (c - 'a');
137       else if (ISUPPER (c))
138         x += 10 + 26 + (c - 'A');
139       else
140         {
141           rdm->errored = 1;
142           return 0;
143         }
144     }
145   return x + 1;
146 }
147 
148 static uint64_t
parse_opt_integer_62(struct rust_demangler * rdm,char tag)149 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
150 {
151   if (!eat (rdm, tag))
152     return 0;
153   return 1 + parse_integer_62 (rdm);
154 }
155 
156 static uint64_t
parse_disambiguator(struct rust_demangler * rdm)157 parse_disambiguator (struct rust_demangler *rdm)
158 {
159   return parse_opt_integer_62 (rdm, 's');
160 }
161 
162 static size_t
parse_hex_nibbles(struct rust_demangler * rdm,uint64_t * value)163 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
164 {
165   char c;
166   size_t hex_len;
167 
168   hex_len = 0;
169   *value = 0;
170 
171   while (!eat (rdm, '_'))
172     {
173       *value <<= 4;
174 
175       c = next (rdm);
176       if (ISDIGIT (c))
177         *value |= c - '0';
178       else if (c >= 'a' && c <= 'f')
179         *value |= 10 + (c - 'a');
180       else
181         {
182           rdm->errored = 1;
183           return 0;
184         }
185       hex_len++;
186     }
187 
188   return hex_len;
189 }
190 
191 struct rust_mangled_ident
192 {
193   /* ASCII part of the identifier. */
194   const char *ascii;
195   size_t ascii_len;
196 
197   /* Punycode insertion codes for Unicode codepoints, if any. */
198   const char *punycode;
199   size_t punycode_len;
200 };
201 
202 static struct rust_mangled_ident
parse_ident(struct rust_demangler * rdm)203 parse_ident (struct rust_demangler *rdm)
204 {
205   char c;
206   size_t start, len;
207   int is_punycode = 0;
208   struct rust_mangled_ident ident;
209 
210   ident.ascii = NULL;
211   ident.ascii_len = 0;
212   ident.punycode = NULL;
213   ident.punycode_len = 0;
214 
215   if (rdm->version != -1)
216     is_punycode = eat (rdm, 'u');
217 
218   c = next (rdm);
219   if (!ISDIGIT (c))
220     {
221       rdm->errored = 1;
222       return ident;
223     }
224   len = c - '0';
225 
226   if (c != '0')
227     while (ISDIGIT (peek (rdm)))
228       len = len * 10 + (next (rdm) - '0');
229 
230   /* Skip past the optional `_` separator (v0). */
231   if (rdm->version != -1)
232     eat (rdm, '_');
233 
234   start = rdm->next;
235   rdm->next += len;
236   /* Check for overflows. */
237   if ((start > rdm->next) || (rdm->next > rdm->sym_len))
238     {
239       rdm->errored = 1;
240       return ident;
241     }
242 
243   ident.ascii = rdm->sym + start;
244   ident.ascii_len = len;
245 
246   if (is_punycode)
247     {
248       ident.punycode_len = 0;
249       while (ident.ascii_len > 0)
250         {
251           ident.ascii_len--;
252 
253           /* The last '_' is a separator between ascii & punycode. */
254           if (ident.ascii[ident.ascii_len] == '_')
255             break;
256 
257           ident.punycode_len++;
258         }
259       if (!ident.punycode_len)
260         {
261           rdm->errored = 1;
262           return ident;
263         }
264       ident.punycode = ident.ascii + (len - ident.punycode_len);
265     }
266 
267   if (ident.ascii_len == 0)
268     ident.ascii = NULL;
269 
270   return ident;
271 }
272 
273 /* Printing functions. */
274 
275 static void
print_str(struct rust_demangler * rdm,const char * data,size_t len)276 print_str (struct rust_demangler *rdm, const char *data, size_t len)
277 {
278   if (!rdm->errored && !rdm->skipping_printing)
279     rdm->callback (data, len, rdm->callback_opaque);
280 }
281 
282 #define PRINT(s) print_str (rdm, s, strlen (s))
283 
284 static void
print_uint64(struct rust_demangler * rdm,uint64_t x)285 print_uint64 (struct rust_demangler *rdm, uint64_t x)
286 {
287   char s[21];
288   snprintf (s, 21, "%" PRIu64, x);
289   PRINT (s);
290 }
291 
292 static void
print_uint64_hex(struct rust_demangler * rdm,uint64_t x)293 print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
294 {
295   char s[17];
296   snprintf (s, 17, "%" PRIx64, x);
297   PRINT (s);
298 }
299 
300 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
301 static int
decode_lower_hex_nibble(char nibble)302 decode_lower_hex_nibble (char nibble)
303 {
304   if ('0' <= nibble && nibble <= '9')
305     return nibble - '0';
306   if ('a' <= nibble && nibble <= 'f')
307     return 0xa + (nibble - 'a');
308   return -1;
309 }
310 
311 /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
312 static char
decode_legacy_escape(const char * e,size_t len,size_t * out_len)313 decode_legacy_escape (const char *e, size_t len, size_t *out_len)
314 {
315   char c = 0;
316   size_t escape_len = 0;
317   int lo_nibble = -1, hi_nibble = -1;
318 
319   if (len < 3 || e[0] != '$')
320     return 0;
321 
322   e++;
323   len--;
324 
325   if (e[0] == 'C')
326     {
327       escape_len = 1;
328 
329       c = ',';
330     }
331   else if (len > 2)
332     {
333       escape_len = 2;
334 
335       if (e[0] == 'S' && e[1] == 'P')
336         c = '@';
337       else if (e[0] == 'B' && e[1] == 'P')
338         c = '*';
339       else if (e[0] == 'R' && e[1] == 'F')
340         c = '&';
341       else if (e[0] == 'L' && e[1] == 'T')
342         c = '<';
343       else if (e[0] == 'G' && e[1] == 'T')
344         c = '>';
345       else if (e[0] == 'L' && e[1] == 'P')
346         c = '(';
347       else if (e[0] == 'R' && e[1] == 'P')
348         c = ')';
349       else if (e[0] == 'u' && len > 3)
350         {
351           escape_len = 3;
352 
353           hi_nibble = decode_lower_hex_nibble (e[1]);
354           if (hi_nibble < 0)
355             return 0;
356           lo_nibble = decode_lower_hex_nibble (e[2]);
357           if (lo_nibble < 0)
358             return 0;
359 
360           /* Only allow non-control ASCII characters. */
361           if (hi_nibble > 7)
362             return 0;
363           c = (hi_nibble << 4) | lo_nibble;
364           if (c < 0x20)
365             return 0;
366         }
367     }
368 
369   if (!c || len <= escape_len || e[escape_len] != '$')
370     return 0;
371 
372   *out_len = 2 + escape_len;
373   return c;
374 }
375 
376 static void
print_ident(struct rust_demangler * rdm,struct rust_mangled_ident ident)377 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
378 {
379   char unescaped;
380   uint8_t *out, *p, d;
381   size_t len, cap, punycode_pos, j;
382   /* Punycode parameters and state. */
383   uint32_t c;
384   size_t base, t_min, t_max, skew, damp, bias, i;
385   size_t delta, w, k, t;
386 
387   if (rdm->errored || rdm->skipping_printing)
388     return;
389 
390   if (rdm->version == -1)
391     {
392       /* Ignore leading underscores preceding escape sequences.
393          The mangler inserts an underscore to make sure the
394          identifier begins with a XID_Start character. */
395       if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
396           && ident.ascii[1] == '$')
397         {
398           ident.ascii++;
399           ident.ascii_len--;
400         }
401 
402       while (ident.ascii_len > 0)
403         {
404           /* Handle legacy escape sequences ("$...$", ".." or "."). */
405           if (ident.ascii[0] == '$')
406             {
407               unescaped
408                   = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
409               if (unescaped)
410                 print_str (rdm, &unescaped, 1);
411               else
412                 {
413                   /* Unexpected escape sequence, print the rest verbatim. */
414                   print_str (rdm, ident.ascii, ident.ascii_len);
415                   return;
416                 }
417             }
418           else if (ident.ascii[0] == '.')
419             {
420               if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
421                 {
422                   /* ".." becomes "::" */
423                   PRINT ("::");
424                   len = 2;
425                 }
426               else
427                 {
428                   PRINT (".");
429                   len = 1;
430                 }
431             }
432           else
433             {
434               /* Print everything before the next escape sequence, at once. */
435               for (len = 0; len < ident.ascii_len; len++)
436                 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
437                   break;
438 
439               print_str (rdm, ident.ascii, len);
440             }
441 
442           ident.ascii += len;
443           ident.ascii_len -= len;
444         }
445 
446       return;
447     }
448 
449   if (!ident.punycode)
450     {
451       print_str (rdm, ident.ascii, ident.ascii_len);
452       return;
453     }
454 
455   len = 0;
456   cap = 4;
457   while (cap < ident.ascii_len)
458     {
459       cap *= 2;
460       /* Check for overflows. */
461       if ((cap * 4) / 4 != cap)
462         {
463           rdm->errored = 1;
464           return;
465         }
466     }
467 
468   /* Store the output codepoints as groups of 4 UTF-8 bytes. */
469   out = (uint8_t *)malloc (cap * 4);
470   if (!out)
471     {
472       rdm->errored = 1;
473       return;
474     }
475 
476   /* Populate initial output from ASCII fragment. */
477   for (len = 0; len < ident.ascii_len; len++)
478     {
479       p = out + 4 * len;
480       p[0] = 0;
481       p[1] = 0;
482       p[2] = 0;
483       p[3] = ident.ascii[len];
484     }
485 
486   /* Punycode parameters and initial state. */
487   base = 36;
488   t_min = 1;
489   t_max = 26;
490   skew = 38;
491   damp = 700;
492   bias = 72;
493   i = 0;
494   c = 0x80;
495 
496   punycode_pos = 0;
497   while (punycode_pos < ident.punycode_len)
498     {
499       /* Read one delta value. */
500       delta = 0;
501       w = 1;
502       k = 0;
503       do
504         {
505           k += base;
506           t = k < bias ? 0 : (k - bias);
507           if (t < t_min)
508             t = t_min;
509           if (t > t_max)
510             t = t_max;
511 
512           if (punycode_pos >= ident.punycode_len)
513             goto cleanup;
514           d = ident.punycode[punycode_pos++];
515 
516           if (ISLOWER (d))
517             d = d - 'a';
518           else if (ISDIGIT (d))
519             d = 26 + (d - '0');
520           else
521             {
522               rdm->errored = 1;
523               goto cleanup;
524             }
525 
526           delta += d * w;
527           w *= base - t;
528         }
529       while (d >= t);
530 
531       /* Compute the new insert position and character. */
532       len++;
533       i += delta;
534       c += i / len;
535       i %= len;
536 
537       /* Ensure enough space is available. */
538       if (cap < len)
539         {
540           cap *= 2;
541           /* Check for overflows. */
542           if ((cap * 4) / 4 != cap || cap < len)
543             {
544               rdm->errored = 1;
545               goto cleanup;
546             }
547         }
548       p = (uint8_t *)realloc (out, cap * 4);
549       if (!p)
550         {
551           rdm->errored = 1;
552           goto cleanup;
553         }
554       out = p;
555 
556       /* Move the characters after the insert position. */
557       p = out + i * 4;
558       memmove (p + 4, p, (len - i - 1) * 4);
559 
560       /* Insert the new character, as UTF-8 bytes. */
561       p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
562       p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
563       p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
564       p[3] = 0x80 | (c & 0x3f);
565 
566       /* If there are no more deltas, decoding is complete. */
567       if (punycode_pos == ident.punycode_len)
568         break;
569 
570       i++;
571 
572       /* Perform bias adaptation. */
573       delta /= damp;
574       damp = 2;
575 
576       delta += delta / len;
577       k = 0;
578       while (delta > ((base - t_min) * t_max) / 2)
579         {
580           delta /= base - t_min;
581           k += base;
582         }
583       bias = k + ((base - t_min + 1) * delta) / (delta + skew);
584     }
585 
586   /* Remove all the 0 bytes to leave behind an UTF-8 string. */
587   for (i = 0, j = 0; i < len * 4; i++)
588     if (out[i] != 0)
589       out[j++] = out[i];
590 
591   print_str (rdm, (const char *)out, j);
592 
593 cleanup:
594   free (out);
595 }
596 
597 /* Print the lifetime according to the previously decoded index.
598    An index of `0` always refers to `'_`, but starting with `1`,
599    indices refer to late-bound lifetimes introduced by a binder. */
600 static void
print_lifetime_from_index(struct rust_demangler * rdm,uint64_t lt)601 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
602 {
603   char c;
604   uint64_t depth;
605 
606   PRINT ("'");
607   if (lt == 0)
608     {
609       PRINT ("_");
610       return;
611     }
612 
613   depth = rdm->bound_lifetime_depth - lt;
614   /* Try to print lifetimes alphabetically first. */
615   if (depth < 26)
616     {
617       c = 'a' + depth;
618       print_str (rdm, &c, 1);
619     }
620   else
621     {
622       /* Use `'_123` after running out of letters. */
623       PRINT ("_");
624       print_uint64 (rdm, depth);
625     }
626 }
627 
628 /* Demangling functions. */
629 
630 static void demangle_binder (struct rust_demangler *rdm);
631 static void demangle_path (struct rust_demangler *rdm, int in_value);
632 static void demangle_generic_arg (struct rust_demangler *rdm);
633 static void demangle_type (struct rust_demangler *rdm);
634 static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
635 static void demangle_dyn_trait (struct rust_demangler *rdm);
636 static void demangle_const (struct rust_demangler *rdm);
637 static void demangle_const_uint (struct rust_demangler *rdm);
638 static void demangle_const_int (struct rust_demangler *rdm);
639 static void demangle_const_bool (struct rust_demangler *rdm);
640 static void demangle_const_char (struct rust_demangler *rdm);
641 
642 /* Optionally enter a binder ('G') for late-bound lifetimes,
643    printing e.g. `for<'a, 'b> `, and make those lifetimes visible
644    to the caller (via depth level, which the caller should reset). */
645 static void
demangle_binder(struct rust_demangler * rdm)646 demangle_binder (struct rust_demangler *rdm)
647 {
648   uint64_t i, bound_lifetimes;
649 
650   if (rdm->errored)
651     return;
652 
653   bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
654   if (bound_lifetimes > 0)
655     {
656       PRINT ("for<");
657       for (i = 0; i < bound_lifetimes; i++)
658         {
659           if (i > 0)
660             PRINT (", ");
661           rdm->bound_lifetime_depth++;
662           print_lifetime_from_index (rdm, 1);
663         }
664       PRINT ("> ");
665     }
666 }
667 
668 static void
demangle_path(struct rust_demangler * rdm,int in_value)669 demangle_path (struct rust_demangler *rdm, int in_value)
670 {
671   char tag, ns;
672   int was_skipping_printing;
673   size_t i, backref, old_next;
674   uint64_t dis;
675   struct rust_mangled_ident name;
676 
677   if (rdm->errored)
678     return;
679 
680   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
681     {
682       ++ rdm->recursion;
683       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
684 	/* FIXME: There ought to be a way to report
685 	   that the recursion limit has been reached.  */
686 	goto fail_return;
687     }
688 
689   switch (tag = next (rdm))
690     {
691     case 'C':
692       dis = parse_disambiguator (rdm);
693       name = parse_ident (rdm);
694 
695       print_ident (rdm, name);
696       if (rdm->verbose)
697         {
698           PRINT ("[");
699           print_uint64_hex (rdm, dis);
700           PRINT ("]");
701         }
702       break;
703     case 'N':
704       ns = next (rdm);
705       if (!ISLOWER (ns) && !ISUPPER (ns))
706 	goto fail_return;
707 
708       demangle_path (rdm, in_value);
709 
710       dis = parse_disambiguator (rdm);
711       name = parse_ident (rdm);
712 
713       if (ISUPPER (ns))
714         {
715           /* Special namespaces, like closures and shims. */
716           PRINT ("::{");
717           switch (ns)
718             {
719             case 'C':
720               PRINT ("closure");
721               break;
722             case 'S':
723               PRINT ("shim");
724               break;
725             default:
726               print_str (rdm, &ns, 1);
727             }
728           if (name.ascii || name.punycode)
729             {
730               PRINT (":");
731               print_ident (rdm, name);
732             }
733           PRINT ("#");
734           print_uint64 (rdm, dis);
735           PRINT ("}");
736         }
737       else
738         {
739           /* Implementation-specific/unspecified namespaces. */
740 
741           if (name.ascii || name.punycode)
742             {
743               PRINT ("::");
744               print_ident (rdm, name);
745             }
746         }
747       break;
748     case 'M':
749     case 'X':
750       /* Ignore the `impl`'s own path.*/
751       parse_disambiguator (rdm);
752       was_skipping_printing = rdm->skipping_printing;
753       rdm->skipping_printing = 1;
754       demangle_path (rdm, in_value);
755       rdm->skipping_printing = was_skipping_printing;
756       /* fallthrough */
757     case 'Y':
758       PRINT ("<");
759       demangle_type (rdm);
760       if (tag != 'M')
761         {
762           PRINT (" as ");
763           demangle_path (rdm, 0);
764         }
765       PRINT (">");
766       break;
767     case 'I':
768       demangle_path (rdm, in_value);
769       if (in_value)
770         PRINT ("::");
771       PRINT ("<");
772       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
773         {
774           if (i > 0)
775             PRINT (", ");
776           demangle_generic_arg (rdm);
777         }
778       PRINT (">");
779       break;
780     case 'B':
781       backref = parse_integer_62 (rdm);
782       if (!rdm->skipping_printing)
783         {
784           old_next = rdm->next;
785           rdm->next = backref;
786           demangle_path (rdm, in_value);
787           rdm->next = old_next;
788         }
789       break;
790     default:
791       goto fail_return;
792     }
793   goto pass_return;
794 
795  fail_return:
796   rdm->errored = 1;
797  pass_return:
798   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
799     -- rdm->recursion;
800 }
801 
802 static void
demangle_generic_arg(struct rust_demangler * rdm)803 demangle_generic_arg (struct rust_demangler *rdm)
804 {
805   uint64_t lt;
806   if (eat (rdm, 'L'))
807     {
808       lt = parse_integer_62 (rdm);
809       print_lifetime_from_index (rdm, lt);
810     }
811   else if (eat (rdm, 'K'))
812     demangle_const (rdm);
813   else
814     demangle_type (rdm);
815 }
816 
817 static const char *
basic_type(char tag)818 basic_type (char tag)
819 {
820   switch (tag)
821     {
822     case 'b':
823       return "bool";
824     case 'c':
825       return "char";
826     case 'e':
827       return "str";
828     case 'u':
829       return "()";
830     case 'a':
831       return "i8";
832     case 's':
833       return "i16";
834     case 'l':
835       return "i32";
836     case 'x':
837       return "i64";
838     case 'n':
839       return "i128";
840     case 'i':
841       return "isize";
842     case 'h':
843       return "u8";
844     case 't':
845       return "u16";
846     case 'm':
847       return "u32";
848     case 'y':
849       return "u64";
850     case 'o':
851       return "u128";
852     case 'j':
853       return "usize";
854     case 'f':
855       return "f32";
856     case 'd':
857       return "f64";
858     case 'z':
859       return "!";
860     case 'p':
861       return "_";
862     case 'v':
863       return "...";
864 
865     default:
866       return NULL;
867     }
868 }
869 
870 static void
demangle_type(struct rust_demangler * rdm)871 demangle_type (struct rust_demangler *rdm)
872 {
873   char tag;
874   size_t i, old_next, backref;
875   uint64_t lt, old_bound_lifetime_depth;
876   const char *basic;
877   struct rust_mangled_ident abi;
878 
879   if (rdm->errored)
880     return;
881 
882   tag = next (rdm);
883 
884   basic = basic_type (tag);
885   if (basic)
886     {
887       PRINT (basic);
888       return;
889     }
890 
891    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
892     {
893       ++ rdm->recursion;
894       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
895 	/* FIXME: There ought to be a way to report
896 	   that the recursion limit has been reached.  */
897 	{
898 	  rdm->errored = 1;
899 	  -- rdm->recursion;
900 	  return;
901 	}
902     }
903 
904   switch (tag)
905     {
906     case 'R':
907     case 'Q':
908       PRINT ("&");
909       if (eat (rdm, 'L'))
910         {
911           lt = parse_integer_62 (rdm);
912           if (lt)
913             {
914               print_lifetime_from_index (rdm, lt);
915               PRINT (" ");
916             }
917         }
918       if (tag != 'R')
919         PRINT ("mut ");
920       demangle_type (rdm);
921       break;
922     case 'P':
923     case 'O':
924       PRINT ("*");
925       if (tag != 'P')
926         PRINT ("mut ");
927       else
928         PRINT ("const ");
929       demangle_type (rdm);
930       break;
931     case 'A':
932     case 'S':
933       PRINT ("[");
934       demangle_type (rdm);
935       if (tag == 'A')
936         {
937           PRINT ("; ");
938           demangle_const (rdm);
939         }
940       PRINT ("]");
941       break;
942     case 'T':
943       PRINT ("(");
944       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
945         {
946           if (i > 0)
947             PRINT (", ");
948           demangle_type (rdm);
949         }
950       if (i == 1)
951         PRINT (",");
952       PRINT (")");
953       break;
954     case 'F':
955       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
956       demangle_binder (rdm);
957 
958       if (eat (rdm, 'U'))
959         PRINT ("unsafe ");
960 
961       if (eat (rdm, 'K'))
962         {
963           if (eat (rdm, 'C'))
964             {
965               abi.ascii = "C";
966               abi.ascii_len = 1;
967             }
968           else
969             {
970               abi = parse_ident (rdm);
971               if (!abi.ascii || abi.punycode)
972                 {
973                   rdm->errored = 1;
974                   goto restore;
975                 }
976             }
977 
978           PRINT ("extern \"");
979 
980           /* If the ABI had any `-`, they were replaced with `_`,
981              so the parts between `_` have to be re-joined with `-`. */
982           for (i = 0; i < abi.ascii_len; i++)
983             {
984               if (abi.ascii[i] == '_')
985                 {
986                   print_str (rdm, abi.ascii, i);
987                   PRINT ("-");
988                   abi.ascii += i + 1;
989                   abi.ascii_len -= i + 1;
990                   i = 0;
991                 }
992             }
993           print_str (rdm, abi.ascii, abi.ascii_len);
994 
995           PRINT ("\" ");
996         }
997 
998       PRINT ("fn(");
999       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1000         {
1001           if (i > 0)
1002             PRINT (", ");
1003           demangle_type (rdm);
1004         }
1005       PRINT (")");
1006 
1007       if (eat (rdm, 'u'))
1008         {
1009           /* Skip printing the return type if it's 'u', i.e. `()`. */
1010         }
1011       else
1012         {
1013           PRINT (" -> ");
1014           demangle_type (rdm);
1015         }
1016 
1017     /* Restore `bound_lifetime_depth` to outside the binder. */
1018     restore:
1019       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1020       break;
1021     case 'D':
1022       PRINT ("dyn ");
1023 
1024       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1025       demangle_binder (rdm);
1026 
1027       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1028         {
1029           if (i > 0)
1030             PRINT (" + ");
1031           demangle_dyn_trait (rdm);
1032         }
1033 
1034       /* Restore `bound_lifetime_depth` to outside the binder. */
1035       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1036 
1037       if (!eat (rdm, 'L'))
1038         {
1039           rdm->errored = 1;
1040           return;
1041         }
1042       lt = parse_integer_62 (rdm);
1043       if (lt)
1044         {
1045           PRINT (" + ");
1046           print_lifetime_from_index (rdm, lt);
1047         }
1048       break;
1049     case 'B':
1050       backref = parse_integer_62 (rdm);
1051       if (!rdm->skipping_printing)
1052         {
1053           old_next = rdm->next;
1054           rdm->next = backref;
1055           demangle_type (rdm);
1056           rdm->next = old_next;
1057         }
1058       break;
1059     default:
1060       /* Go back to the tag, so `demangle_path` also sees it. */
1061       rdm->next--;
1062       demangle_path (rdm, 0);
1063     }
1064 
1065   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1066     -- rdm->recursion;
1067 }
1068 
1069 /* A trait in a trait object may have some "existential projections"
1070    (i.e. associated type bindings) after it, which should be printed
1071    in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1072    To this end, this method will keep the `<...>` of an 'I' path
1073    open, by omitting the `>`, and return `Ok(true)` in that case. */
1074 static int
demangle_path_maybe_open_generics(struct rust_demangler * rdm)1075 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1076 {
1077   int open;
1078   size_t i, old_next, backref;
1079 
1080   open = 0;
1081 
1082   if (rdm->errored)
1083     return open;
1084 
1085   if (eat (rdm, 'B'))
1086     {
1087       backref = parse_integer_62 (rdm);
1088       if (!rdm->skipping_printing)
1089         {
1090           old_next = rdm->next;
1091           rdm->next = backref;
1092           open = demangle_path_maybe_open_generics (rdm);
1093           rdm->next = old_next;
1094         }
1095     }
1096   else if (eat (rdm, 'I'))
1097     {
1098       demangle_path (rdm, 0);
1099       PRINT ("<");
1100       open = 1;
1101       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1102         {
1103           if (i > 0)
1104             PRINT (", ");
1105           demangle_generic_arg (rdm);
1106         }
1107     }
1108   else
1109     demangle_path (rdm, 0);
1110   return open;
1111 }
1112 
1113 static void
demangle_dyn_trait(struct rust_demangler * rdm)1114 demangle_dyn_trait (struct rust_demangler *rdm)
1115 {
1116   int open;
1117   struct rust_mangled_ident name;
1118 
1119   if (rdm->errored)
1120     return;
1121 
1122   open = demangle_path_maybe_open_generics (rdm);
1123 
1124   while (eat (rdm, 'p'))
1125     {
1126       if (!open)
1127         PRINT ("<");
1128       else
1129         PRINT (", ");
1130       open = 1;
1131 
1132       name = parse_ident (rdm);
1133       print_ident (rdm, name);
1134       PRINT (" = ");
1135       demangle_type (rdm);
1136     }
1137 
1138   if (open)
1139     PRINT (">");
1140 }
1141 
1142 static void
demangle_const(struct rust_demangler * rdm)1143 demangle_const (struct rust_demangler *rdm)
1144 {
1145   char ty_tag;
1146   size_t old_next, backref;
1147 
1148   if (rdm->errored)
1149     return;
1150 
1151   if (eat (rdm, 'B'))
1152     {
1153       backref = parse_integer_62 (rdm);
1154       if (!rdm->skipping_printing)
1155         {
1156           old_next = rdm->next;
1157           rdm->next = backref;
1158           demangle_const (rdm);
1159           rdm->next = old_next;
1160         }
1161       return;
1162     }
1163 
1164   ty_tag = next (rdm);
1165   switch (ty_tag)
1166     {
1167     /* Placeholder. */
1168     case 'p':
1169       PRINT ("_");
1170       return;
1171 
1172     /* Unsigned integer types. */
1173     case 'h':
1174     case 't':
1175     case 'm':
1176     case 'y':
1177     case 'o':
1178     case 'j':
1179       demangle_const_uint (rdm);
1180       break;
1181 
1182     /* Signed integer types. */
1183     case 'a':
1184     case 's':
1185     case 'l':
1186     case 'x':
1187     case 'n':
1188     case 'i':
1189       demangle_const_int (rdm);
1190       break;
1191 
1192     /* Boolean. */
1193     case 'b':
1194       demangle_const_bool (rdm);
1195       break;
1196 
1197     /* Character. */
1198     case 'c':
1199       demangle_const_char (rdm);
1200       break;
1201 
1202     default:
1203       rdm->errored = 1;
1204       return;
1205     }
1206 
1207   if (rdm->errored)
1208     return;
1209 
1210   if (rdm->verbose)
1211     {
1212       PRINT (": ");
1213       PRINT (basic_type (ty_tag));
1214     }
1215 }
1216 
1217 static void
demangle_const_uint(struct rust_demangler * rdm)1218 demangle_const_uint (struct rust_demangler *rdm)
1219 {
1220   size_t hex_len;
1221   uint64_t value;
1222 
1223   if (rdm->errored)
1224     return;
1225 
1226   hex_len = parse_hex_nibbles (rdm, &value);
1227 
1228   if (hex_len > 16)
1229     {
1230       /* Print anything that doesn't fit in `uint64_t` verbatim. */
1231       PRINT ("0x");
1232       print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1233     }
1234   else if (hex_len > 0)
1235     print_uint64 (rdm, value);
1236   else
1237     rdm->errored = 1;
1238 }
1239 
1240 static void
demangle_const_int(struct rust_demangler * rdm)1241 demangle_const_int (struct rust_demangler *rdm)
1242 {
1243   if (eat (rdm, 'n'))
1244     PRINT ("-");
1245   demangle_const_uint (rdm);
1246 }
1247 
1248 static void
demangle_const_bool(struct rust_demangler * rdm)1249 demangle_const_bool (struct rust_demangler *rdm)
1250 {
1251   uint64_t value;
1252 
1253   if (parse_hex_nibbles (rdm, &value) != 1)
1254     {
1255       rdm->errored = 1;
1256       return;
1257     }
1258 
1259   if (value == 0)
1260     PRINT ("false");
1261   else if (value == 1)
1262     PRINT ("true");
1263   else
1264     rdm->errored = 1;
1265 }
1266 
1267 static void
demangle_const_char(struct rust_demangler * rdm)1268 demangle_const_char (struct rust_demangler *rdm)
1269 {
1270   size_t hex_len;
1271   uint64_t value;
1272 
1273   hex_len = parse_hex_nibbles (rdm, &value);
1274 
1275   if (hex_len == 0 || hex_len > 8)
1276     {
1277       rdm->errored = 1;
1278       return;
1279     }
1280 
1281   /* Match Rust's character "debug" output as best as we can. */
1282   PRINT ("'");
1283   if (value == '\t')
1284     PRINT ("\\t");
1285   else if (value == '\r')
1286     PRINT ("\\r");
1287   else if (value == '\n')
1288     PRINT ("\\n");
1289   else if (value > ' ' && value < '~')
1290     {
1291       /* Rust also considers many non-ASCII codepoints to be printable, but
1292 	 that logic is not easily ported to C. */
1293       char c = value;
1294       print_str (rdm, &c, 1);
1295     }
1296   else
1297     {
1298       PRINT ("\\u{");
1299       print_uint64_hex (rdm, value);
1300       PRINT ("}");
1301     }
1302   PRINT ("'");
1303 }
1304 
1305 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1306    The hex digits must contain at least 5 distinct digits. */
1307 static int
is_legacy_prefixed_hash(struct rust_mangled_ident ident)1308 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1309 {
1310   uint16_t seen;
1311   int nibble;
1312   size_t i, count;
1313 
1314   if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1315     return 0;
1316 
1317   seen = 0;
1318   for (i = 0; i < 16; i++)
1319     {
1320       nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1321       if (nibble < 0)
1322         return 0;
1323       seen |= (uint16_t)1 << nibble;
1324     }
1325 
1326   /* Count how many distinct digits were seen. */
1327   count = 0;
1328   while (seen)
1329     {
1330       if (seen & 1)
1331         count++;
1332       seen >>= 1;
1333     }
1334 
1335   return count >= 5;
1336 }
1337 
1338 int
rust_demangle_callback(const char * mangled,int options,demangle_callbackref callback,void * opaque)1339 rust_demangle_callback (const char *mangled, int options,
1340                         demangle_callbackref callback, void *opaque)
1341 {
1342   const char *p;
1343   struct rust_demangler rdm;
1344   struct rust_mangled_ident ident;
1345 
1346   rdm.sym = mangled;
1347   rdm.sym_len = 0;
1348 
1349   rdm.callback_opaque = opaque;
1350   rdm.callback = callback;
1351 
1352   rdm.next = 0;
1353   rdm.errored = 0;
1354   rdm.skipping_printing = 0;
1355   rdm.verbose = (options & DMGL_VERBOSE) != 0;
1356   rdm.version = 0;
1357   rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1358   rdm.bound_lifetime_depth = 0;
1359 
1360   /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1361   if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1362     rdm.sym += 2;
1363   else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1364     {
1365       rdm.sym += 3;
1366       rdm.version = -1;
1367     }
1368   else
1369     return 0;
1370 
1371   /* Paths (v0) always start with uppercase characters. */
1372   if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1373     return 0;
1374 
1375   /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1376   for (p = rdm.sym; *p; p++)
1377     {
1378       /* Rust v0 symbols can have '.' suffixes, ignore those.  */
1379       if (rdm.version == 0 && *p == '.')
1380         break;
1381 
1382       rdm.sym_len++;
1383 
1384       if (*p == '_' || ISALNUM (*p))
1385         continue;
1386 
1387       /* Legacy Rust symbols can also contain [.:$] characters.
1388          Or @ in the .suffix (which will be skipped, see below). */
1389       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
1390                                 || *p == '@'))
1391         continue;
1392 
1393       return 0;
1394     }
1395 
1396   /* Legacy Rust symbols need to be handled separately. */
1397   if (rdm.version == -1)
1398     {
1399       /* Legacy Rust symbols always end with E.  But can be followed by a
1400          .suffix (which we want to ignore).  */
1401       int dot_suffix = 1;
1402       while (rdm.sym_len > 0 &&
1403              !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
1404         {
1405           dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
1406           rdm.sym_len--;
1407         }
1408 
1409       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1410         return 0;
1411       rdm.sym_len--;
1412 
1413       /* Legacy Rust symbols also always end with a path segment
1414          that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1415          This early check, before any parse_ident calls, should
1416          quickly filter out most C++ symbols unrelated to Rust. */
1417       if (!(rdm.sym_len > 19
1418             && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1419         return 0;
1420 
1421       do
1422         {
1423           ident = parse_ident (&rdm);
1424           if (rdm.errored || !ident.ascii)
1425             return 0;
1426         }
1427       while (rdm.next < rdm.sym_len);
1428 
1429       /* The last path segment should be the hash. */
1430       if (!is_legacy_prefixed_hash (ident))
1431         return 0;
1432 
1433       /* Reset the state for a second pass, to print the symbol. */
1434       rdm.next = 0;
1435       if (!rdm.verbose && rdm.sym_len > 19)
1436         {
1437           /* Hide the last segment, containing the hash, if not verbose. */
1438           rdm.sym_len -= 19;
1439         }
1440 
1441       do
1442         {
1443           if (rdm.next > 0)
1444             print_str (&rdm, "::", 2);
1445 
1446           ident = parse_ident (&rdm);
1447           print_ident (&rdm, ident);
1448         }
1449       while (rdm.next < rdm.sym_len);
1450     }
1451   else
1452     {
1453       demangle_path (&rdm, 1);
1454 
1455       /* Skip instantiating crate. */
1456       if (!rdm.errored && rdm.next < rdm.sym_len)
1457         {
1458           rdm.skipping_printing = 1;
1459           demangle_path (&rdm, 0);
1460         }
1461 
1462       /* It's an error to not reach the end. */
1463       rdm.errored |= rdm.next != rdm.sym_len;
1464     }
1465 
1466   return !rdm.errored;
1467 }
1468 
1469 /* Growable string buffers. */
1470 struct str_buf
1471 {
1472   char *ptr;
1473   size_t len;
1474   size_t cap;
1475   int errored;
1476 };
1477 
1478 static void
str_buf_reserve(struct str_buf * buf,size_t extra)1479 str_buf_reserve (struct str_buf *buf, size_t extra)
1480 {
1481   size_t available, min_new_cap, new_cap;
1482   char *new_ptr;
1483 
1484   /* Allocation failed before. */
1485   if (buf->errored)
1486     return;
1487 
1488   available = buf->cap - buf->len;
1489 
1490   if (extra <= available)
1491     return;
1492 
1493   min_new_cap = buf->cap + (extra - available);
1494 
1495   /* Check for overflows. */
1496   if (min_new_cap < buf->cap)
1497     {
1498       buf->errored = 1;
1499       return;
1500     }
1501 
1502   new_cap = buf->cap;
1503 
1504   if (new_cap == 0)
1505     new_cap = 4;
1506 
1507   /* Double capacity until sufficiently large. */
1508   while (new_cap < min_new_cap)
1509     {
1510       new_cap *= 2;
1511 
1512       /* Check for overflows. */
1513       if (new_cap < buf->cap)
1514         {
1515           buf->errored = 1;
1516           return;
1517         }
1518     }
1519 
1520   new_ptr = (char *)realloc (buf->ptr, new_cap);
1521   if (new_ptr == NULL)
1522     {
1523       free (buf->ptr);
1524       buf->ptr = NULL;
1525       buf->len = 0;
1526       buf->cap = 0;
1527       buf->errored = 1;
1528     }
1529   else
1530     {
1531       buf->ptr = new_ptr;
1532       buf->cap = new_cap;
1533     }
1534 }
1535 
1536 static void
str_buf_append(struct str_buf * buf,const char * data,size_t len)1537 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1538 {
1539   str_buf_reserve (buf, len);
1540   if (buf->errored)
1541     return;
1542 
1543   memcpy (buf->ptr + buf->len, data, len);
1544   buf->len += len;
1545 }
1546 
1547 static void
str_buf_demangle_callback(const char * data,size_t len,void * opaque)1548 str_buf_demangle_callback (const char *data, size_t len, void *opaque)
1549 {
1550   str_buf_append ((struct str_buf *)opaque, data, len);
1551 }
1552 
1553 char *
rust_demangle(const char * mangled,int options)1554 rust_demangle (const char *mangled, int options)
1555 {
1556   struct str_buf out;
1557   int success;
1558 
1559   out.ptr = NULL;
1560   out.len = 0;
1561   out.cap = 0;
1562   out.errored = 0;
1563 
1564   success = rust_demangle_callback (mangled, options,
1565                                     str_buf_demangle_callback, &out);
1566 
1567   if (!success)
1568     {
1569       free (out.ptr);
1570       return NULL;
1571     }
1572 
1573   str_buf_append (&out, "\0", 1);
1574   return out.ptr;
1575 }
1576