xref: /netbsd-src/external/gpl3/binutils.old/dist/libiberty/rust-demangle.c (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /* Demangler for the Rust programming language
2    Copyright (C) 2016-2022 Free Software Foundation, Inc.
3    Written by David Tolnay (dtolnay@gmail.com).
4    Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
5 
6 This file is part of the libiberty library.
7 Libiberty is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11 
12 In addition to the permissions in the GNU Library General Public
13 License, the Free Software Foundation gives you unlimited permission
14 to link the compiled version of this file into combinations with other
15 programs, and to distribute those combinations without any restriction
16 coming from the use of this file.  (The Library Public License
17 restrictions do apply in other respects; for example, they cover
18 modification of the file, and distribution when not linked into a
19 combined executable.)
20 
21 Libiberty is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24 Library General Public License for more details.
25 
26 You should have received a copy of the GNU Library General Public
27 License along with libiberty; see the file COPYING.LIB.
28 If not, see <http://www.gnu.org/licenses/>.  */
29 
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include "safe-ctype.h"
36 
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #else
46 extern size_t strlen(const char *s);
47 extern int strncmp(const char *s1, const char *s2, size_t n);
48 extern void *memset(void *s, int c, size_t n);
49 #endif
50 
51 #include <demangle.h>
52 #include "libiberty.h"
53 
54 struct rust_demangler
55 {
56   const char *sym;
57   size_t sym_len;
58 
59   void *callback_opaque;
60   demangle_callbackref callback;
61 
62   /* Position of the next character to read from the symbol. */
63   size_t next;
64 
65   /* Non-zero if any error occurred. */
66   int errored;
67 
68   /* Non-zero if nothing should be printed. */
69   int skipping_printing;
70 
71   /* Non-zero if printing should be verbose (e.g. include hashes). */
72   int verbose;
73 
74   /* Rust mangling version, with legacy mangling being -1. */
75   int version;
76 
77   /* Recursion depth.  */
78   unsigned int recursion;
79   /* Maximum number of times demangle_path may be called recursively.  */
80 #define RUST_MAX_RECURSION_COUNT  1024
81 #define RUST_NO_RECURSION_LIMIT   ((unsigned int) -1)
82 
83   uint64_t bound_lifetime_depth;
84 };
85 
86 /* Parsing functions. */
87 
88 static char
peek(const struct rust_demangler * rdm)89 peek (const struct rust_demangler *rdm)
90 {
91   if (rdm->next < rdm->sym_len)
92     return rdm->sym[rdm->next];
93   return 0;
94 }
95 
96 static int
eat(struct rust_demangler * rdm,char c)97 eat (struct rust_demangler *rdm, char c)
98 {
99   if (peek (rdm) == c)
100     {
101       rdm->next++;
102       return 1;
103     }
104   else
105     return 0;
106 }
107 
108 static char
next(struct rust_demangler * rdm)109 next (struct rust_demangler *rdm)
110 {
111   char c = peek (rdm);
112   if (!c)
113     rdm->errored = 1;
114   else
115     rdm->next++;
116   return c;
117 }
118 
119 static uint64_t
parse_integer_62(struct rust_demangler * rdm)120 parse_integer_62 (struct rust_demangler *rdm)
121 {
122   char c;
123   uint64_t x;
124 
125   if (eat (rdm, '_'))
126     return 0;
127 
128   x = 0;
129   while (!eat (rdm, '_') && !rdm->errored)
130     {
131       c = next (rdm);
132       x *= 62;
133       if (ISDIGIT (c))
134         x += c - '0';
135       else if (ISLOWER (c))
136         x += 10 + (c - 'a');
137       else if (ISUPPER (c))
138         x += 10 + 26 + (c - 'A');
139       else
140         {
141           rdm->errored = 1;
142           return 0;
143         }
144     }
145   return x + 1;
146 }
147 
148 static uint64_t
parse_opt_integer_62(struct rust_demangler * rdm,char tag)149 parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
150 {
151   if (!eat (rdm, tag))
152     return 0;
153   return 1 + parse_integer_62 (rdm);
154 }
155 
156 static uint64_t
parse_disambiguator(struct rust_demangler * rdm)157 parse_disambiguator (struct rust_demangler *rdm)
158 {
159   return parse_opt_integer_62 (rdm, 's');
160 }
161 
162 static size_t
parse_hex_nibbles(struct rust_demangler * rdm,uint64_t * value)163 parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
164 {
165   char c;
166   size_t hex_len;
167 
168   hex_len = 0;
169   *value = 0;
170 
171   while (!eat (rdm, '_'))
172     {
173       *value <<= 4;
174 
175       c = next (rdm);
176       if (ISDIGIT (c))
177         *value |= c - '0';
178       else if (c >= 'a' && c <= 'f')
179         *value |= 10 + (c - 'a');
180       else
181         {
182           rdm->errored = 1;
183           return 0;
184         }
185       hex_len++;
186     }
187 
188   return hex_len;
189 }
190 
191 struct rust_mangled_ident
192 {
193   /* ASCII part of the identifier. */
194   const char *ascii;
195   size_t ascii_len;
196 
197   /* Punycode insertion codes for Unicode codepoints, if any. */
198   const char *punycode;
199   size_t punycode_len;
200 };
201 
202 static struct rust_mangled_ident
parse_ident(struct rust_demangler * rdm)203 parse_ident (struct rust_demangler *rdm)
204 {
205   char c;
206   size_t start, len;
207   int is_punycode = 0;
208   struct rust_mangled_ident ident;
209 
210   ident.ascii = NULL;
211   ident.ascii_len = 0;
212   ident.punycode = NULL;
213   ident.punycode_len = 0;
214 
215   if (rdm->version != -1)
216     is_punycode = eat (rdm, 'u');
217 
218   c = next (rdm);
219   if (!ISDIGIT (c))
220     {
221       rdm->errored = 1;
222       return ident;
223     }
224   len = c - '0';
225 
226   if (c != '0')
227     while (ISDIGIT (peek (rdm)))
228       len = len * 10 + (next (rdm) - '0');
229 
230   /* Skip past the optional `_` separator (v0). */
231   if (rdm->version != -1)
232     eat (rdm, '_');
233 
234   start = rdm->next;
235   rdm->next += len;
236   /* Check for overflows. */
237   if ((start > rdm->next) || (rdm->next > rdm->sym_len))
238     {
239       rdm->errored = 1;
240       return ident;
241     }
242 
243   ident.ascii = rdm->sym + start;
244   ident.ascii_len = len;
245 
246   if (is_punycode)
247     {
248       ident.punycode_len = 0;
249       while (ident.ascii_len > 0)
250         {
251           ident.ascii_len--;
252 
253           /* The last '_' is a separator between ascii & punycode. */
254           if (ident.ascii[ident.ascii_len] == '_')
255             break;
256 
257           ident.punycode_len++;
258         }
259       if (!ident.punycode_len)
260         {
261           rdm->errored = 1;
262           return ident;
263         }
264       ident.punycode = ident.ascii + (len - ident.punycode_len);
265     }
266 
267   if (ident.ascii_len == 0)
268     ident.ascii = NULL;
269 
270   return ident;
271 }
272 
273 /* Printing functions. */
274 
275 static void
print_str(struct rust_demangler * rdm,const char * data,size_t len)276 print_str (struct rust_demangler *rdm, const char *data, size_t len)
277 {
278   if (!rdm->errored && !rdm->skipping_printing)
279     rdm->callback (data, len, rdm->callback_opaque);
280 }
281 
282 #define PRINT(s) print_str (rdm, s, strlen (s))
283 
284 static void
print_uint64(struct rust_demangler * rdm,uint64_t x)285 print_uint64 (struct rust_demangler *rdm, uint64_t x)
286 {
287   char s[21];
288   snprintf (s, 21, "%" PRIu64, x);
289   PRINT (s);
290 }
291 
292 static void
print_uint64_hex(struct rust_demangler * rdm,uint64_t x)293 print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
294 {
295   char s[17];
296   snprintf (s, 17, "%" PRIx64, x);
297   PRINT (s);
298 }
299 
300 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
301 static int
decode_lower_hex_nibble(char nibble)302 decode_lower_hex_nibble (char nibble)
303 {
304   if ('0' <= nibble && nibble <= '9')
305     return nibble - '0';
306   if ('a' <= nibble && nibble <= 'f')
307     return 0xa + (nibble - 'a');
308   return -1;
309 }
310 
311 /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
312 static char
decode_legacy_escape(const char * e,size_t len,size_t * out_len)313 decode_legacy_escape (const char *e, size_t len, size_t *out_len)
314 {
315   char c = 0;
316   size_t escape_len = 0;
317   int lo_nibble = -1, hi_nibble = -1;
318 
319   if (len < 3 || e[0] != '$')
320     return 0;
321 
322   e++;
323   len--;
324 
325   if (e[0] == 'C')
326     {
327       escape_len = 1;
328 
329       c = ',';
330     }
331   else if (len > 2)
332     {
333       escape_len = 2;
334 
335       if (e[0] == 'S' && e[1] == 'P')
336         c = '@';
337       else if (e[0] == 'B' && e[1] == 'P')
338         c = '*';
339       else if (e[0] == 'R' && e[1] == 'F')
340         c = '&';
341       else if (e[0] == 'L' && e[1] == 'T')
342         c = '<';
343       else if (e[0] == 'G' && e[1] == 'T')
344         c = '>';
345       else if (e[0] == 'L' && e[1] == 'P')
346         c = '(';
347       else if (e[0] == 'R' && e[1] == 'P')
348         c = ')';
349       else if (e[0] == 'u' && len > 3)
350         {
351           escape_len = 3;
352 
353           hi_nibble = decode_lower_hex_nibble (e[1]);
354           if (hi_nibble < 0)
355             return 0;
356           lo_nibble = decode_lower_hex_nibble (e[2]);
357           if (lo_nibble < 0)
358             return 0;
359 
360           /* Only allow non-control ASCII characters. */
361           if (hi_nibble > 7)
362             return 0;
363           c = (hi_nibble << 4) | lo_nibble;
364           if (c < 0x20)
365             return 0;
366         }
367     }
368 
369   if (!c || len <= escape_len || e[escape_len] != '$')
370     return 0;
371 
372   *out_len = 2 + escape_len;
373   return c;
374 }
375 
376 static void
print_ident(struct rust_demangler * rdm,struct rust_mangled_ident ident)377 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
378 {
379   char unescaped;
380   uint8_t *out, *p, d;
381   size_t len, cap, punycode_pos, j;
382   /* Punycode parameters and state. */
383   uint32_t c;
384   size_t base, t_min, t_max, skew, damp, bias, i;
385   size_t delta, w, k, t;
386 
387   if (rdm->errored || rdm->skipping_printing)
388     return;
389 
390   if (rdm->version == -1)
391     {
392       /* Ignore leading underscores preceding escape sequences.
393          The mangler inserts an underscore to make sure the
394          identifier begins with a XID_Start character. */
395       if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
396           && ident.ascii[1] == '$')
397         {
398           ident.ascii++;
399           ident.ascii_len--;
400         }
401 
402       while (ident.ascii_len > 0)
403         {
404           /* Handle legacy escape sequences ("$...$", ".." or "."). */
405           if (ident.ascii[0] == '$')
406             {
407               unescaped
408                   = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
409               if (unescaped)
410                 print_str (rdm, &unescaped, 1);
411               else
412                 {
413                   /* Unexpected escape sequence, print the rest verbatim. */
414                   print_str (rdm, ident.ascii, ident.ascii_len);
415                   return;
416                 }
417             }
418           else if (ident.ascii[0] == '.')
419             {
420               if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
421                 {
422                   /* ".." becomes "::" */
423                   PRINT ("::");
424                   len = 2;
425                 }
426               else
427                 {
428                   PRINT (".");
429                   len = 1;
430                 }
431             }
432           else
433             {
434               /* Print everything before the next escape sequence, at once. */
435               for (len = 0; len < ident.ascii_len; len++)
436                 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
437                   break;
438 
439               print_str (rdm, ident.ascii, len);
440             }
441 
442           ident.ascii += len;
443           ident.ascii_len -= len;
444         }
445 
446       return;
447     }
448 
449   if (!ident.punycode)
450     {
451       print_str (rdm, ident.ascii, ident.ascii_len);
452       return;
453     }
454 
455   len = 0;
456   cap = 4;
457   while (cap < ident.ascii_len)
458     {
459       cap *= 2;
460       /* Check for overflows. */
461       if ((cap * 4) / 4 != cap)
462         {
463           rdm->errored = 1;
464           return;
465         }
466     }
467 
468   /* Store the output codepoints as groups of 4 UTF-8 bytes. */
469   out = (uint8_t *)malloc (cap * 4);
470   if (!out)
471     {
472       rdm->errored = 1;
473       return;
474     }
475 
476   /* Populate initial output from ASCII fragment. */
477   for (len = 0; len < ident.ascii_len; len++)
478     {
479       p = out + 4 * len;
480       p[0] = 0;
481       p[1] = 0;
482       p[2] = 0;
483       p[3] = ident.ascii[len];
484     }
485 
486   /* Punycode parameters and initial state. */
487   base = 36;
488   t_min = 1;
489   t_max = 26;
490   skew = 38;
491   damp = 700;
492   bias = 72;
493   i = 0;
494   c = 0x80;
495 
496   punycode_pos = 0;
497   while (punycode_pos < ident.punycode_len)
498     {
499       /* Read one delta value. */
500       delta = 0;
501       w = 1;
502       k = 0;
503       do
504         {
505           k += base;
506           t = k < bias ? 0 : (k - bias);
507           if (t < t_min)
508             t = t_min;
509           if (t > t_max)
510             t = t_max;
511 
512           if (punycode_pos >= ident.punycode_len)
513             goto cleanup;
514           d = ident.punycode[punycode_pos++];
515 
516           if (ISLOWER (d))
517             d = d - 'a';
518           else if (ISDIGIT (d))
519             d = 26 + (d - '0');
520           else
521             {
522               rdm->errored = 1;
523               goto cleanup;
524             }
525 
526           delta += d * w;
527           w *= base - t;
528         }
529       while (d >= t);
530 
531       /* Compute the new insert position and character. */
532       len++;
533       i += delta;
534       c += i / len;
535       i %= len;
536 
537       /* Ensure enough space is available. */
538       if (cap < len)
539         {
540           cap *= 2;
541           /* Check for overflows. */
542           if ((cap * 4) / 4 != cap || cap < len)
543             {
544               rdm->errored = 1;
545               goto cleanup;
546             }
547         }
548       p = (uint8_t *)realloc (out, cap * 4);
549       if (!p)
550         {
551           rdm->errored = 1;
552           goto cleanup;
553         }
554       out = p;
555 
556       /* Move the characters after the insert position. */
557       p = out + i * 4;
558       memmove (p + 4, p, (len - i - 1) * 4);
559 
560       /* Insert the new character, as UTF-8 bytes. */
561       p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
562       p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
563       p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
564       p[3] = 0x80 | (c & 0x3f);
565 
566       /* If there are no more deltas, decoding is complete. */
567       if (punycode_pos == ident.punycode_len)
568         break;
569 
570       i++;
571 
572       /* Perform bias adaptation. */
573       delta /= damp;
574       damp = 2;
575 
576       delta += delta / len;
577       k = 0;
578       while (delta > ((base - t_min) * t_max) / 2)
579         {
580           delta /= base - t_min;
581           k += base;
582         }
583       bias = k + ((base - t_min + 1) * delta) / (delta + skew);
584     }
585 
586   /* Remove all the 0 bytes to leave behind an UTF-8 string. */
587   for (i = 0, j = 0; i < len * 4; i++)
588     if (out[i] != 0)
589       out[j++] = out[i];
590 
591   print_str (rdm, (const char *)out, j);
592 
593 cleanup:
594   free (out);
595 }
596 
597 /* Print the lifetime according to the previously decoded index.
598    An index of `0` always refers to `'_`, but starting with `1`,
599    indices refer to late-bound lifetimes introduced by a binder. */
600 static void
print_lifetime_from_index(struct rust_demangler * rdm,uint64_t lt)601 print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
602 {
603   char c;
604   uint64_t depth;
605 
606   PRINT ("'");
607   if (lt == 0)
608     {
609       PRINT ("_");
610       return;
611     }
612 
613   depth = rdm->bound_lifetime_depth - lt;
614   /* Try to print lifetimes alphabetically first. */
615   if (depth < 26)
616     {
617       c = 'a' + depth;
618       print_str (rdm, &c, 1);
619     }
620   else
621     {
622       /* Use `'_123` after running out of letters. */
623       PRINT ("_");
624       print_uint64 (rdm, depth);
625     }
626 }
627 
628 /* Demangling functions. */
629 
630 static void demangle_binder (struct rust_demangler *rdm);
631 static void demangle_path (struct rust_demangler *rdm, int in_value);
632 static void demangle_generic_arg (struct rust_demangler *rdm);
633 static void demangle_type (struct rust_demangler *rdm);
634 static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
635 static void demangle_dyn_trait (struct rust_demangler *rdm);
636 static void demangle_const (struct rust_demangler *rdm);
637 static void demangle_const_uint (struct rust_demangler *rdm);
638 static void demangle_const_int (struct rust_demangler *rdm);
639 static void demangle_const_bool (struct rust_demangler *rdm);
640 static void demangle_const_char (struct rust_demangler *rdm);
641 
642 /* Optionally enter a binder ('G') for late-bound lifetimes,
643    printing e.g. `for<'a, 'b> `, and make those lifetimes visible
644    to the caller (via depth level, which the caller should reset). */
645 static void
demangle_binder(struct rust_demangler * rdm)646 demangle_binder (struct rust_demangler *rdm)
647 {
648   uint64_t i, bound_lifetimes;
649 
650   if (rdm->errored)
651     return;
652 
653   bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
654   if (bound_lifetimes > 0)
655     {
656       PRINT ("for<");
657       for (i = 0; i < bound_lifetimes; i++)
658         {
659           if (i > 0)
660             PRINT (", ");
661           rdm->bound_lifetime_depth++;
662           print_lifetime_from_index (rdm, 1);
663         }
664       PRINT ("> ");
665     }
666 }
667 
668 static void
demangle_path(struct rust_demangler * rdm,int in_value)669 demangle_path (struct rust_demangler *rdm, int in_value)
670 {
671   char tag, ns;
672   int was_skipping_printing;
673   size_t i, backref, old_next;
674   uint64_t dis;
675   struct rust_mangled_ident name;
676 
677   if (rdm->errored)
678     return;
679 
680   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
681     {
682       ++ rdm->recursion;
683       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
684 	/* FIXME: There ought to be a way to report
685 	   that the recursion limit has been reached.  */
686 	goto fail_return;
687     }
688 
689   switch (tag = next (rdm))
690     {
691     case 'C':
692       dis = parse_disambiguator (rdm);
693       name = parse_ident (rdm);
694 
695       print_ident (rdm, name);
696       if (rdm->verbose)
697         {
698           PRINT ("[");
699           print_uint64_hex (rdm, dis);
700           PRINT ("]");
701         }
702       break;
703     case 'N':
704       ns = next (rdm);
705       if (!ISLOWER (ns) && !ISUPPER (ns))
706 	goto fail_return;
707 
708       demangle_path (rdm, in_value);
709 
710       dis = parse_disambiguator (rdm);
711       name = parse_ident (rdm);
712 
713       if (ISUPPER (ns))
714         {
715           /* Special namespaces, like closures and shims. */
716           PRINT ("::{");
717           switch (ns)
718             {
719             case 'C':
720               PRINT ("closure");
721               break;
722             case 'S':
723               PRINT ("shim");
724               break;
725             default:
726               print_str (rdm, &ns, 1);
727             }
728           if (name.ascii || name.punycode)
729             {
730               PRINT (":");
731               print_ident (rdm, name);
732             }
733           PRINT ("#");
734           print_uint64 (rdm, dis);
735           PRINT ("}");
736         }
737       else
738         {
739           /* Implementation-specific/unspecified namespaces. */
740 
741           if (name.ascii || name.punycode)
742             {
743               PRINT ("::");
744               print_ident (rdm, name);
745             }
746         }
747       break;
748     case 'M':
749     case 'X':
750       /* Ignore the `impl`'s own path.*/
751       parse_disambiguator (rdm);
752       was_skipping_printing = rdm->skipping_printing;
753       rdm->skipping_printing = 1;
754       demangle_path (rdm, in_value);
755       rdm->skipping_printing = was_skipping_printing;
756       /* fallthrough */
757     case 'Y':
758       PRINT ("<");
759       demangle_type (rdm);
760       if (tag != 'M')
761         {
762           PRINT (" as ");
763           demangle_path (rdm, 0);
764         }
765       PRINT (">");
766       break;
767     case 'I':
768       demangle_path (rdm, in_value);
769       if (in_value)
770         PRINT ("::");
771       PRINT ("<");
772       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
773         {
774           if (i > 0)
775             PRINT (", ");
776           demangle_generic_arg (rdm);
777         }
778       PRINT (">");
779       break;
780     case 'B':
781       backref = parse_integer_62 (rdm);
782       if (!rdm->skipping_printing)
783         {
784           old_next = rdm->next;
785           rdm->next = backref;
786           demangle_path (rdm, in_value);
787           rdm->next = old_next;
788         }
789       break;
790     default:
791       goto fail_return;
792     }
793   goto pass_return;
794 
795  fail_return:
796   rdm->errored = 1;
797  pass_return:
798   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
799     -- rdm->recursion;
800 }
801 
802 static void
demangle_generic_arg(struct rust_demangler * rdm)803 demangle_generic_arg (struct rust_demangler *rdm)
804 {
805   uint64_t lt;
806   if (eat (rdm, 'L'))
807     {
808       lt = parse_integer_62 (rdm);
809       print_lifetime_from_index (rdm, lt);
810     }
811   else if (eat (rdm, 'K'))
812     demangle_const (rdm);
813   else
814     demangle_type (rdm);
815 }
816 
817 static const char *
basic_type(char tag)818 basic_type (char tag)
819 {
820   switch (tag)
821     {
822     case 'b':
823       return "bool";
824     case 'c':
825       return "char";
826     case 'e':
827       return "str";
828     case 'u':
829       return "()";
830     case 'a':
831       return "i8";
832     case 's':
833       return "i16";
834     case 'l':
835       return "i32";
836     case 'x':
837       return "i64";
838     case 'n':
839       return "i128";
840     case 'i':
841       return "isize";
842     case 'h':
843       return "u8";
844     case 't':
845       return "u16";
846     case 'm':
847       return "u32";
848     case 'y':
849       return "u64";
850     case 'o':
851       return "u128";
852     case 'j':
853       return "usize";
854     case 'f':
855       return "f32";
856     case 'd':
857       return "f64";
858     case 'z':
859       return "!";
860     case 'p':
861       return "_";
862     case 'v':
863       return "...";
864 
865     default:
866       return NULL;
867     }
868 }
869 
870 static void
demangle_type(struct rust_demangler * rdm)871 demangle_type (struct rust_demangler *rdm)
872 {
873   char tag;
874   size_t i, old_next, backref;
875   uint64_t lt, old_bound_lifetime_depth;
876   const char *basic;
877   struct rust_mangled_ident abi;
878 
879   if (rdm->errored)
880     return;
881 
882   tag = next (rdm);
883 
884   basic = basic_type (tag);
885   if (basic)
886     {
887       PRINT (basic);
888       return;
889     }
890 
891    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
892     {
893       ++ rdm->recursion;
894       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
895 	/* FIXME: There ought to be a way to report
896 	   that the recursion limit has been reached.  */
897 	{
898 	  rdm->errored = 1;
899 	  -- rdm->recursion;
900 	  return;
901 	}
902     }
903 
904   switch (tag)
905     {
906     case 'R':
907     case 'Q':
908       PRINT ("&");
909       if (eat (rdm, 'L'))
910         {
911           lt = parse_integer_62 (rdm);
912           if (lt)
913             {
914               print_lifetime_from_index (rdm, lt);
915               PRINT (" ");
916             }
917         }
918       if (tag != 'R')
919         PRINT ("mut ");
920       demangle_type (rdm);
921       break;
922     case 'P':
923     case 'O':
924       PRINT ("*");
925       if (tag != 'P')
926         PRINT ("mut ");
927       else
928         PRINT ("const ");
929       demangle_type (rdm);
930       break;
931     case 'A':
932     case 'S':
933       PRINT ("[");
934       demangle_type (rdm);
935       if (tag == 'A')
936         {
937           PRINT ("; ");
938           demangle_const (rdm);
939         }
940       PRINT ("]");
941       break;
942     case 'T':
943       PRINT ("(");
944       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
945         {
946           if (i > 0)
947             PRINT (", ");
948           demangle_type (rdm);
949         }
950       if (i == 1)
951         PRINT (",");
952       PRINT (")");
953       break;
954     case 'F':
955       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
956       demangle_binder (rdm);
957 
958       if (eat (rdm, 'U'))
959         PRINT ("unsafe ");
960 
961       if (eat (rdm, 'K'))
962         {
963           if (eat (rdm, 'C'))
964             {
965               abi.ascii = "C";
966               abi.ascii_len = 1;
967             }
968           else
969             {
970               abi = parse_ident (rdm);
971               if (!abi.ascii || abi.punycode)
972                 {
973                   rdm->errored = 1;
974                   goto restore;
975                 }
976             }
977 
978           PRINT ("extern \"");
979 
980           /* If the ABI had any `-`, they were replaced with `_`,
981              so the parts between `_` have to be re-joined with `-`. */
982           for (i = 0; i < abi.ascii_len; i++)
983             {
984               if (abi.ascii[i] == '_')
985                 {
986                   print_str (rdm, abi.ascii, i);
987                   PRINT ("-");
988                   abi.ascii += i + 1;
989                   abi.ascii_len -= i + 1;
990                   i = 0;
991                 }
992             }
993           print_str (rdm, abi.ascii, abi.ascii_len);
994 
995           PRINT ("\" ");
996         }
997 
998       PRINT ("fn(");
999       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1000         {
1001           if (i > 0)
1002             PRINT (", ");
1003           demangle_type (rdm);
1004         }
1005       PRINT (")");
1006 
1007       if (eat (rdm, 'u'))
1008         {
1009           /* Skip printing the return type if it's 'u', i.e. `()`. */
1010         }
1011       else
1012         {
1013           PRINT (" -> ");
1014           demangle_type (rdm);
1015         }
1016 
1017     /* Restore `bound_lifetime_depth` to outside the binder. */
1018     restore:
1019       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1020       break;
1021     case 'D':
1022       PRINT ("dyn ");
1023 
1024       old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1025       demangle_binder (rdm);
1026 
1027       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1028         {
1029           if (i > 0)
1030             PRINT (" + ");
1031           demangle_dyn_trait (rdm);
1032         }
1033 
1034       /* Restore `bound_lifetime_depth` to outside the binder. */
1035       rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1036 
1037       if (!eat (rdm, 'L'))
1038         {
1039           rdm->errored = 1;
1040           return;
1041         }
1042       lt = parse_integer_62 (rdm);
1043       if (lt)
1044         {
1045           PRINT (" + ");
1046           print_lifetime_from_index (rdm, lt);
1047         }
1048       break;
1049     case 'B':
1050       backref = parse_integer_62 (rdm);
1051       if (!rdm->skipping_printing)
1052         {
1053           old_next = rdm->next;
1054           rdm->next = backref;
1055           demangle_type (rdm);
1056           rdm->next = old_next;
1057         }
1058       break;
1059     default:
1060       /* Go back to the tag, so `demangle_path` also sees it. */
1061       rdm->next--;
1062       demangle_path (rdm, 0);
1063     }
1064 
1065   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1066     -- rdm->recursion;
1067 }
1068 
1069 /* A trait in a trait object may have some "existential projections"
1070    (i.e. associated type bindings) after it, which should be printed
1071    in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1072    To this end, this method will keep the `<...>` of an 'I' path
1073    open, by omitting the `>`, and return `Ok(true)` in that case. */
1074 static int
demangle_path_maybe_open_generics(struct rust_demangler * rdm)1075 demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1076 {
1077   int open;
1078   size_t i, old_next, backref;
1079 
1080   open = 0;
1081 
1082   if (rdm->errored)
1083     return open;
1084 
1085   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1086     {
1087       ++ rdm->recursion;
1088       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1089 	{
1090 	  /* FIXME: There ought to be a way to report
1091 	     that the recursion limit has been reached.  */
1092 	  rdm->errored = 1;
1093 	  goto end_of_func;
1094 	}
1095     }
1096 
1097   if (eat (rdm, 'B'))
1098     {
1099       backref = parse_integer_62 (rdm);
1100       if (!rdm->skipping_printing)
1101         {
1102           old_next = rdm->next;
1103           rdm->next = backref;
1104           open = demangle_path_maybe_open_generics (rdm);
1105           rdm->next = old_next;
1106         }
1107     }
1108   else if (eat (rdm, 'I'))
1109     {
1110       demangle_path (rdm, 0);
1111       PRINT ("<");
1112       open = 1;
1113       for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1114         {
1115           if (i > 0)
1116             PRINT (", ");
1117           demangle_generic_arg (rdm);
1118         }
1119     }
1120   else
1121     demangle_path (rdm, 0);
1122 
1123  end_of_func:
1124   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1125     -- rdm->recursion;
1126 
1127   return open;
1128 }
1129 
1130 static void
demangle_dyn_trait(struct rust_demangler * rdm)1131 demangle_dyn_trait (struct rust_demangler *rdm)
1132 {
1133   int open;
1134   struct rust_mangled_ident name;
1135 
1136   if (rdm->errored)
1137     return;
1138 
1139   open = demangle_path_maybe_open_generics (rdm);
1140 
1141   while (eat (rdm, 'p'))
1142     {
1143       if (!open)
1144         PRINT ("<");
1145       else
1146         PRINT (", ");
1147       open = 1;
1148 
1149       name = parse_ident (rdm);
1150       print_ident (rdm, name);
1151       PRINT (" = ");
1152       demangle_type (rdm);
1153     }
1154 
1155   if (open)
1156     PRINT (">");
1157 }
1158 
1159 static void
demangle_const(struct rust_demangler * rdm)1160 demangle_const (struct rust_demangler *rdm)
1161 {
1162   char ty_tag;
1163   size_t old_next, backref;
1164 
1165   if (rdm->errored)
1166     return;
1167 
1168   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1169     {
1170       ++ rdm->recursion;
1171       if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1172 	/* FIXME: There ought to be a way to report
1173 	   that the recursion limit has been reached.  */
1174 	goto fail_return;
1175     }
1176 
1177   if (eat (rdm, 'B'))
1178     {
1179       backref = parse_integer_62 (rdm);
1180       if (!rdm->skipping_printing)
1181         {
1182           old_next = rdm->next;
1183           rdm->next = backref;
1184           demangle_const (rdm);
1185           rdm->next = old_next;
1186         }
1187       goto pass_return;
1188     }
1189 
1190   ty_tag = next (rdm);
1191   switch (ty_tag)
1192     {
1193     /* Placeholder. */
1194     case 'p':
1195       PRINT ("_");
1196       goto pass_return;
1197 
1198     /* Unsigned integer types. */
1199     case 'h':
1200     case 't':
1201     case 'm':
1202     case 'y':
1203     case 'o':
1204     case 'j':
1205       demangle_const_uint (rdm);
1206       break;
1207 
1208     /* Signed integer types. */
1209     case 'a':
1210     case 's':
1211     case 'l':
1212     case 'x':
1213     case 'n':
1214     case 'i':
1215       demangle_const_int (rdm);
1216       break;
1217 
1218     /* Boolean. */
1219     case 'b':
1220       demangle_const_bool (rdm);
1221       break;
1222 
1223     /* Character. */
1224     case 'c':
1225       demangle_const_char (rdm);
1226       break;
1227 
1228     default:
1229       goto fail_return;
1230     }
1231 
1232   if (!rdm->errored && rdm->verbose)
1233     {
1234       PRINT (": ");
1235       PRINT (basic_type (ty_tag));
1236     }
1237   goto pass_return;
1238 
1239  fail_return:
1240   rdm->errored = 1;
1241  pass_return:
1242   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1243     -- rdm->recursion;
1244 }
1245 
1246 static void
demangle_const_uint(struct rust_demangler * rdm)1247 demangle_const_uint (struct rust_demangler *rdm)
1248 {
1249   size_t hex_len;
1250   uint64_t value;
1251 
1252   if (rdm->errored)
1253     return;
1254 
1255   hex_len = parse_hex_nibbles (rdm, &value);
1256 
1257   if (hex_len > 16)
1258     {
1259       /* Print anything that doesn't fit in `uint64_t` verbatim. */
1260       PRINT ("0x");
1261       print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1262     }
1263   else if (hex_len > 0)
1264     print_uint64 (rdm, value);
1265   else
1266     rdm->errored = 1;
1267 }
1268 
1269 static void
demangle_const_int(struct rust_demangler * rdm)1270 demangle_const_int (struct rust_demangler *rdm)
1271 {
1272   if (eat (rdm, 'n'))
1273     PRINT ("-");
1274   demangle_const_uint (rdm);
1275 }
1276 
1277 static void
demangle_const_bool(struct rust_demangler * rdm)1278 demangle_const_bool (struct rust_demangler *rdm)
1279 {
1280   uint64_t value;
1281 
1282   if (parse_hex_nibbles (rdm, &value) != 1)
1283     {
1284       rdm->errored = 1;
1285       return;
1286     }
1287 
1288   if (value == 0)
1289     PRINT ("false");
1290   else if (value == 1)
1291     PRINT ("true");
1292   else
1293     rdm->errored = 1;
1294 }
1295 
1296 static void
demangle_const_char(struct rust_demangler * rdm)1297 demangle_const_char (struct rust_demangler *rdm)
1298 {
1299   size_t hex_len;
1300   uint64_t value;
1301 
1302   hex_len = parse_hex_nibbles (rdm, &value);
1303 
1304   if (hex_len == 0 || hex_len > 8)
1305     {
1306       rdm->errored = 1;
1307       return;
1308     }
1309 
1310   /* Match Rust's character "debug" output as best as we can. */
1311   PRINT ("'");
1312   if (value == '\t')
1313     PRINT ("\\t");
1314   else if (value == '\r')
1315     PRINT ("\\r");
1316   else if (value == '\n')
1317     PRINT ("\\n");
1318   else if (value > ' ' && value < '~')
1319     {
1320       /* Rust also considers many non-ASCII codepoints to be printable, but
1321 	 that logic is not easily ported to C. */
1322       char c = value;
1323       print_str (rdm, &c, 1);
1324     }
1325   else
1326     {
1327       PRINT ("\\u{");
1328       print_uint64_hex (rdm, value);
1329       PRINT ("}");
1330     }
1331   PRINT ("'");
1332 }
1333 
1334 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1335    The hex digits must contain at least 5 distinct digits. */
1336 static int
is_legacy_prefixed_hash(struct rust_mangled_ident ident)1337 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
1338 {
1339   uint16_t seen;
1340   int nibble;
1341   size_t i, count;
1342 
1343   if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1344     return 0;
1345 
1346   seen = 0;
1347   for (i = 0; i < 16; i++)
1348     {
1349       nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1350       if (nibble < 0)
1351         return 0;
1352       seen |= (uint16_t)1 << nibble;
1353     }
1354 
1355   /* Count how many distinct digits were seen. */
1356   count = 0;
1357   while (seen)
1358     {
1359       if (seen & 1)
1360         count++;
1361       seen >>= 1;
1362     }
1363 
1364   return count >= 5;
1365 }
1366 
1367 int
rust_demangle_callback(const char * mangled,int options,demangle_callbackref callback,void * opaque)1368 rust_demangle_callback (const char *mangled, int options,
1369                         demangle_callbackref callback, void *opaque)
1370 {
1371   const char *p;
1372   struct rust_demangler rdm;
1373   struct rust_mangled_ident ident;
1374 
1375   rdm.sym = mangled;
1376   rdm.sym_len = 0;
1377 
1378   rdm.callback_opaque = opaque;
1379   rdm.callback = callback;
1380 
1381   rdm.next = 0;
1382   rdm.errored = 0;
1383   rdm.skipping_printing = 0;
1384   rdm.verbose = (options & DMGL_VERBOSE) != 0;
1385   rdm.version = 0;
1386   rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1387   rdm.bound_lifetime_depth = 0;
1388 
1389   /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1390   if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1391     rdm.sym += 2;
1392   else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1393     {
1394       rdm.sym += 3;
1395       rdm.version = -1;
1396     }
1397   else
1398     return 0;
1399 
1400   /* Paths (v0) always start with uppercase characters. */
1401   if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1402     return 0;
1403 
1404   /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1405   for (p = rdm.sym; *p; p++)
1406     {
1407       /* Rust v0 symbols can have '.' suffixes, ignore those.  */
1408       if (rdm.version == 0 && *p == '.')
1409         break;
1410 
1411       rdm.sym_len++;
1412 
1413       if (*p == '_' || ISALNUM (*p))
1414         continue;
1415 
1416       /* Legacy Rust symbols can also contain [.:$] characters.
1417          Or @ in the .suffix (which will be skipped, see below). */
1418       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
1419                                 || *p == '@'))
1420         continue;
1421 
1422       return 0;
1423     }
1424 
1425   /* Legacy Rust symbols need to be handled separately. */
1426   if (rdm.version == -1)
1427     {
1428       /* Legacy Rust symbols always end with E.  But can be followed by a
1429          .suffix (which we want to ignore).  */
1430       int dot_suffix = 1;
1431       while (rdm.sym_len > 0 &&
1432              !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
1433         {
1434           dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
1435           rdm.sym_len--;
1436         }
1437 
1438       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1439         return 0;
1440       rdm.sym_len--;
1441 
1442       /* Legacy Rust symbols also always end with a path segment
1443          that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1444          This early check, before any parse_ident calls, should
1445          quickly filter out most C++ symbols unrelated to Rust. */
1446       if (!(rdm.sym_len > 19
1447             && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1448         return 0;
1449 
1450       do
1451         {
1452           ident = parse_ident (&rdm);
1453           if (rdm.errored || !ident.ascii)
1454             return 0;
1455         }
1456       while (rdm.next < rdm.sym_len);
1457 
1458       /* The last path segment should be the hash. */
1459       if (!is_legacy_prefixed_hash (ident))
1460         return 0;
1461 
1462       /* Reset the state for a second pass, to print the symbol. */
1463       rdm.next = 0;
1464       if (!rdm.verbose && rdm.sym_len > 19)
1465         {
1466           /* Hide the last segment, containing the hash, if not verbose. */
1467           rdm.sym_len -= 19;
1468         }
1469 
1470       do
1471         {
1472           if (rdm.next > 0)
1473             print_str (&rdm, "::", 2);
1474 
1475           ident = parse_ident (&rdm);
1476           print_ident (&rdm, ident);
1477         }
1478       while (rdm.next < rdm.sym_len);
1479     }
1480   else
1481     {
1482       demangle_path (&rdm, 1);
1483 
1484       /* Skip instantiating crate. */
1485       if (!rdm.errored && rdm.next < rdm.sym_len)
1486         {
1487           rdm.skipping_printing = 1;
1488           demangle_path (&rdm, 0);
1489         }
1490 
1491       /* It's an error to not reach the end. */
1492       rdm.errored |= rdm.next != rdm.sym_len;
1493     }
1494 
1495   return !rdm.errored;
1496 }
1497 
1498 /* Growable string buffers. */
1499 struct str_buf
1500 {
1501   char *ptr;
1502   size_t len;
1503   size_t cap;
1504   int errored;
1505 };
1506 
1507 static void
str_buf_reserve(struct str_buf * buf,size_t extra)1508 str_buf_reserve (struct str_buf *buf, size_t extra)
1509 {
1510   size_t available, min_new_cap, new_cap;
1511   char *new_ptr;
1512 
1513   /* Allocation failed before. */
1514   if (buf->errored)
1515     return;
1516 
1517   available = buf->cap - buf->len;
1518 
1519   if (extra <= available)
1520     return;
1521 
1522   min_new_cap = buf->cap + (extra - available);
1523 
1524   /* Check for overflows. */
1525   if (min_new_cap < buf->cap)
1526     {
1527       buf->errored = 1;
1528       return;
1529     }
1530 
1531   new_cap = buf->cap;
1532 
1533   if (new_cap == 0)
1534     new_cap = 4;
1535 
1536   /* Double capacity until sufficiently large. */
1537   while (new_cap < min_new_cap)
1538     {
1539       new_cap *= 2;
1540 
1541       /* Check for overflows. */
1542       if (new_cap < buf->cap)
1543         {
1544           buf->errored = 1;
1545           return;
1546         }
1547     }
1548 
1549   new_ptr = (char *)realloc (buf->ptr, new_cap);
1550   if (new_ptr == NULL)
1551     {
1552       free (buf->ptr);
1553       buf->ptr = NULL;
1554       buf->len = 0;
1555       buf->cap = 0;
1556       buf->errored = 1;
1557     }
1558   else
1559     {
1560       buf->ptr = new_ptr;
1561       buf->cap = new_cap;
1562     }
1563 }
1564 
1565 static void
str_buf_append(struct str_buf * buf,const char * data,size_t len)1566 str_buf_append (struct str_buf *buf, const char *data, size_t len)
1567 {
1568   str_buf_reserve (buf, len);
1569   if (buf->errored)
1570     return;
1571 
1572   memcpy (buf->ptr + buf->len, data, len);
1573   buf->len += len;
1574 }
1575 
1576 static void
str_buf_demangle_callback(const char * data,size_t len,void * opaque)1577 str_buf_demangle_callback (const char *data, size_t len, void *opaque)
1578 {
1579   str_buf_append ((struct str_buf *)opaque, data, len);
1580 }
1581 
1582 char *
rust_demangle(const char * mangled,int options)1583 rust_demangle (const char *mangled, int options)
1584 {
1585   struct str_buf out;
1586   int success;
1587 
1588   out.ptr = NULL;
1589   out.len = 0;
1590   out.cap = 0;
1591   out.errored = 0;
1592 
1593   success = rust_demangle_callback (mangled, options,
1594                                     str_buf_demangle_callback, &out);
1595 
1596   if (!success)
1597     {
1598       free (out.ptr);
1599       return NULL;
1600     }
1601 
1602   str_buf_append (&out, "\0", 1);
1603   return out.ptr;
1604 }
1605