111
|
1 /* Demangler for the Rust programming language
|
145
|
2 Copyright (C) 2016-2020 Free Software Foundation, Inc.
|
111
|
3 Written by David Tolnay (dtolnay@gmail.com).
|
|
4
|
|
5 This file is part of the libiberty library.
|
|
6 Libiberty is free software; you can redistribute it and/or
|
|
7 modify it under the terms of the GNU Library General Public
|
|
8 License as published by the Free Software Foundation; either
|
|
9 version 2 of the License, or (at your option) any later version.
|
|
10
|
|
11 In addition to the permissions in the GNU Library General Public
|
|
12 License, the Free Software Foundation gives you unlimited permission
|
|
13 to link the compiled version of this file into combinations with other
|
|
14 programs, and to distribute those combinations without any restriction
|
|
15 coming from the use of this file. (The Library Public License
|
|
16 restrictions do apply in other respects; for example, they cover
|
|
17 modification of the file, and distribution when not linked into a
|
|
18 combined executable.)
|
|
19
|
|
20 Libiberty is distributed in the hope that it will be useful,
|
|
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
23 Library General Public License for more details.
|
|
24
|
|
25 You should have received a copy of the GNU Library General Public
|
|
26 License along with libiberty; see the file COPYING.LIB.
|
|
27 If not, see <http://www.gnu.org/licenses/>. */
|
|
28
|
|
29
|
|
30 #ifdef HAVE_CONFIG_H
|
|
31 #include "config.h"
|
|
32 #endif
|
|
33
|
|
34 #include "safe-ctype.h"
|
|
35
|
145
|
36 #include <inttypes.h>
|
111
|
37 #include <sys/types.h>
|
|
38 #include <string.h>
|
|
39 #include <stdio.h>
|
145
|
40 #include <stdlib.h>
|
111
|
41
|
|
42 #ifdef HAVE_STRING_H
|
|
43 #include <string.h>
|
|
44 #else
|
|
45 extern size_t strlen(const char *s);
|
|
46 extern int strncmp(const char *s1, const char *s2, size_t n);
|
|
47 extern void *memset(void *s, int c, size_t n);
|
|
48 #endif
|
|
49
|
|
50 #include <demangle.h>
|
|
51 #include "libiberty.h"
|
|
52
|
145
|
53 struct rust_demangler
|
|
54 {
|
|
55 const char *sym;
|
|
56 size_t sym_len;
|
111
|
57
|
145
|
58 void *callback_opaque;
|
|
59 demangle_callbackref callback;
|
111
|
60
|
145
|
61 /* Position of the next character to read from the symbol. */
|
|
62 size_t next;
|
111
|
63
|
145
|
64 /* Non-zero if any error occurred. */
|
|
65 int errored;
|
|
66
|
|
67 /* Non-zero if printing should be verbose (e.g. include hashes). */
|
|
68 int verbose;
|
111
|
69
|
145
|
70 /* Rust mangling version, with legacy mangling being -1. */
|
|
71 int version;
|
|
72 };
|
111
|
73
|
145
|
74 /* Parsing functions. */
|
111
|
75
|
145
|
76 static char
|
|
77 peek (const struct rust_demangler *rdm)
|
|
78 {
|
|
79 if (rdm->next < rdm->sym_len)
|
|
80 return rdm->sym[rdm->next];
|
|
81 return 0;
|
|
82 }
|
111
|
83
|
145
|
84 static char
|
|
85 next (struct rust_demangler *rdm)
|
|
86 {
|
|
87 char c = peek (rdm);
|
|
88 if (!c)
|
|
89 rdm->errored = 1;
|
|
90 else
|
|
91 rdm->next++;
|
|
92 return c;
|
|
93 }
|
|
94
|
|
/* One identifier (path segment) taken from a mangled symbol.  */
struct rust_mangled_ident
{
  /* Start of the ASCII part of the identifier, and its length in
     characters.  The text is not NUL-terminated at ASCII_LEN.  */
  const char *ascii;
  size_t ascii_len;
};
|
|
101
|
|
102 static struct rust_mangled_ident
|
|
103 parse_ident (struct rust_demangler *rdm)
|
|
104 {
|
|
105 char c;
|
|
106 size_t start, len;
|
|
107 struct rust_mangled_ident ident;
|
|
108
|
|
109 ident.ascii = NULL;
|
|
110 ident.ascii_len = 0;
|
111
|
111
|
145
|
112 c = next (rdm);
|
|
113 if (!ISDIGIT (c))
|
|
114 {
|
|
115 rdm->errored = 1;
|
|
116 return ident;
|
|
117 }
|
|
118 len = c - '0';
|
111
|
119
|
145
|
120 if (c != '0')
|
|
121 while (ISDIGIT (peek (rdm)))
|
|
122 len = len * 10 + (next (rdm) - '0');
|
111
|
123
|
145
|
124 start = rdm->next;
|
|
125 rdm->next += len;
|
|
126 /* Check for overflows. */
|
|
127 if ((start > rdm->next) || (rdm->next > rdm->sym_len))
|
|
128 {
|
|
129 rdm->errored = 1;
|
|
130 return ident;
|
|
131 }
|
111
|
132
|
145
|
133 ident.ascii = rdm->sym + start;
|
|
134 ident.ascii_len = len;
|
111
|
135
|
145
|
136 if (ident.ascii_len == 0)
|
|
137 ident.ascii = NULL;
|
111
|
138
|
145
|
139 return ident;
|
|
140 }
|
111
|
141
|
145
|
142 /* Printing functions. */
|
|
143
|
|
144 static void
|
|
145 print_str (struct rust_demangler *rdm, const char *data, size_t len)
|
|
146 {
|
|
147 if (!rdm->errored)
|
|
148 rdm->callback (data, len, rdm->callback_opaque);
|
|
149 }
|
|
150
|
|
151 #define PRINT(s) print_str (rdm, s, strlen (s))
|
111
|
152
|
145
|
/* Map a lowercase hexadecimal digit ('0'-'9', 'a'-'f') to its value
   0x0-0xf.  Any other character, uppercase hex digits included,
   yields -1.  */
static int
decode_lower_hex_nibble (char nibble)
{
  if (nibble >= '0' && nibble <= '9')
    return nibble - '0';

  if (nibble >= 'a' && nibble <= 'f')
    return nibble - 'a' + 0xa;

  return -1;
}
|
111
|
163
|
145
|
/* Decode the legacy "$...$" escape sequence at the start of E, of
   which LEN characters may be read.

   Recognized forms are "$C$", the two-letter names listed in the
   table below, and "$uXX$" where XX are two lowercase hex digits
   encoding a value in the range 0x20-0x7f.

   On success the unescaped character is returned and *OUT_LEN is set
   to the number of input characters consumed, both '$' included.
   On failure 0 is returned and *OUT_LEN is left untouched.  */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  /* Two-letter escape names and the characters they stand for.  */
  static const struct
  {
    char name[2];
    char value;
  } pairs[] = {
    { { 'S', 'P' }, '@' },
    { { 'B', 'P' }, '*' },
    { { 'R', 'F' }, '&' },
    { { 'L', 'T' }, '<' },
    { { 'G', 'T' }, '>' },
    { { 'L', 'P' }, '(' },
    { { 'R', 'P' }, ')' }
  };
  const char *name;
  size_t avail, name_len, i;
  int hi, lo;
  char result;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'.  */
  name = e + 1;
  avail = len - 1;
  name_len = 0;
  result = 0;

  if (name[0] == 'C')
    {
      name_len = 1;
      result = ',';
    }
  else if (avail > 2)
    {
      name_len = 2;

      for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
	if (name[0] == pairs[i].name[0] && name[1] == pairs[i].name[1])
	  {
	    result = pairs[i].value;
	    break;
	  }

      if (!result && name[0] == 'u' && avail > 3)
	{
	  name_len = 3;

	  hi = decode_lower_hex_nibble (name[1]);
	  if (hi < 0)
	    return 0;
	  lo = decode_lower_hex_nibble (name[2]);
	  if (lo < 0)
	    return 0;

	  /* Only 7-bit values are accepted, and nothing below 0x20.  */
	  if (hi > 7)
	    return 0;
	  result = (hi << 4) | lo;
	  if (result < 0x20)
	    return 0;
	}
    }

  /* The name must be known and be followed by the closing '$'.  */
  if (!result)
    return 0;
  if (avail <= name_len)
    return 0;
  if (name[name_len] != '$')
    return 0;

  *out_len = name_len + 2;
  return result;
}
|
|
228
|
145
|
229 static void
|
|
230 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
|
111
|
231 {
|
145
|
232 char unescaped;
|
|
233 size_t len;
|
111
|
234
|
145
|
235 if (rdm->errored)
|
111
|
236 return;
|
|
237
|
145
|
238 if (rdm->version == -1)
|
|
239 {
|
|
240 /* Ignore leading underscores preceding escape sequences.
|
|
241 The mangler inserts an underscore to make sure the
|
|
242 identifier begins with a XID_Start character. */
|
|
243 if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
|
|
244 && ident.ascii[1] == '$')
|
|
245 {
|
|
246 ident.ascii++;
|
|
247 ident.ascii_len--;
|
|
248 }
|
111
|
249
|
145
|
250 while (ident.ascii_len > 0)
|
|
251 {
|
|
252 /* Handle legacy escape sequences ("$...$", ".." or "."). */
|
|
253 if (ident.ascii[0] == '$')
|
|
254 {
|
|
255 unescaped
|
|
256 = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
|
|
257 if (unescaped)
|
|
258 print_str (rdm, &unescaped, 1);
|
|
259 else
|
|
260 {
|
|
261 /* Unexpected escape sequence, print the rest verbatim. */
|
|
262 print_str (rdm, ident.ascii, ident.ascii_len);
|
|
263 return;
|
|
264 }
|
|
265 }
|
|
266 else if (ident.ascii[0] == '.')
|
|
267 {
|
|
268 if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
|
|
269 {
|
|
270 /* ".." becomes "::" */
|
|
271 PRINT ("::");
|
|
272 len = 2;
|
|
273 }
|
|
274 else
|
|
275 {
|
|
276 /* "." becomes "-" */
|
|
277 PRINT ("-");
|
|
278 len = 1;
|
|
279 }
|
|
280 }
|
|
281 else
|
|
282 {
|
|
283 /* Print everything before the next escape sequence, at once. */
|
|
284 for (len = 0; len < ident.ascii_len; len++)
|
|
285 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
|
|
286 break;
|
111
|
287
|
145
|
288 print_str (rdm, ident.ascii, len);
|
|
289 }
|
|
290
|
|
291 ident.ascii += len;
|
|
292 ident.ascii_len -= len;
|
|
293 }
|
|
294
|
|
295 return;
|
|
296 }
|
|
297 }
|
|
298
|
|
299 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
|
|
300 The hex digits must contain at least 5 distinct digits. */
|
|
301 static int
|
|
302 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
|
|
303 {
|
|
304 uint16_t seen;
|
|
305 int nibble;
|
|
306 size_t i, count;
|
|
307
|
|
308 if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
|
|
309 return 0;
|
|
310
|
|
311 seen = 0;
|
|
312 for (i = 0; i < 16; i++)
|
|
313 {
|
|
314 nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
|
|
315 if (nibble < 0)
|
|
316 return 0;
|
|
317 seen |= (uint16_t)1 << nibble;
|
|
318 }
|
|
319
|
|
320 /* Count how many distinct digits were seen. */
|
|
321 count = 0;
|
|
322 while (seen)
|
|
323 {
|
|
324 if (seen & 1)
|
|
325 count++;
|
|
326 seen >>= 1;
|
|
327 }
|
|
328
|
|
329 return count >= 5;
|
111
|
330 }
|
|
331
|
145
|
332 int
|
|
333 rust_demangle_callback (const char *mangled, int options,
|
|
334 demangle_callbackref callback, void *opaque)
|
111
|
335 {
|
145
|
336 const char *p;
|
|
337 struct rust_demangler rdm;
|
|
338 struct rust_mangled_ident ident;
|
|
339
|
|
340 rdm.sym = mangled;
|
|
341 rdm.sym_len = 0;
|
|
342
|
|
343 rdm.callback_opaque = opaque;
|
|
344 rdm.callback = callback;
|
|
345
|
|
346 rdm.next = 0;
|
|
347 rdm.errored = 0;
|
|
348 rdm.verbose = (options & DMGL_VERBOSE) != 0;
|
|
349 rdm.version = 0;
|
|
350
|
|
351 /* Rust symbols always start with _ZN (legacy). */
|
|
352 if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
|
|
353 {
|
|
354 rdm.sym += 3;
|
|
355 rdm.version = -1;
|
|
356 }
|
|
357 else
|
|
358 return 0;
|
|
359
|
|
360 /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
|
|
361 for (p = rdm.sym; *p; p++)
|
|
362 {
|
|
363 rdm.sym_len++;
|
|
364
|
|
365 if (*p == '_' || ISALNUM (*p))
|
|
366 continue;
|
|
367
|
|
368 if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
|
|
369 continue;
|
|
370
|
|
371 return 0;
|
|
372 }
|
111
|
373
|
145
|
374 /* Legacy Rust symbols need to be handled separately. */
|
|
375 if (rdm.version == -1)
|
|
376 {
|
|
377 /* Legacy Rust symbols always end with E. */
|
|
378 if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
|
|
379 return 0;
|
|
380 rdm.sym_len--;
|
|
381
|
|
382 /* Legacy Rust symbols also always end with a path segment
|
|
383 that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
|
|
384 This early check, before any parse_ident calls, should
|
|
385 quickly filter out most C++ symbols unrelated to Rust. */
|
|
386 if (!(rdm.sym_len > 19
|
|
387 && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
|
|
388 return 0;
|
|
389
|
|
390 do
|
|
391 {
|
|
392 ident = parse_ident (&rdm);
|
|
393 if (rdm.errored || !ident.ascii)
|
|
394 return 0;
|
|
395 }
|
|
396 while (rdm.next < rdm.sym_len);
|
|
397
|
|
398 /* The last path segment should be the hash. */
|
|
399 if (!is_legacy_prefixed_hash (ident))
|
|
400 return 0;
|
|
401
|
|
402 /* Reset the state for a second pass, to print the symbol. */
|
|
403 rdm.next = 0;
|
|
404 if (!rdm.verbose && rdm.sym_len > 19)
|
|
405 {
|
|
406 /* Hide the last segment, containing the hash, if not verbose. */
|
|
407 rdm.sym_len -= 19;
|
|
408 }
|
|
409
|
|
410 do
|
|
411 {
|
|
412 if (rdm.next > 0)
|
|
413 print_str (&rdm, "::", 2);
|
|
414
|
|
415 ident = parse_ident (&rdm);
|
|
416 print_ident (&rdm, ident);
|
|
417 }
|
|
418 while (rdm.next < rdm.sym_len);
|
|
419 }
|
|
420 else
|
111
|
421 return 0;
|
|
422
|
145
|
423 return !rdm.errored;
|
|
424 }
|
|
425
|
|
/* Growable string buffers.  */
struct str_buf
{
  /* Heap storage, or NULL while nothing has been allocated.  */
  char *ptr;
  /* Number of bytes currently in use.  */
  size_t len;
  /* Number of bytes allocated at PTR.  */
  size_t cap;
  /* Non-zero once growing the buffer has failed.  */
  int errored;
};

/* Make sure at least EXTRA more bytes can be stored in BUF, growing
   the allocation by repeated doubling (starting from 4 bytes) when
   needed.

   Does nothing if BUF is already flagged as errored.  If the required
   capacity cannot be represented in a size_t, BUF is flagged as
   errored and otherwise left as it was.  If the allocation itself
   fails, the storage is released, BUF is reset to empty and flagged
   as errored.  */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before.  */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;

  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* Check for overflows.  */
  if (min_new_cap < buf->cap)
    {
      buf->errored = 1;
      return;
    }

  new_cap = buf->cap;

  if (new_cap == 0)
    new_cap = 4;

  /* Double capacity until sufficiently large.  */
  while (new_cap < min_new_cap)
    {
      /* Check for overflows before doubling: once the product has
	 wrapped around to 0 it would stay 0 and never reach
	 MIN_NEW_CAP, so the loop would not terminate.  */
      if (new_cap > (size_t) -1 / 2)
	{
	  buf->errored = 1;
	  return;
	}

      new_cap *= 2;
    }

  new_ptr = (char *) realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    {
      /* realloc leaves the old block allocated on failure.  */
      free (buf->ptr);
      buf->ptr = NULL;
      buf->len = 0;
      buf->cap = 0;
      buf->errored = 1;
    }
  else
    {
      buf->ptr = new_ptr;
      buf->cap = new_cap;
    }
}
|
|
492
|
|
493 static void
|
|
494 str_buf_append (struct str_buf *buf, const char *data, size_t len)
|
|
495 {
|
|
496 str_buf_reserve (buf, len);
|
|
497 if (buf->errored)
|
|
498 return;
|
|
499
|
|
500 memcpy (buf->ptr + buf->len, data, len);
|
|
501 buf->len += len;
|
|
502 }
|
111
|
503
|
145
|
/* Demangler output callback that collects the text in a string
   buffer.  OPAQUE is treated as a pointer to the struct str_buf to
   append to.  */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *out = (struct str_buf *) opaque;

  str_buf_append (out, data, len);
}
|
145
|
509
|
|
510 char *
|
|
511 rust_demangle (const char *mangled, int options)
|
|
512 {
|
|
513 struct str_buf out;
|
|
514 int success;
|
|
515
|
|
516 out.ptr = NULL;
|
|
517 out.len = 0;
|
|
518 out.cap = 0;
|
|
519 out.errored = 0;
|
|
520
|
|
521 success = rust_demangle_callback (mangled, options,
|
|
522 str_buf_demangle_callback, &out);
|
|
523
|
|
524 if (!success)
|
|
525 {
|
|
526 free (out.ptr);
|
|
527 return NULL;
|
|
528 }
|
|
529
|
|
530 str_buf_append (&out, "\0", 1);
|
|
531 return out.ptr;
|
|
532 }
|