1 /* Character set conversion support for GDB. 2 3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010 4 Free Software Foundation, Inc. 5 6 This file is part of GDB. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #include "defs.h" 22 #include "charset.h" 23 #include "gdbcmd.h" 24 #include "gdb_assert.h" 25 #include "gdb_obstack.h" 26 #include "gdb_wait.h" 27 #include "charset-list.h" 28 #include "vec.h" 29 #include "environ.h" 30 #include "arch-utils.h" 31 32 #include <stddef.h> 33 #include "gdb_string.h" 34 #include <ctype.h> 35 36 #ifdef USE_WIN32API 37 #include <windows.h> 38 #endif 39 40 /* How GDB's character set support works 41 42 GDB has three global settings: 43 44 - The `current host character set' is the character set GDB should 45 use in talking to the user, and which (hopefully) the user's 46 terminal knows how to display properly. Most users should not 47 change this. 48 49 - The `current target character set' is the character set the 50 program being debugged uses. 51 52 - The `current target wide character set' is the wide character set 53 the program being debugged uses, that is, the encoding used for 54 wchar_t. 55 56 There are commands to set each of these, and mechanisms for 57 choosing reasonable default values. GDB has a global list of 58 character sets that it can use as its host or target character 59 sets. 60 61 The header file `charset.h' declares various functions that 62 different pieces of GDB need to perform tasks like: 63 64 - printing target strings and characters to the user's terminal 65 (mostly target->host conversions), 66 67 - building target-appropriate representations of strings and 68 characters the user enters in expressions (mostly host->target 69 conversions), 70 71 and so on. 72 73 To avoid excessive code duplication and maintenance efforts, 74 GDB simply requires a capable iconv function. Users on platforms 75 without a suitable iconv can use the GNU iconv library. */ 76 77 78 #ifdef PHONY_ICONV 79 80 /* Provide a phony iconv that does as little as possible. Also, 81 arrange for there to be a single available character set. */ 82 83 #undef GDB_DEFAULT_HOST_CHARSET 84 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 85 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 86 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 87 #undef DEFAULT_CHARSET_NAMES 88 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 89 90 #undef iconv_t 91 #define iconv_t int 92 #undef iconv_open 93 #undef iconv 94 #undef iconv_close 95 96 #undef ICONV_CONST 97 #define ICONV_CONST const 98 99 /* Some systems don't have EILSEQ, so we define it here, but not as 100 EINVAL, because callers of `iconv' want to distinguish EINVAL and 101 EILSEQ. This is what iconv.h from libiconv does as well. Note 102 that wchar.h may also define EILSEQ, so this needs to be after we 103 include wchar.h, which happens in defs.h through gdb_wchar.h. */ 104 #ifndef EILSEQ 105 #define EILSEQ ENOENT 106 #endif 107 108 iconv_t 109 iconv_open (const char *to, const char *from) 110 { 111 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 112 We allow conversions to wchar_t and the host charset. */ 113 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 114 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 115 return -1; 116 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 117 return -1; 118 119 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 120 used as a flag in calls to iconv. */ 121 return !strcmp (from, "UTF-32BE"); 122 } 123 124 int 125 iconv_close (iconv_t arg) 126 { 127 return 0; 128 } 129 130 size_t 131 iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 132 char **outbuf, size_t *outbytesleft) 133 { 134 if (utf_flag) 135 { 136 while (*inbytesleft >= 4) 137 { 138 size_t j; 139 unsigned long c = 0; 140 141 for (j = 0; j < 4; ++j) 142 { 143 c <<= 8; 144 c += (*inbuf)[j] & 0xff; 145 } 146 147 if (c >= 256) 148 { 149 errno = EILSEQ; 150 return -1; 151 } 152 **outbuf = c & 0xff; 153 ++*outbuf; 154 --*outbytesleft; 155 156 ++*inbuf; 157 *inbytesleft -= 4; 158 } 159 if (*inbytesleft < 4) 160 { 161 errno = EINVAL; 162 return -1; 163 } 164 } 165 else 166 { 167 /* In all other cases we simply copy input bytes to the 168 output. */ 169 size_t amt = *inbytesleft; 170 171 if (amt > *outbytesleft) 172 amt = *outbytesleft; 173 memcpy (*outbuf, *inbuf, amt); 174 *inbuf += amt; 175 *outbuf += amt; 176 *inbytesleft -= amt; 177 *outbytesleft -= amt; 178 } 179 180 if (*inbytesleft) 181 { 182 errno = E2BIG; 183 return -1; 184 } 185 186 /* The number of non-reversible conversions -- but they were all 187 reversible. */ 188 return 0; 189 } 190 191 #endif 192 193 194 195 /* The global lists of character sets and translations. */ 196 197 198 #ifndef GDB_DEFAULT_TARGET_CHARSET 199 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 200 #endif 201 202 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 203 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 204 #endif 205 206 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 207 static const char *host_charset_name = "auto"; 208 static void 209 show_host_charset_name (struct ui_file *file, int from_tty, 210 struct cmd_list_element *c, 211 const char *value) 212 { 213 if (!strcmp (value, "auto")) 214 fprintf_filtered (file, 215 _("The host character set is \"auto; currently %s\".\n"), 216 auto_host_charset_name); 217 else 218 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 219 } 220 221 static const char *target_charset_name = "auto"; 222 static void 223 show_target_charset_name (struct ui_file *file, int from_tty, 224 struct cmd_list_element *c, const char *value) 225 { 226 if (!strcmp (value, "auto")) 227 fprintf_filtered (file, 228 _("The target character set is \"auto; " 229 "currently %s\".\n"), 230 gdbarch_auto_charset (get_current_arch ())); 231 else 232 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 233 value); 234 } 235 236 static const char *target_wide_charset_name = "auto"; 237 static void 238 show_target_wide_charset_name (struct ui_file *file, int from_tty, 239 struct cmd_list_element *c, const char *value) 240 { 241 if (!strcmp (value, "auto")) 242 fprintf_filtered (file, 243 _("The target wide character set is \"auto; " 244 "currently %s\".\n"), 245 gdbarch_auto_wide_charset (get_current_arch ())); 246 else 247 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 248 value); 249 } 250 251 static const char *default_charset_names[] = 252 { 253 DEFAULT_CHARSET_NAMES 254 0 255 }; 256 257 static const char **charset_enum; 258 259 260 /* If the target wide character set has big- or little-endian 261 variants, these are the corresponding names. */ 262 static const char *target_wide_charset_be_name; 263 static const char *target_wide_charset_le_name; 264 265 /* The architecture for which the BE- and LE-names are valid. */ 266 static struct gdbarch *be_le_arch; 267 268 /* A helper function which sets the target wide big- and little-endian 269 character set names, if possible. */ 270 271 static void 272 set_be_le_names (struct gdbarch *gdbarch) 273 { 274 int i, len; 275 const char *target_wide; 276 277 if (be_le_arch == gdbarch) 278 return; 279 be_le_arch = gdbarch; 280 281 target_wide_charset_le_name = NULL; 282 target_wide_charset_be_name = NULL; 283 284 target_wide = target_wide_charset_name; 285 if (!strcmp (target_wide, "auto")) 286 target_wide = gdbarch_auto_wide_charset (gdbarch); 287 288 len = strlen (target_wide); 289 for (i = 0; charset_enum[i]; ++i) 290 { 291 if (strncmp (target_wide, charset_enum[i], len)) 292 continue; 293 if ((charset_enum[i][len] == 'B' 294 || charset_enum[i][len] == 'L') 295 && charset_enum[i][len + 1] == 'E' 296 && charset_enum[i][len + 2] == '\0') 297 { 298 if (charset_enum[i][len] == 'B') 299 target_wide_charset_be_name = charset_enum[i]; 300 else 301 target_wide_charset_le_name = charset_enum[i]; 302 } 303 } 304 } 305 306 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 307 target-wide-charset', 'set charset' sfunc's. */ 308 309 static void 310 validate (struct gdbarch *gdbarch) 311 { 312 iconv_t desc; 313 const char *host_cset = host_charset (); 314 const char *target_cset = target_charset (gdbarch); 315 const char *target_wide_cset = target_wide_charset_name; 316 317 if (!strcmp (target_wide_cset, "auto")) 318 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 319 320 desc = iconv_open (target_wide_cset, host_cset); 321 if (desc == (iconv_t) -1) 322 error ("Cannot convert between character sets `%s' and `%s'", 323 target_wide_cset, host_cset); 324 iconv_close (desc); 325 326 desc = iconv_open (target_cset, host_cset); 327 if (desc == (iconv_t) -1) 328 error ("Cannot convert between character sets `%s' and `%s'", 329 target_cset, host_cset); 330 iconv_close (desc); 331 332 /* Clear the cache. */ 333 be_le_arch = NULL; 334 } 335 336 /* This is the sfunc for the 'set charset' command. */ 337 static void 338 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c) 339 { 340 /* CAREFUL: set the target charset here as well. */ 341 target_charset_name = host_charset_name; 342 validate (get_current_arch ()); 343 } 344 345 /* 'set host-charset' command sfunc. We need a wrapper here because 346 the function needs to have a specific signature. */ 347 static void 348 set_host_charset_sfunc (char *charset, int from_tty, 349 struct cmd_list_element *c) 350 { 351 validate (get_current_arch ()); 352 } 353 354 /* Wrapper for the 'set target-charset' command. */ 355 static void 356 set_target_charset_sfunc (char *charset, int from_tty, 357 struct cmd_list_element *c) 358 { 359 validate (get_current_arch ()); 360 } 361 362 /* Wrapper for the 'set target-wide-charset' command. */ 363 static void 364 set_target_wide_charset_sfunc (char *charset, int from_tty, 365 struct cmd_list_element *c) 366 { 367 validate (get_current_arch ()); 368 } 369 370 /* sfunc for the 'show charset' command. */ 371 static void 372 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c, 373 const char *name) 374 { 375 show_host_charset_name (file, from_tty, c, host_charset_name); 376 show_target_charset_name (file, from_tty, c, target_charset_name); 377 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name); 378 } 379 380 381 /* Accessor functions. */ 382 383 const char * 384 host_charset (void) 385 { 386 if (!strcmp (host_charset_name, "auto")) 387 return auto_host_charset_name; 388 return host_charset_name; 389 } 390 391 const char * 392 target_charset (struct gdbarch *gdbarch) 393 { 394 if (!strcmp (target_charset_name, "auto")) 395 return gdbarch_auto_charset (gdbarch); 396 return target_charset_name; 397 } 398 399 const char * 400 target_wide_charset (struct gdbarch *gdbarch) 401 { 402 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 403 404 set_be_le_names (gdbarch); 405 if (byte_order == BFD_ENDIAN_BIG) 406 { 407 if (target_wide_charset_be_name) 408 return target_wide_charset_be_name; 409 } 410 else 411 { 412 if (target_wide_charset_le_name) 413 return target_wide_charset_le_name; 414 } 415 416 if (!strcmp (target_wide_charset_name, "auto")) 417 return gdbarch_auto_wide_charset (gdbarch); 418 419 return target_wide_charset_name; 420 } 421 422 423 /* Host character set management. For the time being, we assume that 424 the host character set is some superset of ASCII. */ 425 426 char 427 host_letter_to_control_character (char c) 428 { 429 if (c == '?') 430 return 0177; 431 return c & 0237; 432 } 433 434 /* Convert a host character, C, to its hex value. C must already have 435 been validated using isxdigit. */ 436 437 int 438 host_hex_value (char c) 439 { 440 if (isdigit (c)) 441 return c - '0'; 442 if (c >= 'a' && c <= 'f') 443 return 10 + c - 'a'; 444 gdb_assert (c >= 'A' && c <= 'F'); 445 return 10 + c - 'A'; 446 } 447 448 449 /* Public character management functions. */ 450 451 /* A cleanup function which is run to close an iconv descriptor. */ 452 453 static void 454 cleanup_iconv (void *p) 455 { 456 iconv_t *descp = p; 457 iconv_close (*descp); 458 } 459 460 static size_t 461 convert_wchar (gdb_wchar_t **pinp, size_t *pinleft, char **poutp, size_t *poutleft) 462 { 463 char tmp[MB_CUR_MAX]; 464 int r; 465 466 while (*pinleft >= sizeof(gdb_wchar_t)) 467 { 468 r = wctomb(tmp, **pinp); 469 470 if (r == -1) 471 perror_with_name ("Internal error while converting character sets"); 472 473 if (*poutleft < r) 474 { 475 errno = E2BIG; 476 return (size_t) -1; 477 } 478 479 memcpy(*poutp, tmp, r); 480 *poutp += r; 481 *poutleft -= r; 482 ++*pinp; 483 *pinleft -= sizeof(gdb_wchar_t); 484 } 485 486 if (*pinleft != 0) 487 return EINVAL; 488 489 return 0; 490 } 491 492 void 493 convert_between_encodings (const char *from, const char *to, 494 const gdb_byte *bytes, unsigned int num_bytes, 495 int width, struct obstack *output, 496 enum transliterations translit) 497 { 498 iconv_t desc; 499 struct cleanup *cleanups; 500 size_t inleft; 501 char *inp; 502 unsigned int space_request; 503 int use_wctomb = 0; 504 505 /* Often, the host and target charsets will be the same. */ 506 if (!strcmp (from, to)) 507 { 508 obstack_grow (output, bytes, num_bytes); 509 return; 510 } 511 512 if (!strcmp (from, "wchar_t")) 513 { 514 if (strcmp (to, host_charset ())) 515 perror_with_name ("Converting character sets"); 516 cleanups = NULL; /* silence gcc complaints */ 517 use_wctomb = 1; 518 } 519 else 520 { 521 desc = iconv_open (to, from); 522 if (desc == (iconv_t) -1) 523 perror_with_name ("Converting character sets"); 524 cleanups = make_cleanup (cleanup_iconv, &desc); 525 } 526 527 inleft = num_bytes; 528 inp = (char *) bytes; 529 530 space_request = num_bytes; 531 532 while (inleft > 0) 533 { 534 char *outp; 535 size_t outleft, r; 536 int old_size; 537 538 old_size = obstack_object_size (output); 539 obstack_blank (output, space_request); 540 541 outp = obstack_base (output) + old_size; 542 outleft = space_request; 543 544 if (use_wctomb) 545 r = convert_wchar((gdb_wchar_t **)(void *)&inp, &inleft, &outp, &outleft); 546 else 547 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); 548 549 /* Now make sure that the object on the obstack only includes 550 bytes we have converted. */ 551 obstack_blank (output, - (int) outleft); 552 553 if (r == (size_t) -1) 554 { 555 switch (errno) 556 { 557 case EILSEQ: 558 { 559 int i; 560 561 /* Invalid input sequence. */ 562 if (translit == translit_none) 563 error (_("Could not convert character to `%s' character set"), 564 to); 565 566 /* We emit escape sequence for the bytes, skip them, 567 and try again. */ 568 for (i = 0; i < width; ++i) 569 { 570 char octal[5]; 571 572 sprintf (octal, "\\%.3o", *inp & 0xff); 573 obstack_grow_str (output, octal); 574 575 ++inp; 576 --inleft; 577 } 578 } 579 break; 580 581 case E2BIG: 582 /* We ran out of space in the output buffer. Make it 583 bigger next time around. */ 584 space_request *= 2; 585 break; 586 587 case EINVAL: 588 /* Incomplete input sequence. FIXME: ought to report this 589 to the caller somehow. */ 590 inleft = 0; 591 break; 592 593 default: 594 perror_with_name ("Internal error while converting character sets"); 595 } 596 } 597 } 598 599 if (!use_wctomb) 600 do_cleanups (cleanups); 601 } 602 603 604 605 /* An iterator that returns host wchar_t's from a target string. */ 606 struct wchar_iterator 607 { 608 /* The underlying iconv descriptor. */ 609 iconv_t desc; 610 611 /* The input string. This is updated as convert characters. */ 612 char *input; 613 /* The number of bytes remaining in the input. */ 614 size_t bytes; 615 616 /* The width of an input character. */ 617 size_t width; 618 619 /* The intermediate buffer */ 620 char *inter; 621 size_t inter_size; 622 size_t inter_len; 623 624 /* The output byte. */ 625 gdb_wchar_t out; 626 }; 627 628 /* Create a new iterator. */ 629 struct wchar_iterator * 630 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, 631 size_t width) 632 { 633 struct wchar_iterator *result; 634 iconv_t desc; 635 636 desc = iconv_open (host_charset (), charset); 637 if (desc == (iconv_t) -1) 638 perror_with_name ("Converting character sets"); 639 640 result = XNEW (struct wchar_iterator); 641 result->desc = desc; 642 result->input = (char *) input; 643 result->bytes = bytes; 644 result->width = width; 645 646 result->inter = XNEW (char); 647 result->inter_size = 1; 648 result->inter_len = 0; 649 650 return result; 651 } 652 653 static void 654 do_cleanup_iterator (void *p) 655 { 656 struct wchar_iterator *iter = p; 657 658 iconv_close (iter->desc); 659 xfree (iter->inter); 660 xfree (iter); 661 } 662 663 struct cleanup * 664 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 665 { 666 return make_cleanup (do_cleanup_iterator, iter); 667 } 668 669 int 670 wchar_iterate (struct wchar_iterator *iter, 671 enum wchar_iterate_result *out_result, 672 gdb_wchar_t **out_chars, 673 const gdb_byte **ptr, 674 size_t *len) 675 { 676 size_t out_request; 677 char *orig_inptr = iter->input; 678 size_t orig_in = iter->bytes; 679 680 /* Try to convert some characters. At first we try to convert just 681 a single character. The reason for this is that iconv does not 682 necessarily update its outgoing arguments when it encounters an 683 invalid input sequence -- but we want to reliably report this to 684 our caller so it can emit an escape sequence. */ 685 while (iter->inter_len == 0 && iter->bytes > 0) 686 { 687 out_request = 1; 688 while (iter->bytes > 0) 689 { 690 char *outptr = (char *) &iter->inter[iter->inter_len]; 691 size_t out_avail = out_request; 692 693 size_t r = iconv (iter->desc, 694 (ICONV_CONST char **) &iter->input, &iter->bytes, 695 &outptr, &out_avail); 696 if (r == (size_t) -1) 697 { 698 switch (errno) 699 { 700 case EILSEQ: 701 /* Invalid input sequence. Skip it, and let the caller 702 know about it. */ 703 *out_result = wchar_iterate_invalid; 704 *ptr = iter->input; 705 *len = iter->width; 706 iter->input += iter->width; 707 iter->bytes -= iter->width; 708 return 0; 709 710 case E2BIG: 711 /* We ran out of space. We still might have converted a 712 character; if so, return it. Otherwise, grow the 713 buffer and try again. */ 714 if (out_avail < out_request) 715 break; 716 717 ++out_request; 718 if (out_request > iter->inter_size) 719 { 720 iter->inter_size = out_request; 721 iter->inter = xrealloc (iter->inter, out_request); 722 } 723 continue; 724 725 case EINVAL: 726 /* Incomplete input sequence. Let the caller know, and 727 arrange for future calls to see EOF. */ 728 *out_result = wchar_iterate_incomplete; 729 *ptr = iter->input; 730 *len = iter->bytes; 731 iter->bytes = 0; 732 return 0; 733 734 default: 735 perror_with_name ("Internal error while converting character sets"); 736 } 737 } 738 739 /* We converted something. */ 740 iter->inter_len += out_request - out_avail; 741 break; 742 } 743 } 744 745 if (iter->inter_len > 0) 746 { 747 int r; 748 749 /* Now convert from our charset to wchar_t */ 750 r = mbtowc(&iter->out, &iter->inter[0], iter->inter_len); 751 752 /* This must never happen: we just converted to a valid charset! */ 753 if (r < 0) 754 perror_with_name ("Internal error while converting character sets"); 755 756 /* NUL bytes are alright */ 757 if (r == 0) 758 r = 1; 759 760 iter->inter_len -= r; 761 memmove(&iter->inter[0], &iter->inter[r], iter->inter_len); 762 763 *out_result = wchar_iterate_ok; 764 *out_chars = &iter->out; 765 *ptr = orig_inptr; 766 *len = orig_in - iter->bytes; 767 return 1; 768 } 769 770 /* Really done. */ 771 *out_result = wchar_iterate_eof; 772 return -1; 773 } 774 775 776 /* The charset.c module initialization function. */ 777 778 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 779 780 DEF_VEC_P (char_ptr); 781 782 static VEC (char_ptr) *charsets; 783 784 #ifdef PHONY_ICONV 785 786 static void 787 find_charset_names (void) 788 { 789 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 790 VEC_safe_push (char_ptr, charsets, NULL); 791 } 792 793 #else /* PHONY_ICONV */ 794 795 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 796 provides different symbols in the static and dynamic libraries. 797 So, configure may see libiconvlist but not iconvlist. But, calling 798 iconvlist is the right thing to do and will work. Hence we do a 799 check here but unconditionally call iconvlist below. */ 800 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 801 802 /* A helper function that adds some character sets to the vector of 803 all character sets. This is a callback function for iconvlist. */ 804 805 static int 806 add_one (unsigned int count, const char *const *names, void *data) 807 { 808 unsigned int i; 809 810 for (i = 0; i < count; ++i) 811 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 812 813 return 0; 814 } 815 816 static void 817 find_charset_names (void) 818 { 819 iconvlist (add_one, NULL); 820 VEC_safe_push (char_ptr, charsets, NULL); 821 } 822 823 #else 824 825 /* Return non-zero if LINE (output from iconv) should be ignored. 826 Older iconv programs (e.g. 2.2.2) include the human readable 827 introduction even when stdout is not a tty. Newer versions omit 828 the intro if stdout is not a tty. */ 829 830 static int 831 ignore_line_p (const char *line) 832 { 833 /* This table is used to filter the output. If this text appears 834 anywhere in the line, it is ignored (strstr is used). */ 835 static const char * const ignore_lines[] = 836 { 837 "The following", 838 "not necessarily", 839 "the FROM and TO", 840 "listed with several", 841 NULL 842 }; 843 int i; 844 845 for (i = 0; ignore_lines[i] != NULL; ++i) 846 { 847 if (strstr (line, ignore_lines[i]) != NULL) 848 return 1; 849 } 850 851 return 0; 852 } 853 854 static void 855 find_charset_names (void) 856 { 857 struct pex_obj *child; 858 char *args[3]; 859 int err, status; 860 int fail = 1; 861 struct gdb_environ *iconv_env; 862 863 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not 864 a tty. We need to recognize it and ignore it. This text is subject 865 to translation, so force LANGUAGE=C. */ 866 iconv_env = make_environ (); 867 init_environ (iconv_env); 868 set_in_environ (iconv_env, "LANGUAGE", "C"); 869 set_in_environ (iconv_env, "LC_ALL", "C"); 870 871 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 872 873 args[0] = "iconv"; 874 args[1] = "-l"; 875 args[2] = NULL; 876 /* Note that we simply ignore errors here. */ 877 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, 878 "iconv", args, environ_vector (iconv_env), 879 NULL, NULL, &err)) 880 { 881 FILE *in = pex_read_output (child, 0); 882 883 /* POSIX says that iconv -l uses an unspecified format. We 884 parse the glibc and libiconv formats; feel free to add others 885 as needed. */ 886 887 while (!feof (in)) 888 { 889 /* The size of buf is chosen arbitrarily. */ 890 char buf[1024]; 891 char *start, *r; 892 int len; 893 894 r = fgets (buf, sizeof (buf), in); 895 if (!r) 896 break; 897 len = strlen (r); 898 if (len <= 3) 899 continue; 900 if (ignore_line_p (r)) 901 continue; 902 903 /* Strip off the newline. */ 904 --len; 905 /* Strip off one or two '/'s. glibc will print lines like 906 "8859_7//", but also "10646-1:1993/UCS4/". */ 907 if (buf[len - 1] == '/') 908 --len; 909 if (buf[len - 1] == '/') 910 --len; 911 buf[len] = '\0'; 912 913 /* libiconv will print multiple entries per line, separated 914 by spaces. Older iconvs will print multiple entries per line, 915 indented by two spaces, and separated by ", " 916 (i.e. the human readable form). */ 917 start = buf; 918 while (1) 919 { 920 int keep_going; 921 char *p; 922 923 /* Skip leading blanks. */ 924 for (p = start; *p && *p == ' '; ++p) 925 ; 926 start = p; 927 /* Find the next space, comma, or end-of-line. */ 928 for ( ; *p && *p != ' ' && *p != ','; ++p) 929 ; 930 /* Ignore an empty result. */ 931 if (p == start) 932 break; 933 keep_going = *p; 934 *p = '\0'; 935 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 936 if (!keep_going) 937 break; 938 /* Skip any extra spaces. */ 939 for (start = p + 1; *start && *start == ' '; ++start) 940 ; 941 } 942 } 943 944 if (pex_get_status (child, 1, &status) 945 && WIFEXITED (status) && !WEXITSTATUS (status)) 946 fail = 0; 947 948 } 949 950 pex_free (child); 951 free_environ (iconv_env); 952 953 if (fail) 954 { 955 /* Some error occurred, so drop the vector. */ 956 int ix; 957 char *elt; 958 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) 959 xfree (elt); 960 VEC_truncate (char_ptr, charsets, 0); 961 } 962 else 963 VEC_safe_push (char_ptr, charsets, NULL); 964 } 965 966 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 967 #endif /* PHONY_ICONV */ 968 969 /* The "auto" target charset used by default_auto_charset. */ 970 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 971 972 const char * 973 default_auto_charset (void) 974 { 975 return auto_target_charset_name; 976 } 977 978 const char * 979 default_auto_wide_charset (void) 980 { 981 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 982 } 983 984 void 985 _initialize_charset (void) 986 { 987 /* The first element is always "auto". */ 988 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 989 find_charset_names (); 990 991 if (VEC_length (char_ptr, charsets) > 1) 992 charset_enum = (const char **) VEC_address (char_ptr, charsets); 993 else 994 charset_enum = default_charset_names; 995 996 #ifndef PHONY_ICONV 997 #ifdef HAVE_LANGINFO_CODESET 998 /* The result of nl_langinfo may be overwritten later. This may 999 leak a little memory, if the user later changes the host charset, 1000 but that doesn't matter much. */ 1001 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1002 /* Solaris will return `646' here -- but the Solaris iconv then 1003 does not accept this. Darwin (and maybe FreeBSD) may return "" here, 1004 which GNU libiconv doesn't like (infinite loop). */ 1005 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 1006 auto_host_charset_name = "ASCII"; 1007 auto_target_charset_name = auto_host_charset_name; 1008 #elif defined (USE_WIN32API) 1009 { 1010 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */ 1011 1012 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1013 "CP%d", GetACP()); 1014 auto_host_charset_name = w32_host_default_charset; 1015 auto_target_charset_name = auto_host_charset_name; 1016 } 1017 #endif 1018 #endif 1019 1020 add_setshow_enum_cmd ("charset", class_support, 1021 charset_enum, &host_charset_name, _("\ 1022 Set the host and target character sets."), _("\ 1023 Show the host and target character sets."), _("\ 1024 The `host character set' is the one used by the system GDB is running on.\n\ 1025 The `target character set' is the one used by the program being debugged.\n\ 1026 You may only use supersets of ASCII for your host character set; GDB does\n\ 1027 not support any others.\n\ 1028 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 1029 /* Note that the sfunc below needs to set 1030 target_charset_name, because the 'set 1031 charset' command sets two variables. */ 1032 set_charset_sfunc, 1033 show_charset, 1034 &setlist, &showlist); 1035 1036 add_setshow_enum_cmd ("host-charset", class_support, 1037 charset_enum, &host_charset_name, _("\ 1038 Set the host character set."), _("\ 1039 Show the host character set."), _("\ 1040 The `host character set' is the one used by the system GDB is running on.\n\ 1041 You may only use supersets of ASCII for your host character set; GDB does\n\ 1042 not support any others.\n\ 1043 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 1044 set_host_charset_sfunc, 1045 show_host_charset_name, 1046 &setlist, &showlist); 1047 1048 add_setshow_enum_cmd ("target-charset", class_support, 1049 charset_enum, &target_charset_name, _("\ 1050 Set the target character set."), _("\ 1051 Show the target character set."), _("\ 1052 The `target character set' is the one used by the program being debugged.\n\ 1053 GDB translates characters and strings between the host and target\n\ 1054 character sets as needed.\n\ 1055 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 1056 set_target_charset_sfunc, 1057 show_target_charset_name, 1058 &setlist, &showlist); 1059 1060 add_setshow_enum_cmd ("target-wide-charset", class_support, 1061 charset_enum, &target_wide_charset_name, 1062 _("\ 1063 Set the target wide character set."), _("\ 1064 Show the target wide character set."), _("\ 1065 The `target wide character set' is the one used by the program being debugged.\n\ 1066 In particular it is the encoding used by `wchar_t'.\n\ 1067 GDB translates characters and strings between the host and target\n\ 1068 character sets as needed.\n\ 1069 To see a list of the character sets GDB supports, type\n\ 1070 `set target-wide-charset'<TAB>"), 1071 set_target_wide_charset_sfunc, 1072 show_target_wide_charset_name, 1073 &setlist, &showlist); 1074 } 1075