1 /* Character set conversion support for GDB. 2 3 Copyright (C) 2001, 2003, 2007-2012 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include "defs.h" 21 #include "charset.h" 22 #include "gdbcmd.h" 23 #include "gdb_assert.h" 24 #include "gdb_obstack.h" 25 #include "gdb_wait.h" 26 #include "charset-list.h" 27 #include "vec.h" 28 #include "environ.h" 29 #include "arch-utils.h" 30 31 #include <stddef.h> 32 #include "gdb_string.h" 33 #include <ctype.h> 34 35 #ifdef USE_WIN32API 36 #include <windows.h> 37 #endif 38 39 /* How GDB's character set support works 40 41 GDB has three global settings: 42 43 - The `current host character set' is the character set GDB should 44 use in talking to the user, and which (hopefully) the user's 45 terminal knows how to display properly. Most users should not 46 change this. 47 48 - The `current target character set' is the character set the 49 program being debugged uses. 50 51 - The `current target wide character set' is the wide character set 52 the program being debugged uses, that is, the encoding used for 53 wchar_t. 54 55 There are commands to set each of these, and mechanisms for 56 choosing reasonable default values. GDB has a global list of 57 character sets that it can use as its host or target character 58 sets. 59 60 The header file `charset.h' declares various functions that 61 different pieces of GDB need to perform tasks like: 62 63 - printing target strings and characters to the user's terminal 64 (mostly target->host conversions), 65 66 - building target-appropriate representations of strings and 67 characters the user enters in expressions (mostly host->target 68 conversions), 69 70 and so on. 71 72 To avoid excessive code duplication and maintenance efforts, 73 GDB simply requires a capable iconv function. Users on platforms 74 without a suitable iconv can use the GNU iconv library. */ 75 76 77 #ifdef PHONY_ICONV 78 79 /* Provide a phony iconv that does as little as possible. Also, 80 arrange for there to be a single available character set. */ 81 82 #undef GDB_DEFAULT_HOST_CHARSET 83 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 84 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 85 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 86 #undef DEFAULT_CHARSET_NAMES 87 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 88 89 #undef iconv_t 90 #define iconv_t int 91 #undef iconv_open 92 #define iconv_open phony_iconv_open 93 #undef iconv 94 #define iconv phony_iconv 95 #undef iconv_close 96 #define iconv_close phony_iconv_close 97 98 #undef ICONV_CONST 99 #define ICONV_CONST const 100 101 /* Some systems don't have EILSEQ, so we define it here, but not as 102 EINVAL, because callers of `iconv' want to distinguish EINVAL and 103 EILSEQ. This is what iconv.h from libiconv does as well. Note 104 that wchar.h may also define EILSEQ, so this needs to be after we 105 include wchar.h, which happens in defs.h through gdb_wchar.h. */ 106 #ifndef EILSEQ 107 #define EILSEQ ENOENT 108 #endif 109 110 iconv_t 111 phony_iconv_open (const char *to, const char *from) 112 { 113 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 114 We allow conversions to wchar_t and the host charset. */ 115 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 116 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 117 return -1; 118 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 119 return -1; 120 121 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 122 used as a flag in calls to iconv. */ 123 return !strcmp (from, "UTF-32BE"); 124 } 125 126 int 127 phony_iconv_close (iconv_t arg) 128 { 129 return 0; 130 } 131 132 size_t 133 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 134 char **outbuf, size_t *outbytesleft) 135 { 136 if (utf_flag) 137 { 138 while (*inbytesleft >= 4) 139 { 140 size_t j; 141 unsigned long c = 0; 142 143 for (j = 0; j < 4; ++j) 144 { 145 c <<= 8; 146 c += (*inbuf)[j] & 0xff; 147 } 148 149 if (c >= 256) 150 { 151 errno = EILSEQ; 152 return -1; 153 } 154 **outbuf = c & 0xff; 155 ++*outbuf; 156 --*outbytesleft; 157 158 ++*inbuf; 159 *inbytesleft -= 4; 160 } 161 if (*inbytesleft < 4) 162 { 163 errno = EINVAL; 164 return -1; 165 } 166 } 167 else 168 { 169 /* In all other cases we simply copy input bytes to the 170 output. */ 171 size_t amt = *inbytesleft; 172 173 if (amt > *outbytesleft) 174 amt = *outbytesleft; 175 memcpy (*outbuf, *inbuf, amt); 176 *inbuf += amt; 177 *outbuf += amt; 178 *inbytesleft -= amt; 179 *outbytesleft -= amt; 180 } 181 182 if (*inbytesleft) 183 { 184 errno = E2BIG; 185 return -1; 186 } 187 188 /* The number of non-reversible conversions -- but they were all 189 reversible. */ 190 return 0; 191 } 192 193 #endif 194 195 196 197 /* The global lists of character sets and translations. */ 198 199 200 #ifndef GDB_DEFAULT_TARGET_CHARSET 201 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 202 #endif 203 204 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 205 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 206 #endif 207 208 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 209 static const char *host_charset_name = "auto"; 210 static void 211 show_host_charset_name (struct ui_file *file, int from_tty, 212 struct cmd_list_element *c, 213 const char *value) 214 { 215 if (!strcmp (value, "auto")) 216 fprintf_filtered (file, 217 _("The host character set is \"auto; currently %s\".\n"), 218 auto_host_charset_name); 219 else 220 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 221 } 222 223 static const char *target_charset_name = "auto"; 224 static void 225 show_target_charset_name (struct ui_file *file, int from_tty, 226 struct cmd_list_element *c, const char *value) 227 { 228 if (!strcmp (value, "auto")) 229 fprintf_filtered (file, 230 _("The target character set is \"auto; " 231 "currently %s\".\n"), 232 gdbarch_auto_charset (get_current_arch ())); 233 else 234 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 235 value); 236 } 237 238 static const char *target_wide_charset_name = "auto"; 239 static void 240 show_target_wide_charset_name (struct ui_file *file, 241 int from_tty, 242 struct cmd_list_element *c, 243 const char *value) 244 { 245 if (!strcmp (value, "auto")) 246 fprintf_filtered (file, 247 _("The target wide character set is \"auto; " 248 "currently %s\".\n"), 249 gdbarch_auto_wide_charset (get_current_arch ())); 250 else 251 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 252 value); 253 } 254 255 static const char *default_charset_names[] = 256 { 257 DEFAULT_CHARSET_NAMES 258 0 259 }; 260 261 static const char **charset_enum; 262 263 264 /* If the target wide character set has big- or little-endian 265 variants, these are the corresponding names. */ 266 static const char *target_wide_charset_be_name; 267 static const char *target_wide_charset_le_name; 268 269 /* The architecture for which the BE- and LE-names are valid. */ 270 static struct gdbarch *be_le_arch; 271 272 /* A helper function which sets the target wide big- and little-endian 273 character set names, if possible. */ 274 275 static void 276 set_be_le_names (struct gdbarch *gdbarch) 277 { 278 int i, len; 279 const char *target_wide; 280 281 if (be_le_arch == gdbarch) 282 return; 283 be_le_arch = gdbarch; 284 285 target_wide_charset_le_name = NULL; 286 target_wide_charset_be_name = NULL; 287 288 target_wide = target_wide_charset_name; 289 if (!strcmp (target_wide, "auto")) 290 target_wide = gdbarch_auto_wide_charset (gdbarch); 291 292 len = strlen (target_wide); 293 for (i = 0; charset_enum[i]; ++i) 294 { 295 if (strncmp (target_wide, charset_enum[i], len)) 296 continue; 297 if ((charset_enum[i][len] == 'B' 298 || charset_enum[i][len] == 'L') 299 && charset_enum[i][len + 1] == 'E' 300 && charset_enum[i][len + 2] == '\0') 301 { 302 if (charset_enum[i][len] == 'B') 303 target_wide_charset_be_name = charset_enum[i]; 304 else 305 target_wide_charset_le_name = charset_enum[i]; 306 } 307 } 308 } 309 310 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 311 target-wide-charset', 'set charset' sfunc's. */ 312 313 static void 314 validate (struct gdbarch *gdbarch) 315 { 316 iconv_t desc; 317 const char *host_cset = host_charset (); 318 const char *target_cset = target_charset (gdbarch); 319 const char *target_wide_cset = target_wide_charset_name; 320 321 if (!strcmp (target_wide_cset, "auto")) 322 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 323 324 desc = iconv_open (target_wide_cset, host_cset); 325 if (desc == (iconv_t) -1) 326 error (_("Cannot convert between character sets `%s' and `%s'"), 327 target_wide_cset, host_cset); 328 iconv_close (desc); 329 330 desc = iconv_open (target_cset, host_cset); 331 if (desc == (iconv_t) -1) 332 error (_("Cannot convert between character sets `%s' and `%s'"), 333 target_cset, host_cset); 334 iconv_close (desc); 335 336 /* Clear the cache. */ 337 be_le_arch = NULL; 338 } 339 340 /* This is the sfunc for the 'set charset' command. */ 341 static void 342 set_charset_sfunc (char *charset, int from_tty, 343 struct cmd_list_element *c) 344 { 345 /* CAREFUL: set the target charset here as well. */ 346 target_charset_name = host_charset_name; 347 validate (get_current_arch ()); 348 } 349 350 /* 'set host-charset' command sfunc. We need a wrapper here because 351 the function needs to have a specific signature. */ 352 static void 353 set_host_charset_sfunc (char *charset, int from_tty, 354 struct cmd_list_element *c) 355 { 356 validate (get_current_arch ()); 357 } 358 359 /* Wrapper for the 'set target-charset' command. */ 360 static void 361 set_target_charset_sfunc (char *charset, int from_tty, 362 struct cmd_list_element *c) 363 { 364 validate (get_current_arch ()); 365 } 366 367 /* Wrapper for the 'set target-wide-charset' command. */ 368 static void 369 set_target_wide_charset_sfunc (char *charset, int from_tty, 370 struct cmd_list_element *c) 371 { 372 validate (get_current_arch ()); 373 } 374 375 /* sfunc for the 'show charset' command. */ 376 static void 377 show_charset (struct ui_file *file, int from_tty, 378 struct cmd_list_element *c, 379 const char *name) 380 { 381 show_host_charset_name (file, from_tty, c, host_charset_name); 382 show_target_charset_name (file, from_tty, c, target_charset_name); 383 show_target_wide_charset_name (file, from_tty, c, 384 target_wide_charset_name); 385 } 386 387 388 /* Accessor functions. */ 389 390 const char * 391 host_charset (void) 392 { 393 if (!strcmp (host_charset_name, "auto")) 394 return auto_host_charset_name; 395 return host_charset_name; 396 } 397 398 const char * 399 target_charset (struct gdbarch *gdbarch) 400 { 401 if (!strcmp (target_charset_name, "auto")) 402 return gdbarch_auto_charset (gdbarch); 403 return target_charset_name; 404 } 405 406 const char * 407 target_wide_charset (struct gdbarch *gdbarch) 408 { 409 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 410 411 set_be_le_names (gdbarch); 412 if (byte_order == BFD_ENDIAN_BIG) 413 { 414 if (target_wide_charset_be_name) 415 return target_wide_charset_be_name; 416 } 417 else 418 { 419 if (target_wide_charset_le_name) 420 return target_wide_charset_le_name; 421 } 422 423 if (!strcmp (target_wide_charset_name, "auto")) 424 return gdbarch_auto_wide_charset (gdbarch); 425 426 return target_wide_charset_name; 427 } 428 429 430 /* Host character set management. For the time being, we assume that 431 the host character set is some superset of ASCII. */ 432 433 char 434 host_letter_to_control_character (char c) 435 { 436 if (c == '?') 437 return 0177; 438 return c & 0237; 439 } 440 441 /* Convert a host character, C, to its hex value. C must already have 442 been validated using isxdigit. */ 443 444 int 445 host_hex_value (char c) 446 { 447 if (isdigit (c)) 448 return c - '0'; 449 if (c >= 'a' && c <= 'f') 450 return 10 + c - 'a'; 451 gdb_assert (c >= 'A' && c <= 'F'); 452 return 10 + c - 'A'; 453 } 454 455 456 /* Public character management functions. */ 457 458 /* A cleanup function which is run to close an iconv descriptor. */ 459 460 static void 461 cleanup_iconv (void *p) 462 { 463 iconv_t *descp = p; 464 iconv_close (*descp); 465 } 466 467 void 468 convert_between_encodings (const char *from, const char *to, 469 const gdb_byte *bytes, unsigned int num_bytes, 470 int width, struct obstack *output, 471 enum transliterations translit) 472 { 473 iconv_t desc; 474 struct cleanup *cleanups; 475 size_t inleft; 476 char *inp; 477 unsigned int space_request; 478 479 /* Often, the host and target charsets will be the same. */ 480 if (!strcmp (from, to)) 481 { 482 obstack_grow (output, bytes, num_bytes); 483 return; 484 } 485 486 desc = iconv_open (to, from); 487 if (desc == (iconv_t) -1) 488 perror_with_name (_("Converting character sets")); 489 cleanups = make_cleanup (cleanup_iconv, &desc); 490 491 inleft = num_bytes; 492 inp = (char *) bytes; 493 494 space_request = num_bytes; 495 496 while (inleft > 0) 497 { 498 char *outp; 499 size_t outleft, r; 500 int old_size; 501 502 old_size = obstack_object_size (output); 503 obstack_blank (output, space_request); 504 505 outp = obstack_base (output) + old_size; 506 outleft = space_request; 507 508 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); 509 510 /* Now make sure that the object on the obstack only includes 511 bytes we have converted. */ 512 obstack_blank (output, - (int) outleft); 513 514 if (r == (size_t) -1) 515 { 516 switch (errno) 517 { 518 case EILSEQ: 519 { 520 int i; 521 522 /* Invalid input sequence. */ 523 if (translit == translit_none) 524 error (_("Could not convert character " 525 "to `%s' character set"), to); 526 527 /* We emit escape sequence for the bytes, skip them, 528 and try again. */ 529 for (i = 0; i < width; ++i) 530 { 531 char octal[5]; 532 533 sprintf (octal, "\\%.3o", *inp & 0xff); 534 obstack_grow_str (output, octal); 535 536 ++inp; 537 --inleft; 538 } 539 } 540 break; 541 542 case E2BIG: 543 /* We ran out of space in the output buffer. Make it 544 bigger next time around. */ 545 space_request *= 2; 546 break; 547 548 case EINVAL: 549 /* Incomplete input sequence. FIXME: ought to report this 550 to the caller somehow. */ 551 inleft = 0; 552 break; 553 554 default: 555 perror_with_name (_("Internal error while " 556 "converting character sets")); 557 } 558 } 559 } 560 561 do_cleanups (cleanups); 562 } 563 564 565 566 /* An iterator that returns host wchar_t's from a target string. */ 567 struct wchar_iterator 568 { 569 /* The underlying iconv descriptor. */ 570 iconv_t desc; 571 572 /* The input string. This is updated as convert characters. */ 573 char *input; 574 /* The number of bytes remaining in the input. */ 575 size_t bytes; 576 577 /* The width of an input character. */ 578 size_t width; 579 580 /* The output buffer and its size. */ 581 gdb_wchar_t *out; 582 size_t out_size; 583 }; 584 585 /* Create a new iterator. */ 586 struct wchar_iterator * 587 make_wchar_iterator (const gdb_byte *input, size_t bytes, 588 const char *charset, size_t width) 589 { 590 struct wchar_iterator *result; 591 iconv_t desc; 592 593 desc = iconv_open (INTERMEDIATE_ENCODING, charset); 594 if (desc == (iconv_t) -1) 595 perror_with_name (_("Converting character sets")); 596 597 result = XNEW (struct wchar_iterator); 598 result->desc = desc; 599 result->input = (char *) input; 600 result->bytes = bytes; 601 result->width = width; 602 603 result->out = XNEW (gdb_wchar_t); 604 result->out_size = 1; 605 606 return result; 607 } 608 609 static void 610 do_cleanup_iterator (void *p) 611 { 612 struct wchar_iterator *iter = p; 613 614 iconv_close (iter->desc); 615 xfree (iter->out); 616 xfree (iter); 617 } 618 619 struct cleanup * 620 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 621 { 622 return make_cleanup (do_cleanup_iterator, iter); 623 } 624 625 int 626 wchar_iterate (struct wchar_iterator *iter, 627 enum wchar_iterate_result *out_result, 628 gdb_wchar_t **out_chars, 629 const gdb_byte **ptr, 630 size_t *len) 631 { 632 size_t out_request; 633 634 /* Try to convert some characters. At first we try to convert just 635 a single character. The reason for this is that iconv does not 636 necessarily update its outgoing arguments when it encounters an 637 invalid input sequence -- but we want to reliably report this to 638 our caller so it can emit an escape sequence. */ 639 out_request = 1; 640 while (iter->bytes > 0) 641 { 642 char *outptr = (char *) &iter->out[0]; 643 char *orig_inptr = iter->input; 644 size_t orig_in = iter->bytes; 645 size_t out_avail = out_request * sizeof (gdb_wchar_t); 646 size_t num; 647 size_t r = iconv (iter->desc, 648 (ICONV_CONST char **) &iter->input, 649 &iter->bytes, &outptr, &out_avail); 650 651 if (r == (size_t) -1) 652 { 653 switch (errno) 654 { 655 case EILSEQ: 656 /* Invalid input sequence. We still might have 657 converted a character; if so, return it. */ 658 if (out_avail < out_request * sizeof (gdb_wchar_t)) 659 break; 660 661 /* Otherwise skip the first invalid character, and let 662 the caller know about it. */ 663 *out_result = wchar_iterate_invalid; 664 *ptr = iter->input; 665 *len = iter->width; 666 iter->input += iter->width; 667 iter->bytes -= iter->width; 668 return 0; 669 670 case E2BIG: 671 /* We ran out of space. We still might have converted a 672 character; if so, return it. Otherwise, grow the 673 buffer and try again. */ 674 if (out_avail < out_request * sizeof (gdb_wchar_t)) 675 break; 676 677 ++out_request; 678 if (out_request > iter->out_size) 679 { 680 iter->out_size = out_request; 681 iter->out = xrealloc (iter->out, 682 out_request * sizeof (gdb_wchar_t)); 683 } 684 continue; 685 686 case EINVAL: 687 /* Incomplete input sequence. Let the caller know, and 688 arrange for future calls to see EOF. */ 689 *out_result = wchar_iterate_incomplete; 690 *ptr = iter->input; 691 *len = iter->bytes; 692 iter->bytes = 0; 693 return 0; 694 695 default: 696 perror_with_name (_("Internal error while " 697 "converting character sets")); 698 } 699 } 700 701 /* We converted something. */ 702 num = out_request - out_avail / sizeof (gdb_wchar_t); 703 *out_result = wchar_iterate_ok; 704 *out_chars = iter->out; 705 *ptr = orig_inptr; 706 *len = orig_in - iter->bytes; 707 return num; 708 } 709 710 /* Really done. */ 711 *out_result = wchar_iterate_eof; 712 return -1; 713 } 714 715 716 /* The charset.c module initialization function. */ 717 718 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 719 720 DEF_VEC_P (char_ptr); 721 722 static VEC (char_ptr) *charsets; 723 724 #ifdef PHONY_ICONV 725 726 static void 727 find_charset_names (void) 728 { 729 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 730 VEC_safe_push (char_ptr, charsets, NULL); 731 } 732 733 #else /* PHONY_ICONV */ 734 735 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 736 provides different symbols in the static and dynamic libraries. 737 So, configure may see libiconvlist but not iconvlist. But, calling 738 iconvlist is the right thing to do and will work. Hence we do a 739 check here but unconditionally call iconvlist below. */ 740 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 741 742 /* A helper function that adds some character sets to the vector of 743 all character sets. This is a callback function for iconvlist. */ 744 745 static int 746 add_one (unsigned int count, const char *const *names, void *data) 747 { 748 unsigned int i; 749 750 for (i = 0; i < count; ++i) 751 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 752 753 return 0; 754 } 755 756 static void 757 find_charset_names (void) 758 { 759 iconvlist (add_one, NULL); 760 VEC_safe_push (char_ptr, charsets, NULL); 761 } 762 763 #else 764 765 /* Return non-zero if LINE (output from iconv) should be ignored. 766 Older iconv programs (e.g. 2.2.2) include the human readable 767 introduction even when stdout is not a tty. Newer versions omit 768 the intro if stdout is not a tty. */ 769 770 static int 771 ignore_line_p (const char *line) 772 { 773 /* This table is used to filter the output. If this text appears 774 anywhere in the line, it is ignored (strstr is used). */ 775 static const char * const ignore_lines[] = 776 { 777 "The following", 778 "not necessarily", 779 "the FROM and TO", 780 "listed with several", 781 NULL 782 }; 783 int i; 784 785 for (i = 0; ignore_lines[i] != NULL; ++i) 786 { 787 if (strstr (line, ignore_lines[i]) != NULL) 788 return 1; 789 } 790 791 return 0; 792 } 793 794 static void 795 find_charset_names (void) 796 { 797 struct pex_obj *child; 798 char *args[3]; 799 int err, status; 800 int fail = 1; 801 int flags; 802 struct gdb_environ *iconv_env; 803 char *iconv_program; 804 805 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 806 not a tty. We need to recognize it and ignore it. This text is 807 subject to translation, so force LANGUAGE=C. */ 808 iconv_env = make_environ (); 809 init_environ (iconv_env); 810 set_in_environ (iconv_env, "LANGUAGE", "C"); 811 set_in_environ (iconv_env, "LC_ALL", "C"); 812 813 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 814 815 #ifdef ICONV_BIN 816 { 817 char *iconv_dir = relocate_gdb_directory (ICONV_BIN, 818 ICONV_BIN_RELOCATABLE); 819 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); 820 xfree (iconv_dir); 821 } 822 #else 823 iconv_program = xstrdup ("iconv"); 824 #endif 825 args[0] = iconv_program; 826 args[1] = "-l"; 827 args[2] = NULL; 828 flags = PEX_STDERR_TO_STDOUT; 829 #ifndef ICONV_BIN 830 flags |= PEX_SEARCH; 831 #endif 832 /* Note that we simply ignore errors here. */ 833 if (!pex_run_in_environment (child, flags, 834 args[0], args, environ_vector (iconv_env), 835 NULL, NULL, &err)) 836 { 837 FILE *in = pex_read_output (child, 0); 838 839 /* POSIX says that iconv -l uses an unspecified format. We 840 parse the glibc and libiconv formats; feel free to add others 841 as needed. */ 842 843 while (!feof (in)) 844 { 845 /* The size of buf is chosen arbitrarily. */ 846 char buf[1024]; 847 char *start, *r; 848 int len; 849 850 r = fgets (buf, sizeof (buf), in); 851 if (!r) 852 break; 853 len = strlen (r); 854 if (len <= 3) 855 continue; 856 if (ignore_line_p (r)) 857 continue; 858 859 /* Strip off the newline. */ 860 --len; 861 /* Strip off one or two '/'s. glibc will print lines like 862 "8859_7//", but also "10646-1:1993/UCS4/". */ 863 if (buf[len - 1] == '/') 864 --len; 865 if (buf[len - 1] == '/') 866 --len; 867 buf[len] = '\0'; 868 869 /* libiconv will print multiple entries per line, separated 870 by spaces. Older iconvs will print multiple entries per 871 line, indented by two spaces, and separated by ", " 872 (i.e. the human readable form). */ 873 start = buf; 874 while (1) 875 { 876 int keep_going; 877 char *p; 878 879 /* Skip leading blanks. */ 880 for (p = start; *p && *p == ' '; ++p) 881 ; 882 start = p; 883 /* Find the next space, comma, or end-of-line. */ 884 for ( ; *p && *p != ' ' && *p != ','; ++p) 885 ; 886 /* Ignore an empty result. */ 887 if (p == start) 888 break; 889 keep_going = *p; 890 *p = '\0'; 891 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 892 if (!keep_going) 893 break; 894 /* Skip any extra spaces. */ 895 for (start = p + 1; *start && *start == ' '; ++start) 896 ; 897 } 898 } 899 900 if (pex_get_status (child, 1, &status) 901 && WIFEXITED (status) && !WEXITSTATUS (status)) 902 fail = 0; 903 904 } 905 906 xfree (iconv_program); 907 pex_free (child); 908 free_environ (iconv_env); 909 910 if (fail) 911 { 912 /* Some error occurred, so drop the vector. */ 913 int ix; 914 char *elt; 915 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) 916 xfree (elt); 917 VEC_truncate (char_ptr, charsets, 0); 918 } 919 else 920 VEC_safe_push (char_ptr, charsets, NULL); 921 } 922 923 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 924 #endif /* PHONY_ICONV */ 925 926 /* The "auto" target charset used by default_auto_charset. */ 927 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 928 929 const char * 930 default_auto_charset (void) 931 { 932 return auto_target_charset_name; 933 } 934 935 const char * 936 default_auto_wide_charset (void) 937 { 938 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 939 } 940 941 942 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 943 /* Macro used for UTF or UCS endianness suffix. */ 944 #if WORDS_BIGENDIAN 945 #define ENDIAN_SUFFIX "BE" 946 #else 947 #define ENDIAN_SUFFIX "LE" 948 #endif 949 950 /* The code below serves to generate a compile time error if 951 gdb_wchar_t type is not of size 2 nor 4, despite the fact that 952 macro __STDC_ISO_10646__ is defined. 953 This is better than a gdb_assert call, because GDB cannot handle 954 strings correctly if this size is different. */ 955 956 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 957 || sizeof (gdb_wchar_t) == 4) 958 ? 1 : -1]; 959 960 /* intermediate_encoding returns the charset unsed internally by 961 GDB to convert between target and host encodings. As the test above 962 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 963 UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 964 otherwise an error is generated. */ 965 966 const char * 967 intermediate_encoding (void) 968 { 969 iconv_t desc; 970 static const char *stored_result = NULL; 971 char *result; 972 int i; 973 974 if (stored_result) 975 return stored_result; 976 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 977 ENDIAN_SUFFIX); 978 /* Check that the name is supported by iconv_open. */ 979 desc = iconv_open (result, host_charset ()); 980 if (desc != (iconv_t) -1) 981 { 982 iconv_close (desc); 983 stored_result = result; 984 return result; 985 } 986 /* Not valid, free the allocated memory. */ 987 xfree (result); 988 /* Second try, with UCS-2 type. */ 989 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 990 ENDIAN_SUFFIX); 991 /* Check that the name is supported by iconv_open. */ 992 desc = iconv_open (result, host_charset ()); 993 if (desc != (iconv_t) -1) 994 { 995 iconv_close (desc); 996 stored_result = result; 997 return result; 998 } 999 /* Not valid, free the allocated memory. */ 1000 xfree (result); 1001 /* No valid charset found, generate error here. */ 1002 error (_("Unable to find a vaild charset for string conversions")); 1003 } 1004 1005 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 1006 1007 void 1008 _initialize_charset (void) 1009 { 1010 /* The first element is always "auto". */ 1011 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 1012 find_charset_names (); 1013 1014 if (VEC_length (char_ptr, charsets) > 1) 1015 charset_enum = (const char **) VEC_address (char_ptr, charsets); 1016 else 1017 charset_enum = default_charset_names; 1018 1019 #ifndef PHONY_ICONV 1020 #ifdef HAVE_LANGINFO_CODESET 1021 /* The result of nl_langinfo may be overwritten later. This may 1022 leak a little memory, if the user later changes the host charset, 1023 but that doesn't matter much. */ 1024 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1025 /* Solaris will return `646' here -- but the Solaris iconv then does 1026 not accept this. Darwin (and maybe FreeBSD) may return "" here, 1027 which GNU libiconv doesn't like (infinite loop). */ 1028 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 1029 auto_host_charset_name = "ASCII"; 1030 auto_target_charset_name = auto_host_charset_name; 1031 #elif defined (USE_WIN32API) 1032 { 1033 /* "CP" + x<=5 digits + paranoia. */ 1034 static char w32_host_default_charset[16]; 1035 1036 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1037 "CP%d", GetACP()); 1038 auto_host_charset_name = w32_host_default_charset; 1039 auto_target_charset_name = auto_host_charset_name; 1040 } 1041 #endif 1042 #endif 1043 1044 add_setshow_enum_cmd ("charset", class_support, 1045 charset_enum, &host_charset_name, _("\ 1046 Set the host and target character sets."), _("\ 1047 Show the host and target character sets."), _("\ 1048 The `host character set' is the one used by the system GDB is running on.\n\ 1049 The `target character set' is the one used by the program being debugged.\n\ 1050 You may only use supersets of ASCII for your host character set; GDB does\n\ 1051 not support any others.\n\ 1052 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 1053 /* Note that the sfunc below needs to set 1054 target_charset_name, because the 'set 1055 charset' command sets two variables. */ 1056 set_charset_sfunc, 1057 show_charset, 1058 &setlist, &showlist); 1059 1060 add_setshow_enum_cmd ("host-charset", class_support, 1061 charset_enum, &host_charset_name, _("\ 1062 Set the host character set."), _("\ 1063 Show the host character set."), _("\ 1064 The `host character set' is the one used by the system GDB is running on.\n\ 1065 You may only use supersets of ASCII for your host character set; GDB does\n\ 1066 not support any others.\n\ 1067 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 1068 set_host_charset_sfunc, 1069 show_host_charset_name, 1070 &setlist, &showlist); 1071 1072 add_setshow_enum_cmd ("target-charset", class_support, 1073 charset_enum, &target_charset_name, _("\ 1074 Set the target character set."), _("\ 1075 Show the target character set."), _("\ 1076 The `target character set' is the one used by the program being debugged.\n\ 1077 GDB translates characters and strings between the host and target\n\ 1078 character sets as needed.\n\ 1079 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 1080 set_target_charset_sfunc, 1081 show_target_charset_name, 1082 &setlist, &showlist); 1083 1084 add_setshow_enum_cmd ("target-wide-charset", class_support, 1085 charset_enum, &target_wide_charset_name, 1086 _("\ 1087 Set the target wide character set."), _("\ 1088 Show the target wide character set."), _("\ 1089 The `target wide character set' is the one used by the program being debugged.\ 1090 \nIn particular it is the encoding used by `wchar_t'.\n\ 1091 GDB translates characters and strings between the host and target\n\ 1092 character sets as needed.\n\ 1093 To see a list of the character sets GDB supports, type\n\ 1094 `set target-wide-charset'<TAB>"), 1095 set_target_wide_charset_sfunc, 1096 show_target_wide_charset_name, 1097 &setlist, &showlist); 1098 } 1099