1 /* Character set conversion support for GDB. 2 3 Copyright (C) 2001-2015 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include "defs.h" 21 #include "charset.h" 22 #include "gdbcmd.h" 23 #include "gdb_obstack.h" 24 #include "gdb_wait.h" 25 #include "charset-list.h" 26 #include "vec.h" 27 #include "environ.h" 28 #include "arch-utils.h" 29 #include "gdb_vecs.h" 30 #include <ctype.h> 31 32 #ifdef USE_WIN32API 33 #include <windows.h> 34 #endif 35 36 /* How GDB's character set support works 37 38 GDB has three global settings: 39 40 - The `current host character set' is the character set GDB should 41 use in talking to the user, and which (hopefully) the user's 42 terminal knows how to display properly. Most users should not 43 change this. 44 45 - The `current target character set' is the character set the 46 program being debugged uses. 47 48 - The `current target wide character set' is the wide character set 49 the program being debugged uses, that is, the encoding used for 50 wchar_t. 51 52 There are commands to set each of these, and mechanisms for 53 choosing reasonable default values. GDB has a global list of 54 character sets that it can use as its host or target character 55 sets. 56 57 The header file `charset.h' declares various functions that 58 different pieces of GDB need to perform tasks like: 59 60 - printing target strings and characters to the user's terminal 61 (mostly target->host conversions), 62 63 - building target-appropriate representations of strings and 64 characters the user enters in expressions (mostly host->target 65 conversions), 66 67 and so on. 68 69 To avoid excessive code duplication and maintenance efforts, 70 GDB simply requires a capable iconv function. Users on platforms 71 without a suitable iconv can use the GNU iconv library. */ 72 73 74 #ifdef PHONY_ICONV 75 76 /* Provide a phony iconv that does as little as possible. Also, 77 arrange for there to be a single available character set. */ 78 79 #undef GDB_DEFAULT_HOST_CHARSET 80 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 81 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 82 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 83 #undef DEFAULT_CHARSET_NAMES 84 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 85 86 #undef iconv_t 87 #define iconv_t int 88 #undef iconv_open 89 #define iconv_open phony_iconv_open 90 #undef iconv 91 #define iconv phony_iconv 92 #undef iconv_close 93 #define iconv_close phony_iconv_close 94 95 #undef ICONV_CONST 96 #define ICONV_CONST const 97 98 static iconv_t 99 phony_iconv_open (const char *to, const char *from) 100 { 101 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 102 We allow conversions to wchar_t and the host charset. */ 103 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 104 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 105 return -1; 106 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 107 return -1; 108 109 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 110 used as a flag in calls to iconv. */ 111 return !strcmp (from, "UTF-32BE"); 112 } 113 114 static int 115 phony_iconv_close (iconv_t arg) 116 { 117 return 0; 118 } 119 120 static size_t 121 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 122 char **outbuf, size_t *outbytesleft) 123 { 124 if (utf_flag) 125 { 126 while (*inbytesleft >= 4) 127 { 128 size_t j; 129 unsigned long c = 0; 130 131 for (j = 0; j < 4; ++j) 132 { 133 c <<= 8; 134 c += (*inbuf)[j] & 0xff; 135 } 136 137 if (c >= 256) 138 { 139 errno = EILSEQ; 140 return -1; 141 } 142 **outbuf = c & 0xff; 143 ++*outbuf; 144 --*outbytesleft; 145 146 ++*inbuf; 147 *inbytesleft -= 4; 148 } 149 if (*inbytesleft < 4) 150 { 151 errno = EINVAL; 152 return -1; 153 } 154 } 155 else 156 { 157 /* In all other cases we simply copy input bytes to the 158 output. */ 159 size_t amt = *inbytesleft; 160 161 if (amt > *outbytesleft) 162 amt = *outbytesleft; 163 memcpy (*outbuf, *inbuf, amt); 164 *inbuf += amt; 165 *outbuf += amt; 166 *inbytesleft -= amt; 167 *outbytesleft -= amt; 168 } 169 170 if (*inbytesleft) 171 { 172 errno = E2BIG; 173 return -1; 174 } 175 176 /* The number of non-reversible conversions -- but they were all 177 reversible. */ 178 return 0; 179 } 180 181 #else /* PHONY_ICONV */ 182 183 /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it 184 to ENOENT, while gnulib defines it to a different value. Always 185 map ENOENT to gnulib's EILSEQ, leaving callers agnostic. */ 186 187 static size_t 188 gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft, 189 char **outbuf, size_t *outbytesleft) 190 { 191 size_t ret; 192 193 ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft); 194 if (errno == ENOENT) 195 errno = EILSEQ; 196 return ret; 197 } 198 199 #undef iconv 200 #define iconv gdb_iconv 201 202 #endif /* PHONY_ICONV */ 203 204 205 /* The global lists of character sets and translations. */ 206 207 208 #ifndef GDB_DEFAULT_TARGET_CHARSET 209 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 210 #endif 211 212 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 213 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 214 #endif 215 216 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 217 static const char *host_charset_name = "auto"; 218 static void 219 show_host_charset_name (struct ui_file *file, int from_tty, 220 struct cmd_list_element *c, 221 const char *value) 222 { 223 if (!strcmp (value, "auto")) 224 fprintf_filtered (file, 225 _("The host character set is \"auto; currently %s\".\n"), 226 auto_host_charset_name); 227 else 228 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 229 } 230 231 static const char *target_charset_name = "auto"; 232 static void 233 show_target_charset_name (struct ui_file *file, int from_tty, 234 struct cmd_list_element *c, const char *value) 235 { 236 if (!strcmp (value, "auto")) 237 fprintf_filtered (file, 238 _("The target character set is \"auto; " 239 "currently %s\".\n"), 240 gdbarch_auto_charset (get_current_arch ())); 241 else 242 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 243 value); 244 } 245 246 static const char *target_wide_charset_name = "auto"; 247 static void 248 show_target_wide_charset_name (struct ui_file *file, 249 int from_tty, 250 struct cmd_list_element *c, 251 const char *value) 252 { 253 if (!strcmp (value, "auto")) 254 fprintf_filtered (file, 255 _("The target wide character set is \"auto; " 256 "currently %s\".\n"), 257 gdbarch_auto_wide_charset (get_current_arch ())); 258 else 259 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 260 value); 261 } 262 263 static const char *default_charset_names[] = 264 { 265 DEFAULT_CHARSET_NAMES 266 0 267 }; 268 269 static const char **charset_enum; 270 271 272 /* If the target wide character set has big- or little-endian 273 variants, these are the corresponding names. */ 274 static const char *target_wide_charset_be_name; 275 static const char *target_wide_charset_le_name; 276 277 /* The architecture for which the BE- and LE-names are valid. */ 278 static struct gdbarch *be_le_arch; 279 280 /* A helper function which sets the target wide big- and little-endian 281 character set names, if possible. */ 282 283 static void 284 set_be_le_names (struct gdbarch *gdbarch) 285 { 286 int i, len; 287 const char *target_wide; 288 289 if (be_le_arch == gdbarch) 290 return; 291 be_le_arch = gdbarch; 292 293 target_wide_charset_le_name = NULL; 294 target_wide_charset_be_name = NULL; 295 296 target_wide = target_wide_charset_name; 297 if (!strcmp (target_wide, "auto")) 298 target_wide = gdbarch_auto_wide_charset (gdbarch); 299 300 len = strlen (target_wide); 301 for (i = 0; charset_enum[i]; ++i) 302 { 303 if (strncmp (target_wide, charset_enum[i], len)) 304 continue; 305 if ((charset_enum[i][len] == 'B' 306 || charset_enum[i][len] == 'L') 307 && charset_enum[i][len + 1] == 'E' 308 && charset_enum[i][len + 2] == '\0') 309 { 310 if (charset_enum[i][len] == 'B') 311 target_wide_charset_be_name = charset_enum[i]; 312 else 313 target_wide_charset_le_name = charset_enum[i]; 314 } 315 } 316 } 317 318 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 319 target-wide-charset', 'set charset' sfunc's. */ 320 321 static void 322 validate (struct gdbarch *gdbarch) 323 { 324 iconv_t desc; 325 const char *host_cset = host_charset (); 326 const char *target_cset = target_charset (gdbarch); 327 const char *target_wide_cset = target_wide_charset_name; 328 329 if (!strcmp (target_wide_cset, "auto")) 330 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 331 332 desc = iconv_open (target_wide_cset, host_cset); 333 if (desc == (iconv_t) -1) 334 error (_("Cannot convert between character sets `%s' and `%s'"), 335 target_wide_cset, host_cset); 336 iconv_close (desc); 337 338 desc = iconv_open (target_cset, host_cset); 339 if (desc == (iconv_t) -1) 340 error (_("Cannot convert between character sets `%s' and `%s'"), 341 target_cset, host_cset); 342 iconv_close (desc); 343 344 /* Clear the cache. */ 345 be_le_arch = NULL; 346 } 347 348 /* This is the sfunc for the 'set charset' command. */ 349 static void 350 set_charset_sfunc (char *charset, int from_tty, 351 struct cmd_list_element *c) 352 { 353 /* CAREFUL: set the target charset here as well. */ 354 target_charset_name = host_charset_name; 355 validate (get_current_arch ()); 356 } 357 358 /* 'set host-charset' command sfunc. We need a wrapper here because 359 the function needs to have a specific signature. */ 360 static void 361 set_host_charset_sfunc (char *charset, int from_tty, 362 struct cmd_list_element *c) 363 { 364 validate (get_current_arch ()); 365 } 366 367 /* Wrapper for the 'set target-charset' command. */ 368 static void 369 set_target_charset_sfunc (char *charset, int from_tty, 370 struct cmd_list_element *c) 371 { 372 validate (get_current_arch ()); 373 } 374 375 /* Wrapper for the 'set target-wide-charset' command. */ 376 static void 377 set_target_wide_charset_sfunc (char *charset, int from_tty, 378 struct cmd_list_element *c) 379 { 380 validate (get_current_arch ()); 381 } 382 383 /* sfunc for the 'show charset' command. */ 384 static void 385 show_charset (struct ui_file *file, int from_tty, 386 struct cmd_list_element *c, 387 const char *name) 388 { 389 show_host_charset_name (file, from_tty, c, host_charset_name); 390 show_target_charset_name (file, from_tty, c, target_charset_name); 391 show_target_wide_charset_name (file, from_tty, c, 392 target_wide_charset_name); 393 } 394 395 396 /* Accessor functions. */ 397 398 const char * 399 host_charset (void) 400 { 401 if (!strcmp (host_charset_name, "auto")) 402 return auto_host_charset_name; 403 return host_charset_name; 404 } 405 406 const char * 407 target_charset (struct gdbarch *gdbarch) 408 { 409 if (!strcmp (target_charset_name, "auto")) 410 return gdbarch_auto_charset (gdbarch); 411 return target_charset_name; 412 } 413 414 const char * 415 target_wide_charset (struct gdbarch *gdbarch) 416 { 417 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 418 419 set_be_le_names (gdbarch); 420 if (byte_order == BFD_ENDIAN_BIG) 421 { 422 if (target_wide_charset_be_name) 423 return target_wide_charset_be_name; 424 } 425 else 426 { 427 if (target_wide_charset_le_name) 428 return target_wide_charset_le_name; 429 } 430 431 if (!strcmp (target_wide_charset_name, "auto")) 432 return gdbarch_auto_wide_charset (gdbarch); 433 434 return target_wide_charset_name; 435 } 436 437 438 /* Host character set management. For the time being, we assume that 439 the host character set is some superset of ASCII. */ 440 441 char 442 host_letter_to_control_character (char c) 443 { 444 if (c == '?') 445 return 0177; 446 return c & 0237; 447 } 448 449 /* Convert a host character, C, to its hex value. C must already have 450 been validated using isxdigit. */ 451 452 int 453 host_hex_value (char c) 454 { 455 if (isdigit (c)) 456 return c - '0'; 457 if (c >= 'a' && c <= 'f') 458 return 10 + c - 'a'; 459 gdb_assert (c >= 'A' && c <= 'F'); 460 return 10 + c - 'A'; 461 } 462 463 464 /* Public character management functions. */ 465 466 /* A cleanup function which is run to close an iconv descriptor. */ 467 468 static void 469 cleanup_iconv (void *p) 470 { 471 iconv_t *descp = p; 472 iconv_close (*descp); 473 } 474 475 void 476 convert_between_encodings (const char *from, const char *to, 477 const gdb_byte *bytes, unsigned int num_bytes, 478 int width, struct obstack *output, 479 enum transliterations translit) 480 { 481 iconv_t desc; 482 struct cleanup *cleanups; 483 size_t inleft; 484 ICONV_CONST char *inp; 485 unsigned int space_request; 486 487 /* Often, the host and target charsets will be the same. */ 488 if (!strcmp (from, to)) 489 { 490 obstack_grow (output, bytes, num_bytes); 491 return; 492 } 493 494 desc = iconv_open (to, from); 495 if (desc == (iconv_t) -1) 496 perror_with_name (_("Converting character sets")); 497 cleanups = make_cleanup (cleanup_iconv, &desc); 498 499 inleft = num_bytes; 500 inp = (ICONV_CONST char *) bytes; 501 502 space_request = num_bytes; 503 504 while (inleft > 0) 505 { 506 char *outp; 507 size_t outleft, r; 508 int old_size; 509 510 old_size = obstack_object_size (output); 511 obstack_blank (output, space_request); 512 513 outp = (char *) obstack_base (output) + old_size; 514 outleft = space_request; 515 516 r = iconv (desc, &inp, &inleft, &outp, &outleft); 517 518 /* Now make sure that the object on the obstack only includes 519 bytes we have converted. */ 520 obstack_blank_fast (output, -outleft); 521 522 if (r == (size_t) -1) 523 { 524 switch (errno) 525 { 526 case EILSEQ: 527 { 528 int i; 529 530 /* Invalid input sequence. */ 531 if (translit == translit_none) 532 error (_("Could not convert character " 533 "to `%s' character set"), to); 534 535 /* We emit escape sequence for the bytes, skip them, 536 and try again. */ 537 for (i = 0; i < width; ++i) 538 { 539 char octal[5]; 540 541 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); 542 obstack_grow_str (output, octal); 543 544 ++inp; 545 --inleft; 546 } 547 } 548 break; 549 550 case E2BIG: 551 /* We ran out of space in the output buffer. Make it 552 bigger next time around. */ 553 space_request *= 2; 554 break; 555 556 case EINVAL: 557 /* Incomplete input sequence. FIXME: ought to report this 558 to the caller somehow. */ 559 inleft = 0; 560 break; 561 562 default: 563 perror_with_name (_("Internal error while " 564 "converting character sets")); 565 } 566 } 567 } 568 569 do_cleanups (cleanups); 570 } 571 572 573 574 /* An iterator that returns host wchar_t's from a target string. */ 575 struct wchar_iterator 576 { 577 /* The underlying iconv descriptor. */ 578 iconv_t desc; 579 580 /* The input string. This is updated as convert characters. */ 581 const gdb_byte *input; 582 /* The number of bytes remaining in the input. */ 583 size_t bytes; 584 585 /* The width of an input character. */ 586 size_t width; 587 588 /* The output buffer and its size. */ 589 gdb_wchar_t *out; 590 size_t out_size; 591 }; 592 593 /* Create a new iterator. */ 594 struct wchar_iterator * 595 make_wchar_iterator (const gdb_byte *input, size_t bytes, 596 const char *charset, size_t width) 597 { 598 struct wchar_iterator *result; 599 iconv_t desc; 600 601 desc = iconv_open (INTERMEDIATE_ENCODING, charset); 602 if (desc == (iconv_t) -1) 603 perror_with_name (_("Converting character sets")); 604 605 result = XNEW (struct wchar_iterator); 606 result->desc = desc; 607 result->input = input; 608 result->bytes = bytes; 609 result->width = width; 610 611 result->out = XNEW (gdb_wchar_t); 612 result->out_size = 1; 613 614 return result; 615 } 616 617 static void 618 do_cleanup_iterator (void *p) 619 { 620 struct wchar_iterator *iter = p; 621 622 iconv_close (iter->desc); 623 xfree (iter->out); 624 xfree (iter); 625 } 626 627 struct cleanup * 628 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 629 { 630 return make_cleanup (do_cleanup_iterator, iter); 631 } 632 633 int 634 wchar_iterate (struct wchar_iterator *iter, 635 enum wchar_iterate_result *out_result, 636 gdb_wchar_t **out_chars, 637 const gdb_byte **ptr, 638 size_t *len) 639 { 640 size_t out_request; 641 642 /* Try to convert some characters. At first we try to convert just 643 a single character. The reason for this is that iconv does not 644 necessarily update its outgoing arguments when it encounters an 645 invalid input sequence -- but we want to reliably report this to 646 our caller so it can emit an escape sequence. */ 647 out_request = 1; 648 while (iter->bytes > 0) 649 { 650 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input; 651 char *outptr = (char *) &iter->out[0]; 652 const gdb_byte *orig_inptr = iter->input; 653 size_t orig_in = iter->bytes; 654 size_t out_avail = out_request * sizeof (gdb_wchar_t); 655 size_t num; 656 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail); 657 658 iter->input = (gdb_byte *) inptr; 659 660 if (r == (size_t) -1) 661 { 662 switch (errno) 663 { 664 case EILSEQ: 665 /* Invalid input sequence. We still might have 666 converted a character; if so, return it. */ 667 if (out_avail < out_request * sizeof (gdb_wchar_t)) 668 break; 669 670 /* Otherwise skip the first invalid character, and let 671 the caller know about it. */ 672 *out_result = wchar_iterate_invalid; 673 *ptr = iter->input; 674 *len = iter->width; 675 iter->input += iter->width; 676 iter->bytes -= iter->width; 677 return 0; 678 679 case E2BIG: 680 /* We ran out of space. We still might have converted a 681 character; if so, return it. Otherwise, grow the 682 buffer and try again. */ 683 if (out_avail < out_request * sizeof (gdb_wchar_t)) 684 break; 685 686 ++out_request; 687 if (out_request > iter->out_size) 688 { 689 iter->out_size = out_request; 690 iter->out = xrealloc (iter->out, 691 out_request * sizeof (gdb_wchar_t)); 692 } 693 continue; 694 695 case EINVAL: 696 /* Incomplete input sequence. Let the caller know, and 697 arrange for future calls to see EOF. */ 698 *out_result = wchar_iterate_incomplete; 699 *ptr = iter->input; 700 *len = iter->bytes; 701 iter->bytes = 0; 702 return 0; 703 704 default: 705 perror_with_name (_("Internal error while " 706 "converting character sets")); 707 } 708 } 709 710 /* We converted something. */ 711 num = out_request - out_avail / sizeof (gdb_wchar_t); 712 *out_result = wchar_iterate_ok; 713 *out_chars = iter->out; 714 *ptr = orig_inptr; 715 *len = orig_in - iter->bytes; 716 return num; 717 } 718 719 /* Really done. */ 720 *out_result = wchar_iterate_eof; 721 return -1; 722 } 723 724 725 /* The charset.c module initialization function. */ 726 727 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 728 729 static VEC (char_ptr) *charsets; 730 731 #ifdef PHONY_ICONV 732 733 static void 734 find_charset_names (void) 735 { 736 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 737 VEC_safe_push (char_ptr, charsets, NULL); 738 } 739 740 #else /* PHONY_ICONV */ 741 742 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 743 provides different symbols in the static and dynamic libraries. 744 So, configure may see libiconvlist but not iconvlist. But, calling 745 iconvlist is the right thing to do and will work. Hence we do a 746 check here but unconditionally call iconvlist below. */ 747 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 748 749 /* A helper function that adds some character sets to the vector of 750 all character sets. This is a callback function for iconvlist. */ 751 752 static int 753 add_one (unsigned int count, const char *const *names, void *data) 754 { 755 unsigned int i; 756 757 for (i = 0; i < count; ++i) 758 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 759 760 return 0; 761 } 762 763 static void 764 find_charset_names (void) 765 { 766 iconvlist (add_one, NULL); 767 VEC_safe_push (char_ptr, charsets, NULL); 768 } 769 770 #else 771 772 /* Return non-zero if LINE (output from iconv) should be ignored. 773 Older iconv programs (e.g. 2.2.2) include the human readable 774 introduction even when stdout is not a tty. Newer versions omit 775 the intro if stdout is not a tty. */ 776 777 static int 778 ignore_line_p (const char *line) 779 { 780 /* This table is used to filter the output. If this text appears 781 anywhere in the line, it is ignored (strstr is used). */ 782 static const char * const ignore_lines[] = 783 { 784 "The following", 785 "not necessarily", 786 "the FROM and TO", 787 "listed with several", 788 NULL 789 }; 790 int i; 791 792 for (i = 0; ignore_lines[i] != NULL; ++i) 793 { 794 if (strstr (line, ignore_lines[i]) != NULL) 795 return 1; 796 } 797 798 return 0; 799 } 800 801 static void 802 find_charset_names (void) 803 { 804 struct pex_obj *child; 805 char *args[3]; 806 int err, status; 807 int fail = 1; 808 int flags; 809 struct gdb_environ *iconv_env; 810 char *iconv_program; 811 812 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 813 not a tty. We need to recognize it and ignore it. This text is 814 subject to translation, so force LANGUAGE=C. */ 815 iconv_env = make_environ (); 816 init_environ (iconv_env); 817 set_in_environ (iconv_env, "LANGUAGE", "C"); 818 set_in_environ (iconv_env, "LC_ALL", "C"); 819 820 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 821 822 #ifdef ICONV_BIN 823 { 824 char *iconv_dir = relocate_gdb_directory (ICONV_BIN, 825 ICONV_BIN_RELOCATABLE); 826 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); 827 xfree (iconv_dir); 828 } 829 #else 830 iconv_program = xstrdup ("iconv"); 831 #endif 832 args[0] = iconv_program; 833 args[1] = "-l"; 834 args[2] = NULL; 835 flags = PEX_STDERR_TO_STDOUT; 836 #ifndef ICONV_BIN 837 flags |= PEX_SEARCH; 838 #endif 839 /* Note that we simply ignore errors here. */ 840 if (!pex_run_in_environment (child, flags, 841 args[0], args, environ_vector (iconv_env), 842 NULL, NULL, &err)) 843 { 844 FILE *in = pex_read_output (child, 0); 845 846 /* POSIX says that iconv -l uses an unspecified format. We 847 parse the glibc and libiconv formats; feel free to add others 848 as needed. */ 849 850 while (in != NULL && !feof (in)) 851 { 852 /* The size of buf is chosen arbitrarily. */ 853 char buf[1024]; 854 char *start, *r; 855 int len; 856 857 r = fgets (buf, sizeof (buf), in); 858 if (!r) 859 break; 860 len = strlen (r); 861 if (len <= 3) 862 continue; 863 if (ignore_line_p (r)) 864 continue; 865 866 /* Strip off the newline. */ 867 --len; 868 /* Strip off one or two '/'s. glibc will print lines like 869 "8859_7//", but also "10646-1:1993/UCS4/". */ 870 if (buf[len - 1] == '/') 871 --len; 872 if (buf[len - 1] == '/') 873 --len; 874 buf[len] = '\0'; 875 876 /* libiconv will print multiple entries per line, separated 877 by spaces. Older iconvs will print multiple entries per 878 line, indented by two spaces, and separated by ", " 879 (i.e. the human readable form). */ 880 start = buf; 881 while (1) 882 { 883 int keep_going; 884 char *p; 885 886 /* Skip leading blanks. */ 887 for (p = start; *p && *p == ' '; ++p) 888 ; 889 start = p; 890 /* Find the next space, comma, or end-of-line. */ 891 for ( ; *p && *p != ' ' && *p != ','; ++p) 892 ; 893 /* Ignore an empty result. */ 894 if (p == start) 895 break; 896 keep_going = *p; 897 *p = '\0'; 898 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 899 if (!keep_going) 900 break; 901 /* Skip any extra spaces. */ 902 for (start = p + 1; *start && *start == ' '; ++start) 903 ; 904 } 905 } 906 907 if (pex_get_status (child, 1, &status) 908 && WIFEXITED (status) && !WEXITSTATUS (status)) 909 fail = 0; 910 911 } 912 913 xfree (iconv_program); 914 pex_free (child); 915 free_environ (iconv_env); 916 917 if (fail) 918 { 919 /* Some error occurred, so drop the vector. */ 920 free_char_ptr_vec (charsets); 921 charsets = NULL; 922 } 923 else 924 VEC_safe_push (char_ptr, charsets, NULL); 925 } 926 927 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 928 #endif /* PHONY_ICONV */ 929 930 /* The "auto" target charset used by default_auto_charset. */ 931 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 932 933 const char * 934 default_auto_charset (void) 935 { 936 return auto_target_charset_name; 937 } 938 939 const char * 940 default_auto_wide_charset (void) 941 { 942 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 943 } 944 945 946 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 947 /* Macro used for UTF or UCS endianness suffix. */ 948 #if WORDS_BIGENDIAN 949 #define ENDIAN_SUFFIX "BE" 950 #else 951 #define ENDIAN_SUFFIX "LE" 952 #endif 953 954 /* The code below serves to generate a compile time error if 955 gdb_wchar_t type is not of size 2 nor 4, despite the fact that 956 macro __STDC_ISO_10646__ is defined. 957 This is better than a gdb_assert call, because GDB cannot handle 958 strings correctly if this size is different. */ 959 960 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 961 || sizeof (gdb_wchar_t) == 4) 962 ? 1 : -1]; 963 964 /* intermediate_encoding returns the charset used internally by 965 GDB to convert between target and host encodings. As the test above 966 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 967 UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 968 otherwise an error is generated. */ 969 970 const char * 971 intermediate_encoding (void) 972 { 973 iconv_t desc; 974 static const char *stored_result = NULL; 975 char *result; 976 977 if (stored_result) 978 return stored_result; 979 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 980 ENDIAN_SUFFIX); 981 /* Check that the name is supported by iconv_open. */ 982 desc = iconv_open (result, host_charset ()); 983 if (desc != (iconv_t) -1) 984 { 985 iconv_close (desc); 986 stored_result = result; 987 return result; 988 } 989 /* Not valid, free the allocated memory. */ 990 xfree (result); 991 /* Second try, with UCS-2 type. */ 992 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 993 ENDIAN_SUFFIX); 994 /* Check that the name is supported by iconv_open. */ 995 desc = iconv_open (result, host_charset ()); 996 if (desc != (iconv_t) -1) 997 { 998 iconv_close (desc); 999 stored_result = result; 1000 return result; 1001 } 1002 /* Not valid, free the allocated memory. */ 1003 xfree (result); 1004 /* No valid charset found, generate error here. */ 1005 error (_("Unable to find a vaild charset for string conversions")); 1006 } 1007 1008 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 1009 1010 void 1011 _initialize_charset (void) 1012 { 1013 /* The first element is always "auto". */ 1014 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 1015 find_charset_names (); 1016 1017 if (VEC_length (char_ptr, charsets) > 1) 1018 charset_enum = (const char **) VEC_address (char_ptr, charsets); 1019 else 1020 charset_enum = default_charset_names; 1021 1022 #ifndef PHONY_ICONV 1023 #ifdef HAVE_LANGINFO_CODESET 1024 /* The result of nl_langinfo may be overwritten later. This may 1025 leak a little memory, if the user later changes the host charset, 1026 but that doesn't matter much. */ 1027 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1028 /* Solaris will return `646' here -- but the Solaris iconv then does 1029 not accept this. Darwin (and maybe FreeBSD) may return "" here, 1030 which GNU libiconv doesn't like (infinite loop). */ 1031 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 1032 auto_host_charset_name = "ASCII"; 1033 auto_target_charset_name = auto_host_charset_name; 1034 #elif defined (USE_WIN32API) 1035 { 1036 /* "CP" + x<=5 digits + paranoia. */ 1037 static char w32_host_default_charset[16]; 1038 1039 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1040 "CP%d", GetACP()); 1041 auto_host_charset_name = w32_host_default_charset; 1042 auto_target_charset_name = auto_host_charset_name; 1043 } 1044 #endif 1045 #endif 1046 1047 add_setshow_enum_cmd ("charset", class_support, 1048 charset_enum, &host_charset_name, _("\ 1049 Set the host and target character sets."), _("\ 1050 Show the host and target character sets."), _("\ 1051 The `host character set' is the one used by the system GDB is running on.\n\ 1052 The `target character set' is the one used by the program being debugged.\n\ 1053 You may only use supersets of ASCII for your host character set; GDB does\n\ 1054 not support any others.\n\ 1055 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 1056 /* Note that the sfunc below needs to set 1057 target_charset_name, because the 'set 1058 charset' command sets two variables. */ 1059 set_charset_sfunc, 1060 show_charset, 1061 &setlist, &showlist); 1062 1063 add_setshow_enum_cmd ("host-charset", class_support, 1064 charset_enum, &host_charset_name, _("\ 1065 Set the host character set."), _("\ 1066 Show the host character set."), _("\ 1067 The `host character set' is the one used by the system GDB is running on.\n\ 1068 You may only use supersets of ASCII for your host character set; GDB does\n\ 1069 not support any others.\n\ 1070 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 1071 set_host_charset_sfunc, 1072 show_host_charset_name, 1073 &setlist, &showlist); 1074 1075 add_setshow_enum_cmd ("target-charset", class_support, 1076 charset_enum, &target_charset_name, _("\ 1077 Set the target character set."), _("\ 1078 Show the target character set."), _("\ 1079 The `target character set' is the one used by the program being debugged.\n\ 1080 GDB translates characters and strings between the host and target\n\ 1081 character sets as needed.\n\ 1082 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 1083 set_target_charset_sfunc, 1084 show_target_charset_name, 1085 &setlist, &showlist); 1086 1087 add_setshow_enum_cmd ("target-wide-charset", class_support, 1088 charset_enum, &target_wide_charset_name, 1089 _("\ 1090 Set the target wide character set."), _("\ 1091 Show the target wide character set."), _("\ 1092 The `target wide character set' is the one used by the program being debugged.\ 1093 \nIn particular it is the encoding used by `wchar_t'.\n\ 1094 GDB translates characters and strings between the host and target\n\ 1095 character sets as needed.\n\ 1096 To see a list of the character sets GDB supports, type\n\ 1097 `set target-wide-charset'<TAB>"), 1098 set_target_wide_charset_sfunc, 1099 show_target_wide_charset_name, 1100 &setlist, &showlist); 1101 } 1102