15796c8dcSSimon Schubert /* Character set conversion support for GDB. 25796c8dcSSimon Schubert 3*a45ae5f8SJohn Marino Copyright (C) 2001, 2003, 2007-2012 Free Software Foundation, Inc. 45796c8dcSSimon Schubert 55796c8dcSSimon Schubert This file is part of GDB. 65796c8dcSSimon Schubert 75796c8dcSSimon Schubert This program is free software; you can redistribute it and/or modify 85796c8dcSSimon Schubert it under the terms of the GNU General Public License as published by 95796c8dcSSimon Schubert the Free Software Foundation; either version 3 of the License, or 105796c8dcSSimon Schubert (at your option) any later version. 115796c8dcSSimon Schubert 125796c8dcSSimon Schubert This program is distributed in the hope that it will be useful, 135796c8dcSSimon Schubert but WITHOUT ANY WARRANTY; without even the implied warranty of 145796c8dcSSimon Schubert MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 155796c8dcSSimon Schubert GNU General Public License for more details. 165796c8dcSSimon Schubert 175796c8dcSSimon Schubert You should have received a copy of the GNU General Public License 185796c8dcSSimon Schubert along with this program. If not, see <http://www.gnu.org/licenses/>. */ 195796c8dcSSimon Schubert 205796c8dcSSimon Schubert #include "defs.h" 215796c8dcSSimon Schubert #include "charset.h" 225796c8dcSSimon Schubert #include "gdbcmd.h" 235796c8dcSSimon Schubert #include "gdb_assert.h" 245796c8dcSSimon Schubert #include "gdb_obstack.h" 255796c8dcSSimon Schubert #include "gdb_wait.h" 265796c8dcSSimon Schubert #include "charset-list.h" 275796c8dcSSimon Schubert #include "vec.h" 28cf7f2e2dSJohn Marino #include "environ.h" 29cf7f2e2dSJohn Marino #include "arch-utils.h" 305796c8dcSSimon Schubert 315796c8dcSSimon Schubert #include <stddef.h> 325796c8dcSSimon Schubert #include "gdb_string.h" 335796c8dcSSimon Schubert #include <ctype.h> 345796c8dcSSimon Schubert 35cf7f2e2dSJohn Marino #ifdef USE_WIN32API 36cf7f2e2dSJohn Marino #include <windows.h> 37cf7f2e2dSJohn Marino #endif 385796c8dcSSimon Schubert 395796c8dcSSimon Schubert /* How GDB's character set support works 405796c8dcSSimon Schubert 415796c8dcSSimon Schubert GDB has three global settings: 425796c8dcSSimon Schubert 435796c8dcSSimon Schubert - The `current host character set' is the character set GDB should 445796c8dcSSimon Schubert use in talking to the user, and which (hopefully) the user's 455796c8dcSSimon Schubert terminal knows how to display properly. Most users should not 465796c8dcSSimon Schubert change this. 475796c8dcSSimon Schubert 485796c8dcSSimon Schubert - The `current target character set' is the character set the 495796c8dcSSimon Schubert program being debugged uses. 505796c8dcSSimon Schubert 515796c8dcSSimon Schubert - The `current target wide character set' is the wide character set 525796c8dcSSimon Schubert the program being debugged uses, that is, the encoding used for 535796c8dcSSimon Schubert wchar_t. 545796c8dcSSimon Schubert 555796c8dcSSimon Schubert There are commands to set each of these, and mechanisms for 565796c8dcSSimon Schubert choosing reasonable default values. GDB has a global list of 575796c8dcSSimon Schubert character sets that it can use as its host or target character 585796c8dcSSimon Schubert sets. 595796c8dcSSimon Schubert 605796c8dcSSimon Schubert The header file `charset.h' declares various functions that 615796c8dcSSimon Schubert different pieces of GDB need to perform tasks like: 625796c8dcSSimon Schubert 635796c8dcSSimon Schubert - printing target strings and characters to the user's terminal 645796c8dcSSimon Schubert (mostly target->host conversions), 655796c8dcSSimon Schubert 665796c8dcSSimon Schubert - building target-appropriate representations of strings and 675796c8dcSSimon Schubert characters the user enters in expressions (mostly host->target 685796c8dcSSimon Schubert conversions), 695796c8dcSSimon Schubert 705796c8dcSSimon Schubert and so on. 715796c8dcSSimon Schubert 725796c8dcSSimon Schubert To avoid excessive code duplication and maintenance efforts, 735796c8dcSSimon Schubert GDB simply requires a capable iconv function. Users on platforms 745796c8dcSSimon Schubert without a suitable iconv can use the GNU iconv library. */ 755796c8dcSSimon Schubert 765796c8dcSSimon Schubert 775796c8dcSSimon Schubert #ifdef PHONY_ICONV 785796c8dcSSimon Schubert 795796c8dcSSimon Schubert /* Provide a phony iconv that does as little as possible. Also, 805796c8dcSSimon Schubert arrange for there to be a single available character set. */ 815796c8dcSSimon Schubert 825796c8dcSSimon Schubert #undef GDB_DEFAULT_HOST_CHARSET 835796c8dcSSimon Schubert #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 845796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 855796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 865796c8dcSSimon Schubert #undef DEFAULT_CHARSET_NAMES 875796c8dcSSimon Schubert #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 885796c8dcSSimon Schubert 895796c8dcSSimon Schubert #undef iconv_t 905796c8dcSSimon Schubert #define iconv_t int 915796c8dcSSimon Schubert #undef iconv_open 92c50c785cSJohn Marino #define iconv_open phony_iconv_open 935796c8dcSSimon Schubert #undef iconv 94c50c785cSJohn Marino #define iconv phony_iconv 955796c8dcSSimon Schubert #undef iconv_close 96c50c785cSJohn Marino #define iconv_close phony_iconv_close 975796c8dcSSimon Schubert 985796c8dcSSimon Schubert #undef ICONV_CONST 995796c8dcSSimon Schubert #define ICONV_CONST const 1005796c8dcSSimon Schubert 1015796c8dcSSimon Schubert /* Some systems don't have EILSEQ, so we define it here, but not as 1025796c8dcSSimon Schubert EINVAL, because callers of `iconv' want to distinguish EINVAL and 1035796c8dcSSimon Schubert EILSEQ. This is what iconv.h from libiconv does as well. Note 1045796c8dcSSimon Schubert that wchar.h may also define EILSEQ, so this needs to be after we 1055796c8dcSSimon Schubert include wchar.h, which happens in defs.h through gdb_wchar.h. */ 1065796c8dcSSimon Schubert #ifndef EILSEQ 1075796c8dcSSimon Schubert #define EILSEQ ENOENT 1085796c8dcSSimon Schubert #endif 1095796c8dcSSimon Schubert 1105796c8dcSSimon Schubert iconv_t 111c50c785cSJohn Marino phony_iconv_open (const char *to, const char *from) 1125796c8dcSSimon Schubert { 113cf7f2e2dSJohn Marino /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 1145796c8dcSSimon Schubert We allow conversions to wchar_t and the host charset. */ 115cf7f2e2dSJohn Marino if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 1165796c8dcSSimon Schubert && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 1175796c8dcSSimon Schubert return -1; 1185796c8dcSSimon Schubert if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 1195796c8dcSSimon Schubert return -1; 1205796c8dcSSimon Schubert 121cf7f2e2dSJohn Marino /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 1225796c8dcSSimon Schubert used as a flag in calls to iconv. */ 123cf7f2e2dSJohn Marino return !strcmp (from, "UTF-32BE"); 1245796c8dcSSimon Schubert } 1255796c8dcSSimon Schubert 1265796c8dcSSimon Schubert int 127c50c785cSJohn Marino phony_iconv_close (iconv_t arg) 1285796c8dcSSimon Schubert { 1295796c8dcSSimon Schubert return 0; 1305796c8dcSSimon Schubert } 1315796c8dcSSimon Schubert 1325796c8dcSSimon Schubert size_t 133c50c785cSJohn Marino phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 1345796c8dcSSimon Schubert char **outbuf, size_t *outbytesleft) 1355796c8dcSSimon Schubert { 136cf7f2e2dSJohn Marino if (utf_flag) 1375796c8dcSSimon Schubert { 1385796c8dcSSimon Schubert while (*inbytesleft >= 4) 1395796c8dcSSimon Schubert { 1405796c8dcSSimon Schubert size_t j; 1415796c8dcSSimon Schubert unsigned long c = 0; 1425796c8dcSSimon Schubert 1435796c8dcSSimon Schubert for (j = 0; j < 4; ++j) 1445796c8dcSSimon Schubert { 1455796c8dcSSimon Schubert c <<= 8; 1465796c8dcSSimon Schubert c += (*inbuf)[j] & 0xff; 1475796c8dcSSimon Schubert } 1485796c8dcSSimon Schubert 1495796c8dcSSimon Schubert if (c >= 256) 1505796c8dcSSimon Schubert { 1515796c8dcSSimon Schubert errno = EILSEQ; 1525796c8dcSSimon Schubert return -1; 1535796c8dcSSimon Schubert } 1545796c8dcSSimon Schubert **outbuf = c & 0xff; 1555796c8dcSSimon Schubert ++*outbuf; 1565796c8dcSSimon Schubert --*outbytesleft; 1575796c8dcSSimon Schubert 1585796c8dcSSimon Schubert ++*inbuf; 1595796c8dcSSimon Schubert *inbytesleft -= 4; 1605796c8dcSSimon Schubert } 1615796c8dcSSimon Schubert if (*inbytesleft < 4) 1625796c8dcSSimon Schubert { 1635796c8dcSSimon Schubert errno = EINVAL; 1645796c8dcSSimon Schubert return -1; 1655796c8dcSSimon Schubert } 1665796c8dcSSimon Schubert } 1675796c8dcSSimon Schubert else 1685796c8dcSSimon Schubert { 1695796c8dcSSimon Schubert /* In all other cases we simply copy input bytes to the 1705796c8dcSSimon Schubert output. */ 1715796c8dcSSimon Schubert size_t amt = *inbytesleft; 172cf7f2e2dSJohn Marino 1735796c8dcSSimon Schubert if (amt > *outbytesleft) 1745796c8dcSSimon Schubert amt = *outbytesleft; 1755796c8dcSSimon Schubert memcpy (*outbuf, *inbuf, amt); 1765796c8dcSSimon Schubert *inbuf += amt; 1775796c8dcSSimon Schubert *outbuf += amt; 1785796c8dcSSimon Schubert *inbytesleft -= amt; 1795796c8dcSSimon Schubert *outbytesleft -= amt; 1805796c8dcSSimon Schubert } 1815796c8dcSSimon Schubert 1825796c8dcSSimon Schubert if (*inbytesleft) 1835796c8dcSSimon Schubert { 1845796c8dcSSimon Schubert errno = E2BIG; 1855796c8dcSSimon Schubert return -1; 1865796c8dcSSimon Schubert } 1875796c8dcSSimon Schubert 1885796c8dcSSimon Schubert /* The number of non-reversible conversions -- but they were all 1895796c8dcSSimon Schubert reversible. */ 1905796c8dcSSimon Schubert return 0; 1915796c8dcSSimon Schubert } 1925796c8dcSSimon Schubert 1935796c8dcSSimon Schubert #endif 1945796c8dcSSimon Schubert 1955796c8dcSSimon Schubert 1965796c8dcSSimon Schubert 1975796c8dcSSimon Schubert /* The global lists of character sets and translations. */ 1985796c8dcSSimon Schubert 1995796c8dcSSimon Schubert 2005796c8dcSSimon Schubert #ifndef GDB_DEFAULT_TARGET_CHARSET 2015796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 2025796c8dcSSimon Schubert #endif 2035796c8dcSSimon Schubert 2045796c8dcSSimon Schubert #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 205cf7f2e2dSJohn Marino #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 2065796c8dcSSimon Schubert #endif 2075796c8dcSSimon Schubert 2085796c8dcSSimon Schubert static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 2095796c8dcSSimon Schubert static const char *host_charset_name = "auto"; 2105796c8dcSSimon Schubert static void 2115796c8dcSSimon Schubert show_host_charset_name (struct ui_file *file, int from_tty, 2125796c8dcSSimon Schubert struct cmd_list_element *c, 2135796c8dcSSimon Schubert const char *value) 2145796c8dcSSimon Schubert { 2155796c8dcSSimon Schubert if (!strcmp (value, "auto")) 2165796c8dcSSimon Schubert fprintf_filtered (file, 2175796c8dcSSimon Schubert _("The host character set is \"auto; currently %s\".\n"), 2185796c8dcSSimon Schubert auto_host_charset_name); 2195796c8dcSSimon Schubert else 2205796c8dcSSimon Schubert fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 2215796c8dcSSimon Schubert } 2225796c8dcSSimon Schubert 223cf7f2e2dSJohn Marino static const char *target_charset_name = "auto"; 2245796c8dcSSimon Schubert static void 2255796c8dcSSimon Schubert show_target_charset_name (struct ui_file *file, int from_tty, 2265796c8dcSSimon Schubert struct cmd_list_element *c, const char *value) 2275796c8dcSSimon Schubert { 228cf7f2e2dSJohn Marino if (!strcmp (value, "auto")) 229cf7f2e2dSJohn Marino fprintf_filtered (file, 230cf7f2e2dSJohn Marino _("The target character set is \"auto; " 231cf7f2e2dSJohn Marino "currently %s\".\n"), 232cf7f2e2dSJohn Marino gdbarch_auto_charset (get_current_arch ())); 233cf7f2e2dSJohn Marino else 2345796c8dcSSimon Schubert fprintf_filtered (file, _("The target character set is \"%s\".\n"), 2355796c8dcSSimon Schubert value); 2365796c8dcSSimon Schubert } 2375796c8dcSSimon Schubert 238cf7f2e2dSJohn Marino static const char *target_wide_charset_name = "auto"; 2395796c8dcSSimon Schubert static void 240c50c785cSJohn Marino show_target_wide_charset_name (struct ui_file *file, 241c50c785cSJohn Marino int from_tty, 242c50c785cSJohn Marino struct cmd_list_element *c, 243c50c785cSJohn Marino const char *value) 2445796c8dcSSimon Schubert { 245cf7f2e2dSJohn Marino if (!strcmp (value, "auto")) 246cf7f2e2dSJohn Marino fprintf_filtered (file, 247cf7f2e2dSJohn Marino _("The target wide character set is \"auto; " 248cf7f2e2dSJohn Marino "currently %s\".\n"), 249cf7f2e2dSJohn Marino gdbarch_auto_wide_charset (get_current_arch ())); 250cf7f2e2dSJohn Marino else 2515796c8dcSSimon Schubert fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 2525796c8dcSSimon Schubert value); 2535796c8dcSSimon Schubert } 2545796c8dcSSimon Schubert 2555796c8dcSSimon Schubert static const char *default_charset_names[] = 2565796c8dcSSimon Schubert { 2575796c8dcSSimon Schubert DEFAULT_CHARSET_NAMES 2585796c8dcSSimon Schubert 0 2595796c8dcSSimon Schubert }; 2605796c8dcSSimon Schubert 2615796c8dcSSimon Schubert static const char **charset_enum; 2625796c8dcSSimon Schubert 2635796c8dcSSimon Schubert 2645796c8dcSSimon Schubert /* If the target wide character set has big- or little-endian 2655796c8dcSSimon Schubert variants, these are the corresponding names. */ 2665796c8dcSSimon Schubert static const char *target_wide_charset_be_name; 2675796c8dcSSimon Schubert static const char *target_wide_charset_le_name; 2685796c8dcSSimon Schubert 269cf7f2e2dSJohn Marino /* The architecture for which the BE- and LE-names are valid. */ 270cf7f2e2dSJohn Marino static struct gdbarch *be_le_arch; 271cf7f2e2dSJohn Marino 272cf7f2e2dSJohn Marino /* A helper function which sets the target wide big- and little-endian 273cf7f2e2dSJohn Marino character set names, if possible. */ 2745796c8dcSSimon Schubert 2755796c8dcSSimon Schubert static void 276cf7f2e2dSJohn Marino set_be_le_names (struct gdbarch *gdbarch) 2775796c8dcSSimon Schubert { 2785796c8dcSSimon Schubert int i, len; 279cf7f2e2dSJohn Marino const char *target_wide; 280cf7f2e2dSJohn Marino 281cf7f2e2dSJohn Marino if (be_le_arch == gdbarch) 282cf7f2e2dSJohn Marino return; 283cf7f2e2dSJohn Marino be_le_arch = gdbarch; 2845796c8dcSSimon Schubert 2855796c8dcSSimon Schubert target_wide_charset_le_name = NULL; 2865796c8dcSSimon Schubert target_wide_charset_be_name = NULL; 2875796c8dcSSimon Schubert 288cf7f2e2dSJohn Marino target_wide = target_wide_charset_name; 289cf7f2e2dSJohn Marino if (!strcmp (target_wide, "auto")) 290cf7f2e2dSJohn Marino target_wide = gdbarch_auto_wide_charset (gdbarch); 291cf7f2e2dSJohn Marino 292cf7f2e2dSJohn Marino len = strlen (target_wide); 2935796c8dcSSimon Schubert for (i = 0; charset_enum[i]; ++i) 2945796c8dcSSimon Schubert { 295cf7f2e2dSJohn Marino if (strncmp (target_wide, charset_enum[i], len)) 2965796c8dcSSimon Schubert continue; 2975796c8dcSSimon Schubert if ((charset_enum[i][len] == 'B' 2985796c8dcSSimon Schubert || charset_enum[i][len] == 'L') 2995796c8dcSSimon Schubert && charset_enum[i][len + 1] == 'E' 3005796c8dcSSimon Schubert && charset_enum[i][len + 2] == '\0') 3015796c8dcSSimon Schubert { 3025796c8dcSSimon Schubert if (charset_enum[i][len] == 'B') 3035796c8dcSSimon Schubert target_wide_charset_be_name = charset_enum[i]; 3045796c8dcSSimon Schubert else 3055796c8dcSSimon Schubert target_wide_charset_le_name = charset_enum[i]; 3065796c8dcSSimon Schubert } 3075796c8dcSSimon Schubert } 3085796c8dcSSimon Schubert } 3095796c8dcSSimon Schubert 3105796c8dcSSimon Schubert /* 'Set charset', 'set host-charset', 'set target-charset', 'set 3115796c8dcSSimon Schubert target-wide-charset', 'set charset' sfunc's. */ 3125796c8dcSSimon Schubert 3135796c8dcSSimon Schubert static void 314cf7f2e2dSJohn Marino validate (struct gdbarch *gdbarch) 3155796c8dcSSimon Schubert { 3165796c8dcSSimon Schubert iconv_t desc; 3175796c8dcSSimon Schubert const char *host_cset = host_charset (); 318cf7f2e2dSJohn Marino const char *target_cset = target_charset (gdbarch); 319cf7f2e2dSJohn Marino const char *target_wide_cset = target_wide_charset_name; 3205796c8dcSSimon Schubert 321cf7f2e2dSJohn Marino if (!strcmp (target_wide_cset, "auto")) 322cf7f2e2dSJohn Marino target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 323cf7f2e2dSJohn Marino 324cf7f2e2dSJohn Marino desc = iconv_open (target_wide_cset, host_cset); 3255796c8dcSSimon Schubert if (desc == (iconv_t) -1) 326c50c785cSJohn Marino error (_("Cannot convert between character sets `%s' and `%s'"), 327cf7f2e2dSJohn Marino target_wide_cset, host_cset); 3285796c8dcSSimon Schubert iconv_close (desc); 3295796c8dcSSimon Schubert 330cf7f2e2dSJohn Marino desc = iconv_open (target_cset, host_cset); 3315796c8dcSSimon Schubert if (desc == (iconv_t) -1) 332c50c785cSJohn Marino error (_("Cannot convert between character sets `%s' and `%s'"), 333cf7f2e2dSJohn Marino target_cset, host_cset); 3345796c8dcSSimon Schubert iconv_close (desc); 3355796c8dcSSimon Schubert 336cf7f2e2dSJohn Marino /* Clear the cache. */ 337cf7f2e2dSJohn Marino be_le_arch = NULL; 3385796c8dcSSimon Schubert } 3395796c8dcSSimon Schubert 3405796c8dcSSimon Schubert /* This is the sfunc for the 'set charset' command. */ 3415796c8dcSSimon Schubert static void 342c50c785cSJohn Marino set_charset_sfunc (char *charset, int from_tty, 343c50c785cSJohn Marino struct cmd_list_element *c) 3445796c8dcSSimon Schubert { 3455796c8dcSSimon Schubert /* CAREFUL: set the target charset here as well. */ 3465796c8dcSSimon Schubert target_charset_name = host_charset_name; 347cf7f2e2dSJohn Marino validate (get_current_arch ()); 3485796c8dcSSimon Schubert } 3495796c8dcSSimon Schubert 3505796c8dcSSimon Schubert /* 'set host-charset' command sfunc. We need a wrapper here because 3515796c8dcSSimon Schubert the function needs to have a specific signature. */ 3525796c8dcSSimon Schubert static void 3535796c8dcSSimon Schubert set_host_charset_sfunc (char *charset, int from_tty, 3545796c8dcSSimon Schubert struct cmd_list_element *c) 3555796c8dcSSimon Schubert { 356cf7f2e2dSJohn Marino validate (get_current_arch ()); 3575796c8dcSSimon Schubert } 3585796c8dcSSimon Schubert 3595796c8dcSSimon Schubert /* Wrapper for the 'set target-charset' command. */ 3605796c8dcSSimon Schubert static void 3615796c8dcSSimon Schubert set_target_charset_sfunc (char *charset, int from_tty, 3625796c8dcSSimon Schubert struct cmd_list_element *c) 3635796c8dcSSimon Schubert { 364cf7f2e2dSJohn Marino validate (get_current_arch ()); 3655796c8dcSSimon Schubert } 3665796c8dcSSimon Schubert 3675796c8dcSSimon Schubert /* Wrapper for the 'set target-wide-charset' command. */ 3685796c8dcSSimon Schubert static void 3695796c8dcSSimon Schubert set_target_wide_charset_sfunc (char *charset, int from_tty, 3705796c8dcSSimon Schubert struct cmd_list_element *c) 3715796c8dcSSimon Schubert { 372cf7f2e2dSJohn Marino validate (get_current_arch ()); 3735796c8dcSSimon Schubert } 3745796c8dcSSimon Schubert 3755796c8dcSSimon Schubert /* sfunc for the 'show charset' command. */ 3765796c8dcSSimon Schubert static void 377c50c785cSJohn Marino show_charset (struct ui_file *file, int from_tty, 378c50c785cSJohn Marino struct cmd_list_element *c, 3795796c8dcSSimon Schubert const char *name) 3805796c8dcSSimon Schubert { 3815796c8dcSSimon Schubert show_host_charset_name (file, from_tty, c, host_charset_name); 3825796c8dcSSimon Schubert show_target_charset_name (file, from_tty, c, target_charset_name); 383c50c785cSJohn Marino show_target_wide_charset_name (file, from_tty, c, 384c50c785cSJohn Marino target_wide_charset_name); 3855796c8dcSSimon Schubert } 3865796c8dcSSimon Schubert 3875796c8dcSSimon Schubert 3885796c8dcSSimon Schubert /* Accessor functions. */ 3895796c8dcSSimon Schubert 3905796c8dcSSimon Schubert const char * 3915796c8dcSSimon Schubert host_charset (void) 3925796c8dcSSimon Schubert { 3935796c8dcSSimon Schubert if (!strcmp (host_charset_name, "auto")) 3945796c8dcSSimon Schubert return auto_host_charset_name; 3955796c8dcSSimon Schubert return host_charset_name; 3965796c8dcSSimon Schubert } 3975796c8dcSSimon Schubert 3985796c8dcSSimon Schubert const char * 399cf7f2e2dSJohn Marino target_charset (struct gdbarch *gdbarch) 4005796c8dcSSimon Schubert { 401cf7f2e2dSJohn Marino if (!strcmp (target_charset_name, "auto")) 402cf7f2e2dSJohn Marino return gdbarch_auto_charset (gdbarch); 4035796c8dcSSimon Schubert return target_charset_name; 4045796c8dcSSimon Schubert } 4055796c8dcSSimon Schubert 4065796c8dcSSimon Schubert const char * 407cf7f2e2dSJohn Marino target_wide_charset (struct gdbarch *gdbarch) 4085796c8dcSSimon Schubert { 409cf7f2e2dSJohn Marino enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 410cf7f2e2dSJohn Marino 411cf7f2e2dSJohn Marino set_be_le_names (gdbarch); 4125796c8dcSSimon Schubert if (byte_order == BFD_ENDIAN_BIG) 4135796c8dcSSimon Schubert { 4145796c8dcSSimon Schubert if (target_wide_charset_be_name) 4155796c8dcSSimon Schubert return target_wide_charset_be_name; 4165796c8dcSSimon Schubert } 4175796c8dcSSimon Schubert else 4185796c8dcSSimon Schubert { 4195796c8dcSSimon Schubert if (target_wide_charset_le_name) 4205796c8dcSSimon Schubert return target_wide_charset_le_name; 4215796c8dcSSimon Schubert } 4225796c8dcSSimon Schubert 423cf7f2e2dSJohn Marino if (!strcmp (target_wide_charset_name, "auto")) 424cf7f2e2dSJohn Marino return gdbarch_auto_wide_charset (gdbarch); 425cf7f2e2dSJohn Marino 4265796c8dcSSimon Schubert return target_wide_charset_name; 4275796c8dcSSimon Schubert } 4285796c8dcSSimon Schubert 4295796c8dcSSimon Schubert 4305796c8dcSSimon Schubert /* Host character set management. For the time being, we assume that 4315796c8dcSSimon Schubert the host character set is some superset of ASCII. */ 4325796c8dcSSimon Schubert 4335796c8dcSSimon Schubert char 4345796c8dcSSimon Schubert host_letter_to_control_character (char c) 4355796c8dcSSimon Schubert { 4365796c8dcSSimon Schubert if (c == '?') 4375796c8dcSSimon Schubert return 0177; 4385796c8dcSSimon Schubert return c & 0237; 4395796c8dcSSimon Schubert } 4405796c8dcSSimon Schubert 4415796c8dcSSimon Schubert /* Convert a host character, C, to its hex value. C must already have 4425796c8dcSSimon Schubert been validated using isxdigit. */ 4435796c8dcSSimon Schubert 4445796c8dcSSimon Schubert int 4455796c8dcSSimon Schubert host_hex_value (char c) 4465796c8dcSSimon Schubert { 4475796c8dcSSimon Schubert if (isdigit (c)) 4485796c8dcSSimon Schubert return c - '0'; 4495796c8dcSSimon Schubert if (c >= 'a' && c <= 'f') 4505796c8dcSSimon Schubert return 10 + c - 'a'; 4515796c8dcSSimon Schubert gdb_assert (c >= 'A' && c <= 'F'); 4525796c8dcSSimon Schubert return 10 + c - 'A'; 4535796c8dcSSimon Schubert } 4545796c8dcSSimon Schubert 4555796c8dcSSimon Schubert 4565796c8dcSSimon Schubert /* Public character management functions. */ 4575796c8dcSSimon Schubert 4585796c8dcSSimon Schubert /* A cleanup function which is run to close an iconv descriptor. */ 4595796c8dcSSimon Schubert 4605796c8dcSSimon Schubert static void 4615796c8dcSSimon Schubert cleanup_iconv (void *p) 4625796c8dcSSimon Schubert { 4635796c8dcSSimon Schubert iconv_t *descp = p; 4645796c8dcSSimon Schubert iconv_close (*descp); 4655796c8dcSSimon Schubert } 4665796c8dcSSimon Schubert 4675796c8dcSSimon Schubert void 4685796c8dcSSimon Schubert convert_between_encodings (const char *from, const char *to, 4695796c8dcSSimon Schubert const gdb_byte *bytes, unsigned int num_bytes, 4705796c8dcSSimon Schubert int width, struct obstack *output, 4715796c8dcSSimon Schubert enum transliterations translit) 4725796c8dcSSimon Schubert { 4735796c8dcSSimon Schubert iconv_t desc; 4745796c8dcSSimon Schubert struct cleanup *cleanups; 4755796c8dcSSimon Schubert size_t inleft; 4765796c8dcSSimon Schubert char *inp; 4775796c8dcSSimon Schubert unsigned int space_request; 4785796c8dcSSimon Schubert 4795796c8dcSSimon Schubert /* Often, the host and target charsets will be the same. */ 4805796c8dcSSimon Schubert if (!strcmp (from, to)) 4815796c8dcSSimon Schubert { 4825796c8dcSSimon Schubert obstack_grow (output, bytes, num_bytes); 4835796c8dcSSimon Schubert return; 4845796c8dcSSimon Schubert } 4855796c8dcSSimon Schubert 4865796c8dcSSimon Schubert desc = iconv_open (to, from); 4875796c8dcSSimon Schubert if (desc == (iconv_t) -1) 488c50c785cSJohn Marino perror_with_name (_("Converting character sets")); 4895796c8dcSSimon Schubert cleanups = make_cleanup (cleanup_iconv, &desc); 4905796c8dcSSimon Schubert 4915796c8dcSSimon Schubert inleft = num_bytes; 4925796c8dcSSimon Schubert inp = (char *) bytes; 4935796c8dcSSimon Schubert 4945796c8dcSSimon Schubert space_request = num_bytes; 4955796c8dcSSimon Schubert 4965796c8dcSSimon Schubert while (inleft > 0) 4975796c8dcSSimon Schubert { 4985796c8dcSSimon Schubert char *outp; 4995796c8dcSSimon Schubert size_t outleft, r; 5005796c8dcSSimon Schubert int old_size; 5015796c8dcSSimon Schubert 5025796c8dcSSimon Schubert old_size = obstack_object_size (output); 5035796c8dcSSimon Schubert obstack_blank (output, space_request); 5045796c8dcSSimon Schubert 5055796c8dcSSimon Schubert outp = obstack_base (output) + old_size; 5065796c8dcSSimon Schubert outleft = space_request; 5075796c8dcSSimon Schubert 5085796c8dcSSimon Schubert r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); 5095796c8dcSSimon Schubert 5105796c8dcSSimon Schubert /* Now make sure that the object on the obstack only includes 5115796c8dcSSimon Schubert bytes we have converted. */ 5125796c8dcSSimon Schubert obstack_blank (output, - (int) outleft); 5135796c8dcSSimon Schubert 5145796c8dcSSimon Schubert if (r == (size_t) -1) 5155796c8dcSSimon Schubert { 5165796c8dcSSimon Schubert switch (errno) 5175796c8dcSSimon Schubert { 5185796c8dcSSimon Schubert case EILSEQ: 5195796c8dcSSimon Schubert { 5205796c8dcSSimon Schubert int i; 5215796c8dcSSimon Schubert 5225796c8dcSSimon Schubert /* Invalid input sequence. */ 5235796c8dcSSimon Schubert if (translit == translit_none) 524c50c785cSJohn Marino error (_("Could not convert character " 525c50c785cSJohn Marino "to `%s' character set"), to); 5265796c8dcSSimon Schubert 5275796c8dcSSimon Schubert /* We emit escape sequence for the bytes, skip them, 5285796c8dcSSimon Schubert and try again. */ 5295796c8dcSSimon Schubert for (i = 0; i < width; ++i) 5305796c8dcSSimon Schubert { 5315796c8dcSSimon Schubert char octal[5]; 5325796c8dcSSimon Schubert 5335796c8dcSSimon Schubert sprintf (octal, "\\%.3o", *inp & 0xff); 5345796c8dcSSimon Schubert obstack_grow_str (output, octal); 5355796c8dcSSimon Schubert 5365796c8dcSSimon Schubert ++inp; 5375796c8dcSSimon Schubert --inleft; 5385796c8dcSSimon Schubert } 5395796c8dcSSimon Schubert } 5405796c8dcSSimon Schubert break; 5415796c8dcSSimon Schubert 5425796c8dcSSimon Schubert case E2BIG: 5435796c8dcSSimon Schubert /* We ran out of space in the output buffer. Make it 5445796c8dcSSimon Schubert bigger next time around. */ 5455796c8dcSSimon Schubert space_request *= 2; 5465796c8dcSSimon Schubert break; 5475796c8dcSSimon Schubert 5485796c8dcSSimon Schubert case EINVAL: 5495796c8dcSSimon Schubert /* Incomplete input sequence. FIXME: ought to report this 5505796c8dcSSimon Schubert to the caller somehow. */ 5515796c8dcSSimon Schubert inleft = 0; 5525796c8dcSSimon Schubert break; 5535796c8dcSSimon Schubert 5545796c8dcSSimon Schubert default: 555c50c785cSJohn Marino perror_with_name (_("Internal error while " 556c50c785cSJohn Marino "converting character sets")); 5575796c8dcSSimon Schubert } 5585796c8dcSSimon Schubert } 5595796c8dcSSimon Schubert } 5605796c8dcSSimon Schubert 5615796c8dcSSimon Schubert do_cleanups (cleanups); 5625796c8dcSSimon Schubert } 5635796c8dcSSimon Schubert 5645796c8dcSSimon Schubert 5655796c8dcSSimon Schubert 5665796c8dcSSimon Schubert /* An iterator that returns host wchar_t's from a target string. */ 5675796c8dcSSimon Schubert struct wchar_iterator 5685796c8dcSSimon Schubert { 5695796c8dcSSimon Schubert /* The underlying iconv descriptor. */ 5705796c8dcSSimon Schubert iconv_t desc; 5715796c8dcSSimon Schubert 5725796c8dcSSimon Schubert /* The input string. This is updated as convert characters. */ 5735796c8dcSSimon Schubert char *input; 5745796c8dcSSimon Schubert /* The number of bytes remaining in the input. */ 5755796c8dcSSimon Schubert size_t bytes; 5765796c8dcSSimon Schubert 5775796c8dcSSimon Schubert /* The width of an input character. */ 5785796c8dcSSimon Schubert size_t width; 5795796c8dcSSimon Schubert 5805796c8dcSSimon Schubert /* The output buffer and its size. */ 5815796c8dcSSimon Schubert gdb_wchar_t *out; 5825796c8dcSSimon Schubert size_t out_size; 5835796c8dcSSimon Schubert }; 5845796c8dcSSimon Schubert 5855796c8dcSSimon Schubert /* Create a new iterator. */ 5865796c8dcSSimon Schubert struct wchar_iterator * 587c50c785cSJohn Marino make_wchar_iterator (const gdb_byte *input, size_t bytes, 588c50c785cSJohn Marino const char *charset, size_t width) 5895796c8dcSSimon Schubert { 5905796c8dcSSimon Schubert struct wchar_iterator *result; 5915796c8dcSSimon Schubert iconv_t desc; 5925796c8dcSSimon Schubert 5935796c8dcSSimon Schubert desc = iconv_open (INTERMEDIATE_ENCODING, charset); 5945796c8dcSSimon Schubert if (desc == (iconv_t) -1) 595c50c785cSJohn Marino perror_with_name (_("Converting character sets")); 5965796c8dcSSimon Schubert 5975796c8dcSSimon Schubert result = XNEW (struct wchar_iterator); 5985796c8dcSSimon Schubert result->desc = desc; 5995796c8dcSSimon Schubert result->input = (char *) input; 6005796c8dcSSimon Schubert result->bytes = bytes; 6015796c8dcSSimon Schubert result->width = width; 6025796c8dcSSimon Schubert 6035796c8dcSSimon Schubert result->out = XNEW (gdb_wchar_t); 6045796c8dcSSimon Schubert result->out_size = 1; 6055796c8dcSSimon Schubert 6065796c8dcSSimon Schubert return result; 6075796c8dcSSimon Schubert } 6085796c8dcSSimon Schubert 6095796c8dcSSimon Schubert static void 6105796c8dcSSimon Schubert do_cleanup_iterator (void *p) 6115796c8dcSSimon Schubert { 6125796c8dcSSimon Schubert struct wchar_iterator *iter = p; 6135796c8dcSSimon Schubert 6145796c8dcSSimon Schubert iconv_close (iter->desc); 6155796c8dcSSimon Schubert xfree (iter->out); 6165796c8dcSSimon Schubert xfree (iter); 6175796c8dcSSimon Schubert } 6185796c8dcSSimon Schubert 6195796c8dcSSimon Schubert struct cleanup * 6205796c8dcSSimon Schubert make_cleanup_wchar_iterator (struct wchar_iterator *iter) 6215796c8dcSSimon Schubert { 6225796c8dcSSimon Schubert return make_cleanup (do_cleanup_iterator, iter); 6235796c8dcSSimon Schubert } 6245796c8dcSSimon Schubert 6255796c8dcSSimon Schubert int 6265796c8dcSSimon Schubert wchar_iterate (struct wchar_iterator *iter, 6275796c8dcSSimon Schubert enum wchar_iterate_result *out_result, 6285796c8dcSSimon Schubert gdb_wchar_t **out_chars, 6295796c8dcSSimon Schubert const gdb_byte **ptr, 6305796c8dcSSimon Schubert size_t *len) 6315796c8dcSSimon Schubert { 6325796c8dcSSimon Schubert size_t out_request; 6335796c8dcSSimon Schubert 6345796c8dcSSimon Schubert /* Try to convert some characters. At first we try to convert just 6355796c8dcSSimon Schubert a single character. The reason for this is that iconv does not 6365796c8dcSSimon Schubert necessarily update its outgoing arguments when it encounters an 6375796c8dcSSimon Schubert invalid input sequence -- but we want to reliably report this to 6385796c8dcSSimon Schubert our caller so it can emit an escape sequence. */ 6395796c8dcSSimon Schubert out_request = 1; 6405796c8dcSSimon Schubert while (iter->bytes > 0) 6415796c8dcSSimon Schubert { 6425796c8dcSSimon Schubert char *outptr = (char *) &iter->out[0]; 6435796c8dcSSimon Schubert char *orig_inptr = iter->input; 6445796c8dcSSimon Schubert size_t orig_in = iter->bytes; 6455796c8dcSSimon Schubert size_t out_avail = out_request * sizeof (gdb_wchar_t); 6465796c8dcSSimon Schubert size_t num; 6475796c8dcSSimon Schubert size_t r = iconv (iter->desc, 648c50c785cSJohn Marino (ICONV_CONST char **) &iter->input, 649c50c785cSJohn Marino &iter->bytes, &outptr, &out_avail); 650cf7f2e2dSJohn Marino 6515796c8dcSSimon Schubert if (r == (size_t) -1) 6525796c8dcSSimon Schubert { 6535796c8dcSSimon Schubert switch (errno) 6545796c8dcSSimon Schubert { 6555796c8dcSSimon Schubert case EILSEQ: 656c50c785cSJohn Marino /* Invalid input sequence. We still might have 657c50c785cSJohn Marino converted a character; if so, return it. */ 658c50c785cSJohn Marino if (out_avail < out_request * sizeof (gdb_wchar_t)) 659c50c785cSJohn Marino break; 660c50c785cSJohn Marino 661c50c785cSJohn Marino /* Otherwise skip the first invalid character, and let 662c50c785cSJohn Marino the caller know about it. */ 6635796c8dcSSimon Schubert *out_result = wchar_iterate_invalid; 6645796c8dcSSimon Schubert *ptr = iter->input; 6655796c8dcSSimon Schubert *len = iter->width; 6665796c8dcSSimon Schubert iter->input += iter->width; 6675796c8dcSSimon Schubert iter->bytes -= iter->width; 6685796c8dcSSimon Schubert return 0; 6695796c8dcSSimon Schubert 6705796c8dcSSimon Schubert case E2BIG: 6715796c8dcSSimon Schubert /* We ran out of space. We still might have converted a 6725796c8dcSSimon Schubert character; if so, return it. Otherwise, grow the 6735796c8dcSSimon Schubert buffer and try again. */ 6745796c8dcSSimon Schubert if (out_avail < out_request * sizeof (gdb_wchar_t)) 6755796c8dcSSimon Schubert break; 6765796c8dcSSimon Schubert 6775796c8dcSSimon Schubert ++out_request; 6785796c8dcSSimon Schubert if (out_request > iter->out_size) 6795796c8dcSSimon Schubert { 6805796c8dcSSimon Schubert iter->out_size = out_request; 6815796c8dcSSimon Schubert iter->out = xrealloc (iter->out, 6825796c8dcSSimon Schubert out_request * sizeof (gdb_wchar_t)); 6835796c8dcSSimon Schubert } 6845796c8dcSSimon Schubert continue; 6855796c8dcSSimon Schubert 6865796c8dcSSimon Schubert case EINVAL: 6875796c8dcSSimon Schubert /* Incomplete input sequence. Let the caller know, and 6885796c8dcSSimon Schubert arrange for future calls to see EOF. */ 6895796c8dcSSimon Schubert *out_result = wchar_iterate_incomplete; 6905796c8dcSSimon Schubert *ptr = iter->input; 6915796c8dcSSimon Schubert *len = iter->bytes; 6925796c8dcSSimon Schubert iter->bytes = 0; 6935796c8dcSSimon Schubert return 0; 6945796c8dcSSimon Schubert 6955796c8dcSSimon Schubert default: 696c50c785cSJohn Marino perror_with_name (_("Internal error while " 697c50c785cSJohn Marino "converting character sets")); 6985796c8dcSSimon Schubert } 6995796c8dcSSimon Schubert } 7005796c8dcSSimon Schubert 7015796c8dcSSimon Schubert /* We converted something. */ 7025796c8dcSSimon Schubert num = out_request - out_avail / sizeof (gdb_wchar_t); 7035796c8dcSSimon Schubert *out_result = wchar_iterate_ok; 7045796c8dcSSimon Schubert *out_chars = iter->out; 7055796c8dcSSimon Schubert *ptr = orig_inptr; 7065796c8dcSSimon Schubert *len = orig_in - iter->bytes; 7075796c8dcSSimon Schubert return num; 7085796c8dcSSimon Schubert } 7095796c8dcSSimon Schubert 7105796c8dcSSimon Schubert /* Really done. */ 7115796c8dcSSimon Schubert *out_result = wchar_iterate_eof; 7125796c8dcSSimon Schubert return -1; 7135796c8dcSSimon Schubert } 7145796c8dcSSimon Schubert 7155796c8dcSSimon Schubert 7165796c8dcSSimon Schubert /* The charset.c module initialization function. */ 7175796c8dcSSimon Schubert 7185796c8dcSSimon Schubert extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 7195796c8dcSSimon Schubert 7205796c8dcSSimon Schubert DEF_VEC_P (char_ptr); 7215796c8dcSSimon Schubert 7225796c8dcSSimon Schubert static VEC (char_ptr) *charsets; 7235796c8dcSSimon Schubert 7245796c8dcSSimon Schubert #ifdef PHONY_ICONV 7255796c8dcSSimon Schubert 7265796c8dcSSimon Schubert static void 7275796c8dcSSimon Schubert find_charset_names (void) 7285796c8dcSSimon Schubert { 7295796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 7305796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, NULL); 7315796c8dcSSimon Schubert } 7325796c8dcSSimon Schubert 7335796c8dcSSimon Schubert #else /* PHONY_ICONV */ 7345796c8dcSSimon Schubert 7355796c8dcSSimon Schubert /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 7365796c8dcSSimon Schubert provides different symbols in the static and dynamic libraries. 7375796c8dcSSimon Schubert So, configure may see libiconvlist but not iconvlist. But, calling 7385796c8dcSSimon Schubert iconvlist is the right thing to do and will work. Hence we do a 7395796c8dcSSimon Schubert check here but unconditionally call iconvlist below. */ 7405796c8dcSSimon Schubert #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 7415796c8dcSSimon Schubert 7425796c8dcSSimon Schubert /* A helper function that adds some character sets to the vector of 7435796c8dcSSimon Schubert all character sets. This is a callback function for iconvlist. */ 7445796c8dcSSimon Schubert 7455796c8dcSSimon Schubert static int 7465796c8dcSSimon Schubert add_one (unsigned int count, const char *const *names, void *data) 7475796c8dcSSimon Schubert { 7485796c8dcSSimon Schubert unsigned int i; 7495796c8dcSSimon Schubert 7505796c8dcSSimon Schubert for (i = 0; i < count; ++i) 7515796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 7525796c8dcSSimon Schubert 7535796c8dcSSimon Schubert return 0; 7545796c8dcSSimon Schubert } 7555796c8dcSSimon Schubert 7565796c8dcSSimon Schubert static void 7575796c8dcSSimon Schubert find_charset_names (void) 7585796c8dcSSimon Schubert { 7595796c8dcSSimon Schubert iconvlist (add_one, NULL); 7605796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, NULL); 7615796c8dcSSimon Schubert } 7625796c8dcSSimon Schubert 7635796c8dcSSimon Schubert #else 7645796c8dcSSimon Schubert 765cf7f2e2dSJohn Marino /* Return non-zero if LINE (output from iconv) should be ignored. 766cf7f2e2dSJohn Marino Older iconv programs (e.g. 2.2.2) include the human readable 767cf7f2e2dSJohn Marino introduction even when stdout is not a tty. Newer versions omit 768cf7f2e2dSJohn Marino the intro if stdout is not a tty. */ 769cf7f2e2dSJohn Marino 770cf7f2e2dSJohn Marino static int 771cf7f2e2dSJohn Marino ignore_line_p (const char *line) 772cf7f2e2dSJohn Marino { 773cf7f2e2dSJohn Marino /* This table is used to filter the output. If this text appears 774cf7f2e2dSJohn Marino anywhere in the line, it is ignored (strstr is used). */ 775cf7f2e2dSJohn Marino static const char * const ignore_lines[] = 776cf7f2e2dSJohn Marino { 777cf7f2e2dSJohn Marino "The following", 778cf7f2e2dSJohn Marino "not necessarily", 779cf7f2e2dSJohn Marino "the FROM and TO", 780cf7f2e2dSJohn Marino "listed with several", 781cf7f2e2dSJohn Marino NULL 782cf7f2e2dSJohn Marino }; 783cf7f2e2dSJohn Marino int i; 784cf7f2e2dSJohn Marino 785cf7f2e2dSJohn Marino for (i = 0; ignore_lines[i] != NULL; ++i) 786cf7f2e2dSJohn Marino { 787cf7f2e2dSJohn Marino if (strstr (line, ignore_lines[i]) != NULL) 788cf7f2e2dSJohn Marino return 1; 789cf7f2e2dSJohn Marino } 790cf7f2e2dSJohn Marino 791cf7f2e2dSJohn Marino return 0; 792cf7f2e2dSJohn Marino } 793cf7f2e2dSJohn Marino 7945796c8dcSSimon Schubert static void 7955796c8dcSSimon Schubert find_charset_names (void) 7965796c8dcSSimon Schubert { 7975796c8dcSSimon Schubert struct pex_obj *child; 7985796c8dcSSimon Schubert char *args[3]; 7995796c8dcSSimon Schubert int err, status; 8005796c8dcSSimon Schubert int fail = 1; 801*a45ae5f8SJohn Marino int flags; 802cf7f2e2dSJohn Marino struct gdb_environ *iconv_env; 803*a45ae5f8SJohn Marino char *iconv_program; 8045796c8dcSSimon Schubert 805c50c785cSJohn Marino /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 806c50c785cSJohn Marino not a tty. We need to recognize it and ignore it. This text is 807c50c785cSJohn Marino subject to translation, so force LANGUAGE=C. */ 808cf7f2e2dSJohn Marino iconv_env = make_environ (); 809cf7f2e2dSJohn Marino init_environ (iconv_env); 810cf7f2e2dSJohn Marino set_in_environ (iconv_env, "LANGUAGE", "C"); 811cf7f2e2dSJohn Marino set_in_environ (iconv_env, "LC_ALL", "C"); 812cf7f2e2dSJohn Marino 813cf7f2e2dSJohn Marino child = pex_init (PEX_USE_PIPES, "iconv", NULL); 8145796c8dcSSimon Schubert 815*a45ae5f8SJohn Marino #ifdef ICONV_BIN 816*a45ae5f8SJohn Marino { 817*a45ae5f8SJohn Marino char *iconv_dir = relocate_gdb_directory (ICONV_BIN, 818*a45ae5f8SJohn Marino ICONV_BIN_RELOCATABLE); 819*a45ae5f8SJohn Marino iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); 820*a45ae5f8SJohn Marino xfree (iconv_dir); 821*a45ae5f8SJohn Marino } 822*a45ae5f8SJohn Marino #else 823*a45ae5f8SJohn Marino iconv_program = xstrdup ("iconv"); 824*a45ae5f8SJohn Marino #endif 825*a45ae5f8SJohn Marino args[0] = iconv_program; 8265796c8dcSSimon Schubert args[1] = "-l"; 8275796c8dcSSimon Schubert args[2] = NULL; 828*a45ae5f8SJohn Marino flags = PEX_STDERR_TO_STDOUT; 829*a45ae5f8SJohn Marino #ifndef ICONV_BIN 830*a45ae5f8SJohn Marino flags |= PEX_SEARCH; 831*a45ae5f8SJohn Marino #endif 8325796c8dcSSimon Schubert /* Note that we simply ignore errors here. */ 833*a45ae5f8SJohn Marino if (!pex_run_in_environment (child, flags, 834*a45ae5f8SJohn Marino args[0], args, environ_vector (iconv_env), 835cf7f2e2dSJohn Marino NULL, NULL, &err)) 8365796c8dcSSimon Schubert { 8375796c8dcSSimon Schubert FILE *in = pex_read_output (child, 0); 8385796c8dcSSimon Schubert 8395796c8dcSSimon Schubert /* POSIX says that iconv -l uses an unspecified format. We 8405796c8dcSSimon Schubert parse the glibc and libiconv formats; feel free to add others 8415796c8dcSSimon Schubert as needed. */ 842cf7f2e2dSJohn Marino 8435796c8dcSSimon Schubert while (!feof (in)) 8445796c8dcSSimon Schubert { 8455796c8dcSSimon Schubert /* The size of buf is chosen arbitrarily. */ 8465796c8dcSSimon Schubert char buf[1024]; 8475796c8dcSSimon Schubert char *start, *r; 848cf7f2e2dSJohn Marino int len; 8495796c8dcSSimon Schubert 8505796c8dcSSimon Schubert r = fgets (buf, sizeof (buf), in); 8515796c8dcSSimon Schubert if (!r) 8525796c8dcSSimon Schubert break; 8535796c8dcSSimon Schubert len = strlen (r); 8545796c8dcSSimon Schubert if (len <= 3) 8555796c8dcSSimon Schubert continue; 856cf7f2e2dSJohn Marino if (ignore_line_p (r)) 857cf7f2e2dSJohn Marino continue; 858cf7f2e2dSJohn Marino 8595796c8dcSSimon Schubert /* Strip off the newline. */ 8605796c8dcSSimon Schubert --len; 8615796c8dcSSimon Schubert /* Strip off one or two '/'s. glibc will print lines like 8625796c8dcSSimon Schubert "8859_7//", but also "10646-1:1993/UCS4/". */ 8635796c8dcSSimon Schubert if (buf[len - 1] == '/') 8645796c8dcSSimon Schubert --len; 8655796c8dcSSimon Schubert if (buf[len - 1] == '/') 8665796c8dcSSimon Schubert --len; 8675796c8dcSSimon Schubert buf[len] = '\0'; 8685796c8dcSSimon Schubert 8695796c8dcSSimon Schubert /* libiconv will print multiple entries per line, separated 870c50c785cSJohn Marino by spaces. Older iconvs will print multiple entries per 871c50c785cSJohn Marino line, indented by two spaces, and separated by ", " 872cf7f2e2dSJohn Marino (i.e. the human readable form). */ 8735796c8dcSSimon Schubert start = buf; 8745796c8dcSSimon Schubert while (1) 8755796c8dcSSimon Schubert { 8765796c8dcSSimon Schubert int keep_going; 8775796c8dcSSimon Schubert char *p; 8785796c8dcSSimon Schubert 879cf7f2e2dSJohn Marino /* Skip leading blanks. */ 880cf7f2e2dSJohn Marino for (p = start; *p && *p == ' '; ++p) 881cf7f2e2dSJohn Marino ; 882cf7f2e2dSJohn Marino start = p; 883cf7f2e2dSJohn Marino /* Find the next space, comma, or end-of-line. */ 884cf7f2e2dSJohn Marino for ( ; *p && *p != ' ' && *p != ','; ++p) 8855796c8dcSSimon Schubert ; 8865796c8dcSSimon Schubert /* Ignore an empty result. */ 8875796c8dcSSimon Schubert if (p == start) 8885796c8dcSSimon Schubert break; 8895796c8dcSSimon Schubert keep_going = *p; 8905796c8dcSSimon Schubert *p = '\0'; 8915796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, xstrdup (start)); 8925796c8dcSSimon Schubert if (!keep_going) 8935796c8dcSSimon Schubert break; 8945796c8dcSSimon Schubert /* Skip any extra spaces. */ 8955796c8dcSSimon Schubert for (start = p + 1; *start && *start == ' '; ++start) 8965796c8dcSSimon Schubert ; 8975796c8dcSSimon Schubert } 8985796c8dcSSimon Schubert } 8995796c8dcSSimon Schubert 9005796c8dcSSimon Schubert if (pex_get_status (child, 1, &status) 9015796c8dcSSimon Schubert && WIFEXITED (status) && !WEXITSTATUS (status)) 9025796c8dcSSimon Schubert fail = 0; 9035796c8dcSSimon Schubert 9045796c8dcSSimon Schubert } 9055796c8dcSSimon Schubert 906*a45ae5f8SJohn Marino xfree (iconv_program); 9075796c8dcSSimon Schubert pex_free (child); 908cf7f2e2dSJohn Marino free_environ (iconv_env); 9095796c8dcSSimon Schubert 9105796c8dcSSimon Schubert if (fail) 9115796c8dcSSimon Schubert { 9125796c8dcSSimon Schubert /* Some error occurred, so drop the vector. */ 9135796c8dcSSimon Schubert int ix; 9145796c8dcSSimon Schubert char *elt; 9155796c8dcSSimon Schubert for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) 9165796c8dcSSimon Schubert xfree (elt); 9175796c8dcSSimon Schubert VEC_truncate (char_ptr, charsets, 0); 9185796c8dcSSimon Schubert } 9195796c8dcSSimon Schubert else 9205796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, NULL); 9215796c8dcSSimon Schubert } 9225796c8dcSSimon Schubert 9235796c8dcSSimon Schubert #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 9245796c8dcSSimon Schubert #endif /* PHONY_ICONV */ 9255796c8dcSSimon Schubert 926cf7f2e2dSJohn Marino /* The "auto" target charset used by default_auto_charset. */ 927cf7f2e2dSJohn Marino static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 928cf7f2e2dSJohn Marino 929cf7f2e2dSJohn Marino const char * 930cf7f2e2dSJohn Marino default_auto_charset (void) 931cf7f2e2dSJohn Marino { 932cf7f2e2dSJohn Marino return auto_target_charset_name; 933cf7f2e2dSJohn Marino } 934cf7f2e2dSJohn Marino 935cf7f2e2dSJohn Marino const char * 936cf7f2e2dSJohn Marino default_auto_wide_charset (void) 937cf7f2e2dSJohn Marino { 938cf7f2e2dSJohn Marino return GDB_DEFAULT_TARGET_WIDE_CHARSET; 939cf7f2e2dSJohn Marino } 940cf7f2e2dSJohn Marino 941c50c785cSJohn Marino 942c50c785cSJohn Marino #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 943c50c785cSJohn Marino /* Macro used for UTF or UCS endianness suffix. */ 944c50c785cSJohn Marino #if WORDS_BIGENDIAN 945c50c785cSJohn Marino #define ENDIAN_SUFFIX "BE" 946c50c785cSJohn Marino #else 947c50c785cSJohn Marino #define ENDIAN_SUFFIX "LE" 948c50c785cSJohn Marino #endif 949c50c785cSJohn Marino 950c50c785cSJohn Marino /* The code below serves to generate a compile time error if 951c50c785cSJohn Marino gdb_wchar_t type is not of size 2 nor 4, despite the fact that 952c50c785cSJohn Marino macro __STDC_ISO_10646__ is defined. 953c50c785cSJohn Marino This is better than a gdb_assert call, because GDB cannot handle 954c50c785cSJohn Marino strings correctly if this size is different. */ 955c50c785cSJohn Marino 956c50c785cSJohn Marino extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 957c50c785cSJohn Marino || sizeof (gdb_wchar_t) == 4) 958c50c785cSJohn Marino ? 1 : -1]; 959c50c785cSJohn Marino 960c50c785cSJohn Marino /* intermediate_encoding returns the charset unsed internally by 961c50c785cSJohn Marino GDB to convert between target and host encodings. As the test above 962c50c785cSJohn Marino compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 963c50c785cSJohn Marino UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 964c50c785cSJohn Marino otherwise an error is generated. */ 965c50c785cSJohn Marino 966c50c785cSJohn Marino const char * 967c50c785cSJohn Marino intermediate_encoding (void) 968c50c785cSJohn Marino { 969c50c785cSJohn Marino iconv_t desc; 970c50c785cSJohn Marino static const char *stored_result = NULL; 971c50c785cSJohn Marino char *result; 972c50c785cSJohn Marino int i; 973c50c785cSJohn Marino 974c50c785cSJohn Marino if (stored_result) 975c50c785cSJohn Marino return stored_result; 976c50c785cSJohn Marino result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 977c50c785cSJohn Marino ENDIAN_SUFFIX); 978c50c785cSJohn Marino /* Check that the name is supported by iconv_open. */ 979c50c785cSJohn Marino desc = iconv_open (result, host_charset ()); 980c50c785cSJohn Marino if (desc != (iconv_t) -1) 981c50c785cSJohn Marino { 982c50c785cSJohn Marino iconv_close (desc); 983c50c785cSJohn Marino stored_result = result; 984c50c785cSJohn Marino return result; 985c50c785cSJohn Marino } 986c50c785cSJohn Marino /* Not valid, free the allocated memory. */ 987c50c785cSJohn Marino xfree (result); 988c50c785cSJohn Marino /* Second try, with UCS-2 type. */ 989c50c785cSJohn Marino result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 990c50c785cSJohn Marino ENDIAN_SUFFIX); 991c50c785cSJohn Marino /* Check that the name is supported by iconv_open. */ 992c50c785cSJohn Marino desc = iconv_open (result, host_charset ()); 993c50c785cSJohn Marino if (desc != (iconv_t) -1) 994c50c785cSJohn Marino { 995c50c785cSJohn Marino iconv_close (desc); 996c50c785cSJohn Marino stored_result = result; 997c50c785cSJohn Marino return result; 998c50c785cSJohn Marino } 999c50c785cSJohn Marino /* Not valid, free the allocated memory. */ 1000c50c785cSJohn Marino xfree (result); 1001c50c785cSJohn Marino /* No valid charset found, generate error here. */ 1002c50c785cSJohn Marino error (_("Unable to find a vaild charset for string conversions")); 1003c50c785cSJohn Marino } 1004c50c785cSJohn Marino 1005c50c785cSJohn Marino #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 1006c50c785cSJohn Marino 10075796c8dcSSimon Schubert void 10085796c8dcSSimon Schubert _initialize_charset (void) 10095796c8dcSSimon Schubert { 1010cf7f2e2dSJohn Marino /* The first element is always "auto". */ 10115796c8dcSSimon Schubert VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 10125796c8dcSSimon Schubert find_charset_names (); 10135796c8dcSSimon Schubert 10145796c8dcSSimon Schubert if (VEC_length (char_ptr, charsets) > 1) 10155796c8dcSSimon Schubert charset_enum = (const char **) VEC_address (char_ptr, charsets); 10165796c8dcSSimon Schubert else 10175796c8dcSSimon Schubert charset_enum = default_charset_names; 10185796c8dcSSimon Schubert 10195796c8dcSSimon Schubert #ifndef PHONY_ICONV 10205796c8dcSSimon Schubert #ifdef HAVE_LANGINFO_CODESET 1021cf7f2e2dSJohn Marino /* The result of nl_langinfo may be overwritten later. This may 1022cf7f2e2dSJohn Marino leak a little memory, if the user later changes the host charset, 1023cf7f2e2dSJohn Marino but that doesn't matter much. */ 1024cf7f2e2dSJohn Marino auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1025c50c785cSJohn Marino /* Solaris will return `646' here -- but the Solaris iconv then does 1026c50c785cSJohn Marino not accept this. Darwin (and maybe FreeBSD) may return "" here, 1027cf7f2e2dSJohn Marino which GNU libiconv doesn't like (infinite loop). */ 1028cf7f2e2dSJohn Marino if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 10295796c8dcSSimon Schubert auto_host_charset_name = "ASCII"; 1030cf7f2e2dSJohn Marino auto_target_charset_name = auto_host_charset_name; 1031cf7f2e2dSJohn Marino #elif defined (USE_WIN32API) 1032cf7f2e2dSJohn Marino { 1033c50c785cSJohn Marino /* "CP" + x<=5 digits + paranoia. */ 1034c50c785cSJohn Marino static char w32_host_default_charset[16]; 10355796c8dcSSimon Schubert 1036cf7f2e2dSJohn Marino snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1037cf7f2e2dSJohn Marino "CP%d", GetACP()); 1038cf7f2e2dSJohn Marino auto_host_charset_name = w32_host_default_charset; 1039cf7f2e2dSJohn Marino auto_target_charset_name = auto_host_charset_name; 1040cf7f2e2dSJohn Marino } 10415796c8dcSSimon Schubert #endif 10425796c8dcSSimon Schubert #endif 10435796c8dcSSimon Schubert 10445796c8dcSSimon Schubert add_setshow_enum_cmd ("charset", class_support, 1045cf7f2e2dSJohn Marino charset_enum, &host_charset_name, _("\ 10465796c8dcSSimon Schubert Set the host and target character sets."), _("\ 10475796c8dcSSimon Schubert Show the host and target character sets."), _("\ 10485796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\ 10495796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\ 10505796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\ 10515796c8dcSSimon Schubert not support any others.\n\ 10525796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set charset <TAB>'."), 10535796c8dcSSimon Schubert /* Note that the sfunc below needs to set 10545796c8dcSSimon Schubert target_charset_name, because the 'set 10555796c8dcSSimon Schubert charset' command sets two variables. */ 10565796c8dcSSimon Schubert set_charset_sfunc, 10575796c8dcSSimon Schubert show_charset, 10585796c8dcSSimon Schubert &setlist, &showlist); 10595796c8dcSSimon Schubert 10605796c8dcSSimon Schubert add_setshow_enum_cmd ("host-charset", class_support, 10615796c8dcSSimon Schubert charset_enum, &host_charset_name, _("\ 10625796c8dcSSimon Schubert Set the host character set."), _("\ 10635796c8dcSSimon Schubert Show the host character set."), _("\ 10645796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\ 10655796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\ 10665796c8dcSSimon Schubert not support any others.\n\ 10675796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 10685796c8dcSSimon Schubert set_host_charset_sfunc, 10695796c8dcSSimon Schubert show_host_charset_name, 10705796c8dcSSimon Schubert &setlist, &showlist); 10715796c8dcSSimon Schubert 10725796c8dcSSimon Schubert add_setshow_enum_cmd ("target-charset", class_support, 1073cf7f2e2dSJohn Marino charset_enum, &target_charset_name, _("\ 10745796c8dcSSimon Schubert Set the target character set."), _("\ 10755796c8dcSSimon Schubert Show the target character set."), _("\ 10765796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\ 10775796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\ 10785796c8dcSSimon Schubert character sets as needed.\n\ 10795796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 10805796c8dcSSimon Schubert set_target_charset_sfunc, 10815796c8dcSSimon Schubert show_target_charset_name, 10825796c8dcSSimon Schubert &setlist, &showlist); 10835796c8dcSSimon Schubert 10845796c8dcSSimon Schubert add_setshow_enum_cmd ("target-wide-charset", class_support, 1085cf7f2e2dSJohn Marino charset_enum, &target_wide_charset_name, 10865796c8dcSSimon Schubert _("\ 10875796c8dcSSimon Schubert Set the target wide character set."), _("\ 10885796c8dcSSimon Schubert Show the target wide character set."), _("\ 1089c50c785cSJohn Marino The `target wide character set' is the one used by the program being debugged.\ 1090c50c785cSJohn Marino \nIn particular it is the encoding used by `wchar_t'.\n\ 10915796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\ 10925796c8dcSSimon Schubert character sets as needed.\n\ 10935796c8dcSSimon Schubert To see a list of the character sets GDB supports, type\n\ 10945796c8dcSSimon Schubert `set target-wide-charset'<TAB>"), 10955796c8dcSSimon Schubert set_target_wide_charset_sfunc, 10965796c8dcSSimon Schubert show_target_wide_charset_name, 10975796c8dcSSimon Schubert &setlist, &showlist); 10985796c8dcSSimon Schubert } 1099