xref: /dflybsd-src/contrib/gdb-7/gdb/charset.c (revision cf7f2e2d389e8012d562650bd94d7e433f449d6e)
15796c8dcSSimon Schubert /* Character set conversion support for GDB.
25796c8dcSSimon Schubert 
3*cf7f2e2dSJohn Marino    Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4*cf7f2e2dSJohn Marino    Free Software Foundation, Inc.
55796c8dcSSimon Schubert 
65796c8dcSSimon Schubert    This file is part of GDB.
75796c8dcSSimon Schubert 
85796c8dcSSimon Schubert    This program is free software; you can redistribute it and/or modify
95796c8dcSSimon Schubert    it under the terms of the GNU General Public License as published by
105796c8dcSSimon Schubert    the Free Software Foundation; either version 3 of the License, or
115796c8dcSSimon Schubert    (at your option) any later version.
125796c8dcSSimon Schubert 
135796c8dcSSimon Schubert    This program is distributed in the hope that it will be useful,
145796c8dcSSimon Schubert    but WITHOUT ANY WARRANTY; without even the implied warranty of
155796c8dcSSimon Schubert    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
165796c8dcSSimon Schubert    GNU General Public License for more details.
175796c8dcSSimon Schubert 
185796c8dcSSimon Schubert    You should have received a copy of the GNU General Public License
195796c8dcSSimon Schubert    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
205796c8dcSSimon Schubert 
215796c8dcSSimon Schubert #include "defs.h"
225796c8dcSSimon Schubert #include "charset.h"
235796c8dcSSimon Schubert #include "gdbcmd.h"
245796c8dcSSimon Schubert #include "gdb_assert.h"
255796c8dcSSimon Schubert #include "gdb_obstack.h"
265796c8dcSSimon Schubert #include "gdb_wait.h"
275796c8dcSSimon Schubert #include "charset-list.h"
285796c8dcSSimon Schubert #include "vec.h"
29*cf7f2e2dSJohn Marino #include "environ.h"
30*cf7f2e2dSJohn Marino #include "arch-utils.h"
315796c8dcSSimon Schubert 
325796c8dcSSimon Schubert #include <stddef.h>
335796c8dcSSimon Schubert #include "gdb_string.h"
345796c8dcSSimon Schubert #include <ctype.h>
355796c8dcSSimon Schubert 
36*cf7f2e2dSJohn Marino #ifdef USE_WIN32API
37*cf7f2e2dSJohn Marino #include <windows.h>
38*cf7f2e2dSJohn Marino #endif
395796c8dcSSimon Schubert 
405796c8dcSSimon Schubert /* How GDB's character set support works
415796c8dcSSimon Schubert 
425796c8dcSSimon Schubert    GDB has three global settings:
435796c8dcSSimon Schubert 
445796c8dcSSimon Schubert    - The `current host character set' is the character set GDB should
455796c8dcSSimon Schubert      use in talking to the user, and which (hopefully) the user's
465796c8dcSSimon Schubert      terminal knows how to display properly.  Most users should not
475796c8dcSSimon Schubert      change this.
485796c8dcSSimon Schubert 
495796c8dcSSimon Schubert    - The `current target character set' is the character set the
505796c8dcSSimon Schubert      program being debugged uses.
515796c8dcSSimon Schubert 
525796c8dcSSimon Schubert    - The `current target wide character set' is the wide character set
535796c8dcSSimon Schubert      the program being debugged uses, that is, the encoding used for
545796c8dcSSimon Schubert      wchar_t.
555796c8dcSSimon Schubert 
565796c8dcSSimon Schubert    There are commands to set each of these, and mechanisms for
575796c8dcSSimon Schubert    choosing reasonable default values.  GDB has a global list of
585796c8dcSSimon Schubert    character sets that it can use as its host or target character
595796c8dcSSimon Schubert    sets.
605796c8dcSSimon Schubert 
615796c8dcSSimon Schubert    The header file `charset.h' declares various functions that
625796c8dcSSimon Schubert    different pieces of GDB need to perform tasks like:
635796c8dcSSimon Schubert 
645796c8dcSSimon Schubert    - printing target strings and characters to the user's terminal
655796c8dcSSimon Schubert      (mostly target->host conversions),
665796c8dcSSimon Schubert 
675796c8dcSSimon Schubert    - building target-appropriate representations of strings and
685796c8dcSSimon Schubert      characters the user enters in expressions (mostly host->target
695796c8dcSSimon Schubert      conversions),
705796c8dcSSimon Schubert 
715796c8dcSSimon Schubert      and so on.
725796c8dcSSimon Schubert 
735796c8dcSSimon Schubert    To avoid excessive code duplication and maintenance efforts,
745796c8dcSSimon Schubert    GDB simply requires a capable iconv function.  Users on platforms
755796c8dcSSimon Schubert    without a suitable iconv can use the GNU iconv library.  */
765796c8dcSSimon Schubert 
775796c8dcSSimon Schubert 
785796c8dcSSimon Schubert #ifdef PHONY_ICONV
795796c8dcSSimon Schubert 
805796c8dcSSimon Schubert /* Provide a phony iconv that does as little as possible.  Also,
815796c8dcSSimon Schubert    arrange for there to be a single available character set.  */
825796c8dcSSimon Schubert 
835796c8dcSSimon Schubert #undef GDB_DEFAULT_HOST_CHARSET
845796c8dcSSimon Schubert #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
855796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
865796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
875796c8dcSSimon Schubert #undef DEFAULT_CHARSET_NAMES
885796c8dcSSimon Schubert #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
895796c8dcSSimon Schubert 
905796c8dcSSimon Schubert #undef iconv_t
915796c8dcSSimon Schubert #define iconv_t int
925796c8dcSSimon Schubert #undef iconv_open
935796c8dcSSimon Schubert #undef iconv
945796c8dcSSimon Schubert #undef iconv_close
955796c8dcSSimon Schubert 
965796c8dcSSimon Schubert #undef ICONV_CONST
975796c8dcSSimon Schubert #define ICONV_CONST const
985796c8dcSSimon Schubert 
995796c8dcSSimon Schubert /* Some systems don't have EILSEQ, so we define it here, but not as
1005796c8dcSSimon Schubert    EINVAL, because callers of `iconv' want to distinguish EINVAL and
1015796c8dcSSimon Schubert    EILSEQ.  This is what iconv.h from libiconv does as well.  Note
1025796c8dcSSimon Schubert    that wchar.h may also define EILSEQ, so this needs to be after we
1035796c8dcSSimon Schubert    include wchar.h, which happens in defs.h through gdb_wchar.h.  */
1045796c8dcSSimon Schubert #ifndef EILSEQ
1055796c8dcSSimon Schubert #define EILSEQ ENOENT
1065796c8dcSSimon Schubert #endif
1075796c8dcSSimon Schubert 
1085796c8dcSSimon Schubert iconv_t
1095796c8dcSSimon Schubert iconv_open (const char *to, const char *from)
1105796c8dcSSimon Schubert {
111*cf7f2e2dSJohn Marino   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
1125796c8dcSSimon Schubert      We allow conversions to wchar_t and the host charset.  */
113*cf7f2e2dSJohn Marino   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
1145796c8dcSSimon Schubert       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
1155796c8dcSSimon Schubert     return -1;
1165796c8dcSSimon Schubert   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
1175796c8dcSSimon Schubert     return -1;
1185796c8dcSSimon Schubert 
119*cf7f2e2dSJohn Marino   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
1205796c8dcSSimon Schubert      used as a flag in calls to iconv.  */
121*cf7f2e2dSJohn Marino   return !strcmp (from, "UTF-32BE");
1225796c8dcSSimon Schubert }
1235796c8dcSSimon Schubert 
1245796c8dcSSimon Schubert int
1255796c8dcSSimon Schubert iconv_close (iconv_t arg)
1265796c8dcSSimon Schubert {
1275796c8dcSSimon Schubert   return 0;
1285796c8dcSSimon Schubert }
1295796c8dcSSimon Schubert 
1305796c8dcSSimon Schubert size_t
131*cf7f2e2dSJohn Marino iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
1325796c8dcSSimon Schubert        char **outbuf, size_t *outbytesleft)
1335796c8dcSSimon Schubert {
134*cf7f2e2dSJohn Marino   if (utf_flag)
1355796c8dcSSimon Schubert     {
1365796c8dcSSimon Schubert       while (*inbytesleft >= 4)
1375796c8dcSSimon Schubert 	{
1385796c8dcSSimon Schubert 	  size_t j;
1395796c8dcSSimon Schubert 	  unsigned long c = 0;
1405796c8dcSSimon Schubert 
1415796c8dcSSimon Schubert 	  for (j = 0; j < 4; ++j)
1425796c8dcSSimon Schubert 	    {
1435796c8dcSSimon Schubert 	      c <<= 8;
1445796c8dcSSimon Schubert 	      c += (*inbuf)[j] & 0xff;
1455796c8dcSSimon Schubert 	    }
1465796c8dcSSimon Schubert 
1475796c8dcSSimon Schubert 	  if (c >= 256)
1485796c8dcSSimon Schubert 	    {
1495796c8dcSSimon Schubert 	      errno = EILSEQ;
1505796c8dcSSimon Schubert 	      return -1;
1515796c8dcSSimon Schubert 	    }
1525796c8dcSSimon Schubert 	  **outbuf = c & 0xff;
1535796c8dcSSimon Schubert 	  ++*outbuf;
1545796c8dcSSimon Schubert 	  --*outbytesleft;
1555796c8dcSSimon Schubert 
1565796c8dcSSimon Schubert 	  ++*inbuf;
1575796c8dcSSimon Schubert 	  *inbytesleft -= 4;
1585796c8dcSSimon Schubert 	}
1595796c8dcSSimon Schubert       if (*inbytesleft < 4)
1605796c8dcSSimon Schubert 	{
1615796c8dcSSimon Schubert 	  errno = EINVAL;
1625796c8dcSSimon Schubert 	  return -1;
1635796c8dcSSimon Schubert 	}
1645796c8dcSSimon Schubert     }
1655796c8dcSSimon Schubert   else
1665796c8dcSSimon Schubert     {
1675796c8dcSSimon Schubert       /* In all other cases we simply copy input bytes to the
1685796c8dcSSimon Schubert 	 output.  */
1695796c8dcSSimon Schubert       size_t amt = *inbytesleft;
170*cf7f2e2dSJohn Marino 
1715796c8dcSSimon Schubert       if (amt > *outbytesleft)
1725796c8dcSSimon Schubert 	amt = *outbytesleft;
1735796c8dcSSimon Schubert       memcpy (*outbuf, *inbuf, amt);
1745796c8dcSSimon Schubert       *inbuf += amt;
1755796c8dcSSimon Schubert       *outbuf += amt;
1765796c8dcSSimon Schubert       *inbytesleft -= amt;
1775796c8dcSSimon Schubert       *outbytesleft -= amt;
1785796c8dcSSimon Schubert     }
1795796c8dcSSimon Schubert 
1805796c8dcSSimon Schubert   if (*inbytesleft)
1815796c8dcSSimon Schubert     {
1825796c8dcSSimon Schubert       errno = E2BIG;
1835796c8dcSSimon Schubert       return -1;
1845796c8dcSSimon Schubert     }
1855796c8dcSSimon Schubert 
1865796c8dcSSimon Schubert   /* The number of non-reversible conversions -- but they were all
1875796c8dcSSimon Schubert      reversible.  */
1885796c8dcSSimon Schubert   return 0;
1895796c8dcSSimon Schubert }
1905796c8dcSSimon Schubert 
1915796c8dcSSimon Schubert #endif
1925796c8dcSSimon Schubert 
1935796c8dcSSimon Schubert 
1945796c8dcSSimon Schubert 
1955796c8dcSSimon Schubert /* The global lists of character sets and translations.  */
1965796c8dcSSimon Schubert 
1975796c8dcSSimon Schubert 
1985796c8dcSSimon Schubert #ifndef GDB_DEFAULT_TARGET_CHARSET
1995796c8dcSSimon Schubert #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
2005796c8dcSSimon Schubert #endif
2015796c8dcSSimon Schubert 
2025796c8dcSSimon Schubert #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
203*cf7f2e2dSJohn Marino #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
2045796c8dcSSimon Schubert #endif
2055796c8dcSSimon Schubert 
2065796c8dcSSimon Schubert static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
2075796c8dcSSimon Schubert static const char *host_charset_name = "auto";
2085796c8dcSSimon Schubert static void
2095796c8dcSSimon Schubert show_host_charset_name (struct ui_file *file, int from_tty,
2105796c8dcSSimon Schubert 			struct cmd_list_element *c,
2115796c8dcSSimon Schubert 			const char *value)
2125796c8dcSSimon Schubert {
2135796c8dcSSimon Schubert   if (!strcmp (value, "auto"))
2145796c8dcSSimon Schubert     fprintf_filtered (file,
2155796c8dcSSimon Schubert 		      _("The host character set is \"auto; currently %s\".\n"),
2165796c8dcSSimon Schubert 		      auto_host_charset_name);
2175796c8dcSSimon Schubert   else
2185796c8dcSSimon Schubert     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
2195796c8dcSSimon Schubert }
2205796c8dcSSimon Schubert 
static const char *target_charset_name = "auto";

/* Print the target character set setting VALUE to FILE.  When VALUE
   is "auto", also print the charset the current architecture selects
   automatically.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
2355796c8dcSSimon Schubert 
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set setting VALUE to FILE.  When
   VALUE is "auto", also print the wide charset the current
   architecture selects automatically.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
2505796c8dcSSimon Schubert 
2515796c8dcSSimon Schubert static const char *default_charset_names[] =
2525796c8dcSSimon Schubert {
2535796c8dcSSimon Schubert   DEFAULT_CHARSET_NAMES
2545796c8dcSSimon Schubert   0
2555796c8dcSSimon Schubert };
2565796c8dcSSimon Schubert 
2575796c8dcSSimon Schubert static const char **charset_enum;
2585796c8dcSSimon Schubert 
2595796c8dcSSimon Schubert 
2605796c8dcSSimon Schubert /* If the target wide character set has big- or little-endian
2615796c8dcSSimon Schubert    variants, these are the corresponding names.  */
2625796c8dcSSimon Schubert static const char *target_wide_charset_be_name;
2635796c8dcSSimon Schubert static const char *target_wide_charset_le_name;
2645796c8dcSSimon Schubert 
265*cf7f2e2dSJohn Marino /* The architecture for which the BE- and LE-names are valid.  */
266*cf7f2e2dSJohn Marino static struct gdbarch *be_le_arch;
267*cf7f2e2dSJohn Marino 
268*cf7f2e2dSJohn Marino /* A helper function which sets the target wide big- and little-endian
269*cf7f2e2dSJohn Marino    character set names, if possible.  */
2705796c8dcSSimon Schubert 
2715796c8dcSSimon Schubert static void
272*cf7f2e2dSJohn Marino set_be_le_names (struct gdbarch *gdbarch)
2735796c8dcSSimon Schubert {
2745796c8dcSSimon Schubert   int i, len;
275*cf7f2e2dSJohn Marino   const char *target_wide;
276*cf7f2e2dSJohn Marino 
277*cf7f2e2dSJohn Marino   if (be_le_arch == gdbarch)
278*cf7f2e2dSJohn Marino     return;
279*cf7f2e2dSJohn Marino   be_le_arch = gdbarch;
2805796c8dcSSimon Schubert 
2815796c8dcSSimon Schubert   target_wide_charset_le_name = NULL;
2825796c8dcSSimon Schubert   target_wide_charset_be_name = NULL;
2835796c8dcSSimon Schubert 
284*cf7f2e2dSJohn Marino   target_wide = target_wide_charset_name;
285*cf7f2e2dSJohn Marino   if (!strcmp (target_wide, "auto"))
286*cf7f2e2dSJohn Marino     target_wide = gdbarch_auto_wide_charset (gdbarch);
287*cf7f2e2dSJohn Marino 
288*cf7f2e2dSJohn Marino   len = strlen (target_wide);
2895796c8dcSSimon Schubert   for (i = 0; charset_enum[i]; ++i)
2905796c8dcSSimon Schubert     {
291*cf7f2e2dSJohn Marino       if (strncmp (target_wide, charset_enum[i], len))
2925796c8dcSSimon Schubert 	continue;
2935796c8dcSSimon Schubert       if ((charset_enum[i][len] == 'B'
2945796c8dcSSimon Schubert 	   || charset_enum[i][len] == 'L')
2955796c8dcSSimon Schubert 	  && charset_enum[i][len + 1] == 'E'
2965796c8dcSSimon Schubert 	  && charset_enum[i][len + 2] == '\0')
2975796c8dcSSimon Schubert 	{
2985796c8dcSSimon Schubert 	  if (charset_enum[i][len] == 'B')
2995796c8dcSSimon Schubert 	    target_wide_charset_be_name = charset_enum[i];
3005796c8dcSSimon Schubert 	  else
3015796c8dcSSimon Schubert 	    target_wide_charset_le_name = charset_enum[i];
3025796c8dcSSimon Schubert 	}
3035796c8dcSSimon Schubert     }
3045796c8dcSSimon Schubert }
3055796c8dcSSimon Schubert 
3065796c8dcSSimon Schubert /* 'Set charset', 'set host-charset', 'set target-charset', 'set
3075796c8dcSSimon Schubert    target-wide-charset', 'set charset' sfunc's.  */
3085796c8dcSSimon Schubert 
3095796c8dcSSimon Schubert static void
310*cf7f2e2dSJohn Marino validate (struct gdbarch *gdbarch)
3115796c8dcSSimon Schubert {
3125796c8dcSSimon Schubert   iconv_t desc;
3135796c8dcSSimon Schubert   const char *host_cset = host_charset ();
314*cf7f2e2dSJohn Marino   const char *target_cset = target_charset (gdbarch);
315*cf7f2e2dSJohn Marino   const char *target_wide_cset = target_wide_charset_name;
3165796c8dcSSimon Schubert 
317*cf7f2e2dSJohn Marino   if (!strcmp (target_wide_cset, "auto"))
318*cf7f2e2dSJohn Marino     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
319*cf7f2e2dSJohn Marino 
320*cf7f2e2dSJohn Marino   desc = iconv_open (target_wide_cset, host_cset);
3215796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
3225796c8dcSSimon Schubert     error ("Cannot convert between character sets `%s' and `%s'",
323*cf7f2e2dSJohn Marino 	   target_wide_cset, host_cset);
3245796c8dcSSimon Schubert   iconv_close (desc);
3255796c8dcSSimon Schubert 
326*cf7f2e2dSJohn Marino   desc = iconv_open (target_cset, host_cset);
3275796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
3285796c8dcSSimon Schubert     error ("Cannot convert between character sets `%s' and `%s'",
329*cf7f2e2dSJohn Marino 	   target_cset, host_cset);
3305796c8dcSSimon Schubert   iconv_close (desc);
3315796c8dcSSimon Schubert 
332*cf7f2e2dSJohn Marino   /* Clear the cache.  */
333*cf7f2e2dSJohn Marino   be_le_arch = NULL;
3345796c8dcSSimon Schubert }
3355796c8dcSSimon Schubert 
3365796c8dcSSimon Schubert /* This is the sfunc for the 'set charset' command.  */
3375796c8dcSSimon Schubert static void
3385796c8dcSSimon Schubert set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
3395796c8dcSSimon Schubert {
3405796c8dcSSimon Schubert   /* CAREFUL: set the target charset here as well. */
3415796c8dcSSimon Schubert   target_charset_name = host_charset_name;
342*cf7f2e2dSJohn Marino   validate (get_current_arch ());
3435796c8dcSSimon Schubert }
3445796c8dcSSimon Schubert 
/* sfunc for the 'set host-charset' command.  This exists only to give
   validate the signature an sfunc must have.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3535796c8dcSSimon Schubert 
/* sfunc for the 'set target-charset' command; a thin wrapper that
   validates the settings against the current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3615796c8dcSSimon Schubert 
/* sfunc for the 'set target-wide-charset' command; a thin wrapper
   that validates the settings against the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3695796c8dcSSimon Schubert 
3705796c8dcSSimon Schubert /* sfunc for the 'show charset' command.  */
3715796c8dcSSimon Schubert static void
3725796c8dcSSimon Schubert show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
3735796c8dcSSimon Schubert 	      const char *name)
3745796c8dcSSimon Schubert {
3755796c8dcSSimon Schubert   show_host_charset_name (file, from_tty, c, host_charset_name);
3765796c8dcSSimon Schubert   show_target_charset_name (file, from_tty, c, target_charset_name);
3775796c8dcSSimon Schubert   show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
3785796c8dcSSimon Schubert }
3795796c8dcSSimon Schubert 
3805796c8dcSSimon Schubert 
3815796c8dcSSimon Schubert /* Accessor functions.  */
3825796c8dcSSimon Schubert 
3835796c8dcSSimon Schubert const char *
3845796c8dcSSimon Schubert host_charset (void)
3855796c8dcSSimon Schubert {
3865796c8dcSSimon Schubert   if (!strcmp (host_charset_name, "auto"))
3875796c8dcSSimon Schubert     return auto_host_charset_name;
3885796c8dcSSimon Schubert   return host_charset_name;
3895796c8dcSSimon Schubert }
3905796c8dcSSimon Schubert 
3915796c8dcSSimon Schubert const char *
392*cf7f2e2dSJohn Marino target_charset (struct gdbarch *gdbarch)
3935796c8dcSSimon Schubert {
394*cf7f2e2dSJohn Marino   if (!strcmp (target_charset_name, "auto"))
395*cf7f2e2dSJohn Marino     return gdbarch_auto_charset (gdbarch);
3965796c8dcSSimon Schubert   return target_charset_name;
3975796c8dcSSimon Schubert }
3985796c8dcSSimon Schubert 
3995796c8dcSSimon Schubert const char *
400*cf7f2e2dSJohn Marino target_wide_charset (struct gdbarch *gdbarch)
4015796c8dcSSimon Schubert {
402*cf7f2e2dSJohn Marino   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
403*cf7f2e2dSJohn Marino 
404*cf7f2e2dSJohn Marino   set_be_le_names (gdbarch);
4055796c8dcSSimon Schubert   if (byte_order == BFD_ENDIAN_BIG)
4065796c8dcSSimon Schubert     {
4075796c8dcSSimon Schubert       if (target_wide_charset_be_name)
4085796c8dcSSimon Schubert 	return target_wide_charset_be_name;
4095796c8dcSSimon Schubert     }
4105796c8dcSSimon Schubert   else
4115796c8dcSSimon Schubert     {
4125796c8dcSSimon Schubert       if (target_wide_charset_le_name)
4135796c8dcSSimon Schubert 	return target_wide_charset_le_name;
4145796c8dcSSimon Schubert     }
4155796c8dcSSimon Schubert 
416*cf7f2e2dSJohn Marino   if (!strcmp (target_wide_charset_name, "auto"))
417*cf7f2e2dSJohn Marino     return gdbarch_auto_wide_charset (gdbarch);
418*cf7f2e2dSJohn Marino 
4195796c8dcSSimon Schubert   return target_wide_charset_name;
4205796c8dcSSimon Schubert }
4215796c8dcSSimon Schubert 
4225796c8dcSSimon Schubert 
4235796c8dcSSimon Schubert /* Host character set management.  For the time being, we assume that
4245796c8dcSSimon Schubert    the host character set is some superset of ASCII.  */
4255796c8dcSSimon Schubert 
/* Map the host character C to the control character it denotes.  '?'
   yields DEL (0177); any other character is masked with 0237, which
   clears bits 0100 and 0040.  */
char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
4335796c8dcSSimon Schubert 
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything that is not a hex digit
   trips the assertion below.

   The digit test is an explicit range comparison rather than isdigit:
   passing a plain char that happens to be negative to a <ctype.h>
   function is undefined behavior.  */

int
host_hex_value (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
4475796c8dcSSimon Schubert 
4485796c8dcSSimon Schubert 
4495796c8dcSSimon Schubert /* Public character management functions.  */
4505796c8dcSSimon Schubert 
4515796c8dcSSimon Schubert /* A cleanup function which is run to close an iconv descriptor.  */
4525796c8dcSSimon Schubert 
4535796c8dcSSimon Schubert static void
4545796c8dcSSimon Schubert cleanup_iconv (void *p)
4555796c8dcSSimon Schubert {
4565796c8dcSSimon Schubert   iconv_t *descp = p;
4575796c8dcSSimon Schubert   iconv_close (*descp);
4585796c8dcSSimon Schubert }
4595796c8dcSSimon Schubert 
4605796c8dcSSimon Schubert void
4615796c8dcSSimon Schubert convert_between_encodings (const char *from, const char *to,
4625796c8dcSSimon Schubert 			   const gdb_byte *bytes, unsigned int num_bytes,
4635796c8dcSSimon Schubert 			   int width, struct obstack *output,
4645796c8dcSSimon Schubert 			   enum transliterations translit)
4655796c8dcSSimon Schubert {
4665796c8dcSSimon Schubert   iconv_t desc;
4675796c8dcSSimon Schubert   struct cleanup *cleanups;
4685796c8dcSSimon Schubert   size_t inleft;
4695796c8dcSSimon Schubert   char *inp;
4705796c8dcSSimon Schubert   unsigned int space_request;
4715796c8dcSSimon Schubert 
4725796c8dcSSimon Schubert   /* Often, the host and target charsets will be the same.  */
4735796c8dcSSimon Schubert   if (!strcmp (from, to))
4745796c8dcSSimon Schubert     {
4755796c8dcSSimon Schubert       obstack_grow (output, bytes, num_bytes);
4765796c8dcSSimon Schubert       return;
4775796c8dcSSimon Schubert     }
4785796c8dcSSimon Schubert 
4795796c8dcSSimon Schubert   desc = iconv_open (to, from);
4805796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
4815796c8dcSSimon Schubert     perror_with_name ("Converting character sets");
4825796c8dcSSimon Schubert   cleanups = make_cleanup (cleanup_iconv, &desc);
4835796c8dcSSimon Schubert 
4845796c8dcSSimon Schubert   inleft = num_bytes;
4855796c8dcSSimon Schubert   inp = (char *) bytes;
4865796c8dcSSimon Schubert 
4875796c8dcSSimon Schubert   space_request = num_bytes;
4885796c8dcSSimon Schubert 
4895796c8dcSSimon Schubert   while (inleft > 0)
4905796c8dcSSimon Schubert     {
4915796c8dcSSimon Schubert       char *outp;
4925796c8dcSSimon Schubert       size_t outleft, r;
4935796c8dcSSimon Schubert       int old_size;
4945796c8dcSSimon Schubert 
4955796c8dcSSimon Schubert       old_size = obstack_object_size (output);
4965796c8dcSSimon Schubert       obstack_blank (output, space_request);
4975796c8dcSSimon Schubert 
4985796c8dcSSimon Schubert       outp = obstack_base (output) + old_size;
4995796c8dcSSimon Schubert       outleft = space_request;
5005796c8dcSSimon Schubert 
5015796c8dcSSimon Schubert       r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
5025796c8dcSSimon Schubert 
5035796c8dcSSimon Schubert       /* Now make sure that the object on the obstack only includes
5045796c8dcSSimon Schubert 	 bytes we have converted.  */
5055796c8dcSSimon Schubert       obstack_blank (output, - (int) outleft);
5065796c8dcSSimon Schubert 
5075796c8dcSSimon Schubert       if (r == (size_t) -1)
5085796c8dcSSimon Schubert 	{
5095796c8dcSSimon Schubert 	  switch (errno)
5105796c8dcSSimon Schubert 	    {
5115796c8dcSSimon Schubert 	    case EILSEQ:
5125796c8dcSSimon Schubert 	      {
5135796c8dcSSimon Schubert 		int i;
5145796c8dcSSimon Schubert 
5155796c8dcSSimon Schubert 		/* Invalid input sequence.  */
5165796c8dcSSimon Schubert 		if (translit == translit_none)
5175796c8dcSSimon Schubert 		  error (_("Could not convert character to `%s' character set"),
5185796c8dcSSimon Schubert 			 to);
5195796c8dcSSimon Schubert 
5205796c8dcSSimon Schubert 		/* We emit escape sequence for the bytes, skip them,
5215796c8dcSSimon Schubert 		   and try again.  */
5225796c8dcSSimon Schubert 		for (i = 0; i < width; ++i)
5235796c8dcSSimon Schubert 		  {
5245796c8dcSSimon Schubert 		    char octal[5];
5255796c8dcSSimon Schubert 
5265796c8dcSSimon Schubert 		    sprintf (octal, "\\%.3o", *inp & 0xff);
5275796c8dcSSimon Schubert 		    obstack_grow_str (output, octal);
5285796c8dcSSimon Schubert 
5295796c8dcSSimon Schubert 		    ++inp;
5305796c8dcSSimon Schubert 		    --inleft;
5315796c8dcSSimon Schubert 		  }
5325796c8dcSSimon Schubert 	      }
5335796c8dcSSimon Schubert 	      break;
5345796c8dcSSimon Schubert 
5355796c8dcSSimon Schubert 	    case E2BIG:
5365796c8dcSSimon Schubert 	      /* We ran out of space in the output buffer.  Make it
5375796c8dcSSimon Schubert 		 bigger next time around.  */
5385796c8dcSSimon Schubert 	      space_request *= 2;
5395796c8dcSSimon Schubert 	      break;
5405796c8dcSSimon Schubert 
5415796c8dcSSimon Schubert 	    case EINVAL:
5425796c8dcSSimon Schubert 	      /* Incomplete input sequence.  FIXME: ought to report this
5435796c8dcSSimon Schubert 		 to the caller somehow.  */
5445796c8dcSSimon Schubert 	      inleft = 0;
5455796c8dcSSimon Schubert 	      break;
5465796c8dcSSimon Schubert 
5475796c8dcSSimon Schubert 	    default:
5485796c8dcSSimon Schubert 	      perror_with_name ("Internal error while converting character sets");
5495796c8dcSSimon Schubert 	    }
5505796c8dcSSimon Schubert 	}
5515796c8dcSSimon Schubert     }
5525796c8dcSSimon Schubert 
5535796c8dcSSimon Schubert   do_cleanups (cleanups);
5545796c8dcSSimon Schubert }
5555796c8dcSSimon Schubert 
5565796c8dcSSimon Schubert 
5575796c8dcSSimon Schubert 
5585796c8dcSSimon Schubert /* An iterator that returns host wchar_t's from a target string.  */
5595796c8dcSSimon Schubert struct wchar_iterator
5605796c8dcSSimon Schubert {
5615796c8dcSSimon Schubert   /* The underlying iconv descriptor.  */
5625796c8dcSSimon Schubert   iconv_t desc;
5635796c8dcSSimon Schubert 
5645796c8dcSSimon Schubert   /* The input string.  This is updated as convert characters.  */
5655796c8dcSSimon Schubert   char *input;
5665796c8dcSSimon Schubert   /* The number of bytes remaining in the input.  */
5675796c8dcSSimon Schubert   size_t bytes;
5685796c8dcSSimon Schubert 
5695796c8dcSSimon Schubert   /* The width of an input character.  */
5705796c8dcSSimon Schubert   size_t width;
5715796c8dcSSimon Schubert 
5725796c8dcSSimon Schubert   /* The output buffer and its size.  */
5735796c8dcSSimon Schubert   gdb_wchar_t *out;
5745796c8dcSSimon Schubert   size_t out_size;
5755796c8dcSSimon Schubert };
5765796c8dcSSimon Schubert 
5775796c8dcSSimon Schubert /* Create a new iterator.  */
5785796c8dcSSimon Schubert struct wchar_iterator *
5795796c8dcSSimon Schubert make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
5805796c8dcSSimon Schubert 		     size_t width)
5815796c8dcSSimon Schubert {
5825796c8dcSSimon Schubert   struct wchar_iterator *result;
5835796c8dcSSimon Schubert   iconv_t desc;
5845796c8dcSSimon Schubert 
5855796c8dcSSimon Schubert   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
5865796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
5875796c8dcSSimon Schubert     perror_with_name ("Converting character sets");
5885796c8dcSSimon Schubert 
5895796c8dcSSimon Schubert   result = XNEW (struct wchar_iterator);
5905796c8dcSSimon Schubert   result->desc = desc;
5915796c8dcSSimon Schubert   result->input = (char *) input;
5925796c8dcSSimon Schubert   result->bytes = bytes;
5935796c8dcSSimon Schubert   result->width = width;
5945796c8dcSSimon Schubert 
5955796c8dcSSimon Schubert   result->out = XNEW (gdb_wchar_t);
5965796c8dcSSimon Schubert   result->out_size = 1;
5975796c8dcSSimon Schubert 
5985796c8dcSSimon Schubert   return result;
5995796c8dcSSimon Schubert }
6005796c8dcSSimon Schubert 
6015796c8dcSSimon Schubert static void
6025796c8dcSSimon Schubert do_cleanup_iterator (void *p)
6035796c8dcSSimon Schubert {
6045796c8dcSSimon Schubert   struct wchar_iterator *iter = p;
6055796c8dcSSimon Schubert 
6065796c8dcSSimon Schubert   iconv_close (iter->desc);
6075796c8dcSSimon Schubert   xfree (iter->out);
6085796c8dcSSimon Schubert   xfree (iter);
6095796c8dcSSimon Schubert }
6105796c8dcSSimon Schubert 
6115796c8dcSSimon Schubert struct cleanup *
6125796c8dcSSimon Schubert make_cleanup_wchar_iterator (struct wchar_iterator *iter)
6135796c8dcSSimon Schubert {
6145796c8dcSSimon Schubert   return make_cleanup (do_cleanup_iterator, iter);
6155796c8dcSSimon Schubert }
6165796c8dcSSimon Schubert 
6175796c8dcSSimon Schubert int
6185796c8dcSSimon Schubert wchar_iterate (struct wchar_iterator *iter,
6195796c8dcSSimon Schubert 	       enum wchar_iterate_result *out_result,
6205796c8dcSSimon Schubert 	       gdb_wchar_t **out_chars,
6215796c8dcSSimon Schubert 	       const gdb_byte **ptr,
6225796c8dcSSimon Schubert 	       size_t *len)
6235796c8dcSSimon Schubert {
6245796c8dcSSimon Schubert   size_t out_request;
6255796c8dcSSimon Schubert 
6265796c8dcSSimon Schubert   /* Try to convert some characters.  At first we try to convert just
6275796c8dcSSimon Schubert      a single character.  The reason for this is that iconv does not
6285796c8dcSSimon Schubert      necessarily update its outgoing arguments when it encounters an
6295796c8dcSSimon Schubert      invalid input sequence -- but we want to reliably report this to
6305796c8dcSSimon Schubert      our caller so it can emit an escape sequence.  */
6315796c8dcSSimon Schubert   out_request = 1;
6325796c8dcSSimon Schubert   while (iter->bytes > 0)
6335796c8dcSSimon Schubert     {
6345796c8dcSSimon Schubert       char *outptr = (char *) &iter->out[0];
6355796c8dcSSimon Schubert       char *orig_inptr = iter->input;
6365796c8dcSSimon Schubert       size_t orig_in = iter->bytes;
6375796c8dcSSimon Schubert       size_t out_avail = out_request * sizeof (gdb_wchar_t);
6385796c8dcSSimon Schubert       size_t num;
6395796c8dcSSimon Schubert       size_t r = iconv (iter->desc,
6405796c8dcSSimon Schubert 			(ICONV_CONST char **) &iter->input, &iter->bytes,
6415796c8dcSSimon Schubert 			&outptr, &out_avail);
642*cf7f2e2dSJohn Marino 
6435796c8dcSSimon Schubert       if (r == (size_t) -1)
6445796c8dcSSimon Schubert 	{
6455796c8dcSSimon Schubert 	  switch (errno)
6465796c8dcSSimon Schubert 	    {
6475796c8dcSSimon Schubert 	    case EILSEQ:
6485796c8dcSSimon Schubert 	      /* Invalid input sequence.  Skip it, and let the caller
6495796c8dcSSimon Schubert 		 know about it.  */
6505796c8dcSSimon Schubert 	      *out_result = wchar_iterate_invalid;
6515796c8dcSSimon Schubert 	      *ptr = iter->input;
6525796c8dcSSimon Schubert 	      *len = iter->width;
6535796c8dcSSimon Schubert 	      iter->input += iter->width;
6545796c8dcSSimon Schubert 	      iter->bytes -= iter->width;
6555796c8dcSSimon Schubert 	      return 0;
6565796c8dcSSimon Schubert 
6575796c8dcSSimon Schubert 	    case E2BIG:
6585796c8dcSSimon Schubert 	      /* We ran out of space.  We still might have converted a
6595796c8dcSSimon Schubert 		 character; if so, return it.  Otherwise, grow the
6605796c8dcSSimon Schubert 		 buffer and try again.  */
6615796c8dcSSimon Schubert 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
6625796c8dcSSimon Schubert 		break;
6635796c8dcSSimon Schubert 
6645796c8dcSSimon Schubert 	      ++out_request;
6655796c8dcSSimon Schubert 	      if (out_request > iter->out_size)
6665796c8dcSSimon Schubert 		{
6675796c8dcSSimon Schubert 		  iter->out_size = out_request;
6685796c8dcSSimon Schubert 		  iter->out = xrealloc (iter->out,
6695796c8dcSSimon Schubert 					out_request * sizeof (gdb_wchar_t));
6705796c8dcSSimon Schubert 		}
6715796c8dcSSimon Schubert 	      continue;
6725796c8dcSSimon Schubert 
6735796c8dcSSimon Schubert 	    case EINVAL:
6745796c8dcSSimon Schubert 	      /* Incomplete input sequence.  Let the caller know, and
6755796c8dcSSimon Schubert 		 arrange for future calls to see EOF.  */
6765796c8dcSSimon Schubert 	      *out_result = wchar_iterate_incomplete;
6775796c8dcSSimon Schubert 	      *ptr = iter->input;
6785796c8dcSSimon Schubert 	      *len = iter->bytes;
6795796c8dcSSimon Schubert 	      iter->bytes = 0;
6805796c8dcSSimon Schubert 	      return 0;
6815796c8dcSSimon Schubert 
6825796c8dcSSimon Schubert 	    default:
6835796c8dcSSimon Schubert 	      perror_with_name ("Internal error while converting character sets");
6845796c8dcSSimon Schubert 	    }
6855796c8dcSSimon Schubert 	}
6865796c8dcSSimon Schubert 
6875796c8dcSSimon Schubert       /* We converted something.  */
6885796c8dcSSimon Schubert       num = out_request - out_avail / sizeof (gdb_wchar_t);
6895796c8dcSSimon Schubert       *out_result = wchar_iterate_ok;
6905796c8dcSSimon Schubert       *out_chars = iter->out;
6915796c8dcSSimon Schubert       *ptr = orig_inptr;
6925796c8dcSSimon Schubert       *len = orig_in - iter->bytes;
6935796c8dcSSimon Schubert       return num;
6945796c8dcSSimon Schubert     }
6955796c8dcSSimon Schubert 
6965796c8dcSSimon Schubert   /* Really done.  */
6975796c8dcSSimon Schubert   *out_result = wchar_iterate_eof;
6985796c8dcSSimon Schubert   return -1;
6995796c8dcSSimon Schubert }
7005796c8dcSSimon Schubert 
7015796c8dcSSimon Schubert 
7025796c8dcSSimon Schubert /* The charset.c module initialization function.  */
7035796c8dcSSimon Schubert 
7045796c8dcSSimon Schubert extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
7055796c8dcSSimon Schubert 
7065796c8dcSSimon Schubert DEF_VEC_P (char_ptr);
7075796c8dcSSimon Schubert 
7085796c8dcSSimon Schubert static VEC (char_ptr) *charsets;
7095796c8dcSSimon Schubert 
7105796c8dcSSimon Schubert #ifdef PHONY_ICONV
7115796c8dcSSimon Schubert 
7125796c8dcSSimon Schubert static void
7135796c8dcSSimon Schubert find_charset_names (void)
7145796c8dcSSimon Schubert {
7155796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
7165796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, NULL);
7175796c8dcSSimon Schubert }
7185796c8dcSSimon Schubert 
7195796c8dcSSimon Schubert #else /* PHONY_ICONV */
7205796c8dcSSimon Schubert 
/* Sometimes libiconv redefines iconvlist as libiconvlist, but
   provides different symbols in the static and dynamic libraries.
   As a result, configure may see libiconvlist but not iconvlist.
   Calling iconvlist is nevertheless the right thing to do and will
   work.  Hence we check for either symbol here but unconditionally
   call iconvlist below.  */
7265796c8dcSSimon Schubert #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
7275796c8dcSSimon Schubert 
7285796c8dcSSimon Schubert /* A helper function that adds some character sets to the vector of
7295796c8dcSSimon Schubert    all character sets.  This is a callback function for iconvlist.  */
7305796c8dcSSimon Schubert 
7315796c8dcSSimon Schubert static int
7325796c8dcSSimon Schubert add_one (unsigned int count, const char *const *names, void *data)
7335796c8dcSSimon Schubert {
7345796c8dcSSimon Schubert   unsigned int i;
7355796c8dcSSimon Schubert 
7365796c8dcSSimon Schubert   for (i = 0; i < count; ++i)
7375796c8dcSSimon Schubert     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
7385796c8dcSSimon Schubert 
7395796c8dcSSimon Schubert   return 0;
7405796c8dcSSimon Schubert }
7415796c8dcSSimon Schubert 
7425796c8dcSSimon Schubert static void
7435796c8dcSSimon Schubert find_charset_names (void)
7445796c8dcSSimon Schubert {
7455796c8dcSSimon Schubert   iconvlist (add_one, NULL);
7465796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, NULL);
7475796c8dcSSimon Schubert }
7485796c8dcSSimon Schubert 
7495796c8dcSSimon Schubert #else
7505796c8dcSSimon Schubert 
/* Return non-zero if LINE (a line of "iconv -l" output) belongs to
   the human readable introduction and must be skipped.  Older iconv
   programs (e.g. 2.2.2) print that introduction even when stdout is
   not a tty; newer versions omit it in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line is rejected as soon as it
     contains any one of them, at any position (the match is done with
     strstr and is therefore case sensitive).  The table ends with
     NULL.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = intro_fragments;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
	return 1;
      ++fragment;
    }

  return 0;
}
779*cf7f2e2dSJohn Marino 
7805796c8dcSSimon Schubert static void
7815796c8dcSSimon Schubert find_charset_names (void)
7825796c8dcSSimon Schubert {
7835796c8dcSSimon Schubert   struct pex_obj *child;
7845796c8dcSSimon Schubert   char *args[3];
7855796c8dcSSimon Schubert   int err, status;
7865796c8dcSSimon Schubert   int fail = 1;
787*cf7f2e2dSJohn Marino   struct gdb_environ *iconv_env;
7885796c8dcSSimon Schubert 
789*cf7f2e2dSJohn Marino   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
790*cf7f2e2dSJohn Marino      a tty.  We need to recognize it and ignore it.  This text is subject
791*cf7f2e2dSJohn Marino      to translation, so force LANGUAGE=C.  */
792*cf7f2e2dSJohn Marino   iconv_env = make_environ ();
793*cf7f2e2dSJohn Marino   init_environ (iconv_env);
794*cf7f2e2dSJohn Marino   set_in_environ (iconv_env, "LANGUAGE", "C");
795*cf7f2e2dSJohn Marino   set_in_environ (iconv_env, "LC_ALL", "C");
796*cf7f2e2dSJohn Marino 
797*cf7f2e2dSJohn Marino   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
7985796c8dcSSimon Schubert 
7995796c8dcSSimon Schubert   args[0] = "iconv";
8005796c8dcSSimon Schubert   args[1] = "-l";
8015796c8dcSSimon Schubert   args[2] = NULL;
8025796c8dcSSimon Schubert   /* Note that we simply ignore errors here.  */
803*cf7f2e2dSJohn Marino   if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
804*cf7f2e2dSJohn Marino 			       "iconv", args, environ_vector (iconv_env),
805*cf7f2e2dSJohn Marino 			       NULL, NULL, &err))
8065796c8dcSSimon Schubert     {
8075796c8dcSSimon Schubert       FILE *in = pex_read_output (child, 0);
8085796c8dcSSimon Schubert 
8095796c8dcSSimon Schubert       /* POSIX says that iconv -l uses an unspecified format.  We
8105796c8dcSSimon Schubert 	 parse the glibc and libiconv formats; feel free to add others
8115796c8dcSSimon Schubert 	 as needed.  */
812*cf7f2e2dSJohn Marino 
8135796c8dcSSimon Schubert       while (!feof (in))
8145796c8dcSSimon Schubert 	{
8155796c8dcSSimon Schubert 	  /* The size of buf is chosen arbitrarily.  */
8165796c8dcSSimon Schubert 	  char buf[1024];
8175796c8dcSSimon Schubert 	  char *start, *r;
818*cf7f2e2dSJohn Marino 	  int len;
8195796c8dcSSimon Schubert 
8205796c8dcSSimon Schubert 	  r = fgets (buf, sizeof (buf), in);
8215796c8dcSSimon Schubert 	  if (!r)
8225796c8dcSSimon Schubert 	    break;
8235796c8dcSSimon Schubert 	  len = strlen (r);
8245796c8dcSSimon Schubert 	  if (len <= 3)
8255796c8dcSSimon Schubert 	    continue;
826*cf7f2e2dSJohn Marino 	  if (ignore_line_p (r))
827*cf7f2e2dSJohn Marino 	    continue;
828*cf7f2e2dSJohn Marino 
8295796c8dcSSimon Schubert 	  /* Strip off the newline.  */
8305796c8dcSSimon Schubert 	  --len;
8315796c8dcSSimon Schubert 	  /* Strip off one or two '/'s.  glibc will print lines like
8325796c8dcSSimon Schubert 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
8335796c8dcSSimon Schubert 	  if (buf[len - 1] == '/')
8345796c8dcSSimon Schubert 	    --len;
8355796c8dcSSimon Schubert 	  if (buf[len - 1] == '/')
8365796c8dcSSimon Schubert 	    --len;
8375796c8dcSSimon Schubert 	  buf[len] = '\0';
8385796c8dcSSimon Schubert 
8395796c8dcSSimon Schubert 	  /* libiconv will print multiple entries per line, separated
840*cf7f2e2dSJohn Marino 	     by spaces.  Older iconvs will print multiple entries per line,
841*cf7f2e2dSJohn Marino 	     indented by two spaces, and separated by ", "
842*cf7f2e2dSJohn Marino 	     (i.e. the human readable form).  */
8435796c8dcSSimon Schubert 	  start = buf;
8445796c8dcSSimon Schubert 	  while (1)
8455796c8dcSSimon Schubert 	    {
8465796c8dcSSimon Schubert 	      int keep_going;
8475796c8dcSSimon Schubert 	      char *p;
8485796c8dcSSimon Schubert 
849*cf7f2e2dSJohn Marino 	      /* Skip leading blanks.  */
850*cf7f2e2dSJohn Marino 	      for (p = start; *p && *p == ' '; ++p)
851*cf7f2e2dSJohn Marino 		;
852*cf7f2e2dSJohn Marino 	      start = p;
853*cf7f2e2dSJohn Marino 	      /* Find the next space, comma, or end-of-line.  */
854*cf7f2e2dSJohn Marino 	      for ( ; *p && *p != ' ' && *p != ','; ++p)
8555796c8dcSSimon Schubert 		;
8565796c8dcSSimon Schubert 	      /* Ignore an empty result.  */
8575796c8dcSSimon Schubert 	      if (p == start)
8585796c8dcSSimon Schubert 		break;
8595796c8dcSSimon Schubert 	      keep_going = *p;
8605796c8dcSSimon Schubert 	      *p = '\0';
8615796c8dcSSimon Schubert 	      VEC_safe_push (char_ptr, charsets, xstrdup (start));
8625796c8dcSSimon Schubert 	      if (!keep_going)
8635796c8dcSSimon Schubert 		break;
8645796c8dcSSimon Schubert 	      /* Skip any extra spaces.  */
8655796c8dcSSimon Schubert 	      for (start = p + 1; *start && *start == ' '; ++start)
8665796c8dcSSimon Schubert 		;
8675796c8dcSSimon Schubert 	    }
8685796c8dcSSimon Schubert 	}
8695796c8dcSSimon Schubert 
8705796c8dcSSimon Schubert       if (pex_get_status (child, 1, &status)
8715796c8dcSSimon Schubert 	  && WIFEXITED (status) && !WEXITSTATUS (status))
8725796c8dcSSimon Schubert 	fail = 0;
8735796c8dcSSimon Schubert 
8745796c8dcSSimon Schubert     }
8755796c8dcSSimon Schubert 
8765796c8dcSSimon Schubert   pex_free (child);
877*cf7f2e2dSJohn Marino   free_environ (iconv_env);
8785796c8dcSSimon Schubert 
8795796c8dcSSimon Schubert   if (fail)
8805796c8dcSSimon Schubert     {
8815796c8dcSSimon Schubert       /* Some error occurred, so drop the vector.  */
8825796c8dcSSimon Schubert       int ix;
8835796c8dcSSimon Schubert       char *elt;
8845796c8dcSSimon Schubert       for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
8855796c8dcSSimon Schubert 	xfree (elt);
8865796c8dcSSimon Schubert       VEC_truncate (char_ptr, charsets, 0);
8875796c8dcSSimon Schubert     }
8885796c8dcSSimon Schubert   else
8895796c8dcSSimon Schubert     VEC_safe_push (char_ptr, charsets, NULL);
8905796c8dcSSimon Schubert }
8915796c8dcSSimon Schubert 
8925796c8dcSSimon Schubert #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
8935796c8dcSSimon Schubert #endif /* PHONY_ICONV */
8945796c8dcSSimon Schubert 
895*cf7f2e2dSJohn Marino /* The "auto" target charset used by default_auto_charset.  */
896*cf7f2e2dSJohn Marino static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
897*cf7f2e2dSJohn Marino 
898*cf7f2e2dSJohn Marino const char *
899*cf7f2e2dSJohn Marino default_auto_charset (void)
900*cf7f2e2dSJohn Marino {
901*cf7f2e2dSJohn Marino   return auto_target_charset_name;
902*cf7f2e2dSJohn Marino }
903*cf7f2e2dSJohn Marino 
904*cf7f2e2dSJohn Marino const char *
905*cf7f2e2dSJohn Marino default_auto_wide_charset (void)
906*cf7f2e2dSJohn Marino {
907*cf7f2e2dSJohn Marino   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
908*cf7f2e2dSJohn Marino }
909*cf7f2e2dSJohn Marino 
9105796c8dcSSimon Schubert void
9115796c8dcSSimon Schubert _initialize_charset (void)
9125796c8dcSSimon Schubert {
913*cf7f2e2dSJohn Marino   /* The first element is always "auto".  */
9145796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
9155796c8dcSSimon Schubert   find_charset_names ();
9165796c8dcSSimon Schubert 
9175796c8dcSSimon Schubert   if (VEC_length (char_ptr, charsets) > 1)
9185796c8dcSSimon Schubert     charset_enum = (const char **) VEC_address (char_ptr, charsets);
9195796c8dcSSimon Schubert   else
9205796c8dcSSimon Schubert     charset_enum = default_charset_names;
9215796c8dcSSimon Schubert 
9225796c8dcSSimon Schubert #ifndef PHONY_ICONV
9235796c8dcSSimon Schubert #ifdef HAVE_LANGINFO_CODESET
924*cf7f2e2dSJohn Marino   /* The result of nl_langinfo may be overwritten later.  This may
925*cf7f2e2dSJohn Marino      leak a little memory, if the user later changes the host charset,
926*cf7f2e2dSJohn Marino      but that doesn't matter much.  */
927*cf7f2e2dSJohn Marino   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
9285796c8dcSSimon Schubert   /* Solaris will return `646' here -- but the Solaris iconv then
929*cf7f2e2dSJohn Marino      does not accept this.  Darwin (and maybe FreeBSD) may return "" here,
930*cf7f2e2dSJohn Marino      which GNU libiconv doesn't like (infinite loop).  */
931*cf7f2e2dSJohn Marino   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
9325796c8dcSSimon Schubert     auto_host_charset_name = "ASCII";
933*cf7f2e2dSJohn Marino   auto_target_charset_name = auto_host_charset_name;
934*cf7f2e2dSJohn Marino #elif defined (USE_WIN32API)
935*cf7f2e2dSJohn Marino   {
936*cf7f2e2dSJohn Marino     static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
9375796c8dcSSimon Schubert 
938*cf7f2e2dSJohn Marino     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
939*cf7f2e2dSJohn Marino 	      "CP%d", GetACP());
940*cf7f2e2dSJohn Marino     auto_host_charset_name = w32_host_default_charset;
941*cf7f2e2dSJohn Marino     auto_target_charset_name = auto_host_charset_name;
942*cf7f2e2dSJohn Marino   }
9435796c8dcSSimon Schubert #endif
9445796c8dcSSimon Schubert #endif
9455796c8dcSSimon Schubert 
9465796c8dcSSimon Schubert   add_setshow_enum_cmd ("charset", class_support,
947*cf7f2e2dSJohn Marino 			charset_enum, &host_charset_name, _("\
9485796c8dcSSimon Schubert Set the host and target character sets."), _("\
9495796c8dcSSimon Schubert Show the host and target character sets."), _("\
9505796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\
9515796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\
9525796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\
9535796c8dcSSimon Schubert not support any others.\n\
9545796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set charset <TAB>'."),
9555796c8dcSSimon Schubert 			/* Note that the sfunc below needs to set
9565796c8dcSSimon Schubert 			   target_charset_name, because the 'set
9575796c8dcSSimon Schubert 			   charset' command sets two variables.  */
9585796c8dcSSimon Schubert 			set_charset_sfunc,
9595796c8dcSSimon Schubert 			show_charset,
9605796c8dcSSimon Schubert 			&setlist, &showlist);
9615796c8dcSSimon Schubert 
9625796c8dcSSimon Schubert   add_setshow_enum_cmd ("host-charset", class_support,
9635796c8dcSSimon Schubert 			charset_enum, &host_charset_name, _("\
9645796c8dcSSimon Schubert Set the host character set."), _("\
9655796c8dcSSimon Schubert Show the host character set."), _("\
9665796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\
9675796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\
9685796c8dcSSimon Schubert not support any others.\n\
9695796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
9705796c8dcSSimon Schubert 			set_host_charset_sfunc,
9715796c8dcSSimon Schubert 			show_host_charset_name,
9725796c8dcSSimon Schubert 			&setlist, &showlist);
9735796c8dcSSimon Schubert 
9745796c8dcSSimon Schubert   add_setshow_enum_cmd ("target-charset", class_support,
975*cf7f2e2dSJohn Marino 			charset_enum, &target_charset_name, _("\
9765796c8dcSSimon Schubert Set the target character set."), _("\
9775796c8dcSSimon Schubert Show the target character set."), _("\
9785796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\
9795796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\
9805796c8dcSSimon Schubert character sets as needed.\n\
9815796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
9825796c8dcSSimon Schubert 			set_target_charset_sfunc,
9835796c8dcSSimon Schubert 			show_target_charset_name,
9845796c8dcSSimon Schubert 			&setlist, &showlist);
9855796c8dcSSimon Schubert 
9865796c8dcSSimon Schubert   add_setshow_enum_cmd ("target-wide-charset", class_support,
987*cf7f2e2dSJohn Marino 			charset_enum, &target_wide_charset_name,
9885796c8dcSSimon Schubert 			_("\
9895796c8dcSSimon Schubert Set the target wide character set."), _("\
9905796c8dcSSimon Schubert Show the target wide character set."), _("\
9915796c8dcSSimon Schubert The `target wide character set' is the one used by the program being debugged.\n\
9925796c8dcSSimon Schubert In particular it is the encoding used by `wchar_t'.\n\
9935796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\
9945796c8dcSSimon Schubert character sets as needed.\n\
9955796c8dcSSimon Schubert To see a list of the character sets GDB supports, type\n\
9965796c8dcSSimon Schubert `set target-wide-charset'<TAB>"),
9975796c8dcSSimon Schubert 			set_target_wide_charset_sfunc,
9985796c8dcSSimon Schubert 			show_target_wide_charset_name,
9995796c8dcSSimon Schubert 			&setlist, &showlist);
10005796c8dcSSimon Schubert }
1001