xref: /dflybsd-src/contrib/gdb-7/gdb/charset.c (revision de8e141f24382815c10a4012d209bbbf7abf1112)
15796c8dcSSimon Schubert /* Character set conversion support for GDB.
25796c8dcSSimon Schubert 
3*ef5ccd6cSJohn Marino    Copyright (C) 2001-2013 Free Software Foundation, Inc.
45796c8dcSSimon Schubert 
55796c8dcSSimon Schubert    This file is part of GDB.
65796c8dcSSimon Schubert 
75796c8dcSSimon Schubert    This program is free software; you can redistribute it and/or modify
85796c8dcSSimon Schubert    it under the terms of the GNU General Public License as published by
95796c8dcSSimon Schubert    the Free Software Foundation; either version 3 of the License, or
105796c8dcSSimon Schubert    (at your option) any later version.
115796c8dcSSimon Schubert 
125796c8dcSSimon Schubert    This program is distributed in the hope that it will be useful,
135796c8dcSSimon Schubert    but WITHOUT ANY WARRANTY; without even the implied warranty of
145796c8dcSSimon Schubert    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
155796c8dcSSimon Schubert    GNU General Public License for more details.
165796c8dcSSimon Schubert 
175796c8dcSSimon Schubert    You should have received a copy of the GNU General Public License
185796c8dcSSimon Schubert    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
195796c8dcSSimon Schubert 
205796c8dcSSimon Schubert #include "defs.h"
215796c8dcSSimon Schubert #include "charset.h"
225796c8dcSSimon Schubert #include "gdbcmd.h"
235796c8dcSSimon Schubert #include "gdb_assert.h"
245796c8dcSSimon Schubert #include "gdb_obstack.h"
255796c8dcSSimon Schubert #include "gdb_wait.h"
265796c8dcSSimon Schubert #include "charset-list.h"
275796c8dcSSimon Schubert #include "vec.h"
28cf7f2e2dSJohn Marino #include "environ.h"
29cf7f2e2dSJohn Marino #include "arch-utils.h"
30*ef5ccd6cSJohn Marino #include "gdb_vecs.h"
315796c8dcSSimon Schubert 
325796c8dcSSimon Schubert #include <stddef.h>
335796c8dcSSimon Schubert #include "gdb_string.h"
345796c8dcSSimon Schubert #include <ctype.h>
355796c8dcSSimon Schubert 
36cf7f2e2dSJohn Marino #ifdef USE_WIN32API
37cf7f2e2dSJohn Marino #include <windows.h>
38cf7f2e2dSJohn Marino #endif
395796c8dcSSimon Schubert 
405796c8dcSSimon Schubert /* How GDB's character set support works
415796c8dcSSimon Schubert 
425796c8dcSSimon Schubert    GDB has three global settings:
435796c8dcSSimon Schubert 
445796c8dcSSimon Schubert    - The `current host character set' is the character set GDB should
455796c8dcSSimon Schubert      use in talking to the user, and which (hopefully) the user's
465796c8dcSSimon Schubert      terminal knows how to display properly.  Most users should not
475796c8dcSSimon Schubert      change this.
485796c8dcSSimon Schubert 
495796c8dcSSimon Schubert    - The `current target character set' is the character set the
505796c8dcSSimon Schubert      program being debugged uses.
515796c8dcSSimon Schubert 
525796c8dcSSimon Schubert    - The `current target wide character set' is the wide character set
535796c8dcSSimon Schubert      the program being debugged uses, that is, the encoding used for
545796c8dcSSimon Schubert      wchar_t.
555796c8dcSSimon Schubert 
565796c8dcSSimon Schubert    There are commands to set each of these, and mechanisms for
575796c8dcSSimon Schubert    choosing reasonable default values.  GDB has a global list of
585796c8dcSSimon Schubert    character sets that it can use as its host or target character
595796c8dcSSimon Schubert    sets.
605796c8dcSSimon Schubert 
615796c8dcSSimon Schubert    The header file `charset.h' declares various functions that
625796c8dcSSimon Schubert    different pieces of GDB need to perform tasks like:
635796c8dcSSimon Schubert 
645796c8dcSSimon Schubert    - printing target strings and characters to the user's terminal
655796c8dcSSimon Schubert      (mostly target->host conversions),
665796c8dcSSimon Schubert 
675796c8dcSSimon Schubert    - building target-appropriate representations of strings and
685796c8dcSSimon Schubert      characters the user enters in expressions (mostly host->target
695796c8dcSSimon Schubert      conversions),
705796c8dcSSimon Schubert 
715796c8dcSSimon Schubert      and so on.
725796c8dcSSimon Schubert 
735796c8dcSSimon Schubert    To avoid excessive code duplication and maintenance efforts,
745796c8dcSSimon Schubert    GDB simply requires a capable iconv function.  Users on platforms
755796c8dcSSimon Schubert    without a suitable iconv can use the GNU iconv library.  */
765796c8dcSSimon Schubert 
775796c8dcSSimon Schubert 
785796c8dcSSimon Schubert #ifdef PHONY_ICONV
795796c8dcSSimon Schubert 
805796c8dcSSimon Schubert /* Provide a phony iconv that does as little as possible.  Also,
815796c8dcSSimon Schubert    arrange for there to be a single available character set.  */
825796c8dcSSimon Schubert 
/* Force every default charset to ISO-8859-1 and make it the only
   entry contributed to the charset name list.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Route the iconv API names to the phony implementations defined
   below; the descriptor type becomes a plain int.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems lack EILSEQ.  Define it here, but not as EINVAL,
   because callers of `iconv' need to tell EINVAL and EILSEQ apart.
   iconv.h from libiconv does the same.  wchar.h may also define
   EILSEQ, so this must come after wchar.h has been included, which
   happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
1105796c8dcSSimon Schubert 
111*ef5ccd6cSJohn Marino static iconv_t
phony_iconv_open(const char * to,const char * from)112c50c785cSJohn Marino phony_iconv_open (const char *to, const char *from)
1135796c8dcSSimon Schubert {
114cf7f2e2dSJohn Marino   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
1155796c8dcSSimon Schubert      We allow conversions to wchar_t and the host charset.  */
116cf7f2e2dSJohn Marino   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
1175796c8dcSSimon Schubert       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
1185796c8dcSSimon Schubert     return -1;
1195796c8dcSSimon Schubert   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
1205796c8dcSSimon Schubert     return -1;
1215796c8dcSSimon Schubert 
122cf7f2e2dSJohn Marino   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
1235796c8dcSSimon Schubert      used as a flag in calls to iconv.  */
124cf7f2e2dSJohn Marino   return !strcmp (from, "UTF-32BE");
1255796c8dcSSimon Schubert }
1265796c8dcSSimon Schubert 
/* Phony replacement for iconv_close.  ARG is ignored; the result is
   always 0.  */

static int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
1325796c8dcSSimon Schubert 
/* Phony replacement for iconv.  UTF_FLAG is the descriptor returned
   by phony_iconv_open: nonzero means the input is UTF-32BE, zero
   means input bytes are copied to the output unchanged.

   *INBUF / *INBYTESLEFT and *OUTBUF / *OUTBYTESLEFT are advanced past
   whatever was consumed and produced, as with the real iconv.

   Returns 0 on success.  On failure returns (size_t) -1 with errno
   set to:
     EILSEQ  a UTF-32BE code point is 256 or larger;
     E2BIG   the output buffer filled before the input was consumed;
     EINVAL  the UTF-32BE input ends with fewer than 4 bytes.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
	     char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
	{
	  size_t j;
	  unsigned long c = 0;

	  /* Assemble one big-endian 32-bit code point.  */
	  for (j = 0; j < 4; ++j)
	    {
	      c <<= 8;
	      c += (*inbuf)[j] & 0xff;
	    }

	  if (c >= 256)
	    {
	      errno = EILSEQ;
	      return (size_t) -1;
	    }

	  /* Never write past the end of the caller's buffer.  */
	  if (*outbytesleft < 1)
	    {
	      errno = E2BIG;
	      return (size_t) -1;
	    }

	  **outbuf = c & 0xff;
	  ++*outbuf;
	  --*outbytesleft;

	  /* One input character is four bytes wide.  */
	  *inbuf += 4;
	  *inbytesleft -= 4;
	}

      /* Anything left over is a partial sequence.  */
      if (*inbytesleft)
	{
	  errno = EINVAL;
	  return (size_t) -1;
	}
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
	 output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
	amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
	{
	  errno = E2BIG;
	  return (size_t) -1;
	}
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
1935796c8dcSSimon Schubert 
1945796c8dcSSimon Schubert #endif
1955796c8dcSSimon Schubert 
1965796c8dcSSimon Schubert 
1975796c8dcSSimon Schubert 
1985796c8dcSSimon Schubert /* The global lists of character sets and translations.  */
1995796c8dcSSimon Schubert 
2005796c8dcSSimon Schubert 
/* Fallback target charset, used when nothing above defined one.  */
#ifndef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

/* Fallback target wide charset, used when nothing above defined
   one.  */
#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
2085796c8dcSSimon Schubert 
2095796c8dcSSimon Schubert static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
2105796c8dcSSimon Schubert static const char *host_charset_name = "auto";
2115796c8dcSSimon Schubert static void
show_host_charset_name(struct ui_file * file,int from_tty,struct cmd_list_element * c,const char * value)2125796c8dcSSimon Schubert show_host_charset_name (struct ui_file *file, int from_tty,
2135796c8dcSSimon Schubert 			struct cmd_list_element *c,
2145796c8dcSSimon Schubert 			const char *value)
2155796c8dcSSimon Schubert {
2165796c8dcSSimon Schubert   if (!strcmp (value, "auto"))
2175796c8dcSSimon Schubert     fprintf_filtered (file,
2185796c8dcSSimon Schubert 		      _("The host character set is \"auto; currently %s\".\n"),
2195796c8dcSSimon Schubert 		      auto_host_charset_name);
2205796c8dcSSimon Schubert   else
2215796c8dcSSimon Schubert     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
2225796c8dcSSimon Schubert }
2235796c8dcSSimon Schubert 
/* The current target charset setting; "auto" defers to
   gdbarch_auto_charset.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  When VALUE is
   "auto", the name reported by gdbarch_auto_charset for the current
   architecture is shown as well.  FROM_TTY and C are unused.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
			  struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_charset (get_current_arch ()));
}
2385796c8dcSSimon Schubert 
/* The current target wide charset setting; "auto" defers to
   gdbarch_auto_wide_charset.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  When VALUE
   is "auto", the name reported by gdbarch_auto_wide_charset for the
   current architecture is shown as well.  FROM_TTY and C are
   unused.  */

static void
show_target_wide_charset_name (struct ui_file *file,
			       int from_tty,
			       struct cmd_list_element *c,
			       const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target wide character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_wide_charset (get_current_arch ()));
}
2555796c8dcSSimon Schubert 
2565796c8dcSSimon Schubert static const char *default_charset_names[] =
2575796c8dcSSimon Schubert {
2585796c8dcSSimon Schubert   DEFAULT_CHARSET_NAMES
2595796c8dcSSimon Schubert   0
2605796c8dcSSimon Schubert };
2615796c8dcSSimon Schubert 
2625796c8dcSSimon Schubert static const char **charset_enum;
2635796c8dcSSimon Schubert 
2645796c8dcSSimon Schubert 
/* Names of the big- and little-endian variants of the target wide
   character set, or NULL when no such variant was found.  */
static const char *target_wide_charset_be_name;
static const char *target_wide_charset_le_name;

/* The architecture the two names above were computed for; NULL
   means they must be recomputed.  */
static struct gdbarch *be_le_arch;
272cf7f2e2dSJohn Marino 
273cf7f2e2dSJohn Marino /* A helper function which sets the target wide big- and little-endian
274cf7f2e2dSJohn Marino    character set names, if possible.  */
2755796c8dcSSimon Schubert 
2765796c8dcSSimon Schubert static void
set_be_le_names(struct gdbarch * gdbarch)277cf7f2e2dSJohn Marino set_be_le_names (struct gdbarch *gdbarch)
2785796c8dcSSimon Schubert {
2795796c8dcSSimon Schubert   int i, len;
280cf7f2e2dSJohn Marino   const char *target_wide;
281cf7f2e2dSJohn Marino 
282cf7f2e2dSJohn Marino   if (be_le_arch == gdbarch)
283cf7f2e2dSJohn Marino     return;
284cf7f2e2dSJohn Marino   be_le_arch = gdbarch;
2855796c8dcSSimon Schubert 
2865796c8dcSSimon Schubert   target_wide_charset_le_name = NULL;
2875796c8dcSSimon Schubert   target_wide_charset_be_name = NULL;
2885796c8dcSSimon Schubert 
289cf7f2e2dSJohn Marino   target_wide = target_wide_charset_name;
290cf7f2e2dSJohn Marino   if (!strcmp (target_wide, "auto"))
291cf7f2e2dSJohn Marino     target_wide = gdbarch_auto_wide_charset (gdbarch);
292cf7f2e2dSJohn Marino 
293cf7f2e2dSJohn Marino   len = strlen (target_wide);
2945796c8dcSSimon Schubert   for (i = 0; charset_enum[i]; ++i)
2955796c8dcSSimon Schubert     {
296cf7f2e2dSJohn Marino       if (strncmp (target_wide, charset_enum[i], len))
2975796c8dcSSimon Schubert 	continue;
2985796c8dcSSimon Schubert       if ((charset_enum[i][len] == 'B'
2995796c8dcSSimon Schubert 	   || charset_enum[i][len] == 'L')
3005796c8dcSSimon Schubert 	  && charset_enum[i][len + 1] == 'E'
3015796c8dcSSimon Schubert 	  && charset_enum[i][len + 2] == '\0')
3025796c8dcSSimon Schubert 	{
3035796c8dcSSimon Schubert 	  if (charset_enum[i][len] == 'B')
3045796c8dcSSimon Schubert 	    target_wide_charset_be_name = charset_enum[i];
3055796c8dcSSimon Schubert 	  else
3065796c8dcSSimon Schubert 	    target_wide_charset_le_name = charset_enum[i];
3075796c8dcSSimon Schubert 	}
3085796c8dcSSimon Schubert     }
3095796c8dcSSimon Schubert }
3105796c8dcSSimon Schubert 
3115796c8dcSSimon Schubert /* 'Set charset', 'set host-charset', 'set target-charset', 'set
3125796c8dcSSimon Schubert    target-wide-charset', 'set charset' sfunc's.  */
3135796c8dcSSimon Schubert 
3145796c8dcSSimon Schubert static void
validate(struct gdbarch * gdbarch)315cf7f2e2dSJohn Marino validate (struct gdbarch *gdbarch)
3165796c8dcSSimon Schubert {
3175796c8dcSSimon Schubert   iconv_t desc;
3185796c8dcSSimon Schubert   const char *host_cset = host_charset ();
319cf7f2e2dSJohn Marino   const char *target_cset = target_charset (gdbarch);
320cf7f2e2dSJohn Marino   const char *target_wide_cset = target_wide_charset_name;
3215796c8dcSSimon Schubert 
322cf7f2e2dSJohn Marino   if (!strcmp (target_wide_cset, "auto"))
323cf7f2e2dSJohn Marino     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
324cf7f2e2dSJohn Marino 
325cf7f2e2dSJohn Marino   desc = iconv_open (target_wide_cset, host_cset);
3265796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
327c50c785cSJohn Marino     error (_("Cannot convert between character sets `%s' and `%s'"),
328cf7f2e2dSJohn Marino 	   target_wide_cset, host_cset);
3295796c8dcSSimon Schubert   iconv_close (desc);
3305796c8dcSSimon Schubert 
331cf7f2e2dSJohn Marino   desc = iconv_open (target_cset, host_cset);
3325796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
333c50c785cSJohn Marino     error (_("Cannot convert between character sets `%s' and `%s'"),
334cf7f2e2dSJohn Marino 	   target_cset, host_cset);
3355796c8dcSSimon Schubert   iconv_close (desc);
3365796c8dcSSimon Schubert 
337cf7f2e2dSJohn Marino   /* Clear the cache.  */
338cf7f2e2dSJohn Marino   be_le_arch = NULL;
3395796c8dcSSimon Schubert }
3405796c8dcSSimon Schubert 
3415796c8dcSSimon Schubert /* This is the sfunc for the 'set charset' command.  */
3425796c8dcSSimon Schubert static void
set_charset_sfunc(char * charset,int from_tty,struct cmd_list_element * c)343c50c785cSJohn Marino set_charset_sfunc (char *charset, int from_tty,
344c50c785cSJohn Marino 		   struct cmd_list_element *c)
3455796c8dcSSimon Schubert {
3465796c8dcSSimon Schubert   /* CAREFUL: set the target charset here as well.  */
3475796c8dcSSimon Schubert   target_charset_name = host_charset_name;
348cf7f2e2dSJohn Marino   validate (get_current_arch ());
3495796c8dcSSimon Schubert }
3505796c8dcSSimon Schubert 
/* The sfunc for the 'set host-charset' command.  This wrapper exists
   only because the callback needs a specific signature; it validates
   the charset settings against the current architecture.  CHARSET,
   FROM_TTY and C are unused.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
			struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3595796c8dcSSimon Schubert 
/* The sfunc wrapper for the 'set target-charset' command.  Validates
   the charset settings against the current architecture.  CHARSET,
   FROM_TTY and C are unused.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
			  struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3675796c8dcSSimon Schubert 
/* The sfunc wrapper for the 'set target-wide-charset' command.
   Validates the charset settings against the current architecture.
   CHARSET, FROM_TTY and C are unused.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
			       struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
3755796c8dcSSimon Schubert 
3765796c8dcSSimon Schubert /* sfunc for the 'show charset' command.  */
3775796c8dcSSimon Schubert static void
show_charset(struct ui_file * file,int from_tty,struct cmd_list_element * c,const char * name)378c50c785cSJohn Marino show_charset (struct ui_file *file, int from_tty,
379c50c785cSJohn Marino 	      struct cmd_list_element *c,
3805796c8dcSSimon Schubert 	      const char *name)
3815796c8dcSSimon Schubert {
3825796c8dcSSimon Schubert   show_host_charset_name (file, from_tty, c, host_charset_name);
3835796c8dcSSimon Schubert   show_target_charset_name (file, from_tty, c, target_charset_name);
384c50c785cSJohn Marino   show_target_wide_charset_name (file, from_tty, c,
385c50c785cSJohn Marino 				 target_wide_charset_name);
3865796c8dcSSimon Schubert }
3875796c8dcSSimon Schubert 
3885796c8dcSSimon Schubert 
3895796c8dcSSimon Schubert /* Accessor functions.  */
3905796c8dcSSimon Schubert 
3915796c8dcSSimon Schubert const char *
host_charset(void)3925796c8dcSSimon Schubert host_charset (void)
3935796c8dcSSimon Schubert {
3945796c8dcSSimon Schubert   if (!strcmp (host_charset_name, "auto"))
3955796c8dcSSimon Schubert     return auto_host_charset_name;
3965796c8dcSSimon Schubert   return host_charset_name;
3975796c8dcSSimon Schubert }
3985796c8dcSSimon Schubert 
3995796c8dcSSimon Schubert const char *
target_charset(struct gdbarch * gdbarch)400cf7f2e2dSJohn Marino target_charset (struct gdbarch *gdbarch)
4015796c8dcSSimon Schubert {
402cf7f2e2dSJohn Marino   if (!strcmp (target_charset_name, "auto"))
403cf7f2e2dSJohn Marino     return gdbarch_auto_charset (gdbarch);
4045796c8dcSSimon Schubert   return target_charset_name;
4055796c8dcSSimon Schubert }
4065796c8dcSSimon Schubert 
4075796c8dcSSimon Schubert const char *
target_wide_charset(struct gdbarch * gdbarch)408cf7f2e2dSJohn Marino target_wide_charset (struct gdbarch *gdbarch)
4095796c8dcSSimon Schubert {
410cf7f2e2dSJohn Marino   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
411cf7f2e2dSJohn Marino 
412cf7f2e2dSJohn Marino   set_be_le_names (gdbarch);
4135796c8dcSSimon Schubert   if (byte_order == BFD_ENDIAN_BIG)
4145796c8dcSSimon Schubert     {
4155796c8dcSSimon Schubert       if (target_wide_charset_be_name)
4165796c8dcSSimon Schubert 	return target_wide_charset_be_name;
4175796c8dcSSimon Schubert     }
4185796c8dcSSimon Schubert   else
4195796c8dcSSimon Schubert     {
4205796c8dcSSimon Schubert       if (target_wide_charset_le_name)
4215796c8dcSSimon Schubert 	return target_wide_charset_le_name;
4225796c8dcSSimon Schubert     }
4235796c8dcSSimon Schubert 
424cf7f2e2dSJohn Marino   if (!strcmp (target_wide_charset_name, "auto"))
425cf7f2e2dSJohn Marino     return gdbarch_auto_wide_charset (gdbarch);
426cf7f2e2dSJohn Marino 
4275796c8dcSSimon Schubert   return target_wide_charset_name;
4285796c8dcSSimon Schubert }
4295796c8dcSSimon Schubert 
4305796c8dcSSimon Schubert 
4315796c8dcSSimon Schubert /* Host character set management.  For the time being, we assume that
4325796c8dcSSimon Schubert    the host character set is some superset of ASCII.  */
4335796c8dcSSimon Schubert 
/* Map the host character C to a control character: '?' yields 0177,
   and any other character yields C masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
4415796c8dcSSimon Schubert 
/* Convert a host character, C, to its hex value.  C must already
   have been validated using isxdigit; a character that is neither a
   decimal digit nor in 'a'..'f' / 'A'..'F' trips the gdb_assert
   below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for values representable
     as unsigned char (or EOF), so never hand them a plain char that
     might be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
4555796c8dcSSimon Schubert 
4565796c8dcSSimon Schubert 
4575796c8dcSSimon Schubert /* Public character management functions.  */
4585796c8dcSSimon Schubert 
/* A cleanup function which closes an iconv descriptor.  P points to
   the iconv_t to close.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
4675796c8dcSSimon Schubert 
4685796c8dcSSimon Schubert void
convert_between_encodings(const char * from,const char * to,const gdb_byte * bytes,unsigned int num_bytes,int width,struct obstack * output,enum transliterations translit)4695796c8dcSSimon Schubert convert_between_encodings (const char *from, const char *to,
4705796c8dcSSimon Schubert 			   const gdb_byte *bytes, unsigned int num_bytes,
4715796c8dcSSimon Schubert 			   int width, struct obstack *output,
4725796c8dcSSimon Schubert 			   enum transliterations translit)
4735796c8dcSSimon Schubert {
4745796c8dcSSimon Schubert   iconv_t desc;
4755796c8dcSSimon Schubert   struct cleanup *cleanups;
4765796c8dcSSimon Schubert   size_t inleft;
477*ef5ccd6cSJohn Marino   ICONV_CONST char *inp;
4785796c8dcSSimon Schubert   unsigned int space_request;
4795796c8dcSSimon Schubert 
4805796c8dcSSimon Schubert   /* Often, the host and target charsets will be the same.  */
4815796c8dcSSimon Schubert   if (!strcmp (from, to))
4825796c8dcSSimon Schubert     {
4835796c8dcSSimon Schubert       obstack_grow (output, bytes, num_bytes);
4845796c8dcSSimon Schubert       return;
4855796c8dcSSimon Schubert     }
4865796c8dcSSimon Schubert 
4875796c8dcSSimon Schubert   desc = iconv_open (to, from);
4885796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
489c50c785cSJohn Marino     perror_with_name (_("Converting character sets"));
4905796c8dcSSimon Schubert   cleanups = make_cleanup (cleanup_iconv, &desc);
4915796c8dcSSimon Schubert 
4925796c8dcSSimon Schubert   inleft = num_bytes;
493*ef5ccd6cSJohn Marino   inp = (ICONV_CONST char *) bytes;
4945796c8dcSSimon Schubert 
4955796c8dcSSimon Schubert   space_request = num_bytes;
4965796c8dcSSimon Schubert 
4975796c8dcSSimon Schubert   while (inleft > 0)
4985796c8dcSSimon Schubert     {
4995796c8dcSSimon Schubert       char *outp;
5005796c8dcSSimon Schubert       size_t outleft, r;
5015796c8dcSSimon Schubert       int old_size;
5025796c8dcSSimon Schubert 
5035796c8dcSSimon Schubert       old_size = obstack_object_size (output);
5045796c8dcSSimon Schubert       obstack_blank (output, space_request);
5055796c8dcSSimon Schubert 
5065796c8dcSSimon Schubert       outp = obstack_base (output) + old_size;
5075796c8dcSSimon Schubert       outleft = space_request;
5085796c8dcSSimon Schubert 
509*ef5ccd6cSJohn Marino       r = iconv (desc, &inp, &inleft, &outp, &outleft);
5105796c8dcSSimon Schubert 
5115796c8dcSSimon Schubert       /* Now make sure that the object on the obstack only includes
5125796c8dcSSimon Schubert 	 bytes we have converted.  */
5135796c8dcSSimon Schubert       obstack_blank (output, - (int) outleft);
5145796c8dcSSimon Schubert 
5155796c8dcSSimon Schubert       if (r == (size_t) -1)
5165796c8dcSSimon Schubert 	{
5175796c8dcSSimon Schubert 	  switch (errno)
5185796c8dcSSimon Schubert 	    {
5195796c8dcSSimon Schubert 	    case EILSEQ:
5205796c8dcSSimon Schubert 	      {
5215796c8dcSSimon Schubert 		int i;
5225796c8dcSSimon Schubert 
5235796c8dcSSimon Schubert 		/* Invalid input sequence.  */
5245796c8dcSSimon Schubert 		if (translit == translit_none)
525c50c785cSJohn Marino 		  error (_("Could not convert character "
526c50c785cSJohn Marino 			   "to `%s' character set"), to);
5275796c8dcSSimon Schubert 
5285796c8dcSSimon Schubert 		/* We emit escape sequence for the bytes, skip them,
5295796c8dcSSimon Schubert 		   and try again.  */
5305796c8dcSSimon Schubert 		for (i = 0; i < width; ++i)
5315796c8dcSSimon Schubert 		  {
5325796c8dcSSimon Schubert 		    char octal[5];
5335796c8dcSSimon Schubert 
534*ef5ccd6cSJohn Marino 		    xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
5355796c8dcSSimon Schubert 		    obstack_grow_str (output, octal);
5365796c8dcSSimon Schubert 
5375796c8dcSSimon Schubert 		    ++inp;
5385796c8dcSSimon Schubert 		    --inleft;
5395796c8dcSSimon Schubert 		  }
5405796c8dcSSimon Schubert 	      }
5415796c8dcSSimon Schubert 	      break;
5425796c8dcSSimon Schubert 
5435796c8dcSSimon Schubert 	    case E2BIG:
5445796c8dcSSimon Schubert 	      /* We ran out of space in the output buffer.  Make it
5455796c8dcSSimon Schubert 		 bigger next time around.  */
5465796c8dcSSimon Schubert 	      space_request *= 2;
5475796c8dcSSimon Schubert 	      break;
5485796c8dcSSimon Schubert 
5495796c8dcSSimon Schubert 	    case EINVAL:
5505796c8dcSSimon Schubert 	      /* Incomplete input sequence.  FIXME: ought to report this
5515796c8dcSSimon Schubert 		 to the caller somehow.  */
5525796c8dcSSimon Schubert 	      inleft = 0;
5535796c8dcSSimon Schubert 	      break;
5545796c8dcSSimon Schubert 
5555796c8dcSSimon Schubert 	    default:
556c50c785cSJohn Marino 	      perror_with_name (_("Internal error while "
557c50c785cSJohn Marino 				  "converting character sets"));
5585796c8dcSSimon Schubert 	    }
5595796c8dcSSimon Schubert 	}
5605796c8dcSSimon Schubert     }
5615796c8dcSSimon Schubert 
5625796c8dcSSimon Schubert   do_cleanups (cleanups);
5635796c8dcSSimon Schubert }
5645796c8dcSSimon Schubert 
5655796c8dcSSimon Schubert 
5665796c8dcSSimon Schubert 
5675796c8dcSSimon Schubert /* An iterator that returns host wchar_t's from a target string.  */
5685796c8dcSSimon Schubert struct wchar_iterator
5695796c8dcSSimon Schubert {
5705796c8dcSSimon Schubert   /* The underlying iconv descriptor.  */
5715796c8dcSSimon Schubert   iconv_t desc;
5725796c8dcSSimon Schubert 
5735796c8dcSSimon Schubert   /* The input string.  This is updated as convert characters.  */
574*ef5ccd6cSJohn Marino   const gdb_byte *input;
5755796c8dcSSimon Schubert   /* The number of bytes remaining in the input.  */
5765796c8dcSSimon Schubert   size_t bytes;
5775796c8dcSSimon Schubert 
5785796c8dcSSimon Schubert   /* The width of an input character.  */
5795796c8dcSSimon Schubert   size_t width;
5805796c8dcSSimon Schubert 
5815796c8dcSSimon Schubert   /* The output buffer and its size.  */
5825796c8dcSSimon Schubert   gdb_wchar_t *out;
5835796c8dcSSimon Schubert   size_t out_size;
5845796c8dcSSimon Schubert };
5855796c8dcSSimon Schubert 
5865796c8dcSSimon Schubert /* Create a new iterator.  */
5875796c8dcSSimon Schubert struct wchar_iterator *
make_wchar_iterator(const gdb_byte * input,size_t bytes,const char * charset,size_t width)588c50c785cSJohn Marino make_wchar_iterator (const gdb_byte *input, size_t bytes,
589c50c785cSJohn Marino 		     const char *charset, size_t width)
5905796c8dcSSimon Schubert {
5915796c8dcSSimon Schubert   struct wchar_iterator *result;
5925796c8dcSSimon Schubert   iconv_t desc;
5935796c8dcSSimon Schubert 
5945796c8dcSSimon Schubert   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
5955796c8dcSSimon Schubert   if (desc == (iconv_t) -1)
596c50c785cSJohn Marino     perror_with_name (_("Converting character sets"));
5975796c8dcSSimon Schubert 
5985796c8dcSSimon Schubert   result = XNEW (struct wchar_iterator);
5995796c8dcSSimon Schubert   result->desc = desc;
600*ef5ccd6cSJohn Marino   result->input = input;
6015796c8dcSSimon Schubert   result->bytes = bytes;
6025796c8dcSSimon Schubert   result->width = width;
6035796c8dcSSimon Schubert 
6045796c8dcSSimon Schubert   result->out = XNEW (gdb_wchar_t);
6055796c8dcSSimon Schubert   result->out_size = 1;
6065796c8dcSSimon Schubert 
6075796c8dcSSimon Schubert   return result;
6085796c8dcSSimon Schubert }
6095796c8dcSSimon Schubert 
6105796c8dcSSimon Schubert static void
do_cleanup_iterator(void * p)6115796c8dcSSimon Schubert do_cleanup_iterator (void *p)
6125796c8dcSSimon Schubert {
6135796c8dcSSimon Schubert   struct wchar_iterator *iter = p;
6145796c8dcSSimon Schubert 
6155796c8dcSSimon Schubert   iconv_close (iter->desc);
6165796c8dcSSimon Schubert   xfree (iter->out);
6175796c8dcSSimon Schubert   xfree (iter);
6185796c8dcSSimon Schubert }
6195796c8dcSSimon Schubert 
6205796c8dcSSimon Schubert struct cleanup *
make_cleanup_wchar_iterator(struct wchar_iterator * iter)6215796c8dcSSimon Schubert make_cleanup_wchar_iterator (struct wchar_iterator *iter)
6225796c8dcSSimon Schubert {
6235796c8dcSSimon Schubert   return make_cleanup (do_cleanup_iterator, iter);
6245796c8dcSSimon Schubert }
6255796c8dcSSimon Schubert 
6265796c8dcSSimon Schubert int
wchar_iterate(struct wchar_iterator * iter,enum wchar_iterate_result * out_result,gdb_wchar_t ** out_chars,const gdb_byte ** ptr,size_t * len)6275796c8dcSSimon Schubert wchar_iterate (struct wchar_iterator *iter,
6285796c8dcSSimon Schubert 	       enum wchar_iterate_result *out_result,
6295796c8dcSSimon Schubert 	       gdb_wchar_t **out_chars,
6305796c8dcSSimon Schubert 	       const gdb_byte **ptr,
6315796c8dcSSimon Schubert 	       size_t *len)
6325796c8dcSSimon Schubert {
6335796c8dcSSimon Schubert   size_t out_request;
6345796c8dcSSimon Schubert 
6355796c8dcSSimon Schubert   /* Try to convert some characters.  At first we try to convert just
6365796c8dcSSimon Schubert      a single character.  The reason for this is that iconv does not
6375796c8dcSSimon Schubert      necessarily update its outgoing arguments when it encounters an
6385796c8dcSSimon Schubert      invalid input sequence -- but we want to reliably report this to
6395796c8dcSSimon Schubert      our caller so it can emit an escape sequence.  */
6405796c8dcSSimon Schubert   out_request = 1;
6415796c8dcSSimon Schubert   while (iter->bytes > 0)
6425796c8dcSSimon Schubert     {
643*ef5ccd6cSJohn Marino       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
6445796c8dcSSimon Schubert       char *outptr = (char *) &iter->out[0];
645*ef5ccd6cSJohn Marino       const gdb_byte *orig_inptr = iter->input;
6465796c8dcSSimon Schubert       size_t orig_in = iter->bytes;
6475796c8dcSSimon Schubert       size_t out_avail = out_request * sizeof (gdb_wchar_t);
6485796c8dcSSimon Schubert       size_t num;
649*ef5ccd6cSJohn Marino       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
650*ef5ccd6cSJohn Marino 
651*ef5ccd6cSJohn Marino       iter->input = (gdb_byte *) inptr;
652cf7f2e2dSJohn Marino 
6535796c8dcSSimon Schubert       if (r == (size_t) -1)
6545796c8dcSSimon Schubert 	{
6555796c8dcSSimon Schubert 	  switch (errno)
6565796c8dcSSimon Schubert 	    {
6575796c8dcSSimon Schubert 	    case EILSEQ:
658c50c785cSJohn Marino 	      /* Invalid input sequence.  We still might have
659c50c785cSJohn Marino 		 converted a character; if so, return it.  */
660c50c785cSJohn Marino 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
661c50c785cSJohn Marino 		break;
662c50c785cSJohn Marino 
663c50c785cSJohn Marino 	      /* Otherwise skip the first invalid character, and let
664c50c785cSJohn Marino 		 the caller know about it.  */
6655796c8dcSSimon Schubert 	      *out_result = wchar_iterate_invalid;
6665796c8dcSSimon Schubert 	      *ptr = iter->input;
6675796c8dcSSimon Schubert 	      *len = iter->width;
6685796c8dcSSimon Schubert 	      iter->input += iter->width;
6695796c8dcSSimon Schubert 	      iter->bytes -= iter->width;
6705796c8dcSSimon Schubert 	      return 0;
6715796c8dcSSimon Schubert 
6725796c8dcSSimon Schubert 	    case E2BIG:
6735796c8dcSSimon Schubert 	      /* We ran out of space.  We still might have converted a
6745796c8dcSSimon Schubert 		 character; if so, return it.  Otherwise, grow the
6755796c8dcSSimon Schubert 		 buffer and try again.  */
6765796c8dcSSimon Schubert 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
6775796c8dcSSimon Schubert 		break;
6785796c8dcSSimon Schubert 
6795796c8dcSSimon Schubert 	      ++out_request;
6805796c8dcSSimon Schubert 	      if (out_request > iter->out_size)
6815796c8dcSSimon Schubert 		{
6825796c8dcSSimon Schubert 		  iter->out_size = out_request;
6835796c8dcSSimon Schubert 		  iter->out = xrealloc (iter->out,
6845796c8dcSSimon Schubert 					out_request * sizeof (gdb_wchar_t));
6855796c8dcSSimon Schubert 		}
6865796c8dcSSimon Schubert 	      continue;
6875796c8dcSSimon Schubert 
6885796c8dcSSimon Schubert 	    case EINVAL:
6895796c8dcSSimon Schubert 	      /* Incomplete input sequence.  Let the caller know, and
6905796c8dcSSimon Schubert 		 arrange for future calls to see EOF.  */
6915796c8dcSSimon Schubert 	      *out_result = wchar_iterate_incomplete;
6925796c8dcSSimon Schubert 	      *ptr = iter->input;
6935796c8dcSSimon Schubert 	      *len = iter->bytes;
6945796c8dcSSimon Schubert 	      iter->bytes = 0;
6955796c8dcSSimon Schubert 	      return 0;
6965796c8dcSSimon Schubert 
6975796c8dcSSimon Schubert 	    default:
698c50c785cSJohn Marino 	      perror_with_name (_("Internal error while "
699c50c785cSJohn Marino 				  "converting character sets"));
7005796c8dcSSimon Schubert 	    }
7015796c8dcSSimon Schubert 	}
7025796c8dcSSimon Schubert 
7035796c8dcSSimon Schubert       /* We converted something.  */
7045796c8dcSSimon Schubert       num = out_request - out_avail / sizeof (gdb_wchar_t);
7055796c8dcSSimon Schubert       *out_result = wchar_iterate_ok;
7065796c8dcSSimon Schubert       *out_chars = iter->out;
7075796c8dcSSimon Schubert       *ptr = orig_inptr;
7085796c8dcSSimon Schubert       *len = orig_in - iter->bytes;
7095796c8dcSSimon Schubert       return num;
7105796c8dcSSimon Schubert     }
7115796c8dcSSimon Schubert 
7125796c8dcSSimon Schubert   /* Really done.  */
7135796c8dcSSimon Schubert   *out_result = wchar_iterate_eof;
7145796c8dcSSimon Schubert   return -1;
7155796c8dcSSimon Schubert }
7165796c8dcSSimon Schubert 
7175796c8dcSSimon Schubert 
7185796c8dcSSimon Schubert /* The charset.c module initialization function.  */
7195796c8dcSSimon Schubert 
7205796c8dcSSimon Schubert extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
7215796c8dcSSimon Schubert 
VEC(char_ptr)7225796c8dcSSimon Schubert static VEC (char_ptr) *charsets;
7235796c8dcSSimon Schubert 
7245796c8dcSSimon Schubert #ifdef PHONY_ICONV
7255796c8dcSSimon Schubert 
7265796c8dcSSimon Schubert static void
7275796c8dcSSimon Schubert find_charset_names (void)
7285796c8dcSSimon Schubert {
7295796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
7305796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, NULL);
7315796c8dcSSimon Schubert }
7325796c8dcSSimon Schubert 
7335796c8dcSSimon Schubert #else /* PHONY_ICONV */
7345796c8dcSSimon Schubert 
7355796c8dcSSimon Schubert /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
7365796c8dcSSimon Schubert    provides different symbols in the static and dynamic libraries.
7375796c8dcSSimon Schubert    So, configure may see libiconvlist but not iconvlist.  But, calling
7385796c8dcSSimon Schubert    iconvlist is the right thing to do and will work.  Hence we do a
7395796c8dcSSimon Schubert    check here but unconditionally call iconvlist below.  */
7405796c8dcSSimon Schubert #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
7415796c8dcSSimon Schubert 
7425796c8dcSSimon Schubert /* A helper function that adds some character sets to the vector of
7435796c8dcSSimon Schubert    all character sets.  This is a callback function for iconvlist.  */
7445796c8dcSSimon Schubert 
7455796c8dcSSimon Schubert static int
7465796c8dcSSimon Schubert add_one (unsigned int count, const char *const *names, void *data)
7475796c8dcSSimon Schubert {
7485796c8dcSSimon Schubert   unsigned int i;
7495796c8dcSSimon Schubert 
7505796c8dcSSimon Schubert   for (i = 0; i < count; ++i)
7515796c8dcSSimon Schubert     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
7525796c8dcSSimon Schubert 
7535796c8dcSSimon Schubert   return 0;
7545796c8dcSSimon Schubert }
7555796c8dcSSimon Schubert 
7565796c8dcSSimon Schubert static void
7575796c8dcSSimon Schubert find_charset_names (void)
7585796c8dcSSimon Schubert {
7595796c8dcSSimon Schubert   iconvlist (add_one, NULL);
7605796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, NULL);
7615796c8dcSSimon Schubert }
7625796c8dcSSimon Schubert 
7635796c8dcSSimon Schubert #else
7645796c8dcSSimon Schubert 
/* Decide whether LINE, a line printed by the iconv program, belongs
   to the human readable introduction rather than to the list of
   character sets.  Older iconv programs (e.g. 2.2.2) print that
   introduction even when stdout is not a tty; newer ones leave it
   out in that case.  Returns 1 if LINE should be skipped, 0 if
   not.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line that contains any of them
     anywhere (the match is done with strstr) is skipped.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several"
    };
  const size_t count = sizeof (intro_fragments) / sizeof (intro_fragments[0]);
  size_t idx = 0;

  while (idx < count)
    {
      if (strstr (line, intro_fragments[idx]) != NULL)
	return 1;
      ++idx;
    }

  return 0;
}
793cf7f2e2dSJohn Marino 
7945796c8dcSSimon Schubert static void
7955796c8dcSSimon Schubert find_charset_names (void)
7965796c8dcSSimon Schubert {
7975796c8dcSSimon Schubert   struct pex_obj *child;
7985796c8dcSSimon Schubert   char *args[3];
7995796c8dcSSimon Schubert   int err, status;
8005796c8dcSSimon Schubert   int fail = 1;
801a45ae5f8SJohn Marino   int flags;
802cf7f2e2dSJohn Marino   struct gdb_environ *iconv_env;
803a45ae5f8SJohn Marino   char *iconv_program;
8045796c8dcSSimon Schubert 
805c50c785cSJohn Marino   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
806c50c785cSJohn Marino      not a tty.  We need to recognize it and ignore it.  This text is
807c50c785cSJohn Marino      subject to translation, so force LANGUAGE=C.  */
808cf7f2e2dSJohn Marino   iconv_env = make_environ ();
809cf7f2e2dSJohn Marino   init_environ (iconv_env);
810cf7f2e2dSJohn Marino   set_in_environ (iconv_env, "LANGUAGE", "C");
811cf7f2e2dSJohn Marino   set_in_environ (iconv_env, "LC_ALL", "C");
812cf7f2e2dSJohn Marino 
813cf7f2e2dSJohn Marino   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
8145796c8dcSSimon Schubert 
815a45ae5f8SJohn Marino #ifdef ICONV_BIN
816a45ae5f8SJohn Marino   {
817a45ae5f8SJohn Marino     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
818a45ae5f8SJohn Marino 					      ICONV_BIN_RELOCATABLE);
819a45ae5f8SJohn Marino     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
820a45ae5f8SJohn Marino     xfree (iconv_dir);
821a45ae5f8SJohn Marino   }
822a45ae5f8SJohn Marino #else
823a45ae5f8SJohn Marino   iconv_program = xstrdup ("iconv");
824a45ae5f8SJohn Marino #endif
825a45ae5f8SJohn Marino   args[0] = iconv_program;
8265796c8dcSSimon Schubert   args[1] = "-l";
8275796c8dcSSimon Schubert   args[2] = NULL;
828a45ae5f8SJohn Marino   flags = PEX_STDERR_TO_STDOUT;
829a45ae5f8SJohn Marino #ifndef ICONV_BIN
830a45ae5f8SJohn Marino   flags |= PEX_SEARCH;
831a45ae5f8SJohn Marino #endif
8325796c8dcSSimon Schubert   /* Note that we simply ignore errors here.  */
833a45ae5f8SJohn Marino   if (!pex_run_in_environment (child, flags,
834a45ae5f8SJohn Marino 			       args[0], args, environ_vector (iconv_env),
835cf7f2e2dSJohn Marino 			       NULL, NULL, &err))
8365796c8dcSSimon Schubert     {
8375796c8dcSSimon Schubert       FILE *in = pex_read_output (child, 0);
8385796c8dcSSimon Schubert 
8395796c8dcSSimon Schubert       /* POSIX says that iconv -l uses an unspecified format.  We
8405796c8dcSSimon Schubert 	 parse the glibc and libiconv formats; feel free to add others
8415796c8dcSSimon Schubert 	 as needed.  */
842cf7f2e2dSJohn Marino 
843*ef5ccd6cSJohn Marino       while (in != NULL && !feof (in))
8445796c8dcSSimon Schubert 	{
8455796c8dcSSimon Schubert 	  /* The size of buf is chosen arbitrarily.  */
8465796c8dcSSimon Schubert 	  char buf[1024];
8475796c8dcSSimon Schubert 	  char *start, *r;
848cf7f2e2dSJohn Marino 	  int len;
8495796c8dcSSimon Schubert 
8505796c8dcSSimon Schubert 	  r = fgets (buf, sizeof (buf), in);
8515796c8dcSSimon Schubert 	  if (!r)
8525796c8dcSSimon Schubert 	    break;
8535796c8dcSSimon Schubert 	  len = strlen (r);
8545796c8dcSSimon Schubert 	  if (len <= 3)
8555796c8dcSSimon Schubert 	    continue;
856cf7f2e2dSJohn Marino 	  if (ignore_line_p (r))
857cf7f2e2dSJohn Marino 	    continue;
858cf7f2e2dSJohn Marino 
8595796c8dcSSimon Schubert 	  /* Strip off the newline.  */
8605796c8dcSSimon Schubert 	  --len;
8615796c8dcSSimon Schubert 	  /* Strip off one or two '/'s.  glibc will print lines like
8625796c8dcSSimon Schubert 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
8635796c8dcSSimon Schubert 	  if (buf[len - 1] == '/')
8645796c8dcSSimon Schubert 	    --len;
8655796c8dcSSimon Schubert 	  if (buf[len - 1] == '/')
8665796c8dcSSimon Schubert 	    --len;
8675796c8dcSSimon Schubert 	  buf[len] = '\0';
8685796c8dcSSimon Schubert 
8695796c8dcSSimon Schubert 	  /* libiconv will print multiple entries per line, separated
870c50c785cSJohn Marino 	     by spaces.  Older iconvs will print multiple entries per
871c50c785cSJohn Marino 	     line, indented by two spaces, and separated by ", "
872cf7f2e2dSJohn Marino 	     (i.e. the human readable form).  */
8735796c8dcSSimon Schubert 	  start = buf;
8745796c8dcSSimon Schubert 	  while (1)
8755796c8dcSSimon Schubert 	    {
8765796c8dcSSimon Schubert 	      int keep_going;
8775796c8dcSSimon Schubert 	      char *p;
8785796c8dcSSimon Schubert 
879cf7f2e2dSJohn Marino 	      /* Skip leading blanks.  */
880cf7f2e2dSJohn Marino 	      for (p = start; *p && *p == ' '; ++p)
881cf7f2e2dSJohn Marino 		;
882cf7f2e2dSJohn Marino 	      start = p;
883cf7f2e2dSJohn Marino 	      /* Find the next space, comma, or end-of-line.  */
884cf7f2e2dSJohn Marino 	      for ( ; *p && *p != ' ' && *p != ','; ++p)
8855796c8dcSSimon Schubert 		;
8865796c8dcSSimon Schubert 	      /* Ignore an empty result.  */
8875796c8dcSSimon Schubert 	      if (p == start)
8885796c8dcSSimon Schubert 		break;
8895796c8dcSSimon Schubert 	      keep_going = *p;
8905796c8dcSSimon Schubert 	      *p = '\0';
8915796c8dcSSimon Schubert 	      VEC_safe_push (char_ptr, charsets, xstrdup (start));
8925796c8dcSSimon Schubert 	      if (!keep_going)
8935796c8dcSSimon Schubert 		break;
8945796c8dcSSimon Schubert 	      /* Skip any extra spaces.  */
8955796c8dcSSimon Schubert 	      for (start = p + 1; *start && *start == ' '; ++start)
8965796c8dcSSimon Schubert 		;
8975796c8dcSSimon Schubert 	    }
8985796c8dcSSimon Schubert 	}
8995796c8dcSSimon Schubert 
9005796c8dcSSimon Schubert       if (pex_get_status (child, 1, &status)
9015796c8dcSSimon Schubert 	  && WIFEXITED (status) && !WEXITSTATUS (status))
9025796c8dcSSimon Schubert 	fail = 0;
9035796c8dcSSimon Schubert 
9045796c8dcSSimon Schubert     }
9055796c8dcSSimon Schubert 
906a45ae5f8SJohn Marino   xfree (iconv_program);
9075796c8dcSSimon Schubert   pex_free (child);
908cf7f2e2dSJohn Marino   free_environ (iconv_env);
9095796c8dcSSimon Schubert 
9105796c8dcSSimon Schubert   if (fail)
9115796c8dcSSimon Schubert     {
9125796c8dcSSimon Schubert       /* Some error occurred, so drop the vector.  */
913*ef5ccd6cSJohn Marino       free_char_ptr_vec (charsets);
914*ef5ccd6cSJohn Marino       charsets = NULL;
9155796c8dcSSimon Schubert     }
9165796c8dcSSimon Schubert   else
9175796c8dcSSimon Schubert     VEC_safe_push (char_ptr, charsets, NULL);
9185796c8dcSSimon Schubert }
9195796c8dcSSimon Schubert 
9205796c8dcSSimon Schubert #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
9215796c8dcSSimon Schubert #endif /* PHONY_ICONV */
9225796c8dcSSimon Schubert 
923cf7f2e2dSJohn Marino /* The "auto" target charset used by default_auto_charset.  */
924cf7f2e2dSJohn Marino static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
925cf7f2e2dSJohn Marino 
926cf7f2e2dSJohn Marino const char *
default_auto_charset(void)927cf7f2e2dSJohn Marino default_auto_charset (void)
928cf7f2e2dSJohn Marino {
929cf7f2e2dSJohn Marino   return auto_target_charset_name;
930cf7f2e2dSJohn Marino }
931cf7f2e2dSJohn Marino 
932cf7f2e2dSJohn Marino const char *
default_auto_wide_charset(void)933cf7f2e2dSJohn Marino default_auto_wide_charset (void)
934cf7f2e2dSJohn Marino {
935cf7f2e2dSJohn Marino   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
936cf7f2e2dSJohn Marino }
937cf7f2e2dSJohn Marino 
938c50c785cSJohn Marino 
939c50c785cSJohn Marino #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
940c50c785cSJohn Marino /* Macro used for UTF or UCS endianness suffix.  */
941c50c785cSJohn Marino #if WORDS_BIGENDIAN
942c50c785cSJohn Marino #define ENDIAN_SUFFIX "BE"
943c50c785cSJohn Marino #else
944c50c785cSJohn Marino #define ENDIAN_SUFFIX "LE"
945c50c785cSJohn Marino #endif
946c50c785cSJohn Marino 
947c50c785cSJohn Marino /* The code below serves to generate a compile time error if
948c50c785cSJohn Marino    gdb_wchar_t type is not of size 2 nor 4, despite the fact that
949c50c785cSJohn Marino    macro __STDC_ISO_10646__ is defined.
950c50c785cSJohn Marino    This is better than a gdb_assert call, because GDB cannot handle
951c50c785cSJohn Marino    strings correctly if this size is different.  */
952c50c785cSJohn Marino 
953c50c785cSJohn Marino extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
954c50c785cSJohn Marino 				       || sizeof (gdb_wchar_t) == 4)
955c50c785cSJohn Marino 				      ? 1 : -1];
956c50c785cSJohn Marino 
957c50c785cSJohn Marino /* intermediate_encoding returns the charset unsed internally by
958c50c785cSJohn Marino    GDB to convert between target and host encodings. As the test above
959c50c785cSJohn Marino    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
960c50c785cSJohn Marino    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
961c50c785cSJohn Marino    otherwise an error is generated.  */
962c50c785cSJohn Marino 
963c50c785cSJohn Marino const char *
intermediate_encoding(void)964c50c785cSJohn Marino intermediate_encoding (void)
965c50c785cSJohn Marino {
966c50c785cSJohn Marino   iconv_t desc;
967c50c785cSJohn Marino   static const char *stored_result = NULL;
968c50c785cSJohn Marino   char *result;
969c50c785cSJohn Marino 
970c50c785cSJohn Marino   if (stored_result)
971c50c785cSJohn Marino     return stored_result;
972c50c785cSJohn Marino   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
973c50c785cSJohn Marino 		       ENDIAN_SUFFIX);
974c50c785cSJohn Marino   /* Check that the name is supported by iconv_open.  */
975c50c785cSJohn Marino   desc = iconv_open (result, host_charset ());
976c50c785cSJohn Marino   if (desc != (iconv_t) -1)
977c50c785cSJohn Marino     {
978c50c785cSJohn Marino       iconv_close (desc);
979c50c785cSJohn Marino       stored_result = result;
980c50c785cSJohn Marino       return result;
981c50c785cSJohn Marino     }
982c50c785cSJohn Marino   /* Not valid, free the allocated memory.  */
983c50c785cSJohn Marino   xfree (result);
984c50c785cSJohn Marino   /* Second try, with UCS-2 type.  */
985c50c785cSJohn Marino   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
986c50c785cSJohn Marino 		       ENDIAN_SUFFIX);
987c50c785cSJohn Marino   /* Check that the name is supported by iconv_open.  */
988c50c785cSJohn Marino   desc = iconv_open (result, host_charset ());
989c50c785cSJohn Marino   if (desc != (iconv_t) -1)
990c50c785cSJohn Marino     {
991c50c785cSJohn Marino       iconv_close (desc);
992c50c785cSJohn Marino       stored_result = result;
993c50c785cSJohn Marino       return result;
994c50c785cSJohn Marino     }
995c50c785cSJohn Marino   /* Not valid, free the allocated memory.  */
996c50c785cSJohn Marino   xfree (result);
997c50c785cSJohn Marino   /* No valid charset found, generate error here.  */
998c50c785cSJohn Marino   error (_("Unable to find a vaild charset for string conversions"));
999c50c785cSJohn Marino }
1000c50c785cSJohn Marino 
1001c50c785cSJohn Marino #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
1002c50c785cSJohn Marino 
10035796c8dcSSimon Schubert void
_initialize_charset(void)10045796c8dcSSimon Schubert _initialize_charset (void)
10055796c8dcSSimon Schubert {
1006cf7f2e2dSJohn Marino   /* The first element is always "auto".  */
10075796c8dcSSimon Schubert   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
10085796c8dcSSimon Schubert   find_charset_names ();
10095796c8dcSSimon Schubert 
10105796c8dcSSimon Schubert   if (VEC_length (char_ptr, charsets) > 1)
10115796c8dcSSimon Schubert     charset_enum = (const char **) VEC_address (char_ptr, charsets);
10125796c8dcSSimon Schubert   else
10135796c8dcSSimon Schubert     charset_enum = default_charset_names;
10145796c8dcSSimon Schubert 
10155796c8dcSSimon Schubert #ifndef PHONY_ICONV
10165796c8dcSSimon Schubert #ifdef HAVE_LANGINFO_CODESET
1017cf7f2e2dSJohn Marino   /* The result of nl_langinfo may be overwritten later.  This may
1018cf7f2e2dSJohn Marino      leak a little memory, if the user later changes the host charset,
1019cf7f2e2dSJohn Marino      but that doesn't matter much.  */
1020cf7f2e2dSJohn Marino   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1021c50c785cSJohn Marino   /* Solaris will return `646' here -- but the Solaris iconv then does
1022c50c785cSJohn Marino      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
1023cf7f2e2dSJohn Marino      which GNU libiconv doesn't like (infinite loop).  */
1024cf7f2e2dSJohn Marino   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
10255796c8dcSSimon Schubert     auto_host_charset_name = "ASCII";
1026cf7f2e2dSJohn Marino   auto_target_charset_name = auto_host_charset_name;
1027cf7f2e2dSJohn Marino #elif defined (USE_WIN32API)
1028cf7f2e2dSJohn Marino   {
1029c50c785cSJohn Marino     /* "CP" + x<=5 digits + paranoia.  */
1030c50c785cSJohn Marino     static char w32_host_default_charset[16];
10315796c8dcSSimon Schubert 
1032cf7f2e2dSJohn Marino     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1033cf7f2e2dSJohn Marino 	      "CP%d", GetACP());
1034cf7f2e2dSJohn Marino     auto_host_charset_name = w32_host_default_charset;
1035cf7f2e2dSJohn Marino     auto_target_charset_name = auto_host_charset_name;
1036cf7f2e2dSJohn Marino   }
10375796c8dcSSimon Schubert #endif
10385796c8dcSSimon Schubert #endif
10395796c8dcSSimon Schubert 
10405796c8dcSSimon Schubert   add_setshow_enum_cmd ("charset", class_support,
1041cf7f2e2dSJohn Marino 			charset_enum, &host_charset_name, _("\
10425796c8dcSSimon Schubert Set the host and target character sets."), _("\
10435796c8dcSSimon Schubert Show the host and target character sets."), _("\
10445796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\
10455796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\
10465796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\
10475796c8dcSSimon Schubert not support any others.\n\
10485796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set charset <TAB>'."),
10495796c8dcSSimon Schubert 			/* Note that the sfunc below needs to set
10505796c8dcSSimon Schubert 			   target_charset_name, because the 'set
10515796c8dcSSimon Schubert 			   charset' command sets two variables.  */
10525796c8dcSSimon Schubert 			set_charset_sfunc,
10535796c8dcSSimon Schubert 			show_charset,
10545796c8dcSSimon Schubert 			&setlist, &showlist);
10555796c8dcSSimon Schubert 
10565796c8dcSSimon Schubert   add_setshow_enum_cmd ("host-charset", class_support,
10575796c8dcSSimon Schubert 			charset_enum, &host_charset_name, _("\
10585796c8dcSSimon Schubert Set the host character set."), _("\
10595796c8dcSSimon Schubert Show the host character set."), _("\
10605796c8dcSSimon Schubert The `host character set' is the one used by the system GDB is running on.\n\
10615796c8dcSSimon Schubert You may only use supersets of ASCII for your host character set; GDB does\n\
10625796c8dcSSimon Schubert not support any others.\n\
10635796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
10645796c8dcSSimon Schubert 			set_host_charset_sfunc,
10655796c8dcSSimon Schubert 			show_host_charset_name,
10665796c8dcSSimon Schubert 			&setlist, &showlist);
10675796c8dcSSimon Schubert 
10685796c8dcSSimon Schubert   add_setshow_enum_cmd ("target-charset", class_support,
1069cf7f2e2dSJohn Marino 			charset_enum, &target_charset_name, _("\
10705796c8dcSSimon Schubert Set the target character set."), _("\
10715796c8dcSSimon Schubert Show the target character set."), _("\
10725796c8dcSSimon Schubert The `target character set' is the one used by the program being debugged.\n\
10735796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\
10745796c8dcSSimon Schubert character sets as needed.\n\
10755796c8dcSSimon Schubert To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
10765796c8dcSSimon Schubert 			set_target_charset_sfunc,
10775796c8dcSSimon Schubert 			show_target_charset_name,
10785796c8dcSSimon Schubert 			&setlist, &showlist);
10795796c8dcSSimon Schubert 
10805796c8dcSSimon Schubert   add_setshow_enum_cmd ("target-wide-charset", class_support,
1081cf7f2e2dSJohn Marino 			charset_enum, &target_wide_charset_name,
10825796c8dcSSimon Schubert 			_("\
10835796c8dcSSimon Schubert Set the target wide character set."), _("\
10845796c8dcSSimon Schubert Show the target wide character set."), _("\
1085c50c785cSJohn Marino The `target wide character set' is the one used by the program being debugged.\
1086c50c785cSJohn Marino \nIn particular it is the encoding used by `wchar_t'.\n\
10875796c8dcSSimon Schubert GDB translates characters and strings between the host and target\n\
10885796c8dcSSimon Schubert character sets as needed.\n\
10895796c8dcSSimon Schubert To see a list of the character sets GDB supports, type\n\
10905796c8dcSSimon Schubert `set target-wide-charset'<TAB>"),
10915796c8dcSSimon Schubert 			set_target_wide_charset_sfunc,
10925796c8dcSSimon Schubert 			show_target_wide_charset_name,
10935796c8dcSSimon Schubert 			&setlist, &showlist);
10945796c8dcSSimon Schubert }
1095