xref: /netbsd-src/external/gpl3/gdb/dist/gdb/charset.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* Character set conversion support for GDB.
2 
3    Copyright (C) 2001-2020 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_obstack.h"
24 #include "gdbsupport/gdb_wait.h"
25 #include "charset-list.h"
26 #include "gdbsupport/environ.h"
27 #include "arch-utils.h"
28 #include "gdbsupport/gdb_vecs.h"
29 #include <ctype.h>
30 
31 #ifdef USE_WIN32API
32 #include <windows.h>
33 #endif
34 
35 /* How GDB's character set support works
36 
37    GDB has three global settings:
38 
39    - The `current host character set' is the character set GDB should
40      use in talking to the user, and which (hopefully) the user's
41      terminal knows how to display properly.  Most users should not
42      change this.
43 
44    - The `current target character set' is the character set the
45      program being debugged uses.
46 
47    - The `current target wide character set' is the wide character set
48      the program being debugged uses, that is, the encoding used for
49      wchar_t.
50 
51    There are commands to set each of these, and mechanisms for
52    choosing reasonable default values.  GDB has a global list of
53    character sets that it can use as its host or target character
54    sets.
55 
56    The header file `charset.h' declares various functions that
57    different pieces of GDB need to perform tasks like:
58 
59    - printing target strings and characters to the user's terminal
60      (mostly target->host conversions),
61 
62    - building target-appropriate representations of strings and
63      characters the user enters in expressions (mostly host->target
64      conversions),
65 
66      and so on.
67 
68    To avoid excessive code duplication and maintenance efforts,
69    GDB simply requires a capable iconv function.  Users on platforms
70    without a suitable iconv can use the GNU iconv library.  */
71 
72 
73 #ifdef PHONY_ICONV
74 
75 /* Provide a phony iconv that does as little as possible.  Also,
76    arrange for there to be a single available character set.  */
77 
78 #undef GDB_DEFAULT_HOST_CHARSET
79 #ifdef USE_WIN32API
80 # define GDB_DEFAULT_HOST_CHARSET "CP1252"
81 #else
82 # define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
83 #endif
84 #define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET
85 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
86 #undef DEFAULT_CHARSET_NAMES
87 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
88 
89 #undef iconv_t
90 #define iconv_t int
91 #undef iconv_open
92 #define iconv_open phony_iconv_open
93 #undef iconv
94 #define iconv phony_iconv
95 #undef iconv_close
96 #define iconv_close phony_iconv_close
97 
98 #undef ICONV_CONST
99 #define ICONV_CONST const
100 
101 /* We allow conversions from UTF-32, wchar_t, and the host charset.
102    We allow conversions to wchar_t and the host charset.
103    Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
104    0 otherwise.  This is used as a flag in calls to iconv.  */
105 
106 static iconv_t
107 phony_iconv_open (const char *to, const char *from)
108 {
109   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
110     return -1;
111 
112   if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
113     return 1;
114 
115   if (!strcmp (from, "UTF-32LE"))
116     return 2;
117 
118   if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
119     return -1;
120 
121   return 0;
122 }
123 
124 static int
125 phony_iconv_close (iconv_t arg)
126 {
127   return 0;
128 }
129 
130 static size_t
131 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
132 	     char **outbuf, size_t *outbytesleft)
133 {
134   if (utf_flag)
135     {
136       enum bfd_endian endian
137 	= utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
138       while (*inbytesleft >= 4)
139 	{
140 	  unsigned long c
141 	    = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
142 
143 	  if (c >= 256)
144 	    {
145 	      errno = EILSEQ;
146 	      return -1;
147 	    }
148 	  if (*outbytesleft < 1)
149 	    {
150 	      errno = E2BIG;
151 	      return -1;
152 	    }
153 	  **outbuf = c & 0xff;
154 	  ++*outbuf;
155 	  --*outbytesleft;
156 
157 	  *inbuf += 4;
158 	  *inbytesleft -= 4;
159 	}
160       if (*inbytesleft)
161 	{
162 	  /* Partial sequence on input.  */
163 	  errno = EINVAL;
164 	  return -1;
165 	}
166     }
167   else
168     {
169       /* In all other cases we simply copy input bytes to the
170 	 output.  */
171       size_t amt = *inbytesleft;
172 
173       if (amt > *outbytesleft)
174 	amt = *outbytesleft;
175       memcpy (*outbuf, *inbuf, amt);
176       *inbuf += amt;
177       *outbuf += amt;
178       *inbytesleft -= amt;
179       *outbytesleft -= amt;
180       if (*inbytesleft)
181 	{
182 	  errno = E2BIG;
183 	  return -1;
184 	}
185     }
186 
187   /* The number of non-reversible conversions -- but they were all
188      reversible.  */
189   return 0;
190 }
191 
192 #else /* PHONY_ICONV */
193 
194 /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
195    to ENOENT, while gnulib defines it to a different value.  Always
196    map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */
197 
198 static size_t
199 gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
200 	   char **outbuf, size_t *outbytesleft)
201 {
202   size_t ret;
203 
204   ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
205   if (errno == ENOENT)
206     errno = EILSEQ;
207   return ret;
208 }
209 
210 #undef iconv
211 #define iconv gdb_iconv
212 
213 #endif /* PHONY_ICONV */
214 
215 
216 /* The global lists of character sets and translations.  */
217 
218 
219 #ifndef GDB_DEFAULT_TARGET_CHARSET
220 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
221 #endif
222 
223 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
224 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
225 #endif
226 
227 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
228 static const char *host_charset_name = "auto";
229 static void
230 show_host_charset_name (struct ui_file *file, int from_tty,
231 			struct cmd_list_element *c,
232 			const char *value)
233 {
234   if (!strcmp (value, "auto"))
235     fprintf_filtered (file,
236 		      _("The host character set is \"auto; currently %s\".\n"),
237 		      auto_host_charset_name);
238   else
239     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
240 }
241 
242 static const char *target_charset_name = "auto";
243 static void
244 show_target_charset_name (struct ui_file *file, int from_tty,
245 			  struct cmd_list_element *c, const char *value)
246 {
247   if (!strcmp (value, "auto"))
248     fprintf_filtered (file,
249 		      _("The target character set is \"auto; "
250 		        "currently %s\".\n"),
251 		      gdbarch_auto_charset (get_current_arch ()));
252   else
253     fprintf_filtered (file, _("The target character set is \"%s\".\n"),
254 		      value);
255 }
256 
257 static const char *target_wide_charset_name = "auto";
258 static void
259 show_target_wide_charset_name (struct ui_file *file,
260 			       int from_tty,
261 			       struct cmd_list_element *c,
262 			       const char *value)
263 {
264   if (!strcmp (value, "auto"))
265     fprintf_filtered (file,
266 		      _("The target wide character set is \"auto; "
267 		        "currently %s\".\n"),
268 		      gdbarch_auto_wide_charset (get_current_arch ()));
269   else
270     fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
271 		      value);
272 }
273 
274 static const char *default_charset_names[] =
275 {
276   DEFAULT_CHARSET_NAMES
277   0
278 };
279 
280 static const char **charset_enum;
281 
282 
283 /* If the target wide character set has big- or little-endian
284    variants, these are the corresponding names.  */
285 static const char *target_wide_charset_be_name;
286 static const char *target_wide_charset_le_name;
287 
288 /* The architecture for which the BE- and LE-names are valid.  */
289 static struct gdbarch *be_le_arch;
290 
291 /* A helper function which sets the target wide big- and little-endian
292    character set names, if possible.  */
293 
294 static void
295 set_be_le_names (struct gdbarch *gdbarch)
296 {
297   if (be_le_arch == gdbarch)
298     return;
299   be_le_arch = gdbarch;
300 
301 #ifdef PHONY_ICONV
302   /* Match the wide charset names recognized by phony_iconv_open.  */
303   target_wide_charset_le_name = "UTF-32LE";
304   target_wide_charset_be_name = "UTF-32BE";
305 #else
306   int i, len;
307   const char *target_wide;
308 
309   target_wide_charset_le_name = NULL;
310   target_wide_charset_be_name = NULL;
311 
312   target_wide = target_wide_charset_name;
313   if (!strcmp (target_wide, "auto"))
314     target_wide = gdbarch_auto_wide_charset (gdbarch);
315 
316   len = strlen (target_wide);
317   for (i = 0; charset_enum[i]; ++i)
318     {
319       if (strncmp (target_wide, charset_enum[i], len))
320 	continue;
321       if ((charset_enum[i][len] == 'B'
322 	   || charset_enum[i][len] == 'L')
323 	  && charset_enum[i][len + 1] == 'E'
324 	  && charset_enum[i][len + 2] == '\0')
325 	{
326 	  if (charset_enum[i][len] == 'B')
327 	    target_wide_charset_be_name = charset_enum[i];
328 	  else
329 	    target_wide_charset_le_name = charset_enum[i];
330 	}
331     }
332 # endif  /* PHONY_ICONV */
333 }
334 
335 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
336    target-wide-charset', 'set charset' sfunc's.  */
337 
338 static void
339 validate (struct gdbarch *gdbarch)
340 {
341   iconv_t desc;
342   const char *host_cset = host_charset ();
343   const char *target_cset = target_charset (gdbarch);
344   const char *target_wide_cset = target_wide_charset_name;
345 
346   if (!strcmp (target_wide_cset, "auto"))
347     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
348 
349   desc = iconv_open (target_wide_cset, host_cset);
350   if (desc == (iconv_t) -1)
351     error (_("Cannot convert between character sets `%s' and `%s'"),
352 	   target_wide_cset, host_cset);
353   iconv_close (desc);
354 
355   desc = iconv_open (target_cset, host_cset);
356   if (desc == (iconv_t) -1)
357     error (_("Cannot convert between character sets `%s' and `%s'"),
358 	   target_cset, host_cset);
359   iconv_close (desc);
360 
361   /* Clear the cache.  */
362   be_le_arch = NULL;
363 }
364 
365 /* This is the sfunc for the 'set charset' command.  */
366 static void
367 set_charset_sfunc (const char *charset, int from_tty,
368 		   struct cmd_list_element *c)
369 {
370   /* CAREFUL: set the target charset here as well.  */
371   target_charset_name = host_charset_name;
372   validate (get_current_arch ());
373 }
374 
375 /* 'set host-charset' command sfunc.  We need a wrapper here because
376    the function needs to have a specific signature.  */
377 static void
378 set_host_charset_sfunc (const char *charset, int from_tty,
379 			struct cmd_list_element *c)
380 {
381   validate (get_current_arch ());
382 }
383 
384 /* Wrapper for the 'set target-charset' command.  */
385 static void
386 set_target_charset_sfunc (const char *charset, int from_tty,
387 			  struct cmd_list_element *c)
388 {
389   validate (get_current_arch ());
390 }
391 
392 /* Wrapper for the 'set target-wide-charset' command.  */
393 static void
394 set_target_wide_charset_sfunc (const char *charset, int from_tty,
395 			       struct cmd_list_element *c)
396 {
397   validate (get_current_arch ());
398 }
399 
400 /* sfunc for the 'show charset' command.  */
401 static void
402 show_charset (struct ui_file *file, int from_tty,
403 	      struct cmd_list_element *c,
404 	      const char *name)
405 {
406   show_host_charset_name (file, from_tty, c, host_charset_name);
407   show_target_charset_name (file, from_tty, c, target_charset_name);
408   show_target_wide_charset_name (file, from_tty, c,
409 				 target_wide_charset_name);
410 }
411 
412 
413 /* Accessor functions.  */
414 
415 const char *
416 host_charset (void)
417 {
418   if (!strcmp (host_charset_name, "auto"))
419     return auto_host_charset_name;
420   return host_charset_name;
421 }
422 
423 const char *
424 target_charset (struct gdbarch *gdbarch)
425 {
426   if (!strcmp (target_charset_name, "auto"))
427     return gdbarch_auto_charset (gdbarch);
428   return target_charset_name;
429 }
430 
431 const char *
432 target_wide_charset (struct gdbarch *gdbarch)
433 {
434   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
435 
436   set_be_le_names (gdbarch);
437   if (byte_order == BFD_ENDIAN_BIG)
438     {
439       if (target_wide_charset_be_name)
440 	return target_wide_charset_be_name;
441     }
442   else
443     {
444       if (target_wide_charset_le_name)
445 	return target_wide_charset_le_name;
446     }
447 
448   if (!strcmp (target_wide_charset_name, "auto"))
449     return gdbarch_auto_wide_charset (gdbarch);
450 
451   return target_wide_charset_name;
452 }
453 
454 
455 /* Host character set management.  For the time being, we assume that
456    the host character set is some superset of ASCII.  */
457 
/* Return the control character corresponding to the host character
   C.  '?' maps to 0177; any other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
465 
466 /* Convert a host character, C, to its hex value.  C must already have
467    been validated using isxdigit.  */
468 
469 int
470 host_hex_value (char c)
471 {
472   if (isdigit (c))
473     return c - '0';
474   if (c >= 'a' && c <= 'f')
475     return 10 + c - 'a';
476   gdb_assert (c >= 'A' && c <= 'F');
477   return 10 + c - 'A';
478 }
479 
480 
481 /* Public character management functions.  */
482 
483 class iconv_wrapper
484 {
485 public:
486 
487   iconv_wrapper (const char *to, const char *from)
488   {
489     m_desc = iconv_open (to, from);
490     if (m_desc == (iconv_t) -1)
491       perror_with_name (_("Converting character sets"));
492   }
493 
494   ~iconv_wrapper ()
495   {
496     iconv_close (m_desc);
497   }
498 
499   size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp,
500 		  size_t *outleft)
501   {
502     return iconv (m_desc, inp, inleft, outp, outleft);
503   }
504 
505 private:
506 
507   iconv_t m_desc;
508 };
509 
510 void
511 convert_between_encodings (const char *from, const char *to,
512 			   const gdb_byte *bytes, unsigned int num_bytes,
513 			   int width, struct obstack *output,
514 			   enum transliterations translit)
515 {
516   size_t inleft;
517   ICONV_CONST char *inp;
518   unsigned int space_request;
519 
520   /* Often, the host and target charsets will be the same.  */
521   if (!strcmp (from, to))
522     {
523       obstack_grow (output, bytes, num_bytes);
524       return;
525     }
526 
527   iconv_wrapper desc (to, from);
528 
529   inleft = num_bytes;
530   inp = (ICONV_CONST char *) bytes;
531 
532   space_request = num_bytes;
533 
534   while (inleft > 0)
535     {
536       char *outp;
537       size_t outleft, r;
538       int old_size;
539 
540       old_size = obstack_object_size (output);
541       obstack_blank (output, space_request);
542 
543       outp = (char *) obstack_base (output) + old_size;
544       outleft = space_request;
545 
546       r = desc.convert (&inp, &inleft, &outp, &outleft);
547 
548       /* Now make sure that the object on the obstack only includes
549 	 bytes we have converted.  */
550       obstack_blank_fast (output, -(ssize_t) outleft);
551 
552       if (r == (size_t) -1)
553 	{
554 	  switch (errno)
555 	    {
556 	    case EILSEQ:
557 	      {
558 		int i;
559 
560 		/* Invalid input sequence.  */
561 		if (translit == translit_none)
562 		  error (_("Could not convert character "
563 			   "to `%s' character set"), to);
564 
565 		/* We emit escape sequence for the bytes, skip them,
566 		   and try again.  */
567 		for (i = 0; i < width; ++i)
568 		  {
569 		    char octal[5];
570 
571 		    xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
572 		    obstack_grow_str (output, octal);
573 
574 		    ++inp;
575 		    --inleft;
576 		  }
577 	      }
578 	      break;
579 
580 	    case E2BIG:
581 	      /* We ran out of space in the output buffer.  Make it
582 		 bigger next time around.  */
583 	      space_request *= 2;
584 	      break;
585 
586 	    case EINVAL:
587 	      /* Incomplete input sequence.  FIXME: ought to report this
588 		 to the caller somehow.  */
589 	      inleft = 0;
590 	      break;
591 
592 	    default:
593 	      perror_with_name (_("Internal error while "
594 				  "converting character sets"));
595 	    }
596 	}
597     }
598 }
599 
600 
601 
602 /* Create a new iterator.  */
603 wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes,
604 				const char *charset, size_t width)
605 : m_input (input),
606   m_bytes (bytes),
607   m_width (width),
608   m_out (1)
609 {
610   m_desc = iconv_open (INTERMEDIATE_ENCODING, charset);
611   if (m_desc == (iconv_t) -1)
612     perror_with_name (_("Converting character sets"));
613 }
614 
615 wchar_iterator::~wchar_iterator ()
616 {
617   if (m_desc != (iconv_t) -1)
618     iconv_close (m_desc);
619 }
620 
621 int
622 wchar_iterator::iterate (enum wchar_iterate_result *out_result,
623 			 gdb_wchar_t **out_chars,
624 			 const gdb_byte **ptr,
625 			 size_t *len)
626 {
627   size_t out_request;
628 
629   /* Try to convert some characters.  At first we try to convert just
630      a single character.  The reason for this is that iconv does not
631      necessarily update its outgoing arguments when it encounters an
632      invalid input sequence -- but we want to reliably report this to
633      our caller so it can emit an escape sequence.  */
634   out_request = 1;
635   while (m_bytes > 0)
636     {
637       ICONV_CONST char *inptr = (ICONV_CONST char *) m_input;
638       char *outptr = (char *) m_out.data ();
639       const gdb_byte *orig_inptr = m_input;
640       size_t orig_in = m_bytes;
641       size_t out_avail = out_request * sizeof (gdb_wchar_t);
642       size_t num;
643       size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail);
644 
645       m_input = (gdb_byte *) inptr;
646 
647       if (r == (size_t) -1)
648 	{
649 	  switch (errno)
650 	    {
651 	    case EILSEQ:
652 	      /* Invalid input sequence.  We still might have
653 		 converted a character; if so, return it.  */
654 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
655 		break;
656 
657 	      /* Otherwise skip the first invalid character, and let
658 		 the caller know about it.  */
659 	      *out_result = wchar_iterate_invalid;
660 	      *ptr = m_input;
661 	      *len = m_width;
662 	      m_input += m_width;
663 	      m_bytes -= m_width;
664 	      return 0;
665 
666 	    case E2BIG:
667 	      /* We ran out of space.  We still might have converted a
668 		 character; if so, return it.  Otherwise, grow the
669 		 buffer and try again.  */
670 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
671 		break;
672 
673 	      ++out_request;
674 	      if (out_request > m_out.size ())
675 		m_out.resize (out_request);
676 	      continue;
677 
678 	    case EINVAL:
679 	      /* Incomplete input sequence.  Let the caller know, and
680 		 arrange for future calls to see EOF.  */
681 	      *out_result = wchar_iterate_incomplete;
682 	      *ptr = m_input;
683 	      *len = m_bytes;
684 	      m_bytes = 0;
685 	      return 0;
686 
687 	    default:
688 	      perror_with_name (_("Internal error while "
689 				  "converting character sets"));
690 	    }
691 	}
692 
693       /* We converted something.  */
694       num = out_request - out_avail / sizeof (gdb_wchar_t);
695       *out_result = wchar_iterate_ok;
696       *out_chars = m_out.data ();
697       *ptr = orig_inptr;
698       *len = orig_in - m_bytes;
699       return num;
700     }
701 
702   /* Really done.  */
703   *out_result = wchar_iterate_eof;
704   return -1;
705 }
706 
707 struct charset_vector
708 {
709   ~charset_vector ()
710   {
711     clear ();
712   }
713 
714   void clear ()
715   {
716     for (char *c : charsets)
717       xfree (c);
718 
719     charsets.clear ();
720   }
721 
722   std::vector<char *> charsets;
723 };
724 
725 static charset_vector charsets;
726 
727 #ifdef PHONY_ICONV
728 
729 static void
730 find_charset_names (void)
731 {
732   charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET));
733   charsets.charsets.push_back (NULL);
734 }
735 
736 #else /* PHONY_ICONV */
737 
738 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
739    provides different symbols in the static and dynamic libraries.
740    So, configure may see libiconvlist but not iconvlist.  But, calling
741    iconvlist is the right thing to do and will work.  Hence we do a
742    check here but unconditionally call iconvlist below.  */
743 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
744 
745 /* A helper function that adds some character sets to the vector of
746    all character sets.  This is a callback function for iconvlist.  */
747 
748 static int
749 add_one (unsigned int count, const char *const *names, void *data)
750 {
751   unsigned int i;
752 
753   for (i = 0; i < count; ++i)
754     charsets.charsets.push_back (xstrdup (names[i]));
755 
756   return 0;
757 }
758 
759 static void
760 find_charset_names (void)
761 {
762   iconvlist (add_one, NULL);
763 
764   charsets.charsets.push_back (NULL);
765 }
766 
767 #else
768 
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.

   A line is ignored when it contains, anywhere, one of the known
   fragments of that introduction.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introductory text; matched with strstr.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several"
    };
  const size_t fragment_count
    = sizeof (intro_fragments) / sizeof (intro_fragments[0]);
  size_t idx;

  for (idx = 0; idx < fragment_count; ++idx)
    if (strstr (line, intro_fragments[idx]) != NULL)
      return 1;

  return 0;
}
797 
798 static void
799 find_charset_names (void)
800 {
801   struct pex_obj *child;
802   const char *args[3];
803   int err, status;
804   int fail = 1;
805   int flags;
806   gdb_environ iconv_env = gdb_environ::from_host_environ ();
807   char *iconv_program;
808 
809   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
810      not a tty.  We need to recognize it and ignore it.  This text is
811      subject to translation, so force LANGUAGE=C.  */
812   iconv_env.set ("LANGUAGE", "C");
813   iconv_env.set ("LC_ALL", "C");
814 
815   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
816 
817 #ifdef ICONV_BIN
818   {
819     std::string iconv_dir = relocate_gdb_directory (ICONV_BIN,
820 						    ICONV_BIN_RELOCATABLE);
821     iconv_program
822       = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", (char *) NULL);
823   }
824 #else
825   iconv_program = xstrdup ("iconv");
826 #endif
827   args[0] = iconv_program;
828   args[1] = "-l";
829   args[2] = NULL;
830   flags = PEX_STDERR_TO_STDOUT;
831 #ifndef ICONV_BIN
832   flags |= PEX_SEARCH;
833 #endif
834   /* Note that we simply ignore errors here.  */
835   if (!pex_run_in_environment (child, flags,
836 			       args[0], const_cast<char **> (args),
837 			       iconv_env.envp (),
838 			       NULL, NULL, &err))
839     {
840       FILE *in = pex_read_output (child, 0);
841 
842       /* POSIX says that iconv -l uses an unspecified format.  We
843 	 parse the glibc and libiconv formats; feel free to add others
844 	 as needed.  */
845 
846       while (in != NULL && !feof (in))
847 	{
848 	  /* The size of buf is chosen arbitrarily.  */
849 	  char buf[1024];
850 	  char *start, *r;
851 	  int len;
852 
853 	  r = fgets (buf, sizeof (buf), in);
854 	  if (!r)
855 	    break;
856 	  len = strlen (r);
857 	  if (len <= 3)
858 	    continue;
859 	  if (ignore_line_p (r))
860 	    continue;
861 
862 	  /* Strip off the newline.  */
863 	  --len;
864 	  /* Strip off one or two '/'s.  glibc will print lines like
865 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
866 	  if (buf[len - 1] == '/')
867 	    --len;
868 	  if (buf[len - 1] == '/')
869 	    --len;
870 	  buf[len] = '\0';
871 
872 	  /* libiconv will print multiple entries per line, separated
873 	     by spaces.  Older iconvs will print multiple entries per
874 	     line, indented by two spaces, and separated by ", "
875 	     (i.e. the human readable form).  */
876 	  start = buf;
877 	  while (1)
878 	    {
879 	      int keep_going;
880 	      char *p;
881 
882 	      /* Skip leading blanks.  */
883 	      for (p = start; *p && *p == ' '; ++p)
884 		;
885 	      start = p;
886 	      /* Find the next space, comma, or end-of-line.  */
887 	      for ( ; *p && *p != ' ' && *p != ','; ++p)
888 		;
889 	      /* Ignore an empty result.  */
890 	      if (p == start)
891 		break;
892 	      keep_going = *p;
893 	      *p = '\0';
894 	      charsets.charsets.push_back (xstrdup (start));
895 	      if (!keep_going)
896 		break;
897 	      /* Skip any extra spaces.  */
898 	      for (start = p + 1; *start && *start == ' '; ++start)
899 		;
900 	    }
901 	}
902 
903       if (pex_get_status (child, 1, &status)
904 	  && WIFEXITED (status) && !WEXITSTATUS (status))
905 	fail = 0;
906 
907     }
908 
909   xfree (iconv_program);
910   pex_free (child);
911 
912   if (fail)
913     {
914       /* Some error occurred, so drop the vector.  */
915       charsets.clear ();
916     }
917   else
918     charsets.charsets.push_back (NULL);
919 }
920 
921 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
922 #endif /* PHONY_ICONV */
923 
924 /* The "auto" target charset used by default_auto_charset.  */
925 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
926 
927 const char *
928 default_auto_charset (void)
929 {
930   return auto_target_charset_name;
931 }
932 
933 const char *
934 default_auto_wide_charset (void)
935 {
936   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
937 }
938 
939 
940 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
941 /* Macro used for UTF or UCS endianness suffix.  */
942 #if WORDS_BIGENDIAN
943 #define ENDIAN_SUFFIX "BE"
944 #else
945 #define ENDIAN_SUFFIX "LE"
946 #endif
947 
948 /* GDB cannot handle strings correctly if this size is different.  */
949 
950 gdb_static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4);
951 
952 /* intermediate_encoding returns the charset used internally by
953    GDB to convert between target and host encodings. As the test above
954    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
955    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
956    otherwise an error is generated.  */
957 
958 const char *
959 intermediate_encoding (void)
960 {
961   iconv_t desc;
962   static const char *stored_result = NULL;
963   char *result;
964 
965   if (stored_result)
966     return stored_result;
967   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
968 		       ENDIAN_SUFFIX);
969   /* Check that the name is supported by iconv_open.  */
970   desc = iconv_open (result, host_charset ());
971   if (desc != (iconv_t) -1)
972     {
973       iconv_close (desc);
974       stored_result = result;
975       return result;
976     }
977   /* Not valid, free the allocated memory.  */
978   xfree (result);
979   /* Second try, with UCS-2 type.  */
980   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
981 		       ENDIAN_SUFFIX);
982   /* Check that the name is supported by iconv_open.  */
983   desc = iconv_open (result, host_charset ());
984   if (desc != (iconv_t) -1)
985     {
986       iconv_close (desc);
987       stored_result = result;
988       return result;
989     }
990   /* Not valid, free the allocated memory.  */
991   xfree (result);
992   /* No valid charset found, generate error here.  */
993   error (_("Unable to find a valid charset for string conversions"));
994 }
995 
996 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
997 
998 void _initialize_charset ();
999 void
1000 _initialize_charset ()
1001 {
1002   /* The first element is always "auto".  */
1003   charsets.charsets.push_back (xstrdup ("auto"));
1004   find_charset_names ();
1005 
1006   if (charsets.charsets.size () > 1)
1007     charset_enum = (const char **) charsets.charsets.data ();
1008   else
1009     charset_enum = default_charset_names;
1010 
1011 #ifndef PHONY_ICONV
1012 #ifdef HAVE_LANGINFO_CODESET
1013   /* The result of nl_langinfo may be overwritten later.  This may
1014      leak a little memory, if the user later changes the host charset,
1015      but that doesn't matter much.  */
1016   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1017   /* Solaris will return `646' here -- but the Solaris iconv then does
1018      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
1019      which GNU libiconv doesn't like (infinite loop).  */
1020   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1021     auto_host_charset_name = "ASCII";
1022   auto_target_charset_name = auto_host_charset_name;
1023 #elif defined (USE_WIN32API)
1024   {
1025     /* "CP" + x<=5 digits + paranoia.  */
1026     static char w32_host_default_charset[16];
1027 
1028     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1029 	      "CP%d", GetACP());
1030     auto_host_charset_name = w32_host_default_charset;
1031     auto_target_charset_name = auto_host_charset_name;
1032   }
1033 #endif
1034 #endif
1035 
1036   add_setshow_enum_cmd ("charset", class_support,
1037 			charset_enum, &host_charset_name, _("\
1038 Set the host and target character sets."), _("\
1039 Show the host and target character sets."), _("\
1040 The `host character set' is the one used by the system GDB is running on.\n\
1041 The `target character set' is the one used by the program being debugged.\n\
1042 You may only use supersets of ASCII for your host character set; GDB does\n\
1043 not support any others.\n\
1044 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1045 			/* Note that the sfunc below needs to set
1046 			   target_charset_name, because the 'set
1047 			   charset' command sets two variables.  */
1048 			set_charset_sfunc,
1049 			show_charset,
1050 			&setlist, &showlist);
1051 
1052   add_setshow_enum_cmd ("host-charset", class_support,
1053 			charset_enum, &host_charset_name, _("\
1054 Set the host character set."), _("\
1055 Show the host character set."), _("\
1056 The `host character set' is the one used by the system GDB is running on.\n\
1057 You may only use supersets of ASCII for your host character set; GDB does\n\
1058 not support any others.\n\
1059 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1060 			set_host_charset_sfunc,
1061 			show_host_charset_name,
1062 			&setlist, &showlist);
1063 
1064   add_setshow_enum_cmd ("target-charset", class_support,
1065 			charset_enum, &target_charset_name, _("\
1066 Set the target character set."), _("\
1067 Show the target character set."), _("\
1068 The `target character set' is the one used by the program being debugged.\n\
1069 GDB translates characters and strings between the host and target\n\
1070 character sets as needed.\n\
1071 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1072 			set_target_charset_sfunc,
1073 			show_target_charset_name,
1074 			&setlist, &showlist);
1075 
1076   add_setshow_enum_cmd ("target-wide-charset", class_support,
1077 			charset_enum, &target_wide_charset_name,
1078 			_("\
1079 Set the target wide character set."), _("\
1080 Show the target wide character set."), _("\
1081 The `target wide character set' is the one used by the program being debugged.\
1082 \nIn particular it is the encoding used by `wchar_t'.\n\
1083 GDB translates characters and strings between the host and target\n\
1084 character sets as needed.\n\
1085 To see a list of the character sets GDB supports, type\n\
1086 `set target-wide-charset'<TAB>"),
1087 			set_target_wide_charset_sfunc,
1088 			show_target_wide_charset_name,
1089 			&setlist, &showlist);
1090 }
1091