xref: /netbsd-src/external/gpl3/gdb/dist/gdb/charset.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1 /* Character set conversion support for GDB.
2 
3    Copyright (C) 2001-2024 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "charset.h"
21 #include "cli/cli-cmds.h"
22 #include "gdbsupport/gdb_obstack.h"
23 #include "gdbsupport/gdb_wait.h"
24 #include "charset-list.h"
25 #include "gdbsupport/environ.h"
26 #include "arch-utils.h"
27 #include "gdbsupport/gdb_vecs.h"
28 #include <ctype.h>
29 
30 #ifdef USE_WIN32API
31 #include <windows.h>
32 #endif
33 
34 /* How GDB's character set support works
35 
36    GDB has three global settings:
37 
38    - The `current host character set' is the character set GDB should
39      use in talking to the user, and which (hopefully) the user's
40      terminal knows how to display properly.  Most users should not
41      change this.
42 
43    - The `current target character set' is the character set the
44      program being debugged uses.
45 
46    - The `current target wide character set' is the wide character set
47      the program being debugged uses, that is, the encoding used for
48      wchar_t.
49 
50    There are commands to set each of these, and mechanisms for
51    choosing reasonable default values.  GDB has a global list of
52    character sets that it can use as its host or target character
53    sets.
54 
55    The header file `charset.h' declares various functions that
56    different pieces of GDB need to perform tasks like:
57 
58    - printing target strings and characters to the user's terminal
59      (mostly target->host conversions),
60 
61    - building target-appropriate representations of strings and
62      characters the user enters in expressions (mostly host->target
63      conversions),
64 
65      and so on.
66 
67    To avoid excessive code duplication and maintenance efforts,
68    GDB simply requires a capable iconv function.  Users on platforms
69    without a suitable iconv can use the GNU iconv library.  */
70 
71 
72 #ifdef PHONY_ICONV
73 
74 #include "extract-store-integer.h"
75 
76 /* Provide a phony iconv that does as little as possible.  Also,
77    arrange for there to be a single available character set.  */
78 
79 #undef GDB_DEFAULT_HOST_CHARSET
80 #ifdef USE_WIN32API
81 # define GDB_DEFAULT_HOST_CHARSET "CP1252"
82 #else
83 # define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
84 #endif
85 #define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET
86 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
87 #undef DEFAULT_CHARSET_NAMES
88 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
89 
90 #undef iconv_t
91 #define iconv_t int
92 #undef iconv_open
93 #define iconv_open phony_iconv_open
94 #undef iconv
95 #define iconv phony_iconv
96 #undef iconv_close
97 #define iconv_close phony_iconv_close
98 
99 #undef ICONV_CONST
100 #define ICONV_CONST const
101 
102 /* We allow conversions from UTF-32, wchar_t, and the host charset.
103    We allow conversions to wchar_t and the host charset.
104    Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE,
105    0 otherwise.  This is used as a flag in calls to iconv.  */
106 
107 static iconv_t
108 phony_iconv_open (const char *to, const char *from)
109 {
110   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
111     return -1;
112 
113   if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32"))
114     return 1;
115 
116   if (!strcmp (from, "UTF-32LE"))
117     return 2;
118 
119   if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
120     return -1;
121 
122   return 0;
123 }
124 
/* Phony counterpart of iconv_close.  In this configuration iconv_t
   is a plain int flag, so there is nothing to release; ARG is
   ignored and success (0) is always reported.  */
static int
phony_iconv_close (iconv_t arg)
{
  return 0;
}
130 
131 static size_t
132 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
133 	     char **outbuf, size_t *outbytesleft)
134 {
135   if (utf_flag)
136     {
137       enum bfd_endian endian
138 	= utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
139       while (*inbytesleft >= 4)
140 	{
141 	  unsigned long c
142 	    = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian);
143 
144 	  if (c >= 256)
145 	    {
146 	      errno = EILSEQ;
147 	      return -1;
148 	    }
149 	  if (*outbytesleft < 1)
150 	    {
151 	      errno = E2BIG;
152 	      return -1;
153 	    }
154 	  **outbuf = c & 0xff;
155 	  ++*outbuf;
156 	  --*outbytesleft;
157 
158 	  *inbuf += 4;
159 	  *inbytesleft -= 4;
160 	}
161       if (*inbytesleft)
162 	{
163 	  /* Partial sequence on input.  */
164 	  errno = EINVAL;
165 	  return -1;
166 	}
167     }
168   else
169     {
170       /* In all other cases we simply copy input bytes to the
171 	 output.  */
172       size_t amt = *inbytesleft;
173 
174       if (amt > *outbytesleft)
175 	amt = *outbytesleft;
176       memcpy (*outbuf, *inbuf, amt);
177       *inbuf += amt;
178       *outbuf += amt;
179       *inbytesleft -= amt;
180       *outbytesleft -= amt;
181       if (*inbytesleft)
182 	{
183 	  errno = E2BIG;
184 	  return -1;
185 	}
186     }
187 
188   /* The number of non-reversible conversions -- but they were all
189      reversible.  */
190   return 0;
191 }
192 
193 #else /* PHONY_ICONV */
194 
195 /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
196    to ENOENT, while gnulib defines it to a different value.  Always
197    map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */
198 
199 static size_t
200 gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
201 	   char **outbuf, size_t *outbytesleft)
202 {
203   size_t ret;
204 
205   ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
206   if (errno == ENOENT)
207     errno = EILSEQ;
208   return ret;
209 }
210 
211 #undef iconv
212 #define iconv gdb_iconv
213 
214 #endif /* PHONY_ICONV */
215 
216 
217 /* The global lists of character sets and translations.  */
218 
219 
220 #ifndef GDB_DEFAULT_TARGET_CHARSET
221 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
222 #endif
223 
224 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
225 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
226 #endif
227 
228 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
229 static const char *host_charset_name = "auto";
230 static void
231 show_host_charset_name (struct ui_file *file, int from_tty,
232 			struct cmd_list_element *c,
233 			const char *value)
234 {
235   if (!strcmp (value, "auto"))
236     gdb_printf (file,
237 		_("The host character set is \"auto; currently %s\".\n"),
238 		auto_host_charset_name);
239   else
240     gdb_printf (file, _("The host character set is \"%s\".\n"), value);
241 }
242 
243 static const char *target_charset_name = "auto";
244 static void
245 show_target_charset_name (struct ui_file *file, int from_tty,
246 			  struct cmd_list_element *c, const char *value)
247 {
248   if (!strcmp (value, "auto"))
249     gdb_printf (file,
250 		_("The target character set is \"auto; "
251 		  "currently %s\".\n"),
252 		gdbarch_auto_charset (get_current_arch ()));
253   else
254     gdb_printf (file, _("The target character set is \"%s\".\n"),
255 		value);
256 }
257 
258 static const char *target_wide_charset_name = "auto";
259 static void
260 show_target_wide_charset_name (struct ui_file *file,
261 			       int from_tty,
262 			       struct cmd_list_element *c,
263 			       const char *value)
264 {
265   if (!strcmp (value, "auto"))
266     gdb_printf (file,
267 		_("The target wide character set is \"auto; "
268 		  "currently %s\".\n"),
269 		gdbarch_auto_wide_charset (get_current_arch ()));
270   else
271     gdb_printf (file, _("The target wide character set is \"%s\".\n"),
272 		value);
273 }
274 
/* Built-in, zero-terminated list of charset names.  It is used as
   the enumeration for the charset commands when no longer list
   could be discovered at startup.  */
static const char * const default_charset_names[] =
{
  DEFAULT_CHARSET_NAMES
  0
};

/* The zero-terminated list of charset names offered by the
   charset-related set/show commands.  Set once during
   initialization to either the discovered list or
   default_charset_names.  */
static const char * const *charset_enum;
282 
283 
284 /* If the target wide character set has big- or little-endian
285    variants, these are the corresponding names.  */
286 static const char *target_wide_charset_be_name;
287 static const char *target_wide_charset_le_name;
288 
289 /* The architecture for which the BE- and LE-names are valid.  */
290 static struct gdbarch *be_le_arch;
291 
292 /* A helper function which sets the target wide big- and little-endian
293    character set names, if possible.  */
294 
295 static void
296 set_be_le_names (struct gdbarch *gdbarch)
297 {
298   if (be_le_arch == gdbarch)
299     return;
300   be_le_arch = gdbarch;
301 
302 #ifdef PHONY_ICONV
303   /* Match the wide charset names recognized by phony_iconv_open.  */
304   target_wide_charset_le_name = "UTF-32LE";
305   target_wide_charset_be_name = "UTF-32BE";
306 #else
307   int i, len;
308   const char *target_wide;
309 
310   target_wide_charset_le_name = NULL;
311   target_wide_charset_be_name = NULL;
312 
313   target_wide = target_wide_charset_name;
314   if (!strcmp (target_wide, "auto"))
315     target_wide = gdbarch_auto_wide_charset (gdbarch);
316 
317   len = strlen (target_wide);
318   for (i = 0; charset_enum[i]; ++i)
319     {
320       if (strncmp (target_wide, charset_enum[i], len))
321 	continue;
322       if ((charset_enum[i][len] == 'B'
323 	   || charset_enum[i][len] == 'L')
324 	  && charset_enum[i][len + 1] == 'E'
325 	  && charset_enum[i][len + 2] == '\0')
326 	{
327 	  if (charset_enum[i][len] == 'B')
328 	    target_wide_charset_be_name = charset_enum[i];
329 	  else
330 	    target_wide_charset_le_name = charset_enum[i];
331 	}
332     }
333 # endif  /* PHONY_ICONV */
334 }
335 
336 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
337    target-wide-charset', 'set charset' sfunc's.  */
338 
339 static void
340 validate (struct gdbarch *gdbarch)
341 {
342   iconv_t desc;
343   const char *host_cset = host_charset ();
344   const char *target_cset = target_charset (gdbarch);
345   const char *target_wide_cset = target_wide_charset_name;
346 
347   if (!strcmp (target_wide_cset, "auto"))
348     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
349 
350   desc = iconv_open (target_wide_cset, host_cset);
351   if (desc == (iconv_t) -1)
352     error (_("Cannot convert between character sets `%s' and `%s'"),
353 	   target_wide_cset, host_cset);
354   iconv_close (desc);
355 
356   desc = iconv_open (target_cset, host_cset);
357   if (desc == (iconv_t) -1)
358     error (_("Cannot convert between character sets `%s' and `%s'"),
359 	   target_cset, host_cset);
360   iconv_close (desc);
361 
362   /* Clear the cache.  */
363   be_le_arch = NULL;
364 }
365 
366 /* This is the sfunc for the 'set charset' command.  */
367 static void
368 set_charset_sfunc (const char *charset, int from_tty,
369 		   struct cmd_list_element *c)
370 {
371   /* CAREFUL: set the target charset here as well.  */
372   target_charset_name = host_charset_name;
373   validate (get_current_arch ());
374 }
375 
376 /* 'set host-charset' command sfunc.  We need a wrapper here because
377    the function needs to have a specific signature.  */
378 static void
379 set_host_charset_sfunc (const char *charset, int from_tty,
380 			struct cmd_list_element *c)
381 {
382   validate (get_current_arch ());
383 }
384 
385 /* Wrapper for the 'set target-charset' command.  */
386 static void
387 set_target_charset_sfunc (const char *charset, int from_tty,
388 			  struct cmd_list_element *c)
389 {
390   validate (get_current_arch ());
391 }
392 
393 /* Wrapper for the 'set target-wide-charset' command.  */
394 static void
395 set_target_wide_charset_sfunc (const char *charset, int from_tty,
396 			       struct cmd_list_element *c)
397 {
398   validate (get_current_arch ());
399 }
400 
401 /* sfunc for the 'show charset' command.  */
402 static void
403 show_charset (struct ui_file *file, int from_tty,
404 	      struct cmd_list_element *c,
405 	      const char *name)
406 {
407   show_host_charset_name (file, from_tty, c, host_charset_name);
408   show_target_charset_name (file, from_tty, c, target_charset_name);
409   show_target_wide_charset_name (file, from_tty, c,
410 				 target_wide_charset_name);
411 }
412 
413 
414 /* Accessor functions.  */
415 
416 const char *
417 host_charset (void)
418 {
419   if (!strcmp (host_charset_name, "auto"))
420     return auto_host_charset_name;
421   return host_charset_name;
422 }
423 
424 const char *
425 target_charset (struct gdbarch *gdbarch)
426 {
427   if (!strcmp (target_charset_name, "auto"))
428     return gdbarch_auto_charset (gdbarch);
429   return target_charset_name;
430 }
431 
432 const char *
433 target_wide_charset (struct gdbarch *gdbarch)
434 {
435   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
436 
437   set_be_le_names (gdbarch);
438   if (byte_order == BFD_ENDIAN_BIG)
439     {
440       if (target_wide_charset_be_name)
441 	return target_wide_charset_be_name;
442     }
443   else
444     {
445       if (target_wide_charset_le_name)
446 	return target_wide_charset_le_name;
447     }
448 
449   if (!strcmp (target_wide_charset_name, "auto"))
450     return gdbarch_auto_wide_charset (gdbarch);
451 
452   return target_wide_charset_name;
453 }
454 
455 
456 /* Host character set management.  For the time being, we assume that
457    the host character set is some superset of ASCII.  */
458 
/* Map the host character C to its control-character counterpart.
   '?' maps to DEL (0177); any other character is masked with 0237,
   which clears bits 5 and 6 and keeps the rest.  */
char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
466 
467 
468 /* Public character management functions.  */
469 
470 class iconv_wrapper
471 {
472 public:
473 
474   iconv_wrapper (const char *to, const char *from)
475   {
476     m_desc = iconv_open (to, from);
477     if (m_desc == (iconv_t) -1)
478       perror_with_name (_("Converting character sets"));
479   }
480 
481   ~iconv_wrapper ()
482   {
483     iconv_close (m_desc);
484   }
485 
486   size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp,
487 		  size_t *outleft)
488   {
489     return iconv (m_desc, inp, inleft, outp, outleft);
490   }
491 
492 private:
493 
494   iconv_t m_desc;
495 };
496 
497 void
498 convert_between_encodings (const char *from, const char *to,
499 			   const gdb_byte *bytes, unsigned int num_bytes,
500 			   int width, struct obstack *output,
501 			   enum transliterations translit)
502 {
503   size_t inleft;
504   ICONV_CONST char *inp;
505   unsigned int space_request;
506 
507   /* Often, the host and target charsets will be the same.  */
508   if (!strcmp (from, to))
509     {
510       obstack_grow (output, bytes, num_bytes);
511       return;
512     }
513 
514   iconv_wrapper desc (to, from);
515 
516   inleft = num_bytes;
517   inp = (ICONV_CONST char *) bytes;
518 
519   space_request = num_bytes;
520 
521   while (inleft > 0)
522     {
523       char *outp;
524       size_t outleft, r;
525       int old_size;
526 
527       old_size = obstack_object_size (output);
528       obstack_blank (output, space_request);
529 
530       outp = (char *) obstack_base (output) + old_size;
531       outleft = space_request;
532 
533       r = desc.convert (&inp, &inleft, &outp, &outleft);
534 
535       /* Now make sure that the object on the obstack only includes
536 	 bytes we have converted.  */
537       obstack_blank_fast (output, -(ssize_t) outleft);
538 
539       if (r == (size_t) -1)
540 	{
541 	  switch (errno)
542 	    {
543 	    case EILSEQ:
544 	      {
545 		int i;
546 
547 		/* Invalid input sequence.  */
548 		if (translit == translit_none)
549 		  error (_("Could not convert character "
550 			   "to `%s' character set"), to);
551 
552 		/* We emit escape sequence for the bytes, skip them,
553 		   and try again.  */
554 		for (i = 0; i < width; ++i)
555 		  {
556 		    char octal[5];
557 
558 		    xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
559 		    obstack_grow_str (output, octal);
560 
561 		    ++inp;
562 		    --inleft;
563 		  }
564 	      }
565 	      break;
566 
567 	    case E2BIG:
568 	      /* We ran out of space in the output buffer.  Make it
569 		 bigger next time around.  */
570 	      space_request *= 2;
571 	      break;
572 
573 	    case EINVAL:
574 	      /* Incomplete input sequence.  FIXME: ought to report this
575 		 to the caller somehow.  */
576 	      inleft = 0;
577 	      break;
578 
579 	    default:
580 	      perror_with_name (_("Internal error while "
581 				  "converting character sets"));
582 	    }
583 	}
584     }
585 }
586 
587 
588 
/* Create a new iterator over the BYTES bytes starting at INPUT,
   which are encoded in CHARSET.  WIDTH is recorded as the number of
   bytes to skip over when an invalid sequence is encountered.  The
   output buffer starts out with room for a single element.  A
   conversion descriptor from CHARSET to INTERMEDIATE_ENCODING is
   opened; failure to open it is reported via perror_with_name.  */
wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes,
				const char *charset, size_t width)
: m_input (input),
  m_bytes (bytes),
  m_width (width),
  m_out (1)
{
  m_desc = iconv_open (INTERMEDIATE_ENCODING, charset);
  if (m_desc == (iconv_t) -1)
    perror_with_name (_("Converting character sets"));
}
601 
/* Destroy the iterator, closing the conversion descriptor unless it
   holds the "invalid descriptor" value.  */
wchar_iterator::~wchar_iterator ()
{
  if (m_desc != (iconv_t) -1)
    iconv_close (m_desc);
}
607 
608 int
609 wchar_iterator::iterate (enum wchar_iterate_result *out_result,
610 			 gdb_wchar_t **out_chars,
611 			 const gdb_byte **ptr,
612 			 size_t *len)
613 {
614   size_t out_request;
615 
616   /* Try to convert some characters.  At first we try to convert just
617      a single character.  The reason for this is that iconv does not
618      necessarily update its outgoing arguments when it encounters an
619      invalid input sequence -- but we want to reliably report this to
620      our caller so it can emit an escape sequence.  */
621   out_request = 1;
622   while (m_bytes > 0)
623     {
624       ICONV_CONST char *inptr = (ICONV_CONST char *) m_input;
625       char *outptr = (char *) m_out.data ();
626       const gdb_byte *orig_inptr = m_input;
627       size_t orig_in = m_bytes;
628       size_t out_avail = out_request * sizeof (gdb_wchar_t);
629       size_t num;
630       size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail);
631 
632       m_input = (gdb_byte *) inptr;
633 
634       if (r == (size_t) -1)
635 	{
636 	  switch (errno)
637 	    {
638 	    case EILSEQ:
639 	      /* Invalid input sequence.  We still might have
640 		 converted a character; if so, return it.  */
641 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
642 		break;
643 
644 	      /* Otherwise skip the first invalid character, and let
645 		 the caller know about it.  */
646 	      *out_result = wchar_iterate_invalid;
647 	      *ptr = m_input;
648 	      *len = m_width;
649 	      m_input += m_width;
650 	      m_bytes -= m_width;
651 	      return 0;
652 
653 	    case E2BIG:
654 	      /* We ran out of space.  We still might have converted a
655 		 character; if so, return it.  Otherwise, grow the
656 		 buffer and try again.  */
657 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
658 		break;
659 
660 	      ++out_request;
661 	      if (out_request > m_out.size ())
662 		m_out.resize (out_request);
663 	      continue;
664 
665 	    case EINVAL:
666 	      /* Incomplete input sequence.  Let the caller know, and
667 		 arrange for future calls to see EOF.  */
668 	      *out_result = wchar_iterate_incomplete;
669 	      *ptr = m_input;
670 	      *len = m_bytes;
671 	      m_bytes = 0;
672 	      return 0;
673 
674 	    default:
675 	      perror_with_name (_("Internal error while "
676 				  "converting character sets"));
677 	    }
678 	}
679 
680       /* We converted something.  */
681       num = out_request - out_avail / sizeof (gdb_wchar_t);
682       *out_result = wchar_iterate_ok;
683       *out_chars = m_out.data ();
684       *ptr = orig_inptr;
685       *len = orig_in - m_bytes;
686       return num;
687     }
688 
689   /* Really done.  */
690   *out_result = wchar_iterate_eof;
691   return -1;
692 }
693 
694 struct charset_vector
695 {
696   ~charset_vector ()
697   {
698     /* Note that we do not call charset_vector::clear, which would also xfree
699        the elements.  This destructor is only called after exit, at which point
700        those will be freed anyway on process exit, so not freeing them now is
701        not classified as a memory leak.  OTOH, freeing them now might be
702        classified as a data race, because some worker thread might still be
703        accessing them.  */
704     charsets.clear ();
705   }
706 
707   void clear ()
708   {
709     for (char *c : charsets)
710       xfree (c);
711 
712     charsets.clear ();
713   }
714 
715   std::vector<char *> charsets;
716 };
717 
718 static charset_vector charsets;
719 
720 #ifdef PHONY_ICONV
721 
722 static void
723 find_charset_names (void)
724 {
725   charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET));
726   charsets.charsets.push_back (NULL);
727 }
728 
729 #else /* PHONY_ICONV */
730 
731 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
732    provides different symbols in the static and dynamic libraries.
733    So, configure may see libiconvlist but not iconvlist.  But, calling
734    iconvlist is the right thing to do and will work.  Hence we do a
735    check here but unconditionally call iconvlist below.  */
736 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
737 
738 /* A helper function that adds some character sets to the vector of
739    all character sets.  This is a callback function for iconvlist.  */
740 
741 static int
742 add_one (unsigned int count, const char *const *names, void *data)
743 {
744   unsigned int i;
745 
746   for (i = 0; i < count; ++i)
747     charsets.charsets.push_back (xstrdup (names[i]));
748 
749   return 0;
750 }
751 
752 static void
753 find_charset_names (void)
754 {
755   iconvlist (add_one, NULL);
756 
757   charsets.charsets.push_back (NULL);
758 }
759 
760 #else
761 
/* Tell whether LINE, a line of output from the iconv program, is
   part of the human readable introduction rather than a list of
   charset names.  Older iconv programs (e.g. 2.2.2) print that
   introduction even when stdout is not a tty; newer ones leave it
   out in that case.  Returns 1 if LINE should be ignored, 0
   otherwise.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is dropped.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
	return 1;
      ++fragment;
    }

  return 0;
}
790 
791 static void
792 find_charset_names (void)
793 {
794   struct pex_obj *child;
795   const char *args[3];
796   int err, status;
797   int fail = 1;
798   int flags;
799   gdb_environ iconv_env = gdb_environ::from_host_environ ();
800   char *iconv_program;
801 
802   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
803      not a tty.  We need to recognize it and ignore it.  This text is
804      subject to translation, so force LANGUAGE=C.  */
805   iconv_env.set ("LANGUAGE", "C");
806   iconv_env.set ("LC_ALL", "C");
807 
808   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
809 
810 #ifdef ICONV_BIN
811   {
812     std::string iconv_dir = relocate_gdb_directory (ICONV_BIN,
813 						    ICONV_BIN_RELOCATABLE);
814     iconv_program
815       = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", (char *) NULL);
816   }
817 #else
818   iconv_program = xstrdup ("iconv");
819 #endif
820   args[0] = iconv_program;
821   args[1] = "-l";
822   args[2] = NULL;
823   flags = PEX_STDERR_TO_STDOUT;
824 #ifndef ICONV_BIN
825   flags |= PEX_SEARCH;
826 #endif
827   /* Note that we simply ignore errors here.  */
828   if (!pex_run_in_environment (child, flags,
829 			       args[0], const_cast<char **> (args),
830 			       iconv_env.envp (),
831 			       NULL, NULL, &err))
832     {
833       FILE *in = pex_read_output (child, 0);
834 
835       /* POSIX says that iconv -l uses an unspecified format.  We
836 	 parse the glibc and libiconv formats; feel free to add others
837 	 as needed.  */
838 
839       while (in != NULL && !feof (in))
840 	{
841 	  /* The size of buf is chosen arbitrarily.  */
842 	  char buf[1024];
843 	  char *start, *r;
844 	  int len;
845 
846 	  r = fgets (buf, sizeof (buf), in);
847 	  if (!r)
848 	    break;
849 	  len = strlen (r);
850 	  if (len <= 3)
851 	    continue;
852 	  if (ignore_line_p (r))
853 	    continue;
854 
855 	  /* Strip off the newline.  */
856 	  --len;
857 	  /* Strip off one or two '/'s.  glibc will print lines like
858 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
859 	  if (buf[len - 1] == '/')
860 	    --len;
861 	  if (buf[len - 1] == '/')
862 	    --len;
863 	  buf[len] = '\0';
864 
865 	  /* libiconv will print multiple entries per line, separated
866 	     by spaces.  Older iconvs will print multiple entries per
867 	     line, indented by two spaces, and separated by ", "
868 	     (i.e. the human readable form).  */
869 	  start = buf;
870 	  while (1)
871 	    {
872 	      int keep_going;
873 	      char *p;
874 
875 	      /* Skip leading blanks.  */
876 	      for (p = start; *p && *p == ' '; ++p)
877 		;
878 	      start = p;
879 	      /* Find the next space, comma, or end-of-line.  */
880 	      for ( ; *p && *p != ' ' && *p != ','; ++p)
881 		;
882 	      /* Ignore an empty result.  */
883 	      if (p == start)
884 		break;
885 	      keep_going = *p;
886 	      *p = '\0';
887 	      charsets.charsets.push_back (xstrdup (start));
888 	      if (!keep_going)
889 		break;
890 	      /* Skip any extra spaces.  */
891 	      for (start = p + 1; *start && *start == ' '; ++start)
892 		;
893 	    }
894 	}
895 
896       if (pex_get_status (child, 1, &status)
897 	  && WIFEXITED (status) && !WEXITSTATUS (status))
898 	fail = 0;
899 
900     }
901 
902   xfree (iconv_program);
903   pex_free (child);
904 
905   if (fail)
906     {
907       /* Some error occurred, so drop the vector.  */
908       charsets.clear ();
909     }
910   else
911     charsets.charsets.push_back (NULL);
912 }
913 
914 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
915 #endif /* PHONY_ICONV */
916 
917 /* The "auto" target charset used by default_auto_charset.  */
static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Return the name of the default "auto" target charset, i.e. the
   current value of auto_target_charset_name.  */
const char *
default_auto_charset (void)
{
  return auto_target_charset_name;
}
925 
/* Return the name of the default "auto" target wide charset, which
   is the compile-time constant GDB_DEFAULT_TARGET_WIDE_CHARSET.  */
const char *
default_auto_wide_charset (void)
{
  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
}
931 
932 
933 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
934 /* Macro used for UTF or UCS endianness suffix.  */
935 #if WORDS_BIGENDIAN
936 #define ENDIAN_SUFFIX "BE"
937 #else
938 #define ENDIAN_SUFFIX "LE"
939 #endif
940 
941 /* GDB cannot handle strings correctly if this size is different.  */
942 
943 static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4);
944 
945 /* intermediate_encoding returns the charset used internally by
946    GDB to convert between target and host encodings. As the test above
947    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
948    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
949    otherwise an error is generated.  */
950 
951 const char *
952 intermediate_encoding (void)
953 {
954   iconv_t desc;
955   static const char *stored_result = NULL;
956   gdb::unique_xmalloc_ptr<char> result;
957 
958   if (stored_result)
959     return stored_result;
960   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
961 		       ENDIAN_SUFFIX);
962   /* Check that the name is supported by iconv_open.  */
963   desc = iconv_open (result.get (), host_charset ());
964   if (desc != (iconv_t) -1)
965     {
966       iconv_close (desc);
967       stored_result = result.release ();
968       return stored_result;
969     }
970   /* Second try, with UCS-2 type.  */
971   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
972 		       ENDIAN_SUFFIX);
973   /* Check that the name is supported by iconv_open.  */
974   desc = iconv_open (result.get (), host_charset ());
975   if (desc != (iconv_t) -1)
976     {
977       iconv_close (desc);
978       stored_result = result.release ();
979       return stored_result;
980     }
981   /* No valid charset found, generate error here.  */
982   error (_("Unable to find a valid charset for string conversions"));
983 }
984 
985 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
986 
/* Module initialization.  Builds the list of selectable charset
   names, picks the "auto" host and target charsets where the
   platform allows it, and registers the 'charset', 'host-charset',
   'target-charset' and 'target-wide-charset' set/show commands.  */
void _initialize_charset ();
void
_initialize_charset ()
{
  /* The first element is always "auto".  */
  charsets.charsets.push_back (xstrdup ("auto"));
  find_charset_names ();

  /* Use the discovered names if there are any beyond the leading
     "auto"; otherwise fall back on the built-in list.  */
  if (charsets.charsets.size () > 1)
    charset_enum = (const char * const *) charsets.charsets.data ();
  else
    charset_enum = default_charset_names;

#ifndef PHONY_ICONV
#ifdef HAVE_LANGINFO_CODESET
  /* The result of nl_langinfo may be overwritten later.  This may
     leak a little memory, if the user later changes the host charset,
     but that doesn't matter much.  */
  auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
  /* Solaris will return `646' here -- but the Solaris iconv then does
     not accept this.  Darwin (and maybe FreeBSD) may return "" here,
     which GNU libiconv doesn't like (infinite loop).  */
  if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
    auto_host_charset_name = "ASCII";
  auto_target_charset_name = auto_host_charset_name;
#elif defined (USE_WIN32API)
  {
    /* "CP" + x<=5 digits + paranoia.  */
    static char w32_host_default_charset[16];

    snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
	      "CP%d", GetACP());
    auto_host_charset_name = w32_host_default_charset;
    auto_target_charset_name = auto_host_charset_name;
  }
#endif
#endif

  /* Recall that the first element is always "auto".  */
  host_charset_name = charset_enum[0];
  gdb_assert (strcmp (host_charset_name, "auto") == 0);
  /* Both 'charset' and 'host-charset' are registered on
     host_charset_name.  */
  add_setshow_enum_cmd ("charset", class_support,
			charset_enum, &host_charset_name, _("\
Set the host and target character sets."), _("\
Show the host and target character sets."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
The `target character set' is the one used by the program being debugged.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set charset <TAB>'."),
			/* Note that the sfunc below needs to set
			   target_charset_name, because the 'set
			   charset' command sets two variables.  */
			set_charset_sfunc,
			show_charset,
			&setlist, &showlist);

  add_setshow_enum_cmd ("host-charset", class_support,
			charset_enum, &host_charset_name, _("\
Set the host character set."), _("\
Show the host character set."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
			set_host_charset_sfunc,
			show_host_charset_name,
			&setlist, &showlist);

  /* Recall that the first element is always "auto".  */
  target_charset_name = charset_enum[0];
  gdb_assert (strcmp (target_charset_name, "auto") == 0);
  add_setshow_enum_cmd ("target-charset", class_support,
			charset_enum, &target_charset_name, _("\
Set the target character set."), _("\
Show the target character set."), _("\
The `target character set' is the one used by the program being debugged.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
			set_target_charset_sfunc,
			show_target_charset_name,
			&setlist, &showlist);

  /* Recall that the first element is always "auto".  */
  target_wide_charset_name = charset_enum[0];
  gdb_assert (strcmp (target_wide_charset_name, "auto") == 0);
  add_setshow_enum_cmd ("target-wide-charset", class_support,
			charset_enum, &target_wide_charset_name,
			_("\
Set the target wide character set."), _("\
Show the target wide character set."), _("\
The `target wide character set' is the one used by the program being debugged.\
\nIn particular it is the encoding used by `wchar_t'.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type\n\
`set target-wide-charset'<TAB>"),
			set_target_wide_charset_sfunc,
			show_target_wide_charset_name,
			&setlist, &showlist);
}
1089