xref: /netbsd-src/external/gpl3/gdb/dist/gdb/charset.c (revision a24efa7dea9f1f56c3bdb15a927d3516792ace1c)
1 /* Character set conversion support for GDB.
2 
3    Copyright (C) 2001-2015 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_obstack.h"
24 #include "gdb_wait.h"
25 #include "charset-list.h"
26 #include "vec.h"
27 #include "environ.h"
28 #include "arch-utils.h"
29 #include "gdb_vecs.h"
30 #include <ctype.h>
31 
32 #ifdef USE_WIN32API
33 #include <windows.h>
34 #endif
35 
36 /* How GDB's character set support works
37 
38    GDB has three global settings:
39 
40    - The `current host character set' is the character set GDB should
41      use in talking to the user, and which (hopefully) the user's
42      terminal knows how to display properly.  Most users should not
43      change this.
44 
45    - The `current target character set' is the character set the
46      program being debugged uses.
47 
48    - The `current target wide character set' is the wide character set
49      the program being debugged uses, that is, the encoding used for
50      wchar_t.
51 
52    There are commands to set each of these, and mechanisms for
53    choosing reasonable default values.  GDB has a global list of
54    character sets that it can use as its host or target character
55    sets.
56 
57    The header file `charset.h' declares various functions that
58    different pieces of GDB need to perform tasks like:
59 
60    - printing target strings and characters to the user's terminal
61      (mostly target->host conversions),
62 
63    - building target-appropriate representations of strings and
64      characters the user enters in expressions (mostly host->target
65      conversions),
66 
67      and so on.
68 
69    To avoid excessive code duplication and maintenance efforts,
70    GDB simply requires a capable iconv function.  Users on platforms
71    without a suitable iconv can use the GNU iconv library.  */
72 
73 
74 #ifdef PHONY_ICONV
75 
76 /* Provide a phony iconv that does as little as possible.  Also,
77    arrange for there to be a single available character set.  */
78 
/* With the phony iconv exactly one character set is available, so
   every default charset name is forced to ISO-8859-1.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Redirect the iconv type and entry points to the phony versions
   defined below.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Phony replacement for iconv_open.

   Accepted sources (FROM) are "UTF-32BE", "wchar_t" and the host
   charset; accepted destinations (TO) are "wchar_t" and the host
   charset.  Any other combination yields -1.

   On success the returned descriptor is 1 when FROM is "UTF-32BE"
   and 0 otherwise; phony_iconv uses it as a flag.  */

static iconv_t
phony_iconv_open (const char *to, const char *from)
{
  int from_utf32be = strcmp (from, "UTF-32BE") == 0;
  int from_ok = (from_utf32be
                 || strcmp (from, "wchar_t") == 0
                 || strcmp (from, GDB_DEFAULT_HOST_CHARSET) == 0);
  int to_ok = (strcmp (to, "wchar_t") == 0
               || strcmp (to, GDB_DEFAULT_HOST_CHARSET) == 0);

  if (!from_ok || !to_ok)
    return -1;

  return from_utf32be;
}
113 
/* Phony replacement for iconv_close.  A phony descriptor holds no
   resources, so ARG is ignored and 0 (success) is always returned.  */
static int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
119 
/* Phony replacement for iconv.

   UTF_FLAG is the descriptor returned by phony_iconv_open: non-zero
   means the input is UTF-32BE, zero means input bytes are copied to
   the output unchanged.

   *INBUF / *INBYTESLEFT describe the remaining input and
   *OUTBUF / *OUTBYTESLEFT the remaining output space; all four are
   advanced past whatever was converted, even when an error is
   reported.

   Returns 0 on success.  On failure returns (size_t) -1 with errno
   set to:
     EILSEQ - a UTF-32BE unit does not fit in a single byte;
     E2BIG  - the output buffer is full;
     EINVAL - the input ends with an incomplete UTF-32BE unit.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code unit.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }
          /* Never store past the end of the output buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole 4-byte unit was consumed, so the pointer and the
             remaining count must move in step.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only leftover bytes indicate an incomplete sequence; fully
         consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      /* Anything left over did not fit in the output.  */
      if (*inbytesleft)
        {
          errno = E2BIG;
          return -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
180 
181 #else /* PHONY_ICONV */
182 
183 /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
184    to ENOENT, while gnulib defines it to a different value.  Always
185    map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */
186 
187 static size_t
188 gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
189 	   char **outbuf, size_t *outbytesleft)
190 {
191   size_t ret;
192 
193   ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
194   if (errno == ENOENT)
195     errno = EILSEQ;
196   return ret;
197 }
198 
199 #undef iconv
200 #define iconv gdb_iconv
201 
202 #endif /* PHONY_ICONV */
203 
204 
205 /* The global lists of character sets and translations.  */
206 
207 
208 #ifndef GDB_DEFAULT_TARGET_CHARSET
209 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
210 #endif
211 
212 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
213 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
214 #endif
215 
216 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
217 static const char *host_charset_name = "auto";
218 static void
219 show_host_charset_name (struct ui_file *file, int from_tty,
220 			struct cmd_list_element *c,
221 			const char *value)
222 {
223   if (!strcmp (value, "auto"))
224     fprintf_filtered (file,
225 		      _("The host character set is \"auto; currently %s\".\n"),
226 		      auto_host_charset_name);
227   else
228     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
229 }
230 
/* The user-visible target charset setting; "auto" by default.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  When VALUE is
   "auto", the charset reported by gdbarch_auto_charset for the
   current architecture is shown as well.  FROM_TTY and C are
   unused.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
245 
/* The user-visible target wide charset setting; "auto" by
   default.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  When VALUE
   is "auto", the charset reported by gdbarch_auto_wide_charset for
   the current architecture is shown as well.  FROM_TTY and C are
   unused.  */

static void
show_target_wide_charset_name (struct ui_file *file,
                               int from_tty,
                               struct cmd_list_element *c,
                               const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
262 
263 static const char *default_charset_names[] =
264 {
265   DEFAULT_CHARSET_NAMES
266   0
267 };
268 
269 static const char **charset_enum;
270 
271 
272 /* If the target wide character set has big- or little-endian
273    variants, these are the corresponding names.  */
274 static const char *target_wide_charset_be_name;
275 static const char *target_wide_charset_le_name;
276 
277 /* The architecture for which the BE- and LE-names are valid.  */
278 static struct gdbarch *be_le_arch;
279 
280 /* A helper function which sets the target wide big- and little-endian
281    character set names, if possible.  */
282 
283 static void
284 set_be_le_names (struct gdbarch *gdbarch)
285 {
286   int i, len;
287   const char *target_wide;
288 
289   if (be_le_arch == gdbarch)
290     return;
291   be_le_arch = gdbarch;
292 
293   target_wide_charset_le_name = NULL;
294   target_wide_charset_be_name = NULL;
295 
296   target_wide = target_wide_charset_name;
297   if (!strcmp (target_wide, "auto"))
298     target_wide = gdbarch_auto_wide_charset (gdbarch);
299 
300   len = strlen (target_wide);
301   for (i = 0; charset_enum[i]; ++i)
302     {
303       if (strncmp (target_wide, charset_enum[i], len))
304 	continue;
305       if ((charset_enum[i][len] == 'B'
306 	   || charset_enum[i][len] == 'L')
307 	  && charset_enum[i][len + 1] == 'E'
308 	  && charset_enum[i][len + 2] == '\0')
309 	{
310 	  if (charset_enum[i][len] == 'B')
311 	    target_wide_charset_be_name = charset_enum[i];
312 	  else
313 	    target_wide_charset_le_name = charset_enum[i];
314 	}
315     }
316 }
317 
318 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
319    target-wide-charset', 'set charset' sfunc's.  */
320 
321 static void
322 validate (struct gdbarch *gdbarch)
323 {
324   iconv_t desc;
325   const char *host_cset = host_charset ();
326   const char *target_cset = target_charset (gdbarch);
327   const char *target_wide_cset = target_wide_charset_name;
328 
329   if (!strcmp (target_wide_cset, "auto"))
330     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
331 
332   desc = iconv_open (target_wide_cset, host_cset);
333   if (desc == (iconv_t) -1)
334     error (_("Cannot convert between character sets `%s' and `%s'"),
335 	   target_wide_cset, host_cset);
336   iconv_close (desc);
337 
338   desc = iconv_open (target_cset, host_cset);
339   if (desc == (iconv_t) -1)
340     error (_("Cannot convert between character sets `%s' and `%s'"),
341 	   target_cset, host_cset);
342   iconv_close (desc);
343 
344   /* Clear the cache.  */
345   be_le_arch = NULL;
346 }
347 
348 /* This is the sfunc for the 'set charset' command.  */
349 static void
350 set_charset_sfunc (char *charset, int from_tty,
351 		   struct cmd_list_element *c)
352 {
353   /* CAREFUL: set the target charset here as well.  */
354   target_charset_name = host_charset_name;
355   validate (get_current_arch ());
356 }
357 
/* The sfunc for the 'set host-charset' command.  It exists only to
   give validate the signature an sfunc must have; CHARSET, FROM_TTY
   and C are unused.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
366 
/* The sfunc for the 'set target-charset' command: validate the new
   settings against the current architecture.  CHARSET, FROM_TTY and
   C are unused.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
374 
/* The sfunc for the 'set target-wide-charset' command: validate the
   new settings against the current architecture.  CHARSET, FROM_TTY
   and C are unused.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
382 
383 /* sfunc for the 'show charset' command.  */
384 static void
385 show_charset (struct ui_file *file, int from_tty,
386 	      struct cmd_list_element *c,
387 	      const char *name)
388 {
389   show_host_charset_name (file, from_tty, c, host_charset_name);
390   show_target_charset_name (file, from_tty, c, target_charset_name);
391   show_target_wide_charset_name (file, from_tty, c,
392 				 target_wide_charset_name);
393 }
394 
395 
396 /* Accessor functions.  */
397 
398 const char *
399 host_charset (void)
400 {
401   if (!strcmp (host_charset_name, "auto"))
402     return auto_host_charset_name;
403   return host_charset_name;
404 }
405 
406 const char *
407 target_charset (struct gdbarch *gdbarch)
408 {
409   if (!strcmp (target_charset_name, "auto"))
410     return gdbarch_auto_charset (gdbarch);
411   return target_charset_name;
412 }
413 
414 const char *
415 target_wide_charset (struct gdbarch *gdbarch)
416 {
417   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
418 
419   set_be_le_names (gdbarch);
420   if (byte_order == BFD_ENDIAN_BIG)
421     {
422       if (target_wide_charset_be_name)
423 	return target_wide_charset_be_name;
424     }
425   else
426     {
427       if (target_wide_charset_le_name)
428 	return target_wide_charset_le_name;
429     }
430 
431   if (!strcmp (target_wide_charset_name, "auto"))
432     return gdbarch_auto_wide_charset (gdbarch);
433 
434   return target_wide_charset_name;
435 }
436 
437 
438 /* Host character set management.  For the time being, we assume that
439    the host character set is some superset of ASCII.  */
440 
/* Return the control character corresponding to the host character
   C.  '?' maps to DEL (0177); any other character has bits 0140
   cleared (it is masked with 0237).  */
char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
448 
/* Convert a host character, C, to its hex value (0 through 15).  C
   must already have been validated using isxdigit; any other
   character trips the assertion below.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions require a value representable as
     unsigned char (or EOF); plain char may be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
462 
463 
464 /* Public character management functions.  */
465 
/* A cleanup function which closes an iconv descriptor.  P points to
   the iconv_t to close.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
474 
475 void
476 convert_between_encodings (const char *from, const char *to,
477 			   const gdb_byte *bytes, unsigned int num_bytes,
478 			   int width, struct obstack *output,
479 			   enum transliterations translit)
480 {
481   iconv_t desc;
482   struct cleanup *cleanups;
483   size_t inleft;
484   ICONV_CONST char *inp;
485   unsigned int space_request;
486 
487   /* Often, the host and target charsets will be the same.  */
488   if (!strcmp (from, to))
489     {
490       obstack_grow (output, bytes, num_bytes);
491       return;
492     }
493 
494   desc = iconv_open (to, from);
495   if (desc == (iconv_t) -1)
496     perror_with_name (_("Converting character sets"));
497   cleanups = make_cleanup (cleanup_iconv, &desc);
498 
499   inleft = num_bytes;
500   inp = (ICONV_CONST char *) bytes;
501 
502   space_request = num_bytes;
503 
504   while (inleft > 0)
505     {
506       char *outp;
507       size_t outleft, r;
508       int old_size;
509 
510       old_size = obstack_object_size (output);
511       obstack_blank (output, space_request);
512 
513       outp = (char *) obstack_base (output) + old_size;
514       outleft = space_request;
515 
516       r = iconv (desc, &inp, &inleft, &outp, &outleft);
517 
518       /* Now make sure that the object on the obstack only includes
519 	 bytes we have converted.  */
520       obstack_blank_fast (output, -outleft);
521 
522       if (r == (size_t) -1)
523 	{
524 	  switch (errno)
525 	    {
526 	    case EILSEQ:
527 	      {
528 		int i;
529 
530 		/* Invalid input sequence.  */
531 		if (translit == translit_none)
532 		  error (_("Could not convert character "
533 			   "to `%s' character set"), to);
534 
535 		/* We emit escape sequence for the bytes, skip them,
536 		   and try again.  */
537 		for (i = 0; i < width; ++i)
538 		  {
539 		    char octal[5];
540 
541 		    xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
542 		    obstack_grow_str (output, octal);
543 
544 		    ++inp;
545 		    --inleft;
546 		  }
547 	      }
548 	      break;
549 
550 	    case E2BIG:
551 	      /* We ran out of space in the output buffer.  Make it
552 		 bigger next time around.  */
553 	      space_request *= 2;
554 	      break;
555 
556 	    case EINVAL:
557 	      /* Incomplete input sequence.  FIXME: ought to report this
558 		 to the caller somehow.  */
559 	      inleft = 0;
560 	      break;
561 
562 	    default:
563 	      perror_with_name (_("Internal error while "
564 				  "converting character sets"));
565 	    }
566 	}
567     }
568 
569   do_cleanups (cleanups);
570 }
571 
572 
573 
574 /* An iterator that returns host wchar_t's from a target string.  */
575 struct wchar_iterator
576 {
577   /* The underlying iconv descriptor.  */
578   iconv_t desc;
579 
580   /* The input string.  This is updated as convert characters.  */
581   const gdb_byte *input;
582   /* The number of bytes remaining in the input.  */
583   size_t bytes;
584 
585   /* The width of an input character.  */
586   size_t width;
587 
588   /* The output buffer and its size.  */
589   gdb_wchar_t *out;
590   size_t out_size;
591 };
592 
593 /* Create a new iterator.  */
594 struct wchar_iterator *
595 make_wchar_iterator (const gdb_byte *input, size_t bytes,
596 		     const char *charset, size_t width)
597 {
598   struct wchar_iterator *result;
599   iconv_t desc;
600 
601   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
602   if (desc == (iconv_t) -1)
603     perror_with_name (_("Converting character sets"));
604 
605   result = XNEW (struct wchar_iterator);
606   result->desc = desc;
607   result->input = input;
608   result->bytes = bytes;
609   result->width = width;
610 
611   result->out = XNEW (gdb_wchar_t);
612   result->out_size = 1;
613 
614   return result;
615 }
616 
617 static void
618 do_cleanup_iterator (void *p)
619 {
620   struct wchar_iterator *iter = p;
621 
622   iconv_close (iter->desc);
623   xfree (iter->out);
624   xfree (iter);
625 }
626 
627 struct cleanup *
628 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
629 {
630   return make_cleanup (do_cleanup_iterator, iter);
631 }
632 
633 int
634 wchar_iterate (struct wchar_iterator *iter,
635 	       enum wchar_iterate_result *out_result,
636 	       gdb_wchar_t **out_chars,
637 	       const gdb_byte **ptr,
638 	       size_t *len)
639 {
640   size_t out_request;
641 
642   /* Try to convert some characters.  At first we try to convert just
643      a single character.  The reason for this is that iconv does not
644      necessarily update its outgoing arguments when it encounters an
645      invalid input sequence -- but we want to reliably report this to
646      our caller so it can emit an escape sequence.  */
647   out_request = 1;
648   while (iter->bytes > 0)
649     {
650       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
651       char *outptr = (char *) &iter->out[0];
652       const gdb_byte *orig_inptr = iter->input;
653       size_t orig_in = iter->bytes;
654       size_t out_avail = out_request * sizeof (gdb_wchar_t);
655       size_t num;
656       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
657 
658       iter->input = (gdb_byte *) inptr;
659 
660       if (r == (size_t) -1)
661 	{
662 	  switch (errno)
663 	    {
664 	    case EILSEQ:
665 	      /* Invalid input sequence.  We still might have
666 		 converted a character; if so, return it.  */
667 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
668 		break;
669 
670 	      /* Otherwise skip the first invalid character, and let
671 		 the caller know about it.  */
672 	      *out_result = wchar_iterate_invalid;
673 	      *ptr = iter->input;
674 	      *len = iter->width;
675 	      iter->input += iter->width;
676 	      iter->bytes -= iter->width;
677 	      return 0;
678 
679 	    case E2BIG:
680 	      /* We ran out of space.  We still might have converted a
681 		 character; if so, return it.  Otherwise, grow the
682 		 buffer and try again.  */
683 	      if (out_avail < out_request * sizeof (gdb_wchar_t))
684 		break;
685 
686 	      ++out_request;
687 	      if (out_request > iter->out_size)
688 		{
689 		  iter->out_size = out_request;
690 		  iter->out = xrealloc (iter->out,
691 					out_request * sizeof (gdb_wchar_t));
692 		}
693 	      continue;
694 
695 	    case EINVAL:
696 	      /* Incomplete input sequence.  Let the caller know, and
697 		 arrange for future calls to see EOF.  */
698 	      *out_result = wchar_iterate_incomplete;
699 	      *ptr = iter->input;
700 	      *len = iter->bytes;
701 	      iter->bytes = 0;
702 	      return 0;
703 
704 	    default:
705 	      perror_with_name (_("Internal error while "
706 				  "converting character sets"));
707 	    }
708 	}
709 
710       /* We converted something.  */
711       num = out_request - out_avail / sizeof (gdb_wchar_t);
712       *out_result = wchar_iterate_ok;
713       *out_chars = iter->out;
714       *ptr = orig_inptr;
715       *len = orig_in - iter->bytes;
716       return num;
717     }
718 
719   /* Really done.  */
720   *out_result = wchar_iterate_eof;
721   return -1;
722 }
723 
724 
725 /* The charset.c module initialization function.  */
726 
/* Prototype for the initialization function defined at the end of
   this file (presumably present to satisfy -Wmissing-prototypes).  */
extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

/* The vector of charset names.  _initialize_charset pushes "auto"
   first, then find_charset_names appends the names it discovers
   followed by a terminating NULL.  find_charset_names may instead
   leave this NULL when discovery fails.  */
static VEC (char_ptr) *charsets;
730 
731 #ifdef PHONY_ICONV
732 
733 static void
734 find_charset_names (void)
735 {
736   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
737   VEC_safe_push (char_ptr, charsets, NULL);
738 }
739 
740 #else /* PHONY_ICONV */
741 
742 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
743    provides different symbols in the static and dynamic libraries.
744    So, configure may see libiconvlist but not iconvlist.  But, calling
745    iconvlist is the right thing to do and will work.  Hence we do a
746    check here but unconditionally call iconvlist below.  */
747 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
748 
749 /* A helper function that adds some character sets to the vector of
750    all character sets.  This is a callback function for iconvlist.  */
751 
752 static int
753 add_one (unsigned int count, const char *const *names, void *data)
754 {
755   unsigned int i;
756 
757   for (i = 0; i < count; ++i)
758     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
759 
760   return 0;
761 }
762 
763 static void
764 find_charset_names (void)
765 {
766   iconvlist (add_one, NULL);
767   VEC_safe_push (char_ptr, charsets, NULL);
768 }
769 
770 #else
771 
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line is ignored if any of
     them occurs anywhere within it.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = ignore_lines; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
800 
801 static void
802 find_charset_names (void)
803 {
804   struct pex_obj *child;
805   char *args[3];
806   int err, status;
807   int fail = 1;
808   int flags;
809   struct gdb_environ *iconv_env;
810   char *iconv_program;
811 
812   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
813      not a tty.  We need to recognize it and ignore it.  This text is
814      subject to translation, so force LANGUAGE=C.  */
815   iconv_env = make_environ ();
816   init_environ (iconv_env);
817   set_in_environ (iconv_env, "LANGUAGE", "C");
818   set_in_environ (iconv_env, "LC_ALL", "C");
819 
820   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
821 
822 #ifdef ICONV_BIN
823   {
824     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
825 					      ICONV_BIN_RELOCATABLE);
826     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
827     xfree (iconv_dir);
828   }
829 #else
830   iconv_program = xstrdup ("iconv");
831 #endif
832   args[0] = iconv_program;
833   args[1] = "-l";
834   args[2] = NULL;
835   flags = PEX_STDERR_TO_STDOUT;
836 #ifndef ICONV_BIN
837   flags |= PEX_SEARCH;
838 #endif
839   /* Note that we simply ignore errors here.  */
840   if (!pex_run_in_environment (child, flags,
841 			       args[0], args, environ_vector (iconv_env),
842 			       NULL, NULL, &err))
843     {
844       FILE *in = pex_read_output (child, 0);
845 
846       /* POSIX says that iconv -l uses an unspecified format.  We
847 	 parse the glibc and libiconv formats; feel free to add others
848 	 as needed.  */
849 
850       while (in != NULL && !feof (in))
851 	{
852 	  /* The size of buf is chosen arbitrarily.  */
853 	  char buf[1024];
854 	  char *start, *r;
855 	  int len;
856 
857 	  r = fgets (buf, sizeof (buf), in);
858 	  if (!r)
859 	    break;
860 	  len = strlen (r);
861 	  if (len <= 3)
862 	    continue;
863 	  if (ignore_line_p (r))
864 	    continue;
865 
866 	  /* Strip off the newline.  */
867 	  --len;
868 	  /* Strip off one or two '/'s.  glibc will print lines like
869 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
870 	  if (buf[len - 1] == '/')
871 	    --len;
872 	  if (buf[len - 1] == '/')
873 	    --len;
874 	  buf[len] = '\0';
875 
876 	  /* libiconv will print multiple entries per line, separated
877 	     by spaces.  Older iconvs will print multiple entries per
878 	     line, indented by two spaces, and separated by ", "
879 	     (i.e. the human readable form).  */
880 	  start = buf;
881 	  while (1)
882 	    {
883 	      int keep_going;
884 	      char *p;
885 
886 	      /* Skip leading blanks.  */
887 	      for (p = start; *p && *p == ' '; ++p)
888 		;
889 	      start = p;
890 	      /* Find the next space, comma, or end-of-line.  */
891 	      for ( ; *p && *p != ' ' && *p != ','; ++p)
892 		;
893 	      /* Ignore an empty result.  */
894 	      if (p == start)
895 		break;
896 	      keep_going = *p;
897 	      *p = '\0';
898 	      VEC_safe_push (char_ptr, charsets, xstrdup (start));
899 	      if (!keep_going)
900 		break;
901 	      /* Skip any extra spaces.  */
902 	      for (start = p + 1; *start && *start == ' '; ++start)
903 		;
904 	    }
905 	}
906 
907       if (pex_get_status (child, 1, &status)
908 	  && WIFEXITED (status) && !WEXITSTATUS (status))
909 	fail = 0;
910 
911     }
912 
913   xfree (iconv_program);
914   pex_free (child);
915   free_environ (iconv_env);
916 
917   if (fail)
918     {
919       /* Some error occurred, so drop the vector.  */
920       free_char_ptr_vec (charsets);
921       charsets = NULL;
922     }
923   else
924     VEC_safe_push (char_ptr, charsets, NULL);
925 }
926 
927 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
928 #endif /* PHONY_ICONV */
929 
930 /* The "auto" target charset used by default_auto_charset.  */
931 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
932 
933 const char *
934 default_auto_charset (void)
935 {
936   return auto_target_charset_name;
937 }
938 
939 const char *
940 default_auto_wide_charset (void)
941 {
942   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
943 }
944 
945 
#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
/* Macro used for UTF or UCS endianness suffix.  */
#if WORDS_BIGENDIAN
#define ENDIAN_SUFFIX "BE"
#else
#define ENDIAN_SUFFIX "LE"
#endif

/* The code below serves to generate a compile time error if
   gdb_wchar_t type is not of size 2 nor 4, despite the fact that
   macro __STDC_ISO_10646__ is defined.
   This is better than a gdb_assert call, because GDB cannot handle
   strings correctly if this size is different.  */

extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
                                       || sizeof (gdb_wchar_t) == 4)
                                      ? 1 : -1];

/* Return non-zero if iconv_open accepts a conversion from the host
   charset to the charset called NAME.  */

static int
intermediate_encoding_usable_p (const char *name)
{
  iconv_t desc = iconv_open (name, host_charset ());

  if (desc == (iconv_t) -1)
    return 0;
  iconv_close (desc);
  return 1;
}

/* intermediate_encoding returns the charset used internally by
   GDB to convert between target and host encodings.  As the test
   above compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
   UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
   otherwise an error is generated.

   The first successful result is remembered and returned by all
   later calls.  */

const char *
intermediate_encoding (void)
{
  static const char *stored_result = NULL;
  char *result;

  if (stored_result)
    return stored_result;

  /* First try: "UTF-16" or "UTF-32" plus the endianness suffix.  */
  result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
                       ENDIAN_SUFFIX);
  if (intermediate_encoding_usable_p (result))
    {
      stored_result = result;
      return result;
    }
  /* Not valid, free the allocated memory.  */
  xfree (result);

  /* Second try: "UCS-2" or "UCS-4" plus the endianness suffix.  */
  result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
                       ENDIAN_SUFFIX);
  if (intermediate_encoding_usable_p (result))
    {
      stored_result = result;
      return result;
    }
  /* Not valid, free the allocated memory.  */
  xfree (result);

  /* No valid charset found, generate error here.  */
  error (_("Unable to find a valid charset for string conversions"));
}

#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
1009 
1010 void
1011 _initialize_charset (void)
1012 {
1013   /* The first element is always "auto".  */
1014   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
1015   find_charset_names ();
1016 
1017   if (VEC_length (char_ptr, charsets) > 1)
1018     charset_enum = (const char **) VEC_address (char_ptr, charsets);
1019   else
1020     charset_enum = default_charset_names;
1021 
1022 #ifndef PHONY_ICONV
1023 #ifdef HAVE_LANGINFO_CODESET
1024   /* The result of nl_langinfo may be overwritten later.  This may
1025      leak a little memory, if the user later changes the host charset,
1026      but that doesn't matter much.  */
1027   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1028   /* Solaris will return `646' here -- but the Solaris iconv then does
1029      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
1030      which GNU libiconv doesn't like (infinite loop).  */
1031   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1032     auto_host_charset_name = "ASCII";
1033   auto_target_charset_name = auto_host_charset_name;
1034 #elif defined (USE_WIN32API)
1035   {
1036     /* "CP" + x<=5 digits + paranoia.  */
1037     static char w32_host_default_charset[16];
1038 
1039     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1040 	      "CP%d", GetACP());
1041     auto_host_charset_name = w32_host_default_charset;
1042     auto_target_charset_name = auto_host_charset_name;
1043   }
1044 #endif
1045 #endif
1046 
1047   add_setshow_enum_cmd ("charset", class_support,
1048 			charset_enum, &host_charset_name, _("\
1049 Set the host and target character sets."), _("\
1050 Show the host and target character sets."), _("\
1051 The `host character set' is the one used by the system GDB is running on.\n\
1052 The `target character set' is the one used by the program being debugged.\n\
1053 You may only use supersets of ASCII for your host character set; GDB does\n\
1054 not support any others.\n\
1055 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1056 			/* Note that the sfunc below needs to set
1057 			   target_charset_name, because the 'set
1058 			   charset' command sets two variables.  */
1059 			set_charset_sfunc,
1060 			show_charset,
1061 			&setlist, &showlist);
1062 
1063   add_setshow_enum_cmd ("host-charset", class_support,
1064 			charset_enum, &host_charset_name, _("\
1065 Set the host character set."), _("\
1066 Show the host character set."), _("\
1067 The `host character set' is the one used by the system GDB is running on.\n\
1068 You may only use supersets of ASCII for your host character set; GDB does\n\
1069 not support any others.\n\
1070 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1071 			set_host_charset_sfunc,
1072 			show_host_charset_name,
1073 			&setlist, &showlist);
1074 
1075   add_setshow_enum_cmd ("target-charset", class_support,
1076 			charset_enum, &target_charset_name, _("\
1077 Set the target character set."), _("\
1078 Show the target character set."), _("\
1079 The `target character set' is the one used by the program being debugged.\n\
1080 GDB translates characters and strings between the host and target\n\
1081 character sets as needed.\n\
1082 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1083 			set_target_charset_sfunc,
1084 			show_target_charset_name,
1085 			&setlist, &showlist);
1086 
1087   add_setshow_enum_cmd ("target-wide-charset", class_support,
1088 			charset_enum, &target_wide_charset_name,
1089 			_("\
1090 Set the target wide character set."), _("\
1091 Show the target wide character set."), _("\
1092 The `target wide character set' is the one used by the program being debugged.\
1093 \nIn particular it is the encoding used by `wchar_t'.\n\
1094 GDB translates characters and strings between the host and target\n\
1095 character sets as needed.\n\
1096 To see a list of the character sets GDB supports, type\n\
1097 `set target-wide-charset'<TAB>"),
1098 			set_target_wide_charset_sfunc,
1099 			show_target_wide_charset_name,
1100 			&setlist, &showlist);
1101 }
1102