xref: /dflybsd-src/contrib/gdb-7/gdb/charset.h (revision de8e141f24382815c10a4012d209bbbf7abf1112)
15796c8dcSSimon Schubert /* Character set conversion support for GDB.
2*ef5ccd6cSJohn Marino    Copyright (C) 2001-2013 Free Software Foundation, Inc.
35796c8dcSSimon Schubert 
45796c8dcSSimon Schubert    This file is part of GDB.
55796c8dcSSimon Schubert 
65796c8dcSSimon Schubert    This program is free software; you can redistribute it and/or modify
75796c8dcSSimon Schubert    it under the terms of the GNU General Public License as published by
85796c8dcSSimon Schubert    the Free Software Foundation; either version 3 of the License, or
95796c8dcSSimon Schubert    (at your option) any later version.
105796c8dcSSimon Schubert 
115796c8dcSSimon Schubert    This program is distributed in the hope that it will be useful,
125796c8dcSSimon Schubert    but WITHOUT ANY WARRANTY; without even the implied warranty of
135796c8dcSSimon Schubert    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
145796c8dcSSimon Schubert    GNU General Public License for more details.
155796c8dcSSimon Schubert 
165796c8dcSSimon Schubert    You should have received a copy of the GNU General Public License
175796c8dcSSimon Schubert    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
185796c8dcSSimon Schubert 
195796c8dcSSimon Schubert #ifndef CHARSET_H
205796c8dcSSimon Schubert #define CHARSET_H
215796c8dcSSimon Schubert 
/* If the target program uses a different character set than the host,
   GDB has some support for translating between the two; GDB converts
   characters and strings to the host character set before displaying
   them, and converts characters and strings appearing in expressions
   entered by the user to the target character set.

   GDB's code pretty much assumes that the host character set is some
   superset of ASCII; there are plenty of ('0' + n) expressions and
   the like.  */
315796c8dcSSimon Schubert 
/* Forward declaration, so that the prototypes below name the
   file-scope type rather than a type whose scope ends with the
   parameter list.  */
struct gdbarch;

/* Return the name of the current host character set.  The result is
   owned by the charset module; the caller should not free it.  */
const char *host_charset (void);

/* Return the name of the current target character set.  The result
   is owned by the charset module; the caller should not free it.
   NOTE(review): GDBARCH presumably selects the architecture whose
   character set is reported -- confirm against charset.c.  */
const char *target_charset (struct gdbarch *gdbarch);

/* Return the name of the current target wide character set
   (presumably the encoding used for the target's wchar_t -- confirm
   against charset.c).  The result is owned by the charset module;
   the caller should not free it.  */
const char *target_wide_charset (struct gdbarch *gdbarch);
385796c8dcSSimon Schubert 
/* Transliteration modes accepted by convert_between_encodings.  The
   mode selects what happens when a conversion cannot be performed.  */
enum transliterations
  {
    /* Error on failure to convert.  */
    translit_none,
    /* Transliterate to host char.  */
    translit_char
  };
485796c8dcSSimon Schubert 
495796c8dcSSimon Schubert /* Convert between two encodings.
505796c8dcSSimon Schubert 
515796c8dcSSimon Schubert    FROM is the name of the source encoding.
525796c8dcSSimon Schubert    TO is the name of the target encoding.
535796c8dcSSimon Schubert    BYTES holds the bytes to convert; this is assumed to be characters
545796c8dcSSimon Schubert    in the target encoding.
555796c8dcSSimon Schubert    NUM_BYTES is the number of bytes.
565796c8dcSSimon Schubert    WIDTH is the width of a character from the FROM charset, in bytes.
575796c8dcSSimon Schubert    For a variable width encoding, WIDTH should be the size of a "base
585796c8dcSSimon Schubert    character".
595796c8dcSSimon Schubert    OUTPUT is an obstack where the converted data is written.  The
605796c8dcSSimon Schubert    caller is responsible for initializing the obstack, and for
615796c8dcSSimon Schubert    destroying the obstack should an error occur.
625796c8dcSSimon Schubert    TRANSLIT specifies how invalid conversions should be handled.  */
63c50c785cSJohn Marino 
645796c8dcSSimon Schubert void convert_between_encodings (const char *from, const char *to,
65c50c785cSJohn Marino 				const gdb_byte *bytes,
66c50c785cSJohn Marino 				unsigned int num_bytes,
675796c8dcSSimon Schubert 				int width, struct obstack *output,
685796c8dcSSimon Schubert 				enum transliterations translit);
695796c8dcSSimon Schubert 
705796c8dcSSimon Schubert 
/* Status codes that wchar_iterate stores through its OUT_RESULT
   argument to report errors and other unusual returns.  */
enum wchar_iterate_result
  {
    /* Ordinary return.  */
    wchar_iterate_ok,
    /* Invalid input sequence.  */
    wchar_iterate_invalid,
    /* Incomplete input sequence at the end of the input.  */
    wchar_iterate_incomplete,
    /* EOF.  */
    wchar_iterate_eof
  };
835796c8dcSSimon Schubert 
/* Declaration of the opaque wchar iterator type.  Only pointers to
   it can be used through this header; its members are not declared
   here.  */
struct wchar_iterator;
865796c8dcSSimon Schubert 
875796c8dcSSimon Schubert /* Create a new character iterator which returns wchar_t's.  INPUT is
885796c8dcSSimon Schubert    the input buffer.  BYTES is the number of bytes in the input
895796c8dcSSimon Schubert    buffer.  CHARSET is the name of the character set in which INPUT is
905796c8dcSSimon Schubert    encoded.  WIDTH is the number of bytes in a base character of
915796c8dcSSimon Schubert    CHARSET.
925796c8dcSSimon Schubert 
935796c8dcSSimon Schubert    This function either returns a new character set iterator, or calls
945796c8dcSSimon Schubert    error.  The result can be freed using a cleanup; see
955796c8dcSSimon Schubert    make_cleanup_wchar_iterator.  */
96c50c785cSJohn Marino struct wchar_iterator *make_wchar_iterator (const gdb_byte *input,
97c50c785cSJohn Marino 					    size_t bytes,
985796c8dcSSimon Schubert 					    const char *charset,
995796c8dcSSimon Schubert 					    size_t width);
1005796c8dcSSimon Schubert 
/* Return a new cleanup suitable for destroying the wchar iterator
   ITER, i.e. an iterator obtained from make_wchar_iterator.  */
struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter);
1045796c8dcSSimon Schubert 
1055796c8dcSSimon Schubert /* Perform a single iteration of a wchar_t iterator.
1065796c8dcSSimon Schubert 
1075796c8dcSSimon Schubert    Returns the number of characters converted.  A negative result
1085796c8dcSSimon Schubert    means that EOF has been reached.  A positive result indicates the
1095796c8dcSSimon Schubert    number of valid wchar_ts in the result; *OUT_CHARS is updated to
1105796c8dcSSimon Schubert    point to the first valid character.
1115796c8dcSSimon Schubert 
1125796c8dcSSimon Schubert    In all cases aside from EOF, *PTR is set to point to the first
1135796c8dcSSimon Schubert    converted target byte.  *LEN is set to the number of bytes
1145796c8dcSSimon Schubert    converted.
1155796c8dcSSimon Schubert 
1165796c8dcSSimon Schubert    A zero result means one of several unusual results.  *OUT_RESULT is
1175796c8dcSSimon Schubert    set to indicate the type of un-ordinary return.
1185796c8dcSSimon Schubert 
1195796c8dcSSimon Schubert    wchar_iterate_invalid means that an invalid input character was
1205796c8dcSSimon Schubert    seen.  The iterator is advanced by WIDTH (the argument to
1215796c8dcSSimon Schubert    make_wchar_iterator) bytes.
1225796c8dcSSimon Schubert 
1235796c8dcSSimon Schubert    wchar_iterate_incomplete means that an incomplete character was
1245796c8dcSSimon Schubert    seen at the end of the input sequence.
1255796c8dcSSimon Schubert 
1265796c8dcSSimon Schubert    wchar_iterate_eof means that all bytes were successfully
1275796c8dcSSimon Schubert    converted.  The other output arguments are not set.  */
1285796c8dcSSimon Schubert int wchar_iterate (struct wchar_iterator *iter,
1295796c8dcSSimon Schubert 		   enum wchar_iterate_result *out_result,
1305796c8dcSSimon Schubert 		   gdb_wchar_t **out_chars,
1315796c8dcSSimon Schubert 		   const gdb_byte **ptr, size_t *len);
1325796c8dcSSimon Schubert 
1335796c8dcSSimon Schubert 
1345796c8dcSSimon Schubert 
1355796c8dcSSimon Schubert /* GDB needs to know a few details of its execution character set.
1365796c8dcSSimon Schubert    This knowledge is isolated here and in charset.c.  */
1375796c8dcSSimon Schubert 
/* The escape character in the host character set (27 is ESC in
   ASCII).  */
#define HOST_ESCAPE_CHAR 27
1405796c8dcSSimon Schubert 
/* Convert a letter C, like 'c', to its corresponding control
   character in the host character set, and return that character.  */
char host_letter_to_control_character (char c);
1445796c8dcSSimon Schubert 
/* Convert a hex digit character C to its numeric value and return
   it.  E.g., 'f' is converted to 15.  Both upper- and lower-case
   letters are recognized.  This function assumes that C is a valid
   hex digit; the result for any other character is not specified
   here.  */
int host_hex_value (char c);
1495796c8dcSSimon Schubert 
1505796c8dcSSimon Schubert #endif /* CHARSET_H */
151