15796c8dcSSimon Schubert /* Character set conversion support for GDB. 2*ef5ccd6cSJohn Marino Copyright (C) 2001-2013 Free Software Foundation, Inc. 35796c8dcSSimon Schubert 45796c8dcSSimon Schubert This file is part of GDB. 55796c8dcSSimon Schubert 65796c8dcSSimon Schubert This program is free software; you can redistribute it and/or modify 75796c8dcSSimon Schubert it under the terms of the GNU General Public License as published by 85796c8dcSSimon Schubert the Free Software Foundation; either version 3 of the License, or 95796c8dcSSimon Schubert (at your option) any later version. 105796c8dcSSimon Schubert 115796c8dcSSimon Schubert This program is distributed in the hope that it will be useful, 125796c8dcSSimon Schubert but WITHOUT ANY WARRANTY; without even the implied warranty of 135796c8dcSSimon Schubert MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 145796c8dcSSimon Schubert GNU General Public License for more details. 155796c8dcSSimon Schubert 165796c8dcSSimon Schubert You should have received a copy of the GNU General Public License 175796c8dcSSimon Schubert along with this program. If not, see <http://www.gnu.org/licenses/>. */ 185796c8dcSSimon Schubert 195796c8dcSSimon Schubert #ifndef CHARSET_H 205796c8dcSSimon Schubert #define CHARSET_H 215796c8dcSSimon Schubert 225796c8dcSSimon Schubert /* If the target program uses a different character set than the host, 235796c8dcSSimon Schubert GDB has some support for translating between the two; GDB converts 245796c8dcSSimon Schubert characters and strings to the host character set before displaying 255796c8dcSSimon Schubert them, and converts characters and strings appearing in expressions 265796c8dcSSimon Schubert entered by the user to the target character set. 275796c8dcSSimon Schubert 285796c8dcSSimon Schubert GDB's code pretty much assumes that the host character set is some 295796c8dcSSimon Schubert superset of ASCII; there are plenty if ('0' + n) expressions and 305796c8dcSSimon Schubert the like. */ 315796c8dcSSimon Schubert 325796c8dcSSimon Schubert /* Return the name of the current host/target character set. The 335796c8dcSSimon Schubert result is owned by the charset module; the caller should not free 345796c8dcSSimon Schubert it. */ 355796c8dcSSimon Schubert const char *host_charset (void); 36cf7f2e2dSJohn Marino const char *target_charset (struct gdbarch *gdbarch); 37cf7f2e2dSJohn Marino const char *target_wide_charset (struct gdbarch *gdbarch); 385796c8dcSSimon Schubert 395796c8dcSSimon Schubert /* These values are used to specify the type of transliteration done 405796c8dcSSimon Schubert by convert_between_encodings. */ 415796c8dcSSimon Schubert enum transliterations 425796c8dcSSimon Schubert { 435796c8dcSSimon Schubert /* Error on failure to convert. */ 445796c8dcSSimon Schubert translit_none, 455796c8dcSSimon Schubert /* Transliterate to host char. */ 465796c8dcSSimon Schubert translit_char 475796c8dcSSimon Schubert }; 485796c8dcSSimon Schubert 495796c8dcSSimon Schubert /* Convert between two encodings. 505796c8dcSSimon Schubert 515796c8dcSSimon Schubert FROM is the name of the source encoding. 525796c8dcSSimon Schubert TO is the name of the target encoding. 535796c8dcSSimon Schubert BYTES holds the bytes to convert; this is assumed to be characters 545796c8dcSSimon Schubert in the target encoding. 555796c8dcSSimon Schubert NUM_BYTES is the number of bytes. 565796c8dcSSimon Schubert WIDTH is the width of a character from the FROM charset, in bytes. 575796c8dcSSimon Schubert For a variable width encoding, WIDTH should be the size of a "base 585796c8dcSSimon Schubert character". 595796c8dcSSimon Schubert OUTPUT is an obstack where the converted data is written. The 605796c8dcSSimon Schubert caller is responsible for initializing the obstack, and for 615796c8dcSSimon Schubert destroying the obstack should an error occur. 625796c8dcSSimon Schubert TRANSLIT specifies how invalid conversions should be handled. */ 63c50c785cSJohn Marino 645796c8dcSSimon Schubert void convert_between_encodings (const char *from, const char *to, 65c50c785cSJohn Marino const gdb_byte *bytes, 66c50c785cSJohn Marino unsigned int num_bytes, 675796c8dcSSimon Schubert int width, struct obstack *output, 685796c8dcSSimon Schubert enum transliterations translit); 695796c8dcSSimon Schubert 705796c8dcSSimon Schubert 715796c8dcSSimon Schubert /* These values are used by wchar_iterate to report errors. */ 725796c8dcSSimon Schubert enum wchar_iterate_result 735796c8dcSSimon Schubert { 745796c8dcSSimon Schubert /* Ordinary return. */ 755796c8dcSSimon Schubert wchar_iterate_ok, 765796c8dcSSimon Schubert /* Invalid input sequence. */ 775796c8dcSSimon Schubert wchar_iterate_invalid, 785796c8dcSSimon Schubert /* Incomplete input sequence at the end of the input. */ 795796c8dcSSimon Schubert wchar_iterate_incomplete, 805796c8dcSSimon Schubert /* EOF. */ 815796c8dcSSimon Schubert wchar_iterate_eof 825796c8dcSSimon Schubert }; 835796c8dcSSimon Schubert 845796c8dcSSimon Schubert /* Declaration of the opaque wchar iterator type. */ 855796c8dcSSimon Schubert struct wchar_iterator; 865796c8dcSSimon Schubert 875796c8dcSSimon Schubert /* Create a new character iterator which returns wchar_t's. INPUT is 885796c8dcSSimon Schubert the input buffer. BYTES is the number of bytes in the input 895796c8dcSSimon Schubert buffer. CHARSET is the name of the character set in which INPUT is 905796c8dcSSimon Schubert encoded. WIDTH is the number of bytes in a base character of 915796c8dcSSimon Schubert CHARSET. 925796c8dcSSimon Schubert 935796c8dcSSimon Schubert This function either returns a new character set iterator, or calls 945796c8dcSSimon Schubert error. The result can be freed using a cleanup; see 955796c8dcSSimon Schubert make_cleanup_wchar_iterator. */ 96c50c785cSJohn Marino struct wchar_iterator *make_wchar_iterator (const gdb_byte *input, 97c50c785cSJohn Marino size_t bytes, 985796c8dcSSimon Schubert const char *charset, 995796c8dcSSimon Schubert size_t width); 1005796c8dcSSimon Schubert 1015796c8dcSSimon Schubert /* Return a new cleanup suitable for destroying the wchar iterator 1025796c8dcSSimon Schubert ITER. */ 1035796c8dcSSimon Schubert struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter); 1045796c8dcSSimon Schubert 1055796c8dcSSimon Schubert /* Perform a single iteration of a wchar_t iterator. 1065796c8dcSSimon Schubert 1075796c8dcSSimon Schubert Returns the number of characters converted. A negative result 1085796c8dcSSimon Schubert means that EOF has been reached. A positive result indicates the 1095796c8dcSSimon Schubert number of valid wchar_ts in the result; *OUT_CHARS is updated to 1105796c8dcSSimon Schubert point to the first valid character. 1115796c8dcSSimon Schubert 1125796c8dcSSimon Schubert In all cases aside from EOF, *PTR is set to point to the first 1135796c8dcSSimon Schubert converted target byte. *LEN is set to the number of bytes 1145796c8dcSSimon Schubert converted. 1155796c8dcSSimon Schubert 1165796c8dcSSimon Schubert A zero result means one of several unusual results. *OUT_RESULT is 1175796c8dcSSimon Schubert set to indicate the type of un-ordinary return. 1185796c8dcSSimon Schubert 1195796c8dcSSimon Schubert wchar_iterate_invalid means that an invalid input character was 1205796c8dcSSimon Schubert seen. The iterator is advanced by WIDTH (the argument to 1215796c8dcSSimon Schubert make_wchar_iterator) bytes. 1225796c8dcSSimon Schubert 1235796c8dcSSimon Schubert wchar_iterate_incomplete means that an incomplete character was 1245796c8dcSSimon Schubert seen at the end of the input sequence. 1255796c8dcSSimon Schubert 1265796c8dcSSimon Schubert wchar_iterate_eof means that all bytes were successfully 1275796c8dcSSimon Schubert converted. The other output arguments are not set. */ 1285796c8dcSSimon Schubert int wchar_iterate (struct wchar_iterator *iter, 1295796c8dcSSimon Schubert enum wchar_iterate_result *out_result, 1305796c8dcSSimon Schubert gdb_wchar_t **out_chars, 1315796c8dcSSimon Schubert const gdb_byte **ptr, size_t *len); 1325796c8dcSSimon Schubert 1335796c8dcSSimon Schubert 1345796c8dcSSimon Schubert 1355796c8dcSSimon Schubert /* GDB needs to know a few details of its execution character set. 1365796c8dcSSimon Schubert This knowledge is isolated here and in charset.c. */ 1375796c8dcSSimon Schubert 1385796c8dcSSimon Schubert /* The escape character. */ 1395796c8dcSSimon Schubert #define HOST_ESCAPE_CHAR 27 1405796c8dcSSimon Schubert 1415796c8dcSSimon Schubert /* Convert a letter, like 'c', to its corresponding control 1425796c8dcSSimon Schubert character. */ 1435796c8dcSSimon Schubert char host_letter_to_control_character (char c); 1445796c8dcSSimon Schubert 1455796c8dcSSimon Schubert /* Convert a hex digit character to its numeric value. E.g., 'f' is 1465796c8dcSSimon Schubert converted to 15. This function assumes that C is a valid hex 1475796c8dcSSimon Schubert digit. Both upper- and lower-case letters are recognized. */ 1485796c8dcSSimon Schubert int host_hex_value (char c); 1495796c8dcSSimon Schubert 1505796c8dcSSimon Schubert #endif /* CHARSET_H */ 151