1 /* Character set conversion support for GDB. 2 Copyright (C) 2001-2019 Free Software Foundation, Inc. 3 4 This file is part of GDB. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 #ifndef CHARSET_H 20 #define CHARSET_H 21 22 #include "common/def-vector.h" 23 24 /* If the target program uses a different character set than the host, 25 GDB has some support for translating between the two; GDB converts 26 characters and strings to the host character set before displaying 27 them, and converts characters and strings appearing in expressions 28 entered by the user to the target character set. 29 30 GDB's code pretty much assumes that the host character set is some 31 superset of ASCII; there are plenty if ('0' + n) expressions and 32 the like. */ 33 34 /* Return the name of the current host/target character set. The 35 result is owned by the charset module; the caller should not free 36 it. */ 37 const char *host_charset (void); 38 const char *target_charset (struct gdbarch *gdbarch); 39 const char *target_wide_charset (struct gdbarch *gdbarch); 40 41 /* These values are used to specify the type of transliteration done 42 by convert_between_encodings. */ 43 enum transliterations 44 { 45 /* Error on failure to convert. */ 46 translit_none, 47 /* Transliterate to host char. */ 48 translit_char 49 }; 50 51 /* Convert between two encodings. 52 53 FROM is the name of the source encoding. 54 TO is the name of the target encoding. 55 BYTES holds the bytes to convert; this is assumed to be characters 56 in the target encoding. 57 NUM_BYTES is the number of bytes. 58 WIDTH is the width of a character from the FROM charset, in bytes. 59 For a variable width encoding, WIDTH should be the size of a "base 60 character". 61 OUTPUT is an obstack where the converted data is written. The 62 caller is responsible for initializing the obstack, and for 63 destroying the obstack should an error occur. 64 TRANSLIT specifies how invalid conversions should be handled. */ 65 66 void convert_between_encodings (const char *from, const char *to, 67 const gdb_byte *bytes, 68 unsigned int num_bytes, 69 int width, struct obstack *output, 70 enum transliterations translit); 71 72 73 /* These values are used by wchar_iterate to report errors. */ 74 enum wchar_iterate_result 75 { 76 /* Ordinary return. */ 77 wchar_iterate_ok, 78 /* Invalid input sequence. */ 79 wchar_iterate_invalid, 80 /* Incomplete input sequence at the end of the input. */ 81 wchar_iterate_incomplete, 82 /* EOF. */ 83 wchar_iterate_eof 84 }; 85 86 /* An iterator that returns host wchar_t's from a target string. */ 87 class wchar_iterator 88 { 89 public: 90 91 /* Create a new character iterator which returns wchar_t's. INPUT is 92 the input buffer. BYTES is the number of bytes in the input 93 buffer. CHARSET is the name of the character set in which INPUT is 94 encoded. WIDTH is the number of bytes in a base character of 95 CHARSET. 96 97 This constructor can throw on error. */ 98 wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, 99 size_t width); 100 101 ~wchar_iterator (); 102 103 /* Perform a single iteration of a wchar_t iterator. 104 105 Returns the number of characters converted. A negative result 106 means that EOF has been reached. A positive result indicates the 107 number of valid wchar_ts in the result; *OUT_CHARS is updated to 108 point to the first valid character. 109 110 In all cases aside from EOF, *PTR is set to point to the first 111 converted target byte. *LEN is set to the number of bytes 112 converted. 113 114 A zero result means one of several unusual results. *OUT_RESULT is 115 set to indicate the type of un-ordinary return. 116 117 wchar_iterate_invalid means that an invalid input character was 118 seen. The iterator is advanced by WIDTH (the argument to 119 the wchar_iterator constructor) bytes. 120 121 wchar_iterate_incomplete means that an incomplete character was 122 seen at the end of the input sequence. 123 124 wchar_iterate_eof means that all bytes were successfully 125 converted. The other output arguments are not set. */ 126 int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars, 127 const gdb_byte **ptr, size_t *len); 128 129 private: 130 131 /* The underlying iconv descriptor. */ 132 #ifdef PHONY_ICONV 133 int m_desc; 134 #else 135 iconv_t m_desc; 136 #endif 137 138 /* The input string. This is updated as we convert characters. */ 139 const gdb_byte *m_input; 140 /* The number of bytes remaining in the input. */ 141 size_t m_bytes; 142 143 /* The width of an input character. */ 144 size_t m_width; 145 146 /* The output buffer. */ 147 gdb::def_vector<gdb_wchar_t> m_out; 148 }; 149 150 151 152 /* GDB needs to know a few details of its execution character set. 153 This knowledge is isolated here and in charset.c. */ 154 155 /* The escape character. */ 156 #define HOST_ESCAPE_CHAR 27 157 158 /* Convert a letter, like 'c', to its corresponding control 159 character. */ 160 char host_letter_to_control_character (char c); 161 162 /* Convert a hex digit character to its numeric value. E.g., 'f' is 163 converted to 15. This function assumes that C is a valid hex 164 digit. Both upper- and lower-case letters are recognized. */ 165 int host_hex_value (char c); 166 167 #endif /* CHARSET_H */ 168