// std::codecvt implementation details, DragonFly version -*- C++ -*-

// Copyright (C) 2015-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 - Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>
// Modified for DragonFly by John Marino <gnugcc@marino.st>

#include <locale>
#include <cstring>
#include <cstdlib>  // For MB_CUR_MAX
#include <climits>  // For MB_LEN_MAX

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Specializations.
#ifdef _GLIBCXX_USE_WCHAR_T
  // Convert the wide characters in [__from, __from_end) to multibyte
  // characters stored in [__to, __to_end).
  //
  // The facet's C locale (_M_c_locale_codecvt) is installed with
  // uselocale() for the duration of the call and the previous locale is
  // restored before returning.
  //
  // On return __from_next / __to_next point one past the last input
  // element consumed / the last output element produced.  The result is
  //   ok      - the loop ended without an error or a partial conversion,
  //   partial - the output range was too small for the next character(s),
  //   error   - wcsnrtombs reported an unconvertible character.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
         const intern_type* __from_end, const intern_type*& __from_next,
         extern_type* __to, extern_type* __to_end,
         extern_type*& __to_next) const
  {
    result __ret = ok;
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // wcsnrtombs is *very* fast but stops if encounters NUL characters:
    // in case we fall back to wcrtomb and then continue, in a loop.
    // NB: wcsnrtombs is a GNU extension
    for (__from_next = __from, __to_next = __to;
         __from_next < __from_end && __to_next < __to_end
         && __ret == ok;)
      {
        // A chunk is the run of input up to (not including) the next
        // L'\0', or up to the end of the input if there is none.
        const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
                                                      __from_end - __from_next);
        if (!__from_chunk_end)
          __from_chunk_end = __from_end;

        // Remember where this chunk started for the error path below.
        __from = __from_next;
        const size_t __conv = wcsnrtombs(__to_next, &__from_next,
                                         __from_chunk_end - __from_next,
                                         __to_end - __to_next, &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // wcrtomb.
            for (; __from < __from_next; ++__from)
              __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
            __state = __tmp_state;
            __ret = error;
          }
        else if (__from_next && __from_next < __from_chunk_end)
          {
            // Stopped before the end of the chunk: output exhausted.
            __to_next += __conv;
            __ret = partial;
          }
        else
          {
            __from_next = __from_chunk_end;
            __to_next += __conv;
          }

        if (__from_next < __from_end && __ret == ok)
          {
            // The chunk ended at a L'\0'.  Convert it into a scratch
            // buffer first so that nothing is written, and the state is
            // left untouched, if it does not fit into the output range.
            extern_type __buf[MB_LEN_MAX];
            __tmp_state = __state;
            const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
            if (__conv2 > static_cast<size_t>(__to_end - __to_next))
              __ret = partial;
            else
              {
                memcpy(__to_next, __buf, __conv2);
                __state = __tmp_state;
                __to_next += __conv2;
                ++__from_next;
              }
          }
      }

    uselocale((locale_t)__old);

    return __ret;
  }

  // Convert the multibyte characters in [__from, __from_end) to wide
  // characters stored in [__to, __to_end).
  //
  // The facet's C locale (_M_c_locale_codecvt) is installed with
  // uselocale() for the duration of the call and the previous locale is
  // restored before returning.
  //
  // On return __from_next / __to_next point one past the last input
  // element consumed / the last output element produced.  The result is
  //   ok      - the loop ended without an error or a partial conversion,
  //   partial - conversion stopped before the end of a chunk, or there
  //             was no room left to store a L'\0',
  //   error   - mbsnrtowcs reported an invalid multibyte sequence.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
        const extern_type* __from_end, const extern_type*& __from_next,
        intern_type* __to, intern_type* __to_end,
        intern_type*& __to_next) const
  {
    result __ret = ok;
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
    // in case we store a L'\0' and then continue, in a loop.
    // NB: mbsnrtowcs is a GNU extension
    for (__from_next = __from, __to_next = __to;
         __from_next < __from_end && __to_next < __to_end
         && __ret == ok;)
      {
        // A chunk is the run of input up to (not including) the next
        // '\0', or up to the end of the input if there is none.
        const extern_type* __from_chunk_end;
        __from_chunk_end
          = static_cast<const extern_type*>(memchr(__from_next, '\0',
                                                   __from_end - __from_next));
        if (!__from_chunk_end)
          __from_chunk_end = __from_end;

        // Remember where this chunk started for the error path below.
        __from = __from_next;
        size_t __conv = mbsnrtowcs(__to_next, &__from_next,
                                   __from_chunk_end - __from_next,
                                   __to_end - __to_next, &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // mbrtowc.
            for (;; ++__to_next, __from += __conv)
              {
                __conv = mbrtowc(__to_next, __from, __from_end - __from,
                                 &__tmp_state);
                if (__conv == static_cast<size_t>(-1)
                    || __conv == static_cast<size_t>(-2))
                  break;
              }
            __from_next = __from;
            __state = __tmp_state;
            __ret = error;
          }
        else if (__from_next && __from_next < __from_chunk_end)
          {
            // It is unclear what to return in this case (see DR 382).
            __to_next += __conv;
            __ret = partial;
          }
        else
          {
            __from_next = __from_chunk_end;
            __to_next += __conv;
          }

        if (__from_next < __from_end && __ret == ok)
          {
            // The chunk ended at a '\0': consume it and store a L'\0',
            // provided there is still room in the output range.
            if (__to_next < __to_end)
              {
                // XXX Probably wrong for stateful encodings
                __tmp_state = __state;
                ++__from_next;
                *__to_next++ = L'\0';
              }
            else
              __ret = partial;
          }
      }

    uselocale((locale_t)__old);

    return __ret;
  }

  // Return 1 if MB_CUR_MAX, evaluated with the facet's C locale
  // installed, is 1; otherwise return 0.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_encoding() const throw()
  {
    // XXX This implementation assumes that the encoding is
    // stateless and is either single-byte or variable-width.
    int __ret = 0;
    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
    if (MB_CUR_MAX == 1)
      __ret = 1;
    uselocale((locale_t)__old);
    return __ret;
  }

  // Return MB_CUR_MAX as evaluated with the facet's C locale installed.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_max_length() const throw()
  {
    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
    // XXX Probably wrong for stateful encodings.
    int __ret = MB_CUR_MAX;
    uselocale((locale_t)__old);
    return __ret;
  }

  // Return the number of bytes of [__from, __end) that are consumed when
  // converting at most __max wide characters.
  //
  // The facet's C locale (_M_c_locale_codecvt) is installed with
  // uselocale() for the duration of the call and the previous locale is
  // restored before returning.  Counting stops at the first invalid or
  // incomplete multibyte sequence reported by the C library.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_length(state_type& __state, const extern_type* __from,
            const extern_type* __end, size_t __max) const
  {
    int __ret = 0;
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
    // in case we advance past it and then continue, in a loop.
    // NB: mbsnrtowcs is in POSIX.1-2008

    const size_t __to_len = 1024;  // Size of alloca'd output buffer

    // A dummy internal buffer is needed in order for mbsnrtowcs to consider
    // its fourth parameter (it wouldn't with NULL as first parameter).
    wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
                                                           * __to_len));
    while (__from < __end && __max)
      {
        // A chunk is the run of input up to (not including) the next
        // '\0', or up to the end of the input if there is none.
        const extern_type* __from_chunk_end;
        __from_chunk_end
          = static_cast<const extern_type*>(memchr(__from, '\0',
                                                   __end - __from));
        if (!__from_chunk_end)
          __from_chunk_end = __end;

        const extern_type* __tmp_from = __from;
        // Never ask for more characters than the scratch buffer holds;
        // a longer chunk is simply processed over several iterations.
        size_t __conv = mbsnrtowcs(__to, &__from,
                                   __from_chunk_end - __from,
                                   __max > __to_len ? __to_len : __max,
                                   &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // mbrtowc.
            for (__from = __tmp_from;; __from += __conv)
              {
                __conv = mbrtowc(0, __from, __end - __from,
                                 &__tmp_state);
                if (__conv == static_cast<size_t>(-1)
                    || __conv == static_cast<size_t>(-2))
                  break;
              }
            __state = __tmp_state;
            __ret += __from - __tmp_from;
            break;
          }
        if (!__from)
          __from = __from_chunk_end;

        __ret += __from - __tmp_from;
        __max -= __conv;

        // Step over a byte only when the whole chunk has been consumed,
        // i.e. __from sits on the '\0' that ended it.  mbsnrtowcs may also
        // have stopped early because the scratch buffer was full; in that
        // case __from points at ordinary input that must be converted by
        // the next iteration, not skipped.
        if (__from == __from_chunk_end && __from < __end && __max)
          {
            // XXX Probably wrong for stateful encodings
            __tmp_state = __state;
            ++__from;
            ++__ret;
            --__max;
          }
      }

    uselocale((locale_t)__old);

    return __ret;
  }
#endif

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace