1 // wstring_convert implementation -*- C++ -*- 2 3 // Copyright (C) 2015 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** @file bits/locale_conv.h 26 * This is an internal header file, included by other library headers. 27 * Do not attempt to use it directly. @headername{locale} 28 */ 29 30 #ifndef _LOCALE_CONV_H 31 #define _LOCALE_CONV_H 1 32 33 #if __cplusplus < 201103L 34 # include <bits/c++0x_warning.h> 35 #else 36 37 #include <streambuf> 38 #include "stringfwd.h" 39 #include "allocator.h" 40 #include "codecvt.h" 41 #include "unique_ptr.h" 42 43 namespace std _GLIBCXX_VISIBILITY(default) 44 { 45 _GLIBCXX_BEGIN_NAMESPACE_VERSION 46 47 /** 48 * @addtogroup locales 49 * @{ 50 */ 51 52 template<typename _OutStr, typename _InChar, typename _Codecvt, 53 typename _State, typename _Fn> 54 bool 55 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 56 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 57 size_t& __count, _Fn __fn) 58 { 59 if (__first == __last) 60 { 61 __outstr.clear(); 62 __count = 0; 63 return true; 64 } 65 66 size_t __outchars = 0; 67 auto __next = __first; 68 const auto __maxlen = __cvt.max_length() + 1; 69 70 codecvt_base::result __result; 71 do 72 { 73 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 74 auto __outnext = &__outstr.front() + __outchars; 75 auto const __outlast = &__outstr.back() + 1; 76 __result = (__cvt.*__fn)(__state, __next, __last, __next, 77 __outnext, __outlast, __outnext); 78 __outchars = __outnext - &__outstr.front(); 79 } 80 while (__result == codecvt_base::partial && __next != __last 81 && (__outstr.size() - __outchars) < __maxlen); 82 83 if (__result == codecvt_base::error) 84 return false; 85 86 if (__result == codecvt_base::noconv) 87 { 88 __outstr.assign(__first, __last); 89 __count = __last - __first; 90 } 91 else 92 { 93 __outstr.resize(__outchars); 94 __count = __next - __first; 95 } 96 97 return true; 98 } 99 100 // Convert narrow character string to wide. 101 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 102 inline bool 103 __str_codecvt_in(const char* __first, const char* __last, 104 basic_string<_CharT, _Traits, _Alloc>& __outstr, 105 const codecvt<_CharT, char, _State>& __cvt, 106 _State& __state, size_t& __count) 107 { 108 using _Codecvt = codecvt<_CharT, char, _State>; 109 using _ConvFn 110 = codecvt_base::result 111 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 112 _CharT*, _CharT*, _CharT*&) const; 113 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 114 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 115 __count, __fn); 116 } 117 118 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 119 inline bool 120 __str_codecvt_in(const char* __first, const char* __last, 121 basic_string<_CharT, _Traits, _Alloc>& __outstr, 122 const codecvt<_CharT, char, _State>& __cvt) 123 { 124 _State __state = {}; 125 size_t __n; 126 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 127 } 128 129 // Convert wide character string to narrow. 130 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 131 inline bool 132 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 133 basic_string<char, _Traits, _Alloc>& __outstr, 134 const codecvt<_CharT, char, _State>& __cvt, 135 _State& __state, size_t& __count) 136 { 137 using _Codecvt = codecvt<_CharT, char, _State>; 138 using _ConvFn 139 = codecvt_base::result 140 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 141 char*, char*, char*&) const; 142 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 143 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 144 __count, __fn); 145 } 146 147 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 148 inline bool 149 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 150 basic_string<char, _Traits, _Alloc>& __outstr, 151 const codecvt<_CharT, char, _State>& __cvt) 152 { 153 _State __state = {}; 154 size_t __n; 155 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 156 } 157 158 #ifdef _GLIBCXX_USE_WCHAR_T 159 160 _GLIBCXX_BEGIN_NAMESPACE_CXX11 161 162 /// String conversions 163 template<typename _Codecvt, typename _Elem = wchar_t, 164 typename _Wide_alloc = allocator<_Elem>, 165 typename _Byte_alloc = allocator<char>> 166 class wstring_convert 167 { 168 public: 169 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 170 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 171 typedef typename _Codecvt::state_type state_type; 172 typedef typename wide_string::traits_type::int_type int_type; 173 174 /** Default constructor. 175 * 176 * @param __pcvt The facet to use for conversions. 177 * 178 * Takes ownership of @p __pcvt and will delete it in the destructor. 179 */ 180 explicit 181 wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) 182 { 183 if (!_M_cvt) 184 __throw_logic_error("wstring_convert"); 185 } 186 187 /** Construct with an initial converstion state. 188 * 189 * @param __pcvt The facet to use for conversions. 190 * @param __state Initial conversion state. 191 * 192 * Takes ownership of @p __pcvt and will delete it in the destructor. 193 * The object's conversion state will persist between conversions. 194 */ 195 wstring_convert(_Codecvt* __pcvt, state_type __state) 196 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 197 { 198 if (!_M_cvt) 199 __throw_logic_error("wstring_convert"); 200 } 201 202 /** Construct with error strings. 203 * 204 * @param __byte_err A string to return on failed conversions. 205 * @param __wide_err A wide string to return on failed conversions. 206 */ 207 explicit 208 wstring_convert(const byte_string& __byte_err, 209 const wide_string& __wide_err = wide_string()) 210 : _M_cvt(new _Codecvt), 211 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 212 _M_with_strings(true) 213 { 214 if (!_M_cvt) 215 __throw_logic_error("wstring_convert"); 216 } 217 218 ~wstring_convert() = default; 219 220 // _GLIBCXX_RESOLVE_LIB_DEFECTS 221 // 2176. Special members for wstring_convert and wbuffer_convert 222 wstring_convert(const wstring_convert&) = delete; 223 wstring_convert& operator=(const wstring_convert&) = delete; 224 225 /// @{ Convert from bytes. 226 wide_string 227 from_bytes(char __byte) 228 { 229 char __bytes[2] = { __byte }; 230 return from_bytes(__bytes, __bytes+1); 231 } 232 233 wide_string 234 from_bytes(const char* __ptr) 235 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 236 237 wide_string 238 from_bytes(const byte_string& __str) 239 { 240 auto __ptr = __str.data(); 241 return from_bytes(__ptr, __ptr + __str.size()); 242 } 243 244 wide_string 245 from_bytes(const char* __first, const char* __last) 246 { 247 if (!_M_with_cvtstate) 248 _M_state = state_type(); 249 wide_string __out{ _M_wide_err_string.get_allocator() }; 250 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 251 _M_count)) 252 return __out; 253 if (_M_with_strings) 254 return _M_wide_err_string; 255 __throw_range_error("wstring_convert::from_bytes"); 256 } 257 /// @} 258 259 /// @{ Convert to bytes. 260 byte_string 261 to_bytes(_Elem __wchar) 262 { 263 _Elem __wchars[2] = { __wchar }; 264 return to_bytes(__wchars, __wchars+1); 265 } 266 267 byte_string 268 to_bytes(const _Elem* __ptr) 269 { 270 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 271 } 272 273 byte_string 274 to_bytes(const wide_string& __wstr) 275 { 276 auto __ptr = __wstr.data(); 277 return to_bytes(__ptr, __ptr + __wstr.size()); 278 } 279 280 byte_string 281 to_bytes(const _Elem* __first, const _Elem* __last) 282 { 283 if (!_M_with_cvtstate) 284 _M_state = state_type(); 285 byte_string __out{ _M_byte_err_string.get_allocator() }; 286 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 287 _M_count)) 288 return __out; 289 if (_M_with_strings) 290 return _M_byte_err_string; 291 __throw_range_error("wstring_convert::to_bytes"); 292 } 293 /// @} 294 295 // _GLIBCXX_RESOLVE_LIB_DEFECTS 296 // 2174. wstring_convert::converted() should be noexcept 297 /// The number of elements successfully converted in the last conversion. 298 size_t converted() const noexcept { return _M_count; } 299 300 /// The final conversion state of the last conversion. 301 state_type state() const { return _M_state; } 302 303 private: 304 unique_ptr<_Codecvt> _M_cvt; 305 byte_string _M_byte_err_string; 306 wide_string _M_wide_err_string; 307 state_type _M_state = state_type(); 308 size_t _M_count = 0; 309 bool _M_with_cvtstate = false; 310 bool _M_with_strings = false; 311 }; 312 313 _GLIBCXX_END_NAMESPACE_CXX11 314 315 /// Buffer conversions 316 template<typename _Codecvt, typename _Elem = wchar_t, 317 typename _Tr = char_traits<_Elem>> 318 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 319 { 320 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 321 322 public: 323 typedef typename _Codecvt::state_type state_type; 324 325 /** Default constructor. 326 * 327 * @param __bytebuf The underlying byte stream buffer. 328 * @param __pcvt The facet to use for conversions. 329 * @param __state Initial conversion state. 330 * 331 * Takes ownership of @p __pcvt and will delete it in the destructor. 332 */ 333 explicit 334 wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, 335 state_type __state = state_type()) 336 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 337 { 338 if (!_M_cvt) 339 __throw_logic_error("wbuffer_convert"); 340 341 _M_always_noconv = _M_cvt->always_noconv(); 342 343 if (_M_buf) 344 { 345 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 346 this->setg(_M_get_area + _S_putback_length, 347 _M_get_area + _S_putback_length, 348 _M_get_area + _S_putback_length); 349 } 350 } 351 352 ~wbuffer_convert() = default; 353 354 // _GLIBCXX_RESOLVE_LIB_DEFECTS 355 // 2176. Special members for wstring_convert and wbuffer_convert 356 wbuffer_convert(const wbuffer_convert&) = delete; 357 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 358 359 streambuf* rdbuf() const noexcept { return _M_buf; } 360 361 streambuf* 362 rdbuf(streambuf *__bytebuf) noexcept 363 { 364 auto __prev = _M_buf; 365 _M_buf = __bytebuf; 366 return __prev; 367 } 368 369 /// The conversion state following the last conversion. 370 state_type state() const noexcept { return _M_state; } 371 372 protected: 373 int 374 sync() 375 { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; } 376 377 typename _Wide_streambuf::int_type 378 overflow(typename _Wide_streambuf::int_type __out) 379 { 380 if (!_M_buf || !_M_conv_put()) 381 return _Tr::eof(); 382 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 383 return this->sputc(__out); 384 return _Tr::not_eof(__out); 385 } 386 387 typename _Wide_streambuf::int_type 388 underflow() 389 { 390 if (!_M_buf) 391 return _Tr::eof(); 392 393 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 394 return _Tr::to_int_type(*this->gptr()); 395 else 396 return _Tr::eof(); 397 } 398 399 streamsize 400 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 401 { 402 if (!_M_buf || __n == 0) 403 return 0; 404 streamsize __done = 0; 405 do 406 { 407 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 408 __n - __done); 409 _Tr::copy(this->pptr(), __s + __done, __nn); 410 this->pbump(__nn); 411 __done += __nn; 412 } while (__done < __n && _M_conv_put()); 413 return __done; 414 } 415 416 private: 417 // fill the get area from converted contents of the byte stream buffer 418 bool 419 _M_conv_get() 420 { 421 const streamsize __pb1 = this->gptr() - this->eback(); 422 const streamsize __pb2 = _S_putback_length; 423 const streamsize __npb = std::min(__pb1, __pb2); 424 425 _Tr::move(_M_get_area + _S_putback_length - __npb, 426 this->gptr() - __npb, __npb); 427 428 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 429 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 430 if (__nbytes < 1) 431 __nbytes == 1; 432 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 433 if (__nbytes < 1) 434 return false; 435 __nbytes += _M_unconv; 436 437 // convert _M_get_buf into _M_get_area 438 439 _Elem* __outbuf = _M_get_area + _S_putback_length; 440 _Elem* __outnext = __outbuf; 441 const char* __bnext = _M_get_buf; 442 443 codecvt_base::result __result; 444 if (_M_always_noconv) 445 __result = codecvt_base::noconv; 446 else 447 { 448 _Elem* __outend = _M_get_area + _S_buffer_length; 449 450 __result = _M_cvt->in(_M_state, 451 __bnext, __bnext + __nbytes, __bnext, 452 __outbuf, __outend, __outnext); 453 } 454 455 if (__result == codecvt_base::noconv) 456 { 457 // cast is safe because noconv means _Elem is same type as char 458 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 459 _Tr::copy(__outbuf, __get_buf, __nbytes); 460 _M_unconv = 0; 461 return true; 462 } 463 464 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 465 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 466 467 this->setg(__outbuf, __outbuf, __outnext); 468 469 return __result != codecvt_base::error; 470 } 471 472 // unused 473 bool 474 _M_put(...) 475 { return false; } 476 477 bool 478 _M_put(const char* __p, streamsize __n) 479 { 480 if (_M_buf->sputn(__p, __n) < __n) 481 return false; 482 } 483 484 // convert the put area and write to the byte stream buffer 485 bool 486 _M_conv_put() 487 { 488 _Elem* const __first = this->pbase(); 489 const _Elem* const __last = this->pptr(); 490 const streamsize __pending = __last - __first; 491 492 if (_M_always_noconv) 493 return _M_put(__first, __pending); 494 495 char __outbuf[2 * _S_buffer_length]; 496 497 const _Elem* __next = __first; 498 const _Elem* __start; 499 do 500 { 501 __start = __next; 502 char* __outnext = __outbuf; 503 char* const __outlast = __outbuf + sizeof(__outbuf); 504 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 505 __outnext, __outlast, __outnext); 506 if (__result == codecvt_base::error) 507 return false; 508 else if (__result == codecvt_base::noconv) 509 return _M_put(__next, __pending); 510 511 if (!_M_put(__outbuf, __outnext - __outbuf)) 512 return false; 513 } 514 while (__next != __last && __next != __start); 515 516 if (__next != __last) 517 _Tr::move(__first, __next, __last - __next); 518 519 this->pbump(__first - __next); 520 return __next != __first; 521 } 522 523 streambuf* _M_buf; 524 unique_ptr<_Codecvt> _M_cvt; 525 state_type _M_state; 526 527 static const streamsize _S_buffer_length = 32; 528 static const streamsize _S_putback_length = 3; 529 _Elem _M_put_area[_S_buffer_length]; 530 _Elem _M_get_area[_S_buffer_length]; 531 streamsize _M_unconv = 0; 532 char _M_get_buf[_S_buffer_length-_S_putback_length]; 533 bool _M_always_noconv; 534 }; 535 536 #endif // _GLIBCXX_USE_WCHAR_T 537 538 /// @} group locales 539 540 _GLIBCXX_END_NAMESPACE_VERSION 541 } // namespace 542 543 #endif // __cplusplus 544 545 #endif /* _LOCALE_CONV_H */ 546