xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/bits/locale_conv.h (revision 479d8f7d843cc1b22d497efdf1f27a50ee8418d4)
1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include "stringfwd.h"
39 #include "allocator.h"
40 #include "codecvt.h"
41 #include "unique_ptr.h"
42 
43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47   /**
48    * @addtogroup locales
49    * @{
50    */
51 
52   template<typename _OutStr, typename _InChar, typename _Codecvt,
53 	   typename _State, typename _Fn>
54     bool
55     __do_str_codecvt(const _InChar* __first, const _InChar* __last,
56 		     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
57 		     size_t& __count, _Fn __fn)
58     {
59       if (__first == __last)
60 	{
61 	  __outstr.clear();
62 	  __count = 0;
63 	  return true;
64 	}
65 
66       size_t __outchars = 0;
67       auto __next = __first;
68       const auto __maxlen = __cvt.max_length() + 1;
69 
70       codecvt_base::result __result;
71       do
72 	{
73 	  __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
74 	  auto __outnext = &__outstr.front() + __outchars;
75 	  auto const __outlast = &__outstr.back() + 1;
76 	  __result = (__cvt.*__fn)(__state, __next, __last, __next,
77 					__outnext, __outlast, __outnext);
78 	  __outchars = __outnext - &__outstr.front();
79 	}
80       while (__result == codecvt_base::partial && __next != __last
81 	     && (__outstr.size() - __outchars) < __maxlen);
82 
83       if (__result == codecvt_base::error)
84 	return false;
85 
86       if (__result == codecvt_base::noconv)
87 	{
88 	  __outstr.assign(__first, __last);
89 	  __count = __last - __first;
90 	}
91       else
92 	{
93 	  __outstr.resize(__outchars);
94 	  __count = __next - __first;
95 	}
96 
97       return true;
98     }
99 
100   // Convert narrow character string to wide.
101   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
102     inline bool
103     __str_codecvt_in(const char* __first, const char* __last,
104 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
105 		     const codecvt<_CharT, char, _State>& __cvt,
106 		     _State& __state, size_t& __count)
107     {
108       using _Codecvt = codecvt<_CharT, char, _State>;
109       using _ConvFn
110 	= codecvt_base::result
111 	  (_Codecvt::*)(_State&, const char*, const char*, const char*&,
112 			_CharT*, _CharT*, _CharT*&) const;
113       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
114       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
115 			      __count, __fn);
116     }
117 
118   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
119     inline bool
120     __str_codecvt_in(const char* __first, const char* __last,
121 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
122 		     const codecvt<_CharT, char, _State>& __cvt)
123     {
124       _State __state = {};
125       size_t __n;
126       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
127     }
128 
129   // Convert wide character string to narrow.
130   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
131     inline bool
132     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
133 		      basic_string<char, _Traits, _Alloc>& __outstr,
134 		      const codecvt<_CharT, char, _State>& __cvt,
135 		      _State& __state, size_t& __count)
136     {
137       using _Codecvt = codecvt<_CharT, char, _State>;
138       using _ConvFn
139 	= codecvt_base::result
140 	  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
141 			char*, char*, char*&) const;
142       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
143       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
144 			      __count, __fn);
145     }
146 
147   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
148     inline bool
149     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
150 		      basic_string<char, _Traits, _Alloc>& __outstr,
151 		      const codecvt<_CharT, char, _State>& __cvt)
152     {
153       _State __state = {};
154       size_t __n;
155       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
156     }
157 
158 #ifdef _GLIBCXX_USE_WCHAR_T
159 
160 _GLIBCXX_BEGIN_NAMESPACE_CXX11
161 
162   /// String conversions
163   template<typename _Codecvt, typename _Elem = wchar_t,
164 	   typename _Wide_alloc = allocator<_Elem>,
165 	   typename _Byte_alloc = allocator<char>>
166     class wstring_convert
167     {
168     public:
169       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
170       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
171       typedef typename _Codecvt::state_type 			   state_type;
172       typedef typename wide_string::traits_type::int_type	   int_type;
173 
174       /** Default constructor.
175        *
176        * @param  __pcvt The facet to use for conversions.
177        *
178        * Takes ownership of @p __pcvt and will delete it in the destructor.
179        */
180       explicit
181       wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
182       {
183 	if (!_M_cvt)
184 	  __throw_logic_error("wstring_convert");
185       }
186 
187       /** Construct with an initial converstion state.
188        *
189        * @param  __pcvt The facet to use for conversions.
190        * @param  __state Initial conversion state.
191        *
192        * Takes ownership of @p __pcvt and will delete it in the destructor.
193        * The object's conversion state will persist between conversions.
194        */
195       wstring_convert(_Codecvt* __pcvt, state_type __state)
196       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
197       {
198 	if (!_M_cvt)
199 	  __throw_logic_error("wstring_convert");
200       }
201 
202       /** Construct with error strings.
203        *
204        * @param  __byte_err A string to return on failed conversions.
205        * @param  __wide_err A wide string to return on failed conversions.
206        */
207       explicit
208       wstring_convert(const byte_string& __byte_err,
209 		      const wide_string& __wide_err = wide_string())
210       : _M_cvt(new _Codecvt),
211 	_M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
212 	_M_with_strings(true)
213       {
214 	if (!_M_cvt)
215 	  __throw_logic_error("wstring_convert");
216       }
217 
218       ~wstring_convert() = default;
219 
220       // _GLIBCXX_RESOLVE_LIB_DEFECTS
221       // 2176. Special members for wstring_convert and wbuffer_convert
222       wstring_convert(const wstring_convert&) = delete;
223       wstring_convert& operator=(const wstring_convert&) = delete;
224 
225       /// @{ Convert from bytes.
226       wide_string
227       from_bytes(char __byte)
228       {
229 	char __bytes[2] = { __byte };
230 	return from_bytes(__bytes, __bytes+1);
231       }
232 
233       wide_string
234       from_bytes(const char* __ptr)
235       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
236 
237       wide_string
238       from_bytes(const byte_string& __str)
239       {
240 	auto __ptr = __str.data();
241 	return from_bytes(__ptr, __ptr + __str.size());
242       }
243 
244       wide_string
245       from_bytes(const char* __first, const char* __last)
246       {
247 	if (!_M_with_cvtstate)
248 	  _M_state = state_type();
249 	wide_string __out{ _M_wide_err_string.get_allocator() };
250 	if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
251 			     _M_count))
252 	  return __out;
253 	if (_M_with_strings)
254 	  return _M_wide_err_string;
255 	__throw_range_error("wstring_convert::from_bytes");
256       }
257       /// @}
258 
259       /// @{ Convert to bytes.
260       byte_string
261       to_bytes(_Elem __wchar)
262       {
263 	_Elem __wchars[2] = { __wchar };
264 	return to_bytes(__wchars, __wchars+1);
265       }
266 
267       byte_string
268       to_bytes(const _Elem* __ptr)
269       {
270 	return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
271       }
272 
273       byte_string
274       to_bytes(const wide_string& __wstr)
275       {
276 	auto __ptr = __wstr.data();
277 	return to_bytes(__ptr, __ptr + __wstr.size());
278       }
279 
280       byte_string
281       to_bytes(const _Elem* __first, const _Elem* __last)
282       {
283 	if (!_M_with_cvtstate)
284 	  _M_state = state_type();
285 	byte_string __out{ _M_byte_err_string.get_allocator() };
286 	if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
287 			      _M_count))
288 	  return __out;
289 	if (_M_with_strings)
290 	  return _M_byte_err_string;
291 	__throw_range_error("wstring_convert::to_bytes");
292       }
293       /// @}
294 
295       // _GLIBCXX_RESOLVE_LIB_DEFECTS
296       // 2174. wstring_convert::converted() should be noexcept
297       /// The number of elements successfully converted in the last conversion.
298       size_t converted() const noexcept { return _M_count; }
299 
300       /// The final conversion state of the last conversion.
301       state_type state() const { return _M_state; }
302 
303     private:
304       unique_ptr<_Codecvt>	_M_cvt;
305       byte_string		_M_byte_err_string;
306       wide_string		_M_wide_err_string;
307       state_type		_M_state = state_type();
308       size_t			_M_count = 0;
309       bool			_M_with_cvtstate = false;
310       bool			_M_with_strings = false;
311     };
312 
313 _GLIBCXX_END_NAMESPACE_CXX11
314 
315   /// Buffer conversions
316   template<typename _Codecvt, typename _Elem = wchar_t,
317 	   typename _Tr = char_traits<_Elem>>
318     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
319     {
320       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
321 
322     public:
323       typedef typename _Codecvt::state_type state_type;
324 
325       /** Default constructor.
326        *
327        * @param  __bytebuf The underlying byte stream buffer.
328        * @param  __pcvt    The facet to use for conversions.
329        * @param  __state   Initial conversion state.
330        *
331        * Takes ownership of @p __pcvt and will delete it in the destructor.
332        */
333       explicit
334       wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
335 		      state_type __state = state_type())
336       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
337       {
338 	if (!_M_cvt)
339 	  __throw_logic_error("wbuffer_convert");
340 
341 	_M_always_noconv = _M_cvt->always_noconv();
342 
343 	if (_M_buf)
344 	  {
345 	    this->setp(_M_put_area, _M_put_area + _S_buffer_length);
346 	    this->setg(_M_get_area + _S_putback_length,
347 		       _M_get_area + _S_putback_length,
348 		       _M_get_area + _S_putback_length);
349 	  }
350       }
351 
352       ~wbuffer_convert() = default;
353 
354       // _GLIBCXX_RESOLVE_LIB_DEFECTS
355       // 2176. Special members for wstring_convert and wbuffer_convert
356       wbuffer_convert(const wbuffer_convert&) = delete;
357       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
358 
359       streambuf* rdbuf() const noexcept { return _M_buf; }
360 
361       streambuf*
362       rdbuf(streambuf *__bytebuf) noexcept
363       {
364 	auto __prev = _M_buf;
365 	_M_buf = __bytebuf;
366 	return __prev;
367       }
368 
369       /// The conversion state following the last conversion.
370       state_type state() const noexcept { return _M_state; }
371 
372     protected:
373       int
374       sync()
375       { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; }
376 
377       typename _Wide_streambuf::int_type
378       overflow(typename _Wide_streambuf::int_type __out)
379       {
380 	if (!_M_buf || !_M_conv_put())
381 	  return _Tr::eof();
382 	else if (!_Tr::eq_int_type(__out, _Tr::eof()))
383 	  return this->sputc(__out);
384 	return _Tr::not_eof(__out);
385       }
386 
387       typename _Wide_streambuf::int_type
388       underflow()
389       {
390 	if (!_M_buf)
391 	  return _Tr::eof();
392 
393 	if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
394 	  return _Tr::to_int_type(*this->gptr());
395 	else
396 	  return _Tr::eof();
397       }
398 
399       streamsize
400       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
401       {
402 	if (!_M_buf || __n == 0)
403 	  return 0;
404 	streamsize __done = 0;
405 	do
406 	{
407 	  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
408 					   __n - __done);
409 	  _Tr::copy(this->pptr(), __s + __done, __nn);
410 	  this->pbump(__nn);
411 	  __done += __nn;
412 	} while (__done < __n && _M_conv_put());
413 	return __done;
414       }
415 
416     private:
417       // fill the get area from converted contents of the byte stream buffer
418       bool
419       _M_conv_get()
420       {
421 	const streamsize __pb1 = this->gptr() - this->eback();
422 	const streamsize __pb2 = _S_putback_length;
423 	const streamsize __npb = std::min(__pb1, __pb2);
424 
425 	_Tr::move(_M_get_area + _S_putback_length - __npb,
426 		  this->gptr() - __npb, __npb);
427 
428 	streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
429 	__nbytes = std::min(__nbytes, _M_buf->in_avail());
430 	if (__nbytes < 1)
431 	  __nbytes == 1;
432 	__nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
433 	if (__nbytes < 1)
434 	  return false;
435 	__nbytes += _M_unconv;
436 
437 	// convert _M_get_buf into _M_get_area
438 
439 	_Elem* __outbuf = _M_get_area + _S_putback_length;
440 	_Elem* __outnext = __outbuf;
441 	const char* __bnext = _M_get_buf;
442 
443 	codecvt_base::result __result;
444 	if (_M_always_noconv)
445 	  __result = codecvt_base::noconv;
446 	else
447 	  {
448 	    _Elem* __outend = _M_get_area + _S_buffer_length;
449 
450 	    __result = _M_cvt->in(_M_state,
451 				  __bnext, __bnext + __nbytes, __bnext,
452 				  __outbuf, __outend, __outnext);
453 	  }
454 
455 	if (__result == codecvt_base::noconv)
456 	  {
457 	    // cast is safe because noconv means _Elem is same type as char
458 	    auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
459 	    _Tr::copy(__outbuf, __get_buf, __nbytes);
460 	    _M_unconv = 0;
461 	    return true;
462 	  }
463 
464 	if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
465 	  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
466 
467 	this->setg(__outbuf, __outbuf, __outnext);
468 
469 	return __result != codecvt_base::error;
470       }
471 
472       // unused
473       bool
474       _M_put(...)
475       { return false; }
476 
477       bool
478       _M_put(const char* __p, streamsize __n)
479       {
480 	if (_M_buf->sputn(__p, __n) < __n)
481 	  return false;
482       }
483 
484       // convert the put area and write to the byte stream buffer
485       bool
486       _M_conv_put()
487       {
488 	_Elem* const __first = this->pbase();
489 	const _Elem* const __last = this->pptr();
490 	const streamsize __pending = __last - __first;
491 
492 	if (_M_always_noconv)
493 	  return _M_put(__first, __pending);
494 
495 	char __outbuf[2 * _S_buffer_length];
496 
497 	const _Elem* __next = __first;
498 	const _Elem* __start;
499 	do
500 	  {
501 	    __start = __next;
502 	    char* __outnext = __outbuf;
503 	    char* const __outlast = __outbuf + sizeof(__outbuf);
504 	    auto __result = _M_cvt->out(_M_state, __next, __last, __next,
505 					__outnext, __outlast, __outnext);
506 	    if (__result == codecvt_base::error)
507 	      return false;
508 	    else if (__result == codecvt_base::noconv)
509 	      return _M_put(__next, __pending);
510 
511 	    if (!_M_put(__outbuf, __outnext - __outbuf))
512 	      return false;
513 	  }
514 	while (__next != __last && __next != __start);
515 
516 	if (__next != __last)
517 	  _Tr::move(__first, __next, __last - __next);
518 
519 	this->pbump(__first - __next);
520 	return __next != __first;
521       }
522 
523       streambuf*		_M_buf;
524       unique_ptr<_Codecvt>	_M_cvt;
525       state_type		_M_state;
526 
527       static const streamsize	_S_buffer_length = 32;
528       static const streamsize	_S_putback_length = 3;
529       _Elem                     _M_put_area[_S_buffer_length];
530       _Elem                     _M_get_area[_S_buffer_length];
531       streamsize		_M_unconv = 0;
532       char			_M_get_buf[_S_buffer_length-_S_putback_length];
533       bool			_M_always_noconv;
534     };
535 
536 #endif  // _GLIBCXX_USE_WCHAR_T
537 
538   /// @} group locales
539 
540 _GLIBCXX_END_NAMESPACE_VERSION
541 } // namespace
542 
543 #endif // __cplusplus
544 
545 #endif /* _LOCALE_CONV_H */
546