xref: /dflybsd-src/contrib/gcc-8.0/libstdc++-v3/config/locale/dragonfly/codecvt_members.cc (revision bd23261bc0d14eee4147ed68391498b1c45ec286)
138fd1498Szrj // std::codecvt implementation details, DragonFly version -*- C++ -*-
238fd1498Szrj 
338fd1498Szrj // Copyright (C) 2015-2018 Free Software Foundation, Inc.
438fd1498Szrj //
538fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
638fd1498Szrj // software; you can redistribute it and/or modify it under the
738fd1498Szrj // terms of the GNU General Public License as published by the
838fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
938fd1498Szrj // any later version.
1038fd1498Szrj 
1138fd1498Szrj // This library is distributed in the hope that it will be useful,
1238fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
1338fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1438fd1498Szrj // GNU General Public License for more details.
1538fd1498Szrj 
1638fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
1738fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
1838fd1498Szrj // 3.1, as published by the Free Software Foundation.
1938fd1498Szrj 
2038fd1498Szrj // You should have received a copy of the GNU General Public License and
2138fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
2238fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2338fd1498Szrj // <http://www.gnu.org/licenses/>.
2438fd1498Szrj 
2538fd1498Szrj //
2638fd1498Szrj // ISO C++ 14882: 22.2.1.5 - Template class codecvt
2738fd1498Szrj //
2838fd1498Szrj 
2938fd1498Szrj // Written by Benjamin Kosnik <bkoz@redhat.com>
3038fd1498Szrj // Modified for DragonFly by John Marino <gnugcc@marino.st>
3138fd1498Szrj 
3238fd1498Szrj #include <locale>
3338fd1498Szrj #include <cstring>
3438fd1498Szrj #include <cstdlib>  // For MB_CUR_MAX
3538fd1498Szrj #include <climits>  // For MB_LEN_MAX
3638fd1498Szrj 
3738fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
3838fd1498Szrj {
3938fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
4038fd1498Szrj 
4138fd1498Szrj   // Specializations.
4238fd1498Szrj #ifdef _GLIBCXX_USE_WCHAR_T
4338fd1498Szrj   codecvt_base::result
4438fd1498Szrj   codecvt<wchar_t, char, mbstate_t>::
do_out(state_type & __state,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const4538fd1498Szrj   do_out(state_type& __state, const intern_type* __from,
4638fd1498Szrj 	 const intern_type* __from_end, const intern_type*& __from_next,
4738fd1498Szrj 	 extern_type* __to, extern_type* __to_end,
4838fd1498Szrj 	 extern_type*& __to_next) const
4938fd1498Szrj   {
5038fd1498Szrj     result __ret = ok;
5138fd1498Szrj     state_type __tmp_state(__state);
5238fd1498Szrj 
5338fd1498Szrj     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
5438fd1498Szrj 
5538fd1498Szrj     // wcsnrtombs is *very* fast but stops if encounters NUL characters:
5638fd1498Szrj     // in case we fall back to wcrtomb and then continue, in a loop.
5738fd1498Szrj     // NB: wcsnrtombs is a GNU extension
5838fd1498Szrj     for (__from_next = __from, __to_next = __to;
5938fd1498Szrj 	 __from_next < __from_end && __to_next < __to_end
6038fd1498Szrj 	 && __ret == ok;)
6138fd1498Szrj       {
6238fd1498Szrj 	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
6338fd1498Szrj 						      __from_end - __from_next);
6438fd1498Szrj 	if (!__from_chunk_end)
6538fd1498Szrj 	  __from_chunk_end = __from_end;
6638fd1498Szrj 
6738fd1498Szrj 	__from = __from_next;
6838fd1498Szrj 	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
6938fd1498Szrj 					 __from_chunk_end - __from_next,
7038fd1498Szrj 					 __to_end - __to_next, &__state);
7138fd1498Szrj 	if (__conv == static_cast<size_t>(-1))
7238fd1498Szrj 	  {
7338fd1498Szrj 	    // In case of error, in order to stop at the exact place we
7438fd1498Szrj 	    // have to start again from the beginning with a series of
7538fd1498Szrj 	    // wcrtomb.
7638fd1498Szrj 	    for (; __from < __from_next; ++__from)
7738fd1498Szrj 	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
7838fd1498Szrj 	    __state = __tmp_state;
7938fd1498Szrj 	    __ret = error;
8038fd1498Szrj 	  }
8138fd1498Szrj 	else if (__from_next && __from_next < __from_chunk_end)
8238fd1498Szrj 	  {
8338fd1498Szrj 	    __to_next += __conv;
8438fd1498Szrj 	    __ret = partial;
8538fd1498Szrj 	  }
8638fd1498Szrj 	else
8738fd1498Szrj 	  {
8838fd1498Szrj 	    __from_next = __from_chunk_end;
8938fd1498Szrj 	    __to_next += __conv;
9038fd1498Szrj 	  }
9138fd1498Szrj 
9238fd1498Szrj 	if (__from_next < __from_end && __ret == ok)
9338fd1498Szrj 	  {
9438fd1498Szrj 	    extern_type __buf[MB_LEN_MAX];
9538fd1498Szrj 	    __tmp_state = __state;
9638fd1498Szrj 	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
9738fd1498Szrj 	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
9838fd1498Szrj 	      __ret = partial;
9938fd1498Szrj 	    else
10038fd1498Szrj 	      {
10138fd1498Szrj 		memcpy(__to_next, __buf, __conv2);
10238fd1498Szrj 		__state = __tmp_state;
10338fd1498Szrj 		__to_next += __conv2;
10438fd1498Szrj 		++__from_next;
10538fd1498Szrj 	      }
10638fd1498Szrj 	  }
10738fd1498Szrj       }
10838fd1498Szrj 
10938fd1498Szrj     uselocale((locale_t)__old);
11038fd1498Szrj 
11138fd1498Szrj     return __ret;
11238fd1498Szrj   }
11338fd1498Szrj 
11438fd1498Szrj   codecvt_base::result
11538fd1498Szrj   codecvt<wchar_t, char, mbstate_t>::
do_in(state_type & __state,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const11638fd1498Szrj   do_in(state_type& __state, const extern_type* __from,
11738fd1498Szrj 	const extern_type* __from_end, const extern_type*& __from_next,
11838fd1498Szrj 	intern_type* __to, intern_type* __to_end,
11938fd1498Szrj 	intern_type*& __to_next) const
12038fd1498Szrj   {
12138fd1498Szrj     result __ret = ok;
12238fd1498Szrj     state_type __tmp_state(__state);
12338fd1498Szrj 
12438fd1498Szrj     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
12538fd1498Szrj 
12638fd1498Szrj     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
12738fd1498Szrj     // in case we store a L'\0' and then continue, in a loop.
12838fd1498Szrj     // NB: mbsnrtowcs is a GNU extension
12938fd1498Szrj     for (__from_next = __from, __to_next = __to;
13038fd1498Szrj 	 __from_next < __from_end && __to_next < __to_end
13138fd1498Szrj 	 && __ret == ok;)
13238fd1498Szrj       {
13338fd1498Szrj 	const extern_type* __from_chunk_end;
13438fd1498Szrj 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
13538fd1498Szrj 								  __from_end
13638fd1498Szrj 								  - __from_next));
13738fd1498Szrj 	if (!__from_chunk_end)
13838fd1498Szrj 	  __from_chunk_end = __from_end;
13938fd1498Szrj 
14038fd1498Szrj 	__from = __from_next;
14138fd1498Szrj 	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
14238fd1498Szrj 				   __from_chunk_end - __from_next,
14338fd1498Szrj 				   __to_end - __to_next, &__state);
14438fd1498Szrj 	if (__conv == static_cast<size_t>(-1))
14538fd1498Szrj 	  {
14638fd1498Szrj 	    // In case of error, in order to stop at the exact place we
14738fd1498Szrj 	    // have to start again from the beginning with a series of
14838fd1498Szrj 	    // mbrtowc.
14938fd1498Szrj 	    for (;; ++__to_next, __from += __conv)
15038fd1498Szrj 	      {
15138fd1498Szrj 		__conv = mbrtowc(__to_next, __from, __from_end - __from,
15238fd1498Szrj 				 &__tmp_state);
15338fd1498Szrj 		if (__conv == static_cast<size_t>(-1)
15438fd1498Szrj 		    || __conv == static_cast<size_t>(-2))
15538fd1498Szrj 		  break;
15638fd1498Szrj 	      }
15738fd1498Szrj 	    __from_next = __from;
15838fd1498Szrj 	    __state = __tmp_state;
15938fd1498Szrj 	    __ret = error;
16038fd1498Szrj 	  }
16138fd1498Szrj 	else if (__from_next && __from_next < __from_chunk_end)
16238fd1498Szrj 	  {
16338fd1498Szrj 	    // It is unclear what to return in this case (see DR 382).
16438fd1498Szrj 	    __to_next += __conv;
16538fd1498Szrj 	    __ret = partial;
16638fd1498Szrj 	  }
16738fd1498Szrj 	else
16838fd1498Szrj 	  {
16938fd1498Szrj 	    __from_next = __from_chunk_end;
17038fd1498Szrj 	    __to_next += __conv;
17138fd1498Szrj 	  }
17238fd1498Szrj 
17338fd1498Szrj 	if (__from_next < __from_end && __ret == ok)
17438fd1498Szrj 	  {
17538fd1498Szrj 	    if (__to_next < __to_end)
17638fd1498Szrj 	      {
17738fd1498Szrj 		// XXX Probably wrong for stateful encodings
17838fd1498Szrj 		__tmp_state = __state;
17938fd1498Szrj 		++__from_next;
18038fd1498Szrj 		*__to_next++ = L'\0';
18138fd1498Szrj 	      }
18238fd1498Szrj 	    else
18338fd1498Szrj 	      __ret = partial;
18438fd1498Szrj 	  }
18538fd1498Szrj       }
18638fd1498Szrj 
18738fd1498Szrj     uselocale((locale_t)__old);
18838fd1498Szrj 
18938fd1498Szrj     return __ret;
19038fd1498Szrj   }
19138fd1498Szrj 
19238fd1498Szrj   int
19338fd1498Szrj   codecvt<wchar_t, char, mbstate_t>::
do_encoding() const19438fd1498Szrj   do_encoding() const throw()
19538fd1498Szrj   {
19638fd1498Szrj     // XXX This implementation assumes that the encoding is
19738fd1498Szrj     // stateless and is either single-byte or variable-width.
19838fd1498Szrj     int __ret = 0;
19938fd1498Szrj     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
20038fd1498Szrj     if (MB_CUR_MAX == 1)
20138fd1498Szrj       __ret = 1;
20238fd1498Szrj     uselocale((locale_t)__old);
20338fd1498Szrj     return __ret;
20438fd1498Szrj   }
20538fd1498Szrj 
20638fd1498Szrj   int
20738fd1498Szrj   codecvt<wchar_t, char, mbstate_t>::
do_max_length() const20838fd1498Szrj   do_max_length() const throw()
20938fd1498Szrj   {
21038fd1498Szrj     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
21138fd1498Szrj     // XXX Probably wrong for stateful encodings.
21238fd1498Szrj     int __ret = MB_CUR_MAX;
21338fd1498Szrj     uselocale((locale_t)__old);
21438fd1498Szrj     return __ret;
21538fd1498Szrj   }
21638fd1498Szrj 
21738fd1498Szrj   int
21838fd1498Szrj   codecvt<wchar_t, char, mbstate_t>::
do_length(state_type & __state,const extern_type * __from,const extern_type * __end,size_t __max) const21938fd1498Szrj   do_length(state_type& __state, const extern_type* __from,
22038fd1498Szrj 	    const extern_type* __end, size_t __max) const
22138fd1498Szrj   {
22238fd1498Szrj     int __ret = 0;
22338fd1498Szrj     state_type __tmp_state(__state);
22438fd1498Szrj 
22538fd1498Szrj     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
22638fd1498Szrj 
22738fd1498Szrj     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
22838fd1498Szrj     // in case we advance past it and then continue, in a loop.
229*bd23261bSJohn Marino     // NB: mbsnrtowcs is in POSIX.1-2008
230*bd23261bSJohn Marino 
231*bd23261bSJohn Marino     const size_t __to_len = 1024; // Size of alloca'd output buffer
23238fd1498Szrj 
23338fd1498Szrj     // A dummy internal buffer is needed in order for mbsnrtocws to consider
23438fd1498Szrj     // its fourth parameter (it wouldn't with NULL as first parameter).
23538fd1498Szrj     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
236*bd23261bSJohn Marino 							   * __to_len));
23738fd1498Szrj     while (__from < __end && __max)
23838fd1498Szrj       {
23938fd1498Szrj 	const extern_type* __from_chunk_end;
24038fd1498Szrj 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
24138fd1498Szrj 								  __end
24238fd1498Szrj 								  - __from));
24338fd1498Szrj 	if (!__from_chunk_end)
24438fd1498Szrj 	  __from_chunk_end = __end;
24538fd1498Szrj 
24638fd1498Szrj 	const extern_type* __tmp_from = __from;
24738fd1498Szrj 	size_t __conv = mbsnrtowcs(__to, &__from,
24838fd1498Szrj 				   __from_chunk_end - __from,
249*bd23261bSJohn Marino 				   __max > __to_len ? __to_len : __max,
250*bd23261bSJohn Marino 				   &__state);
25138fd1498Szrj 	if (__conv == static_cast<size_t>(-1))
25238fd1498Szrj 	  {
25338fd1498Szrj 	    // In case of error, in order to stop at the exact place we
25438fd1498Szrj 	    // have to start again from the beginning with a series of
25538fd1498Szrj 	    // mbrtowc.
25638fd1498Szrj 	    for (__from = __tmp_from;; __from += __conv)
25738fd1498Szrj 	      {
25838fd1498Szrj 		__conv = mbrtowc(0, __from, __end - __from,
25938fd1498Szrj 				 &__tmp_state);
26038fd1498Szrj 		if (__conv == static_cast<size_t>(-1)
26138fd1498Szrj 		    || __conv == static_cast<size_t>(-2))
26238fd1498Szrj 		  break;
26338fd1498Szrj 	      }
26438fd1498Szrj 	    __state = __tmp_state;
26538fd1498Szrj 	    __ret += __from - __tmp_from;
26638fd1498Szrj 	    break;
26738fd1498Szrj 	  }
26838fd1498Szrj 	if (!__from)
26938fd1498Szrj 	  __from = __from_chunk_end;
27038fd1498Szrj 
27138fd1498Szrj 	__ret += __from - __tmp_from;
27238fd1498Szrj 	__max -= __conv;
27338fd1498Szrj 
27438fd1498Szrj 	if (__from < __end && __max)
27538fd1498Szrj 	  {
27638fd1498Szrj 	    // XXX Probably wrong for stateful encodings
27738fd1498Szrj 	    __tmp_state = __state;
27838fd1498Szrj 	    ++__from;
27938fd1498Szrj 	    ++__ret;
28038fd1498Szrj 	    --__max;
28138fd1498Szrj 	  }
28238fd1498Szrj       }
28338fd1498Szrj 
28438fd1498Szrj     uselocale((locale_t)__old);
28538fd1498Szrj 
28638fd1498Szrj     return __ret;
28738fd1498Szrj   }
28838fd1498Szrj #endif
28938fd1498Szrj 
29038fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
29138fd1498Szrj } // namespace
292