// std::codecvt implementation details, DragonFly version -*- C++ -*-

// Copyright (C) 2015-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 - Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>
// Modified for DragonFly by John Marino <gnugcc@marino.st>

#include <locale>
#include <cstring>
#include <cstdlib>  // For MB_CUR_MAX
#include <climits>  // For MB_LEN_MAX

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Specializations.
#ifdef _GLIBCXX_USE_WCHAR_T
  // Convert the wide characters in [__from, __from_end) to the external
  // multibyte encoding, storing the bytes in [__to, __to_end).
  //
  // On return __from_next / __to_next point just past the last input
  // element consumed / the last output byte written.  The result is
  //   ok      - the loop ran until the input or the output was exhausted
  //             without an error,
  //   partial - the output range could not hold the next character,
  //   error   - a wide character could not be converted.
  //
  // The conversion runs with the calling thread's locale temporarily
  // switched to _M_c_locale_codecvt; the previous locale is restored
  // before returning.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
         const intern_type* __from_end, const intern_type*& __from_next,
         extern_type* __to, extern_type* __to_end,
         extern_type*& __to_next) const
  {
    result __ret = ok;
    // Shadow state used when a chunk has to be replayed one character
    // at a time.
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // wcsnrtombs is *very* fast but stops if encounters NUL characters:
    // in case we fall back to wcrtomb and then continue, in a loop.
    // NB: wcsnrtombs is in POSIX.1-2008
    for (__from_next = __from, __to_next = __to;
         __from_next < __from_end && __to_next < __to_end
         && __ret == ok;)
      {
        // A chunk is the run of input up to (not including) the next
        // L'\0', or up to the end of the input if there is none.
        const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
                                                      __from_end - __from_next);
        if (!__from_chunk_end)
          __from_chunk_end = __from_end;

        // Remember where this chunk started for the error replay below.
        __from = __from_next;
        const size_t __conv = wcsnrtombs(__to_next, &__from_next,
                                         __from_chunk_end - __from_next,
                                         __to_end - __to_next, &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // wcrtomb.
            for (; __from < __from_next; ++__from)
              __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
            __state = __tmp_state;
            __ret = error;
          }
        else if (__from_next && __from_next < __from_chunk_end)
          {
            // Stopped inside the chunk without an error: the output
            // range is full.
            __to_next += __conv;
            __ret = partial;
          }
        else
          {
            // Whole chunk converted (wcsnrtombs may have nulled the
            // source pointer, so set it explicitly).
            __from_next = __from_chunk_end;
            __to_next += __conv;
          }

        if (__from_next < __from_end && __ret == ok)
          {
            // We are sitting on the L'\0' that ended the chunk.  Convert
            // it into a scratch buffer first so nothing is written unless
            // the whole sequence fits in the remaining output space.
            extern_type __buf[MB_LEN_MAX];
            __tmp_state = __state;
            const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
            if (__conv2 > static_cast<size_t>(__to_end - __to_next))
              __ret = partial;
            else
              {
                memcpy(__to_next, __buf, __conv2);
                __state = __tmp_state;
                __to_next += __conv2;
                ++__from_next;
              }
          }
      }

    uselocale((locale_t)__old);

    return __ret;
  }

  // Convert the external multibyte bytes in [__from, __from_end) to wide
  // characters, storing them in [__to, __to_end).
  //
  // On return __from_next / __to_next point just past the last input
  // byte consumed / the last wide character written.  The result is
  //   ok      - the loop ran until the input or the output was exhausted
  //             without an error,
  //   partial - the conversion stopped before the end of a chunk, or
  //             there was no room left to store a L'\0',
  //   error   - mbsnrtowcs reported a conversion error.
  //
  // The conversion runs with the calling thread's locale temporarily
  // switched to _M_c_locale_codecvt; the previous locale is restored
  // before returning.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
        const extern_type* __from_end, const extern_type*& __from_next,
        intern_type* __to, intern_type* __to_end,
        intern_type*& __to_next) const
  {
    result __ret = ok;
    // Shadow state used when a chunk has to be replayed one character
    // at a time.
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
    // in case we store a L'\0' and then continue, in a loop.
    // NB: mbsnrtowcs is in POSIX.1-2008
    for (__from_next = __from, __to_next = __to;
         __from_next < __from_end && __to_next < __to_end
         && __ret == ok;)
      {
        // A chunk is the run of input up to (not including) the next
        // '\0' byte, or up to the end of the input if there is none.
        const extern_type* __from_chunk_end;
        __from_chunk_end
          = static_cast<const extern_type*>(memchr(__from_next, '\0',
                                                   __from_end - __from_next));
        if (!__from_chunk_end)
          __from_chunk_end = __from_end;

        // Remember where this chunk started for the error replay below.
        __from = __from_next;
        size_t __conv = mbsnrtowcs(__to_next, &__from_next,
                                   __from_chunk_end - __from_next,
                                   __to_end - __to_next, &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // mbrtowc.
            for (;; ++__to_next, __from += __conv)
              {
                __conv = mbrtowc(__to_next, __from, __from_end - __from,
                                 &__tmp_state);
                if (__conv == static_cast<size_t>(-1)
                    || __conv == static_cast<size_t>(-2))
                  break;
              }
            __from_next = __from;
            __state = __tmp_state;
            __ret = error;
          }
        else if (__from_next && __from_next < __from_chunk_end)
          {
            // It is unclear what to return in this case (see DR 382).
            __to_next += __conv;
            __ret = partial;
          }
        else
          {
            // Whole chunk converted (mbsnrtowcs may have nulled the
            // source pointer, so set it explicitly).
            __from_next = __from_chunk_end;
            __to_next += __conv;
          }

        if (__from_next < __from_end && __ret == ok)
          {
            // We are sitting on the '\0' byte that ended the chunk:
            // store a L'\0' for it if there is room.
            if (__to_next < __to_end)
              {
                // XXX Probably wrong for stateful encodings
                __tmp_state = __state;
                ++__from_next;
                *__to_next++ = L'\0';
              }
            else
              __ret = partial;
          }
      }

    uselocale((locale_t)__old);

    return __ret;
  }

  // Return 1 when MB_CUR_MAX is 1 under the facet's locale (every
  // character occupies a single byte), otherwise 0.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_encoding() const throw()
  {
    // XXX This implementation assumes that the encoding is
    // stateless and is either single-byte or variable-width.
    int __ret = 0;
    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
    if (MB_CUR_MAX == 1)
      __ret = 1;
    uselocale((locale_t)__old);
    return __ret;
  }

  // Return MB_CUR_MAX as evaluated under the facet's locale.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_max_length() const throw()
  {
    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
    // XXX Probably wrong for stateful encodings.
    int __ret = MB_CUR_MAX;
    uselocale((locale_t)__old);
    return __ret;
  }

  // Return the number of bytes of [__from, __end) that are consumed when
  // converting at most __max wide characters.  An embedded '\0' byte
  // counts as one character.  Counting stops early at the first byte
  // sequence that cannot be converted.
  //
  // The scan runs with the calling thread's locale temporarily switched
  // to _M_c_locale_codecvt; the previous locale is restored before
  // returning.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_length(state_type& __state, const extern_type* __from,
            const extern_type* __end, size_t __max) const
  {
    int __ret = 0;
    // Shadow state used when a chunk has to be replayed one character
    // at a time.
    state_type __tmp_state(__state);

    __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);

    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
    // in case we advance past it and then continue, in a loop.
    // NB: mbsnrtowcs is in POSIX.1-2008

    const size_t __to_len = 1024;  // Size of alloca'd output buffer

    // A dummy internal buffer is needed in order for mbsnrtowcs to consider
    // its fourth parameter (it wouldn't with NULL as first parameter).
    wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
                                                           * __to_len));
    while (__from < __end && __max)
      {
        // A chunk is the run of input up to (not including) the next
        // '\0' byte, or up to the end of the input if there is none.
        const extern_type* __from_chunk_end;
        __from_chunk_end
          = static_cast<const extern_type*>(memchr(__from, '\0',
                                                   __end - __from));
        if (!__from_chunk_end)
          __from_chunk_end = __end;

        const extern_type* __tmp_from = __from;
        // Never ask for more characters than the scratch buffer holds.
        size_t __conv = mbsnrtowcs(__to, &__from,
                                   __from_chunk_end - __from,
                                   __max > __to_len ? __to_len : __max,
                                   &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
            // have to start again from the beginning with a series of
            // mbrtowc.
            for (__from = __tmp_from;; __from += __conv)
              {
                __conv = mbrtowc(0, __from, __end - __from,
                                 &__tmp_state);
                if (__conv == static_cast<size_t>(-1)
                    || __conv == static_cast<size_t>(-2))
                  break;
              }
            __state = __tmp_state;
            __ret += __from - __tmp_from;
            break;
          }
        if (!__from)
          __from = __from_chunk_end;

        __ret += __from - __tmp_from;
        __max -= __conv;

        // Keep the replay state in step with the real one so that an
        // error in a later chunk is replayed from the right state.
        // XXX Probably wrong for stateful encodings
        __tmp_state = __state;

        if (__from == __from_chunk_end)
          {
            // The whole chunk was scanned.  If input remains we are on
            // the '\0' byte that ended it: count it as one character.
            if (__from < __end && __max)
              {
                ++__from;
                ++__ret;
                --__max;
              }
          }
        else if (__conv == 0 && __from == __tmp_from)
          {
            // Defensive: mbsnrtowcs made no progress at all, so looping
            // again could not terminate.
            break;
          }
        // Otherwise mbsnrtowcs stopped inside the chunk only because the
        // scratch buffer filled up.  The next byte is ordinary input, not
        // a NUL, so simply go round again from the current position.
      }

    uselocale((locale_t)__old);

    return __ret;
  }
#endif

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace