// std::codecvt implementation details, generic version -*- C++ -*-

// Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010
// Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
//
// ISO C++ 14882: 22.2.1.5 - Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

#include <locale>
#include <cstdlib>  // For MB_CUR_MAX
#include <climits>  // For MB_LEN_MAX
#include <cstring>

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Specializations.
#ifdef _GLIBCXX_USE_WCHAR_T
  // Convert the wide characters in [__from, __from_end) to multibyte
  // characters stored in [__to, __to_end), one character at a time
  // with wcrtomb.
  //
  // On return __from_next and __to_next point one past the last wide
  // character consumed and the last byte written, respectively.
  // __state is only updated after a character has been completely
  // converted and stored.
  //
  // Returns error if wcrtomb rejects a character, partial if input
  // remains unconverted (including when the next character does not
  // fit in the remaining output space), and ok otherwise.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
         const intern_type* __from_end, const intern_type*& __from_next,
         extern_type* __to, extern_type* __to_end,
         extern_type*& __to_next) const
  {
    result __ret = ok;
    // Work on a copy of the state so that __state is left untouched
    // by a conversion that fails or whose output cannot be stored.
    state_type __tmp_state(__state);

    // The conversion must be done by calling wcrtomb in a loop rather
    // than using wcsrtombs because wcsrtombs assumes that the input is
    // zero-terminated.

    // wcrtomb cannot be told the size of the destination buffer.  It
    // stores at most MB_CUR_MAX bytes per call, so if the output range
    // can hold MB_CUR_MAX bytes for every input character we can write
    // straight into it; otherwise each character goes through a
    // temporary buffer first.
    //
    // MB_CUR_MAX has type size_t, so the comparison is carried out
    // explicitly on unsigned values (a mixed signed/unsigned
    // "difference <= 0" test would only ever be true for an exact
    // fit), and by division so that the product cannot overflow.
    const size_t __in_len = static_cast<size_t>(__from_end - __from);
    const size_t __out_len = static_cast<size_t>(__to_end - __to);
    const size_t __mb_max = MB_CUR_MAX;

    if (__out_len / __mb_max >= __in_len)
      while (__from < __from_end)
        {
          const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
          if (__conv == static_cast<size_t>(-1))
            {
              __ret = error;
              break;
            }
          __state = __tmp_state;
          __to += __conv;
          __from++;
        }
    else
      {
        extern_type __buf[MB_LEN_MAX];
        while (__from < __from_end && __to < __to_end)
          {
            const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
            if (__conv == static_cast<size_t>(-1))
              {
                __ret = error;
                break;
              }
            else if (__conv > static_cast<size_t>(__to_end - __to))
              {
                // The converted character does not fit; leave it
                // unconsumed and do not commit the state.
                __ret = partial;
                break;
              }

            memcpy(__to, __buf, __conv);
            __state = __tmp_state;
            __to += __conv;
            __from++;
          }
      }

    // Running out of output space with input left over is reported
    // as partial.
    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Convert the multibyte characters in [__from, __from_end) to wide
  // characters stored in [__to, __to_end), one character at a time
  // with mbrtowc.
  //
  // On return __from_next and __to_next point one past the last byte
  // consumed and the last wide character written, respectively.
  //
  // Returns error on an invalid sequence, partial if the input ends
  // inside a multibyte character or input remains unconverted, and ok
  // otherwise.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
        const extern_type* __from_end, const extern_type*& __from_next,
        intern_type* __to, intern_type* __to_end,
        intern_type*& __to_next) const
  {
    result __ret = ok;
    // This temporary state object is necessary so __state won't be modified
    // if [__from, __from_end) is a partial multibyte character.
    state_type __tmp_state(__state);

    // Conversion must be done by calling mbrtowc in a loop rather than
    // by calling mbsrtowcs because mbsrtowcs assumes that the input
    // sequence is zero-terminated.
    while (__from < __from_end && __to < __to_end)
      {
        size_t __conv = mbrtowc(__to, __from, __from_end - __from,
                                &__tmp_state);
        if (__conv == static_cast<size_t>(-1))
          {
            __ret = error;
            break;
          }
        else if (__conv == static_cast<size_t>(-2))
          {
            // It is unclear what to return in this case (see DR 382).
            __ret = partial;
            break;
          }
        else if (__conv == 0)
          {
            // mbrtowc returns 0 when it converts the null character;
            // treat it as one byte consumed and one L'\0' produced.
            // XXX Probably wrong for stateful encodings
            __conv = 1;
            *__to = L'\0';
          }

        __state = __tmp_state;
        __to++;
        __from += __conv;
      }

    // It is not clear that __from < __from_end implies __ret != ok
    // (see DR 382).
    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Returns 1 when MB_CUR_MAX is 1 (every character is one byte) and
  // 0 otherwise.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_encoding() const throw()
  {
    // XXX This implementation assumes that the encoding is
    // stateless and is either single-byte or variable-width.
    int __ret = 0;
    if (MB_CUR_MAX == 1)
      __ret = 1;
    return __ret;
  }

  // Returns MB_CUR_MAX, converted to int.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_max_length() const throw()
  {
    // XXX Probably wrong for stateful encodings.
    int __ret = MB_CUR_MAX;
    return __ret;
  }

  // Returns the number of bytes of [__from, __end) that make up at
  // most __max complete multibyte characters, stopping early at an
  // invalid or incomplete sequence.  __state is advanced past every
  // character that was counted.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_length(state_type& __state, const extern_type* __from,
            const extern_type* __end, size_t __max) const
  {
    int __ret = 0;
    // As in do_in, only commit the state for complete characters.
    state_type __tmp_state(__state);

    while (__from < __end && __max)
      {
        size_t __conv = mbrtowc(0, __from, __end - __from, &__tmp_state);
        if (__conv == static_cast<size_t>(-1))
          {
            // Invalid source character
            break;
          }
        else if (__conv == static_cast<size_t>(-2))
          {
            // Remainder of input does not form a complete destination
            // character.
            break;
          }
        else if (__conv == 0)
          {
            // A null character occupies one byte of input.
            // XXX Probably wrong for stateful encodings
            __conv = 1;
          }

        __state = __tmp_state;
        __from += __conv;
        __ret += __conv;
        __max--;
      }

    return __ret;
  }
#endif

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace