xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/config/locale/gnu/codecvt_members.cc (revision 4391d5e9d4f291db41e3b3ba26a01b5e51364aae)
1 // std::codecvt implementation details, GNU version -*- C++ -*-
2 
3 // Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
4 // Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library.  This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 //
27 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
28 //
29 
30 // Written by Benjamin Kosnik <bkoz@redhat.com>
31 
32 #include <locale>
33 #include <cstdlib>  // For MB_CUR_MAX
34 #include <climits>  // For MB_LEN_MAX
35 #include <bits/c++locale_internal.h>
36 
37 _GLIBCXX_BEGIN_NAMESPACE(std)
38 
39   // Specializations.
40 #ifdef _GLIBCXX_USE_WCHAR_T
41   codecvt_base::result
42   codecvt<wchar_t, char, mbstate_t>::
43   do_out(state_type& __state, const intern_type* __from,
44 	 const intern_type* __from_end, const intern_type*& __from_next,
45 	 extern_type* __to, extern_type* __to_end,
46 	 extern_type*& __to_next) const
47   {
48     result __ret = ok;
49     state_type __tmp_state(__state);
50 
51 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
52     __c_locale __old = __uselocale(_M_c_locale_codecvt);
53 #endif
54 
55     // wcsnrtombs is *very* fast but stops if encounters NUL characters:
56     // in case we fall back to wcrtomb and then continue, in a loop.
57     // NB: wcsnrtombs is a GNU extension
58     for (__from_next = __from, __to_next = __to;
59 	 __from_next < __from_end && __to_next < __to_end
60 	 && __ret == ok;)
61       {
62 	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
63 						      __from_end - __from_next);
64 	if (!__from_chunk_end)
65 	  __from_chunk_end = __from_end;
66 
67 	__from = __from_next;
68 	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
69 					 __from_chunk_end - __from_next,
70 					 __to_end - __to_next, &__state);
71 	if (__conv == static_cast<size_t>(-1))
72 	  {
73 	    // In case of error, in order to stop at the exact place we
74 	    // have to start again from the beginning with a series of
75 	    // wcrtomb.
76 	    for (; __from < __from_next; ++__from)
77 	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
78 	    __state = __tmp_state;
79 	    __ret = error;
80 	  }
81 	else if (__from_next && __from_next < __from_chunk_end)
82 	  {
83 	    __to_next += __conv;
84 	    __ret = partial;
85 	  }
86 	else
87 	  {
88 	    __from_next = __from_chunk_end;
89 	    __to_next += __conv;
90 	  }
91 
92 	if (__from_next < __from_end && __ret == ok)
93 	  {
94 	    extern_type __buf[MB_LEN_MAX];
95 	    __tmp_state = __state;
96 	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
97 	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
98 	      __ret = partial;
99 	    else
100 	      {
101 		memcpy(__to_next, __buf, __conv2);
102 		__state = __tmp_state;
103 		__to_next += __conv2;
104 		++__from_next;
105 	      }
106 	  }
107       }
108 
109 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
110     __uselocale(__old);
111 #endif
112 
113     return __ret;
114   }
115 
116   codecvt_base::result
117   codecvt<wchar_t, char, mbstate_t>::
118   do_in(state_type& __state, const extern_type* __from,
119 	const extern_type* __from_end, const extern_type*& __from_next,
120 	intern_type* __to, intern_type* __to_end,
121 	intern_type*& __to_next) const
122   {
123     result __ret = ok;
124     state_type __tmp_state(__state);
125 
126 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
127     __c_locale __old = __uselocale(_M_c_locale_codecvt);
128 #endif
129 
130     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
131     // in case we store a L'\0' and then continue, in a loop.
132     // NB: mbsnrtowcs is a GNU extension
133     for (__from_next = __from, __to_next = __to;
134 	 __from_next < __from_end && __to_next < __to_end
135 	 && __ret == ok;)
136       {
137 	const extern_type* __from_chunk_end;
138 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
139 								  __from_end
140 								  - __from_next));
141 	if (!__from_chunk_end)
142 	  __from_chunk_end = __from_end;
143 
144 	__from = __from_next;
145 	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
146 				   __from_chunk_end - __from_next,
147 				   __to_end - __to_next, &__state);
148 	if (__conv == static_cast<size_t>(-1))
149 	  {
150 	    // In case of error, in order to stop at the exact place we
151 	    // have to start again from the beginning with a series of
152 	    // mbrtowc.
153 	    for (;; ++__to_next, __from += __conv)
154 	      {
155 		__conv = mbrtowc(__to_next, __from, __from_end - __from,
156 				 &__tmp_state);
157 		if (__conv == static_cast<size_t>(-1)
158 		    || __conv == static_cast<size_t>(-2))
159 		  break;
160 	      }
161 	    __from_next = __from;
162 	    __state = __tmp_state;
163 	    __ret = error;
164 	  }
165 	else if (__from_next && __from_next < __from_chunk_end)
166 	  {
167 	    // It is unclear what to return in this case (see DR 382).
168 	    __to_next += __conv;
169 	    __ret = partial;
170 	  }
171 	else
172 	  {
173 	    __from_next = __from_chunk_end;
174 	    __to_next += __conv;
175 	  }
176 
177 	if (__from_next < __from_end && __ret == ok)
178 	  {
179 	    if (__to_next < __to_end)
180 	      {
181 		// XXX Probably wrong for stateful encodings
182 		__tmp_state = __state;
183 		++__from_next;
184 		*__to_next++ = L'\0';
185 	      }
186 	    else
187 	      __ret = partial;
188 	  }
189       }
190 
191 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
192     __uselocale(__old);
193 #endif
194 
195     return __ret;
196   }
197 
198   int
199   codecvt<wchar_t, char, mbstate_t>::
200   do_encoding() const throw()
201   {
202     // XXX This implementation assumes that the encoding is
203     // stateless and is either single-byte or variable-width.
204     int __ret = 0;
205 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
206     __c_locale __old = __uselocale(_M_c_locale_codecvt);
207 #endif
208     if (MB_CUR_MAX == 1)
209       __ret = 1;
210 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
211     __uselocale(__old);
212 #endif
213     return __ret;
214   }
215 
216   int
217   codecvt<wchar_t, char, mbstate_t>::
218   do_max_length() const throw()
219   {
220 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
221     __c_locale __old = __uselocale(_M_c_locale_codecvt);
222 #endif
223     // XXX Probably wrong for stateful encodings.
224     int __ret = MB_CUR_MAX;
225 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
226     __uselocale(__old);
227 #endif
228     return __ret;
229   }
230 
231   int
232   codecvt<wchar_t, char, mbstate_t>::
233   do_length(state_type& __state, const extern_type* __from,
234 	    const extern_type* __end, size_t __max) const
235   {
236     int __ret = 0;
237     state_type __tmp_state(__state);
238 
239 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
240     __c_locale __old = __uselocale(_M_c_locale_codecvt);
241 #endif
242 
243     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
244     // in case we advance past it and then continue, in a loop.
245     // NB: mbsnrtowcs is a GNU extension
246 
247     // A dummy internal buffer is needed in order for mbsnrtocws to consider
248     // its fourth parameter (it wouldn't with NULL as first parameter).
249     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
250 							   * __max));
251     while (__from < __end && __max)
252       {
253 	const extern_type* __from_chunk_end;
254 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
255 								  __end
256 								  - __from));
257 	if (!__from_chunk_end)
258 	  __from_chunk_end = __end;
259 
260 	const extern_type* __tmp_from = __from;
261 	size_t __conv = mbsnrtowcs(__to, &__from,
262 				   __from_chunk_end - __from,
263 				   __max, &__state);
264 	if (__conv == static_cast<size_t>(-1))
265 	  {
266 	    // In case of error, in order to stop at the exact place we
267 	    // have to start again from the beginning with a series of
268 	    // mbrtowc.
269 	    for (__from = __tmp_from;; __from += __conv)
270 	      {
271 		__conv = mbrtowc(NULL, __from, __end - __from,
272 				 &__tmp_state);
273 		if (__conv == static_cast<size_t>(-1)
274 		    || __conv == static_cast<size_t>(-2))
275 		  break;
276 	      }
277 	    __state = __tmp_state;
278 	    __ret += __from - __tmp_from;
279 	    break;
280 	  }
281 	if (!__from)
282 	  __from = __from_chunk_end;
283 
284 	__ret += __from - __tmp_from;
285 	__max -= __conv;
286 
287 	if (__from < __end && __max)
288 	  {
289 	    // XXX Probably wrong for stateful encodings
290 	    __tmp_state = __state;
291 	    ++__from;
292 	    ++__ret;
293 	    --__max;
294 	  }
295       }
296 
297 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
298     __uselocale(__old);
299 #endif
300 
301     return __ret;
302   }
303 #endif
304 
305 _GLIBCXX_END_NAMESPACE
306