xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/ext/codecvt_specializations.h (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009
4 //  Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library.  This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 //
27 // ISO C++ 14882: 22.2.1.5 Template class codecvt
28 //
29 
30 // Written by Benjamin Kosnik <bkoz@redhat.com>
31 
32 /** @file ext/codecvt_specializations.h
33  *  This file is a GNU extension to the Standard C++ Library.
34  */
35 
36 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
37 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
38 
39 #include <bits/c++config.h>
40 #include <locale>
41 #include <iconv.h>
42 
43 _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
44 
45   /// Extension to use iconv for dealing with character encodings.
46   // This includes conversions and comparisons between various character
47   // sets.  This object encapsulates data that may need to be shared between
48   // char_traits, codecvt and ctype.
49   class encoding_state
50   {
51   public:
52     // Types:
53     // NB: A conversion descriptor subsumes and enhances the
54     // functionality of a simple state type such as mbstate_t.
55     typedef iconv_t	descriptor_type;
56 
57   protected:
58     // Name of internal character set encoding.
59     std::string	       	_M_int_enc;
60 
61     // Name of external character set encoding.
62     std::string  	_M_ext_enc;
63 
64     // Conversion descriptor between external encoding to internal encoding.
65     descriptor_type	_M_in_desc;
66 
67     // Conversion descriptor between internal encoding to external encoding.
68     descriptor_type	_M_out_desc;
69 
70     // The byte-order marker for the external encoding, if necessary.
71     int			_M_ext_bom;
72 
73     // The byte-order marker for the internal encoding, if necessary.
74     int			_M_int_bom;
75 
76     // Number of external bytes needed to construct one complete
77     // character in the internal encoding.
78     // NB: -1 indicates variable, or stateful, encodings.
79     int 		_M_bytes;
80 
81   public:
82     explicit
83     encoding_state()
84     : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
85     { }
86 
87     explicit
88     encoding_state(const char* __int, const char* __ext,
89 		   int __ibom = 0, int __ebom = 0, int __bytes = 1)
90     : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
91       _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
92     { init(); }
93 
94     // 21.1.2 traits typedefs
95     // p4
96     // typedef STATE_T state_type
97     // requires: state_type shall meet the requirements of
98     // CopyConstructible types (20.1.3)
99     // NB: This does not preserve the actual state of the conversion
100     // descriptor member, but it does duplicate the encoding
101     // information.
102     encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
103     { construct(__obj); }
104 
105     // Need assignment operator as well.
106     encoding_state&
107     operator=(const encoding_state& __obj)
108     {
109       construct(__obj);
110       return *this;
111     }
112 
113     ~encoding_state()
114     { destroy(); }
115 
116     bool
117     good() const throw()
118     {
119       const descriptor_type __err = (iconv_t)(-1);
120       bool __test = _M_in_desc && _M_in_desc != __err;
121       __test &=  _M_out_desc && _M_out_desc != __err;
122       return __test;
123     }
124 
125     int
126     character_ratio() const
127     { return _M_bytes; }
128 
129     const std::string
130     internal_encoding() const
131     { return _M_int_enc; }
132 
133     int
134     internal_bom() const
135     { return _M_int_bom; }
136 
137     const std::string
138     external_encoding() const
139     { return _M_ext_enc; }
140 
141     int
142     external_bom() const
143     { return _M_ext_bom; }
144 
145     const descriptor_type&
146     in_descriptor() const
147     { return _M_in_desc; }
148 
149     const descriptor_type&
150     out_descriptor() const
151     { return _M_out_desc; }
152 
153   protected:
154     void
155     init()
156     {
157       const descriptor_type __err = (iconv_t)(-1);
158       const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
159       if (!_M_in_desc && __have_encodings)
160 	{
161 	  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
162 	  if (_M_in_desc == __err)
163 	    std::__throw_runtime_error(__N("encoding_state::_M_init "
164 				    "creating iconv input descriptor failed"));
165 	}
166       if (!_M_out_desc && __have_encodings)
167 	{
168 	  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
169 	  if (_M_out_desc == __err)
170 	    std::__throw_runtime_error(__N("encoding_state::_M_init "
171 				  "creating iconv output descriptor failed"));
172 	}
173     }
174 
175     void
176     construct(const encoding_state& __obj)
177     {
178       destroy();
179       _M_int_enc = __obj._M_int_enc;
180       _M_ext_enc = __obj._M_ext_enc;
181       _M_ext_bom = __obj._M_ext_bom;
182       _M_int_bom = __obj._M_int_bom;
183       _M_bytes = __obj._M_bytes;
184       init();
185     }
186 
187     void
188     destroy() throw()
189     {
190       const descriptor_type __err = (iconv_t)(-1);
191       if (_M_in_desc && _M_in_desc != __err)
192 	{
193 	  iconv_close(_M_in_desc);
194 	  _M_in_desc = 0;
195 	}
196       if (_M_out_desc && _M_out_desc != __err)
197 	{
198 	  iconv_close(_M_out_desc);
199 	  _M_out_desc = 0;
200 	}
201     }
202   };
203 
204   /// encoding_char_traits
205   // Custom traits type with encoding_state for the state type, and the
206   // associated fpos<encoding_state> for the position type, all other
207   // bits equivalent to the required char_traits instantiations.
208   template<typename _CharT>
209     struct encoding_char_traits : public std::char_traits<_CharT>
210     {
211       typedef encoding_state				state_type;
212       typedef typename std::fpos<state_type>		pos_type;
213     };
214 
215 _GLIBCXX_END_NAMESPACE
216 
217 
218 _GLIBCXX_BEGIN_NAMESPACE(std)
219 
220   using __gnu_cxx::encoding_state;
221 
222   /// codecvt<InternT, _ExternT, encoding_state> specialization.
223   // This partial specialization takes advantage of iconv to provide
224   // code conversions between a large number of character encodings.
225   template<typename _InternT, typename _ExternT>
226     class codecvt<_InternT, _ExternT, encoding_state>
227     : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
228     {
229     public:
230       // Types:
231       typedef codecvt_base::result			result;
232       typedef _InternT 					intern_type;
233       typedef _ExternT 					extern_type;
234       typedef __gnu_cxx::encoding_state 		state_type;
235       typedef state_type::descriptor_type 		descriptor_type;
236 
237       // Data Members:
238       static locale::id 		id;
239 
240       explicit
241       codecvt(size_t __refs = 0)
242       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
243       { }
244 
245       explicit
246       codecvt(state_type& __enc, size_t __refs = 0)
247       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
248       { }
249 
250      protected:
251       virtual
252       ~codecvt() { }
253 
254       virtual result
255       do_out(state_type& __state, const intern_type* __from,
256 	     const intern_type* __from_end, const intern_type*& __from_next,
257 	     extern_type* __to, extern_type* __to_end,
258 	     extern_type*& __to_next) const;
259 
260       virtual result
261       do_unshift(state_type& __state, extern_type* __to,
262 		 extern_type* __to_end, extern_type*& __to_next) const;
263 
264       virtual result
265       do_in(state_type& __state, const extern_type* __from,
266 	    const extern_type* __from_end, const extern_type*& __from_next,
267 	    intern_type* __to, intern_type* __to_end,
268 	    intern_type*& __to_next) const;
269 
270       virtual int
271       do_encoding() const throw();
272 
273       virtual bool
274       do_always_noconv() const throw();
275 
276       virtual int
277       do_length(state_type&, const extern_type* __from,
278 		const extern_type* __end, size_t __max) const;
279 
280       virtual int
281       do_max_length() const throw();
282     };
283 
284   template<typename _InternT, typename _ExternT>
285     locale::id
286     codecvt<_InternT, _ExternT, encoding_state>::id;
287 
288   // This adaptor works around the signature problems of the second
289   // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
290   // uses 'char**', which matches the POSIX 1003.1-2001 standard.
291   // Using this adaptor, g++ will do the work for us.
292   template<typename _Tp>
293     inline size_t
294     __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
295                     iconv_t __cd, char** __inbuf, size_t* __inbytes,
296                     char** __outbuf, size_t* __outbytes)
297     { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
298 
299   template<typename _InternT, typename _ExternT>
300     codecvt_base::result
301     codecvt<_InternT, _ExternT, encoding_state>::
302     do_out(state_type& __state, const intern_type* __from,
303 	   const intern_type* __from_end, const intern_type*& __from_next,
304 	   extern_type* __to, extern_type* __to_end,
305 	   extern_type*& __to_next) const
306     {
307       result __ret = codecvt_base::error;
308       if (__state.good())
309 	{
310 	  const descriptor_type& __desc = __state.out_descriptor();
311 	  const size_t __fmultiple = sizeof(intern_type);
312 	  size_t __fbytes = __fmultiple * (__from_end - __from);
313 	  const size_t __tmultiple = sizeof(extern_type);
314 	  size_t __tbytes = __tmultiple * (__to_end - __to);
315 
316 	  // Argument list for iconv specifies a byte sequence. Thus,
317 	  // all to/from arrays must be brutally casted to char*.
318 	  char* __cto = reinterpret_cast<char*>(__to);
319 	  char* __cfrom;
320 	  size_t __conv;
321 
322 	  // Some encodings need a byte order marker as the first item
323 	  // in the byte stream, to designate endian-ness. The default
324 	  // value for the byte order marker is NULL, so if this is
325 	  // the case, it's not necessary and we can just go on our
326 	  // merry way.
327 	  int __int_bom = __state.internal_bom();
328 	  if (__int_bom)
329 	    {
330 	      size_t __size = __from_end - __from;
331 	      intern_type* __cfixed = static_cast<intern_type*>
332 		(__builtin_alloca(sizeof(intern_type) * (__size + 1)));
333 	      __cfixed[0] = static_cast<intern_type>(__int_bom);
334 	      char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
335 	      __cfrom = reinterpret_cast<char*>(__cfixed);
336 	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
337                                         &__fbytes, &__cto, &__tbytes);
338 	    }
339 	  else
340 	    {
341 	      intern_type* __cfixed = const_cast<intern_type*>(__from);
342 	      __cfrom = reinterpret_cast<char*>(__cfixed);
343 	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
344 				       &__cto, &__tbytes);
345 	    }
346 
347 	  if (__conv != size_t(-1))
348 	    {
349 	      __from_next = reinterpret_cast<const intern_type*>(__cfrom);
350 	      __to_next = reinterpret_cast<extern_type*>(__cto);
351 	      __ret = codecvt_base::ok;
352 	    }
353 	  else
354 	    {
355 	      if (__fbytes < __fmultiple * (__from_end - __from))
356 		{
357 		  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
358 		  __to_next = reinterpret_cast<extern_type*>(__cto);
359 		  __ret = codecvt_base::partial;
360 		}
361 	      else
362 		__ret = codecvt_base::error;
363 	    }
364 	}
365       return __ret;
366     }
367 
368   template<typename _InternT, typename _ExternT>
369     codecvt_base::result
370     codecvt<_InternT, _ExternT, encoding_state>::
371     do_unshift(state_type& __state, extern_type* __to,
372 	       extern_type* __to_end, extern_type*& __to_next) const
373     {
374       result __ret = codecvt_base::error;
375       if (__state.good())
376 	{
377 	  const descriptor_type& __desc = __state.in_descriptor();
378 	  const size_t __tmultiple = sizeof(intern_type);
379 	  size_t __tlen = __tmultiple * (__to_end - __to);
380 
381 	  // Argument list for iconv specifies a byte sequence. Thus,
382 	  // all to/from arrays must be brutally casted to char*.
383 	  char* __cto = reinterpret_cast<char*>(__to);
384 	  size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL,
385                                           &__cto, &__tlen);
386 
387 	  if (__conv != size_t(-1))
388 	    {
389 	      __to_next = reinterpret_cast<extern_type*>(__cto);
390 	      if (__tlen == __tmultiple * (__to_end - __to))
391 		__ret = codecvt_base::noconv;
392 	      else if (__tlen == 0)
393 		__ret = codecvt_base::ok;
394 	      else
395 		__ret = codecvt_base::partial;
396 	    }
397 	  else
398 	    __ret = codecvt_base::error;
399 	}
400       return __ret;
401     }
402 
403   template<typename _InternT, typename _ExternT>
404     codecvt_base::result
405     codecvt<_InternT, _ExternT, encoding_state>::
406     do_in(state_type& __state, const extern_type* __from,
407 	  const extern_type* __from_end, const extern_type*& __from_next,
408 	  intern_type* __to, intern_type* __to_end,
409 	  intern_type*& __to_next) const
410     {
411       result __ret = codecvt_base::error;
412       if (__state.good())
413 	{
414 	  const descriptor_type& __desc = __state.in_descriptor();
415 	  const size_t __fmultiple = sizeof(extern_type);
416 	  size_t __flen = __fmultiple * (__from_end - __from);
417 	  const size_t __tmultiple = sizeof(intern_type);
418 	  size_t __tlen = __tmultiple * (__to_end - __to);
419 
420 	  // Argument list for iconv specifies a byte sequence. Thus,
421 	  // all to/from arrays must be brutally casted to char*.
422 	  char* __cto = reinterpret_cast<char*>(__to);
423 	  char* __cfrom;
424 	  size_t __conv;
425 
426 	  // Some encodings need a byte order marker as the first item
427 	  // in the byte stream, to designate endian-ness. The default
428 	  // value for the byte order marker is NULL, so if this is
429 	  // the case, it's not necessary and we can just go on our
430 	  // merry way.
431 	  int __ext_bom = __state.external_bom();
432 	  if (__ext_bom)
433 	    {
434 	      size_t __size = __from_end - __from;
435 	      extern_type* __cfixed =  static_cast<extern_type*>
436 		(__builtin_alloca(sizeof(extern_type) * (__size + 1)));
437 	      __cfixed[0] = static_cast<extern_type>(__ext_bom);
438 	      char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
439 	      __cfrom = reinterpret_cast<char*>(__cfixed);
440 	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
441                                        &__flen, &__cto, &__tlen);
442 	    }
443 	  else
444 	    {
445 	      extern_type* __cfixed = const_cast<extern_type*>(__from);
446 	      __cfrom = reinterpret_cast<char*>(__cfixed);
447 	      __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
448                                        &__flen, &__cto, &__tlen);
449 	    }
450 
451 
452 	  if (__conv != size_t(-1))
453 	    {
454 	      __from_next = reinterpret_cast<const extern_type*>(__cfrom);
455 	      __to_next = reinterpret_cast<intern_type*>(__cto);
456 	      __ret = codecvt_base::ok;
457 	    }
458 	  else
459 	    {
460 	      if (__flen < static_cast<size_t>(__from_end - __from))
461 		{
462 		  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
463 		  __to_next = reinterpret_cast<intern_type*>(__cto);
464 		  __ret = codecvt_base::partial;
465 		}
466 	      else
467 		__ret = codecvt_base::error;
468 	    }
469 	}
470       return __ret;
471     }
472 
473   template<typename _InternT, typename _ExternT>
474     int
475     codecvt<_InternT, _ExternT, encoding_state>::
476     do_encoding() const throw()
477     {
478       int __ret = 0;
479       if (sizeof(_ExternT) <= sizeof(_InternT))
480 	__ret = sizeof(_InternT) / sizeof(_ExternT);
481       return __ret;
482     }
483 
484   template<typename _InternT, typename _ExternT>
485     bool
486     codecvt<_InternT, _ExternT, encoding_state>::
487     do_always_noconv() const throw()
488     { return false; }
489 
490   template<typename _InternT, typename _ExternT>
491     int
492     codecvt<_InternT, _ExternT, encoding_state>::
493     do_length(state_type&, const extern_type* __from,
494 	      const extern_type* __end, size_t __max) const
495     { return std::min(__max, static_cast<size_t>(__end - __from)); }
496 
497   // _GLIBCXX_RESOLVE_LIB_DEFECTS
498   // 74.  Garbled text for codecvt::do_max_length
499   template<typename _InternT, typename _ExternT>
500     int
501     codecvt<_InternT, _ExternT, encoding_state>::
502     do_max_length() const throw()
503     { return 1; }
504 
505 _GLIBCXX_END_NAMESPACE
506 
507 #endif
508