xref: /dflybsd-src/contrib/diffutils/lib/striconv.c (revision a816ce8e217e98e622474a79fb633b37dc22092e)
1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Charset conversion.
4    Copyright (C) 2001-2007, 2010-2011 Free Software Foundation, Inc.
5    Written by Bruno Haible and Simon Josefsson.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software Foundation,
19    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
20 
21 #include <config.h>
22 
23 /* Specification.  */
24 #include "striconv.h"
25 
26 #include <errno.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #if HAVE_ICONV
31 # include <iconv.h>
32 /* Get MB_LEN_MAX, CHAR_BIT.  */
33 # include <limits.h>
34 #endif
35 
36 #include "c-strcase.h"
37 
38 #ifndef SIZE_MAX
39 # define SIZE_MAX ((size_t) -1)
40 #endif
41 
42 
43 #if HAVE_ICONV
44 
/* Convert the SRCLEN bytes starting at SRC using the conversion descriptor CD.

   On entry, *RESULTP is either NULL or a caller-provided buffer whose size
   is *LENGTHP.  The input is converted twice: a first pass into a scratch
   buffer only measures the output, a second pass produces it.

   Returns 0 on success and stores the output length in *LENGTHP.  If the
   output is empty, *RESULTP is left untouched.  Otherwise *RESULTP is the
   caller's buffer when it was large enough, or else a block obtained from
   malloc().  Returns -1 with errno set on failure; *RESULTP and *LENGTHP
   are then left untouched.

   An incomplete multibyte sequence at the end of the input (iconv() failing
   with EINVAL) ends the conversion without being treated as an error; the
   output converted before that sequence is returned.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
       libiconv's UCS-4-INTERNAL encoding.  */
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
# define tmpbuf tmp.buf
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              /* The scratch buffer is full; count it and go around again.  */
              ;
            else if (errno == EINVAL)
              {
                /* The input ends in an incomplete sequence.  Whatever this
                   call converted before reaching it is still part of the
                   output: the second pass below emits those bytes as well,
                   so they have to be counted here, otherwise the result
                   buffer is too small (or the output is silently lost).  */
                count += outptr - tmpbuf;
                break;
              }
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      /* Flush the descriptor; a stateful encoding may emit a final shift
         sequence here, which also counts towards the output length.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
# undef tmpbuf
  }

  if (length == 0)
    {
      /* Nothing to store: report an empty result without allocating.  */
      *lengthp = 0;
      return 0;
    }
  if (*resultp != NULL && *lengthp >= length)
    /* The caller's buffer is large enough; reuse it.  */
    result = *resultp;
  else
    {
      result = (char *) malloc (length);
      if (result == NULL)
        {
          errno = ENOMEM;
          return -1;
        }
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              goto fail;
          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            goto fail;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        goto fail;
    }
# endif
    /* Both passes must agree on the output size exactly.  */
    if (outsize != 0)
      abort ();
  }

  *resultp = result;
  *lengthp = length;

  return 0;

 fail:
  {
    /* Release only what was allocated here, never the caller's buffer, and
       keep the errno of the failing call.  */
    if (result != *resultp)
      {
        int saved_errno = errno;
        free (result);
        errno = saved_errno;
      }
    return -1;
  }
# undef tmpbufsize
}
208 
209 char *
210 str_cd_iconv (const char *src, iconv_t cd)
211 {
212   /* For most encodings, a trailing NUL byte in the input will be converted
213      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
214      function is usable for UTF-7, we have to exclude the NUL byte from the
215      conversion and add it by hand afterwards.  */
216 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
217   /* Irix iconv() inserts a NUL byte if it cannot convert.
218      NetBSD iconv() inserts a question mark if it cannot convert.
219      Only GNU libiconv and GNU libc are known to prefer to fail rather
220      than doing a lossy conversion.  For other iconv() implementations,
221      we have to look at the number of irreversible conversions returned;
222      but this information is lost when iconv() returns for an E2BIG reason.
223      Therefore we cannot use the second, faster algorithm.  */
224 
225   char *result = NULL;
226   size_t length = 0;
227   int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
228   char *final_result;
229 
230   if (retval < 0)
231     {
232       if (result != NULL)
233         abort ();
234       return NULL;
235     }
236 
237   /* Add the terminating NUL byte.  */
238   final_result =
239     (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
240   if (final_result == NULL)
241     {
242       free (result);
243       errno = ENOMEM;
244       return NULL;
245     }
246   final_result[length] = '\0';
247 
248   return final_result;
249 
250 # else
251   /* This algorithm is likely faster than the one above.  But it may produce
252      iconv() returns for an E2BIG reason, when the output size guess is too
253      small.  Therefore it can only be used when we don't need the number of
254      irreversible conversions performed.  */
255   char *result;
256   size_t result_size;
257   size_t length;
258   const char *inptr = src;
259   size_t inbytes_remaining = strlen (src);
260 
261   /* Make a guess for the worst-case output size, in order to avoid a
262      realloc.  It's OK if the guess is wrong as long as it is not zero and
263      doesn't lead to an integer overflow.  */
264   result_size = inbytes_remaining;
265   {
266     size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
267     if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
268       result_size *= MB_LEN_MAX;
269   }
270   result_size += 1; /* for the terminating NUL */
271 
272   result = (char *) malloc (result_size);
273   if (result == NULL)
274     {
275       errno = ENOMEM;
276       return NULL;
277     }
278 
279   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
280 # if defined _LIBICONV_VERSION \
281      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
282           || defined __sun)
283   /* Set to the initial state.  */
284   iconv (cd, NULL, NULL, NULL, NULL);
285 # endif
286 
287   /* Do the conversion.  */
288   {
289     char *outptr = result;
290     size_t outbytes_remaining = result_size - 1;
291 
292     for (;;)
293       {
294         /* Here inptr + inbytes_remaining = src + strlen (src),
295                 outptr + outbytes_remaining = result + result_size - 1.  */
296         size_t res = iconv (cd,
297                             (ICONV_CONST char **) &inptr, &inbytes_remaining,
298                             &outptr, &outbytes_remaining);
299 
300         if (res == (size_t)(-1))
301           {
302             if (errno == EINVAL)
303               break;
304             else if (errno == E2BIG)
305               {
306                 size_t used = outptr - result;
307                 size_t newsize = result_size * 2;
308                 char *newresult;
309 
310                 if (!(newsize > result_size))
311                   {
312                     errno = ENOMEM;
313                     goto failed;
314                   }
315                 newresult = (char *) realloc (result, newsize);
316                 if (newresult == NULL)
317                   {
318                     errno = ENOMEM;
319                     goto failed;
320                   }
321                 result = newresult;
322                 result_size = newsize;
323                 outptr = result + used;
324                 outbytes_remaining = result_size - 1 - used;
325               }
326             else
327               goto failed;
328           }
329         else
330           break;
331       }
332     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
333 # if defined _LIBICONV_VERSION \
334      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
335           || defined __sun)
336     for (;;)
337       {
338         /* Here outptr + outbytes_remaining = result + result_size - 1.  */
339         size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
340 
341         if (res == (size_t)(-1))
342           {
343             if (errno == E2BIG)
344               {
345                 size_t used = outptr - result;
346                 size_t newsize = result_size * 2;
347                 char *newresult;
348 
349                 if (!(newsize > result_size))
350                   {
351                     errno = ENOMEM;
352                     goto failed;
353                   }
354                 newresult = (char *) realloc (result, newsize);
355                 if (newresult == NULL)
356                   {
357                     errno = ENOMEM;
358                     goto failed;
359                   }
360                 result = newresult;
361                 result_size = newsize;
362                 outptr = result + used;
363                 outbytes_remaining = result_size - 1 - used;
364               }
365             else
366               goto failed;
367           }
368         else
369           break;
370       }
371 # endif
372 
373     /* Add the terminating NUL byte.  */
374     *outptr++ = '\0';
375 
376     length = outptr - result;
377   }
378 
379   /* Give away unused memory.  */
380   if (length < result_size)
381     {
382       char *smaller_result = (char *) realloc (result, length);
383 
384       if (smaller_result != NULL)
385         result = smaller_result;
386     }
387 
388   return result;
389 
390  failed:
391   {
392     int saved_errno = errno;
393     free (result);
394     errno = saved_errno;
395     return NULL;
396   }
397 
398 # endif
399 }
400 
401 #endif
402 
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.  Returns a newly allocated NUL-terminated string,
   or NULL with errno set on failure.  errno is ENOSYS when this file was
   built without iconv support.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  /* An empty string, or identical codeset names (ignoring case), needs no
     conversion at all: a plain copy is the answer.  */
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;
    int saved_errno;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif
    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);
    if (converted == NULL)
      {
        /* Conversion failed: close the descriptor, but report the errno
           left by str_cd_iconv.  */
        saved_errno = errno;
        iconv_close (cd);
        errno = saved_errno;
        return NULL;
      }

    if (iconv_close (cd) < 0)
      {
        /* Closing failed: drop the converted string and report the errno
           left by iconv_close.  */
        saved_errno = errno;
        free (converted);
        errno = saved_errno;
        return NULL;
      }

    return converted;
  }
#else
  /* Deliberately not the error code used when iconv_open exists but lacks
     support for from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}
467