1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 #include <alloca.h>
24
25 /* Specification. */
26 #include "msgl-iconv.h"
27
28 #include <stdbool.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #if HAVE_ICONV
33 # include <iconv.h>
34 #endif
35
36 #include "progname.h"
37 #include "basename.h"
38 #include "message.h"
39 #include "po-charset.h"
40 #include "xstriconv.h"
41 #include "msgl-ascii.h"
42 #include "xalloc.h"
43 #include "xallocsa.h"
44 #include "c-strstr.h"
45 #include "xvasprintf.h"
46 #include "po-xerror.h"
47 #include "gettext.h"
48
49 #define _(str) gettext (str)
50
51
52 #if HAVE_ICONV
53
54 static void conversion_error (const struct conversion_context* context)
55 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
56 __attribute__ ((noreturn))
57 #endif
58 ;
59 static void
conversion_error(const struct conversion_context * context)60 conversion_error (const struct conversion_context* context)
61 {
62 if (context->to_code == po_charset_utf8)
63 /* If a conversion to UTF-8 fails, the problem lies in the input. */
64 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
65 xasprintf (_("%s: input is not valid in \"%s\" encoding"),
66 context->from_filename, context->from_code));
67 else
68 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
69 xasprintf (_("\
70 %s: error while converting from \"%s\" encoding to \"%s\" encoding"),
71 context->from_filename, context->from_code,
72 context->to_code));
73 /* NOTREACHED */
74 abort ();
75 }
76
/* Convert the NUL terminated STRING through the conversion descriptor CD
   and return the buffer produced by xmem_cd_iconv.
   The terminating NUL is converted together with the text, and the output
   is accepted only if it contains exactly one NUL byte, at its very end.
   Any failure is reported through conversion_error (CONTEXT).  */
char *
convert_string (iconv_t cd, const char *string,
                const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_size;
  size_t input_size = strlen (string) + 1;

  /* A zero return from xmem_cd_iconv is treated as success.  */
  if (xmem_cd_iconv (string, input_size, cd, &converted, &converted_size) == 0
      && converted_size > 0
      && converted[converted_size - 1] == '\0'
      && strlen (converted) == converted_size - 1)
    return converted;

  conversion_error (context);
  /* NOTREACHED */
  return NULL;
}
95
96 static void
convert_string_list(iconv_t cd,string_list_ty * slp,const struct conversion_context * context)97 convert_string_list (iconv_t cd, string_list_ty *slp,
98 const struct conversion_context* context)
99 {
100 size_t i;
101
102 if (slp != NULL)
103 for (i = 0; i < slp->nitems; i++)
104 slp->item[i] = convert_string (cd, slp->item[i], context);
105 }
106
107 static void
convert_prev_msgid(iconv_t cd,message_ty * mp,const struct conversion_context * context)108 convert_prev_msgid (iconv_t cd, message_ty *mp,
109 const struct conversion_context* context)
110 {
111 if (mp->prev_msgctxt != NULL)
112 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
113 if (mp->prev_msgid != NULL)
114 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
115 if (mp->prev_msgid_plural != NULL)
116 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
117 }
118
119 static void
convert_msgid(iconv_t cd,message_ty * mp,const struct conversion_context * context)120 convert_msgid (iconv_t cd, message_ty *mp,
121 const struct conversion_context* context)
122 {
123 if (mp->msgctxt != NULL)
124 mp->msgctxt = convert_string (cd, mp->msgctxt, context);
125 mp->msgid = convert_string (cd, mp->msgid, context);
126 if (mp->msgid_plural != NULL)
127 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
128 }
129
130 static void
convert_msgstr(iconv_t cd,message_ty * mp,const struct conversion_context * context)131 convert_msgstr (iconv_t cd, message_ty *mp,
132 const struct conversion_context* context)
133 {
134 char *result = NULL;
135 size_t resultlen;
136
137 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
138 abort ();
139
140 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0)
141 /* Verify the result has a NUL byte at the end. */
142 if (resultlen > 0 && result[resultlen - 1] == '\0')
143 /* Verify the result has the same number of NUL bytes. */
144 {
145 const char *p;
146 const char *pend;
147 int nulcount1;
148 int nulcount2;
149
150 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
151 p < pend;
152 p += strlen (p) + 1, nulcount1++);
153 for (p = result, pend = p + resultlen, nulcount2 = 0;
154 p < pend;
155 p += strlen (p) + 1, nulcount2++);
156
157 if (nulcount1 == nulcount2)
158 {
159 mp->msgstr = result;
160 mp->msgstr_len = resultlen;
161 return;
162 }
163 }
164
165 conversion_error (context);
166 }
167
168 #endif
169
170
171 bool
iconv_message_list(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code,const char * from_filename)172 iconv_message_list (message_list_ty *mlp,
173 const char *canon_from_code, const char *canon_to_code,
174 const char *from_filename)
175 {
176 bool canon_from_code_overridden = (canon_from_code != NULL);
177 bool msgids_changed;
178 size_t j;
179
180 /* If the list is empty, nothing to do. */
181 if (mlp->nitems == 0)
182 return false;
183
184 /* Search the header entry, and extract and replace the charset name. */
185 for (j = 0; j < mlp->nitems; j++)
186 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
187 {
188 const char *header = mlp->item[j]->msgstr;
189
190 if (header != NULL)
191 {
192 const char *charsetstr = c_strstr (header, "charset=");
193
194 if (charsetstr != NULL)
195 {
196 size_t len;
197 char *charset;
198 const char *canon_charset;
199 size_t len1, len2, len3;
200 char *new_header;
201
202 charsetstr += strlen ("charset=");
203 len = strcspn (charsetstr, " \t\n");
204 charset = (char *) xallocsa (len + 1);
205 memcpy (charset, charsetstr, len);
206 charset[len] = '\0';
207
208 canon_charset = po_charset_canonicalize (charset);
209 if (canon_charset == NULL)
210 {
211 if (!canon_from_code_overridden)
212 {
213 /* Don't give an error for POT files, because POT
214 files usually contain only ASCII msgids. */
215 const char *filename = from_filename;
216 size_t filenamelen;
217
218 if (filename != NULL
219 && (filenamelen = strlen (filename)) >= 4
220 && memcmp (filename + filenamelen - 4, ".pot", 4)
221 == 0
222 && strcmp (charset, "CHARSET") == 0)
223 canon_charset = po_charset_ascii;
224 else
225 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
226 false, xasprintf (_("\
227 present charset \"%s\" is not a portable encoding name"),
228 charset));
229 }
230 }
231 else
232 {
233 if (canon_from_code == NULL)
234 canon_from_code = canon_charset;
235 else if (canon_from_code != canon_charset)
236 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
237 false,
238 xasprintf (_("\
239 two different charsets \"%s\" and \"%s\" in input file"),
240 canon_from_code, canon_charset));
241 }
242 freesa (charset);
243
244 len1 = charsetstr - header;
245 len2 = strlen (canon_to_code);
246 len3 = (header + strlen (header)) - (charsetstr + len);
247 new_header = (char *) xmalloc (len1 + len2 + len3 + 1);
248 memcpy (new_header, header, len1);
249 memcpy (new_header + len1, canon_to_code, len2);
250 memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
251 mlp->item[j]->msgstr = new_header;
252 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
253 }
254 }
255 }
256 if (canon_from_code == NULL)
257 {
258 if (is_ascii_message_list (mlp))
259 canon_from_code = po_charset_ascii;
260 else
261 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
262 _("\
263 input file doesn't contain a header entry with a charset specification"));
264 }
265
266 msgids_changed = false;
267
268 /* If the two encodings are the same, nothing to do. */
269 if (canon_from_code != canon_to_code)
270 {
271 #if HAVE_ICONV
272 iconv_t cd;
273 struct conversion_context context;
274
275 /* Avoid glibc-2.1 bug with EUC-KR. */
276 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
277 if (strcmp (canon_from_code, "EUC-KR") == 0)
278 cd = (iconv_t)(-1);
279 else
280 # endif
281 cd = iconv_open (canon_to_code, canon_from_code);
282 if (cd == (iconv_t)(-1))
283 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
284 xasprintf (_("\
285 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
286 and iconv() does not support this conversion."),
287 canon_from_code, canon_to_code,
288 basename (program_name)));
289
290 context.from_code = canon_from_code;
291 context.to_code = canon_to_code;
292 context.from_filename = from_filename;
293
294 for (j = 0; j < mlp->nitems; j++)
295 {
296 message_ty *mp = mlp->item[j];
297
298 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
299 || !is_ascii_string (mp->msgid))
300 msgids_changed = true;
301 context.message = mp;
302 convert_string_list (cd, mp->comment, &context);
303 convert_string_list (cd, mp->comment_dot, &context);
304 convert_prev_msgid (cd, mp, &context);
305 convert_msgid (cd, mp, &context);
306 convert_msgstr (cd, mp, &context);
307 }
308
309 iconv_close (cd);
310
311 if (msgids_changed)
312 if (message_list_msgids_changed (mlp))
313 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
314 xasprintf (_("\
315 Conversion from \"%s\" to \"%s\" introduces duplicates: \
316 some different msgids become equal."),
317 canon_from_code, canon_to_code));
318 #else
319 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
320 xasprintf (_("\
321 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
322 This version was built without iconv()."),
323 canon_from_code, canon_to_code,
324 basename (program_name)));
325 #endif
326 }
327
328 return msgids_changed;
329 }
330
331 msgdomain_list_ty *
iconv_msgdomain_list(msgdomain_list_ty * mdlp,const char * to_code,const char * from_filename)332 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
333 const char *to_code,
334 const char *from_filename)
335 {
336 const char *canon_to_code;
337 size_t k;
338
339 /* Canonicalize target encoding. */
340 canon_to_code = po_charset_canonicalize (to_code);
341 if (canon_to_code == NULL)
342 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
343 xasprintf (_("\
344 target charset \"%s\" is not a portable encoding name."),
345 to_code));
346
347 for (k = 0; k < mdlp->nitems; k++)
348 iconv_message_list (mdlp->item[k]->messages, mdlp->encoding, canon_to_code,
349 from_filename);
350
351 mdlp->encoding = canon_to_code;
352 return mdlp;
353 }
354
355 #if HAVE_ICONV
356
/* Test whether the NUL terminated STRING can be converted through CD.
   The terminating NUL is converted together with the text.  Return true
   only if the conversion succeeds and its output contains exactly one NUL
   byte, at the end.  The converted buffer is freed before returning.  */
static bool
iconvable_string (iconv_t cd, const char *string)
{
  char *converted = NULL;
  size_t converted_size;
  size_t input_size = strlen (string) + 1;
  bool acceptable;

  /* A nonzero return from xmem_cd_iconv is treated as failure.  */
  if (xmem_cd_iconv (string, input_size, cd, &converted, &converted_size) != 0)
    return false;

  acceptable = (converted_size > 0
                && converted[converted_size - 1] == '\0'
                && strlen (converted) == converted_size - 1);
  free (converted);
  return acceptable;
}
374
375 static bool
iconvable_string_list(iconv_t cd,string_list_ty * slp)376 iconvable_string_list (iconv_t cd, string_list_ty *slp)
377 {
378 size_t i;
379
380 if (slp != NULL)
381 for (i = 0; i < slp->nitems; i++)
382 if (!iconvable_string (cd, slp->item[i]))
383 return false;
384 return true;
385 }
386
387 static bool
iconvable_prev_msgid(iconv_t cd,message_ty * mp)388 iconvable_prev_msgid (iconv_t cd, message_ty *mp)
389 {
390 if (mp->prev_msgctxt != NULL)
391 if (!iconvable_string (cd, mp->prev_msgctxt))
392 return false;
393 if (mp->prev_msgid != NULL)
394 if (!iconvable_string (cd, mp->prev_msgid))
395 return false;
396 if (mp->msgid_plural != NULL)
397 if (!iconvable_string (cd, mp->prev_msgid_plural))
398 return false;
399 return true;
400 }
401
402 static bool
iconvable_msgid(iconv_t cd,message_ty * mp)403 iconvable_msgid (iconv_t cd, message_ty *mp)
404 {
405 if (mp->msgctxt != NULL)
406 if (!iconvable_string (cd, mp->msgctxt))
407 return false;
408 if (!iconvable_string (cd, mp->msgid))
409 return false;
410 if (mp->msgid_plural != NULL)
411 if (!iconvable_string (cd, mp->msgid_plural))
412 return false;
413 return true;
414 }
415
416 static bool
iconvable_msgstr(iconv_t cd,message_ty * mp)417 iconvable_msgstr (iconv_t cd, message_ty *mp)
418 {
419 char *result = NULL;
420 size_t resultlen;
421
422 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
423 abort ();
424
425 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0)
426 {
427 bool ok = false;
428
429 /* Test if the result has a NUL byte at the end. */
430 if (resultlen > 0 && result[resultlen - 1] == '\0')
431 /* Test if the result has the same number of NUL bytes. */
432 {
433 const char *p;
434 const char *pend;
435 int nulcount1;
436 int nulcount2;
437
438 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
439 p < pend;
440 p += strlen (p) + 1, nulcount1++);
441 for (p = result, pend = p + resultlen, nulcount2 = 0;
442 p < pend;
443 p += strlen (p) + 1, nulcount2++);
444
445 if (nulcount1 == nulcount2)
446 ok = true;
447 }
448
449 free (result);
450 return ok;
451 }
452 return false;
453 }
454
455 #endif
456
457 bool
is_message_list_iconvable(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code)458 is_message_list_iconvable (message_list_ty *mlp,
459 const char *canon_from_code,
460 const char *canon_to_code)
461 {
462 bool canon_from_code_overridden = (canon_from_code != NULL);
463 size_t j;
464
465 /* If the list is empty, nothing to check. */
466 if (mlp->nitems == 0)
467 return true;
468
469 /* Search the header entry, and extract the charset name. */
470 for (j = 0; j < mlp->nitems; j++)
471 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
472 {
473 const char *header = mlp->item[j]->msgstr;
474
475 if (header != NULL)
476 {
477 const char *charsetstr = c_strstr (header, "charset=");
478
479 if (charsetstr != NULL)
480 {
481 size_t len;
482 char *charset;
483 const char *canon_charset;
484
485 charsetstr += strlen ("charset=");
486 len = strcspn (charsetstr, " \t\n");
487 charset = (char *) xallocsa (len + 1);
488 memcpy (charset, charsetstr, len);
489 charset[len] = '\0';
490
491 canon_charset = po_charset_canonicalize (charset);
492 if (canon_charset == NULL)
493 {
494 if (!canon_from_code_overridden)
495 {
496 /* Don't give an error for POT files, because POT
497 files usually contain only ASCII msgids. */
498 if (strcmp (charset, "CHARSET") == 0)
499 canon_charset = po_charset_ascii;
500 else
501 {
502 /* charset is not a portable encoding name. */
503 freesa (charset);
504 return false;
505 }
506 }
507 }
508 else
509 {
510 if (canon_from_code == NULL)
511 canon_from_code = canon_charset;
512 else if (canon_from_code != canon_charset)
513 {
514 /* Two different charsets in input file. */
515 freesa (charset);
516 return false;
517 }
518 }
519 freesa (charset);
520 }
521 }
522 }
523 if (canon_from_code == NULL)
524 {
525 if (is_ascii_message_list (mlp))
526 canon_from_code = po_charset_ascii;
527 else
528 /* Input file lacks a header entry with a charset specification. */
529 return false;
530 }
531
532 /* If the two encodings are the same, nothing to check. */
533 if (canon_from_code != canon_to_code)
534 {
535 #if HAVE_ICONV
536 iconv_t cd;
537
538 /* Avoid glibc-2.1 bug with EUC-KR. */
539 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
540 if (strcmp (canon_from_code, "EUC-KR") == 0)
541 cd = (iconv_t)(-1);
542 else
543 # endif
544 cd = iconv_open (canon_to_code, canon_from_code);
545 if (cd == (iconv_t)(-1))
546 /* iconv() doesn't support this conversion. */
547 return false;
548
549 for (j = 0; j < mlp->nitems; j++)
550 {
551 message_ty *mp = mlp->item[j];
552
553 if (!(iconvable_string_list (cd, mp->comment)
554 && iconvable_string_list (cd, mp->comment_dot)
555 && iconvable_prev_msgid (cd, mp)
556 && iconvable_msgid (cd, mp)
557 && iconvable_msgstr (cd, mp)))
558 return false;
559 }
560
561 iconv_close (cd);
562 #else
563 /* This version was built without iconv(). */
564 return false;
565 #endif
566 }
567
568 return true;
569 }
570