1 /* Message list charset and locale charset handling. 2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2001. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 20 #ifdef HAVE_CONFIG_H 21 # include "config.h" 22 #endif 23 #include <alloca.h> 24 25 /* Specification. */ 26 #include "msgl-iconv.h" 27 28 #include <stdbool.h> 29 #include <stdlib.h> 30 #include <string.h> 31 32 #if HAVE_ICONV 33 # include <iconv.h> 34 #endif 35 36 #include "progname.h" 37 #include "basename.h" 38 #include "message.h" 39 #include "po-charset.h" 40 #include "xstriconv.h" 41 #include "msgl-ascii.h" 42 #include "xalloc.h" 43 #include "xallocsa.h" 44 #include "c-strstr.h" 45 #include "xvasprintf.h" 46 #include "po-xerror.h" 47 #include "gettext.h" 48 49 #define _(str) gettext (str) 50 51 52 #if HAVE_ICONV 53 54 static void conversion_error (const struct conversion_context* context) 55 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) 56 __attribute__ ((noreturn)) 57 #endif 58 ; 59 static void 60 conversion_error (const struct conversion_context* context) 61 { 62 if (context->to_code == po_charset_utf8) 63 /* If a conversion to UTF-8 fails, the problem lies in the input. */ 64 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false, 65 xasprintf (_("%s: input is not valid in \"%s\" encoding"), 66 context->from_filename, context->from_code)); 67 else 68 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false, 69 xasprintf (_("\ 70 %s: error while converting from \"%s\" encoding to \"%s\" encoding"), 71 context->from_filename, context->from_code, 72 context->to_code)); 73 /* NOTREACHED */ 74 abort (); 75 } 76 77 char * 78 convert_string (iconv_t cd, const char *string, 79 const struct conversion_context* context) 80 { 81 size_t len = strlen (string) + 1; 82 char *result = NULL; 83 size_t resultlen; 84 85 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0) 86 /* Verify the result has exactly one NUL byte, at the end. */ 87 if (resultlen > 0 && result[resultlen - 1] == '\0' 88 && strlen (result) == resultlen - 1) 89 return result; 90 91 conversion_error (context); 92 /* NOTREACHED */ 93 return NULL; 94 } 95 96 static void 97 convert_string_list (iconv_t cd, string_list_ty *slp, 98 const struct conversion_context* context) 99 { 100 size_t i; 101 102 if (slp != NULL) 103 for (i = 0; i < slp->nitems; i++) 104 slp->item[i] = convert_string (cd, slp->item[i], context); 105 } 106 107 static void 108 convert_prev_msgid (iconv_t cd, message_ty *mp, 109 const struct conversion_context* context) 110 { 111 if (mp->prev_msgctxt != NULL) 112 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context); 113 if (mp->prev_msgid != NULL) 114 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context); 115 if (mp->prev_msgid_plural != NULL) 116 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context); 117 } 118 119 static void 120 convert_msgid (iconv_t cd, message_ty *mp, 121 const struct conversion_context* context) 122 { 123 if (mp->msgctxt != NULL) 124 mp->msgctxt = convert_string (cd, mp->msgctxt, context); 125 mp->msgid = convert_string (cd, mp->msgid, context); 126 if (mp->msgid_plural != NULL) 127 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context); 128 } 129 130 static void 131 convert_msgstr (iconv_t cd, message_ty *mp, 132 const struct conversion_context* context) 133 { 134 char *result = NULL; 135 size_t resultlen; 136 137 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0')) 138 abort (); 139 140 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0) 141 /* Verify the result has a NUL byte at the end. */ 142 if (resultlen > 0 && result[resultlen - 1] == '\0') 143 /* Verify the result has the same number of NUL bytes. */ 144 { 145 const char *p; 146 const char *pend; 147 int nulcount1; 148 int nulcount2; 149 150 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0; 151 p < pend; 152 p += strlen (p) + 1, nulcount1++); 153 for (p = result, pend = p + resultlen, nulcount2 = 0; 154 p < pend; 155 p += strlen (p) + 1, nulcount2++); 156 157 if (nulcount1 == nulcount2) 158 { 159 mp->msgstr = result; 160 mp->msgstr_len = resultlen; 161 return; 162 } 163 } 164 165 conversion_error (context); 166 } 167 168 #endif 169 170 171 bool 172 iconv_message_list (message_list_ty *mlp, 173 const char *canon_from_code, const char *canon_to_code, 174 const char *from_filename) 175 { 176 bool canon_from_code_overridden = (canon_from_code != NULL); 177 bool msgids_changed; 178 size_t j; 179 180 /* If the list is empty, nothing to do. */ 181 if (mlp->nitems == 0) 182 return false; 183 184 /* Search the header entry, and extract and replace the charset name. */ 185 for (j = 0; j < mlp->nitems; j++) 186 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 187 { 188 const char *header = mlp->item[j]->msgstr; 189 190 if (header != NULL) 191 { 192 const char *charsetstr = c_strstr (header, "charset="); 193 194 if (charsetstr != NULL) 195 { 196 size_t len; 197 char *charset; 198 const char *canon_charset; 199 size_t len1, len2, len3; 200 char *new_header; 201 202 charsetstr += strlen ("charset="); 203 len = strcspn (charsetstr, " \t\n"); 204 charset = (char *) xallocsa (len + 1); 205 memcpy (charset, charsetstr, len); 206 charset[len] = '\0'; 207 208 canon_charset = po_charset_canonicalize (charset); 209 if (canon_charset == NULL) 210 { 211 if (!canon_from_code_overridden) 212 { 213 /* Don't give an error for POT files, because POT 214 files usually contain only ASCII msgids. */ 215 const char *filename = from_filename; 216 size_t filenamelen; 217 218 if (filename != NULL 219 && (filenamelen = strlen (filename)) >= 4 220 && memcmp (filename + filenamelen - 4, ".pot", 4) 221 == 0 222 && strcmp (charset, "CHARSET") == 0) 223 canon_charset = po_charset_ascii; 224 else 225 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, 226 false, xasprintf (_("\ 227 present charset \"%s\" is not a portable encoding name"), 228 charset)); 229 } 230 } 231 else 232 { 233 if (canon_from_code == NULL) 234 canon_from_code = canon_charset; 235 else if (canon_from_code != canon_charset) 236 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, 237 false, 238 xasprintf (_("\ 239 two different charsets \"%s\" and \"%s\" in input file"), 240 canon_from_code, canon_charset)); 241 } 242 freesa (charset); 243 244 len1 = charsetstr - header; 245 len2 = strlen (canon_to_code); 246 len3 = (header + strlen (header)) - (charsetstr + len); 247 new_header = (char *) xmalloc (len1 + len2 + len3 + 1); 248 memcpy (new_header, header, len1); 249 memcpy (new_header + len1, canon_to_code, len2); 250 memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1); 251 mlp->item[j]->msgstr = new_header; 252 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1; 253 } 254 } 255 } 256 if (canon_from_code == NULL) 257 { 258 if (is_ascii_message_list (mlp)) 259 canon_from_code = po_charset_ascii; 260 else 261 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 262 _("\ 263 input file doesn't contain a header entry with a charset specification")); 264 } 265 266 msgids_changed = false; 267 268 /* If the two encodings are the same, nothing to do. */ 269 if (canon_from_code != canon_to_code) 270 { 271 #if HAVE_ICONV 272 iconv_t cd; 273 struct conversion_context context; 274 275 /* Avoid glibc-2.1 bug with EUC-KR. */ 276 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 277 if (strcmp (canon_from_code, "EUC-KR") == 0) 278 cd = (iconv_t)(-1); 279 else 280 # endif 281 cd = iconv_open (canon_to_code, canon_from_code); 282 if (cd == (iconv_t)(-1)) 283 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 284 xasprintf (_("\ 285 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ 286 and iconv() does not support this conversion."), 287 canon_from_code, canon_to_code, 288 basename (program_name))); 289 290 context.from_code = canon_from_code; 291 context.to_code = canon_to_code; 292 context.from_filename = from_filename; 293 294 for (j = 0; j < mlp->nitems; j++) 295 { 296 message_ty *mp = mlp->item[j]; 297 298 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)) 299 || !is_ascii_string (mp->msgid)) 300 msgids_changed = true; 301 context.message = mp; 302 convert_string_list (cd, mp->comment, &context); 303 convert_string_list (cd, mp->comment_dot, &context); 304 convert_prev_msgid (cd, mp, &context); 305 convert_msgid (cd, mp, &context); 306 convert_msgstr (cd, mp, &context); 307 } 308 309 iconv_close (cd); 310 311 if (msgids_changed) 312 if (message_list_msgids_changed (mlp)) 313 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 314 xasprintf (_("\ 315 Conversion from \"%s\" to \"%s\" introduces duplicates: \ 316 some different msgids become equal."), 317 canon_from_code, canon_to_code)); 318 #else 319 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 320 xasprintf (_("\ 321 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \ 322 This version was built without iconv()."), 323 canon_from_code, canon_to_code, 324 basename (program_name))); 325 #endif 326 } 327 328 return msgids_changed; 329 } 330 331 msgdomain_list_ty * 332 iconv_msgdomain_list (msgdomain_list_ty *mdlp, 333 const char *to_code, 334 const char *from_filename) 335 { 336 const char *canon_to_code; 337 size_t k; 338 339 /* Canonicalize target encoding. */ 340 canon_to_code = po_charset_canonicalize (to_code); 341 if (canon_to_code == NULL) 342 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 343 xasprintf (_("\ 344 target charset \"%s\" is not a portable encoding name."), 345 to_code)); 346 347 for (k = 0; k < mdlp->nitems; k++) 348 iconv_message_list (mdlp->item[k]->messages, mdlp->encoding, canon_to_code, 349 from_filename); 350 351 mdlp->encoding = canon_to_code; 352 return mdlp; 353 } 354 355 #if HAVE_ICONV 356 357 static bool 358 iconvable_string (iconv_t cd, const char *string) 359 { 360 size_t len = strlen (string) + 1; 361 char *result = NULL; 362 size_t resultlen; 363 364 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0) 365 { 366 /* Test if the result has exactly one NUL byte, at the end. */ 367 bool ok = (resultlen > 0 && result[resultlen - 1] == '\0' 368 && strlen (result) == resultlen - 1); 369 free (result); 370 return ok; 371 } 372 return false; 373 } 374 375 static bool 376 iconvable_string_list (iconv_t cd, string_list_ty *slp) 377 { 378 size_t i; 379 380 if (slp != NULL) 381 for (i = 0; i < slp->nitems; i++) 382 if (!iconvable_string (cd, slp->item[i])) 383 return false; 384 return true; 385 } 386 387 static bool 388 iconvable_prev_msgid (iconv_t cd, message_ty *mp) 389 { 390 if (mp->prev_msgctxt != NULL) 391 if (!iconvable_string (cd, mp->prev_msgctxt)) 392 return false; 393 if (mp->prev_msgid != NULL) 394 if (!iconvable_string (cd, mp->prev_msgid)) 395 return false; 396 if (mp->msgid_plural != NULL) 397 if (!iconvable_string (cd, mp->prev_msgid_plural)) 398 return false; 399 return true; 400 } 401 402 static bool 403 iconvable_msgid (iconv_t cd, message_ty *mp) 404 { 405 if (mp->msgctxt != NULL) 406 if (!iconvable_string (cd, mp->msgctxt)) 407 return false; 408 if (!iconvable_string (cd, mp->msgid)) 409 return false; 410 if (mp->msgid_plural != NULL) 411 if (!iconvable_string (cd, mp->msgid_plural)) 412 return false; 413 return true; 414 } 415 416 static bool 417 iconvable_msgstr (iconv_t cd, message_ty *mp) 418 { 419 char *result = NULL; 420 size_t resultlen; 421 422 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0')) 423 abort (); 424 425 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0) 426 { 427 bool ok = false; 428 429 /* Test if the result has a NUL byte at the end. */ 430 if (resultlen > 0 && result[resultlen - 1] == '\0') 431 /* Test if the result has the same number of NUL bytes. */ 432 { 433 const char *p; 434 const char *pend; 435 int nulcount1; 436 int nulcount2; 437 438 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0; 439 p < pend; 440 p += strlen (p) + 1, nulcount1++); 441 for (p = result, pend = p + resultlen, nulcount2 = 0; 442 p < pend; 443 p += strlen (p) + 1, nulcount2++); 444 445 if (nulcount1 == nulcount2) 446 ok = true; 447 } 448 449 free (result); 450 return ok; 451 } 452 return false; 453 } 454 455 #endif 456 457 bool 458 is_message_list_iconvable (message_list_ty *mlp, 459 const char *canon_from_code, 460 const char *canon_to_code) 461 { 462 bool canon_from_code_overridden = (canon_from_code != NULL); 463 size_t j; 464 465 /* If the list is empty, nothing to check. */ 466 if (mlp->nitems == 0) 467 return true; 468 469 /* Search the header entry, and extract the charset name. */ 470 for (j = 0; j < mlp->nitems; j++) 471 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 472 { 473 const char *header = mlp->item[j]->msgstr; 474 475 if (header != NULL) 476 { 477 const char *charsetstr = c_strstr (header, "charset="); 478 479 if (charsetstr != NULL) 480 { 481 size_t len; 482 char *charset; 483 const char *canon_charset; 484 485 charsetstr += strlen ("charset="); 486 len = strcspn (charsetstr, " \t\n"); 487 charset = (char *) xallocsa (len + 1); 488 memcpy (charset, charsetstr, len); 489 charset[len] = '\0'; 490 491 canon_charset = po_charset_canonicalize (charset); 492 if (canon_charset == NULL) 493 { 494 if (!canon_from_code_overridden) 495 { 496 /* Don't give an error for POT files, because POT 497 files usually contain only ASCII msgids. */ 498 if (strcmp (charset, "CHARSET") == 0) 499 canon_charset = po_charset_ascii; 500 else 501 { 502 /* charset is not a portable encoding name. */ 503 freesa (charset); 504 return false; 505 } 506 } 507 } 508 else 509 { 510 if (canon_from_code == NULL) 511 canon_from_code = canon_charset; 512 else if (canon_from_code != canon_charset) 513 { 514 /* Two different charsets in input file. */ 515 freesa (charset); 516 return false; 517 } 518 } 519 freesa (charset); 520 } 521 } 522 } 523 if (canon_from_code == NULL) 524 { 525 if (is_ascii_message_list (mlp)) 526 canon_from_code = po_charset_ascii; 527 else 528 /* Input file lacks a header entry with a charset specification. */ 529 return false; 530 } 531 532 /* If the two encodings are the same, nothing to check. */ 533 if (canon_from_code != canon_to_code) 534 { 535 #if HAVE_ICONV 536 iconv_t cd; 537 538 /* Avoid glibc-2.1 bug with EUC-KR. */ 539 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 540 if (strcmp (canon_from_code, "EUC-KR") == 0) 541 cd = (iconv_t)(-1); 542 else 543 # endif 544 cd = iconv_open (canon_to_code, canon_from_code); 545 if (cd == (iconv_t)(-1)) 546 /* iconv() doesn't support this conversion. */ 547 return false; 548 549 for (j = 0; j < mlp->nitems; j++) 550 { 551 message_ty *mp = mlp->item[j]; 552 553 if (!(iconvable_string_list (cd, mp->comment) 554 && iconvable_string_list (cd, mp->comment_dot) 555 && iconvable_prev_msgid (cd, mp) 556 && iconvable_msgid (cd, mp) 557 && iconvable_msgstr (cd, mp))) 558 return false; 559 } 560 561 iconv_close (cd); 562 #else 563 /* This version was built without iconv(). */ 564 return false; 565 #endif 566 } 567 568 return true; 569 } 570