1 /* $NetBSD: gettext.c,v 1.18 2004/01/18 08:40:40 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2000, 2001 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: gettext.c,v 1.18 2004/01/18 08:40:40 yamt Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <sys/mman.h> 37 #include <sys/uio.h> 38 39 #include <fcntl.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <unistd.h> 43 #include <string.h> 44 #if 0 45 #include <util.h> 46 #endif 47 #include <libintl.h> 48 #include <locale.h> 49 #include "libintl_local.h" 50 #include "pathnames.h" 51 52 static const char *lookup_category __P((int)); 53 static const char *split_locale __P((const char *)); 54 static const char *lookup_mofile __P((char *, size_t, const char *, 55 const char *, const char *, const char *, struct domainbinding *)); 56 static u_int32_t flip __P((u_int32_t, u_int32_t)); 57 static int validate __P((void *, struct mohandle *)); 58 static int mapit __P((const char *, struct domainbinding *)); 59 static int unmapit __P((struct domainbinding *)); 60 static const char *lookup_hash __P((const char *, struct domainbinding *)); 61 static const char *lookup_bsearch __P((const char *, struct domainbinding *)); 62 static const char *lookup __P((const char *, struct domainbinding *)); 63 static const char *get_lang_env __P((const char *)); 64 65 /* 66 * shortcut functions. the main implementation resides in dcngettext(). 67 */ 68 char * 69 gettext(msgid) 70 const char *msgid; 71 { 72 73 return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES); 74 } 75 76 char * 77 dgettext(domainname, msgid) 78 const char *domainname; 79 const char *msgid; 80 { 81 82 return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES); 83 } 84 85 char * 86 dcgettext(domainname, msgid, category) 87 const char *domainname; 88 const char *msgid; 89 int category; 90 { 91 92 return dcngettext(domainname, msgid, NULL, 1UL, category); 93 } 94 95 char * 96 ngettext(msgid1, msgid2, n) 97 const char *msgid1; 98 const char *msgid2; 99 unsigned long int n; 100 { 101 102 return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES); 103 } 104 105 char * 106 dngettext(domainname, msgid1, msgid2, n) 107 const char *domainname; 108 const char *msgid1; 109 const char *msgid2; 110 unsigned long int n; 111 { 112 113 return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES); 114 } 115 116 /* 117 * dcngettext() - 118 * lookup internationalized message on database locale/category/domainname 119 * (like ja_JP.eucJP/LC_MESSAGES/domainname). 120 * if n equals to 1, internationalized message will be looked up for msgid1. 121 * otherwise, message will be looked up for msgid2. 122 * if the lookup fails, the function will return msgid1 or msgid2 as is. 123 * 124 * Even though the return type is "char *", caller should not rewrite the 125 * region pointed to by the return value (should be "const char *", but can't 126 * change it for compatibility with other implementations). 127 * 128 * by default (if domainname == NULL), domainname is taken from the value set 129 * by textdomain(). usually name of the application (like "ls") is used as 130 * domainname. category is usually LC_MESSAGES. 131 * 132 * the code reads in *.mo files generated by GNU gettext. *.mo is a host- 133 * endian encoded file. both endians are supported here, as the files are in 134 * /usr/share/locale! (or we should move those files into /usr/libdata) 135 */ 136 137 static const char * 138 lookup_category(category) 139 int category; 140 { 141 142 switch (category) { 143 case LC_COLLATE: return "LC_COLLATE"; 144 case LC_CTYPE: return "LC_CTYPE"; 145 case LC_MONETARY: return "LC_MONETARY"; 146 case LC_NUMERIC: return "LC_NUMERIC"; 147 case LC_TIME: return "LC_TIME"; 148 case LC_MESSAGES: return "LC_MESSAGES"; 149 } 150 return NULL; 151 } 152 153 /* 154 * XPG syntax: language[_territory[.codeset]][@modifier] 155 * XXX boundary check on "result" is lacking 156 */ 157 static const char * 158 split_locale(lname) 159 const char *lname; 160 { 161 char buf[BUFSIZ], tmp[BUFSIZ]; 162 char *l, *t, *c, *m; 163 static char result[BUFSIZ]; 164 165 memset(result, 0, sizeof(result)); 166 167 if (strlen(lname) + 1 > sizeof(buf)) { 168 fail: 169 return lname; 170 } 171 172 strlcpy(buf, lname, sizeof(buf)); 173 m = strrchr(buf, '@'); 174 if (m) 175 *m++ = '\0'; 176 c = strrchr(buf, '.'); 177 if (c) 178 *c++ = '\0'; 179 t = strrchr(buf, '_'); 180 if (t) 181 *t++ = '\0'; 182 l = buf; 183 if (strlen(l) == 0) 184 goto fail; 185 if (c && !t) 186 goto fail; 187 188 if (m) { 189 if (t) { 190 if (c) { 191 snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s", 192 l, t, c, m); 193 strlcat(result, tmp, sizeof(result)); 194 strlcat(result, ":", sizeof(result)); 195 } 196 snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m); 197 strlcat(result, tmp, sizeof(result)); 198 strlcat(result, ":", sizeof(result)); 199 } 200 snprintf(tmp, sizeof(tmp), "%s@%s", l, m); 201 strlcat(result, tmp, sizeof(result)); 202 strlcat(result, ":", sizeof(result)); 203 } 204 if (t) { 205 if (c) { 206 snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c); 207 strlcat(result, tmp, sizeof(result)); 208 strlcat(result, ":", sizeof(result)); 209 } 210 snprintf(tmp, sizeof(tmp), "%s_%s", l, t); 211 strlcat(result, tmp, sizeof(result)); 212 strlcat(result, ":", sizeof(result)); 213 } 214 strlcat(result, l, sizeof(result)); 215 216 return result; 217 } 218 219 static const char * 220 lookup_mofile(buf, len, dir, lpath, category, domainname, db) 221 char *buf; 222 size_t len; 223 const char *dir; 224 const char *lpath; /* list of locales to be tried */ 225 const char *category; 226 const char *domainname; 227 struct domainbinding *db; 228 { 229 struct stat st; 230 char *p, *q; 231 char lpath_tmp[BUFSIZ]; 232 233 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp)); 234 q = lpath_tmp; 235 /* CONSTCOND */ 236 while (1) { 237 p = strsep(&q, ":"); 238 if (!p) 239 break; 240 if (!*p) 241 continue; 242 243 /* don't mess with default locales */ 244 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0) 245 return NULL; 246 247 /* validate pathname */ 248 if (strchr(p, '/') || strchr(category, '/')) 249 continue; 250 #if 1 /*?*/ 251 if (strchr(domainname, '/')) 252 continue; 253 #endif 254 255 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p, 256 category, domainname); 257 if (stat(buf, &st) < 0) 258 continue; 259 if ((st.st_mode & S_IFMT) != S_IFREG) 260 continue; 261 262 if (mapit(buf, db) == 0) 263 return buf; 264 } 265 266 return NULL; 267 } 268 269 static u_int32_t 270 flip(v, magic) 271 u_int32_t v; 272 u_int32_t magic; 273 { 274 275 if (magic == MO_MAGIC) 276 return v; 277 else if (magic == MO_MAGIC_SWAPPED) { 278 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | 279 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); 280 return v; 281 } else { 282 abort(); 283 /*NOTREACHED*/ 284 } 285 } 286 287 static int 288 validate(arg, mohandle) 289 void *arg; 290 struct mohandle *mohandle; 291 { 292 char *p; 293 294 p = (char *)arg; 295 if (p < (char *)mohandle->addr || 296 p > (char *)mohandle->addr + mohandle->len) 297 return 0; 298 else 299 return 1; 300 } 301 302 int 303 mapit(path, db) 304 const char *path; 305 struct domainbinding *db; 306 { 307 int fd; 308 struct stat st; 309 char *base; 310 u_int32_t magic, revision; 311 struct moentry *otable, *ttable; 312 struct moentry_h *p; 313 struct mo *mo; 314 size_t l; 315 int i; 316 char *v; 317 struct mohandle *mohandle = &db->mohandle; 318 319 if (mohandle->addr && mohandle->addr != MAP_FAILED && 320 mohandle->mo.mo_magic) 321 return 0; /*already opened*/ 322 323 unmapit(db); 324 325 #if 0 326 if (secure_path(path) != 0) 327 goto fail; 328 #endif 329 if (stat(path, &st) < 0) 330 goto fail; 331 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX) 332 goto fail; 333 fd = open(path, O_RDONLY); 334 if (fd < 0) 335 goto fail; 336 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) || 337 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) { 338 close(fd); 339 goto fail; 340 } 341 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) || 342 flip(revision, magic) != MO_REVISION) { 343 close(fd); 344 goto fail; 345 } 346 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ, 347 MAP_FILE | MAP_SHARED, fd, (off_t)0); 348 if (!mohandle->addr || mohandle->addr == MAP_FAILED) { 349 close(fd); 350 goto fail; 351 } 352 close(fd); 353 mohandle->len = (size_t)st.st_size; 354 355 base = mohandle->addr; 356 mo = (struct mo *)mohandle->addr; 357 358 /* flip endian. do not flip magic number! */ 359 mohandle->mo.mo_magic = mo->mo_magic; 360 mohandle->mo.mo_revision = flip(mo->mo_revision, magic); 361 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic); 362 363 /* validate otable/ttable */ 364 otable = (struct moentry *)(base + flip(mo->mo_otable, magic)); 365 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic)); 366 if (!validate(otable, mohandle) || 367 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) { 368 unmapit(db); 369 goto fail; 370 } 371 if (!validate(ttable, mohandle) || 372 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) { 373 unmapit(db); 374 goto fail; 375 } 376 377 /* allocate [ot]table, and convert to normal pointer representation. */ 378 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring; 379 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l); 380 if (!mohandle->mo.mo_otable) { 381 unmapit(db); 382 goto fail; 383 } 384 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l); 385 if (!mohandle->mo.mo_ttable) { 386 unmapit(db); 387 goto fail; 388 } 389 p = mohandle->mo.mo_otable; 390 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 391 p[i].len = flip(otable[i].len, magic); 392 p[i].off = base + flip(otable[i].off, magic); 393 394 if (!validate(p[i].off, mohandle) || 395 !validate(p[i].off + p[i].len + 1, mohandle)) { 396 unmapit(db); 397 goto fail; 398 } 399 } 400 p = mohandle->mo.mo_ttable; 401 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 402 p[i].len = flip(ttable[i].len, magic); 403 p[i].off = base + flip(ttable[i].off, magic); 404 405 if (!validate(p[i].off, mohandle) || 406 !validate(p[i].off + p[i].len + 1, mohandle)) { 407 unmapit(db); 408 goto fail; 409 } 410 } 411 412 /* grab MIME-header and charset field */ 413 mohandle->mo.mo_header = lookup("", db); 414 if (mohandle->mo.mo_header) 415 v = strstr(mohandle->mo.mo_header, "charset="); 416 else 417 v = NULL; 418 if (v) { 419 mohandle->mo.mo_charset = strdup(v + 8); 420 if (!mohandle->mo.mo_charset) 421 goto fail; 422 v = strchr(mohandle->mo.mo_charset, '\n'); 423 if (v) 424 *v = '\0'; 425 } 426 427 /* 428 * XXX check charset, reject it if we are unable to support the charset 429 * with the current locale. 430 * for example, if we are using euc-jp locale and we are looking at 431 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject 432 * the *.mo file as we cannot support it. 433 */ 434 435 return 0; 436 437 fail: 438 return -1; 439 } 440 441 static int 442 unmapit(db) 443 struct domainbinding *db; 444 { 445 struct mohandle *mohandle = &db->mohandle; 446 447 /* unmap if there's already mapped region */ 448 if (mohandle->addr && mohandle->addr != MAP_FAILED) 449 munmap(mohandle->addr, mohandle->len); 450 mohandle->addr = NULL; 451 if (mohandle->mo.mo_otable) 452 free(mohandle->mo.mo_otable); 453 if (mohandle->mo.mo_ttable) 454 free(mohandle->mo.mo_ttable); 455 if (mohandle->mo.mo_charset) 456 free(mohandle->mo.mo_charset); 457 memset(&mohandle->mo, 0, sizeof(mohandle->mo)); 458 return 0; 459 } 460 461 /* ARGSUSED */ 462 static const char * 463 lookup_hash(msgid, db) 464 const char *msgid; 465 struct domainbinding *db; 466 { 467 468 /* 469 * XXX should try a hashed lookup here, but to do so, we need to 470 * look inside the GPL'ed *.c and re-implement... 471 */ 472 return NULL; 473 } 474 475 static const char * 476 lookup_bsearch(msgid, db) 477 const char *msgid; 478 struct domainbinding *db; 479 { 480 int top, bottom, middle, omiddle; 481 int n; 482 struct mohandle *mohandle = &db->mohandle; 483 484 top = 0; 485 bottom = mohandle->mo.mo_nstring; 486 omiddle = -1; 487 /* CONSTCOND */ 488 while (1) { 489 if (top > bottom) 490 break; 491 middle = (top + bottom) / 2; 492 /* avoid possible infinite loop, when the data is not sorted */ 493 if (omiddle == middle) 494 break; 495 if (middle < 0 || middle >= mohandle->mo.mo_nstring) 496 break; 497 498 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off); 499 if (n == 0) 500 return (const char *)mohandle->mo.mo_ttable[middle].off; 501 else if (n < 0) 502 bottom = middle; 503 else 504 top = middle; 505 omiddle = middle; 506 } 507 508 return NULL; 509 } 510 511 static const char * 512 lookup(msgid, db) 513 const char *msgid; 514 struct domainbinding *db; 515 { 516 const char *v; 517 518 v = lookup_hash(msgid, db); 519 if (v) 520 return v; 521 522 return lookup_bsearch(msgid, db); 523 } 524 525 static const char * 526 get_lang_env(const char *category_name) 527 { 528 const char *lang; 529 530 /* 1. see LANGUAGE variable first. */ 531 lang = getenv("LANGUAGE"); 532 if (lang) 533 return lang; 534 535 /* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */ 536 lang = getenv("LC_ALL"); 537 if (!lang) 538 lang = getenv(category_name); 539 if (!lang) 540 lang = getenv("LANG"); 541 542 if (!lang) 543 return 0; /* error */ 544 545 return split_locale(lang); 546 } 547 548 char * 549 dcngettext(domainname, msgid1, msgid2, n, category) 550 const char *domainname; 551 const char *msgid1; 552 const char *msgid2; 553 unsigned long int n; 554 int category; 555 { 556 const char *msgid; 557 char path[PATH_MAX]; 558 const char *lpath; 559 static char olpath[PATH_MAX]; 560 const char *cname = NULL; 561 const char *v; 562 static char *ocname = NULL; 563 static char *odomainname = NULL; 564 struct domainbinding *db; 565 566 msgid = (n == 1) ? msgid1 : msgid2; 567 if (msgid == NULL) 568 return NULL; 569 570 if (!domainname) 571 domainname = __current_domainname; 572 cname = lookup_category(category); 573 if (!domainname || !cname) 574 goto fail; 575 576 lpath = get_lang_env(cname); 577 if (!lpath) 578 goto fail; 579 580 for (db = __bindings; db; db = db->next) 581 if (strcmp(db->domainname, domainname) == 0) 582 break; 583 if (!db) { 584 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN)) 585 goto fail; 586 db = __bindings; 587 } 588 589 /* resolve relative path */ 590 /* XXX not necessary? */ 591 if (db->path[0] != '/') { 592 char buf[PATH_MAX]; 593 594 if (getcwd(buf, sizeof(buf)) == 0) 595 goto fail; 596 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf)) 597 goto fail; 598 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf)) 599 goto fail; 600 strlcpy(db->path, buf, sizeof(db->path)); 601 } 602 603 /* don't bother looking it up if the values are the same */ 604 if (odomainname && strcmp(domainname, odomainname) == 0 && 605 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 && 606 db->mohandle.mo.mo_magic) 607 goto found; 608 609 /* try to find appropriate file, from $LANGUAGE */ 610 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname, 611 domainname, db) == NULL) 612 goto fail; 613 614 if (odomainname) 615 free(odomainname); 616 if (ocname) 617 free(ocname); 618 odomainname = strdup(domainname); 619 ocname = strdup(cname); 620 if (!odomainname || !ocname) { 621 if (odomainname) 622 free(odomainname); 623 if (ocname) 624 free(ocname); 625 odomainname = ocname = NULL; 626 } 627 else 628 strlcpy(olpath, lpath, sizeof(olpath)); 629 630 found: 631 v = lookup(msgid, db); 632 if (v) { 633 /* 634 * convert the translated message's encoding. 635 * 636 * special case: 637 * a result of gettext("") shouldn't need any conversion. 638 */ 639 if (msgid[0]) 640 v = __gettext_iconv(v, db); 641 642 /* 643 * Given the amount of printf-format security issues, it may 644 * be a good idea to validate if the original msgid and the 645 * translated message format string carry the same printf-like 646 * format identifiers. 647 */ 648 649 msgid = v; 650 } 651 652 fail: 653 /* LINTED const cast */ 654 return (char *)msgid; 655 } 656