1 /* $NetBSD: gettext.c,v 1.13 2002/02/13 08:01:13 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2000, 2001 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: gettext.c,v 1.13 2002/02/13 08:01:13 yamt Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 #include <sys/stat.h> 39 #include <sys/mman.h> 40 #include <sys/uio.h> 41 42 #include <fcntl.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <unistd.h> 46 #include <string.h> 47 #if 0 48 #include <util.h> 49 #endif 50 #include <libintl.h> 51 #include <locale.h> 52 #include "libintl_local.h" 53 #include "pathnames.h" 54 55 static const char *lookup_category __P((int)); 56 static const char *split_locale __P((const char *)); 57 static const char *lookup_mofile __P((char *, size_t, const char *, 58 const char *, const char *, const char *, struct domainbinding *)); 59 static u_int32_t flip __P((u_int32_t, u_int32_t)); 60 static int validate __P((void *, struct mohandle *)); 61 static int mapit __P((const char *, struct domainbinding *)); 62 static int unmapit __P((struct domainbinding *)); 63 static const char *lookup_hash __P((const char *, struct domainbinding *)); 64 static const char *lookup_bsearch __P((const char *, struct domainbinding *)); 65 static const char *lookup __P((const char *, struct domainbinding *)); 66 static const char *get_lang_env(const char *); 67 68 /* 69 * shortcut functions. the main implementation resides in dcngettext(). 70 */ 71 char * 72 gettext(msgid) 73 const char *msgid; 74 { 75 76 return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES); 77 } 78 79 char * 80 dgettext(domainname, msgid) 81 const char *domainname; 82 const char *msgid; 83 { 84 85 return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES); 86 } 87 88 char * 89 dcgettext(domainname, msgid, category) 90 const char *domainname; 91 const char *msgid; 92 int category; 93 { 94 95 return dcngettext(domainname, msgid, NULL, 1UL, category); 96 } 97 98 char * 99 ngettext(msgid1, msgid2, n) 100 const char *msgid1; 101 const char *msgid2; 102 unsigned long int n; 103 { 104 105 return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES); 106 } 107 108 char * 109 dngettext(domainname, msgid1, msgid2, n) 110 const char *domainname; 111 const char *msgid1; 112 const char *msgid2; 113 unsigned long int n; 114 { 115 116 return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES); 117 } 118 119 /* 120 * dcngettext() - 121 * lookup internationalized message on database locale/category/domainname 122 * (like ja_JP.eucJP/LC_MESSAGES/domainname). 123 * if n equals to 1, internationalized message will be looked up for msgid1. 124 * otherwise, message will be looked up for msgid2. 125 * if the lookup fails, the function will return msgid1 or msgid2 as is. 126 * 127 * Even though the return type is "char *", caller should not rewrite the 128 * region pointed to by the return value (should be "const char *", but can't 129 * change it for compatibility with other implementations). 130 * 131 * by default (if domainname == NULL), domainname is taken from the value set 132 * by textdomain(). usually name of the application (like "ls") is used as 133 * domainname. category is usually LC_MESSAGES. 134 * 135 * the code reads in *.mo files generated by GNU gettext. *.mo is a host- 136 * endian encoded file. both endians are supported here, as the files are in 137 * /usr/share/locale! (or we should move those files into /usr/libdata) 138 */ 139 140 static const char * 141 lookup_category(category) 142 int category; 143 { 144 145 switch (category) { 146 case LC_COLLATE: return "LC_COLLATE"; 147 case LC_CTYPE: return "LC_CTYPE"; 148 case LC_MONETARY: return "LC_MONETARY"; 149 case LC_NUMERIC: return "LC_NUMERIC"; 150 case LC_TIME: return "LC_TIME"; 151 case LC_MESSAGES: return "LC_MESSAGES"; 152 } 153 return NULL; 154 } 155 156 /* 157 * XPG syntax: language[_territory[.codeset]][@modifier] 158 * XXX boundary check on "result" is lacking 159 */ 160 static const char * 161 split_locale(lname) 162 const char *lname; 163 { 164 char buf[BUFSIZ], tmp[BUFSIZ]; 165 char *l, *t, *c, *m; 166 static char result[BUFSIZ]; 167 168 memset(result, 0, sizeof(result)); 169 170 if (strlen(lname) + 1 > sizeof(buf)) { 171 fail: 172 return lname; 173 } 174 175 strlcpy(buf, lname, sizeof(buf)); 176 m = strrchr(buf, '@'); 177 if (m) 178 *m++ = '\0'; 179 c = strrchr(buf, '.'); 180 if (c) 181 *c++ = '\0'; 182 t = strrchr(buf, '_'); 183 if (t) 184 *t++ = '\0'; 185 l = buf; 186 if (strlen(l) == 0) 187 goto fail; 188 if (c && !t) 189 goto fail; 190 191 if (m) { 192 if (t) { 193 if (c) { 194 snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s", 195 l, t, c, m); 196 strlcat(result, tmp, sizeof(result)); 197 strlcat(result, ":", sizeof(result)); 198 } 199 snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m); 200 strlcat(result, tmp, sizeof(result)); 201 strlcat(result, ":", sizeof(result)); 202 } 203 snprintf(tmp, sizeof(tmp), "%s@%s", l, m); 204 strlcat(result, tmp, sizeof(result)); 205 strlcat(result, ":", sizeof(result)); 206 } 207 if (t) { 208 if (c) { 209 snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c); 210 strlcat(result, tmp, sizeof(result)); 211 strlcat(result, ":", sizeof(result)); 212 } 213 snprintf(tmp, sizeof(tmp), "%s_%s", l, t); 214 strlcat(result, tmp, sizeof(result)); 215 strlcat(result, ":", sizeof(result)); 216 } 217 strlcat(result, l, sizeof(result)); 218 219 return result; 220 } 221 222 static const char * 223 lookup_mofile(buf, len, dir, lpath, category, domainname, db) 224 char *buf; 225 size_t len; 226 const char *dir; 227 const char *lpath; /* list of locales to be tried */ 228 const char *category; 229 const char *domainname; 230 struct domainbinding *db; 231 { 232 struct stat st; 233 char *p, *q; 234 char lpath_tmp[BUFSIZ]; 235 236 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp)); 237 q = lpath_tmp; 238 /* CONSTCOND */ 239 while (1) { 240 p = strsep(&q, ":"); 241 if (!p) 242 break; 243 if (!*p) 244 continue; 245 246 /* don't mess with default locales */ 247 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0) 248 return NULL; 249 250 /* validate pathname */ 251 if (strchr(p, '/') || strchr(category, '/')) 252 continue; 253 #if 1 /*?*/ 254 if (strchr(domainname, '/')) 255 continue; 256 #endif 257 258 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p, 259 category, domainname); 260 if (stat(buf, &st) < 0) 261 continue; 262 if ((st.st_mode & S_IFMT) != S_IFREG) 263 continue; 264 265 if (mapit(buf, db) == 0) 266 return buf; 267 } 268 269 return NULL; 270 } 271 272 static u_int32_t 273 flip(v, magic) 274 u_int32_t v; 275 u_int32_t magic; 276 { 277 278 if (magic == MO_MAGIC) 279 return v; 280 else if (magic == MO_MAGIC_SWAPPED) { 281 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | 282 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); 283 return v; 284 } else { 285 abort(); 286 /*NOTREACHED*/ 287 } 288 } 289 290 static int 291 validate(arg, mohandle) 292 void *arg; 293 struct mohandle *mohandle; 294 { 295 char *p; 296 297 p = (char *)arg; 298 if (p < (char *)mohandle->addr || 299 p > (char *)mohandle->addr + mohandle->len) 300 return 0; 301 else 302 return 1; 303 } 304 305 int 306 mapit(path, db) 307 const char *path; 308 struct domainbinding *db; 309 { 310 int fd; 311 struct stat st; 312 char *base; 313 u_int32_t magic, revision; 314 struct moentry *otable, *ttable; 315 struct moentry_h *p; 316 struct mo *mo; 317 size_t l; 318 int i; 319 char *v; 320 struct mohandle *mohandle = &db->mohandle; 321 322 if (mohandle->addr && mohandle->addr != MAP_FAILED && 323 mohandle->mo.mo_magic) 324 return 0; /*already opened*/ 325 326 unmapit(db); 327 328 #if 0 329 if (secure_path(path) != 0) 330 goto fail; 331 #endif 332 if (stat(path, &st) < 0) 333 goto fail; 334 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX) 335 goto fail; 336 fd = open(path, O_RDONLY); 337 if (fd < 0) 338 goto fail; 339 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) || 340 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) { 341 close(fd); 342 goto fail; 343 } 344 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) || 345 flip(revision, magic) != MO_REVISION) { 346 close(fd); 347 goto fail; 348 } 349 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ, 350 MAP_FILE | MAP_SHARED, fd, (off_t)0); 351 if (!mohandle->addr || mohandle->addr == MAP_FAILED) { 352 close(fd); 353 goto fail; 354 } 355 close(fd); 356 mohandle->len = (size_t)st.st_size; 357 358 base = mohandle->addr; 359 mo = (struct mo *)mohandle->addr; 360 361 /* flip endian. do not flip magic number! */ 362 mohandle->mo.mo_magic = mo->mo_magic; 363 mohandle->mo.mo_revision = flip(mo->mo_revision, magic); 364 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic); 365 366 /* validate otable/ttable */ 367 otable = (struct moentry *)(base + flip(mo->mo_otable, magic)); 368 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic)); 369 if (!validate(otable, mohandle) || 370 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) { 371 unmapit(db); 372 goto fail; 373 } 374 if (!validate(ttable, mohandle) || 375 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) { 376 unmapit(db); 377 goto fail; 378 } 379 380 /* allocate [ot]table, and convert to normal pointer representation. */ 381 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring; 382 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l); 383 if (!mohandle->mo.mo_otable) { 384 unmapit(db); 385 goto fail; 386 } 387 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l); 388 if (!mohandle->mo.mo_ttable) { 389 unmapit(db); 390 goto fail; 391 } 392 p = mohandle->mo.mo_otable; 393 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 394 p[i].len = flip(otable[i].len, magic); 395 p[i].off = base + flip(otable[i].off, magic); 396 397 if (!validate(p[i].off, mohandle) || 398 !validate(p[i].off + p[i].len + 1, mohandle)) { 399 unmapit(db); 400 goto fail; 401 } 402 } 403 p = mohandle->mo.mo_ttable; 404 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 405 p[i].len = flip(ttable[i].len, magic); 406 p[i].off = base + flip(ttable[i].off, magic); 407 408 if (!validate(p[i].off, mohandle) || 409 !validate(p[i].off + p[i].len + 1, mohandle)) { 410 unmapit(db); 411 goto fail; 412 } 413 } 414 415 /* grab MIME-header and charset field */ 416 mohandle->mo.mo_header = lookup("", db); 417 if (mohandle->mo.mo_header) 418 v = strstr(mohandle->mo.mo_header, "charset="); 419 else 420 v = NULL; 421 if (v) { 422 mohandle->mo.mo_charset = strdup(v + 8); 423 if (!mohandle->mo.mo_charset) 424 goto fail; 425 v = strchr(mohandle->mo.mo_charset, '\n'); 426 if (v) 427 *v = '\0'; 428 } 429 430 /* 431 * XXX check charset, reject it if we are unable to support the charset 432 * with the current locale. 433 * for example, if we are using euc-jp locale and we are looking at 434 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject 435 * the *.mo file as we cannot support it. 436 */ 437 438 return 0; 439 440 fail: 441 return -1; 442 } 443 444 static int 445 unmapit(db) 446 struct domainbinding *db; 447 { 448 struct mohandle *mohandle = &db->mohandle; 449 450 /* unmap if there's already mapped region */ 451 if (mohandle->addr && mohandle->addr != MAP_FAILED) 452 munmap(mohandle->addr, mohandle->len); 453 mohandle->addr = NULL; 454 if (mohandle->mo.mo_otable) 455 free(mohandle->mo.mo_otable); 456 if (mohandle->mo.mo_ttable) 457 free(mohandle->mo.mo_ttable); 458 if (mohandle->mo.mo_charset) 459 free(mohandle->mo.mo_charset); 460 memset(&mohandle->mo, 0, sizeof(mohandle->mo)); 461 return 0; 462 } 463 464 /* ARGSUSED */ 465 static const char * 466 lookup_hash(msgid, db) 467 const char *msgid; 468 struct domainbinding *db; 469 { 470 471 /* 472 * XXX should try a hashed lookup here, but to do so, we need to 473 * look inside the GPL'ed *.c and re-implement... 474 */ 475 return NULL; 476 } 477 478 static const char * 479 lookup_bsearch(msgid, db) 480 const char *msgid; 481 struct domainbinding *db; 482 { 483 int top, bottom, middle, omiddle; 484 int n; 485 struct mohandle *mohandle = &db->mohandle; 486 487 top = 0; 488 bottom = mohandle->mo.mo_nstring; 489 omiddle = -1; 490 /* CONSTCOND */ 491 while (1) { 492 if (top > bottom) 493 break; 494 middle = (top + bottom) / 2; 495 /* avoid possible infinite loop, when the data is not sorted */ 496 if (omiddle == middle) 497 break; 498 if (middle < 0 || middle >= mohandle->mo.mo_nstring) 499 break; 500 501 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off); 502 if (n == 0) 503 return (const char *)mohandle->mo.mo_ttable[middle].off; 504 else if (n < 0) 505 bottom = middle; 506 else 507 top = middle; 508 omiddle = middle; 509 } 510 511 return NULL; 512 } 513 514 static const char * 515 lookup(msgid, db) 516 const char *msgid; 517 struct domainbinding *db; 518 { 519 const char *v; 520 521 v = lookup_hash(msgid, db); 522 if (v) 523 return v; 524 525 return lookup_bsearch(msgid, db); 526 } 527 528 static const char *get_lang_env(const char *category_name) 529 { 530 const char *lang; 531 532 /* 1. see LANGUAGE variable first. */ 533 lang = getenv("LANGUAGE"); 534 if (lang) 535 return lang; 536 537 /* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */ 538 lang = getenv("LC_ALL"); 539 if (!lang) 540 lang = getenv(category_name); 541 if (!lang) 542 lang = getenv("LANG"); 543 544 if (!lang) 545 return 0; /* error */ 546 547 return split_locale(lang); 548 } 549 550 char * 551 dcngettext(domainname, msgid1, msgid2, n, category) 552 const char *domainname; 553 const char *msgid1; 554 const char *msgid2; 555 unsigned long int n; 556 int category; 557 { 558 const char *msgid; 559 char path[PATH_MAX]; 560 const char *lpath; 561 static char olpath[PATH_MAX]; 562 const char *cname = NULL; 563 const char *v; 564 static char *ocname = NULL; 565 static char *odomainname = NULL; 566 struct domainbinding *db; 567 568 msgid = (n == 1) ? msgid1 : msgid2; 569 if (msgid == NULL) 570 return NULL; 571 572 if (!domainname) 573 domainname = __current_domainname; 574 cname = lookup_category(category); 575 if (!domainname || !cname) 576 goto fail; 577 578 lpath = get_lang_env(cname); 579 if (!lpath) 580 goto fail; 581 582 for (db = __bindings; db; db = db->next) 583 if (strcmp(db->domainname, domainname) == 0) 584 break; 585 if (!db) { 586 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN)) 587 goto fail; 588 db = __bindings; 589 } 590 591 /* resolve relative path */ 592 /* XXX not necessary? */ 593 if (db->path[0] != '/') { 594 char buf[PATH_MAX]; 595 596 if (getcwd(buf, sizeof(buf)) == 0) 597 goto fail; 598 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf)) 599 goto fail; 600 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf)) 601 goto fail; 602 strcpy(db->path, buf); 603 } 604 605 /* don't bother looking it up if the values are the same */ 606 if (odomainname && strcmp(domainname, odomainname) == 0 && 607 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 && 608 db->mohandle.mo.mo_magic) 609 goto found; 610 611 /* try to find appropriate file, from $LANGUAGE */ 612 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname, 613 domainname, db) == NULL) 614 goto fail; 615 616 if (odomainname) 617 free(odomainname); 618 if (ocname) 619 free(ocname); 620 odomainname = strdup(domainname); 621 ocname = strdup(cname); 622 if (!odomainname || !ocname) { 623 if (odomainname) 624 free(odomainname); 625 if (ocname) 626 free(ocname); 627 odomainname = ocname = NULL; 628 } 629 else 630 strlcpy(olpath, lpath, sizeof(olpath)); 631 632 found: 633 v = lookup(msgid, db); 634 if (v) { 635 /* 636 * XXX call iconv() here, if translated text is encoded 637 * differently from currently-selected encoding (locale). 638 * look at Content-type header in *.mo file, in string obtained 639 * by gettext(""). 640 */ 641 642 /* 643 * Given the amount of printf-format security issues, it may 644 * be a good idea to validate if the original msgid and the 645 * translated message format string carry the same printf-like 646 * format identifiers. 647 */ 648 649 msgid = v; 650 } 651 652 fail: 653 /* LINTED const cast */ 654 return (char *)msgid; 655 } 656