1 /* $NetBSD: gettext.c,v 1.7 2000/12/15 06:37:21 itojun Exp $ */ 2 3 /*- 4 * Copyright (c) 2000 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: gettext.c,v 1.7 2000/12/15 06:37:21 itojun Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/stat.h> 37 #include <sys/mman.h> 38 #include <sys/uio.h> 39 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #if 0 46 #include <util.h> 47 #endif 48 #include <libintl.h> 49 #include <locale.h> 50 #include "libintl_local.h" 51 #include "pathnames.h" 52 53 static struct mohandle mohandle; 54 55 static const char *lookup_category __P((int)); 56 static const char *split_locale __P((const char *)); 57 static const char *lookup_mofile __P((char *, size_t, const char *, 58 char *, const char *, const char *)); 59 static u_int32_t flip __P((u_int32_t, u_int32_t)); 60 static int validate __P((void *)); 61 static int mapit __P((const char *)); 62 static int unmapit __P((void)); 63 static const char *lookup_hash __P((const char *)); 64 static const char *lookup_bsearch __P((const char *)); 65 static const char *lookup __P((const char *)); 66 67 /* 68 * shortcut functions. the main implementation resides in dcngettext(). 69 */ 70 char * 71 gettext(msgid) 72 const char *msgid; 73 { 74 75 return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES); 76 } 77 78 char * 79 dgettext(domainname, msgid) 80 const char *domainname; 81 const char *msgid; 82 { 83 84 return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES); 85 } 86 87 char * 88 dcgettext(domainname, msgid, category) 89 const char *domainname; 90 const char *msgid; 91 int category; 92 { 93 94 return dcngettext(domainname, msgid, NULL, 1UL, category); 95 } 96 97 char * 98 ngettext(msgid1, msgid2, n) 99 const char *msgid1; 100 const char *msgid2; 101 unsigned long int n; 102 { 103 104 return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES); 105 } 106 107 char * 108 dngettext(domainname, msgid1, msgid2, n) 109 const char *domainname; 110 const char *msgid1; 111 const char *msgid2; 112 unsigned long int n; 113 { 114 115 return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES); 116 } 117 118 /* 119 * dcngettext() - 120 * lookup internationalized message on database locale/category/domainname 121 * (like ja_JP.eucJP/LC_MESSAGES/domainname). 122 * if n equals to 1, internationalized message will be looked up for msgid1. 123 * otherwise, message will be looked up for msgid2. 124 * if the lookup fails, the function will return msgid1 or msgid2 as is. 125 * 126 * Even though the return type is "char *", caller should not rewrite the 127 * region pointed to by the return value (should be "const char *", but can't 128 * change it for compatibility with other implementations). 129 * 130 * by default (if domainname == NULL), domainname is taken from the value set 131 * by textdomain(). usually name of the application (like "ls") is used as 132 * domainname. category is usually LC_MESSAGES. 133 * 134 * the code reads in *.mo files generated by GNU gettext. *.mo is a host- 135 * endian encoded file. both endians are supported here, as the files are in 136 * /usr/share/locale! (or we should move those files into /usr/libdata) 137 */ 138 139 static const char * 140 lookup_category(category) 141 int category; 142 { 143 144 switch (category) { 145 case LC_COLLATE: return "LC_COLLATE"; 146 case LC_CTYPE: return "LC_CTYPE"; 147 case LC_MONETARY: return "LC_MONETARY"; 148 case LC_NUMERIC: return "LC_NUMERIC"; 149 case LC_TIME: return "LC_TIME"; 150 case LC_MESSAGES: return "LC_MESSAGES"; 151 } 152 return NULL; 153 } 154 155 /* 156 * XPG syntax: language[_territory[.codeset]][@modifier] 157 * XXX boundary check on "result" is lacking 158 */ 159 static const char * 160 split_locale(lname) 161 const char *lname; 162 { 163 char buf[BUFSIZ], tmp[BUFSIZ]; 164 char *l, *t, *c, *m; 165 static char result[BUFSIZ]; 166 167 memset(result, 0, sizeof(result)); 168 169 if (strlen(lname) + 1 > sizeof(buf)) { 170 fail: 171 return lname; 172 } 173 174 strlcpy(buf, lname, sizeof(buf)); 175 m = strrchr(buf, '@'); 176 if (m) 177 *m++ = '\0'; 178 c = strrchr(buf, '.'); 179 if (c) 180 *c++ = '\0'; 181 t = strrchr(buf, '_'); 182 if (t) 183 *t++ = '\0'; 184 l = buf; 185 if (strlen(l) == 0) 186 goto fail; 187 if (c && !t) 188 goto fail; 189 190 if (m) { 191 if (t) { 192 if (c) { 193 snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s", 194 l, t, c, m); 195 strlcat(result, tmp, sizeof(result)); 196 strlcat(result, ":", sizeof(result)); 197 } 198 snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m); 199 strlcat(result, tmp, sizeof(result)); 200 strlcat(result, ":", sizeof(result)); 201 } 202 snprintf(tmp, sizeof(tmp), "%s@%s", l, m); 203 strlcat(result, tmp, sizeof(result)); 204 strlcat(result, ":", sizeof(result)); 205 } 206 if (t) { 207 if (c) { 208 snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c); 209 strlcat(result, tmp, sizeof(result)); 210 strlcat(result, ":", sizeof(result)); 211 } 212 strlcat(result, tmp, sizeof(result)); 213 strlcat(result, ":", sizeof(result)); 214 } 215 strlcat(result, l, sizeof(result)); 216 217 return result; 218 } 219 220 static const char * 221 lookup_mofile(buf, len, dir, lpath, category, domainname) 222 char *buf; 223 size_t len; 224 const char *dir; 225 char *lpath; /* list of locales to be tried */ 226 const char *category; 227 const char *domainname; 228 { 229 struct stat st; 230 char *p, *q; 231 232 q = lpath; 233 while (1) { 234 p = strsep(&q, ":"); 235 if (!p) 236 break; 237 if (!*p) 238 continue; 239 240 /* don't mess with default locales */ 241 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0) 242 return NULL; 243 244 /* validate pathname */ 245 if (strchr(p, '/') || strchr(category, '/')) 246 continue; 247 #if 1 /*?*/ 248 if (strchr(domainname, '/')) 249 continue; 250 #endif 251 252 snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p, 253 category, domainname); 254 if (stat(buf, &st) < 0) 255 continue; 256 if ((st.st_mode & S_IFMT) != S_IFREG) 257 continue; 258 259 if (mapit(buf) == 0) 260 return buf; 261 } 262 263 return NULL; 264 } 265 266 static u_int32_t 267 flip(v, magic) 268 u_int32_t v; 269 u_int32_t magic; 270 { 271 272 if (magic == MO_MAGIC) 273 return v; 274 else if (magic == MO_MAGIC_SWAPPED) { 275 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | 276 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); 277 return v; 278 } else { 279 abort(); 280 /*NOTREACHED*/ 281 } 282 } 283 284 static int 285 validate(arg) 286 void *arg; 287 { 288 char *p; 289 290 p = (char *)arg; 291 if (p < (char *)mohandle.addr || 292 p > (char *)mohandle.addr + mohandle.len) 293 return 0; 294 else 295 return 1; 296 } 297 298 int 299 mapit(path) 300 const char *path; 301 { 302 int fd; 303 struct stat st; 304 char *base; 305 u_int32_t magic, revision; 306 struct moentry *otable, *ttable; 307 struct moentry_h *p; 308 struct mo *mo; 309 size_t l; 310 int i; 311 char *v; 312 313 if (mohandle.addr && mohandle.addr != MAP_FAILED && 314 strcmp(path, mohandle.path) == 0) 315 return 0; /*already opened*/ 316 317 unmapit(); 318 319 #if 0 320 if (secure_path(path) != 0) 321 goto fail; 322 #endif 323 if (stat(path, &st) < 0) 324 goto fail; 325 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX) 326 goto fail; 327 fd = open(path, O_RDONLY); 328 if (fd < 0) 329 goto fail; 330 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) || 331 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) { 332 close(fd); 333 goto fail; 334 } 335 if (read(fd, &revision, sizeof(revision)) != sizeof(revision) || 336 flip(revision, magic) != MO_REVISION) { 337 close(fd); 338 goto fail; 339 } 340 mohandle.addr = mmap(NULL, (size_t)st.st_size, PROT_READ, 341 MAP_FILE | MAP_SHARED, fd, (off_t)0); 342 if (!mohandle.addr || mohandle.addr == MAP_FAILED) { 343 close(fd); 344 goto fail; 345 } 346 close(fd); 347 mohandle.len = (size_t)st.st_size; 348 strlcpy(mohandle.path, path, sizeof(mohandle.path)); 349 350 base = mohandle.addr; 351 mo = (struct mo *)mohandle.addr; 352 353 /* flip endian. do not flip magic number! */ 354 mohandle.mo.mo_magic = mo->mo_magic; 355 mohandle.mo.mo_revision = flip(mo->mo_revision, magic); 356 mohandle.mo.mo_nstring = flip(mo->mo_nstring, magic); 357 358 /* validate otable/ttable */ 359 otable = (struct moentry *)(base + flip(mo->mo_otable, magic)); 360 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic)); 361 if (!validate(otable) || !validate(&otable[mohandle.mo.mo_nstring])) { 362 unmapit(); 363 goto fail; 364 } 365 if (!validate(ttable) || !validate(&ttable[mohandle.mo.mo_nstring])) { 366 unmapit(); 367 goto fail; 368 } 369 370 /* allocate [ot]table, and convert to normal pointer representation. */ 371 l = sizeof(struct moentry_h) * mohandle.mo.mo_nstring; 372 mohandle.mo.mo_otable = (struct moentry_h *)malloc(l); 373 if (!mohandle.mo.mo_otable) { 374 unmapit(); 375 goto fail; 376 } 377 mohandle.mo.mo_ttable = (struct moentry_h *)malloc(l); 378 if (!mohandle.mo.mo_ttable) { 379 unmapit(); 380 goto fail; 381 } 382 p = mohandle.mo.mo_otable; 383 for (i = 0; i < mohandle.mo.mo_nstring; i++) { 384 p[i].len = flip(otable[i].len, magic); 385 p[i].off = base + flip(otable[i].off, magic); 386 387 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) { 388 unmapit(); 389 goto fail; 390 } 391 } 392 p = mohandle.mo.mo_ttable; 393 for (i = 0; i < mohandle.mo.mo_nstring; i++) { 394 p[i].len = flip(ttable[i].len, magic); 395 p[i].off = base + flip(ttable[i].off, magic); 396 397 if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) { 398 unmapit(); 399 goto fail; 400 } 401 } 402 403 /* grab MIME-header and charset field */ 404 mohandle.mo.mo_header = lookup(""); 405 if (mohandle.mo.mo_header) 406 v = strstr(mohandle.mo.mo_header, "charset="); 407 else 408 v = NULL; 409 if (v) { 410 mohandle.mo.mo_charset = strdup(v + 8); 411 if (!mohandle.mo.mo_charset) 412 goto fail; 413 v = strchr(mohandle.mo.mo_charset, '\n'); 414 if (v) 415 *v = '\0'; 416 } 417 418 /* 419 * XXX check charset, reject it if we are unable to support the charset 420 * with the current locale. 421 * for example, if we are using euc-jp locale and we are looking at 422 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject 423 * the *.mo file as we cannot support it. 424 */ 425 426 return 0; 427 428 fail: 429 return -1; 430 } 431 432 static int 433 unmapit() 434 { 435 436 /* unmap if there's already mapped region */ 437 if (mohandle.addr && mohandle.addr != MAP_FAILED) 438 munmap(mohandle.addr, mohandle.len); 439 mohandle.addr = NULL; 440 mohandle.path[0] = '\0'; 441 if (mohandle.mo.mo_otable) 442 free(mohandle.mo.mo_otable); 443 if (mohandle.mo.mo_ttable) 444 free(mohandle.mo.mo_ttable); 445 if (mohandle.mo.mo_charset) 446 free(mohandle.mo.mo_charset); 447 memset(&mohandle.mo, 0, sizeof(mohandle.mo)); 448 return 0; 449 } 450 451 static const char * 452 lookup_hash(msgid) 453 const char *msgid; 454 { 455 456 /* 457 * XXX should try a hashed lookup here, but to do so, we need to 458 * look inside the GPL'ed *.c and re-implement... 459 */ 460 return NULL; 461 } 462 463 static const char * 464 lookup_bsearch(msgid) 465 const char *msgid; 466 { 467 int top, bottom, middle, omiddle; 468 int n; 469 470 top = 0; 471 bottom = mohandle.mo.mo_nstring; 472 omiddle = -1; 473 while (1) { 474 if (top > bottom) 475 break; 476 middle = (top + bottom) / 2; 477 /* avoid possible infinite loop, when the data is not sorted */ 478 if (omiddle == middle) 479 break; 480 if (middle < 0 || middle >= mohandle.mo.mo_nstring) 481 break; 482 483 n = strcmp(msgid, mohandle.mo.mo_otable[middle].off); 484 if (n == 0) 485 return (const char *)mohandle.mo.mo_ttable[middle].off; 486 else if (n < 0) 487 bottom = middle; 488 else 489 top = middle; 490 omiddle = middle; 491 } 492 493 return NULL; 494 } 495 496 static const char * 497 lookup(msgid) 498 const char *msgid; 499 { 500 const char *v; 501 502 v = lookup_hash(msgid); 503 if (v) 504 return v; 505 506 return lookup_bsearch(msgid); 507 } 508 509 char * 510 dcngettext(domainname, msgid1, msgid2, n, category) 511 const char *domainname; 512 const char *msgid1; 513 const char *msgid2; 514 unsigned long int n; 515 int category; 516 { 517 const char *msgid; 518 char path[PATH_MAX]; 519 static char lpath[PATH_MAX]; 520 static char olpath[PATH_MAX]; 521 const char *locale; 522 const char *language; 523 const char *cname = NULL; 524 const char *v; 525 static char *ocname = NULL; 526 static char *odomainname = NULL; 527 struct domainbinding *db; 528 529 msgid = (n == 1) ? msgid1 : msgid2; 530 531 if (!domainname) 532 domainname = __binding.domainname; 533 cname = lookup_category(category); 534 if (!domainname || !cname) 535 goto fail; 536 537 language = getenv("LANGUAGE"); 538 locale = setlocale(LC_MESSAGES, NULL); /*XXX*/ 539 if (locale) 540 locale = split_locale(locale); 541 if (language && locale) { 542 if (strlen(language) + strlen(locale) + 2 > sizeof(lpath)) 543 goto fail; 544 snprintf(lpath, sizeof(lpath), "%s:%s", language, locale); 545 } else if (language) { 546 if (strlen(language) + 1 > sizeof(lpath)) 547 goto fail; 548 strlcpy(lpath, language, sizeof(lpath)); 549 } else if (locale) { 550 if (strlen(locale) + 1 > sizeof(lpath)) 551 goto fail; 552 strlcpy(lpath, locale, sizeof(lpath)); 553 } else 554 goto fail; 555 556 for (db = __binding.next; db; db = db->next) 557 if (strcmp(db->domainname, domainname) == 0) 558 break; 559 if (!db) 560 db = &__binding; 561 562 /* don't bother looking it up if the values are the same */ 563 if (odomainname && strcmp(domainname, odomainname) == 0 && 564 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0) 565 goto found; 566 567 /* try to find appropriate file, from $LANGUAGE */ 568 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname, 569 domainname) == NULL) 570 goto fail; 571 572 if (odomainname) 573 free(odomainname); 574 if (ocname) 575 free(ocname); 576 odomainname = strdup(domainname); 577 ocname = strdup(cname); 578 if (!odomainname || !ocname) { 579 if (odomainname) 580 free(odomainname); 581 if (ocname) 582 free(ocname); 583 odomainname = ocname = NULL; 584 goto fail; 585 } 586 587 strlcpy(olpath, lpath, sizeof(olpath)); 588 589 found: 590 v = lookup(msgid); 591 if (v) { 592 /* 593 * XXX call iconv() here, if translated text is encoded 594 * differently from currently-selected encoding (locale). 595 * look at Content-type header in *.mo file, in string obtained 596 * by gettext(""). 597 */ 598 599 /* 600 * Given the amount of printf-format security issues, it may 601 * be a good idea to validate if the original msgid and the 602 * translated message format string carry the same printf-like 603 * format identifiers. 604 */ 605 606 msgid = v; 607 } 608 609 fail: 610 /* LINTED const cast */ 611 return (char *)msgid; 612 } 613