1 /* $NetBSD: gettext.c,v 1.32 2024/04/13 02:01:38 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2000, 2001 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: gettext.c,v 1.32 2024/04/13 02:01:38 christos Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <sys/mman.h> 37 #include <sys/uio.h> 38 39 #include <assert.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #if 0 46 #include <util.h> 47 #endif 48 #include <libintl.h> 49 #include <locale.h> 50 #include "libintl_local.h" 51 #include "plural_parser.h" 52 #include "pathnames.h" 53 54 /* GNU gettext added a hack to add some context to messages. If a message is 55 * used in multiple locations, it needs some amount of context to make the 56 * translation clear to translators. GNU gettext, rather than modifying the 57 * message format, concatenates the context, \004 and the message id. 58 */ 59 #define MSGCTXT_ID_SEPARATOR '\004' 60 61 static const char *pgettext_impl(const char *, const char *, const char *, 62 const char *, unsigned long int, int); 63 static char *concatenate_ctxt_id(const char *, const char *); 64 static const char *lookup_category(int); 65 static const char *split_locale(const char *); 66 static const char *lookup_mofile(char *, size_t, const char *, const char *, 67 const char *, const char *, 68 struct domainbinding *); 69 static uint32_t flip(uint32_t, uint32_t); 70 static int validate(void *, struct mohandle *); 71 static int mapit(const char *, struct domainbinding *); 72 static int unmapit(struct domainbinding *); 73 static const char *lookup_hash(const char *, struct domainbinding *, size_t *); 74 static const char *lookup_bsearch(const char *, struct domainbinding *, 75 size_t *); 76 static const char *lookup(const char *, struct domainbinding *, size_t *); 77 static const char *get_lang_env(const char *); 78 79 /* 80 * shortcut functions. the main implementation resides in dcngettext(). 81 */ 82 char * 83 gettext(const char *msgid) 84 { 85 86 return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES); 87 } 88 89 char * 90 dgettext(const char *domainname, const char *msgid) 91 { 92 93 return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES); 94 } 95 96 char * 97 dcgettext(const char *domainname, const char *msgid, int category) 98 { 99 100 return dcngettext(domainname, msgid, NULL, 1UL, category); 101 } 102 103 char * 104 ngettext(const char *msgid1, const char *msgid2, unsigned long int n) 105 { 106 107 return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES); 108 } 109 110 char * 111 dngettext(const char *domainname, const char *msgid1, const char *msgid2, 112 unsigned long int n) 113 { 114 115 return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES); 116 } 117 118 const char * 119 pgettext(const char *msgctxt, const char *msgid) 120 { 121 122 return pgettext_impl(NULL, msgctxt, msgid, NULL, 1UL, LC_MESSAGES); 123 } 124 125 const char * 126 dpgettext(const char *domainname, const char *msgctxt, const char *msgid) 127 { 128 129 return pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL, LC_MESSAGES); 130 } 131 132 const char * 133 dcpgettext(const char *domainname, const char *msgctxt, const char *msgid, 134 int category) 135 { 136 137 return pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL, category); 138 } 139 140 const char * 141 npgettext(const char *msgctxt, const char *msgid1, const char *msgid2, 142 unsigned long int n) 143 { 144 145 return pgettext_impl(NULL, msgctxt, msgid1, msgid2, n, LC_MESSAGES); 146 } 147 148 const char * 149 dnpgettext(const char *domainname, const char *msgctxt, const char *msgid1, 150 const char *msgid2, unsigned long int n) 151 { 152 153 return pgettext_impl(domainname, msgctxt, msgid1, msgid2, n, LC_MESSAGES); 154 } 155 156 const char * 157 dcnpgettext(const char *domainname, const char *msgctxt, const char *msgid1, 158 const char *msgid2, unsigned long int n, int category) 159 { 160 161 return pgettext_impl(domainname, msgctxt, msgid1, msgid2, n, category); 162 } 163 164 static const char * 165 pgettext_impl(const char *domainname, const char *msgctxt, const char *msgid1, 166 const char *msgid2, unsigned long int n, int category) 167 { 168 char *msgctxt_id; 169 char *translation; 170 char *p; 171 172 if ((msgctxt_id = concatenate_ctxt_id(msgctxt, msgid1)) == NULL) 173 return msgid1; 174 175 translation = dcngettext(domainname, msgctxt_id, 176 msgid2, n, category); 177 free(msgctxt_id); 178 179 if (translation == msgctxt_id) 180 return msgid1; 181 182 p = strchr(translation, '\004'); 183 if (p) 184 return p + 1; 185 return translation; 186 } 187 188 /* 189 * dcngettext() - 190 * lookup internationalized message on database locale/category/domainname 191 * (like ja_JP.eucJP/LC_MESSAGES/domainname). 192 * if n equals to 1, internationalized message will be looked up for msgid1. 193 * otherwise, message will be looked up for msgid2. 194 * if the lookup fails, the function will return msgid1 or msgid2 as is. 195 * 196 * Even though the return type is "char *", caller should not rewrite the 197 * region pointed to by the return value (should be "const char *", but can't 198 * change it for compatibility with other implementations). 199 * 200 * by default (if domainname == NULL), domainname is taken from the value set 201 * by textdomain(). usually name of the application (like "ls") is used as 202 * domainname. category is usually LC_MESSAGES. 203 * 204 * the code reads in *.mo files generated by GNU gettext. *.mo is a host- 205 * endian encoded file. both endians are supported here, as the files are in 206 * /usr/share/locale! (or we should move those files into /usr/libdata) 207 */ 208 209 static char * 210 concatenate_ctxt_id(const char *msgctxt, const char *msgid) 211 { 212 char *ret; 213 214 if (asprintf(&ret, "%s%c%s", msgctxt, MSGCTXT_ID_SEPARATOR, msgid) == -1) 215 return NULL; 216 217 return ret; 218 } 219 220 static const char * 221 lookup_category(int category) 222 { 223 224 switch (category) { 225 case LC_COLLATE: return "LC_COLLATE"; 226 case LC_CTYPE: return "LC_CTYPE"; 227 case LC_MONETARY: return "LC_MONETARY"; 228 case LC_NUMERIC: return "LC_NUMERIC"; 229 case LC_TIME: return "LC_TIME"; 230 case LC_MESSAGES: return "LC_MESSAGES"; 231 } 232 return NULL; 233 } 234 235 #define MAXBUFLEN 1024 236 /* 237 * XPG syntax: language[_territory[.codeset]][@modifier] 238 * XXX boundary check on "result" is lacking 239 */ 240 static const char * 241 split_locale(const char *lname) 242 { 243 char buf[MAXBUFLEN], tmp[2 * MAXBUFLEN]; 244 char *l, *t, *c, *m; 245 static char result[4 * MAXBUFLEN]; 246 247 memset(result, 0, sizeof(result)); 248 249 if (strlen(lname) + 1 > sizeof(buf)) { 250 fail: 251 return lname; 252 } 253 254 strlcpy(buf, lname, sizeof(buf)); 255 m = strrchr(buf, '@'); 256 if (m) 257 *m++ = '\0'; 258 c = strrchr(buf, '.'); 259 if (c) 260 *c++ = '\0'; 261 t = strrchr(buf, '_'); 262 if (t) 263 *t++ = '\0'; 264 l = buf; 265 if (strlen(l) == 0) 266 goto fail; 267 if (c && !t) 268 goto fail; 269 270 if (m) { 271 if (t) { 272 if (c) { 273 snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s", 274 l, t, c, m); 275 strlcat(result, tmp, sizeof(result)); 276 strlcat(result, ":", sizeof(result)); 277 } 278 snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m); 279 strlcat(result, tmp, sizeof(result)); 280 strlcat(result, ":", sizeof(result)); 281 } 282 snprintf(tmp, sizeof(tmp), "%s@%s", l, m); 283 strlcat(result, tmp, sizeof(result)); 284 strlcat(result, ":", sizeof(result)); 285 } 286 if (t) { 287 if (c) { 288 snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c); 289 strlcat(result, tmp, sizeof(result)); 290 strlcat(result, ":", sizeof(result)); 291 } 292 snprintf(tmp, sizeof(tmp), "%s_%s", l, t); 293 strlcat(result, tmp, sizeof(result)); 294 strlcat(result, ":", sizeof(result)); 295 } 296 strlcat(result, l, sizeof(result)); 297 298 return result; 299 } 300 301 static const char * 302 lookup_mofile(char *buf, size_t len, const char *dir, const char *lpath, 303 const char *category, const char *domainname, 304 struct domainbinding *db) 305 { 306 struct stat st; 307 char *p, *q; 308 char lpath_tmp[BUFSIZ]; 309 310 /* 311 * LANGUAGE is a colon separated list of locale names. 312 */ 313 314 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp)); 315 q = lpath_tmp; 316 /* CONSTCOND */ 317 while (1) { 318 p = strsep(&q, ":"); 319 if (!p) 320 break; 321 if (!*p) 322 continue; 323 324 /* don't mess with default locales */ 325 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0) 326 return NULL; 327 328 /* validate pathname */ 329 if (strchr(p, '/') || strchr(category, '/')) 330 continue; 331 #if 1 /*?*/ 332 if (strchr(domainname, '/')) 333 continue; 334 #endif 335 336 int rv = snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p, 337 category, domainname); 338 if (rv > (int)len) 339 return NULL; 340 if (stat(buf, &st) < 0) 341 continue; 342 if ((st.st_mode & S_IFMT) != S_IFREG) 343 continue; 344 345 if (mapit(buf, db) == 0) 346 return buf; 347 } 348 349 return NULL; 350 } 351 352 static uint32_t 353 flip(uint32_t v, uint32_t magic) 354 { 355 356 if (magic == MO_MAGIC) 357 return v; 358 else if (magic == MO_MAGIC_SWAPPED) { 359 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | 360 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); 361 return v; 362 } else { 363 abort(); 364 /*NOTREACHED*/ 365 } 366 } 367 368 static int 369 validate(void *arg, struct mohandle *mohandle) 370 { 371 char *p; 372 373 p = (char *)arg; 374 if (p < (char *)mohandle->addr || 375 p > (char *)mohandle->addr + mohandle->len) 376 return 0; 377 else 378 return 1; 379 } 380 381 /* 382 * calculate the step value if the hash value is conflicted. 383 */ 384 static __inline uint32_t 385 calc_collision_step(uint32_t hashval, uint32_t hashsize) 386 { 387 _DIAGASSERT(hashsize>2); 388 return (hashval % (hashsize - 2)) + 1; 389 } 390 391 /* 392 * calculate the next index while conflicting. 393 */ 394 static __inline uint32_t 395 calc_next_index(uint32_t curidx, uint32_t hashsize, uint32_t step) 396 { 397 return curidx+step - (curidx >= hashsize-step ? hashsize : 0); 398 } 399 400 static int 401 get_sysdep_string_table(struct mosysdepstr_h **table_h, uint32_t *ofstable, 402 uint32_t nstrings, uint32_t magic, char *base) 403 { 404 unsigned int i; 405 int j, count; 406 size_t l; 407 struct mosysdepstr *table; 408 409 for (i=0; i<nstrings; i++) { 410 /* get mosysdepstr record */ 411 /* LINTED: ignore the alignment problem. */ 412 table = (struct mosysdepstr *)(base + flip(ofstable[i], magic)); 413 /* count number of segments */ 414 count = 0; 415 while (flip(table->segs[count++].ref, magic) != MO_LASTSEG) 416 ; 417 /* get table */ 418 l = sizeof(struct mosysdepstr_h) + 419 sizeof(struct mosysdepsegentry_h) * (count-1); 420 table_h[i] = (struct mosysdepstr_h *)malloc(l); 421 if (!table_h[i]) 422 return -1; 423 memset(table_h[i], 0, l); 424 table_h[i]->off = (const char *)(base + flip(table->off, magic)); 425 for (j=0; j<count; j++) { 426 table_h[i]->segs[j].len = 427 flip(table->segs[j].len, magic); 428 table_h[i]->segs[j].ref = 429 flip(table->segs[j].ref, magic); 430 } 431 /* LINTED: ignore the alignment problem. */ 432 table = (struct mosysdepstr *)&table->segs[count]; 433 } 434 return 0; 435 } 436 437 static int 438 expand_sysdep(struct mohandle *mohandle, struct mosysdepstr_h *str) 439 { 440 int i; 441 const char *src; 442 char *dst; 443 444 /* check whether already expanded */ 445 if (str->expanded) 446 return 0; 447 448 /* calc total length */ 449 str->expanded_len = 1; 450 for (i=0; /*CONSTCOND*/1; i++) { 451 str->expanded_len += str->segs[i].len; 452 if (str->segs[i].ref == MO_LASTSEG) 453 break; 454 str->expanded_len += 455 mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len; 456 } 457 /* expand */ 458 str->expanded = malloc(str->expanded_len); 459 if (!str->expanded) 460 return -1; 461 src = str->off; 462 dst = str->expanded; 463 for (i=0; /*CONSTCOND*/1; i++) { 464 memcpy(dst, src, str->segs[i].len); 465 src += str->segs[i].len; 466 dst += str->segs[i].len; 467 if (str->segs[i].ref == MO_LASTSEG) 468 break; 469 memcpy(dst, mohandle->mo.mo_sysdep_segs[str->segs[i].ref].str, 470 mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len); 471 dst += mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len; 472 } 473 *dst = '\0'; 474 475 return 0; 476 } 477 478 static void 479 insert_to_hash(uint32_t *htable, uint32_t hsize, const char *str, uint32_t ref) 480 { 481 uint32_t hashval, idx, step; 482 483 hashval = __intl_string_hash(str); 484 step = calc_collision_step(hashval, hsize); 485 idx = hashval % hsize; 486 487 while (htable[idx]) 488 idx = calc_next_index(idx, hsize, step); 489 490 htable[idx] = ref; 491 } 492 493 static int 494 setup_sysdep_stuffs(struct mo *mo, struct mohandle *mohandle, char *base) 495 { 496 uint32_t magic; 497 struct moentry *stable; 498 size_t l; 499 unsigned int i; 500 char *v; 501 uint32_t *ofstable; 502 503 magic = mo->mo_magic; 504 505 mohandle->mo.mo_sysdep_nsegs = flip(mo->mo_sysdep_nsegs, magic); 506 mohandle->mo.mo_sysdep_nstring = flip(mo->mo_sysdep_nstring, magic); 507 508 if (mohandle->mo.mo_sysdep_nstring == 0) 509 return 0; 510 511 /* check hash size */ 512 if (mohandle->mo.mo_hsize <= 2 || 513 mohandle->mo.mo_hsize < 514 (mohandle->mo.mo_nstring + mohandle->mo.mo_sysdep_nstring)) 515 return -1; 516 517 /* get sysdep segments */ 518 l = sizeof(struct mosysdepsegs_h) * mohandle->mo.mo_sysdep_nsegs; 519 mohandle->mo.mo_sysdep_segs = (struct mosysdepsegs_h *)malloc(l); 520 if (!mohandle->mo.mo_sysdep_segs) 521 return -1; 522 /* LINTED: ignore the alignment problem. */ 523 stable = (struct moentry *)(base + flip(mo->mo_sysdep_segoff, magic)); 524 for (i=0; i<mohandle->mo.mo_sysdep_nsegs; i++) { 525 v = base + flip(stable[i].off, magic); 526 mohandle->mo.mo_sysdep_segs[i].str = 527 __intl_sysdep_get_string_by_tag( 528 v, 529 &mohandle->mo.mo_sysdep_segs[i].len); 530 } 531 532 /* get sysdep string table */ 533 mohandle->mo.mo_sysdep_otable = 534 (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring, 535 sizeof(struct mosysdepstr_h *)); 536 if (!mohandle->mo.mo_sysdep_otable) 537 return -1; 538 /* LINTED: ignore the alignment problem. */ 539 ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_otable, magic)); 540 if (get_sysdep_string_table(mohandle->mo.mo_sysdep_otable, ofstable, 541 mohandle->mo.mo_sysdep_nstring, magic, 542 base)) 543 return -1; 544 mohandle->mo.mo_sysdep_ttable = 545 (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring, 546 sizeof(struct mosysdepstr_h *)); 547 if (!mohandle->mo.mo_sysdep_ttable) 548 return -1; 549 /* LINTED: ignore the alignment problem. */ 550 ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_ttable, magic)); 551 if (get_sysdep_string_table(mohandle->mo.mo_sysdep_ttable, ofstable, 552 mohandle->mo.mo_sysdep_nstring, magic, 553 base)) 554 return -1; 555 556 /* update hash */ 557 for (i=0; i<mohandle->mo.mo_sysdep_nstring; i++) { 558 if (expand_sysdep(mohandle, mohandle->mo.mo_sysdep_otable[i])) 559 return -1; 560 insert_to_hash(mohandle->mo.mo_htable, 561 mohandle->mo.mo_hsize, 562 mohandle->mo.mo_sysdep_otable[i]->expanded, 563 (i+1) | MO_HASH_SYSDEP_MASK); 564 } 565 566 return 0; 567 } 568 569 int 570 mapit(const char *path, struct domainbinding *db) 571 { 572 int fd; 573 struct stat st; 574 char *base; 575 uint32_t magic, revision, flags = 0; 576 struct moentry *otable, *ttable; 577 const uint32_t *htable; 578 struct moentry_h *p; 579 struct mo *mo; 580 size_t l, headerlen; 581 unsigned int i; 582 char *v; 583 struct mohandle *mohandle = &db->mohandle; 584 585 if (mohandle->addr && mohandle->addr != MAP_FAILED && 586 mohandle->mo.mo_magic) 587 return 0; /*already opened*/ 588 589 unmapit(db); 590 591 #if 0 592 if (secure_path(path) != 0) 593 goto fail; 594 #endif 595 if (stat(path, &st) < 0) 596 goto fail; 597 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX) 598 goto fail; 599 fd = open(path, O_RDONLY); 600 if (fd < 0) 601 goto fail; 602 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) || 603 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) { 604 close(fd); 605 goto fail; 606 } 607 if (read(fd, &revision, sizeof(revision)) != sizeof(revision)) { 608 close(fd); 609 goto fail; 610 } 611 switch (flip(revision, magic)) { 612 case MO_MAKE_REV(0, 0): 613 break; 614 case MO_MAKE_REV(0, 1): 615 case MO_MAKE_REV(1, 1): 616 flags |= MO_F_SYSDEP; 617 break; 618 default: 619 close(fd); 620 goto fail; 621 } 622 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ, 623 MAP_FILE | MAP_SHARED, fd, (off_t)0); 624 if (!mohandle->addr || mohandle->addr == MAP_FAILED) { 625 close(fd); 626 goto fail; 627 } 628 close(fd); 629 mohandle->len = (size_t)st.st_size; 630 631 base = mohandle->addr; 632 mo = (struct mo *)mohandle->addr; 633 634 /* flip endian. do not flip magic number! */ 635 mohandle->mo.mo_magic = mo->mo_magic; 636 mohandle->mo.mo_revision = flip(mo->mo_revision, magic); 637 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic); 638 mohandle->mo.mo_hsize = flip(mo->mo_hsize, magic); 639 mohandle->mo.mo_flags = flags; 640 641 /* validate otable/ttable */ 642 /* LINTED: ignore the alignment problem. */ 643 otable = (struct moentry *)(base + flip(mo->mo_otable, magic)); 644 /* LINTED: ignore the alignment problem. */ 645 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic)); 646 if (!validate(otable, mohandle) || 647 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) { 648 unmapit(db); 649 goto fail; 650 } 651 if (!validate(ttable, mohandle) || 652 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) { 653 unmapit(db); 654 goto fail; 655 } 656 657 /* allocate [ot]table, and convert to normal pointer representation. */ 658 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring; 659 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l); 660 if (!mohandle->mo.mo_otable) { 661 unmapit(db); 662 goto fail; 663 } 664 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l); 665 if (!mohandle->mo.mo_ttable) { 666 unmapit(db); 667 goto fail; 668 } 669 p = mohandle->mo.mo_otable; 670 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 671 p[i].len = flip(otable[i].len, magic); 672 p[i].off = base + flip(otable[i].off, magic); 673 674 if (!validate(p[i].off, mohandle) || 675 !validate(p[i].off + p[i].len + 1, mohandle)) { 676 unmapit(db); 677 goto fail; 678 } 679 } 680 p = mohandle->mo.mo_ttable; 681 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 682 p[i].len = flip(ttable[i].len, magic); 683 p[i].off = base + flip(ttable[i].off, magic); 684 685 if (!validate(p[i].off, mohandle) || 686 !validate(p[i].off + p[i].len + 1, mohandle)) { 687 unmapit(db); 688 goto fail; 689 } 690 } 691 /* allocate htable, and convert it to the host order. */ 692 if (mohandle->mo.mo_hsize > 2) { 693 l = sizeof(uint32_t) * mohandle->mo.mo_hsize; 694 mohandle->mo.mo_htable = (uint32_t *)malloc(l); 695 if (!mohandle->mo.mo_htable) { 696 unmapit(db); 697 goto fail; 698 } 699 /* LINTED: ignore the alignment problem. */ 700 htable = (const uint32_t *)(base+flip(mo->mo_hoffset, magic)); 701 for (i=0; i < mohandle->mo.mo_hsize; i++) { 702 mohandle->mo.mo_htable[i] = flip(htable[i], magic); 703 if (mohandle->mo.mo_htable[i] >= 704 mohandle->mo.mo_nstring+1) { 705 /* illegal string number. */ 706 unmapit(db); 707 goto fail; 708 } 709 } 710 } 711 /* grab MIME-header and charset field */ 712 mohandle->mo.mo_header = lookup("", db, &headerlen); 713 if (mohandle->mo.mo_header) 714 v = strstr(mohandle->mo.mo_header, "charset="); 715 else 716 v = NULL; 717 if (v) { 718 mohandle->mo.mo_charset = strdup(v + 8); 719 if (!mohandle->mo.mo_charset) 720 goto fail; 721 v = strchr(mohandle->mo.mo_charset, '\n'); 722 if (v) 723 *v = '\0'; 724 } 725 if (!mohandle->mo.mo_header || 726 _gettext_parse_plural(&mohandle->mo.mo_plural, 727 &mohandle->mo.mo_nplurals, 728 mohandle->mo.mo_header, headerlen)) 729 mohandle->mo.mo_plural = NULL; 730 731 /* 732 * XXX check charset, reject it if we are unable to support the charset 733 * with the current locale. 734 * for example, if we are using euc-jp locale and we are looking at 735 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject 736 * the *.mo file as we cannot support it. 737 */ 738 739 /* system dependent string support */ 740 if ((mohandle->mo.mo_flags & MO_F_SYSDEP) != 0) { 741 if (setup_sysdep_stuffs(mo, mohandle, base)) { 742 unmapit(db); 743 goto fail; 744 } 745 } 746 747 return 0; 748 749 fail: 750 return -1; 751 } 752 753 static void 754 free_sysdep_table(struct mosysdepstr_h **table, uint32_t nstring) 755 { 756 757 if (! table) 758 return; 759 760 for (uint32_t i = 0; i < nstring; i++) { 761 if (table[i]) { 762 free(table[i]->expanded); 763 free(table[i]); 764 } 765 } 766 free(table); 767 } 768 769 static int 770 unmapit(struct domainbinding *db) 771 { 772 struct mohandle *mohandle = &db->mohandle; 773 774 /* unmap if there's already mapped region */ 775 if (mohandle->addr && mohandle->addr != MAP_FAILED) 776 munmap(mohandle->addr, mohandle->len); 777 mohandle->addr = NULL; 778 free(mohandle->mo.mo_otable); 779 free(mohandle->mo.mo_ttable); 780 free(mohandle->mo.mo_charset); 781 free(mohandle->mo.mo_htable); 782 free(mohandle->mo.mo_sysdep_segs); 783 free_sysdep_table(mohandle->mo.mo_sysdep_otable, 784 mohandle->mo.mo_sysdep_nstring); 785 free_sysdep_table(mohandle->mo.mo_sysdep_ttable, 786 mohandle->mo.mo_sysdep_nstring); 787 _gettext_free_plural(mohandle->mo.mo_plural); 788 memset(&mohandle->mo, 0, sizeof(mohandle->mo)); 789 return 0; 790 } 791 792 /* ARGSUSED */ 793 static const char * 794 lookup_hash(const char *msgid, struct domainbinding *db, size_t *rlen) 795 { 796 struct mohandle *mohandle = &db->mohandle; 797 uint32_t idx, hashval, step, strno; 798 size_t len; 799 struct mosysdepstr_h *sysdep_otable, *sysdep_ttable; 800 801 if (mohandle->mo.mo_hsize <= 2 || mohandle->mo.mo_htable == NULL) 802 return NULL; 803 804 hashval = __intl_string_hash(msgid); 805 step = calc_collision_step(hashval, mohandle->mo.mo_hsize); 806 idx = hashval % mohandle->mo.mo_hsize; 807 len = strlen(msgid); 808 while (/*CONSTCOND*/1) { 809 strno = mohandle->mo.mo_htable[idx]; 810 if (strno == 0) { 811 /* unexpected miss */ 812 return NULL; 813 } 814 strno--; 815 if ((strno & MO_HASH_SYSDEP_MASK) == 0) { 816 /* system independent strings */ 817 if (len <= mohandle->mo.mo_otable[strno].len && 818 !strcmp(msgid, mohandle->mo.mo_otable[strno].off)) { 819 /* hit */ 820 if (rlen) 821 *rlen = 822 mohandle->mo.mo_ttable[strno].len; 823 return mohandle->mo.mo_ttable[strno].off; 824 } 825 } else { 826 /* system dependent strings */ 827 strno &= ~MO_HASH_SYSDEP_MASK; 828 sysdep_otable = mohandle->mo.mo_sysdep_otable[strno]; 829 sysdep_ttable = mohandle->mo.mo_sysdep_ttable[strno]; 830 if (len <= sysdep_otable->expanded_len && 831 !strcmp(msgid, sysdep_otable->expanded)) { 832 /* hit */ 833 if (expand_sysdep(mohandle, sysdep_ttable)) 834 /* memory exhausted */ 835 return NULL; 836 if (rlen) 837 *rlen = sysdep_ttable->expanded_len; 838 return sysdep_ttable->expanded; 839 } 840 } 841 idx = calc_next_index(idx, mohandle->mo.mo_hsize, step); 842 } 843 /*NOTREACHED*/ 844 } 845 846 static const char * 847 lookup_bsearch(const char *msgid, struct domainbinding *db, size_t *rlen) 848 { 849 int top, bottom, middle, omiddle; 850 int n; 851 struct mohandle *mohandle = &db->mohandle; 852 853 top = 0; 854 bottom = mohandle->mo.mo_nstring; 855 omiddle = -1; 856 /* CONSTCOND */ 857 while (1) { 858 if (top > bottom) 859 break; 860 middle = (top + bottom) / 2; 861 /* avoid possible infinite loop, when the data is not sorted */ 862 if (omiddle == middle) 863 break; 864 if ((size_t)middle >= mohandle->mo.mo_nstring) 865 break; 866 867 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off); 868 if (n == 0) { 869 if (rlen) 870 *rlen = mohandle->mo.mo_ttable[middle].len; 871 return (const char *)mohandle->mo.mo_ttable[middle].off; 872 } 873 else if (n < 0) 874 bottom = middle; 875 else 876 top = middle; 877 omiddle = middle; 878 } 879 880 return NULL; 881 } 882 883 static const char * 884 lookup(const char *msgid, struct domainbinding *db, size_t *rlen) 885 { 886 const char *v; 887 888 v = lookup_hash(msgid, db, rlen); 889 if (v) 890 return v; 891 892 return lookup_bsearch(msgid, db, rlen); 893 } 894 895 static const char * 896 get_lang_env(const char *category_name) 897 { 898 const char *lang; 899 900 /* 901 * 1. see LANGUAGE variable first. 902 * 903 * LANGUAGE is a GNU extension. 904 * It's a colon separated list of locale names. 905 */ 906 lang = getenv("LANGUAGE"); 907 if (lang) 908 return lang; 909 910 /* 911 * 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. 912 * 913 * It's essentially setlocale(LC_xxx, NULL). 914 */ 915 lang = getenv("LC_ALL"); 916 if (!lang) 917 lang = getenv(category_name); 918 if (!lang) 919 lang = getenv("LANG"); 920 921 if (!lang) 922 return 0; /* error */ 923 924 return split_locale(lang); 925 } 926 927 static const char * 928 get_indexed_string(const char *str, size_t len, unsigned long idx) 929 { 930 while (idx > 0) { 931 if (len <= 1) 932 return str; 933 if (*str == '\0') 934 idx--; 935 if (len > 0) { 936 str++; 937 len--; 938 } 939 } 940 return str; 941 } 942 943 #define _NGETTEXT_DEFAULT(msgid1, msgid2, n) \ 944 ((char *)__UNCONST((n) == 1 ? (msgid1) : (msgid2))) 945 946 char * 947 dcngettext(const char *domainname, const char *msgid1, const char *msgid2, 948 unsigned long int n, int category) 949 { 950 const char *msgid; 951 char path[PATH_MAX+1]; 952 const char *lpath; 953 static char olpath[PATH_MAX]; 954 const char *cname = NULL; 955 const char *v; 956 static char *ocname = NULL; 957 static char *odomainname = NULL; 958 struct domainbinding *db; 959 unsigned long plural_index = 0; 960 size_t len; 961 962 if (!domainname) 963 domainname = __current_domainname; 964 cname = lookup_category(category); 965 if (!domainname || !cname) 966 goto fail; 967 968 lpath = get_lang_env(cname); 969 if (!lpath) 970 goto fail; 971 972 for (db = __bindings; db; db = db->next) 973 if (strcmp(db->domainname, domainname) == 0) 974 break; 975 if (!db) { 976 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN)) 977 goto fail; 978 db = __bindings; 979 } 980 981 /* resolve relative path */ 982 /* XXX not necessary? */ 983 if (db->path[0] != '/') { 984 char buf[PATH_MAX]; 985 986 if (getcwd(buf, sizeof(buf)) == 0) 987 goto fail; 988 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf)) 989 goto fail; 990 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf)) 991 goto fail; 992 strlcpy(db->path, buf, sizeof(db->path)); 993 } 994 995 /* don't bother looking it up if the values are the same */ 996 if (odomainname && strcmp(domainname, odomainname) == 0 && 997 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 && 998 db->mohandle.mo.mo_magic) 999 goto found; 1000 1001 /* try to find appropriate file, from $LANGUAGE */ 1002 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname, 1003 domainname, db) == NULL) 1004 goto fail; 1005 1006 free(odomainname); 1007 free(ocname); 1008 1009 odomainname = strdup(domainname); 1010 ocname = strdup(cname); 1011 if (!odomainname || !ocname) { 1012 free(odomainname); 1013 free(ocname); 1014 1015 odomainname = ocname = NULL; 1016 } 1017 else 1018 strlcpy(olpath, lpath, sizeof(olpath)); 1019 1020 found: 1021 if (db->mohandle.mo.mo_plural) { 1022 plural_index = 1023 _gettext_calculate_plural(db->mohandle.mo.mo_plural, n); 1024 if (plural_index >= db->mohandle.mo.mo_nplurals) 1025 plural_index = 0; 1026 msgid = msgid1; 1027 } else 1028 msgid = _NGETTEXT_DEFAULT(msgid1, msgid2, n); 1029 1030 if (msgid == NULL) 1031 return NULL; 1032 1033 v = lookup(msgid, db, &len); 1034 if (v) { 1035 if (db->mohandle.mo.mo_plural) 1036 v = get_indexed_string(v, len, plural_index); 1037 /* 1038 * convert the translated message's encoding. 1039 * 1040 * special case: 1041 * a result of gettext("") shouldn't need any conversion. 1042 */ 1043 if (msgid[0]) 1044 v = __gettext_iconv(v, db); 1045 1046 /* 1047 * Given the amount of printf-format security issues, it may 1048 * be a good idea to validate if the original msgid and the 1049 * translated message format string carry the same printf-like 1050 * format identifiers. 1051 */ 1052 1053 msgid = v; 1054 } 1055 1056 return (char *)__UNCONST(msgid); 1057 1058 fail: 1059 return _NGETTEXT_DEFAULT(msgid1, msgid2, n); 1060 } 1061