1 /* $NetBSD: gettext.c,v 1.33 2024/08/18 17:46:24 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2000, 2001 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: gettext.c,v 1.33 2024/08/18 17:46:24 christos Exp $"); 33 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <sys/mman.h> 37 #include <sys/uio.h> 38 39 #include <assert.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #if 0 46 #include <util.h> 47 #endif 48 #include <libintl.h> 49 #include <locale.h> 50 #include "libintl_local.h" 51 #include "plural_parser.h" 52 #include "pathnames.h" 53 54 /* GNU gettext added a hack to add some context to messages. If a message is 55 * used in multiple locations, it needs some amount of context to make the 56 * translation clear to translators. GNU gettext, rather than modifying the 57 * message format, concatenates the context, \004 and the message id. 58 */ 59 #define MSGCTXT_ID_SEPARATOR '\004' 60 61 static const char *pgettext_impl(const char *, const char *, const char *, 62 const char *, unsigned long int, int); 63 static char *concatenate_ctxt_id(const char *, const char *); 64 static const char *lookup_category(int); 65 static const char *split_locale(const char *); 66 static const char *lookup_mofile(char *, size_t, const char *, const char *, 67 const char *, const char *, 68 struct domainbinding *); 69 static uint32_t flip(uint32_t, uint32_t); 70 static int validate(void *, struct mohandle *); 71 static int mapit(const char *, struct domainbinding *); 72 static int unmapit(struct domainbinding *); 73 static const char *lookup_hash(const char *, struct domainbinding *, size_t *); 74 static const char *lookup_bsearch(const char *, struct domainbinding *, 75 size_t *); 76 static const char *lookup(const char *, struct domainbinding *, size_t *); 77 static const char *get_lang_env(const char *); 78 79 /* 80 * shortcut functions. the main implementation resides in dcngettext(). 81 */ 82 char * 83 gettext(const char *msgid) 84 { 85 86 return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES); 87 } 88 89 char * 90 dgettext(const char *domainname, const char *msgid) 91 { 92 93 return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES); 94 } 95 96 char * 97 dcgettext(const char *domainname, const char *msgid, int category) 98 { 99 100 return dcngettext(domainname, msgid, NULL, 1UL, category); 101 } 102 103 char * 104 ngettext(const char *msgid1, const char *msgid2, unsigned long int n) 105 { 106 107 return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES); 108 } 109 110 char * 111 dngettext(const char *domainname, const char *msgid1, const char *msgid2, 112 unsigned long int n) 113 { 114 115 return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES); 116 } 117 118 const char * 119 pgettext(const char *msgctxt, const char *msgid) 120 { 121 122 return pgettext_impl(NULL, msgctxt, msgid, NULL, 1UL, LC_MESSAGES); 123 } 124 125 const char * 126 dpgettext(const char *domainname, const char *msgctxt, const char *msgid) 127 { 128 129 return pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL, LC_MESSAGES); 130 } 131 132 const char * 133 dcpgettext(const char *domainname, const char *msgctxt, const char *msgid, 134 int category) 135 { 136 137 return pgettext_impl(domainname, msgctxt, msgid, NULL, 1UL, category); 138 } 139 140 const char * 141 npgettext(const char *msgctxt, const char *msgid1, const char *msgid2, 142 unsigned long int n) 143 { 144 145 return pgettext_impl(NULL, msgctxt, msgid1, msgid2, n, LC_MESSAGES); 146 } 147 148 const char * 149 dnpgettext(const char *domainname, const char *msgctxt, const char *msgid1, 150 const char *msgid2, unsigned long int n) 151 { 152 153 return pgettext_impl(domainname, msgctxt, msgid1, msgid2, n, LC_MESSAGES); 154 } 155 156 const char * 157 dcnpgettext(const char *domainname, const char *msgctxt, const char *msgid1, 158 const char *msgid2, unsigned long int n, int category) 159 { 160 161 return pgettext_impl(domainname, msgctxt, msgid1, msgid2, n, category); 162 } 163 164 static const char * 165 pgettext_impl(const char *domainname, const char *msgctxt, const char *msgid1, 166 const char *msgid2, unsigned long int n, int category) 167 { 168 char *msgctxt_id; 169 char *translation; 170 char *p; 171 172 if ((msgctxt_id = concatenate_ctxt_id(msgctxt, msgid1)) == NULL) 173 return msgid1; 174 175 translation = dcngettext(domainname, msgctxt_id, 176 msgid2, n, category); 177 178 if (translation == msgctxt_id) { 179 free(msgctxt_id); 180 return msgid1; 181 } 182 183 free(msgctxt_id); 184 p = strchr(translation, '\004'); 185 if (p) 186 return p + 1; 187 return translation; 188 } 189 190 /* 191 * dcngettext() - 192 * lookup internationalized message on database locale/category/domainname 193 * (like ja_JP.eucJP/LC_MESSAGES/domainname). 194 * if n equals to 1, internationalized message will be looked up for msgid1. 195 * otherwise, message will be looked up for msgid2. 196 * if the lookup fails, the function will return msgid1 or msgid2 as is. 197 * 198 * Even though the return type is "char *", caller should not rewrite the 199 * region pointed to by the return value (should be "const char *", but can't 200 * change it for compatibility with other implementations). 201 * 202 * by default (if domainname == NULL), domainname is taken from the value set 203 * by textdomain(). usually name of the application (like "ls") is used as 204 * domainname. category is usually LC_MESSAGES. 205 * 206 * the code reads in *.mo files generated by GNU gettext. *.mo is a host- 207 * endian encoded file. both endians are supported here, as the files are in 208 * /usr/share/locale! (or we should move those files into /usr/libdata) 209 */ 210 211 static char * 212 concatenate_ctxt_id(const char *msgctxt, const char *msgid) 213 { 214 char *ret; 215 216 if (asprintf(&ret, "%s%c%s", msgctxt, MSGCTXT_ID_SEPARATOR, msgid) == -1) 217 return NULL; 218 219 return ret; 220 } 221 222 static const char * 223 lookup_category(int category) 224 { 225 226 switch (category) { 227 case LC_COLLATE: return "LC_COLLATE"; 228 case LC_CTYPE: return "LC_CTYPE"; 229 case LC_MONETARY: return "LC_MONETARY"; 230 case LC_NUMERIC: return "LC_NUMERIC"; 231 case LC_TIME: return "LC_TIME"; 232 case LC_MESSAGES: return "LC_MESSAGES"; 233 } 234 return NULL; 235 } 236 237 #define MAXBUFLEN 1024 238 /* 239 * XPG syntax: language[_territory[.codeset]][@modifier] 240 * XXX boundary check on "result" is lacking 241 */ 242 static const char * 243 split_locale(const char *lname) 244 { 245 char buf[MAXBUFLEN], tmp[2 * MAXBUFLEN]; 246 char *l, *t, *c, *m; 247 static char result[4 * MAXBUFLEN]; 248 249 memset(result, 0, sizeof(result)); 250 251 if (strlen(lname) + 1 > sizeof(buf)) { 252 fail: 253 return lname; 254 } 255 256 strlcpy(buf, lname, sizeof(buf)); 257 m = strrchr(buf, '@'); 258 if (m) 259 *m++ = '\0'; 260 c = strrchr(buf, '.'); 261 if (c) 262 *c++ = '\0'; 263 t = strrchr(buf, '_'); 264 if (t) 265 *t++ = '\0'; 266 l = buf; 267 if (strlen(l) == 0) 268 goto fail; 269 if (c && !t) 270 goto fail; 271 272 if (m) { 273 if (t) { 274 if (c) { 275 snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s", 276 l, t, c, m); 277 strlcat(result, tmp, sizeof(result)); 278 strlcat(result, ":", sizeof(result)); 279 } 280 snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m); 281 strlcat(result, tmp, sizeof(result)); 282 strlcat(result, ":", sizeof(result)); 283 } 284 snprintf(tmp, sizeof(tmp), "%s@%s", l, m); 285 strlcat(result, tmp, sizeof(result)); 286 strlcat(result, ":", sizeof(result)); 287 } 288 if (t) { 289 if (c) { 290 snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c); 291 strlcat(result, tmp, sizeof(result)); 292 strlcat(result, ":", sizeof(result)); 293 } 294 snprintf(tmp, sizeof(tmp), "%s_%s", l, t); 295 strlcat(result, tmp, sizeof(result)); 296 strlcat(result, ":", sizeof(result)); 297 } 298 strlcat(result, l, sizeof(result)); 299 300 return result; 301 } 302 303 static const char * 304 lookup_mofile(char *buf, size_t len, const char *dir, const char *lpath, 305 const char *category, const char *domainname, 306 struct domainbinding *db) 307 { 308 struct stat st; 309 char *p, *q; 310 char lpath_tmp[BUFSIZ]; 311 312 /* 313 * LANGUAGE is a colon separated list of locale names. 314 */ 315 316 strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp)); 317 q = lpath_tmp; 318 /* CONSTCOND */ 319 while (1) { 320 p = strsep(&q, ":"); 321 if (!p) 322 break; 323 if (!*p) 324 continue; 325 326 /* don't mess with default locales */ 327 if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0) 328 return NULL; 329 330 /* validate pathname */ 331 if (strchr(p, '/') || strchr(category, '/')) 332 continue; 333 #if 1 /*?*/ 334 if (strchr(domainname, '/')) 335 continue; 336 #endif 337 338 int rv = snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p, 339 category, domainname); 340 if (rv > (int)len) 341 return NULL; 342 if (stat(buf, &st) < 0) 343 continue; 344 if ((st.st_mode & S_IFMT) != S_IFREG) 345 continue; 346 347 if (mapit(buf, db) == 0) 348 return buf; 349 } 350 351 return NULL; 352 } 353 354 static uint32_t 355 flip(uint32_t v, uint32_t magic) 356 { 357 358 if (magic == MO_MAGIC) 359 return v; 360 else if (magic == MO_MAGIC_SWAPPED) { 361 v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | 362 ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); 363 return v; 364 } else { 365 abort(); 366 /*NOTREACHED*/ 367 } 368 } 369 370 static int 371 validate(void *arg, struct mohandle *mohandle) 372 { 373 char *p; 374 375 p = (char *)arg; 376 if (p < (char *)mohandle->addr || 377 p > (char *)mohandle->addr + mohandle->len) 378 return 0; 379 else 380 return 1; 381 } 382 383 /* 384 * calculate the step value if the hash value is conflicted. 385 */ 386 static __inline uint32_t 387 calc_collision_step(uint32_t hashval, uint32_t hashsize) 388 { 389 _DIAGASSERT(hashsize>2); 390 return (hashval % (hashsize - 2)) + 1; 391 } 392 393 /* 394 * calculate the next index while conflicting. 395 */ 396 static __inline uint32_t 397 calc_next_index(uint32_t curidx, uint32_t hashsize, uint32_t step) 398 { 399 return curidx+step - (curidx >= hashsize-step ? hashsize : 0); 400 } 401 402 static int 403 get_sysdep_string_table(struct mosysdepstr_h **table_h, uint32_t *ofstable, 404 uint32_t nstrings, uint32_t magic, char *base) 405 { 406 unsigned int i; 407 int j, count; 408 size_t l; 409 struct mosysdepstr *table; 410 411 for (i=0; i<nstrings; i++) { 412 /* get mosysdepstr record */ 413 /* LINTED: ignore the alignment problem. */ 414 table = (struct mosysdepstr *)(base + flip(ofstable[i], magic)); 415 /* count number of segments */ 416 count = 0; 417 while (flip(table->segs[count++].ref, magic) != MO_LASTSEG) 418 ; 419 /* get table */ 420 l = sizeof(struct mosysdepstr_h) + 421 sizeof(struct mosysdepsegentry_h) * (count-1); 422 table_h[i] = (struct mosysdepstr_h *)malloc(l); 423 if (!table_h[i]) 424 return -1; 425 memset(table_h[i], 0, l); 426 table_h[i]->off = (const char *)(base + flip(table->off, magic)); 427 for (j=0; j<count; j++) { 428 table_h[i]->segs[j].len = 429 flip(table->segs[j].len, magic); 430 table_h[i]->segs[j].ref = 431 flip(table->segs[j].ref, magic); 432 } 433 /* LINTED: ignore the alignment problem. */ 434 table = (struct mosysdepstr *)&table->segs[count]; 435 } 436 return 0; 437 } 438 439 static int 440 expand_sysdep(struct mohandle *mohandle, struct mosysdepstr_h *str) 441 { 442 int i; 443 const char *src; 444 char *dst; 445 446 /* check whether already expanded */ 447 if (str->expanded) 448 return 0; 449 450 /* calc total length */ 451 str->expanded_len = 1; 452 for (i=0; /*CONSTCOND*/1; i++) { 453 str->expanded_len += str->segs[i].len; 454 if (str->segs[i].ref == MO_LASTSEG) 455 break; 456 str->expanded_len += 457 mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len; 458 } 459 /* expand */ 460 str->expanded = malloc(str->expanded_len); 461 if (!str->expanded) 462 return -1; 463 src = str->off; 464 dst = str->expanded; 465 for (i=0; /*CONSTCOND*/1; i++) { 466 memcpy(dst, src, str->segs[i].len); 467 src += str->segs[i].len; 468 dst += str->segs[i].len; 469 if (str->segs[i].ref == MO_LASTSEG) 470 break; 471 memcpy(dst, mohandle->mo.mo_sysdep_segs[str->segs[i].ref].str, 472 mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len); 473 dst += mohandle->mo.mo_sysdep_segs[str->segs[i].ref].len; 474 } 475 *dst = '\0'; 476 477 return 0; 478 } 479 480 static void 481 insert_to_hash(uint32_t *htable, uint32_t hsize, const char *str, uint32_t ref) 482 { 483 uint32_t hashval, idx, step; 484 485 hashval = __intl_string_hash(str); 486 step = calc_collision_step(hashval, hsize); 487 idx = hashval % hsize; 488 489 while (htable[idx]) 490 idx = calc_next_index(idx, hsize, step); 491 492 htable[idx] = ref; 493 } 494 495 static int 496 setup_sysdep_stuffs(struct mo *mo, struct mohandle *mohandle, char *base) 497 { 498 uint32_t magic; 499 struct moentry *stable; 500 size_t l; 501 unsigned int i; 502 char *v; 503 uint32_t *ofstable; 504 505 magic = mo->mo_magic; 506 507 mohandle->mo.mo_sysdep_nsegs = flip(mo->mo_sysdep_nsegs, magic); 508 mohandle->mo.mo_sysdep_nstring = flip(mo->mo_sysdep_nstring, magic); 509 510 if (mohandle->mo.mo_sysdep_nstring == 0) 511 return 0; 512 513 /* check hash size */ 514 if (mohandle->mo.mo_hsize <= 2 || 515 mohandle->mo.mo_hsize < 516 (mohandle->mo.mo_nstring + mohandle->mo.mo_sysdep_nstring)) 517 return -1; 518 519 /* get sysdep segments */ 520 l = sizeof(struct mosysdepsegs_h) * mohandle->mo.mo_sysdep_nsegs; 521 mohandle->mo.mo_sysdep_segs = (struct mosysdepsegs_h *)malloc(l); 522 if (!mohandle->mo.mo_sysdep_segs) 523 return -1; 524 /* LINTED: ignore the alignment problem. */ 525 stable = (struct moentry *)(base + flip(mo->mo_sysdep_segoff, magic)); 526 for (i=0; i<mohandle->mo.mo_sysdep_nsegs; i++) { 527 v = base + flip(stable[i].off, magic); 528 mohandle->mo.mo_sysdep_segs[i].str = 529 __intl_sysdep_get_string_by_tag( 530 v, 531 &mohandle->mo.mo_sysdep_segs[i].len); 532 } 533 534 /* get sysdep string table */ 535 mohandle->mo.mo_sysdep_otable = 536 (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring, 537 sizeof(struct mosysdepstr_h *)); 538 if (!mohandle->mo.mo_sysdep_otable) 539 return -1; 540 /* LINTED: ignore the alignment problem. */ 541 ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_otable, magic)); 542 if (get_sysdep_string_table(mohandle->mo.mo_sysdep_otable, ofstable, 543 mohandle->mo.mo_sysdep_nstring, magic, 544 base)) 545 return -1; 546 mohandle->mo.mo_sysdep_ttable = 547 (struct mosysdepstr_h **)calloc(mohandle->mo.mo_sysdep_nstring, 548 sizeof(struct mosysdepstr_h *)); 549 if (!mohandle->mo.mo_sysdep_ttable) 550 return -1; 551 /* LINTED: ignore the alignment problem. */ 552 ofstable = (uint32_t *)(base + flip(mo->mo_sysdep_ttable, magic)); 553 if (get_sysdep_string_table(mohandle->mo.mo_sysdep_ttable, ofstable, 554 mohandle->mo.mo_sysdep_nstring, magic, 555 base)) 556 return -1; 557 558 /* update hash */ 559 for (i=0; i<mohandle->mo.mo_sysdep_nstring; i++) { 560 if (expand_sysdep(mohandle, mohandle->mo.mo_sysdep_otable[i])) 561 return -1; 562 insert_to_hash(mohandle->mo.mo_htable, 563 mohandle->mo.mo_hsize, 564 mohandle->mo.mo_sysdep_otable[i]->expanded, 565 (i+1) | MO_HASH_SYSDEP_MASK); 566 } 567 568 return 0; 569 } 570 571 int 572 mapit(const char *path, struct domainbinding *db) 573 { 574 int fd; 575 struct stat st; 576 char *base; 577 uint32_t magic, revision, flags = 0; 578 struct moentry *otable, *ttable; 579 const uint32_t *htable; 580 struct moentry_h *p; 581 struct mo *mo; 582 size_t l, headerlen; 583 unsigned int i; 584 char *v; 585 struct mohandle *mohandle = &db->mohandle; 586 587 if (mohandle->addr && mohandle->addr != MAP_FAILED && 588 mohandle->mo.mo_magic) 589 return 0; /*already opened*/ 590 591 unmapit(db); 592 593 #if 0 594 if (secure_path(path) != 0) 595 goto fail; 596 #endif 597 if (stat(path, &st) < 0) 598 goto fail; 599 if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX) 600 goto fail; 601 fd = open(path, O_RDONLY); 602 if (fd < 0) 603 goto fail; 604 if (read(fd, &magic, sizeof(magic)) != sizeof(magic) || 605 (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) { 606 close(fd); 607 goto fail; 608 } 609 if (read(fd, &revision, sizeof(revision)) != sizeof(revision)) { 610 close(fd); 611 goto fail; 612 } 613 switch (flip(revision, magic)) { 614 case MO_MAKE_REV(0, 0): 615 break; 616 case MO_MAKE_REV(0, 1): 617 case MO_MAKE_REV(1, 1): 618 flags |= MO_F_SYSDEP; 619 break; 620 default: 621 close(fd); 622 goto fail; 623 } 624 mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ, 625 MAP_FILE | MAP_SHARED, fd, (off_t)0); 626 if (!mohandle->addr || mohandle->addr == MAP_FAILED) { 627 close(fd); 628 goto fail; 629 } 630 close(fd); 631 mohandle->len = (size_t)st.st_size; 632 633 base = mohandle->addr; 634 mo = (struct mo *)mohandle->addr; 635 636 /* flip endian. do not flip magic number! */ 637 mohandle->mo.mo_magic = mo->mo_magic; 638 mohandle->mo.mo_revision = flip(mo->mo_revision, magic); 639 mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic); 640 mohandle->mo.mo_hsize = flip(mo->mo_hsize, magic); 641 mohandle->mo.mo_flags = flags; 642 643 /* validate otable/ttable */ 644 /* LINTED: ignore the alignment problem. */ 645 otable = (struct moentry *)(base + flip(mo->mo_otable, magic)); 646 /* LINTED: ignore the alignment problem. */ 647 ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic)); 648 if (!validate(otable, mohandle) || 649 !validate(&otable[mohandle->mo.mo_nstring], mohandle)) { 650 unmapit(db); 651 goto fail; 652 } 653 if (!validate(ttable, mohandle) || 654 !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) { 655 unmapit(db); 656 goto fail; 657 } 658 659 /* allocate [ot]table, and convert to normal pointer representation. */ 660 l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring; 661 mohandle->mo.mo_otable = (struct moentry_h *)malloc(l); 662 if (!mohandle->mo.mo_otable) { 663 unmapit(db); 664 goto fail; 665 } 666 mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l); 667 if (!mohandle->mo.mo_ttable) { 668 unmapit(db); 669 goto fail; 670 } 671 p = mohandle->mo.mo_otable; 672 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 673 p[i].len = flip(otable[i].len, magic); 674 p[i].off = base + flip(otable[i].off, magic); 675 676 if (!validate(p[i].off, mohandle) || 677 !validate(p[i].off + p[i].len + 1, mohandle)) { 678 unmapit(db); 679 goto fail; 680 } 681 } 682 p = mohandle->mo.mo_ttable; 683 for (i = 0; i < mohandle->mo.mo_nstring; i++) { 684 p[i].len = flip(ttable[i].len, magic); 685 p[i].off = base + flip(ttable[i].off, magic); 686 687 if (!validate(p[i].off, mohandle) || 688 !validate(p[i].off + p[i].len + 1, mohandle)) { 689 unmapit(db); 690 goto fail; 691 } 692 } 693 /* allocate htable, and convert it to the host order. */ 694 if (mohandle->mo.mo_hsize > 2) { 695 l = sizeof(uint32_t) * mohandle->mo.mo_hsize; 696 mohandle->mo.mo_htable = (uint32_t *)malloc(l); 697 if (!mohandle->mo.mo_htable) { 698 unmapit(db); 699 goto fail; 700 } 701 /* LINTED: ignore the alignment problem. */ 702 htable = (const uint32_t *)(base+flip(mo->mo_hoffset, magic)); 703 for (i=0; i < mohandle->mo.mo_hsize; i++) { 704 mohandle->mo.mo_htable[i] = flip(htable[i], magic); 705 if (mohandle->mo.mo_htable[i] >= 706 mohandle->mo.mo_nstring+1) { 707 /* illegal string number. */ 708 unmapit(db); 709 goto fail; 710 } 711 } 712 } 713 /* grab MIME-header and charset field */ 714 mohandle->mo.mo_header = lookup("", db, &headerlen); 715 if (mohandle->mo.mo_header) 716 v = strstr(mohandle->mo.mo_header, "charset="); 717 else 718 v = NULL; 719 if (v) { 720 mohandle->mo.mo_charset = strdup(v + 8); 721 if (!mohandle->mo.mo_charset) 722 goto fail; 723 v = strchr(mohandle->mo.mo_charset, '\n'); 724 if (v) 725 *v = '\0'; 726 } 727 if (!mohandle->mo.mo_header || 728 _gettext_parse_plural(&mohandle->mo.mo_plural, 729 &mohandle->mo.mo_nplurals, 730 mohandle->mo.mo_header, headerlen)) 731 mohandle->mo.mo_plural = NULL; 732 733 /* 734 * XXX check charset, reject it if we are unable to support the charset 735 * with the current locale. 736 * for example, if we are using euc-jp locale and we are looking at 737 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject 738 * the *.mo file as we cannot support it. 739 */ 740 741 /* system dependent string support */ 742 if ((mohandle->mo.mo_flags & MO_F_SYSDEP) != 0) { 743 if (setup_sysdep_stuffs(mo, mohandle, base)) { 744 unmapit(db); 745 goto fail; 746 } 747 } 748 749 return 0; 750 751 fail: 752 return -1; 753 } 754 755 static void 756 free_sysdep_table(struct mosysdepstr_h **table, uint32_t nstring) 757 { 758 759 if (! table) 760 return; 761 762 for (uint32_t i = 0; i < nstring; i++) { 763 if (table[i]) { 764 free(table[i]->expanded); 765 free(table[i]); 766 } 767 } 768 free(table); 769 } 770 771 static int 772 unmapit(struct domainbinding *db) 773 { 774 struct mohandle *mohandle = &db->mohandle; 775 776 /* unmap if there's already mapped region */ 777 if (mohandle->addr && mohandle->addr != MAP_FAILED) 778 munmap(mohandle->addr, mohandle->len); 779 mohandle->addr = NULL; 780 free(mohandle->mo.mo_otable); 781 free(mohandle->mo.mo_ttable); 782 free(mohandle->mo.mo_charset); 783 free(mohandle->mo.mo_htable); 784 free(mohandle->mo.mo_sysdep_segs); 785 free_sysdep_table(mohandle->mo.mo_sysdep_otable, 786 mohandle->mo.mo_sysdep_nstring); 787 free_sysdep_table(mohandle->mo.mo_sysdep_ttable, 788 mohandle->mo.mo_sysdep_nstring); 789 _gettext_free_plural(mohandle->mo.mo_plural); 790 memset(&mohandle->mo, 0, sizeof(mohandle->mo)); 791 return 0; 792 } 793 794 /* ARGSUSED */ 795 static const char * 796 lookup_hash(const char *msgid, struct domainbinding *db, size_t *rlen) 797 { 798 struct mohandle *mohandle = &db->mohandle; 799 uint32_t idx, hashval, step, strno; 800 size_t len; 801 struct mosysdepstr_h *sysdep_otable, *sysdep_ttable; 802 803 if (mohandle->mo.mo_hsize <= 2 || mohandle->mo.mo_htable == NULL) 804 return NULL; 805 806 hashval = __intl_string_hash(msgid); 807 step = calc_collision_step(hashval, mohandle->mo.mo_hsize); 808 idx = hashval % mohandle->mo.mo_hsize; 809 len = strlen(msgid); 810 while (/*CONSTCOND*/1) { 811 strno = mohandle->mo.mo_htable[idx]; 812 if (strno == 0) { 813 /* unexpected miss */ 814 return NULL; 815 } 816 strno--; 817 if ((strno & MO_HASH_SYSDEP_MASK) == 0) { 818 /* system independent strings */ 819 if (len <= mohandle->mo.mo_otable[strno].len && 820 !strcmp(msgid, mohandle->mo.mo_otable[strno].off)) { 821 /* hit */ 822 if (rlen) 823 *rlen = 824 mohandle->mo.mo_ttable[strno].len; 825 return mohandle->mo.mo_ttable[strno].off; 826 } 827 } else { 828 /* system dependent strings */ 829 strno &= ~MO_HASH_SYSDEP_MASK; 830 sysdep_otable = mohandle->mo.mo_sysdep_otable[strno]; 831 sysdep_ttable = mohandle->mo.mo_sysdep_ttable[strno]; 832 if (len <= sysdep_otable->expanded_len && 833 !strcmp(msgid, sysdep_otable->expanded)) { 834 /* hit */ 835 if (expand_sysdep(mohandle, sysdep_ttable)) 836 /* memory exhausted */ 837 return NULL; 838 if (rlen) 839 *rlen = sysdep_ttable->expanded_len; 840 return sysdep_ttable->expanded; 841 } 842 } 843 idx = calc_next_index(idx, mohandle->mo.mo_hsize, step); 844 } 845 /*NOTREACHED*/ 846 } 847 848 static const char * 849 lookup_bsearch(const char *msgid, struct domainbinding *db, size_t *rlen) 850 { 851 int top, bottom, middle, omiddle; 852 int n; 853 struct mohandle *mohandle = &db->mohandle; 854 855 top = 0; 856 bottom = mohandle->mo.mo_nstring; 857 omiddle = -1; 858 /* CONSTCOND */ 859 while (1) { 860 if (top > bottom) 861 break; 862 middle = (top + bottom) / 2; 863 /* avoid possible infinite loop, when the data is not sorted */ 864 if (omiddle == middle) 865 break; 866 if ((size_t)middle >= mohandle->mo.mo_nstring) 867 break; 868 869 n = strcmp(msgid, mohandle->mo.mo_otable[middle].off); 870 if (n == 0) { 871 if (rlen) 872 *rlen = mohandle->mo.mo_ttable[middle].len; 873 return (const char *)mohandle->mo.mo_ttable[middle].off; 874 } 875 else if (n < 0) 876 bottom = middle; 877 else 878 top = middle; 879 omiddle = middle; 880 } 881 882 return NULL; 883 } 884 885 static const char * 886 lookup(const char *msgid, struct domainbinding *db, size_t *rlen) 887 { 888 const char *v; 889 890 v = lookup_hash(msgid, db, rlen); 891 if (v) 892 return v; 893 894 return lookup_bsearch(msgid, db, rlen); 895 } 896 897 static const char * 898 get_lang_env(const char *category_name) 899 { 900 const char *lang; 901 902 /* 903 * 1. see LANGUAGE variable first. 904 * 905 * LANGUAGE is a GNU extension. 906 * It's a colon separated list of locale names. 907 */ 908 lang = getenv("LANGUAGE"); 909 if (lang) 910 return lang; 911 912 /* 913 * 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. 914 * 915 * It's essentially setlocale(LC_xxx, NULL). 916 */ 917 lang = getenv("LC_ALL"); 918 if (!lang) 919 lang = getenv(category_name); 920 if (!lang) 921 lang = getenv("LANG"); 922 923 if (!lang) 924 return 0; /* error */ 925 926 return split_locale(lang); 927 } 928 929 static const char * 930 get_indexed_string(const char *str, size_t len, unsigned long idx) 931 { 932 while (idx > 0) { 933 if (len <= 1) 934 return str; 935 if (*str == '\0') 936 idx--; 937 if (len > 0) { 938 str++; 939 len--; 940 } 941 } 942 return str; 943 } 944 945 #define _NGETTEXT_DEFAULT(msgid1, msgid2, n) \ 946 ((char *)__UNCONST((n) == 1 ? (msgid1) : (msgid2))) 947 948 char * 949 dcngettext(const char *domainname, const char *msgid1, const char *msgid2, 950 unsigned long int n, int category) 951 { 952 const char *msgid; 953 char path[PATH_MAX+1]; 954 const char *lpath; 955 static char olpath[PATH_MAX]; 956 const char *cname = NULL; 957 const char *v; 958 static char *ocname = NULL; 959 static char *odomainname = NULL; 960 struct domainbinding *db; 961 unsigned long plural_index = 0; 962 size_t len; 963 964 if (!domainname) 965 domainname = __current_domainname; 966 cname = lookup_category(category); 967 if (!domainname || !cname) 968 goto fail; 969 970 lpath = get_lang_env(cname); 971 if (!lpath) 972 goto fail; 973 974 for (db = __bindings; db; db = db->next) 975 if (strcmp(db->domainname, domainname) == 0) 976 break; 977 if (!db) { 978 if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN)) 979 goto fail; 980 db = __bindings; 981 } 982 983 /* resolve relative path */ 984 /* XXX not necessary? */ 985 if (db->path[0] != '/') { 986 char buf[PATH_MAX]; 987 988 if (getcwd(buf, sizeof(buf)) == 0) 989 goto fail; 990 if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf)) 991 goto fail; 992 if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf)) 993 goto fail; 994 strlcpy(db->path, buf, sizeof(db->path)); 995 } 996 997 /* don't bother looking it up if the values are the same */ 998 if (odomainname && strcmp(domainname, odomainname) == 0 && 999 ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 && 1000 db->mohandle.mo.mo_magic) 1001 goto found; 1002 1003 /* try to find appropriate file, from $LANGUAGE */ 1004 if (lookup_mofile(path, sizeof(path), db->path, lpath, cname, 1005 domainname, db) == NULL) 1006 goto fail; 1007 1008 free(odomainname); 1009 free(ocname); 1010 1011 odomainname = strdup(domainname); 1012 ocname = strdup(cname); 1013 if (!odomainname || !ocname) { 1014 free(odomainname); 1015 free(ocname); 1016 1017 odomainname = ocname = NULL; 1018 } 1019 else 1020 strlcpy(olpath, lpath, sizeof(olpath)); 1021 1022 found: 1023 if (db->mohandle.mo.mo_plural) { 1024 plural_index = 1025 _gettext_calculate_plural(db->mohandle.mo.mo_plural, n); 1026 if (plural_index >= db->mohandle.mo.mo_nplurals) 1027 plural_index = 0; 1028 msgid = msgid1; 1029 } else 1030 msgid = _NGETTEXT_DEFAULT(msgid1, msgid2, n); 1031 1032 if (msgid == NULL) 1033 return NULL; 1034 1035 v = lookup(msgid, db, &len); 1036 if (v) { 1037 if (db->mohandle.mo.mo_plural) 1038 v = get_indexed_string(v, len, plural_index); 1039 /* 1040 * convert the translated message's encoding. 1041 * 1042 * special case: 1043 * a result of gettext("") shouldn't need any conversion. 1044 */ 1045 if (msgid[0]) 1046 v = __gettext_iconv(v, db); 1047 1048 /* 1049 * Given the amount of printf-format security issues, it may 1050 * be a good idea to validate if the original msgid and the 1051 * translated message format string carry the same printf-like 1052 * format identifiers. 1053 */ 1054 1055 msgid = v; 1056 } 1057 1058 return (char *)__UNCONST(msgid); 1059 1060 fail: 1061 return _NGETTEXT_DEFAULT(msgid1, msgid2, n); 1062 } 1063