1 /* $NetBSD: plural_parser.c,v 1.3 2019/10/03 16:37:45 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 __RCSID("$NetBSD: plural_parser.c,v 1.3 2019/10/03 16:37:45 christos Exp $"); 32 33 #include <assert.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <citrus/citrus_namespace.h> 38 #include <citrus/citrus_region.h> 39 #include <citrus/citrus_memstream.h> 40 #include <citrus/citrus_bcs.h> 41 #include "plural_parser.h" 42 43 #if defined(TEST_TOKENIZER) || defined(TEST_PARSER) 44 #define ALLOW_EMPTY 45 #define ALLOW_ARBITRARY_IDENTIFIER 46 #endif 47 48 #define MAX_LEN_ATOM 10 49 #define MAX_NUM_OPERANDS 3 50 51 #define T_EOF EOF 52 #define T_NONE 0x100 53 #define T_LAND 0x101 /* && */ 54 #define T_LOR 0x102 /* || */ 55 #define T_EQUALITY 0x103 /* == or != */ 56 #define T_RELATIONAL 0x104 /* <, >, <= or >= */ 57 #define T_ADDITIVE 0x105 /* + or - */ 58 #define T_MULTIPLICATIVE 0x106 /* *, / or % */ 59 #define T_IDENTIFIER 0x200 60 #define T_CONSTANT 0x201 61 #define T_ILCHAR 0x300 62 #define T_TOOLONG 0x301 63 #define T_ILTOKEN 0x302 64 #define T_ILEND 0x303 65 #define T_NOMEM 0x304 66 #define T_NOTFOUND 0x305 67 #define T_ILPLURAL 0x306 68 #define T_IS_OPERATOR(t) ((t) < 0x200) 69 #define T_IS_ERROR(t) ((t) >= 0x300) 70 71 #define OP_EQ ('='+'=') 72 #define OP_NEQ ('!'+'=') 73 #define OP_LTEQ ('<'+'=') 74 #define OP_GTEQ ('>'+'=') 75 76 #define PLURAL_NUMBER_SYMBOL "n" 77 #define NPLURALS_SYMBOL "nplurals" 78 #define LEN_NPLURAL_SYMBOL (sizeof (NPLURALS_SYMBOL) -1) 79 #define PLURAL_SYMBOL "plural" 80 #define LEN_PLURAL_SYMBOL (sizeof (PLURAL_SYMBOL) -1) 81 #define PLURAL_FORMS "Plural-Forms:" 82 #define LEN_PLURAL_FORMS (sizeof (PLURAL_FORMS) -1) 83 84 /* ---------------------------------------------------------------------- 85 * tokenizer part 86 */ 87 88 union token_data 89 { 90 unsigned long constant; 91 #ifdef ALLOW_ARBITRARY_IDENTIFIER 92 char identifier[MAX_LEN_ATOM+1]; 93 #endif 94 char op; 95 }; 96 97 struct tokenizer_context 98 { 99 struct _memstream memstream; 100 struct { 101 int token; 102 union token_data token_data; 103 } token0; 104 }; 105 106 /* initialize a tokenizer context */ 107 static void 108 init_tokenizer_context(struct tokenizer_context *tcx) 109 { 110 tcx->token0.token = T_NONE; 111 } 112 113 /* get an atom (identifier or constant) */ 114 static int 115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data) 116 { 117 int ch, len; 118 char buf[MAX_LEN_ATOM+1]; 119 120 len = 0; 121 while (/*CONSTCOND*/1) { 122 ch = _memstream_getc(&tcx->memstream); 123 if (!(_bcs_isalnum(ch) || ch == '_')) { 124 _memstream_ungetc(&tcx->memstream, ch); 125 break; 126 } 127 if (len == MAX_LEN_ATOM) 128 return T_TOOLONG; 129 buf[len++] = ch; 130 } 131 buf[len] = '\0'; 132 if (len == 0) 133 return T_ILCHAR; 134 135 if (_bcs_isdigit((int)(unsigned char)buf[0])) { 136 unsigned long ul; 137 char *post; 138 ul = strtoul(buf, &post, 0); 139 if (buf+len != post) 140 return T_ILCHAR; 141 token_data->constant = ul; 142 return T_CONSTANT; 143 } 144 145 #ifdef ALLOW_ARBITRARY_IDENTIFIER 146 strcpy(token_data->identifier, buf); 147 return T_IDENTIFIER; 148 #else 149 if (!strcmp(buf, PLURAL_NUMBER_SYMBOL)) 150 return T_IDENTIFIER; 151 return T_ILCHAR; 152 #endif 153 } 154 155 /* tokenizer main routine */ 156 static int 157 tokenize(struct tokenizer_context *tcx, union token_data *token_data) 158 { 159 int ch, prevch; 160 161 retry: 162 ch = _memstream_getc(&tcx->memstream); 163 if (_bcs_isspace(ch)) 164 goto retry; 165 166 switch (ch) { 167 case T_EOF: 168 return ch; 169 case '+': case '-': 170 token_data->op = ch; 171 return T_ADDITIVE; 172 case '*': case '/': case '%': 173 token_data->op = ch; 174 return T_MULTIPLICATIVE; 175 case '?': case ':': case '(': case ')': 176 token_data->op = ch; 177 return ch; 178 case '&': case '|': 179 prevch = ch; 180 ch = _memstream_getc(&tcx->memstream); 181 if (ch != prevch) { 182 _memstream_ungetc(&tcx->memstream, ch); 183 return T_ILCHAR; 184 } 185 token_data->op = ch; 186 switch (ch) { 187 case '&': 188 return T_LAND; 189 case '|': 190 return T_LOR; 191 default: 192 return T_ILTOKEN; 193 } 194 case '=': case '!': case '<': case '>': 195 prevch = ch; 196 ch = _memstream_getc(&tcx->memstream); 197 if (ch != '=') { 198 _memstream_ungetc(&tcx->memstream, ch); 199 switch (prevch) { 200 case '=': 201 return T_ILCHAR; 202 case '!': 203 return '!'; 204 case '<': 205 case '>': 206 token_data->op = prevch; /* OP_LT or OP_GT */ 207 return T_RELATIONAL; 208 } 209 } 210 /* '==', '!=', '<=' or '>=' */ 211 token_data->op = ch+prevch; 212 switch (prevch) { 213 case '=': 214 case '!': 215 return T_EQUALITY; 216 case '<': 217 case '>': 218 return T_RELATIONAL; 219 } 220 /*NOTREACHED*/ 221 } 222 223 _memstream_ungetc(&tcx->memstream, ch); 224 return tokenize_atom(tcx, token_data); 225 } 226 227 /* get the next token */ 228 static int 229 get_token(struct tokenizer_context *tcx, union token_data *token_data) 230 { 231 if (tcx->token0.token != T_NONE) { 232 int token = tcx->token0.token; 233 tcx->token0.token = T_NONE; 234 *token_data = tcx->token0.token_data; 235 return token; 236 } 237 return tokenize(tcx, token_data); 238 } 239 240 /* push back the last token */ 241 static void 242 unget_token(struct tokenizer_context *tcx, 243 int token, union token_data *token_data) 244 { 245 tcx->token0.token = token; 246 tcx->token0.token_data = *token_data; 247 } 248 249 #ifdef TEST_TOKENIZER 250 251 int 252 main(int argc, char **argv) 253 { 254 struct tokenizer_context tcx; 255 union token_data token_data; 256 int token; 257 258 if (argc != 2) { 259 fprintf(stderr, "usage: %s <expression>\n", argv[0]); 260 return EXIT_FAILURE; 261 } 262 263 init_tokenizer_context(&tcx); 264 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 265 266 while (1) { 267 token = get_token(&tcx, &token_data); 268 switch (token) { 269 case T_EOF: 270 goto quit; 271 case T_ILCHAR: 272 printf("illegal character.\n"); 273 goto quit; 274 case T_TOOLONG: 275 printf("too long atom.\n"); 276 goto quit; 277 case T_CONSTANT: 278 printf("constant: %lu\n", token_data.constant); 279 break; 280 case T_IDENTIFIER: 281 printf("symbol: %s\n", token_data.identifier); 282 break; 283 default: 284 printf("operator: "); 285 switch (token) { 286 case T_LAND: 287 printf("&&\n"); 288 break; 289 case T_LOR: 290 printf("||\n"); 291 break; 292 case T_EQUALITY: 293 printf("%c=\n", token_data.op-'='); 294 break; 295 case T_RELATIONAL: 296 switch(token_data.op) { 297 case OP_LTEQ: 298 case OP_GTEQ: 299 printf("%c=\n", token_data.op-'='); 300 break; 301 default: 302 printf("%c\n", token_data.op); 303 break; 304 } 305 break; 306 case T_ADDITIVE: 307 case T_MULTIPLICATIVE: 308 printf("%c\n", token_data.op); 309 break; 310 default: 311 printf("operator: %c\n", token); 312 } 313 } 314 } 315 quit: 316 return 0; 317 } 318 #endif /* TEST_TOKENIZER */ 319 320 321 /* ---------------------------------------------------------------------- 322 * parser part 323 * 324 * exp := cond 325 * 326 * cond := lor | lor '?' cond ':' cond 327 * 328 * lor := land ( '||' land )* 329 * 330 * land := equality ( '&&' equality )* 331 * 332 * equality := relational ( equalityops relational )* 333 * equalityops := '==' | '!=' 334 * 335 * relational := additive ( relationalops additive )* 336 * relationalops := '<' | '>' | '<=' | '>=' 337 * 338 * additive := multiplicative ( additiveops multiplicative )* 339 * additiveops := '+' | '-' 340 * 341 * multiplicative := lnot ( multiplicativeops lnot )* 342 * multiplicativeops := '*' | '/' | '%' 343 * 344 * lnot := '!' lnot | term 345 * 346 * term := literal | identifier | '(' exp ')' 347 * 348 */ 349 350 #define T_ENSURE_OK(token, label) \ 351 do { \ 352 if (T_IS_ERROR(token)) \ 353 goto label; \ 354 } while (/*CONSTCOND*/0) 355 #define T_ENSURE_SOMETHING(token, label) \ 356 do { \ 357 if ((token) == T_EOF) { \ 358 token = T_ILEND; \ 359 goto label; \ 360 } else if (T_IS_ERROR(token)) \ 361 goto label; \ 362 } while (/*CONSTCOND*/0) 363 364 #define parser_element plural_element 365 366 struct parser_element; 367 struct parser_op 368 { 369 char op; 370 struct parser_element *operands[MAX_NUM_OPERANDS]; 371 }; 372 struct parser_element 373 { 374 int kind; 375 union 376 { 377 struct parser_op parser_op; 378 union token_data token_data; 379 } u; 380 }; 381 382 struct parser_op2_transition 383 { 384 int kind; 385 const struct parser_op2_transition *next; 386 }; 387 388 /* prototypes */ 389 static int parse_cond(struct tokenizer_context *, struct parser_element *); 390 391 392 /* transition table for the 2-operand operators */ 393 #define DEF_TR(t, k, n) \ 394 static struct parser_op2_transition exp_tr_##t = { \ 395 k, &exp_tr_##n \ 396 } 397 #define DEF_TR0(t, k) \ 398 static struct parser_op2_transition exp_tr_##t = { \ 399 k, NULL /* expect lnot */ \ 400 } 401 402 DEF_TR0(multiplicative, T_MULTIPLICATIVE); 403 DEF_TR(additive, T_ADDITIVE, multiplicative); 404 DEF_TR(relational, T_RELATIONAL, additive); 405 DEF_TR(equality, T_EQUALITY, relational); 406 DEF_TR(land, T_LAND, equality); 407 DEF_TR(lor, T_LOR, land); 408 409 /* init a parser element structure */ 410 static void 411 init_parser_element(struct parser_element *pe) 412 { 413 int i; 414 415 pe->kind = T_NONE; 416 for (i=0; i<MAX_NUM_OPERANDS; i++) 417 pe->u.parser_op.operands[i] = NULL; 418 } 419 420 /* uninitialize a parser element structure with freeing children */ 421 static void free_parser_element(struct parser_element *); 422 static void 423 uninit_parser_element(struct parser_element *pe) 424 { 425 int i; 426 427 if (T_IS_OPERATOR(pe->kind)) 428 for (i=0; i<MAX_NUM_OPERANDS; i++) 429 if (pe->u.parser_op.operands[i]) 430 free_parser_element( 431 pe->u.parser_op.operands[i]); 432 } 433 434 /* free a parser element structure with freeing children */ 435 static void 436 free_parser_element(struct parser_element *pe) 437 { 438 if (pe) { 439 uninit_parser_element(pe); 440 free(pe); 441 } 442 } 443 444 445 /* copy a parser element structure shallowly */ 446 static void 447 copy_parser_element(struct parser_element *dpe, 448 const struct parser_element *spe) 449 { 450 memcpy(dpe, spe, sizeof *dpe); 451 } 452 453 /* duplicate a parser element structure shallowly */ 454 static struct parser_element * 455 dup_parser_element(const struct parser_element *pe) 456 { 457 struct parser_element *dpe = malloc(sizeof *dpe); 458 if (dpe) 459 copy_parser_element(dpe, pe); 460 return dpe; 461 } 462 463 /* term := identifier | constant | '(' exp ')' */ 464 static int 465 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem) 466 { 467 struct parser_element pe0; 468 int token; 469 union token_data token_data; 470 471 token = get_token(tcx, &token_data); 472 switch (token) { 473 case '(': 474 /* '(' exp ')' */ 475 init_parser_element(&pe0); 476 /* expect exp */ 477 token = parse_cond(tcx, &pe0); 478 T_ENSURE_OK(token, err); 479 /* expect ')' */ 480 token = get_token(tcx, &token_data); 481 T_ENSURE_SOMETHING(token, err); 482 if (token != ')') { 483 unget_token(tcx, token, &token_data); 484 token = T_ILTOKEN; 485 goto err; 486 } 487 copy_parser_element(pelem, &pe0); 488 return token; 489 err: 490 uninit_parser_element(&pe0); 491 return token; 492 case T_IDENTIFIER: 493 case T_CONSTANT: 494 pelem->kind = token; 495 pelem->u.token_data = token_data; 496 return token; 497 case T_EOF: 498 return T_ILEND; 499 default: 500 return T_ILTOKEN; 501 } 502 } 503 504 /* lnot := '!' lnot | term */ 505 static int 506 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem) 507 { 508 struct parser_element pe0; 509 int token; 510 union token_data token_data; 511 512 init_parser_element(&pe0); 513 514 /* '!' or not */ 515 token = get_token(tcx, &token_data); 516 if (token != '!') { 517 /* stop: term */ 518 unget_token(tcx, token, &token_data); 519 return parse_term(tcx, pelem); 520 } 521 522 /* '!' term */ 523 token = parse_lnot(tcx, &pe0); 524 T_ENSURE_OK(token, err); 525 526 pelem->kind = '!'; 527 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 528 return pelem->kind; 529 err: 530 uninit_parser_element(&pe0); 531 return token; 532 } 533 534 /* ext_op := ext_next ( op ext_next )* */ 535 static int 536 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem, 537 const struct parser_op2_transition *tr) 538 { 539 struct parser_element pe0, pe1, peop; 540 int token; 541 union token_data token_data; 542 char op; 543 544 /* special case: expect lnot */ 545 if (tr == NULL) 546 return parse_lnot(tcx, pelem); 547 548 init_parser_element(&pe0); 549 init_parser_element(&pe1); 550 token = parse_op2(tcx, &pe0, tr->next); 551 T_ENSURE_OK(token, err); 552 553 while (/*CONSTCOND*/1) { 554 /* expect op or empty */ 555 token = get_token(tcx, &token_data); 556 if (token != tr->kind) { 557 /* stop */ 558 unget_token(tcx, token, &token_data); 559 copy_parser_element(pelem, &pe0); 560 break; 561 } 562 op = token_data.op; 563 /* right hand */ 564 token = parse_op2(tcx, &pe1, tr->next); 565 T_ENSURE_OK(token, err); 566 567 init_parser_element(&peop); 568 peop.kind = tr->kind; 569 peop.u.parser_op.op = op; 570 peop.u.parser_op.operands[0] = dup_parser_element(&pe0); 571 init_parser_element(&pe0); 572 peop.u.parser_op.operands[1] = dup_parser_element(&pe1); 573 init_parser_element(&pe1); 574 copy_parser_element(&pe0, &peop); 575 } 576 return pelem->kind; 577 err: 578 uninit_parser_element(&pe1); 579 uninit_parser_element(&pe0); 580 return token; 581 } 582 583 /* cond := lor | lor '?' cond ':' cond */ 584 static int 585 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem) 586 { 587 struct parser_element pe0, pe1, pe2; 588 int token; 589 union token_data token_data; 590 591 init_parser_element(&pe0); 592 init_parser_element(&pe1); 593 init_parser_element(&pe2); 594 595 /* expect lor or empty */ 596 token = parse_op2(tcx, &pe0, &exp_tr_lor); 597 T_ENSURE_OK(token, err); 598 599 /* '?' or not */ 600 token = get_token(tcx, &token_data); 601 if (token != '?') { 602 /* stop: lor */ 603 unget_token(tcx, token, &token_data); 604 copy_parser_element(pelem, &pe0); 605 return pe0.kind; 606 } 607 608 /* lor '?' cond ':' cond */ 609 /* expect cond */ 610 token = parse_cond(tcx, &pe1); 611 T_ENSURE_OK(token, err); 612 613 /* expect ':' */ 614 token = get_token(tcx, &token_data); 615 T_ENSURE_OK(token, err); 616 if (token != ':') { 617 unget_token(tcx, token, &token_data); 618 token = T_ILTOKEN; 619 goto err; 620 } 621 622 /* expect cond */ 623 token = parse_cond(tcx, &pe2); 624 T_ENSURE_OK(token, err); 625 626 pelem->kind = '?'; 627 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 628 pelem->u.parser_op.operands[1] = dup_parser_element(&pe1); 629 pelem->u.parser_op.operands[2] = dup_parser_element(&pe2); 630 return pelem->kind; 631 err: 632 uninit_parser_element(&pe2); 633 uninit_parser_element(&pe1); 634 uninit_parser_element(&pe0); 635 return token; 636 } 637 638 static int 639 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem) 640 { 641 int token, token1; 642 union token_data token_data; 643 644 #ifdef ALLOW_EMPTY 645 /* empty check */ 646 token = get_token(tcx, &token_data); 647 if (token == T_EOF) 648 return token; 649 unget_token(tcx, token, &token_data); 650 #endif 651 652 token = parse_cond(tcx, pelem); 653 if (!T_IS_ERROR(token)) { 654 /* termination check */ 655 token1 = get_token(tcx, &token_data); 656 if (token1 == T_EOF) 657 return token; 658 else if (!T_IS_ERROR(token)) 659 unget_token(tcx, token1, &token_data); 660 return T_ILTOKEN; 661 } 662 return token; 663 } 664 665 666 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL) 667 #include <stdio.h> 668 669 static void dump_elem(struct parser_element *); 670 671 static void 672 dump_op2(struct parser_element *pelem) 673 { 674 dump_elem(pelem->u.parser_op.operands[0]); 675 printf(" "); 676 dump_elem(pelem->u.parser_op.operands[1]); 677 printf(")"); 678 } 679 680 static void 681 dump_op3(struct parser_element *pelem) 682 { 683 dump_elem(pelem->u.parser_op.operands[0]); 684 printf(" "); 685 dump_elem(pelem->u.parser_op.operands[1]); 686 printf(" "); 687 dump_elem(pelem->u.parser_op.operands[2]); 688 printf(")"); 689 } 690 691 static void 692 dump_elem(struct parser_element *pelem) 693 { 694 switch (pelem->kind) { 695 case T_LAND: 696 printf("(&& "); 697 dump_op2(pelem); 698 break; 699 case T_LOR: 700 printf("(|| "); 701 dump_op2(pelem); 702 break; 703 case T_EQUALITY: 704 switch (pelem->u.parser_op.op) { 705 case OP_EQ: 706 printf("(== "); 707 break; 708 case OP_NEQ: 709 printf("(!= "); 710 break; 711 } 712 dump_op2(pelem); 713 break; 714 case T_RELATIONAL: 715 switch (pelem->u.parser_op.op) { 716 case '<': 717 case '>': 718 printf("(%c ", pelem->u.parser_op.op); 719 break; 720 case OP_LTEQ: 721 case OP_GTEQ: 722 printf("(%c= ", pelem->u.parser_op.op-'='); 723 break; 724 } 725 dump_op2(pelem); 726 break; 727 case T_ADDITIVE: 728 case T_MULTIPLICATIVE: 729 printf("(%c ", pelem->u.parser_op.op); 730 dump_op2(pelem); 731 break; 732 case '!': 733 printf("(! "); 734 dump_elem(pelem->u.parser_op.operands[0]); 735 printf(")"); 736 break; 737 case '?': 738 printf("(? "); 739 dump_op3(pelem); 740 break; 741 case T_CONSTANT: 742 printf("%d", pelem->u.token_data.constant); 743 break; 744 case T_IDENTIFIER: 745 #ifdef ALLOW_ARBITRARY_IDENTIFIER 746 printf("%s", pelem->u.token_data.identifier); 747 #else 748 printf(PLURAL_NUMBER_SYMBOL); 749 #endif 750 break; 751 } 752 } 753 #endif 754 #ifdef TEST_PARSER 755 int 756 main(int argc, char **argv) 757 { 758 struct tokenizer_context tcx; 759 struct parser_element pelem; 760 int token; 761 762 if (argc != 2) { 763 fprintf(stderr, "usage: %s <expression>\n", argv[0]); 764 return EXIT_FAILURE; 765 } 766 767 init_tokenizer_context(&tcx); 768 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 769 770 init_parser_element(&pelem); 771 token = parse_exp(&tcx, &pelem); 772 773 if (token == T_EOF) 774 printf("none"); 775 else if (T_IS_ERROR(token)) 776 printf("error: 0x%X", token); 777 else 778 dump_elem(&pelem); 779 printf("\n"); 780 781 uninit_parser_element(&pelem); 782 783 return EXIT_SUCCESS; 784 } 785 #endif /* TEST_PARSER */ 786 787 /* ---------------------------------------------------------------------- 788 * calcurate plural number 789 */ 790 static unsigned long 791 calculate_plural(const struct parser_element *pe, unsigned long n) 792 { 793 unsigned long val0, val1; 794 switch (pe->kind) { 795 case T_IDENTIFIER: 796 return n; 797 case T_CONSTANT: 798 return pe->u.token_data.constant; 799 case '?': 800 val0 = calculate_plural(pe->u.parser_op.operands[0], n); 801 if (val0) 802 val1=calculate_plural(pe->u.parser_op.operands[1], n); 803 else 804 val1=calculate_plural(pe->u.parser_op.operands[2], n); 805 return val1; 806 case '!': 807 return !calculate_plural(pe->u.parser_op.operands[0], n); 808 case T_MULTIPLICATIVE: 809 case T_ADDITIVE: 810 case T_RELATIONAL: 811 case T_EQUALITY: 812 case T_LOR: 813 case T_LAND: 814 val0 = calculate_plural(pe->u.parser_op.operands[0], n); 815 val1 = calculate_plural(pe->u.parser_op.operands[1], n); 816 switch (pe->u.parser_op.op) { 817 case '*': 818 return val0*val1; 819 case '/': 820 return val0/val1; 821 case '%': 822 return val0%val1; 823 case '+': 824 return val0+val1; 825 case '-': 826 return val0-val1; 827 case '<': 828 return val0<val1; 829 case '>': 830 return val0>val1; 831 case OP_LTEQ: 832 return val0<=val1; 833 case OP_GTEQ: 834 return val0>=val1; 835 case OP_EQ: 836 return val0==val1; 837 case OP_NEQ: 838 return val0!=val1; 839 case '|': 840 return val0||val1; 841 case '&': 842 return val0&&val1; 843 } 844 } 845 return 0; 846 } 847 848 #ifdef TEST_CALC_PLURAL 849 #include <stdio.h> 850 851 int 852 main(int argc, char **argv) 853 { 854 struct tokenizer_context tcx; 855 struct parser_element pelem; 856 int token; 857 858 if (argc != 3) { 859 fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]); 860 return EXIT_FAILURE; 861 } 862 863 init_tokenizer_context(&tcx); 864 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 865 866 init_parser_element(&pelem); 867 token = parse_exp(&tcx, &pelem); 868 869 if (token == T_EOF) 870 printf("none"); 871 else if (T_IS_ERROR(token)) 872 printf("error: 0x%X", token); 873 else { 874 printf("plural = %lu", 875 calculate_plural(&pelem, atoi(argv[2]))); 876 } 877 printf("\n"); 878 879 uninit_parser_element(&pelem); 880 881 return EXIT_SUCCESS; 882 } 883 #endif /* TEST_CALC_PLURAL */ 884 885 886 /* ---------------------------------------------------------------------- 887 * parse plural forms 888 */ 889 890 static void 891 region_skip_ws(struct _region *r) 892 { 893 const char *str = _region_head(r); 894 size_t len = _region_size(r); 895 896 str = _bcs_skip_ws_len(str, &len); 897 _region_init(r, __UNCONST(str), len); 898 } 899 900 static void 901 region_trunc_rws(struct _region *r) 902 { 903 const char *str = _region_head(r); 904 size_t len = _region_size(r); 905 906 _bcs_trunc_rws_len(str, &len); 907 _region_init(r, __UNCONST(str), len); 908 } 909 910 static int 911 region_check_prefix(struct _region *r, const char *pre, size_t prelen, 912 int ignorecase) 913 { 914 if (_region_size(r) < prelen) 915 return -1; 916 917 if (ignorecase) { 918 if (_bcs_strncasecmp(_region_head(r), pre, prelen)) 919 return -1; 920 } else { 921 if (memcmp(_region_head(r), pre, prelen)) 922 return -1; 923 } 924 return 0; 925 } 926 927 static int 928 cut_trailing_semicolon(struct _region *r) 929 { 930 931 region_trunc_rws(r); 932 if (_region_size(r) == 0 || _region_peek8(r, _region_size(r)-1) != ';') 933 return -1; 934 _region_get_subregion(r, r, 0, _region_size(r)-1); 935 return 0; 936 } 937 938 static int 939 find_plural_forms(struct _region *r) 940 { 941 struct _memstream ms; 942 struct _region rr; 943 944 _memstream_bind(&ms, r); 945 946 while (!_memstream_getln_region(&ms, &rr)) { 947 if (!region_check_prefix(&rr, 948 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) { 949 _region_get_subregion( 950 r, &rr, LEN_PLURAL_FORMS, 951 _region_size(&rr)-LEN_PLURAL_FORMS); 952 region_skip_ws(r); 953 region_trunc_rws(r); 954 return 0; 955 } 956 } 957 return -1; 958 } 959 960 static int 961 skip_assignment(struct _region *r, const char *sym, size_t symlen) 962 { 963 region_skip_ws(r); 964 if (region_check_prefix(r, sym, symlen, 0)) 965 return -1; 966 _region_get_subregion(r, r, symlen, _region_size(r)-symlen); 967 region_skip_ws(r); 968 if (_region_size(r) == 0 || _region_peek8(r, 0) != '=') 969 return -1; 970 _region_get_subregion(r, r, 1, _region_size(r)-1); 971 region_skip_ws(r); 972 return 0; 973 } 974 975 static int 976 skip_nplurals(struct _region *r, unsigned long *rnp) 977 { 978 unsigned long np; 979 char buf[MAX_LEN_ATOM+2], *endptr; 980 const char *endptrconst; 981 size_t ofs; 982 983 if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL)) 984 return -1; 985 if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0))) 986 return -1; 987 strlcpy(buf, _region_head(r), sizeof (buf)); 988 np = strtoul(buf, &endptr, 0); 989 endptrconst = _bcs_skip_ws(endptr); 990 if (*endptrconst != ';') 991 return -1; 992 ofs = endptrconst+1-buf; 993 if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs)) 994 return -1; 995 if (rnp) 996 *rnp = np; 997 return 0; 998 } 999 1000 static int 1001 parse_plural_body(struct _region *r, struct parser_element **rpe) 1002 { 1003 int token; 1004 struct tokenizer_context tcx; 1005 struct parser_element pelem, *ppe; 1006 1007 init_tokenizer_context(&tcx); 1008 _memstream_bind(&tcx.memstream, r); 1009 1010 init_parser_element(&pelem); 1011 token = parse_exp(&tcx, &pelem); 1012 if (T_IS_ERROR(token)) 1013 return token; 1014 1015 ppe = dup_parser_element(&pelem); 1016 if (ppe == NULL) { 1017 uninit_parser_element(&pelem); 1018 return T_NOMEM; 1019 } 1020 1021 *rpe = ppe; 1022 1023 return 0; 1024 } 1025 1026 static int 1027 parse_plural(struct parser_element **rpe, unsigned long *rnp, 1028 const char *str, size_t len) 1029 { 1030 struct _region r; 1031 1032 _region_init(&r, __UNCONST(str), len); 1033 1034 if (find_plural_forms(&r)) 1035 return T_NOTFOUND; 1036 if (skip_nplurals(&r, rnp)) 1037 return T_ILPLURAL; 1038 if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL)) 1039 return T_ILPLURAL; 1040 if (cut_trailing_semicolon(&r)) 1041 return T_ILPLURAL; 1042 return parse_plural_body(&r, rpe); 1043 } 1044 1045 #ifdef TEST_PARSE_PLURAL 1046 int 1047 main(int argc, char **argv) 1048 { 1049 int ret; 1050 struct parser_element *pelem; 1051 unsigned long np; 1052 1053 if (argc != 2 && argc != 3) { 1054 fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]); 1055 return EXIT_FAILURE; 1056 } 1057 1058 ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1])); 1059 1060 if (ret == T_EOF) 1061 printf("none"); 1062 else if (T_IS_ERROR(ret)) 1063 printf("error: 0x%X", ret); 1064 else { 1065 printf("syntax tree: "); 1066 dump_elem(pelem); 1067 printf("\nnplurals = %lu", np); 1068 if (argv[2]) 1069 printf(", plural = %lu", 1070 calculate_plural(pelem, atoi(argv[2]))); 1071 free_parser_element(pelem); 1072 } 1073 printf("\n"); 1074 1075 1076 return EXIT_SUCCESS; 1077 } 1078 #endif /* TEST_PARSE_PLURAL */ 1079 1080 /* 1081 * external interface 1082 */ 1083 1084 int 1085 _gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp, 1086 const char *str, size_t len) 1087 { 1088 return parse_plural((struct parser_element **)rpe, rnp, str, len); 1089 } 1090 1091 unsigned long 1092 _gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n) 1093 { 1094 return calculate_plural((void *)__UNCONST(pe), n); 1095 } 1096 1097 void 1098 _gettext_free_plural(struct gettext_plural *pe) 1099 { 1100 free_parser_element((void *)pe); 1101 } 1102 1103 #ifdef TEST_PLURAL 1104 #include <libintl.h> 1105 #include <locale.h> 1106 1107 #define PR(n) printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n)) 1108 1109 int 1110 main(void) 1111 { 1112 bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */ 1113 PR(1); 1114 PR(2); 1115 PR(3); 1116 PR(4); 1117 1118 return 0; 1119 } 1120 #endif 1121