1 /* $NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $ */
2
3 /*-
4 * Copyright (c) 2005 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30 #include <sys/cdefs.h>
31 __RCSID("$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $");
32
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <citrus/citrus_namespace.h>
38 #include <citrus/citrus_region.h>
39 #include <citrus/citrus_memstream.h>
40 #include <citrus/citrus_bcs.h>
41 #include "plural_parser.h"
42
/*
 * The stand-alone tokenizer and parser test programs are built with a
 * relaxed grammar: an empty expression is accepted (see parse_exp) and
 * any identifier is a valid atom (see tokenize_atom), not only
 * PLURAL_NUMBER_SYMBOL.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* maximum number of characters in one identifier or constant */
#define MAX_LEN_ATOM 10
/* number of operand slots in a parser_op; only '?' uses all of them */
#define MAX_NUM_OPERANDS 3

/*
 * Token codes.  Single-character tokens ('?', ':', '(', ')', '!') are
 * represented by their character value; everything else uses the codes
 * below.  Codes below 0x200 are operators, 0x200-0x2ff are atoms and
 * 0x300 and above are errors.
 */
#define T_EOF EOF
#define T_NONE 0x100
#define T_LAND 0x101 /* && */
#define T_LOR 0x102 /* || */
#define T_EQUALITY 0x103 /* == or != */
#define T_RELATIONAL 0x104 /* <, >, <= or >= */
#define T_ADDITIVE 0x105 /* + or - */
#define T_MULTIPLICATIVE 0x106 /* *, / or % */
#define T_IDENTIFIER 0x200
#define T_CONSTANT 0x201
#define T_ILCHAR 0x300 /* illegal character or malformed atom */
#define T_TOOLONG 0x301 /* atom longer than MAX_LEN_ATOM */
#define T_ILTOKEN 0x302 /* unexpected token */
#define T_ILEND 0x303 /* unexpected end of input */
#define T_NOMEM 0x304 /* memory allocation failed */
#define T_NOTFOUND 0x305 /* no Plural-Forms: line found */
#define T_ILPLURAL 0x306 /* malformed nplurals=/plural= part */
/* true for T_EOF, T_NONE, character tokens and the T_L.. T_MULTIPLICATIVE codes */
#define T_IS_OPERATOR(t) ((t) < 0x200)
#define T_IS_ERROR(t) ((t) >= 0x300)

/*
 * Two-character operators are stored in a single char as the sum of
 * their two characters; the single-character forms '<' and '>' are
 * stored as themselves.
 */
#define OP_EQ ('='+'=')
#define OP_NEQ ('!'+'=')
#define OP_LTEQ ('<'+'=')
#define OP_GTEQ ('>'+'=')

/* fixed names recognized in the expression and in the header line */
#define PLURAL_NUMBER_SYMBOL "n"
#define NPLURALS_SYMBOL "nplurals"
#define LEN_NPLURAL_SYMBOL (sizeof (NPLURALS_SYMBOL) -1)
#define PLURAL_SYMBOL "plural"
#define LEN_PLURAL_SYMBOL (sizeof (PLURAL_SYMBOL) -1)
#define PLURAL_FORMS "Plural-Forms:"
#define LEN_PLURAL_FORMS (sizeof (PLURAL_FORMS) -1)
83
84 /* ----------------------------------------------------------------------
85 * tokenizer part
86 */
87
/*
 * Payload that accompanies a token code.  Which member is meaningful
 * depends on the token: `constant' for T_CONSTANT, `identifier' for
 * T_IDENTIFIER (test builds only) and `op' for operator tokens.
 */
union token_data
{
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	char identifier[MAX_LEN_ATOM+1];
#endif
	/* operator character, or an OP_* sum for two-character operators */
	char op;
};
96
/*
 * Tokenizer state: the input stream plus a one-token push-back slot.
 */
struct tokenizer_context
{
	/* input being tokenized */
	struct _memstream memstream;
	/* pushed-back token; token == T_NONE means the slot is empty */
	struct {
		int token;
		union token_data token_data;
	} token0;
};
105
106 /* initialize a tokenizer context */
107 static void
init_tokenizer_context(struct tokenizer_context * tcx)108 init_tokenizer_context(struct tokenizer_context *tcx)
109 {
110 tcx->token0.token = T_NONE;
111 }
112
113 /* get an atom (identifier or constant) */
114 static int
tokenize_atom(struct tokenizer_context * tcx,union token_data * token_data)115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116 {
117 int ch, len;
118 char buf[MAX_LEN_ATOM+1];
119
120 len = 0;
121 while (/*CONSTCOND*/1) {
122 ch = _memstream_getc(&tcx->memstream);
123 if (!(_bcs_isalnum(ch) || ch == '_')) {
124 _memstream_ungetc(&tcx->memstream, ch);
125 break;
126 }
127 if (len == MAX_LEN_ATOM)
128 return T_TOOLONG;
129 buf[len++] = ch;
130 }
131 buf[len] = '\0';
132 if (len == 0)
133 return T_ILCHAR;
134
135 if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136 unsigned long ul;
137 char *post;
138 ul = strtoul(buf, &post, 0);
139 if (buf+len != post)
140 return T_ILCHAR;
141 token_data->constant = ul;
142 return T_CONSTANT;
143 }
144
145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
146 strcpy(token_data->identifier, buf);
147 return T_IDENTIFIER;
148 #else
149 if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150 return T_IDENTIFIER;
151 return T_ILCHAR;
152 #endif
153 }
154
155 /* tokenizer main routine */
156 static int
tokenize(struct tokenizer_context * tcx,union token_data * token_data)157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158 {
159 int ch, prevch;
160
161 retry:
162 ch = _memstream_getc(&tcx->memstream);
163 if (_bcs_isspace(ch))
164 goto retry;
165
166 switch (ch) {
167 case T_EOF:
168 return ch;
169 case '+': case '-':
170 token_data->op = ch;
171 return T_ADDITIVE;
172 case '*': case '/': case '%':
173 token_data->op = ch;
174 return T_MULTIPLICATIVE;
175 case '?': case ':': case '(': case ')':
176 token_data->op = ch;
177 return ch;
178 case '&': case '|':
179 prevch = ch;
180 ch = _memstream_getc(&tcx->memstream);
181 if (ch != prevch) {
182 _memstream_ungetc(&tcx->memstream, ch);
183 return T_ILCHAR;
184 }
185 token_data->op = ch;
186 switch (ch) {
187 case '&':
188 return T_LAND;
189 case '|':
190 return T_LOR;
191 default:
192 return T_ILTOKEN;
193 }
194 case '=': case '!': case '<': case '>':
195 prevch = ch;
196 ch = _memstream_getc(&tcx->memstream);
197 if (ch != '=') {
198 _memstream_ungetc(&tcx->memstream, ch);
199 switch (prevch) {
200 case '=':
201 return T_ILCHAR;
202 case '!':
203 return '!';
204 case '<':
205 case '>':
206 token_data->op = prevch; /* OP_LT or OP_GT */
207 return T_RELATIONAL;
208 }
209 }
210 /* '==', '!=', '<=' or '>=' */
211 token_data->op = ch+prevch;
212 switch (prevch) {
213 case '=':
214 case '!':
215 return T_EQUALITY;
216 case '<':
217 case '>':
218 return T_RELATIONAL;
219 }
220 /*NOTREACHED*/
221 }
222
223 _memstream_ungetc(&tcx->memstream, ch);
224 return tokenize_atom(tcx, token_data);
225 }
226
227 /* get the next token */
228 static int
get_token(struct tokenizer_context * tcx,union token_data * token_data)229 get_token(struct tokenizer_context *tcx, union token_data *token_data)
230 {
231 if (tcx->token0.token != T_NONE) {
232 int token = tcx->token0.token;
233 tcx->token0.token = T_NONE;
234 *token_data = tcx->token0.token_data;
235 return token;
236 }
237 return tokenize(tcx, token_data);
238 }
239
240 /* push back the last token */
241 static void
unget_token(struct tokenizer_context * tcx,int token,union token_data * token_data)242 unget_token(struct tokenizer_context *tcx,
243 int token, union token_data *token_data)
244 {
245 tcx->token0.token = token;
246 tcx->token0.token_data = *token_data;
247 }
248
249 #ifdef TEST_TOKENIZER
250
251 int
main(int argc,char ** argv)252 main(int argc, char **argv)
253 {
254 struct tokenizer_context tcx;
255 union token_data token_data;
256 int token;
257
258 if (argc != 2) {
259 fprintf(stderr, "usage: %s <expression>\n", argv[0]);
260 return EXIT_FAILURE;
261 }
262
263 init_tokenizer_context(&tcx);
264 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
265
266 while (1) {
267 token = get_token(&tcx, &token_data);
268 switch (token) {
269 case T_EOF:
270 goto quit;
271 case T_ILCHAR:
272 printf("illegal character.\n");
273 goto quit;
274 case T_TOOLONG:
275 printf("too long atom.\n");
276 goto quit;
277 case T_CONSTANT:
278 printf("constant: %lu\n", token_data.constant);
279 break;
280 case T_IDENTIFIER:
281 printf("symbol: %s\n", token_data.identifier);
282 break;
283 default:
284 printf("operator: ");
285 switch (token) {
286 case T_LAND:
287 printf("&&\n");
288 break;
289 case T_LOR:
290 printf("||\n");
291 break;
292 case T_EQUALITY:
293 printf("%c=\n", token_data.op-'=');
294 break;
295 case T_RELATIONAL:
296 switch(token_data.op) {
297 case OP_LTEQ:
298 case OP_GTEQ:
299 printf("%c=\n", token_data.op-'=');
300 break;
301 default:
302 printf("%c\n", token_data.op);
303 break;
304 }
305 break;
306 case T_ADDITIVE:
307 case T_MULTIPLICATIVE:
308 printf("%c\n", token_data.op);
309 break;
310 default:
311 printf("operator: %c\n", token);
312 }
313 }
314 }
315 quit:
316 return 0;
317 }
318 #endif /* TEST_TOKENIZER */
319
320
321 /* ----------------------------------------------------------------------
322 * parser part
323 *
324 * exp := cond
325 *
326 * cond := lor | lor '?' cond ':' cond
327 *
328 * lor := land ( '||' land )*
329 *
330 * land := equality ( '&&' equality )*
331 *
332 * equality := relational ( equalityops relational )*
333 * equalityops := '==' | '!='
334 *
335 * relational := additive ( relationalops additive )*
336 * relationalops := '<' | '>' | '<=' | '>='
337 *
338 * additive := multiplicative ( additiveops multiplicative )*
339 * additiveops := '+' | '-'
340 *
341 * multiplicative := lnot ( multiplicativeops lnot )*
342 * multiplicativeops := '*' | '/' | '%'
343 *
344 * lnot := '!' lnot | term
345 *
346 * term := literal | identifier | '(' exp ')'
347 *
348 */
349
/*
 * T_ENSURE_OK: jump to `label' if `token' is an error code.  T_EOF is
 * not an error code and passes through.
 */
#define T_ENSURE_OK(token, label) \
do { \
if (T_IS_ERROR(token)) \
goto label; \
} while (0)
/*
 * T_ENSURE_SOMETHING: like T_ENSURE_OK, but end of input is also
 * rejected: `token' (which must be an lvalue) is replaced by T_ILEND
 * before jumping to `label'.
 */
#define T_ENSURE_SOMETHING(token, label) \
do { \
if ((token) == T_EOF) { \
token = T_ILEND; \
goto label; \
} else if (T_IS_ERROR(token)) \
goto label; \
} while (0)
363
/* inside this file the tree node type is spelled parser_element */
#define parser_element plural_element

struct parser_element;
/* operator node payload: the operator and its operand subtrees */
struct parser_op
{
	/* operator character or OP_* value, as produced by the tokenizer */
	char op;
	/* unused slots are NULL (see init_parser_element) */
	struct parser_element *operands[MAX_NUM_OPERANDS];
};
/* one node of the parsed expression tree */
struct parser_element
{
	/* token code of the node; selects the valid member of `u' */
	int kind;
	union
	{
		/* valid when T_IS_OPERATOR(kind) */
		struct parser_op parser_op;
		/* valid for T_IDENTIFIER and T_CONSTANT */
		union token_data token_data;
	} u;
};
381
/*
 * One precedence level of the 2-operand operators, used by parse_op2().
 */
struct parser_op2_transition
{
	/* token code of the operators handled at this level */
	int kind;
	/* next (tighter binding) level; NULL means parse an lnot */
	const struct parser_op2_transition *next;
};
387
388 /* prototypes */
389 static int parse_cond(struct tokenizer_context *, struct parser_element *);
390
391
392 /* transition table for the 2-operand operators */
393 #define DEF_TR(t, k, n) \
394 static struct parser_op2_transition exp_tr_##t = { \
395 k, &exp_tr_##n \
396 }
397 #define DEF_TR0(t, k) \
398 static struct parser_op2_transition exp_tr_##t = { \
399 k, NULL /* expect lnot */ \
400 }
401
402 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403 DEF_TR(additive, T_ADDITIVE, multiplicative);
404 DEF_TR(relational, T_RELATIONAL, additive);
405 DEF_TR(equality, T_EQUALITY, relational);
406 DEF_TR(land, T_LAND, equality);
407 DEF_TR(lor, T_LOR, land);
408
409 /* init a parser element structure */
410 static void
init_parser_element(struct parser_element * pe)411 init_parser_element(struct parser_element *pe)
412 {
413 int i;
414
415 pe->kind = T_NONE;
416 for (i=0; i<MAX_NUM_OPERANDS; i++)
417 pe->u.parser_op.operands[i] = NULL;
418 }
419
420 /* uninitialize a parser element structure with freeing children */
421 static void free_parser_element(struct parser_element *);
422 static void
uninit_parser_element(struct parser_element * pe)423 uninit_parser_element(struct parser_element *pe)
424 {
425 int i;
426
427 if (T_IS_OPERATOR(pe->kind))
428 for (i=0; i<MAX_NUM_OPERANDS; i++)
429 if (pe->u.parser_op.operands[i])
430 free_parser_element(
431 pe->u.parser_op.operands[i]);
432 }
433
/*
 * Free a heap-allocated parser element together with its whole subtree.
 * A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;

	uninit_parser_element(pe);
	free(pe);
}
443
444
445 /* copy a parser element structure shallowly */
446 static void
copy_parser_element(struct parser_element * dpe,const struct parser_element * spe)447 copy_parser_element(struct parser_element *dpe,
448 const struct parser_element *spe)
449 {
450 memcpy(dpe, spe, sizeof *dpe);
451 }
452
453 /* duplicate a parser element structure shallowly */
454 static struct parser_element *
dup_parser_element(const struct parser_element * pe)455 dup_parser_element(const struct parser_element *pe)
456 {
457 struct parser_element *dpe = malloc(sizeof *dpe);
458 if (dpe)
459 copy_parser_element(dpe, pe);
460 return dpe;
461 }
462
463 /* term := identifier | constant | '(' exp ')' */
464 static int
parse_term(struct tokenizer_context * tcx,struct parser_element * pelem)465 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466 {
467 struct parser_element pe0;
468 int token;
469 union token_data token_data;
470
471 token = get_token(tcx, &token_data);
472 switch (token) {
473 case '(':
474 /* '(' exp ')' */
475 init_parser_element(&pe0);
476 /* expect exp */
477 token = parse_cond(tcx, &pe0);
478 T_ENSURE_OK(token, err);
479 /* expect ')' */
480 token = get_token(tcx, &token_data);
481 T_ENSURE_SOMETHING(token, err);
482 if (token != ')') {
483 unget_token(tcx, token, &token_data);
484 token = T_ILTOKEN;
485 goto err;
486 }
487 copy_parser_element(pelem, &pe0);
488 return token;
489 err:
490 uninit_parser_element(&pe0);
491 return token;
492 case T_IDENTIFIER:
493 case T_CONSTANT:
494 pelem->kind = token;
495 pelem->u.token_data = token_data;
496 return token;
497 case T_EOF:
498 return T_ILEND;
499 default:
500 return T_ILTOKEN;
501 }
502 }
503
504 /* lnot := '!' lnot | term */
505 static int
parse_lnot(struct tokenizer_context * tcx,struct parser_element * pelem)506 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507 {
508 struct parser_element pe0;
509 int token;
510 union token_data token_data;
511
512 init_parser_element(&pe0);
513
514 /* '!' or not */
515 token = get_token(tcx, &token_data);
516 if (token != '!') {
517 /* stop: term */
518 unget_token(tcx, token, &token_data);
519 return parse_term(tcx, pelem);
520 }
521
522 /* '!' term */
523 token = parse_lnot(tcx, &pe0);
524 T_ENSURE_OK(token, err);
525
526 pelem->kind = '!';
527 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528 return pelem->kind;
529 err:
530 uninit_parser_element(&pe0);
531 return token;
532 }
533
534 /* ext_op := ext_next ( op ext_next )* */
535 static int
parse_op2(struct tokenizer_context * tcx,struct parser_element * pelem,const struct parser_op2_transition * tr)536 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537 const struct parser_op2_transition *tr)
538 {
539 struct parser_element pe0, pe1, peop;
540 int token;
541 union token_data token_data;
542 char op;
543
544 /* special case: expect lnot */
545 if (tr == NULL)
546 return parse_lnot(tcx, pelem);
547
548 init_parser_element(&pe0);
549 init_parser_element(&pe1);
550 token = parse_op2(tcx, &pe0, tr->next);
551 T_ENSURE_OK(token, err);
552
553 while (/*CONSTCOND*/1) {
554 /* expect op or empty */
555 token = get_token(tcx, &token_data);
556 if (token != tr->kind) {
557 /* stop */
558 unget_token(tcx, token, &token_data);
559 copy_parser_element(pelem, &pe0);
560 break;
561 }
562 op = token_data.op;
563 /* right hand */
564 token = parse_op2(tcx, &pe1, tr->next);
565 T_ENSURE_OK(token, err);
566
567 init_parser_element(&peop);
568 peop.kind = tr->kind;
569 peop.u.parser_op.op = op;
570 peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571 init_parser_element(&pe0);
572 peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573 init_parser_element(&pe1);
574 copy_parser_element(&pe0, &peop);
575 }
576 return pelem->kind;
577 err:
578 uninit_parser_element(&pe1);
579 uninit_parser_element(&pe0);
580 return token;
581 }
582
583 /* cond := lor | lor '?' cond ':' cond */
584 static int
parse_cond(struct tokenizer_context * tcx,struct parser_element * pelem)585 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586 {
587 struct parser_element pe0, pe1, pe2;
588 int token;
589 union token_data token_data;
590
591 init_parser_element(&pe0);
592 init_parser_element(&pe1);
593 init_parser_element(&pe2);
594
595 /* expect lor or empty */
596 token = parse_op2(tcx, &pe0, &exp_tr_lor);
597 T_ENSURE_OK(token, err);
598
599 /* '?' or not */
600 token = get_token(tcx, &token_data);
601 if (token != '?') {
602 /* stop: lor */
603 unget_token(tcx, token, &token_data);
604 copy_parser_element(pelem, &pe0);
605 return pe0.kind;
606 }
607
608 /* lor '?' cond ':' cond */
609 /* expect cond */
610 token = parse_cond(tcx, &pe1);
611 T_ENSURE_OK(token, err);
612
613 /* expect ':' */
614 token = get_token(tcx, &token_data);
615 T_ENSURE_OK(token, err);
616 if (token != ':') {
617 unget_token(tcx, token, &token_data);
618 token = T_ILTOKEN;
619 goto err;
620 }
621
622 /* expect cond */
623 token = parse_cond(tcx, &pe2);
624 T_ENSURE_OK(token, err);
625
626 pelem->kind = '?';
627 pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628 pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629 pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630 return pelem->kind;
631 err:
632 uninit_parser_element(&pe2);
633 uninit_parser_element(&pe1);
634 uninit_parser_element(&pe0);
635 return token;
636 }
637
638 static int
parse_exp(struct tokenizer_context * tcx,struct parser_element * pelem)639 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640 {
641 int token, token1;
642 union token_data token_data;
643
644 #ifdef ALLOW_EMPTY
645 /* empty check */
646 token = get_token(tcx, &token_data);
647 if (token == T_EOF)
648 return token;
649 unget_token(tcx, token, &token_data);
650 #endif
651
652 token = parse_cond(tcx, pelem);
653 if (!T_IS_ERROR(token)) {
654 /* termination check */
655 token1 = get_token(tcx, &token_data);
656 if (token1 == T_EOF)
657 return token;
658 else if (!T_IS_ERROR(token))
659 unget_token(tcx, token1, &token_data);
660 return T_ILTOKEN;
661 }
662 return token;
663 }
664
665
666 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667 #include <stdio.h>
668
669 static void dump_elem(struct parser_element *);
670
671 static void
dump_op2(struct parser_element * pelem)672 dump_op2(struct parser_element *pelem)
673 {
674 dump_elem(pelem->u.parser_op.operands[0]);
675 printf(" ");
676 dump_elem(pelem->u.parser_op.operands[1]);
677 printf(")");
678 }
679
680 static void
dump_op3(struct parser_element * pelem)681 dump_op3(struct parser_element *pelem)
682 {
683 dump_elem(pelem->u.parser_op.operands[0]);
684 printf(" ");
685 dump_elem(pelem->u.parser_op.operands[1]);
686 printf(" ");
687 dump_elem(pelem->u.parser_op.operands[2]);
688 printf(")");
689 }
690
691 static void
dump_elem(struct parser_element * pelem)692 dump_elem(struct parser_element *pelem)
693 {
694 switch (pelem->kind) {
695 case T_LAND:
696 printf("(&& ");
697 dump_op2(pelem);
698 break;
699 case T_LOR:
700 printf("(|| ");
701 dump_op2(pelem);
702 break;
703 case T_EQUALITY:
704 switch (pelem->u.parser_op.op) {
705 case OP_EQ:
706 printf("(== ");
707 break;
708 case OP_NEQ:
709 printf("(!= ");
710 break;
711 }
712 dump_op2(pelem);
713 break;
714 case T_RELATIONAL:
715 switch (pelem->u.parser_op.op) {
716 case '<':
717 case '>':
718 printf("(%c ", pelem->u.parser_op.op);
719 break;
720 case OP_LTEQ:
721 case OP_GTEQ:
722 printf("(%c= ", pelem->u.parser_op.op-'=');
723 break;
724 }
725 dump_op2(pelem);
726 break;
727 case T_ADDITIVE:
728 case T_MULTIPLICATIVE:
729 printf("(%c ", pelem->u.parser_op.op);
730 dump_op2(pelem);
731 break;
732 case '!':
733 printf("(! ");
734 dump_elem(pelem->u.parser_op.operands[0]);
735 printf(")");
736 break;
737 case '?':
738 printf("(? ");
739 dump_op3(pelem);
740 break;
741 case T_CONSTANT:
742 printf("%d", pelem->u.token_data.constant);
743 break;
744 case T_IDENTIFIER:
745 #ifdef ALLOW_ARBITRARY_IDENTIFIER
746 printf("%s", pelem->u.token_data.identifier);
747 #else
748 printf(PLURAL_NUMBER_SYMBOL);
749 #endif
750 break;
751 }
752 }
753 #endif
754 #ifdef TEST_PARSER
755 int
main(int argc,char ** argv)756 main(int argc, char **argv)
757 {
758 struct tokenizer_context tcx;
759 struct parser_element pelem;
760 int token;
761
762 if (argc != 2) {
763 fprintf(stderr, "usage: %s <expression>\n", argv[0]);
764 return EXIT_FAILURE;
765 }
766
767 init_tokenizer_context(&tcx);
768 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
769
770 init_parser_element(&pelem);
771 token = parse_exp(&tcx, &pelem);
772
773 if (token == T_EOF)
774 printf("none");
775 else if (T_IS_ERROR(token))
776 printf("error: 0x%X", token);
777 else
778 dump_elem(&pelem);
779 printf("\n");
780
781 uninit_parser_element(&pelem);
782
783 return EXIT_SUCCESS;
784 }
785 #endif /* TEST_PARSER */
786
787 /* ----------------------------------------------------------------------
 * calculate plural number
789 */
790 static unsigned long
calculate_plural(const struct parser_element * pe,unsigned long n)791 calculate_plural(const struct parser_element *pe, unsigned long n)
792 {
793 unsigned long val0, val1;
794 switch (pe->kind) {
795 case T_IDENTIFIER:
796 return n;
797 case T_CONSTANT:
798 return pe->u.token_data.constant;
799 case '?':
800 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801 if (val0)
802 val1=calculate_plural(pe->u.parser_op.operands[1], n);
803 else
804 val1=calculate_plural(pe->u.parser_op.operands[2], n);
805 return val1;
806 case '!':
807 return !calculate_plural(pe->u.parser_op.operands[0], n);
808 case T_MULTIPLICATIVE:
809 case T_ADDITIVE:
810 case T_RELATIONAL:
811 case T_EQUALITY:
812 case T_LOR:
813 case T_LAND:
814 val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815 val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816 switch (pe->u.parser_op.op) {
817 case '*':
818 return val0*val1;
819 case '/':
820 return val0/val1;
821 case '%':
822 return val0%val1;
823 case '+':
824 return val0+val1;
825 case '-':
826 return val0-val1;
827 case '<':
828 return val0<val1;
829 case '>':
830 return val0>val1;
831 case OP_LTEQ:
832 return val0<=val1;
833 case OP_GTEQ:
834 return val0>=val1;
835 case OP_EQ:
836 return val0==val1;
837 case OP_NEQ:
838 return val0!=val1;
839 case '|':
840 return val0||val1;
841 case '&':
842 return val0&&val1;
843 }
844 }
845 return 0;
846 }
847
848 #ifdef TEST_CALC_PLURAL
849 #include <stdio.h>
850
851 int
main(int argc,char ** argv)852 main(int argc, char **argv)
853 {
854 struct tokenizer_context tcx;
855 struct parser_element pelem;
856 int token;
857
858 if (argc != 3) {
859 fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
860 return EXIT_FAILURE;
861 }
862
863 init_tokenizer_context(&tcx);
864 _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
865
866 init_parser_element(&pelem);
867 token = parse_exp(&tcx, &pelem);
868
869 if (token == T_EOF)
870 printf("none");
871 else if (T_IS_ERROR(token))
872 printf("error: 0x%X", token);
873 else {
874 printf("plural = %lu",
875 calculate_plural(&pelem, atoi(argv[2])));
876 }
877 printf("\n");
878
879 uninit_parser_element(&pelem);
880
881 return EXIT_SUCCESS;
882 }
883 #endif /* TEST_CALC_PLURAL */
884
885
886 /* ----------------------------------------------------------------------
887 * parse plural forms
888 */
889
/*
 * Re-bind the region to the start pointer returned by
 * _bcs_skip_ws_len() and the length it leaves behind (presumably the
 * region with its leading white space removed -- see citrus_bcs).
 */
static void
region_skip_ws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *head;

	head = _bcs_skip_ws_len(_region_head(r), &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
899
/*
 * Keep the region's start pointer but replace its length with the one
 * computed by _bcs_trunc_rws_len() (presumably the length without
 * trailing white space -- see citrus_bcs).
 */
static void
region_trunc_rws(struct _region *r)
{
	const char *head = _region_head(r);
	size_t trimmed = _region_size(r);

	_bcs_trunc_rws_len(head, &trimmed);
	_region_init(r, __UNCONST(head), trimmed);
}
909
/*
 * Test whether the region starts with the `prelen' bytes at `pre'.
 * A non-zero `ignorecase' selects _bcs_strncasecmp() instead of
 * memcmp().  Returns 0 on a match and -1 otherwise, including when the
 * region is shorter than the prefix.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
    int ignorecase)
{
	int differs;

	if (_region_size(r) < prelen)
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen);
	else
		differs = memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
926
/*
 * Trim the end of the region with region_trunc_rws() and then drop a
 * final ';'.  Returns 0 when the semicolon was present and removed, -1
 * when the trimmed region is empty or does not end in ';'.
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t size;

	region_trunc_rws(r);

	size = _region_size(r);
	if (size == 0)
		return -1;
	if (_region_peek8(r, size-1) != ';')
		return -1;

	_region_get_subregion(r, r, 0, size-1);
	return 0;
}
937
938 static int
find_plural_forms(struct _region * r)939 find_plural_forms(struct _region *r)
940 {
941 struct _memstream ms;
942 struct _region rr;
943
944 _memstream_bind(&ms, r);
945
946 while (!_memstream_getln_region(&ms, &rr)) {
947 if (!region_check_prefix(&rr,
948 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949 _region_get_subregion(
950 r, &rr, LEN_PLURAL_FORMS,
951 _region_size(&rr)-LEN_PLURAL_FORMS);
952 region_skip_ws(r);
953 region_trunc_rws(r);
954 return 0;
955 }
956 }
957 return -1;
958 }
959
/*
 * Consume "<sym> =" from the front of the region, calling
 * region_skip_ws() before the symbol, before the '=' and after it.
 * The symbol is compared case-sensitively.  Returns 0 on success, -1 if
 * the symbol or the '=' is missing; on failure the region may already
 * have been advanced past the part that did match.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{

	/* the symbol itself */
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r)-symlen);

	/* the '=' sign */
	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r)-1);

	region_skip_ws(r);
	return 0;
}
974
975 static int
skip_nplurals(struct _region * r,unsigned long * rnp)976 skip_nplurals(struct _region *r, unsigned long *rnp)
977 {
978 unsigned long np;
979 char buf[MAX_LEN_ATOM+2], *endptr;
980 const char *endptrconst;
981 size_t ofs;
982
983 if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984 return -1;
985 if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986 return -1;
987 strlcpy(buf, _region_head(r), sizeof (buf));
988 np = strtoul(buf, &endptr, 0);
989 endptrconst = _bcs_skip_ws(endptr);
990 if (*endptrconst != ';')
991 return -1;
992 ofs = endptrconst+1-buf;
993 if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994 return -1;
995 if (rnp)
996 *rnp = np;
997 return 0;
998 }
999
1000 static int
parse_plural_body(struct _region * r,struct parser_element ** rpe)1001 parse_plural_body(struct _region *r, struct parser_element **rpe)
1002 {
1003 int token;
1004 struct tokenizer_context tcx;
1005 struct parser_element pelem, *ppe;
1006
1007 init_tokenizer_context(&tcx);
1008 _memstream_bind(&tcx.memstream, r);
1009
1010 init_parser_element(&pelem);
1011 token = parse_exp(&tcx, &pelem);
1012 if (T_IS_ERROR(token))
1013 return token;
1014
1015 ppe = dup_parser_element(&pelem);
1016 if (ppe == NULL) {
1017 uninit_parser_element(&pelem);
1018 return T_NOMEM;
1019 }
1020
1021 *rpe = ppe;
1022
1023 return 0;
1024 }
1025
1026 static int
parse_plural(struct parser_element ** rpe,unsigned long * rnp,const char * str,size_t len)1027 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028 const char *str, size_t len)
1029 {
1030 struct _region r;
1031
1032 _region_init(&r, __UNCONST(str), len);
1033
1034 if (find_plural_forms(&r))
1035 return T_NOTFOUND;
1036 if (skip_nplurals(&r, rnp))
1037 return T_ILPLURAL;
1038 if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039 return T_ILPLURAL;
1040 if (cut_trailing_semicolon(&r))
1041 return T_ILPLURAL;
1042 return parse_plural_body(&r, rpe);
1043 }
1044
1045 #ifdef TEST_PARSE_PLURAL
1046 int
main(int argc,char ** argv)1047 main(int argc, char **argv)
1048 {
1049 int ret;
1050 struct parser_element *pelem;
1051 unsigned long np;
1052
1053 if (argc != 2 && argc != 3) {
1054 fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
1055 return EXIT_FAILURE;
1056 }
1057
1058 ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));
1059
1060 if (ret == T_EOF)
1061 printf("none");
1062 else if (T_IS_ERROR(ret))
1063 printf("error: 0x%X", ret);
1064 else {
1065 printf("syntax tree: ");
1066 dump_elem(pelem);
1067 printf("\nnplurals = %lu", np);
1068 if (argv[2])
1069 printf(", plural = %lu",
1070 calculate_plural(pelem, atoi(argv[2])));
1071 free_parser_element(pelem);
1072 }
1073 printf("\n");
1074
1075
1076 return EXIT_SUCCESS;
1077 }
1078 #endif /* TEST_PARSE_PLURAL */
1079
1080 /*
1081 * external interface
1082 */
1083
/*
 * Public entry point for parse_plural(): the opaque gettext_plural
 * handle is the parser's tree node under another name.  Arguments and
 * return value are those of parse_plural().
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
    const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1090
/*
 * Public entry point for calculate_plural(): evaluate the plural
 * expression behind the opaque handle `pe' for the number `n'.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1096
/*
 * Public entry point for free_parser_element(): release the syntax
 * tree behind the opaque handle `pe'.  NULL is accepted.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}
1102
1103 #ifdef TEST_PLURAL
1104 #include <libintl.h>
1105 #include <locale.h>
1106
/*
 * End-to-end test driver: look up the plural message "1"/"2" in the
 * "test" domain for n = 1..4 and print each result.
 */
int
main(void)
{
	int n;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */

	for (n = 1; n <= 4; n++)
		printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n));

	return 0;
}
1120 #endif
1121