xref: /netbsd-src/lib/libintl/plural_parser.c (revision 388550b026d49b7f7b7480b1113bf82bb8d6a480)
1 /*	$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __RCSID("$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $");
32 
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <citrus/citrus_namespace.h>
38 #include <citrus/citrus_region.h>
39 #include <citrus/citrus_memstream.h>
40 #include <citrus/citrus_bcs.h>
41 #include "plural_parser.h"
42 
/*
 * The tokenizer and parser test programs are built with a relaxed
 * grammar: parse_exp() accepts an empty expression and tokenize_atom()
 * accepts any identifier.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* size limits */
#define MAX_LEN_ATOM		10	/* characters in one atom */
#define MAX_NUM_OPERANDS	3	/* operand slots per operator node */

/*
 * Token kinds.  The single-character tokens '?', ':', '(', ')' and '!'
 * are represented by the character itself.  The named kinds are grouped
 * by range so that T_IS_OPERATOR() and T_IS_ERROR() are plain
 * comparisons: 0x1xx operators, 0x2xx atoms, 0x3xx errors.
 */
#define T_EOF			EOF
#define T_NONE			0x100
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201
#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306
#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/* two-character operators are encoded as the sum of both characters */
#define OP_EQ			('=' + '=')
#define OP_NEQ			('!' + '=')
#define OP_LTEQ			('<' + '=')
#define OP_GTEQ			('>' + '=')

/* symbols recognized in the expression and in the Plural-Forms header */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof(NPLURALS_SYMBOL) - 1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof(PLURAL_SYMBOL) - 1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof(PLURAL_FORMS) - 1)
83 
84 /* ----------------------------------------------------------------------
85  * tokenizer part
86  */
87 
/*
 * Value carried along with a token; which member is meaningful depends
 * on the token kind returned by the tokenizer.
 */
union token_data {
	/* value of a T_CONSTANT */
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	/* name of a T_IDENTIFIER (test builds only) */
	char identifier[MAX_LEN_ATOM + 1];
#endif
	/* operator character, or an OP_* code for two-character operators */
	char op;
};
96 
97 struct tokenizer_context
98 {
99 	struct _memstream memstream;
100 	struct {
101 		int token;
102 		union token_data token_data;
103 	} token0;
104 };
105 
106 /* initialize a tokenizer context */
107 static void
init_tokenizer_context(struct tokenizer_context * tcx)108 init_tokenizer_context(struct tokenizer_context *tcx)
109 {
110 	tcx->token0.token = T_NONE;
111 }
112 
113 /* get an atom (identifier or constant) */
114 static int
tokenize_atom(struct tokenizer_context * tcx,union token_data * token_data)115 tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116 {
117 	int ch, len;
118 	char buf[MAX_LEN_ATOM+1];
119 
120 	len = 0;
121 	while (/*CONSTCOND*/1) {
122 		ch = _memstream_getc(&tcx->memstream);
123 		if (!(_bcs_isalnum(ch) || ch == '_')) {
124 			_memstream_ungetc(&tcx->memstream, ch);
125 			break;
126 		}
127 		if (len == MAX_LEN_ATOM)
128 			return T_TOOLONG;
129 		buf[len++] = ch;
130 	}
131 	buf[len] = '\0';
132 	if (len == 0)
133 		return T_ILCHAR;
134 
135 	if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136 		unsigned long ul;
137 		char *post;
138 		ul = strtoul(buf, &post, 0);
139 		if (buf+len != post)
140 			return T_ILCHAR;
141 		token_data->constant = ul;
142 		return T_CONSTANT;
143 	}
144 
145 #ifdef ALLOW_ARBITRARY_IDENTIFIER
146 	strcpy(token_data->identifier, buf);
147 	return T_IDENTIFIER;
148 #else
149 	if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150 		return T_IDENTIFIER;
151 	return T_ILCHAR;
152 #endif
153 }
154 
155 /* tokenizer main routine */
156 static int
tokenize(struct tokenizer_context * tcx,union token_data * token_data)157 tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158 {
159 	int ch, prevch;
160 
161 retry:
162 	ch = _memstream_getc(&tcx->memstream);
163 	if (_bcs_isspace(ch))
164 		goto retry;
165 
166 	switch (ch) {
167 	case T_EOF:
168 		return ch;
169 	case '+': case '-':
170 		token_data->op = ch;
171 		return T_ADDITIVE;
172 	case '*': case '/': case '%':
173 		token_data->op = ch;
174 		return T_MULTIPLICATIVE;
175 	case '?': case ':': case '(': case ')':
176 		token_data->op = ch;
177 		return ch;
178 	case '&': case '|':
179 		prevch = ch;
180 		ch = _memstream_getc(&tcx->memstream);
181 		if (ch != prevch) {
182 			_memstream_ungetc(&tcx->memstream, ch);
183 			return T_ILCHAR;
184 		}
185 		token_data->op = ch;
186 		switch (ch) {
187 		case '&':
188 			return T_LAND;
189 		case '|':
190 			return T_LOR;
191 		default:
192 			return T_ILTOKEN;
193 		}
194 	case '=': case '!': case '<': case '>':
195 		prevch = ch;
196 		ch = _memstream_getc(&tcx->memstream);
197 		if (ch != '=') {
198 			_memstream_ungetc(&tcx->memstream, ch);
199 			switch (prevch) {
200 			case '=':
201 				return T_ILCHAR;
202 			case '!':
203 				return '!';
204 			case '<':
205 			case '>':
206 				token_data->op = prevch; /* OP_LT or OP_GT */
207 				return T_RELATIONAL;
208 			}
209 		}
210 		/* '==', '!=', '<=' or '>=' */
211 		token_data->op = ch+prevch;
212 		switch (prevch) {
213 		case '=':
214 		case '!':
215 			return T_EQUALITY;
216 		case '<':
217 		case '>':
218 			return T_RELATIONAL;
219 		}
220 		/*NOTREACHED*/
221 	}
222 
223 	_memstream_ungetc(&tcx->memstream, ch);
224 	return tokenize_atom(tcx, token_data);
225 }
226 
227 /* get the next token */
228 static int
get_token(struct tokenizer_context * tcx,union token_data * token_data)229 get_token(struct tokenizer_context *tcx, union token_data *token_data)
230 {
231 	if (tcx->token0.token != T_NONE) {
232 		int token = tcx->token0.token;
233 		tcx->token0.token = T_NONE;
234 		*token_data = tcx->token0.token_data;
235 		return token;
236 	}
237 	return tokenize(tcx, token_data);
238 }
239 
240 /* push back the last token */
241 static void
unget_token(struct tokenizer_context * tcx,int token,union token_data * token_data)242 unget_token(struct tokenizer_context *tcx,
243 	    int token, union token_data *token_data)
244 {
245 	tcx->token0.token = token;
246 	tcx->token0.token_data = *token_data;
247 }
248 
249 #ifdef TEST_TOKENIZER
250 
251 int
main(int argc,char ** argv)252 main(int argc, char **argv)
253 {
254 	struct tokenizer_context tcx;
255 	union token_data token_data;
256 	int token;
257 
258 	if (argc != 2) {
259 		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
260 		return EXIT_FAILURE;
261 	}
262 
263 	init_tokenizer_context(&tcx);
264 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
265 
266 	while (1) {
267 		token = get_token(&tcx, &token_data);
268 		switch (token) {
269 		case T_EOF:
270 			goto quit;
271 		case T_ILCHAR:
272 			printf("illegal character.\n");
273 			goto quit;
274 		case T_TOOLONG:
275 			printf("too long atom.\n");
276 			goto quit;
277 		case T_CONSTANT:
278 			printf("constant: %lu\n", token_data.constant);
279 			break;
280 		case T_IDENTIFIER:
281 			printf("symbol: %s\n", token_data.identifier);
282 			break;
283 		default:
284 			printf("operator: ");
285 			switch (token) {
286 			case T_LAND:
287 				printf("&&\n");
288 				break;
289 			case T_LOR:
290 				printf("||\n");
291 				break;
292 			case T_EQUALITY:
293 				printf("%c=\n", token_data.op-'=');
294 				break;
295 			case T_RELATIONAL:
296 				switch(token_data.op) {
297 				case OP_LTEQ:
298 				case OP_GTEQ:
299 					printf("%c=\n", token_data.op-'=');
300 					break;
301 				default:
302 					printf("%c\n", token_data.op);
303 					break;
304 				}
305 				break;
306 			case T_ADDITIVE:
307 			case T_MULTIPLICATIVE:
308 				printf("%c\n", token_data.op);
309 				break;
310 			default:
311 				printf("operator: %c\n", token);
312 			}
313 		}
314 	}
315 quit:
316 	return 0;
317 }
318 #endif /* TEST_TOKENIZER */
319 
320 
321 /* ----------------------------------------------------------------------
322  * parser part
323  *
324  * exp := cond
325  *
326  * cond := lor | lor '?' cond ':' cond
327  *
328  * lor := land ( '||' land )*
329  *
330  * land := equality ( '&&' equality )*
331  *
332  * equality := relational ( equalityops relational )*
333  * equalityops := '==' | '!='
334  *
335  * relational := additive ( relationalops additive )*
336  * relationalops := '<' | '>' | '<=' | '>='
337  *
338  * additive := multiplicative ( additiveops multiplicative )*
339  * additiveops := '+' | '-'
340  *
341  * multiplicative := lnot ( multiplicativeops lnot )*
342  * multiplicativeops := '*' | '/' | '%'
343  *
344  * lnot := '!' lnot | term
345  *
346  * term := literal | identifier | '(' exp ')'
347  *
348  */
349 
/*
 * Error propagation helpers for the parser.
 *
 * T_ENSURE_OK jumps to `label' when `token' is an error code.
 * T_ENSURE_SOMETHING does the same and additionally turns end of input
 * into T_ILEND (stored back into `token') before jumping.
 */
#define T_ENSURE_OK(token, label)					      \
	do {								      \
		if (T_IS_ERROR(token))					      \
			goto label;					      \
	} while (0)
#define T_ENSURE_SOMETHING(token, label)				      \
	do {								      \
		if (T_IS_ERROR(token))					      \
			goto label;					      \
		if ((token) == T_EOF) {					      \
			token = T_ILEND;				      \
			goto label;					      \
		}							      \
	} while (0)
362 } while (0)
363 
364 #define parser_element	plural_element
365 
366 struct parser_element;
367 struct parser_op
368 {
369 	char op;
370 	struct parser_element *operands[MAX_NUM_OPERANDS];
371 };
372 struct parser_element
373 {
374 	int kind;
375 	union
376 	{
377 		struct parser_op parser_op;
378 		union token_data token_data;
379 	} u;
380 };
381 
382 struct parser_op2_transition
383 {
384 	int					kind;
385 	const struct parser_op2_transition	*next;
386 };
387 
388 /* prototypes */
389 static int parse_cond(struct tokenizer_context *, struct parser_element *);
390 
391 
392 /* transition table for the 2-operand operators */
393 #define DEF_TR(t, k, n)							      \
394 static struct parser_op2_transition exp_tr_##t = {			      \
395 	k, &exp_tr_##n							      \
396 }
397 #define DEF_TR0(t, k)							      \
398 static struct parser_op2_transition exp_tr_##t = {			      \
399 	k, NULL /* expect lnot */					      \
400 }
401 
402 DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403 DEF_TR(additive, T_ADDITIVE, multiplicative);
404 DEF_TR(relational, T_RELATIONAL, additive);
405 DEF_TR(equality, T_EQUALITY, relational);
406 DEF_TR(land, T_LAND, equality);
407 DEF_TR(lor, T_LOR, land);
408 
409 /* init a parser element structure */
410 static void
init_parser_element(struct parser_element * pe)411 init_parser_element(struct parser_element *pe)
412 {
413 	int i;
414 
415 	pe->kind = T_NONE;
416 	for (i=0; i<MAX_NUM_OPERANDS; i++)
417 		pe->u.parser_op.operands[i] = NULL;
418 }
419 
420 /* uninitialize a parser element structure with freeing children */
421 static void free_parser_element(struct parser_element *);
422 static void
uninit_parser_element(struct parser_element * pe)423 uninit_parser_element(struct parser_element *pe)
424 {
425 	int i;
426 
427 	if (T_IS_OPERATOR(pe->kind))
428 		for (i=0; i<MAX_NUM_OPERANDS; i++)
429 			if (pe->u.parser_op.operands[i])
430 				free_parser_element(
431 					pe->u.parser_op.operands[i]);
432 }
433 
/*
 * Free a heap-allocated parser element together with all of its
 * children.  A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
443 
444 
445 /* copy a parser element structure shallowly */
446 static void
copy_parser_element(struct parser_element * dpe,const struct parser_element * spe)447 copy_parser_element(struct parser_element *dpe,
448 		    const struct parser_element *spe)
449 {
450 	memcpy(dpe, spe, sizeof *dpe);
451 }
452 
453 /* duplicate a parser element structure shallowly */
454 static struct parser_element *
dup_parser_element(const struct parser_element * pe)455 dup_parser_element(const struct parser_element *pe)
456 {
457 	struct parser_element *dpe = malloc(sizeof *dpe);
458 	if (dpe)
459 		copy_parser_element(dpe, pe);
460 	return dpe;
461 }
462 
463 /* term := identifier | constant | '(' exp ')' */
464 static int
parse_term(struct tokenizer_context * tcx,struct parser_element * pelem)465 parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466 {
467 	struct parser_element pe0;
468 	int token;
469 	union token_data token_data;
470 
471 	token = get_token(tcx, &token_data);
472 	switch (token) {
473 	case '(':
474 		/* '(' exp ')' */
475 		init_parser_element(&pe0);
476 		/* expect exp */
477 		token = parse_cond(tcx, &pe0);
478 		T_ENSURE_OK(token, err);
479 		/* expect ')' */
480 		token = get_token(tcx, &token_data);
481 		T_ENSURE_SOMETHING(token, err);
482 		if (token != ')') {
483 			unget_token(tcx, token, &token_data);
484 			token = T_ILTOKEN;
485 			goto err;
486 		}
487 		copy_parser_element(pelem, &pe0);
488 		return token;
489 err:
490 		uninit_parser_element(&pe0);
491 		return token;
492 	case T_IDENTIFIER:
493 	case T_CONSTANT:
494 		pelem->kind = token;
495 		pelem->u.token_data = token_data;
496 		return token;
497 	case T_EOF:
498 		return T_ILEND;
499 	default:
500 		return T_ILTOKEN;
501 	}
502 }
503 
504 /* lnot := '!' lnot | term */
505 static int
parse_lnot(struct tokenizer_context * tcx,struct parser_element * pelem)506 parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507 {
508 	struct parser_element pe0;
509 	int token;
510 	union token_data token_data;
511 
512 	init_parser_element(&pe0);
513 
514 	/* '!' or not */
515 	token = get_token(tcx, &token_data);
516 	if (token != '!') {
517 		/* stop: term */
518 		unget_token(tcx, token, &token_data);
519 		return parse_term(tcx, pelem);
520 	}
521 
522 	/* '!' term */
523 	token = parse_lnot(tcx, &pe0);
524 	T_ENSURE_OK(token, err);
525 
526 	pelem->kind = '!';
527 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528 	return pelem->kind;
529 err:
530 	uninit_parser_element(&pe0);
531 	return token;
532 }
533 
534 /* ext_op := ext_next ( op ext_next )* */
535 static int
parse_op2(struct tokenizer_context * tcx,struct parser_element * pelem,const struct parser_op2_transition * tr)536 parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537 	  const struct parser_op2_transition *tr)
538 {
539 	struct parser_element pe0, pe1, peop;
540 	int token;
541 	union token_data token_data;
542 	char op;
543 
544 	/* special case: expect lnot */
545 	if (tr == NULL)
546 		return parse_lnot(tcx, pelem);
547 
548 	init_parser_element(&pe0);
549 	init_parser_element(&pe1);
550 	token = parse_op2(tcx, &pe0, tr->next);
551 	T_ENSURE_OK(token, err);
552 
553 	while (/*CONSTCOND*/1) {
554 		/* expect op or empty */
555 		token = get_token(tcx, &token_data);
556 		if (token != tr->kind) {
557 			/* stop */
558 			unget_token(tcx, token, &token_data);
559 			copy_parser_element(pelem, &pe0);
560 			break;
561 		}
562 		op = token_data.op;
563 		/* right hand */
564 		token = parse_op2(tcx, &pe1, tr->next);
565 		T_ENSURE_OK(token, err);
566 
567 		init_parser_element(&peop);
568 		peop.kind = tr->kind;
569 		peop.u.parser_op.op = op;
570 		peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571 		init_parser_element(&pe0);
572 		peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573 		init_parser_element(&pe1);
574 		copy_parser_element(&pe0, &peop);
575 	}
576 	return pelem->kind;
577 err:
578 	uninit_parser_element(&pe1);
579 	uninit_parser_element(&pe0);
580 	return token;
581 }
582 
583 /* cond := lor | lor '?' cond ':' cond */
584 static int
parse_cond(struct tokenizer_context * tcx,struct parser_element * pelem)585 parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586 {
587 	struct parser_element pe0, pe1, pe2;
588 	int token;
589 	union token_data token_data;
590 
591 	init_parser_element(&pe0);
592 	init_parser_element(&pe1);
593 	init_parser_element(&pe2);
594 
595 	/* expect lor or empty */
596 	token = parse_op2(tcx, &pe0, &exp_tr_lor);
597 	T_ENSURE_OK(token, err);
598 
599 	/* '?' or not */
600 	token = get_token(tcx, &token_data);
601 	if (token != '?') {
602 		/* stop: lor */
603 		unget_token(tcx, token, &token_data);
604 		copy_parser_element(pelem, &pe0);
605 		return pe0.kind;
606 	}
607 
608 	/* lor '?' cond ':' cond */
609 	/* expect cond */
610 	token = parse_cond(tcx, &pe1);
611 	T_ENSURE_OK(token, err);
612 
613 	/* expect ':' */
614 	token = get_token(tcx, &token_data);
615 	T_ENSURE_OK(token, err);
616 	if (token != ':') {
617 		unget_token(tcx, token, &token_data);
618 		token = T_ILTOKEN;
619 		goto err;
620 	}
621 
622 	/* expect cond */
623 	token = parse_cond(tcx, &pe2);
624 	T_ENSURE_OK(token, err);
625 
626 	pelem->kind = '?';
627 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628 	pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629 	pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630 	return pelem->kind;
631 err:
632 	uninit_parser_element(&pe2);
633 	uninit_parser_element(&pe1);
634 	uninit_parser_element(&pe0);
635 	return token;
636 }
637 
638 static int
parse_exp(struct tokenizer_context * tcx,struct parser_element * pelem)639 parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640 {
641 	int token, token1;
642 	union token_data token_data;
643 
644 #ifdef ALLOW_EMPTY
645 	/* empty check */
646 	token = get_token(tcx, &token_data);
647 	if (token == T_EOF)
648 		return token;
649 	unget_token(tcx, token, &token_data);
650 #endif
651 
652 	token = parse_cond(tcx, pelem);
653 	if (!T_IS_ERROR(token)) {
654 		/* termination check */
655 		token1 = get_token(tcx, &token_data);
656 		if (token1 == T_EOF)
657 			return token;
658 		else if (!T_IS_ERROR(token))
659 			 unget_token(tcx, token1, &token_data);
660 		return T_ILTOKEN;
661 	}
662 	return token;
663 }
664 
665 
666 #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667 #include <stdio.h>
668 
669 static void dump_elem(struct parser_element *);
670 
671 static void
dump_op2(struct parser_element * pelem)672 dump_op2(struct parser_element *pelem)
673 {
674 	dump_elem(pelem->u.parser_op.operands[0]);
675 	printf(" ");
676 	dump_elem(pelem->u.parser_op.operands[1]);
677 	printf(")");
678 }
679 
680 static void
dump_op3(struct parser_element * pelem)681 dump_op3(struct parser_element *pelem)
682 {
683 	dump_elem(pelem->u.parser_op.operands[0]);
684 	printf(" ");
685 	dump_elem(pelem->u.parser_op.operands[1]);
686 	printf(" ");
687 	dump_elem(pelem->u.parser_op.operands[2]);
688 	printf(")");
689 }
690 
691 static void
dump_elem(struct parser_element * pelem)692 dump_elem(struct parser_element *pelem)
693 {
694 	switch (pelem->kind) {
695 	case T_LAND:
696 		printf("(&& ");
697 		dump_op2(pelem);
698 		break;
699 	case T_LOR:
700 		printf("(|| ");
701 		dump_op2(pelem);
702 		break;
703 	case T_EQUALITY:
704 		switch (pelem->u.parser_op.op) {
705 		case OP_EQ:
706 			printf("(== ");
707 			break;
708 		case OP_NEQ:
709 			printf("(!= ");
710 			break;
711 		}
712 		dump_op2(pelem);
713 		break;
714 	case T_RELATIONAL:
715 		switch (pelem->u.parser_op.op) {
716 		case '<':
717 		case '>':
718 			printf("(%c ", pelem->u.parser_op.op);
719 			break;
720 		case OP_LTEQ:
721 		case OP_GTEQ:
722 			printf("(%c= ", pelem->u.parser_op.op-'=');
723 			break;
724 		}
725 		dump_op2(pelem);
726 		break;
727 	case T_ADDITIVE:
728 	case T_MULTIPLICATIVE:
729 		printf("(%c ", pelem->u.parser_op.op);
730 		dump_op2(pelem);
731 		break;
732 	case '!':
733 		printf("(! ");
734 		dump_elem(pelem->u.parser_op.operands[0]);
735 		printf(")");
736 		break;
737 	case '?':
738 		printf("(? ");
739 		dump_op3(pelem);
740 		break;
741 	case T_CONSTANT:
742 		printf("%d", pelem->u.token_data.constant);
743 		break;
744 	case T_IDENTIFIER:
745 #ifdef ALLOW_ARBITRARY_IDENTIFIER
746 		printf("%s", pelem->u.token_data.identifier);
747 #else
748 		printf(PLURAL_NUMBER_SYMBOL);
749 #endif
750 		break;
751 	}
752 }
753 #endif
754 #ifdef TEST_PARSER
755 int
main(int argc,char ** argv)756 main(int argc, char **argv)
757 {
758 	struct tokenizer_context tcx;
759 	struct parser_element pelem;
760 	int token;
761 
762 	if (argc != 2) {
763 		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
764 		return EXIT_FAILURE;
765 	}
766 
767 	init_tokenizer_context(&tcx);
768 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
769 
770 	init_parser_element(&pelem);
771 	token = parse_exp(&tcx, &pelem);
772 
773 	if (token == T_EOF)
774 		printf("none");
775 	else if (T_IS_ERROR(token))
776 		printf("error: 0x%X", token);
777 	else
778 		dump_elem(&pelem);
779 	printf("\n");
780 
781 	uninit_parser_element(&pelem);
782 
783 	return EXIT_SUCCESS;
784 }
785 #endif /* TEST_PARSER */
786 
/* ----------------------------------------------------------------------
 * calculate plural number
 */
790 static unsigned long
calculate_plural(const struct parser_element * pe,unsigned long n)791 calculate_plural(const struct parser_element *pe, unsigned long n)
792 {
793 	unsigned long val0, val1;
794 	switch (pe->kind) {
795 	case T_IDENTIFIER:
796 		return n;
797 	case T_CONSTANT:
798 		return pe->u.token_data.constant;
799 	case '?':
800 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801 		if (val0)
802 			val1=calculate_plural(pe->u.parser_op.operands[1], n);
803 		else
804 			val1=calculate_plural(pe->u.parser_op.operands[2], n);
805 		return val1;
806 	case '!':
807 		return !calculate_plural(pe->u.parser_op.operands[0], n);
808 	case T_MULTIPLICATIVE:
809 	case T_ADDITIVE:
810 	case T_RELATIONAL:
811 	case T_EQUALITY:
812 	case T_LOR:
813 	case T_LAND:
814 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815 		val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816 		switch (pe->u.parser_op.op) {
817 		case '*':
818 			return val0*val1;
819 		case '/':
820 			return val0/val1;
821 		case '%':
822 			return val0%val1;
823 		case '+':
824 			return val0+val1;
825 		case '-':
826 			return val0-val1;
827 		case '<':
828 			return val0<val1;
829 		case '>':
830 			return val0>val1;
831 		case OP_LTEQ:
832 			return val0<=val1;
833 		case OP_GTEQ:
834 			return val0>=val1;
835 		case OP_EQ:
836 			return val0==val1;
837 		case OP_NEQ:
838 			return val0!=val1;
839 		case '|':
840 			return val0||val1;
841 		case '&':
842 			return val0&&val1;
843 		}
844 	}
845 	return 0;
846 }
847 
848 #ifdef TEST_CALC_PLURAL
849 #include <stdio.h>
850 
851 int
main(int argc,char ** argv)852 main(int argc, char **argv)
853 {
854 	struct tokenizer_context tcx;
855 	struct parser_element pelem;
856 	int token;
857 
858 	if (argc != 3) {
859 		fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
860 		return EXIT_FAILURE;
861 	}
862 
863 	init_tokenizer_context(&tcx);
864 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
865 
866 	init_parser_element(&pelem);
867 	token = parse_exp(&tcx, &pelem);
868 
869 	if (token == T_EOF)
870 		printf("none");
871 	else if (T_IS_ERROR(token))
872 		printf("error: 0x%X", token);
873 	else {
874 		printf("plural = %lu",
875 		       calculate_plural(&pelem, atoi(argv[2])));
876 	}
877 	printf("\n");
878 
879 	uninit_parser_element(&pelem);
880 
881 	return EXIT_SUCCESS;
882 }
883 #endif /* TEST_CALC_PLURAL */
884 
885 
886 /* ----------------------------------------------------------------------
887  * parse plural forms
888  */
889 
/*
 * Advance the start of a region past leading white space, as
 * determined by _bcs_skip_ws_len().
 */
static void
region_skip_ws(struct _region *r)
{
	const char *start = _region_head(r);
	size_t remaining = _region_size(r);

	start = _bcs_skip_ws_len(start, &remaining);
	_region_init(r, __UNCONST(start), remaining);
}
899 
/*
 * Shorten a region so that it no longer includes trailing white space,
 * as determined by _bcs_trunc_rws_len().  Only the size changes; the
 * underlying bytes are not modified.
 */
static void
region_trunc_rws(struct _region *r)
{
	const char *head = _region_head(r);
	size_t remaining = _region_size(r);

	_bcs_trunc_rws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
909 
/*
 * Check whether a region starts with the `prelen' bytes at `pre'.
 * The comparison ignores case when `ignorecase' is non-zero.
 * Returns 0 on a match, -1 otherwise (including a too short region).
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
		    int ignorecase)
{
	int differs;

	if (prelen > _region_size(r))
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen);
	else
		differs = memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
926 
/*
 * Strip trailing white space and then the final ';' from a region.
 * Returns -1 when the region does not end with ';'; the white space
 * has been stripped by then in either case.
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t n;

	region_trunc_rws(r);
	n = _region_size(r);
	if (n == 0)
		return -1;
	if (_region_peek8(r, n - 1) != ';')
		return -1;
	_region_get_subregion(r, r, 0, n - 1);
	return 0;
}
937 
938 static int
find_plural_forms(struct _region * r)939 find_plural_forms(struct _region *r)
940 {
941 	struct _memstream ms;
942 	struct _region rr;
943 
944 	_memstream_bind(&ms, r);
945 
946 	while (!_memstream_getln_region(&ms, &rr)) {
947 		if (!region_check_prefix(&rr,
948 					 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949 			_region_get_subregion(
950 				r, &rr, LEN_PLURAL_FORMS,
951 				_region_size(&rr)-LEN_PLURAL_FORMS);
952 			region_skip_ws(r);
953 			region_trunc_rws(r);
954 			return 0;
955 		}
956 	}
957 	return -1;
958 }
959 
/*
 * Consume "<sym> =" (with optional white space around each part) from
 * the start of a region, leaving the region at the assigned value.
 * The symbol is compared case-sensitively.  Returns 0 on success and
 * -1 when the symbol or the '=' is missing; the region may already be
 * partly consumed on failure.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{

	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r)-symlen);

	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r)-1);

	region_skip_ws(r);
	return 0;
}
974 
975 static int
skip_nplurals(struct _region * r,unsigned long * rnp)976 skip_nplurals(struct _region *r, unsigned long *rnp)
977 {
978 	unsigned long np;
979 	char buf[MAX_LEN_ATOM+2], *endptr;
980 	const char *endptrconst;
981 	size_t ofs;
982 
983 	if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984 		return -1;
985 	if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986 		return -1;
987 	strlcpy(buf, _region_head(r), sizeof (buf));
988 	np = strtoul(buf, &endptr, 0);
989 	endptrconst = _bcs_skip_ws(endptr);
990 	if (*endptrconst != ';')
991 		return -1;
992 	ofs = endptrconst+1-buf;
993 	if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994 		return -1;
995 	if (rnp)
996 		*rnp = np;
997 	return 0;
998 }
999 
1000 static int
parse_plural_body(struct _region * r,struct parser_element ** rpe)1001 parse_plural_body(struct _region *r, struct parser_element **rpe)
1002 {
1003 	int token;
1004 	struct tokenizer_context tcx;
1005 	struct parser_element pelem, *ppe;
1006 
1007 	init_tokenizer_context(&tcx);
1008 	_memstream_bind(&tcx.memstream, r);
1009 
1010 	init_parser_element(&pelem);
1011 	token = parse_exp(&tcx, &pelem);
1012 	if (T_IS_ERROR(token))
1013 		return token;
1014 
1015 	ppe = dup_parser_element(&pelem);
1016 	if (ppe == NULL) {
1017 		uninit_parser_element(&pelem);
1018 		return T_NOMEM;
1019 	}
1020 
1021 	*rpe = ppe;
1022 
1023 	return 0;
1024 }
1025 
1026 static int
parse_plural(struct parser_element ** rpe,unsigned long * rnp,const char * str,size_t len)1027 parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028 	     const char *str, size_t len)
1029 {
1030 	struct _region r;
1031 
1032 	_region_init(&r, __UNCONST(str), len);
1033 
1034 	if (find_plural_forms(&r))
1035 		return T_NOTFOUND;
1036 	if (skip_nplurals(&r, rnp))
1037 		return T_ILPLURAL;
1038 	if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039 		return T_ILPLURAL;
1040 	if (cut_trailing_semicolon(&r))
1041 		return T_ILPLURAL;
1042 	return parse_plural_body(&r, rpe);
1043 }
1044 
1045 #ifdef TEST_PARSE_PLURAL
1046 int
main(int argc,char ** argv)1047 main(int argc, char **argv)
1048 {
1049 	int ret;
1050 	struct parser_element *pelem;
1051 	unsigned long np;
1052 
1053 	if (argc != 2 && argc != 3) {
1054 		fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
1055 		return EXIT_FAILURE;
1056 	}
1057 
1058 	ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));
1059 
1060 	if (ret == T_EOF)
1061 		printf("none");
1062 	else if (T_IS_ERROR(ret))
1063 		printf("error: 0x%X", ret);
1064 	else {
1065 		printf("syntax tree: ");
1066 		dump_elem(pelem);
1067 		printf("\nnplurals = %lu", np);
1068 		if (argv[2])
1069 			printf(", plural = %lu",
1070 			       calculate_plural(pelem, atoi(argv[2])));
1071 		free_parser_element(pelem);
1072 	}
1073 	printf("\n");
1074 
1075 
1076 	return EXIT_SUCCESS;
1077 }
1078 #endif /* TEST_PARSE_PLURAL */
1079 
1080 /*
1081  * external interface
1082  */
1083 
/*
 * External entry point for parse_plural(): the opaque
 * struct gettext_plural pointer is handled as a syntax tree pointer.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
		      const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1090 
/*
 * External entry point for calculate_plural(): evaluate the plural
 * expression `pe' for the number `n'.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1096 
/*
 * External entry point for free_parser_element(): release a plural
 * expression together with all of its nodes.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}
1102 
1103 #ifdef TEST_PLURAL
1104 #include <libintl.h>
1105 #include <locale.h>
1106 
/* print the message that dngettext() selects for the number n */
#define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/*
 * Library test driver: look up the plural message of the "test" domain
 * for the numbers 1 to 4.
 */
int
main(void)
{
	int count;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */
	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
1120 #endif
1121