xref: /netbsd-src/lib/libintl/plural_parser.c (revision 388550b026d49b7f7b7480b1113bf82bb8d6a480)
1*388550b0Srillig /*	$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $	*/
2dd416aa8Stshiozak 
3dd416aa8Stshiozak /*-
4dd416aa8Stshiozak  * Copyright (c) 2005 Citrus Project,
5dd416aa8Stshiozak  * All rights reserved.
6dd416aa8Stshiozak  *
7dd416aa8Stshiozak  * Redistribution and use in source and binary forms, with or without
8dd416aa8Stshiozak  * modification, are permitted provided that the following conditions
9dd416aa8Stshiozak  * are met:
10dd416aa8Stshiozak  * 1. Redistributions of source code must retain the above copyright
11dd416aa8Stshiozak  *    notice, this list of conditions and the following disclaimer.
12dd416aa8Stshiozak  * 2. Redistributions in binary form must reproduce the above copyright
13dd416aa8Stshiozak  *    notice, this list of conditions and the following disclaimer in the
14dd416aa8Stshiozak  *    documentation and/or other materials provided with the distribution.
15dd416aa8Stshiozak  *
16dd416aa8Stshiozak  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17dd416aa8Stshiozak  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18dd416aa8Stshiozak  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19dd416aa8Stshiozak  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20dd416aa8Stshiozak  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21dd416aa8Stshiozak  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22dd416aa8Stshiozak  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23dd416aa8Stshiozak  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24dd416aa8Stshiozak  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25dd416aa8Stshiozak  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26dd416aa8Stshiozak  * SUCH DAMAGE.
27dd416aa8Stshiozak  *
28dd416aa8Stshiozak  */
29dd416aa8Stshiozak 
30dd416aa8Stshiozak #include <sys/cdefs.h>
31*388550b0Srillig __RCSID("$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $");
32dd416aa8Stshiozak 
33dd416aa8Stshiozak #include <assert.h>
34dd416aa8Stshiozak #include <stdio.h>
35dd416aa8Stshiozak #include <stdlib.h>
36dd416aa8Stshiozak #include <string.h>
37dd416aa8Stshiozak #include <citrus/citrus_namespace.h>
38dd416aa8Stshiozak #include <citrus/citrus_region.h>
39dd416aa8Stshiozak #include <citrus/citrus_memstream.h>
40dd416aa8Stshiozak #include <citrus/citrus_bcs.h>
41dd416aa8Stshiozak #include "plural_parser.h"
42dd416aa8Stshiozak 
/*
 * The stand-alone test programs relax two restrictions: an empty
 * expression is accepted by parse_exp(), and tokenize_atom() accepts any
 * identifier instead of only PLURAL_NUMBER_SYMBOL.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* limits */
#define MAX_LEN_ATOM		10	/* characters in one atom */
#define MAX_NUM_OPERANDS	3	/* operand slots per operator */

/*
 * Token kinds.  The single-character tokens '?', ':', '(', ')' and '!'
 * are represented by their own character code.
 */
#define T_EOF			EOF
#define T_NONE			0x100

/* operator classes; the concrete operator is kept in token_data.op */
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */

/* atoms */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201

/* error codes */
#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306

/* classification by numeric range */
#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/* two-character operators are encoded as the sum of both characters */
#define OP_EQ			('='+'=')
#define OP_NEQ			('!'+'=')
#define OP_LTEQ			('<'+'=')
#define OP_GTEQ			('>'+'=')

/* keywords looked for in the message catalog header */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof (NPLURALS_SYMBOL) -1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof (PLURAL_SYMBOL) -1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof (PLURAL_FORMS) -1)
83dd416aa8Stshiozak 
84dd416aa8Stshiozak /* ----------------------------------------------------------------------
85dd416aa8Stshiozak  * tokenizer part
86dd416aa8Stshiozak  */
87dd416aa8Stshiozak 
/*
 * Payload of a token.  Which member is meaningful depends on the token
 * kind returned alongside it.
 */
union token_data {
	/* value of a T_CONSTANT */
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	/* NUL-terminated name of a T_IDENTIFIER (test builds only) */
	char identifier[MAX_LEN_ATOM+1];
#endif
	/* operator character, or the sum of both characters (see OP_*) */
	char op;
};
96dd416aa8Stshiozak 
97dd416aa8Stshiozak struct tokenizer_context
98dd416aa8Stshiozak {
99dd416aa8Stshiozak 	struct _memstream memstream;
100dd416aa8Stshiozak 	struct {
101dd416aa8Stshiozak 		int token;
102dd416aa8Stshiozak 		union token_data token_data;
103dd416aa8Stshiozak 	} token0;
104dd416aa8Stshiozak };
105dd416aa8Stshiozak 
106dd416aa8Stshiozak /* initialize a tokenizer context */
107dd416aa8Stshiozak static void
init_tokenizer_context(struct tokenizer_context * tcx)108dd416aa8Stshiozak init_tokenizer_context(struct tokenizer_context *tcx)
109dd416aa8Stshiozak {
110dd416aa8Stshiozak 	tcx->token0.token = T_NONE;
111dd416aa8Stshiozak }
112dd416aa8Stshiozak 
113dd416aa8Stshiozak /* get an atom (identifier or constant) */
114dd416aa8Stshiozak static int
tokenize_atom(struct tokenizer_context * tcx,union token_data * token_data)115dd416aa8Stshiozak tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116dd416aa8Stshiozak {
117dd416aa8Stshiozak 	int ch, len;
118dd416aa8Stshiozak 	char buf[MAX_LEN_ATOM+1];
119dd416aa8Stshiozak 
120dd416aa8Stshiozak 	len = 0;
121dd416aa8Stshiozak 	while (/*CONSTCOND*/1) {
122dd416aa8Stshiozak 		ch = _memstream_getc(&tcx->memstream);
123dd416aa8Stshiozak 		if (!(_bcs_isalnum(ch) || ch == '_')) {
124dd416aa8Stshiozak 			_memstream_ungetc(&tcx->memstream, ch);
125dd416aa8Stshiozak 			break;
126dd416aa8Stshiozak 		}
127dd416aa8Stshiozak 		if (len == MAX_LEN_ATOM)
128dd416aa8Stshiozak 			return T_TOOLONG;
129dd416aa8Stshiozak 		buf[len++] = ch;
130dd416aa8Stshiozak 	}
131dd416aa8Stshiozak 	buf[len] = '\0';
132dd416aa8Stshiozak 	if (len == 0)
133dd416aa8Stshiozak 		return T_ILCHAR;
134dd416aa8Stshiozak 
135dd416aa8Stshiozak 	if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136dd416aa8Stshiozak 		unsigned long ul;
137dd416aa8Stshiozak 		char *post;
138dd416aa8Stshiozak 		ul = strtoul(buf, &post, 0);
139dd416aa8Stshiozak 		if (buf+len != post)
140dd416aa8Stshiozak 			return T_ILCHAR;
141dd416aa8Stshiozak 		token_data->constant = ul;
142dd416aa8Stshiozak 		return T_CONSTANT;
143dd416aa8Stshiozak 	}
144dd416aa8Stshiozak 
145dd416aa8Stshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
146dd416aa8Stshiozak 	strcpy(token_data->identifier, buf);
147dd416aa8Stshiozak 	return T_IDENTIFIER;
148dd416aa8Stshiozak #else
149dd416aa8Stshiozak 	if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150dd416aa8Stshiozak 		return T_IDENTIFIER;
151dd416aa8Stshiozak 	return T_ILCHAR;
152dd416aa8Stshiozak #endif
153dd416aa8Stshiozak }
154dd416aa8Stshiozak 
155dd416aa8Stshiozak /* tokenizer main routine */
156dd416aa8Stshiozak static int
tokenize(struct tokenizer_context * tcx,union token_data * token_data)157dd416aa8Stshiozak tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158dd416aa8Stshiozak {
159dd416aa8Stshiozak 	int ch, prevch;
160dd416aa8Stshiozak 
161dd416aa8Stshiozak retry:
162dd416aa8Stshiozak 	ch = _memstream_getc(&tcx->memstream);
163dd416aa8Stshiozak 	if (_bcs_isspace(ch))
164dd416aa8Stshiozak 		goto retry;
165dd416aa8Stshiozak 
166dd416aa8Stshiozak 	switch (ch) {
167dd416aa8Stshiozak 	case T_EOF:
168dd416aa8Stshiozak 		return ch;
169dd416aa8Stshiozak 	case '+': case '-':
170dd416aa8Stshiozak 		token_data->op = ch;
171dd416aa8Stshiozak 		return T_ADDITIVE;
172dd416aa8Stshiozak 	case '*': case '/': case '%':
173dd416aa8Stshiozak 		token_data->op = ch;
174dd416aa8Stshiozak 		return T_MULTIPLICATIVE;
175dd416aa8Stshiozak 	case '?': case ':': case '(': case ')':
176dd416aa8Stshiozak 		token_data->op = ch;
177dd416aa8Stshiozak 		return ch;
178dd416aa8Stshiozak 	case '&': case '|':
179dd416aa8Stshiozak 		prevch = ch;
180dd416aa8Stshiozak 		ch = _memstream_getc(&tcx->memstream);
181dd416aa8Stshiozak 		if (ch != prevch) {
182dd416aa8Stshiozak 			_memstream_ungetc(&tcx->memstream, ch);
183dd416aa8Stshiozak 			return T_ILCHAR;
184dd416aa8Stshiozak 		}
185dd416aa8Stshiozak 		token_data->op = ch;
186dd416aa8Stshiozak 		switch (ch) {
187dd416aa8Stshiozak 		case '&':
188dd416aa8Stshiozak 			return T_LAND;
189dd416aa8Stshiozak 		case '|':
190dd416aa8Stshiozak 			return T_LOR;
19101d1183fSchristos 		default:
19201d1183fSchristos 			return T_ILTOKEN;
193dd416aa8Stshiozak 		}
194dd416aa8Stshiozak 	case '=': case '!': case '<': case '>':
195dd416aa8Stshiozak 		prevch = ch;
196dd416aa8Stshiozak 		ch = _memstream_getc(&tcx->memstream);
197dd416aa8Stshiozak 		if (ch != '=') {
198dd416aa8Stshiozak 			_memstream_ungetc(&tcx->memstream, ch);
199dd416aa8Stshiozak 			switch (prevch) {
200dd416aa8Stshiozak 			case '=':
201dd416aa8Stshiozak 				return T_ILCHAR;
202dd416aa8Stshiozak 			case '!':
203dd416aa8Stshiozak 				return '!';
204dd416aa8Stshiozak 			case '<':
205dd416aa8Stshiozak 			case '>':
206dd416aa8Stshiozak 				token_data->op = prevch; /* OP_LT or OP_GT */
207dd416aa8Stshiozak 				return T_RELATIONAL;
208dd416aa8Stshiozak 			}
209dd416aa8Stshiozak 		}
210dd416aa8Stshiozak 		/* '==', '!=', '<=' or '>=' */
211dd416aa8Stshiozak 		token_data->op = ch+prevch;
212dd416aa8Stshiozak 		switch (prevch) {
213dd416aa8Stshiozak 		case '=':
214dd416aa8Stshiozak 		case '!':
215dd416aa8Stshiozak 			return T_EQUALITY;
216dd416aa8Stshiozak 		case '<':
217dd416aa8Stshiozak 		case '>':
218dd416aa8Stshiozak 			return T_RELATIONAL;
219dd416aa8Stshiozak 		}
220dd416aa8Stshiozak 		/*NOTREACHED*/
221dd416aa8Stshiozak 	}
222dd416aa8Stshiozak 
223dd416aa8Stshiozak 	_memstream_ungetc(&tcx->memstream, ch);
224dd416aa8Stshiozak 	return tokenize_atom(tcx, token_data);
225dd416aa8Stshiozak }
226dd416aa8Stshiozak 
227dd416aa8Stshiozak /* get the next token */
228dd416aa8Stshiozak static int
get_token(struct tokenizer_context * tcx,union token_data * token_data)229dd416aa8Stshiozak get_token(struct tokenizer_context *tcx, union token_data *token_data)
230dd416aa8Stshiozak {
231dd416aa8Stshiozak 	if (tcx->token0.token != T_NONE) {
232dd416aa8Stshiozak 		int token = tcx->token0.token;
233dd416aa8Stshiozak 		tcx->token0.token = T_NONE;
234dd416aa8Stshiozak 		*token_data = tcx->token0.token_data;
235dd416aa8Stshiozak 		return token;
236dd416aa8Stshiozak 	}
237dd416aa8Stshiozak 	return tokenize(tcx, token_data);
238dd416aa8Stshiozak }
239dd416aa8Stshiozak 
240dd416aa8Stshiozak /* push back the last token */
241dd416aa8Stshiozak static void
unget_token(struct tokenizer_context * tcx,int token,union token_data * token_data)242dd416aa8Stshiozak unget_token(struct tokenizer_context *tcx,
243dd416aa8Stshiozak 	    int token, union token_data *token_data)
244dd416aa8Stshiozak {
245dd416aa8Stshiozak 	tcx->token0.token = token;
246dd416aa8Stshiozak 	tcx->token0.token_data = *token_data;
247dd416aa8Stshiozak }
248dd416aa8Stshiozak 
249dd416aa8Stshiozak #ifdef TEST_TOKENIZER
250dd416aa8Stshiozak 
251dd416aa8Stshiozak int
main(int argc,char ** argv)252dd416aa8Stshiozak main(int argc, char **argv)
253dd416aa8Stshiozak {
254dd416aa8Stshiozak 	struct tokenizer_context tcx;
255dd416aa8Stshiozak 	union token_data token_data;
256dd416aa8Stshiozak 	int token;
257dd416aa8Stshiozak 
258dd416aa8Stshiozak 	if (argc != 2) {
259dd416aa8Stshiozak 		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
260dd416aa8Stshiozak 		return EXIT_FAILURE;
261dd416aa8Stshiozak 	}
262dd416aa8Stshiozak 
263dd416aa8Stshiozak 	init_tokenizer_context(&tcx);
264dd416aa8Stshiozak 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
265dd416aa8Stshiozak 
266dd416aa8Stshiozak 	while (1) {
267dd416aa8Stshiozak 		token = get_token(&tcx, &token_data);
268dd416aa8Stshiozak 		switch (token) {
269dd416aa8Stshiozak 		case T_EOF:
270dd416aa8Stshiozak 			goto quit;
271dd416aa8Stshiozak 		case T_ILCHAR:
272dd416aa8Stshiozak 			printf("illegal character.\n");
273dd416aa8Stshiozak 			goto quit;
274dd416aa8Stshiozak 		case T_TOOLONG:
275dd416aa8Stshiozak 			printf("too long atom.\n");
276dd416aa8Stshiozak 			goto quit;
277dd416aa8Stshiozak 		case T_CONSTANT:
278dd416aa8Stshiozak 			printf("constant: %lu\n", token_data.constant);
279dd416aa8Stshiozak 			break;
280dd416aa8Stshiozak 		case T_IDENTIFIER:
281dd416aa8Stshiozak 			printf("symbol: %s\n", token_data.identifier);
282dd416aa8Stshiozak 			break;
283dd416aa8Stshiozak 		default:
284dd416aa8Stshiozak 			printf("operator: ");
285dd416aa8Stshiozak 			switch (token) {
286dd416aa8Stshiozak 			case T_LAND:
287dd416aa8Stshiozak 				printf("&&\n");
288dd416aa8Stshiozak 				break;
289dd416aa8Stshiozak 			case T_LOR:
290dd416aa8Stshiozak 				printf("||\n");
291dd416aa8Stshiozak 				break;
292dd416aa8Stshiozak 			case T_EQUALITY:
293dd416aa8Stshiozak 				printf("%c=\n", token_data.op-'=');
294dd416aa8Stshiozak 				break;
295dd416aa8Stshiozak 			case T_RELATIONAL:
296dd416aa8Stshiozak 				switch(token_data.op) {
297dd416aa8Stshiozak 				case OP_LTEQ:
298dd416aa8Stshiozak 				case OP_GTEQ:
299dd416aa8Stshiozak 					printf("%c=\n", token_data.op-'=');
300dd416aa8Stshiozak 					break;
301dd416aa8Stshiozak 				default:
302dd416aa8Stshiozak 					printf("%c\n", token_data.op);
303dd416aa8Stshiozak 					break;
304dd416aa8Stshiozak 				}
305dd416aa8Stshiozak 				break;
306dd416aa8Stshiozak 			case T_ADDITIVE:
307dd416aa8Stshiozak 			case T_MULTIPLICATIVE:
308dd416aa8Stshiozak 				printf("%c\n", token_data.op);
309dd416aa8Stshiozak 				break;
310dd416aa8Stshiozak 			default:
311dd416aa8Stshiozak 				printf("operator: %c\n", token);
312dd416aa8Stshiozak 			}
313dd416aa8Stshiozak 		}
314dd416aa8Stshiozak 	}
315dd416aa8Stshiozak quit:
316dd416aa8Stshiozak 	return 0;
317dd416aa8Stshiozak }
318dd416aa8Stshiozak #endif /* TEST_TOKENIZER */
319dd416aa8Stshiozak 
320dd416aa8Stshiozak 
321dd416aa8Stshiozak /* ----------------------------------------------------------------------
322dd416aa8Stshiozak  * parser part
323dd416aa8Stshiozak  *
324dd416aa8Stshiozak  * exp := cond
325dd416aa8Stshiozak  *
326dd416aa8Stshiozak  * cond := lor | lor '?' cond ':' cond
327dd416aa8Stshiozak  *
328dd416aa8Stshiozak  * lor := land ( '||' land )*
329dd416aa8Stshiozak  *
330dd416aa8Stshiozak  * land := equality ( '&&' equality )*
331dd416aa8Stshiozak  *
332dd416aa8Stshiozak  * equality := relational ( equalityops relational )*
333dd416aa8Stshiozak  * equalityops := '==' | '!='
334dd416aa8Stshiozak  *
335dd416aa8Stshiozak  * relational := additive ( relationalops additive )*
336dd416aa8Stshiozak  * relationalops := '<' | '>' | '<=' | '>='
337dd416aa8Stshiozak  *
338dd416aa8Stshiozak  * additive := multiplicative ( additiveops multiplicative )*
339dd416aa8Stshiozak  * additiveops := '+' | '-'
340dd416aa8Stshiozak  *
341dd416aa8Stshiozak  * multiplicative := lnot ( multiplicativeops lnot )*
342dd416aa8Stshiozak  * multiplicativeops := '*' | '/' | '%'
343dd416aa8Stshiozak  *
344dd416aa8Stshiozak  * lnot := '!' lnot | term
345dd416aa8Stshiozak  *
346dd416aa8Stshiozak  * term := literal | identifier | '(' exp ')'
347dd416aa8Stshiozak  *
348dd416aa8Stshiozak  */
349dd416aa8Stshiozak 
/*
 * T_ENSURE_OK: jump to `label' when `token' is an error code.
 */
#define T_ENSURE_OK(token, label)					      \
do {									      \
	if (T_IS_ERROR(token))						      \
		goto label;						      \
} while (0)

/*
 * T_ENSURE_SOMETHING: like T_ENSURE_OK, but the end of input is an error
 * as well; `token' (which must be an lvalue) is changed to T_ILEND in
 * that case before jumping.
 */
#define T_ENSURE_SOMETHING(token, label)				      \
do {									      \
	if ((token) == T_EOF)						      \
		token = T_ILEND;					      \
	if (T_IS_ERROR(token))						      \
		goto label;						      \
} while (0)
363dd416aa8Stshiozak 
364dd416aa8Stshiozak #define parser_element	plural_element
365dd416aa8Stshiozak 
366dd416aa8Stshiozak struct parser_element;
367dd416aa8Stshiozak struct parser_op
368dd416aa8Stshiozak {
369dd416aa8Stshiozak 	char op;
370dd416aa8Stshiozak 	struct parser_element *operands[MAX_NUM_OPERANDS];
371dd416aa8Stshiozak };
372dd416aa8Stshiozak struct parser_element
373dd416aa8Stshiozak {
374dd416aa8Stshiozak 	int kind;
375dd416aa8Stshiozak 	union
376dd416aa8Stshiozak 	{
377dd416aa8Stshiozak 		struct parser_op parser_op;
378dd416aa8Stshiozak 		union token_data token_data;
379dd416aa8Stshiozak 	} u;
380dd416aa8Stshiozak };
381dd416aa8Stshiozak 
/*
 * One precedence level of the 2-operand operators: `kind' is the token
 * kind accepted at this level, `next' the level its operands are parsed
 * with (NULL: the operands are lnot expressions).
 */
struct parser_op2_transition {
	int					kind;
	const struct parser_op2_transition	*next;
};
387dd416aa8Stshiozak 
388dd416aa8Stshiozak /* prototypes */
389dd416aa8Stshiozak static int parse_cond(struct tokenizer_context *, struct parser_element *);
390dd416aa8Stshiozak 
391dd416aa8Stshiozak 
392dd416aa8Stshiozak /* transition table for the 2-operand operators */
393dd416aa8Stshiozak #define DEF_TR(t, k, n)							      \
394dd416aa8Stshiozak static struct parser_op2_transition exp_tr_##t = {			      \
395dd416aa8Stshiozak 	k, &exp_tr_##n							      \
396dd416aa8Stshiozak }
397dd416aa8Stshiozak #define DEF_TR0(t, k)							      \
398dd416aa8Stshiozak static struct parser_op2_transition exp_tr_##t = {			      \
399dd416aa8Stshiozak 	k, NULL /* expect lnot */					      \
400dd416aa8Stshiozak }
401dd416aa8Stshiozak 
402dd416aa8Stshiozak DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403dd416aa8Stshiozak DEF_TR(additive, T_ADDITIVE, multiplicative);
404dd416aa8Stshiozak DEF_TR(relational, T_RELATIONAL, additive);
405dd416aa8Stshiozak DEF_TR(equality, T_EQUALITY, relational);
406dd416aa8Stshiozak DEF_TR(land, T_LAND, equality);
407dd416aa8Stshiozak DEF_TR(lor, T_LOR, land);
408dd416aa8Stshiozak 
409dd416aa8Stshiozak /* init a parser element structure */
410dd416aa8Stshiozak static void
init_parser_element(struct parser_element * pe)411dd416aa8Stshiozak init_parser_element(struct parser_element *pe)
412dd416aa8Stshiozak {
413dd416aa8Stshiozak 	int i;
414dd416aa8Stshiozak 
415dd416aa8Stshiozak 	pe->kind = T_NONE;
416dd416aa8Stshiozak 	for (i=0; i<MAX_NUM_OPERANDS; i++)
417dd416aa8Stshiozak 		pe->u.parser_op.operands[i] = NULL;
418dd416aa8Stshiozak }
419dd416aa8Stshiozak 
420dd416aa8Stshiozak /* uninitialize a parser element structure with freeing children */
421dd416aa8Stshiozak static void free_parser_element(struct parser_element *);
422dd416aa8Stshiozak static void
uninit_parser_element(struct parser_element * pe)423dd416aa8Stshiozak uninit_parser_element(struct parser_element *pe)
424dd416aa8Stshiozak {
425dd416aa8Stshiozak 	int i;
426dd416aa8Stshiozak 
427dd416aa8Stshiozak 	if (T_IS_OPERATOR(pe->kind))
428dd416aa8Stshiozak 		for (i=0; i<MAX_NUM_OPERANDS; i++)
429dd416aa8Stshiozak 			if (pe->u.parser_op.operands[i])
430dd416aa8Stshiozak 				free_parser_element(
431dd416aa8Stshiozak 					pe->u.parser_op.operands[i]);
432dd416aa8Stshiozak }
433dd416aa8Stshiozak 
/*
 * Free a heap-allocated parser element together with its children.
 * A NULL pointer is ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
443dd416aa8Stshiozak 
444dd416aa8Stshiozak 
445dd416aa8Stshiozak /* copy a parser element structure shallowly */
446dd416aa8Stshiozak static void
copy_parser_element(struct parser_element * dpe,const struct parser_element * spe)447dd416aa8Stshiozak copy_parser_element(struct parser_element *dpe,
448dd416aa8Stshiozak 		    const struct parser_element *spe)
449dd416aa8Stshiozak {
450dd416aa8Stshiozak 	memcpy(dpe, spe, sizeof *dpe);
451dd416aa8Stshiozak }
452dd416aa8Stshiozak 
453dd416aa8Stshiozak /* duplicate a parser element structure shallowly */
454dd416aa8Stshiozak static struct parser_element *
dup_parser_element(const struct parser_element * pe)455dd416aa8Stshiozak dup_parser_element(const struct parser_element *pe)
456dd416aa8Stshiozak {
457dd416aa8Stshiozak 	struct parser_element *dpe = malloc(sizeof *dpe);
458dd416aa8Stshiozak 	if (dpe)
459dd416aa8Stshiozak 		copy_parser_element(dpe, pe);
460dd416aa8Stshiozak 	return dpe;
461dd416aa8Stshiozak }
462dd416aa8Stshiozak 
463dd416aa8Stshiozak /* term := identifier | constant | '(' exp ')' */
464dd416aa8Stshiozak static int
parse_term(struct tokenizer_context * tcx,struct parser_element * pelem)465dd416aa8Stshiozak parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466dd416aa8Stshiozak {
467dd416aa8Stshiozak 	struct parser_element pe0;
468dd416aa8Stshiozak 	int token;
469dd416aa8Stshiozak 	union token_data token_data;
470dd416aa8Stshiozak 
471dd416aa8Stshiozak 	token = get_token(tcx, &token_data);
472dd416aa8Stshiozak 	switch (token) {
473dd416aa8Stshiozak 	case '(':
474dd416aa8Stshiozak 		/* '(' exp ')' */
475dd416aa8Stshiozak 		init_parser_element(&pe0);
476dd416aa8Stshiozak 		/* expect exp */
477dd416aa8Stshiozak 		token = parse_cond(tcx, &pe0);
478dd416aa8Stshiozak 		T_ENSURE_OK(token, err);
479dd416aa8Stshiozak 		/* expect ')' */
480dd416aa8Stshiozak 		token = get_token(tcx, &token_data);
481dd416aa8Stshiozak 		T_ENSURE_SOMETHING(token, err);
482dd416aa8Stshiozak 		if (token != ')') {
483dd416aa8Stshiozak 			unget_token(tcx, token, &token_data);
484dd416aa8Stshiozak 			token = T_ILTOKEN;
485dd416aa8Stshiozak 			goto err;
486dd416aa8Stshiozak 		}
487dd416aa8Stshiozak 		copy_parser_element(pelem, &pe0);
488dd416aa8Stshiozak 		return token;
489dd416aa8Stshiozak err:
490dd416aa8Stshiozak 		uninit_parser_element(&pe0);
491dd416aa8Stshiozak 		return token;
492dd416aa8Stshiozak 	case T_IDENTIFIER:
493dd416aa8Stshiozak 	case T_CONSTANT:
494dd416aa8Stshiozak 		pelem->kind = token;
495dd416aa8Stshiozak 		pelem->u.token_data = token_data;
496dd416aa8Stshiozak 		return token;
497dd416aa8Stshiozak 	case T_EOF:
498dd416aa8Stshiozak 		return T_ILEND;
499dd416aa8Stshiozak 	default:
500dd416aa8Stshiozak 		return T_ILTOKEN;
501dd416aa8Stshiozak 	}
502dd416aa8Stshiozak }
503dd416aa8Stshiozak 
504dd416aa8Stshiozak /* lnot := '!' lnot | term */
505dd416aa8Stshiozak static int
parse_lnot(struct tokenizer_context * tcx,struct parser_element * pelem)506dd416aa8Stshiozak parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507dd416aa8Stshiozak {
508dd416aa8Stshiozak 	struct parser_element pe0;
509dd416aa8Stshiozak 	int token;
510dd416aa8Stshiozak 	union token_data token_data;
511dd416aa8Stshiozak 
512dd416aa8Stshiozak 	init_parser_element(&pe0);
513dd416aa8Stshiozak 
514dd416aa8Stshiozak 	/* '!' or not */
515dd416aa8Stshiozak 	token = get_token(tcx, &token_data);
516dd416aa8Stshiozak 	if (token != '!') {
517dd416aa8Stshiozak 		/* stop: term */
518dd416aa8Stshiozak 		unget_token(tcx, token, &token_data);
519dd416aa8Stshiozak 		return parse_term(tcx, pelem);
520dd416aa8Stshiozak 	}
521dd416aa8Stshiozak 
522dd416aa8Stshiozak 	/* '!' term */
523dd416aa8Stshiozak 	token = parse_lnot(tcx, &pe0);
524dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
525dd416aa8Stshiozak 
526dd416aa8Stshiozak 	pelem->kind = '!';
527dd416aa8Stshiozak 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528dd416aa8Stshiozak 	return pelem->kind;
529dd416aa8Stshiozak err:
530dd416aa8Stshiozak 	uninit_parser_element(&pe0);
531dd416aa8Stshiozak 	return token;
532dd416aa8Stshiozak }
533dd416aa8Stshiozak 
534dd416aa8Stshiozak /* ext_op := ext_next ( op ext_next )* */
535dd416aa8Stshiozak static int
parse_op2(struct tokenizer_context * tcx,struct parser_element * pelem,const struct parser_op2_transition * tr)536dd416aa8Stshiozak parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537dd416aa8Stshiozak 	  const struct parser_op2_transition *tr)
538dd416aa8Stshiozak {
539dd416aa8Stshiozak 	struct parser_element pe0, pe1, peop;
540dd416aa8Stshiozak 	int token;
541dd416aa8Stshiozak 	union token_data token_data;
542dd416aa8Stshiozak 	char op;
543dd416aa8Stshiozak 
544dd416aa8Stshiozak 	/* special case: expect lnot */
545dd416aa8Stshiozak 	if (tr == NULL)
546dd416aa8Stshiozak 		return parse_lnot(tcx, pelem);
547dd416aa8Stshiozak 
548dd416aa8Stshiozak 	init_parser_element(&pe0);
549dd416aa8Stshiozak 	init_parser_element(&pe1);
550dd416aa8Stshiozak 	token = parse_op2(tcx, &pe0, tr->next);
551dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
552dd416aa8Stshiozak 
553dd416aa8Stshiozak 	while (/*CONSTCOND*/1) {
554dd416aa8Stshiozak 		/* expect op or empty */
555dd416aa8Stshiozak 		token = get_token(tcx, &token_data);
556dd416aa8Stshiozak 		if (token != tr->kind) {
557dd416aa8Stshiozak 			/* stop */
558dd416aa8Stshiozak 			unget_token(tcx, token, &token_data);
559dd416aa8Stshiozak 			copy_parser_element(pelem, &pe0);
560dd416aa8Stshiozak 			break;
561dd416aa8Stshiozak 		}
562dd416aa8Stshiozak 		op = token_data.op;
563dd416aa8Stshiozak 		/* right hand */
564dd416aa8Stshiozak 		token = parse_op2(tcx, &pe1, tr->next);
565dd416aa8Stshiozak 		T_ENSURE_OK(token, err);
566dd416aa8Stshiozak 
567dd416aa8Stshiozak 		init_parser_element(&peop);
568dd416aa8Stshiozak 		peop.kind = tr->kind;
569dd416aa8Stshiozak 		peop.u.parser_op.op = op;
570dd416aa8Stshiozak 		peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571dd416aa8Stshiozak 		init_parser_element(&pe0);
572dd416aa8Stshiozak 		peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573dd416aa8Stshiozak 		init_parser_element(&pe1);
574dd416aa8Stshiozak 		copy_parser_element(&pe0, &peop);
575dd416aa8Stshiozak 	}
576dd416aa8Stshiozak 	return pelem->kind;
577dd416aa8Stshiozak err:
578dd416aa8Stshiozak 	uninit_parser_element(&pe1);
579dd416aa8Stshiozak 	uninit_parser_element(&pe0);
580dd416aa8Stshiozak 	return token;
581dd416aa8Stshiozak }
582dd416aa8Stshiozak 
583dd416aa8Stshiozak /* cond := lor | lor '?' cond ':' cond */
584dd416aa8Stshiozak static int
parse_cond(struct tokenizer_context * tcx,struct parser_element * pelem)585dd416aa8Stshiozak parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586dd416aa8Stshiozak {
587dd416aa8Stshiozak 	struct parser_element pe0, pe1, pe2;
588dd416aa8Stshiozak 	int token;
589dd416aa8Stshiozak 	union token_data token_data;
590dd416aa8Stshiozak 
591dd416aa8Stshiozak 	init_parser_element(&pe0);
592dd416aa8Stshiozak 	init_parser_element(&pe1);
593dd416aa8Stshiozak 	init_parser_element(&pe2);
594dd416aa8Stshiozak 
595dd416aa8Stshiozak 	/* expect lor or empty */
596dd416aa8Stshiozak 	token = parse_op2(tcx, &pe0, &exp_tr_lor);
597dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
598dd416aa8Stshiozak 
599dd416aa8Stshiozak 	/* '?' or not */
600dd416aa8Stshiozak 	token = get_token(tcx, &token_data);
601dd416aa8Stshiozak 	if (token != '?') {
602dd416aa8Stshiozak 		/* stop: lor */
603dd416aa8Stshiozak 		unget_token(tcx, token, &token_data);
604dd416aa8Stshiozak 		copy_parser_element(pelem, &pe0);
605dd416aa8Stshiozak 		return pe0.kind;
606dd416aa8Stshiozak 	}
607dd416aa8Stshiozak 
608dd416aa8Stshiozak 	/* lor '?' cond ':' cond */
609dd416aa8Stshiozak 	/* expect cond */
610dd416aa8Stshiozak 	token = parse_cond(tcx, &pe1);
611dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
612dd416aa8Stshiozak 
613dd416aa8Stshiozak 	/* expect ':' */
614dd416aa8Stshiozak 	token = get_token(tcx, &token_data);
615dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
616dd416aa8Stshiozak 	if (token != ':') {
617dd416aa8Stshiozak 		unget_token(tcx, token, &token_data);
618dd416aa8Stshiozak 		token = T_ILTOKEN;
619dd416aa8Stshiozak 		goto err;
620dd416aa8Stshiozak 	}
621dd416aa8Stshiozak 
622dd416aa8Stshiozak 	/* expect cond */
623dd416aa8Stshiozak 	token = parse_cond(tcx, &pe2);
624dd416aa8Stshiozak 	T_ENSURE_OK(token, err);
625dd416aa8Stshiozak 
626dd416aa8Stshiozak 	pelem->kind = '?';
627dd416aa8Stshiozak 	pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628dd416aa8Stshiozak 	pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629dd416aa8Stshiozak 	pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630dd416aa8Stshiozak 	return pelem->kind;
631dd416aa8Stshiozak err:
632dd416aa8Stshiozak 	uninit_parser_element(&pe2);
633dd416aa8Stshiozak 	uninit_parser_element(&pe1);
634dd416aa8Stshiozak 	uninit_parser_element(&pe0);
635dd416aa8Stshiozak 	return token;
636dd416aa8Stshiozak }
637dd416aa8Stshiozak 
638dd416aa8Stshiozak static int
parse_exp(struct tokenizer_context * tcx,struct parser_element * pelem)639dd416aa8Stshiozak parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640dd416aa8Stshiozak {
641dd416aa8Stshiozak 	int token, token1;
642dd416aa8Stshiozak 	union token_data token_data;
643dd416aa8Stshiozak 
644dd416aa8Stshiozak #ifdef ALLOW_EMPTY
645dd416aa8Stshiozak 	/* empty check */
646dd416aa8Stshiozak 	token = get_token(tcx, &token_data);
647dd416aa8Stshiozak 	if (token == T_EOF)
648dd416aa8Stshiozak 		return token;
649dd416aa8Stshiozak 	unget_token(tcx, token, &token_data);
650dd416aa8Stshiozak #endif
651dd416aa8Stshiozak 
652dd416aa8Stshiozak 	token = parse_cond(tcx, pelem);
653dd416aa8Stshiozak 	if (!T_IS_ERROR(token)) {
654dd416aa8Stshiozak 		/* termination check */
655dd416aa8Stshiozak 		token1 = get_token(tcx, &token_data);
656dd416aa8Stshiozak 		if (token1 == T_EOF)
657dd416aa8Stshiozak 			return token;
658dd416aa8Stshiozak 		else if (!T_IS_ERROR(token))
659dd416aa8Stshiozak 			 unget_token(tcx, token1, &token_data);
660dd416aa8Stshiozak 		return T_ILTOKEN;
661dd416aa8Stshiozak 	}
662dd416aa8Stshiozak 	return token;
663dd416aa8Stshiozak }
664dd416aa8Stshiozak 
665dd416aa8Stshiozak 
666dd416aa8Stshiozak #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667dd416aa8Stshiozak #include <stdio.h>
668dd416aa8Stshiozak 
669dd416aa8Stshiozak static void dump_elem(struct parser_element *);
670dd416aa8Stshiozak 
671dd416aa8Stshiozak static void
dump_op2(struct parser_element * pelem)672dd416aa8Stshiozak dump_op2(struct parser_element *pelem)
673dd416aa8Stshiozak {
674dd416aa8Stshiozak 	dump_elem(pelem->u.parser_op.operands[0]);
675dd416aa8Stshiozak 	printf(" ");
676dd416aa8Stshiozak 	dump_elem(pelem->u.parser_op.operands[1]);
677dd416aa8Stshiozak 	printf(")");
678dd416aa8Stshiozak }
679dd416aa8Stshiozak 
680dd416aa8Stshiozak static void
dump_op3(struct parser_element * pelem)681dd416aa8Stshiozak dump_op3(struct parser_element *pelem)
682dd416aa8Stshiozak {
683dd416aa8Stshiozak 	dump_elem(pelem->u.parser_op.operands[0]);
684dd416aa8Stshiozak 	printf(" ");
685dd416aa8Stshiozak 	dump_elem(pelem->u.parser_op.operands[1]);
686dd416aa8Stshiozak 	printf(" ");
687dd416aa8Stshiozak 	dump_elem(pelem->u.parser_op.operands[2]);
688dd416aa8Stshiozak 	printf(")");
689dd416aa8Stshiozak }
690dd416aa8Stshiozak 
691dd416aa8Stshiozak static void
dump_elem(struct parser_element * pelem)692dd416aa8Stshiozak dump_elem(struct parser_element *pelem)
693dd416aa8Stshiozak {
694dd416aa8Stshiozak 	switch (pelem->kind) {
695dd416aa8Stshiozak 	case T_LAND:
696dd416aa8Stshiozak 		printf("(&& ");
697dd416aa8Stshiozak 		dump_op2(pelem);
698dd416aa8Stshiozak 		break;
699dd416aa8Stshiozak 	case T_LOR:
700dd416aa8Stshiozak 		printf("(|| ");
701dd416aa8Stshiozak 		dump_op2(pelem);
702dd416aa8Stshiozak 		break;
703dd416aa8Stshiozak 	case T_EQUALITY:
704dd416aa8Stshiozak 		switch (pelem->u.parser_op.op) {
705dd416aa8Stshiozak 		case OP_EQ:
706dd416aa8Stshiozak 			printf("(== ");
707dd416aa8Stshiozak 			break;
708dd416aa8Stshiozak 		case OP_NEQ:
709dd416aa8Stshiozak 			printf("(!= ");
710dd416aa8Stshiozak 			break;
711dd416aa8Stshiozak 		}
712dd416aa8Stshiozak 		dump_op2(pelem);
713dd416aa8Stshiozak 		break;
714dd416aa8Stshiozak 	case T_RELATIONAL:
715dd416aa8Stshiozak 		switch (pelem->u.parser_op.op) {
716dd416aa8Stshiozak 		case '<':
717dd416aa8Stshiozak 		case '>':
718dd416aa8Stshiozak 			printf("(%c ", pelem->u.parser_op.op);
719dd416aa8Stshiozak 			break;
720dd416aa8Stshiozak 		case OP_LTEQ:
721dd416aa8Stshiozak 		case OP_GTEQ:
722dd416aa8Stshiozak 			printf("(%c= ", pelem->u.parser_op.op-'=');
723dd416aa8Stshiozak 			break;
724dd416aa8Stshiozak 		}
725dd416aa8Stshiozak 		dump_op2(pelem);
726dd416aa8Stshiozak 		break;
727dd416aa8Stshiozak 	case T_ADDITIVE:
728dd416aa8Stshiozak 	case T_MULTIPLICATIVE:
729dd416aa8Stshiozak 		printf("(%c ", pelem->u.parser_op.op);
730dd416aa8Stshiozak 		dump_op2(pelem);
731dd416aa8Stshiozak 		break;
732dd416aa8Stshiozak 	case '!':
733dd416aa8Stshiozak 		printf("(! ");
734dd416aa8Stshiozak 		dump_elem(pelem->u.parser_op.operands[0]);
735dd416aa8Stshiozak 		printf(")");
736dd416aa8Stshiozak 		break;
737dd416aa8Stshiozak 	case '?':
738dd416aa8Stshiozak 		printf("(? ");
739dd416aa8Stshiozak 		dump_op3(pelem);
740dd416aa8Stshiozak 		break;
741dd416aa8Stshiozak 	case T_CONSTANT:
742dd416aa8Stshiozak 		printf("%d", pelem->u.token_data.constant);
743dd416aa8Stshiozak 		break;
744dd416aa8Stshiozak 	case T_IDENTIFIER:
745dd416aa8Stshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
746dd416aa8Stshiozak 		printf("%s", pelem->u.token_data.identifier);
747dd416aa8Stshiozak #else
748dd416aa8Stshiozak 		printf(PLURAL_NUMBER_SYMBOL);
749dd416aa8Stshiozak #endif
750dd416aa8Stshiozak 		break;
751dd416aa8Stshiozak 	}
752dd416aa8Stshiozak }
753dd416aa8Stshiozak #endif
754dd416aa8Stshiozak #ifdef TEST_PARSER
755dd416aa8Stshiozak int
main(int argc,char ** argv)756dd416aa8Stshiozak main(int argc, char **argv)
757dd416aa8Stshiozak {
758dd416aa8Stshiozak 	struct tokenizer_context tcx;
759dd416aa8Stshiozak 	struct parser_element pelem;
760dd416aa8Stshiozak 	int token;
761dd416aa8Stshiozak 
762dd416aa8Stshiozak 	if (argc != 2) {
763dd416aa8Stshiozak 		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
764dd416aa8Stshiozak 		return EXIT_FAILURE;
765dd416aa8Stshiozak 	}
766dd416aa8Stshiozak 
767dd416aa8Stshiozak 	init_tokenizer_context(&tcx);
768dd416aa8Stshiozak 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
769dd416aa8Stshiozak 
770dd416aa8Stshiozak 	init_parser_element(&pelem);
771dd416aa8Stshiozak 	token = parse_exp(&tcx, &pelem);
772dd416aa8Stshiozak 
773dd416aa8Stshiozak 	if (token == T_EOF)
774dd416aa8Stshiozak 		printf("none");
775dd416aa8Stshiozak 	else if (T_IS_ERROR(token))
776dd416aa8Stshiozak 		printf("error: 0x%X", token);
777dd416aa8Stshiozak 	else
778dd416aa8Stshiozak 		dump_elem(&pelem);
779dd416aa8Stshiozak 	printf("\n");
780dd416aa8Stshiozak 
781dd416aa8Stshiozak 	uninit_parser_element(&pelem);
782dd416aa8Stshiozak 
783dd416aa8Stshiozak 	return EXIT_SUCCESS;
784dd416aa8Stshiozak }
785dd416aa8Stshiozak #endif /* TEST_PARSER */
786dd416aa8Stshiozak 
/* ----------------------------------------------------------------------
 * calculate plural number
 */
790dd416aa8Stshiozak static unsigned long
calculate_plural(const struct parser_element * pe,unsigned long n)791dd416aa8Stshiozak calculate_plural(const struct parser_element *pe, unsigned long n)
792dd416aa8Stshiozak {
793dd416aa8Stshiozak 	unsigned long val0, val1;
794dd416aa8Stshiozak 	switch (pe->kind) {
795dd416aa8Stshiozak 	case T_IDENTIFIER:
796dd416aa8Stshiozak 		return n;
797dd416aa8Stshiozak 	case T_CONSTANT:
798dd416aa8Stshiozak 		return pe->u.token_data.constant;
799dd416aa8Stshiozak 	case '?':
800dd416aa8Stshiozak 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801dd416aa8Stshiozak 		if (val0)
802dd416aa8Stshiozak 			val1=calculate_plural(pe->u.parser_op.operands[1], n);
803dd416aa8Stshiozak 		else
804dd416aa8Stshiozak 			val1=calculate_plural(pe->u.parser_op.operands[2], n);
805dd416aa8Stshiozak 		return val1;
806dd416aa8Stshiozak 	case '!':
807dd416aa8Stshiozak 		return !calculate_plural(pe->u.parser_op.operands[0], n);
808dd416aa8Stshiozak 	case T_MULTIPLICATIVE:
809dd416aa8Stshiozak 	case T_ADDITIVE:
810dd416aa8Stshiozak 	case T_RELATIONAL:
811dd416aa8Stshiozak 	case T_EQUALITY:
812dd416aa8Stshiozak 	case T_LOR:
813dd416aa8Stshiozak 	case T_LAND:
814dd416aa8Stshiozak 		val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815dd416aa8Stshiozak 		val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816dd416aa8Stshiozak 		switch (pe->u.parser_op.op) {
817dd416aa8Stshiozak 		case '*':
818dd416aa8Stshiozak 			return val0*val1;
819dd416aa8Stshiozak 		case '/':
820dd416aa8Stshiozak 			return val0/val1;
821dd416aa8Stshiozak 		case '%':
822dd416aa8Stshiozak 			return val0%val1;
823dd416aa8Stshiozak 		case '+':
824dd416aa8Stshiozak 			return val0+val1;
825dd416aa8Stshiozak 		case '-':
826dd416aa8Stshiozak 			return val0-val1;
827dd416aa8Stshiozak 		case '<':
828dd416aa8Stshiozak 			return val0<val1;
829dd416aa8Stshiozak 		case '>':
830dd416aa8Stshiozak 			return val0>val1;
831dd416aa8Stshiozak 		case OP_LTEQ:
832dd416aa8Stshiozak 			return val0<=val1;
833dd416aa8Stshiozak 		case OP_GTEQ:
834dd416aa8Stshiozak 			return val0>=val1;
835dd416aa8Stshiozak 		case OP_EQ:
836dd416aa8Stshiozak 			return val0==val1;
837dd416aa8Stshiozak 		case OP_NEQ:
838dd416aa8Stshiozak 			return val0!=val1;
839dd416aa8Stshiozak 		case '|':
840dd416aa8Stshiozak 			return val0||val1;
841dd416aa8Stshiozak 		case '&':
842dd416aa8Stshiozak 			return val0&&val1;
843dd416aa8Stshiozak 		}
844dd416aa8Stshiozak 	}
845dd416aa8Stshiozak 	return 0;
846dd416aa8Stshiozak }
847dd416aa8Stshiozak 
848dd416aa8Stshiozak #ifdef TEST_CALC_PLURAL
849dd416aa8Stshiozak #include <stdio.h>
850dd416aa8Stshiozak 
851dd416aa8Stshiozak int
main(int argc,char ** argv)852dd416aa8Stshiozak main(int argc, char **argv)
853dd416aa8Stshiozak {
854dd416aa8Stshiozak 	struct tokenizer_context tcx;
855dd416aa8Stshiozak 	struct parser_element pelem;
856dd416aa8Stshiozak 	int token;
857dd416aa8Stshiozak 
858dd416aa8Stshiozak 	if (argc != 3) {
859dd416aa8Stshiozak 		fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
860dd416aa8Stshiozak 		return EXIT_FAILURE;
861dd416aa8Stshiozak 	}
862dd416aa8Stshiozak 
863dd416aa8Stshiozak 	init_tokenizer_context(&tcx);
864dd416aa8Stshiozak 	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
865dd416aa8Stshiozak 
866dd416aa8Stshiozak 	init_parser_element(&pelem);
867dd416aa8Stshiozak 	token = parse_exp(&tcx, &pelem);
868dd416aa8Stshiozak 
869dd416aa8Stshiozak 	if (token == T_EOF)
870dd416aa8Stshiozak 		printf("none");
871dd416aa8Stshiozak 	else if (T_IS_ERROR(token))
872dd416aa8Stshiozak 		printf("error: 0x%X", token);
873dd416aa8Stshiozak 	else {
874dd416aa8Stshiozak 		printf("plural = %lu",
875dd416aa8Stshiozak 		       calculate_plural(&pelem, atoi(argv[2])));
876dd416aa8Stshiozak 	}
877dd416aa8Stshiozak 	printf("\n");
878dd416aa8Stshiozak 
879dd416aa8Stshiozak 	uninit_parser_element(&pelem);
880dd416aa8Stshiozak 
881dd416aa8Stshiozak 	return EXIT_SUCCESS;
882dd416aa8Stshiozak }
883dd416aa8Stshiozak #endif /* TEST_CALC_PLURAL */
884dd416aa8Stshiozak 
885dd416aa8Stshiozak 
886dd416aa8Stshiozak /* ----------------------------------------------------------------------
887dd416aa8Stshiozak  * parse plural forms
888dd416aa8Stshiozak  */
889dd416aa8Stshiozak 
/*
 * Advance the start of region r past whatever _bcs_skip_ws_len() skips
 * at its head, shrinking the region accordingly.
 */
static void
region_skip_ws(struct _region *r)
{
	size_t remaining = _region_size(r);
	const char *first = _region_head(r);

	first = _bcs_skip_ws_len(first, &remaining);
	_region_init(r, __UNCONST(first), remaining);
}
899dd416aa8Stshiozak 
/*
 * Shorten region r to the length computed by _bcs_trunc_rws_len();
 * the start of the region is left unchanged.
 */
static void
region_trunc_rws(struct _region *r)
{
	size_t trimmed = _region_size(r);
	const char *head = _region_head(r);

	_bcs_trunc_rws_len(head, &trimmed);
	_region_init(r, __UNCONST(head), trimmed);
}
909dd416aa8Stshiozak 
/*
 * Check whether region r starts with the prelen bytes at pre.
 * When ignorecase is non-zero the comparison is done with
 * _bcs_strncasecmp(), otherwise with memcmp().
 *
 * Returns 0 if the prefix matches, -1 if it does not or if the region
 * is shorter than the prefix.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
		    int ignorecase)
{
	int differs;

	if (prelen > _region_size(r))
		return -1;

	differs = ignorecase
	    ? _bcs_strncasecmp(_region_head(r), pre, prelen)
	    : memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
926dd416aa8Stshiozak 
/*
 * Trim the end of region r with region_trunc_rws() and then drop the
 * final ';'.
 *
 * Returns 0 on success, -1 if the trimmed region is empty or does not
 * end with ';' (the region stays trimmed in that case).
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);

	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len - 1) != ';')
		return -1;

	_region_get_subregion(r, r, 0, len - 1);
	return 0;
}
937dd416aa8Stshiozak 
938dd416aa8Stshiozak static int
find_plural_forms(struct _region * r)939dd416aa8Stshiozak find_plural_forms(struct _region *r)
940dd416aa8Stshiozak {
941dd416aa8Stshiozak 	struct _memstream ms;
942dd416aa8Stshiozak 	struct _region rr;
943dd416aa8Stshiozak 
944dd416aa8Stshiozak 	_memstream_bind(&ms, r);
945dd416aa8Stshiozak 
946dd416aa8Stshiozak 	while (!_memstream_getln_region(&ms, &rr)) {
947dd416aa8Stshiozak 		if (!region_check_prefix(&rr,
948dd416aa8Stshiozak 					 PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949dd416aa8Stshiozak 			_region_get_subregion(
950dd416aa8Stshiozak 				r, &rr, LEN_PLURAL_FORMS,
951dd416aa8Stshiozak 				_region_size(&rr)-LEN_PLURAL_FORMS);
952dd416aa8Stshiozak 			region_skip_ws(r);
953dd416aa8Stshiozak 			region_trunc_rws(r);
954dd416aa8Stshiozak 			return 0;
955dd416aa8Stshiozak 		}
956dd416aa8Stshiozak 	}
957dd416aa8Stshiozak 	return -1;
958dd416aa8Stshiozak }
959dd416aa8Stshiozak 
/*
 * Consume "<sym> =" from the head of region r, where the symbol is
 * compared case-sensitively and region_skip_ws() is applied before the
 * symbol, before the '=' and after it.
 *
 * Returns 0 on success.  Returns -1 if the symbol or the '=' is
 * missing; r may already have been advanced in that case.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;

	/* step over the symbol */
	_region_get_subregion(r, r, symlen, _region_size(r) - symlen);
	region_skip_ws(r);

	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;

	/* step over the '=' */
	_region_get_subregion(r, r, 1, _region_size(r) - 1);
	region_skip_ws(r);

	return 0;
}
974dd416aa8Stshiozak 
975dd416aa8Stshiozak static int
skip_nplurals(struct _region * r,unsigned long * rnp)976dd416aa8Stshiozak skip_nplurals(struct _region *r, unsigned long *rnp)
977dd416aa8Stshiozak {
978dd416aa8Stshiozak 	unsigned long np;
979dd416aa8Stshiozak 	char buf[MAX_LEN_ATOM+2], *endptr;
980dd416aa8Stshiozak 	const char *endptrconst;
981dd416aa8Stshiozak 	size_t ofs;
982dd416aa8Stshiozak 
983dd416aa8Stshiozak 	if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984dd416aa8Stshiozak 		return -1;
985dd416aa8Stshiozak 	if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986dd416aa8Stshiozak 		return -1;
987dd416aa8Stshiozak 	strlcpy(buf, _region_head(r), sizeof (buf));
988dd416aa8Stshiozak 	np = strtoul(buf, &endptr, 0);
989dd416aa8Stshiozak 	endptrconst = _bcs_skip_ws(endptr);
990dd416aa8Stshiozak 	if (*endptrconst != ';')
991dd416aa8Stshiozak 		return -1;
992dd416aa8Stshiozak 	ofs = endptrconst+1-buf;
993dd416aa8Stshiozak 	if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994dd416aa8Stshiozak 		return -1;
995dd416aa8Stshiozak 	if (rnp)
996dd416aa8Stshiozak 		*rnp = np;
997dd416aa8Stshiozak 	return 0;
998dd416aa8Stshiozak }
999dd416aa8Stshiozak 
1000dd416aa8Stshiozak static int
parse_plural_body(struct _region * r,struct parser_element ** rpe)1001dd416aa8Stshiozak parse_plural_body(struct _region *r, struct parser_element **rpe)
1002dd416aa8Stshiozak {
1003dd416aa8Stshiozak 	int token;
1004dd416aa8Stshiozak 	struct tokenizer_context tcx;
1005dd416aa8Stshiozak 	struct parser_element pelem, *ppe;
1006dd416aa8Stshiozak 
1007dd416aa8Stshiozak 	init_tokenizer_context(&tcx);
1008dd416aa8Stshiozak 	_memstream_bind(&tcx.memstream, r);
1009dd416aa8Stshiozak 
1010dd416aa8Stshiozak 	init_parser_element(&pelem);
1011dd416aa8Stshiozak 	token = parse_exp(&tcx, &pelem);
1012dd416aa8Stshiozak 	if (T_IS_ERROR(token))
1013dd416aa8Stshiozak 		return token;
1014dd416aa8Stshiozak 
1015dd416aa8Stshiozak 	ppe = dup_parser_element(&pelem);
1016dd416aa8Stshiozak 	if (ppe == NULL) {
1017dd416aa8Stshiozak 		uninit_parser_element(&pelem);
1018dd416aa8Stshiozak 		return T_NOMEM;
1019dd416aa8Stshiozak 	}
1020dd416aa8Stshiozak 
1021dd416aa8Stshiozak 	*rpe = ppe;
1022dd416aa8Stshiozak 
1023dd416aa8Stshiozak 	return 0;
1024dd416aa8Stshiozak }
1025dd416aa8Stshiozak 
1026dd416aa8Stshiozak static int
parse_plural(struct parser_element ** rpe,unsigned long * rnp,const char * str,size_t len)1027dd416aa8Stshiozak parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028dd416aa8Stshiozak 	     const char *str, size_t len)
1029dd416aa8Stshiozak {
1030dd416aa8Stshiozak 	struct _region r;
1031dd416aa8Stshiozak 
1032dd416aa8Stshiozak 	_region_init(&r, __UNCONST(str), len);
1033dd416aa8Stshiozak 
1034dd416aa8Stshiozak 	if (find_plural_forms(&r))
1035dd416aa8Stshiozak 		return T_NOTFOUND;
1036dd416aa8Stshiozak 	if (skip_nplurals(&r, rnp))
1037dd416aa8Stshiozak 		return T_ILPLURAL;
1038dd416aa8Stshiozak 	if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039dd416aa8Stshiozak 		return T_ILPLURAL;
1040dd416aa8Stshiozak 	if (cut_trailing_semicolon(&r))
1041dd416aa8Stshiozak 		return T_ILPLURAL;
1042dd416aa8Stshiozak 	return parse_plural_body(&r, rpe);
1043dd416aa8Stshiozak }
1044dd416aa8Stshiozak 
1045dd416aa8Stshiozak #ifdef TEST_PARSE_PLURAL
1046dd416aa8Stshiozak int
main(int argc,char ** argv)1047dd416aa8Stshiozak main(int argc, char **argv)
1048dd416aa8Stshiozak {
1049dd416aa8Stshiozak 	int ret;
1050dd416aa8Stshiozak 	struct parser_element *pelem;
1051dd416aa8Stshiozak 	unsigned long np;
1052dd416aa8Stshiozak 
1053dd416aa8Stshiozak 	if (argc != 2 && argc != 3) {
1054dd416aa8Stshiozak 		fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
1055dd416aa8Stshiozak 		return EXIT_FAILURE;
1056dd416aa8Stshiozak 	}
1057dd416aa8Stshiozak 
1058dd416aa8Stshiozak 	ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));
1059dd416aa8Stshiozak 
1060dd416aa8Stshiozak 	if (ret == T_EOF)
1061dd416aa8Stshiozak 		printf("none");
1062dd416aa8Stshiozak 	else if (T_IS_ERROR(ret))
1063dd416aa8Stshiozak 		printf("error: 0x%X", ret);
1064dd416aa8Stshiozak 	else {
1065dd416aa8Stshiozak 		printf("syntax tree: ");
1066dd416aa8Stshiozak 		dump_elem(pelem);
1067dd416aa8Stshiozak 		printf("\nnplurals = %lu", np);
1068dd416aa8Stshiozak 		if (argv[2])
1069dd416aa8Stshiozak 			printf(", plural = %lu",
1070dd416aa8Stshiozak 			       calculate_plural(pelem, atoi(argv[2])));
1071dd416aa8Stshiozak 		free_parser_element(pelem);
1072dd416aa8Stshiozak 	}
1073dd416aa8Stshiozak 	printf("\n");
1074dd416aa8Stshiozak 
1075dd416aa8Stshiozak 
1076dd416aa8Stshiozak 	return EXIT_SUCCESS;
1077dd416aa8Stshiozak }
1078dd416aa8Stshiozak #endif /* TEST_PARSE_PLURAL */
1079dd416aa8Stshiozak 
1080dd416aa8Stshiozak /*
1081dd416aa8Stshiozak  * external interface
1082dd416aa8Stshiozak  */
1083dd416aa8Stshiozak 
/*
 * External entry point for parse_plural(): struct gettext_plural is
 * the caller-visible name for the parser's struct parser_element, so
 * the result pointer is merely cast.  Arguments and return value are
 * those of parse_plural().
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
		      const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1090dd416aa8Stshiozak 
/*
 * External entry point for calculate_plural(): evaluate the plural
 * expression pe for the number n and return its value.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1096dd416aa8Stshiozak 
/*
 * External entry point for free_parser_element(): release the plural
 * expression pe.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (void *)pe;

	free_parser_element(tree);
}
1102dd416aa8Stshiozak 
1103dd416aa8Stshiozak #ifdef TEST_PLURAL
1104dd416aa8Stshiozak #include <libintl.h>
1105dd416aa8Stshiozak #include <locale.h>
1106dd416aa8Stshiozak 
1107dd416aa8Stshiozak #define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))
1108dd416aa8Stshiozak 
/*
 * Test driver (TEST_PLURAL): bind the "test" domain to the current
 * directory and print the dngettext() result for n = 1 .. 4.
 */
int
main(void)
{
	int count;

	/* the catalog is looked up as ./LANG/LC_MESSAGES/test.mo */
	bindtextdomain("test", ".");

	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
1120dd416aa8Stshiozak #endif
1121