1*388550b0Srillig /* $NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $ */
2dd416aa8Stshiozak
3dd416aa8Stshiozak /*-
4dd416aa8Stshiozak * Copyright (c) 2005 Citrus Project,
5dd416aa8Stshiozak * All rights reserved.
6dd416aa8Stshiozak *
7dd416aa8Stshiozak * Redistribution and use in source and binary forms, with or without
8dd416aa8Stshiozak * modification, are permitted provided that the following conditions
9dd416aa8Stshiozak * are met:
10dd416aa8Stshiozak * 1. Redistributions of source code must retain the above copyright
11dd416aa8Stshiozak * notice, this list of conditions and the following disclaimer.
12dd416aa8Stshiozak * 2. Redistributions in binary form must reproduce the above copyright
13dd416aa8Stshiozak * notice, this list of conditions and the following disclaimer in the
14dd416aa8Stshiozak * documentation and/or other materials provided with the distribution.
15dd416aa8Stshiozak *
16dd416aa8Stshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17dd416aa8Stshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18dd416aa8Stshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19dd416aa8Stshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20dd416aa8Stshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21dd416aa8Stshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22dd416aa8Stshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23dd416aa8Stshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24dd416aa8Stshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25dd416aa8Stshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26dd416aa8Stshiozak * SUCH DAMAGE.
27dd416aa8Stshiozak *
28dd416aa8Stshiozak */
29dd416aa8Stshiozak
30dd416aa8Stshiozak #include <sys/cdefs.h>
31*388550b0Srillig __RCSID("$NetBSD: plural_parser.c,v 1.4 2022/04/19 20:32:16 rillig Exp $");
32dd416aa8Stshiozak
33dd416aa8Stshiozak #include <assert.h>
34dd416aa8Stshiozak #include <stdio.h>
35dd416aa8Stshiozak #include <stdlib.h>
36dd416aa8Stshiozak #include <string.h>
37dd416aa8Stshiozak #include <citrus/citrus_namespace.h>
38dd416aa8Stshiozak #include <citrus/citrus_region.h>
39dd416aa8Stshiozak #include <citrus/citrus_memstream.h>
40dd416aa8Stshiozak #include <citrus/citrus_bcs.h>
41dd416aa8Stshiozak #include "plural_parser.h"
42dd416aa8Stshiozak
/*
 * The stand-alone test programs accept an empty expression and any
 * identifier; the library build accepts only PLURAL_NUMBER_SYMBOL.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* limits */
#define MAX_LEN_ATOM		10	/* longest identifier or constant */
#define MAX_NUM_OPERANDS	3	/* '?' ':' takes three operands */

/*
 * Token codes.  Single-character operators are represented by the
 * character itself, so every code below 0x200 counts as an operator.
 */
#define T_EOF			EOF
#define T_NONE			0x100	/* no token is pushed back */
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */

/* atoms */
#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201

/* error codes: everything from 0x300 upwards */
#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306

/* token classification */
#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/* two-character operators are encoded as the sum of both characters */
#define OP_EQ			('='+'=')
#define OP_NEQ			('!'+'=')
#define OP_LTEQ			('<'+'=')
#define OP_GTEQ			('>'+'=')

/* well-known symbols and their lengths (without the terminating NUL) */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof (NPLURALS_SYMBOL) -1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof (PLURAL_SYMBOL) -1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof (PLURAL_FORMS) -1)
83dd416aa8Stshiozak
84dd416aa8Stshiozak /* ----------------------------------------------------------------------
85dd416aa8Stshiozak * tokenizer part
86dd416aa8Stshiozak */
87dd416aa8Stshiozak
/*
 * Value attached to a token.  Which member is meaningful depends on
 * the token code returned together with it.
 */
union token_data {
	/* T_CONSTANT: the numeric value */
	unsigned long constant;
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	/* T_IDENTIFIER: the NUL-terminated name (test builds only) */
	char identifier[MAX_LEN_ATOM+1];
#endif
	/* operator tokens: the character, or the sum of both characters */
	char op;
};
96dd416aa8Stshiozak
97dd416aa8Stshiozak struct tokenizer_context
98dd416aa8Stshiozak {
99dd416aa8Stshiozak struct _memstream memstream;
100dd416aa8Stshiozak struct {
101dd416aa8Stshiozak int token;
102dd416aa8Stshiozak union token_data token_data;
103dd416aa8Stshiozak } token0;
104dd416aa8Stshiozak };
105dd416aa8Stshiozak
106dd416aa8Stshiozak /* initialize a tokenizer context */
107dd416aa8Stshiozak static void
init_tokenizer_context(struct tokenizer_context * tcx)108dd416aa8Stshiozak init_tokenizer_context(struct tokenizer_context *tcx)
109dd416aa8Stshiozak {
110dd416aa8Stshiozak tcx->token0.token = T_NONE;
111dd416aa8Stshiozak }
112dd416aa8Stshiozak
113dd416aa8Stshiozak /* get an atom (identifier or constant) */
114dd416aa8Stshiozak static int
tokenize_atom(struct tokenizer_context * tcx,union token_data * token_data)115dd416aa8Stshiozak tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116dd416aa8Stshiozak {
117dd416aa8Stshiozak int ch, len;
118dd416aa8Stshiozak char buf[MAX_LEN_ATOM+1];
119dd416aa8Stshiozak
120dd416aa8Stshiozak len = 0;
121dd416aa8Stshiozak while (/*CONSTCOND*/1) {
122dd416aa8Stshiozak ch = _memstream_getc(&tcx->memstream);
123dd416aa8Stshiozak if (!(_bcs_isalnum(ch) || ch == '_')) {
124dd416aa8Stshiozak _memstream_ungetc(&tcx->memstream, ch);
125dd416aa8Stshiozak break;
126dd416aa8Stshiozak }
127dd416aa8Stshiozak if (len == MAX_LEN_ATOM)
128dd416aa8Stshiozak return T_TOOLONG;
129dd416aa8Stshiozak buf[len++] = ch;
130dd416aa8Stshiozak }
131dd416aa8Stshiozak buf[len] = '\0';
132dd416aa8Stshiozak if (len == 0)
133dd416aa8Stshiozak return T_ILCHAR;
134dd416aa8Stshiozak
135dd416aa8Stshiozak if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136dd416aa8Stshiozak unsigned long ul;
137dd416aa8Stshiozak char *post;
138dd416aa8Stshiozak ul = strtoul(buf, &post, 0);
139dd416aa8Stshiozak if (buf+len != post)
140dd416aa8Stshiozak return T_ILCHAR;
141dd416aa8Stshiozak token_data->constant = ul;
142dd416aa8Stshiozak return T_CONSTANT;
143dd416aa8Stshiozak }
144dd416aa8Stshiozak
145dd416aa8Stshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
146dd416aa8Stshiozak strcpy(token_data->identifier, buf);
147dd416aa8Stshiozak return T_IDENTIFIER;
148dd416aa8Stshiozak #else
149dd416aa8Stshiozak if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150dd416aa8Stshiozak return T_IDENTIFIER;
151dd416aa8Stshiozak return T_ILCHAR;
152dd416aa8Stshiozak #endif
153dd416aa8Stshiozak }
154dd416aa8Stshiozak
155dd416aa8Stshiozak /* tokenizer main routine */
156dd416aa8Stshiozak static int
tokenize(struct tokenizer_context * tcx,union token_data * token_data)157dd416aa8Stshiozak tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158dd416aa8Stshiozak {
159dd416aa8Stshiozak int ch, prevch;
160dd416aa8Stshiozak
161dd416aa8Stshiozak retry:
162dd416aa8Stshiozak ch = _memstream_getc(&tcx->memstream);
163dd416aa8Stshiozak if (_bcs_isspace(ch))
164dd416aa8Stshiozak goto retry;
165dd416aa8Stshiozak
166dd416aa8Stshiozak switch (ch) {
167dd416aa8Stshiozak case T_EOF:
168dd416aa8Stshiozak return ch;
169dd416aa8Stshiozak case '+': case '-':
170dd416aa8Stshiozak token_data->op = ch;
171dd416aa8Stshiozak return T_ADDITIVE;
172dd416aa8Stshiozak case '*': case '/': case '%':
173dd416aa8Stshiozak token_data->op = ch;
174dd416aa8Stshiozak return T_MULTIPLICATIVE;
175dd416aa8Stshiozak case '?': case ':': case '(': case ')':
176dd416aa8Stshiozak token_data->op = ch;
177dd416aa8Stshiozak return ch;
178dd416aa8Stshiozak case '&': case '|':
179dd416aa8Stshiozak prevch = ch;
180dd416aa8Stshiozak ch = _memstream_getc(&tcx->memstream);
181dd416aa8Stshiozak if (ch != prevch) {
182dd416aa8Stshiozak _memstream_ungetc(&tcx->memstream, ch);
183dd416aa8Stshiozak return T_ILCHAR;
184dd416aa8Stshiozak }
185dd416aa8Stshiozak token_data->op = ch;
186dd416aa8Stshiozak switch (ch) {
187dd416aa8Stshiozak case '&':
188dd416aa8Stshiozak return T_LAND;
189dd416aa8Stshiozak case '|':
190dd416aa8Stshiozak return T_LOR;
19101d1183fSchristos default:
19201d1183fSchristos return T_ILTOKEN;
193dd416aa8Stshiozak }
194dd416aa8Stshiozak case '=': case '!': case '<': case '>':
195dd416aa8Stshiozak prevch = ch;
196dd416aa8Stshiozak ch = _memstream_getc(&tcx->memstream);
197dd416aa8Stshiozak if (ch != '=') {
198dd416aa8Stshiozak _memstream_ungetc(&tcx->memstream, ch);
199dd416aa8Stshiozak switch (prevch) {
200dd416aa8Stshiozak case '=':
201dd416aa8Stshiozak return T_ILCHAR;
202dd416aa8Stshiozak case '!':
203dd416aa8Stshiozak return '!';
204dd416aa8Stshiozak case '<':
205dd416aa8Stshiozak case '>':
206dd416aa8Stshiozak token_data->op = prevch; /* OP_LT or OP_GT */
207dd416aa8Stshiozak return T_RELATIONAL;
208dd416aa8Stshiozak }
209dd416aa8Stshiozak }
210dd416aa8Stshiozak /* '==', '!=', '<=' or '>=' */
211dd416aa8Stshiozak token_data->op = ch+prevch;
212dd416aa8Stshiozak switch (prevch) {
213dd416aa8Stshiozak case '=':
214dd416aa8Stshiozak case '!':
215dd416aa8Stshiozak return T_EQUALITY;
216dd416aa8Stshiozak case '<':
217dd416aa8Stshiozak case '>':
218dd416aa8Stshiozak return T_RELATIONAL;
219dd416aa8Stshiozak }
220dd416aa8Stshiozak /*NOTREACHED*/
221dd416aa8Stshiozak }
222dd416aa8Stshiozak
223dd416aa8Stshiozak _memstream_ungetc(&tcx->memstream, ch);
224dd416aa8Stshiozak return tokenize_atom(tcx, token_data);
225dd416aa8Stshiozak }
226dd416aa8Stshiozak
227dd416aa8Stshiozak /* get the next token */
228dd416aa8Stshiozak static int
get_token(struct tokenizer_context * tcx,union token_data * token_data)229dd416aa8Stshiozak get_token(struct tokenizer_context *tcx, union token_data *token_data)
230dd416aa8Stshiozak {
231dd416aa8Stshiozak if (tcx->token0.token != T_NONE) {
232dd416aa8Stshiozak int token = tcx->token0.token;
233dd416aa8Stshiozak tcx->token0.token = T_NONE;
234dd416aa8Stshiozak *token_data = tcx->token0.token_data;
235dd416aa8Stshiozak return token;
236dd416aa8Stshiozak }
237dd416aa8Stshiozak return tokenize(tcx, token_data);
238dd416aa8Stshiozak }
239dd416aa8Stshiozak
240dd416aa8Stshiozak /* push back the last token */
241dd416aa8Stshiozak static void
unget_token(struct tokenizer_context * tcx,int token,union token_data * token_data)242dd416aa8Stshiozak unget_token(struct tokenizer_context *tcx,
243dd416aa8Stshiozak int token, union token_data *token_data)
244dd416aa8Stshiozak {
245dd416aa8Stshiozak tcx->token0.token = token;
246dd416aa8Stshiozak tcx->token0.token_data = *token_data;
247dd416aa8Stshiozak }
248dd416aa8Stshiozak
#ifdef TEST_TOKENIZER

/*
 * Test driver for the tokenizer: tokenize argv[1] and print one line
 * per token until the end of input or the first tokenizer error.
 *
 * Returns EXIT_FAILURE on wrong usage, 0 otherwise.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	union token_data token_data;
	int token;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <expression>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	while (1) {
		token = get_token(&tcx, &token_data);
		switch (token) {
		case T_EOF:
			goto quit;
		case T_ILCHAR:
			printf("illegal character.\n");
			goto quit;
		case T_TOOLONG:
			printf("too long atom.\n");
			goto quit;
		case T_CONSTANT:
			printf("constant: %lu\n", token_data.constant);
			break;
		case T_IDENTIFIER:
			printf("symbol: %s\n", token_data.identifier);
			break;
		default:
			printf("operator: ");
			switch (token) {
			case T_LAND:
				printf("&&\n");
				break;
			case T_LOR:
				printf("||\n");
				break;
			case T_EQUALITY:
				/* op is the sum of both characters */
				printf("%c=\n", token_data.op-'=');
				break;
			case T_RELATIONAL:
				switch(token_data.op) {
				case OP_LTEQ:
				case OP_GTEQ:
					printf("%c=\n", token_data.op-'=');
					break;
				default:
					printf("%c\n", token_data.op);
					break;
				}
				break;
			case T_ADDITIVE:
			case T_MULTIPLICATIVE:
				printf("%c\n", token_data.op);
				break;
			default:
				/*
				 * Single-character operator: the token is
				 * the character.  The "operator: " prefix
				 * has already been printed above.
				 */
				printf("%c\n", token);
				break;
			}
		}
	}
quit:
	return 0;
}
#endif /* TEST_TOKENIZER */
319dd416aa8Stshiozak
320dd416aa8Stshiozak
321dd416aa8Stshiozak /* ----------------------------------------------------------------------
322dd416aa8Stshiozak * parser part
323dd416aa8Stshiozak *
324dd416aa8Stshiozak * exp := cond
325dd416aa8Stshiozak *
326dd416aa8Stshiozak * cond := lor | lor '?' cond ':' cond
327dd416aa8Stshiozak *
328dd416aa8Stshiozak * lor := land ( '||' land )*
329dd416aa8Stshiozak *
330dd416aa8Stshiozak * land := equality ( '&&' equality )*
331dd416aa8Stshiozak *
332dd416aa8Stshiozak * equality := relational ( equalityops relational )*
333dd416aa8Stshiozak * equalityops := '==' | '!='
334dd416aa8Stshiozak *
335dd416aa8Stshiozak * relational := additive ( relationalops additive )*
336dd416aa8Stshiozak * relationalops := '<' | '>' | '<=' | '>='
337dd416aa8Stshiozak *
338dd416aa8Stshiozak * additive := multiplicative ( additiveops multiplicative )*
339dd416aa8Stshiozak * additiveops := '+' | '-'
340dd416aa8Stshiozak *
341dd416aa8Stshiozak * multiplicative := lnot ( multiplicativeops lnot )*
342dd416aa8Stshiozak * multiplicativeops := '*' | '/' | '%'
343dd416aa8Stshiozak *
344dd416aa8Stshiozak * lnot := '!' lnot | term
345dd416aa8Stshiozak *
346dd416aa8Stshiozak * term := literal | identifier | '(' exp ')'
347dd416aa8Stshiozak *
348dd416aa8Stshiozak */
349dd416aa8Stshiozak
/* Jump to `label' if `token' is an error code. */
#define T_ENSURE_OK(token, label)				\
do {								\
	if (T_IS_ERROR(token))					\
		goto label;					\
} while (0)

/*
 * Like T_ENSURE_OK, but the end of input is an error as well: in that
 * case `token' is replaced by T_ILEND before jumping to `label'.
 */
#define T_ENSURE_SOMETHING(token, label)			\
do {								\
	if ((token) == T_EOF)					\
		token = T_ILEND;				\
	if (T_IS_ERROR(token))					\
		goto label;					\
} while (0)
363dd416aa8Stshiozak
364dd416aa8Stshiozak #define parser_element plural_element
365dd416aa8Stshiozak
366dd416aa8Stshiozak struct parser_element;
367dd416aa8Stshiozak struct parser_op
368dd416aa8Stshiozak {
369dd416aa8Stshiozak char op;
370dd416aa8Stshiozak struct parser_element *operands[MAX_NUM_OPERANDS];
371dd416aa8Stshiozak };
372dd416aa8Stshiozak struct parser_element
373dd416aa8Stshiozak {
374dd416aa8Stshiozak int kind;
375dd416aa8Stshiozak union
376dd416aa8Stshiozak {
377dd416aa8Stshiozak struct parser_op parser_op;
378dd416aa8Stshiozak union token_data token_data;
379dd416aa8Stshiozak } u;
380dd416aa8Stshiozak };
381dd416aa8Stshiozak
/*
 * One precedence level of the binary operators: the token class that
 * is accepted on this level and the next level, which parses the
 * operands.  A NULL `next' is handled by parse_op2() as "parse an
 * lnot".
 */
struct parser_op2_transition {
	int kind;
	const struct parser_op2_transition *next;
};
387dd416aa8Stshiozak
388dd416aa8Stshiozak /* prototypes */
389dd416aa8Stshiozak static int parse_cond(struct tokenizer_context *, struct parser_element *);
390dd416aa8Stshiozak
391dd416aa8Stshiozak
392dd416aa8Stshiozak /* transition table for the 2-operand operators */
393dd416aa8Stshiozak #define DEF_TR(t, k, n) \
394dd416aa8Stshiozak static struct parser_op2_transition exp_tr_##t = { \
395dd416aa8Stshiozak k, &exp_tr_##n \
396dd416aa8Stshiozak }
397dd416aa8Stshiozak #define DEF_TR0(t, k) \
398dd416aa8Stshiozak static struct parser_op2_transition exp_tr_##t = { \
399dd416aa8Stshiozak k, NULL /* expect lnot */ \
400dd416aa8Stshiozak }
401dd416aa8Stshiozak
402dd416aa8Stshiozak DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403dd416aa8Stshiozak DEF_TR(additive, T_ADDITIVE, multiplicative);
404dd416aa8Stshiozak DEF_TR(relational, T_RELATIONAL, additive);
405dd416aa8Stshiozak DEF_TR(equality, T_EQUALITY, relational);
406dd416aa8Stshiozak DEF_TR(land, T_LAND, equality);
407dd416aa8Stshiozak DEF_TR(lor, T_LOR, land);
408dd416aa8Stshiozak
409dd416aa8Stshiozak /* init a parser element structure */
410dd416aa8Stshiozak static void
init_parser_element(struct parser_element * pe)411dd416aa8Stshiozak init_parser_element(struct parser_element *pe)
412dd416aa8Stshiozak {
413dd416aa8Stshiozak int i;
414dd416aa8Stshiozak
415dd416aa8Stshiozak pe->kind = T_NONE;
416dd416aa8Stshiozak for (i=0; i<MAX_NUM_OPERANDS; i++)
417dd416aa8Stshiozak pe->u.parser_op.operands[i] = NULL;
418dd416aa8Stshiozak }
419dd416aa8Stshiozak
420dd416aa8Stshiozak /* uninitialize a parser element structure with freeing children */
421dd416aa8Stshiozak static void free_parser_element(struct parser_element *);
422dd416aa8Stshiozak static void
uninit_parser_element(struct parser_element * pe)423dd416aa8Stshiozak uninit_parser_element(struct parser_element *pe)
424dd416aa8Stshiozak {
425dd416aa8Stshiozak int i;
426dd416aa8Stshiozak
427dd416aa8Stshiozak if (T_IS_OPERATOR(pe->kind))
428dd416aa8Stshiozak for (i=0; i<MAX_NUM_OPERANDS; i++)
429dd416aa8Stshiozak if (pe->u.parser_op.operands[i])
430dd416aa8Stshiozak free_parser_element(
431dd416aa8Stshiozak pe->u.parser_op.operands[i]);
432dd416aa8Stshiozak }
433dd416aa8Stshiozak
/*
 * Free a heap-allocated parser element together with all of its
 * children.  A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;
	uninit_parser_element(pe);
	free(pe);
}
443dd416aa8Stshiozak
444dd416aa8Stshiozak
445dd416aa8Stshiozak /* copy a parser element structure shallowly */
446dd416aa8Stshiozak static void
copy_parser_element(struct parser_element * dpe,const struct parser_element * spe)447dd416aa8Stshiozak copy_parser_element(struct parser_element *dpe,
448dd416aa8Stshiozak const struct parser_element *spe)
449dd416aa8Stshiozak {
450dd416aa8Stshiozak memcpy(dpe, spe, sizeof *dpe);
451dd416aa8Stshiozak }
452dd416aa8Stshiozak
453dd416aa8Stshiozak /* duplicate a parser element structure shallowly */
454dd416aa8Stshiozak static struct parser_element *
dup_parser_element(const struct parser_element * pe)455dd416aa8Stshiozak dup_parser_element(const struct parser_element *pe)
456dd416aa8Stshiozak {
457dd416aa8Stshiozak struct parser_element *dpe = malloc(sizeof *dpe);
458dd416aa8Stshiozak if (dpe)
459dd416aa8Stshiozak copy_parser_element(dpe, pe);
460dd416aa8Stshiozak return dpe;
461dd416aa8Stshiozak }
462dd416aa8Stshiozak
463dd416aa8Stshiozak /* term := identifier | constant | '(' exp ')' */
464dd416aa8Stshiozak static int
parse_term(struct tokenizer_context * tcx,struct parser_element * pelem)465dd416aa8Stshiozak parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466dd416aa8Stshiozak {
467dd416aa8Stshiozak struct parser_element pe0;
468dd416aa8Stshiozak int token;
469dd416aa8Stshiozak union token_data token_data;
470dd416aa8Stshiozak
471dd416aa8Stshiozak token = get_token(tcx, &token_data);
472dd416aa8Stshiozak switch (token) {
473dd416aa8Stshiozak case '(':
474dd416aa8Stshiozak /* '(' exp ')' */
475dd416aa8Stshiozak init_parser_element(&pe0);
476dd416aa8Stshiozak /* expect exp */
477dd416aa8Stshiozak token = parse_cond(tcx, &pe0);
478dd416aa8Stshiozak T_ENSURE_OK(token, err);
479dd416aa8Stshiozak /* expect ')' */
480dd416aa8Stshiozak token = get_token(tcx, &token_data);
481dd416aa8Stshiozak T_ENSURE_SOMETHING(token, err);
482dd416aa8Stshiozak if (token != ')') {
483dd416aa8Stshiozak unget_token(tcx, token, &token_data);
484dd416aa8Stshiozak token = T_ILTOKEN;
485dd416aa8Stshiozak goto err;
486dd416aa8Stshiozak }
487dd416aa8Stshiozak copy_parser_element(pelem, &pe0);
488dd416aa8Stshiozak return token;
489dd416aa8Stshiozak err:
490dd416aa8Stshiozak uninit_parser_element(&pe0);
491dd416aa8Stshiozak return token;
492dd416aa8Stshiozak case T_IDENTIFIER:
493dd416aa8Stshiozak case T_CONSTANT:
494dd416aa8Stshiozak pelem->kind = token;
495dd416aa8Stshiozak pelem->u.token_data = token_data;
496dd416aa8Stshiozak return token;
497dd416aa8Stshiozak case T_EOF:
498dd416aa8Stshiozak return T_ILEND;
499dd416aa8Stshiozak default:
500dd416aa8Stshiozak return T_ILTOKEN;
501dd416aa8Stshiozak }
502dd416aa8Stshiozak }
503dd416aa8Stshiozak
504dd416aa8Stshiozak /* lnot := '!' lnot | term */
505dd416aa8Stshiozak static int
parse_lnot(struct tokenizer_context * tcx,struct parser_element * pelem)506dd416aa8Stshiozak parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507dd416aa8Stshiozak {
508dd416aa8Stshiozak struct parser_element pe0;
509dd416aa8Stshiozak int token;
510dd416aa8Stshiozak union token_data token_data;
511dd416aa8Stshiozak
512dd416aa8Stshiozak init_parser_element(&pe0);
513dd416aa8Stshiozak
514dd416aa8Stshiozak /* '!' or not */
515dd416aa8Stshiozak token = get_token(tcx, &token_data);
516dd416aa8Stshiozak if (token != '!') {
517dd416aa8Stshiozak /* stop: term */
518dd416aa8Stshiozak unget_token(tcx, token, &token_data);
519dd416aa8Stshiozak return parse_term(tcx, pelem);
520dd416aa8Stshiozak }
521dd416aa8Stshiozak
522dd416aa8Stshiozak /* '!' term */
523dd416aa8Stshiozak token = parse_lnot(tcx, &pe0);
524dd416aa8Stshiozak T_ENSURE_OK(token, err);
525dd416aa8Stshiozak
526dd416aa8Stshiozak pelem->kind = '!';
527dd416aa8Stshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528dd416aa8Stshiozak return pelem->kind;
529dd416aa8Stshiozak err:
530dd416aa8Stshiozak uninit_parser_element(&pe0);
531dd416aa8Stshiozak return token;
532dd416aa8Stshiozak }
533dd416aa8Stshiozak
534dd416aa8Stshiozak /* ext_op := ext_next ( op ext_next )* */
535dd416aa8Stshiozak static int
parse_op2(struct tokenizer_context * tcx,struct parser_element * pelem,const struct parser_op2_transition * tr)536dd416aa8Stshiozak parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537dd416aa8Stshiozak const struct parser_op2_transition *tr)
538dd416aa8Stshiozak {
539dd416aa8Stshiozak struct parser_element pe0, pe1, peop;
540dd416aa8Stshiozak int token;
541dd416aa8Stshiozak union token_data token_data;
542dd416aa8Stshiozak char op;
543dd416aa8Stshiozak
544dd416aa8Stshiozak /* special case: expect lnot */
545dd416aa8Stshiozak if (tr == NULL)
546dd416aa8Stshiozak return parse_lnot(tcx, pelem);
547dd416aa8Stshiozak
548dd416aa8Stshiozak init_parser_element(&pe0);
549dd416aa8Stshiozak init_parser_element(&pe1);
550dd416aa8Stshiozak token = parse_op2(tcx, &pe0, tr->next);
551dd416aa8Stshiozak T_ENSURE_OK(token, err);
552dd416aa8Stshiozak
553dd416aa8Stshiozak while (/*CONSTCOND*/1) {
554dd416aa8Stshiozak /* expect op or empty */
555dd416aa8Stshiozak token = get_token(tcx, &token_data);
556dd416aa8Stshiozak if (token != tr->kind) {
557dd416aa8Stshiozak /* stop */
558dd416aa8Stshiozak unget_token(tcx, token, &token_data);
559dd416aa8Stshiozak copy_parser_element(pelem, &pe0);
560dd416aa8Stshiozak break;
561dd416aa8Stshiozak }
562dd416aa8Stshiozak op = token_data.op;
563dd416aa8Stshiozak /* right hand */
564dd416aa8Stshiozak token = parse_op2(tcx, &pe1, tr->next);
565dd416aa8Stshiozak T_ENSURE_OK(token, err);
566dd416aa8Stshiozak
567dd416aa8Stshiozak init_parser_element(&peop);
568dd416aa8Stshiozak peop.kind = tr->kind;
569dd416aa8Stshiozak peop.u.parser_op.op = op;
570dd416aa8Stshiozak peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571dd416aa8Stshiozak init_parser_element(&pe0);
572dd416aa8Stshiozak peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573dd416aa8Stshiozak init_parser_element(&pe1);
574dd416aa8Stshiozak copy_parser_element(&pe0, &peop);
575dd416aa8Stshiozak }
576dd416aa8Stshiozak return pelem->kind;
577dd416aa8Stshiozak err:
578dd416aa8Stshiozak uninit_parser_element(&pe1);
579dd416aa8Stshiozak uninit_parser_element(&pe0);
580dd416aa8Stshiozak return token;
581dd416aa8Stshiozak }
582dd416aa8Stshiozak
583dd416aa8Stshiozak /* cond := lor | lor '?' cond ':' cond */
584dd416aa8Stshiozak static int
parse_cond(struct tokenizer_context * tcx,struct parser_element * pelem)585dd416aa8Stshiozak parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586dd416aa8Stshiozak {
587dd416aa8Stshiozak struct parser_element pe0, pe1, pe2;
588dd416aa8Stshiozak int token;
589dd416aa8Stshiozak union token_data token_data;
590dd416aa8Stshiozak
591dd416aa8Stshiozak init_parser_element(&pe0);
592dd416aa8Stshiozak init_parser_element(&pe1);
593dd416aa8Stshiozak init_parser_element(&pe2);
594dd416aa8Stshiozak
595dd416aa8Stshiozak /* expect lor or empty */
596dd416aa8Stshiozak token = parse_op2(tcx, &pe0, &exp_tr_lor);
597dd416aa8Stshiozak T_ENSURE_OK(token, err);
598dd416aa8Stshiozak
599dd416aa8Stshiozak /* '?' or not */
600dd416aa8Stshiozak token = get_token(tcx, &token_data);
601dd416aa8Stshiozak if (token != '?') {
602dd416aa8Stshiozak /* stop: lor */
603dd416aa8Stshiozak unget_token(tcx, token, &token_data);
604dd416aa8Stshiozak copy_parser_element(pelem, &pe0);
605dd416aa8Stshiozak return pe0.kind;
606dd416aa8Stshiozak }
607dd416aa8Stshiozak
608dd416aa8Stshiozak /* lor '?' cond ':' cond */
609dd416aa8Stshiozak /* expect cond */
610dd416aa8Stshiozak token = parse_cond(tcx, &pe1);
611dd416aa8Stshiozak T_ENSURE_OK(token, err);
612dd416aa8Stshiozak
613dd416aa8Stshiozak /* expect ':' */
614dd416aa8Stshiozak token = get_token(tcx, &token_data);
615dd416aa8Stshiozak T_ENSURE_OK(token, err);
616dd416aa8Stshiozak if (token != ':') {
617dd416aa8Stshiozak unget_token(tcx, token, &token_data);
618dd416aa8Stshiozak token = T_ILTOKEN;
619dd416aa8Stshiozak goto err;
620dd416aa8Stshiozak }
621dd416aa8Stshiozak
622dd416aa8Stshiozak /* expect cond */
623dd416aa8Stshiozak token = parse_cond(tcx, &pe2);
624dd416aa8Stshiozak T_ENSURE_OK(token, err);
625dd416aa8Stshiozak
626dd416aa8Stshiozak pelem->kind = '?';
627dd416aa8Stshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628dd416aa8Stshiozak pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629dd416aa8Stshiozak pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630dd416aa8Stshiozak return pelem->kind;
631dd416aa8Stshiozak err:
632dd416aa8Stshiozak uninit_parser_element(&pe2);
633dd416aa8Stshiozak uninit_parser_element(&pe1);
634dd416aa8Stshiozak uninit_parser_element(&pe0);
635dd416aa8Stshiozak return token;
636dd416aa8Stshiozak }
637dd416aa8Stshiozak
638dd416aa8Stshiozak static int
parse_exp(struct tokenizer_context * tcx,struct parser_element * pelem)639dd416aa8Stshiozak parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640dd416aa8Stshiozak {
641dd416aa8Stshiozak int token, token1;
642dd416aa8Stshiozak union token_data token_data;
643dd416aa8Stshiozak
644dd416aa8Stshiozak #ifdef ALLOW_EMPTY
645dd416aa8Stshiozak /* empty check */
646dd416aa8Stshiozak token = get_token(tcx, &token_data);
647dd416aa8Stshiozak if (token == T_EOF)
648dd416aa8Stshiozak return token;
649dd416aa8Stshiozak unget_token(tcx, token, &token_data);
650dd416aa8Stshiozak #endif
651dd416aa8Stshiozak
652dd416aa8Stshiozak token = parse_cond(tcx, pelem);
653dd416aa8Stshiozak if (!T_IS_ERROR(token)) {
654dd416aa8Stshiozak /* termination check */
655dd416aa8Stshiozak token1 = get_token(tcx, &token_data);
656dd416aa8Stshiozak if (token1 == T_EOF)
657dd416aa8Stshiozak return token;
658dd416aa8Stshiozak else if (!T_IS_ERROR(token))
659dd416aa8Stshiozak unget_token(tcx, token1, &token_data);
660dd416aa8Stshiozak return T_ILTOKEN;
661dd416aa8Stshiozak }
662dd416aa8Stshiozak return token;
663dd416aa8Stshiozak }
664dd416aa8Stshiozak
665dd416aa8Stshiozak
666dd416aa8Stshiozak #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667dd416aa8Stshiozak #include <stdio.h>
668dd416aa8Stshiozak
669dd416aa8Stshiozak static void dump_elem(struct parser_element *);
670dd416aa8Stshiozak
671dd416aa8Stshiozak static void
dump_op2(struct parser_element * pelem)672dd416aa8Stshiozak dump_op2(struct parser_element *pelem)
673dd416aa8Stshiozak {
674dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[0]);
675dd416aa8Stshiozak printf(" ");
676dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[1]);
677dd416aa8Stshiozak printf(")");
678dd416aa8Stshiozak }
679dd416aa8Stshiozak
680dd416aa8Stshiozak static void
dump_op3(struct parser_element * pelem)681dd416aa8Stshiozak dump_op3(struct parser_element *pelem)
682dd416aa8Stshiozak {
683dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[0]);
684dd416aa8Stshiozak printf(" ");
685dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[1]);
686dd416aa8Stshiozak printf(" ");
687dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[2]);
688dd416aa8Stshiozak printf(")");
689dd416aa8Stshiozak }
690dd416aa8Stshiozak
691dd416aa8Stshiozak static void
dump_elem(struct parser_element * pelem)692dd416aa8Stshiozak dump_elem(struct parser_element *pelem)
693dd416aa8Stshiozak {
694dd416aa8Stshiozak switch (pelem->kind) {
695dd416aa8Stshiozak case T_LAND:
696dd416aa8Stshiozak printf("(&& ");
697dd416aa8Stshiozak dump_op2(pelem);
698dd416aa8Stshiozak break;
699dd416aa8Stshiozak case T_LOR:
700dd416aa8Stshiozak printf("(|| ");
701dd416aa8Stshiozak dump_op2(pelem);
702dd416aa8Stshiozak break;
703dd416aa8Stshiozak case T_EQUALITY:
704dd416aa8Stshiozak switch (pelem->u.parser_op.op) {
705dd416aa8Stshiozak case OP_EQ:
706dd416aa8Stshiozak printf("(== ");
707dd416aa8Stshiozak break;
708dd416aa8Stshiozak case OP_NEQ:
709dd416aa8Stshiozak printf("(!= ");
710dd416aa8Stshiozak break;
711dd416aa8Stshiozak }
712dd416aa8Stshiozak dump_op2(pelem);
713dd416aa8Stshiozak break;
714dd416aa8Stshiozak case T_RELATIONAL:
715dd416aa8Stshiozak switch (pelem->u.parser_op.op) {
716dd416aa8Stshiozak case '<':
717dd416aa8Stshiozak case '>':
718dd416aa8Stshiozak printf("(%c ", pelem->u.parser_op.op);
719dd416aa8Stshiozak break;
720dd416aa8Stshiozak case OP_LTEQ:
721dd416aa8Stshiozak case OP_GTEQ:
722dd416aa8Stshiozak printf("(%c= ", pelem->u.parser_op.op-'=');
723dd416aa8Stshiozak break;
724dd416aa8Stshiozak }
725dd416aa8Stshiozak dump_op2(pelem);
726dd416aa8Stshiozak break;
727dd416aa8Stshiozak case T_ADDITIVE:
728dd416aa8Stshiozak case T_MULTIPLICATIVE:
729dd416aa8Stshiozak printf("(%c ", pelem->u.parser_op.op);
730dd416aa8Stshiozak dump_op2(pelem);
731dd416aa8Stshiozak break;
732dd416aa8Stshiozak case '!':
733dd416aa8Stshiozak printf("(! ");
734dd416aa8Stshiozak dump_elem(pelem->u.parser_op.operands[0]);
735dd416aa8Stshiozak printf(")");
736dd416aa8Stshiozak break;
737dd416aa8Stshiozak case '?':
738dd416aa8Stshiozak printf("(? ");
739dd416aa8Stshiozak dump_op3(pelem);
740dd416aa8Stshiozak break;
741dd416aa8Stshiozak case T_CONSTANT:
742dd416aa8Stshiozak printf("%d", pelem->u.token_data.constant);
743dd416aa8Stshiozak break;
744dd416aa8Stshiozak case T_IDENTIFIER:
745dd416aa8Stshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
746dd416aa8Stshiozak printf("%s", pelem->u.token_data.identifier);
747dd416aa8Stshiozak #else
748dd416aa8Stshiozak printf(PLURAL_NUMBER_SYMBOL);
749dd416aa8Stshiozak #endif
750dd416aa8Stshiozak break;
751dd416aa8Stshiozak }
752dd416aa8Stshiozak }
753dd416aa8Stshiozak #endif
754dd416aa8Stshiozak #ifdef TEST_PARSER
755dd416aa8Stshiozak int
main(int argc,char ** argv)756dd416aa8Stshiozak main(int argc, char **argv)
757dd416aa8Stshiozak {
758dd416aa8Stshiozak struct tokenizer_context tcx;
759dd416aa8Stshiozak struct parser_element pelem;
760dd416aa8Stshiozak int token;
761dd416aa8Stshiozak
762dd416aa8Stshiozak if (argc != 2) {
763dd416aa8Stshiozak fprintf(stderr, "usage: %s <expression>\n", argv[0]);
764dd416aa8Stshiozak return EXIT_FAILURE;
765dd416aa8Stshiozak }
766dd416aa8Stshiozak
767dd416aa8Stshiozak init_tokenizer_context(&tcx);
768dd416aa8Stshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
769dd416aa8Stshiozak
770dd416aa8Stshiozak init_parser_element(&pelem);
771dd416aa8Stshiozak token = parse_exp(&tcx, &pelem);
772dd416aa8Stshiozak
773dd416aa8Stshiozak if (token == T_EOF)
774dd416aa8Stshiozak printf("none");
775dd416aa8Stshiozak else if (T_IS_ERROR(token))
776dd416aa8Stshiozak printf("error: 0x%X", token);
777dd416aa8Stshiozak else
778dd416aa8Stshiozak dump_elem(&pelem);
779dd416aa8Stshiozak printf("\n");
780dd416aa8Stshiozak
781dd416aa8Stshiozak uninit_parser_element(&pelem);
782dd416aa8Stshiozak
783dd416aa8Stshiozak return EXIT_SUCCESS;
784dd416aa8Stshiozak }
785dd416aa8Stshiozak #endif /* TEST_PARSER */
786dd416aa8Stshiozak
787dd416aa8Stshiozak /* ----------------------------------------------------------------------
 * calculate plural number
789dd416aa8Stshiozak */
790dd416aa8Stshiozak static unsigned long
calculate_plural(const struct parser_element * pe,unsigned long n)791dd416aa8Stshiozak calculate_plural(const struct parser_element *pe, unsigned long n)
792dd416aa8Stshiozak {
793dd416aa8Stshiozak unsigned long val0, val1;
794dd416aa8Stshiozak switch (pe->kind) {
795dd416aa8Stshiozak case T_IDENTIFIER:
796dd416aa8Stshiozak return n;
797dd416aa8Stshiozak case T_CONSTANT:
798dd416aa8Stshiozak return pe->u.token_data.constant;
799dd416aa8Stshiozak case '?':
800dd416aa8Stshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801dd416aa8Stshiozak if (val0)
802dd416aa8Stshiozak val1=calculate_plural(pe->u.parser_op.operands[1], n);
803dd416aa8Stshiozak else
804dd416aa8Stshiozak val1=calculate_plural(pe->u.parser_op.operands[2], n);
805dd416aa8Stshiozak return val1;
806dd416aa8Stshiozak case '!':
807dd416aa8Stshiozak return !calculate_plural(pe->u.parser_op.operands[0], n);
808dd416aa8Stshiozak case T_MULTIPLICATIVE:
809dd416aa8Stshiozak case T_ADDITIVE:
810dd416aa8Stshiozak case T_RELATIONAL:
811dd416aa8Stshiozak case T_EQUALITY:
812dd416aa8Stshiozak case T_LOR:
813dd416aa8Stshiozak case T_LAND:
814dd416aa8Stshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815dd416aa8Stshiozak val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816dd416aa8Stshiozak switch (pe->u.parser_op.op) {
817dd416aa8Stshiozak case '*':
818dd416aa8Stshiozak return val0*val1;
819dd416aa8Stshiozak case '/':
820dd416aa8Stshiozak return val0/val1;
821dd416aa8Stshiozak case '%':
822dd416aa8Stshiozak return val0%val1;
823dd416aa8Stshiozak case '+':
824dd416aa8Stshiozak return val0+val1;
825dd416aa8Stshiozak case '-':
826dd416aa8Stshiozak return val0-val1;
827dd416aa8Stshiozak case '<':
828dd416aa8Stshiozak return val0<val1;
829dd416aa8Stshiozak case '>':
830dd416aa8Stshiozak return val0>val1;
831dd416aa8Stshiozak case OP_LTEQ:
832dd416aa8Stshiozak return val0<=val1;
833dd416aa8Stshiozak case OP_GTEQ:
834dd416aa8Stshiozak return val0>=val1;
835dd416aa8Stshiozak case OP_EQ:
836dd416aa8Stshiozak return val0==val1;
837dd416aa8Stshiozak case OP_NEQ:
838dd416aa8Stshiozak return val0!=val1;
839dd416aa8Stshiozak case '|':
840dd416aa8Stshiozak return val0||val1;
841dd416aa8Stshiozak case '&':
842dd416aa8Stshiozak return val0&&val1;
843dd416aa8Stshiozak }
844dd416aa8Stshiozak }
845dd416aa8Stshiozak return 0;
846dd416aa8Stshiozak }
847dd416aa8Stshiozak
848dd416aa8Stshiozak #ifdef TEST_CALC_PLURAL
849dd416aa8Stshiozak #include <stdio.h>
850dd416aa8Stshiozak
851dd416aa8Stshiozak int
main(int argc,char ** argv)852dd416aa8Stshiozak main(int argc, char **argv)
853dd416aa8Stshiozak {
854dd416aa8Stshiozak struct tokenizer_context tcx;
855dd416aa8Stshiozak struct parser_element pelem;
856dd416aa8Stshiozak int token;
857dd416aa8Stshiozak
858dd416aa8Stshiozak if (argc != 3) {
859dd416aa8Stshiozak fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
860dd416aa8Stshiozak return EXIT_FAILURE;
861dd416aa8Stshiozak }
862dd416aa8Stshiozak
863dd416aa8Stshiozak init_tokenizer_context(&tcx);
864dd416aa8Stshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
865dd416aa8Stshiozak
866dd416aa8Stshiozak init_parser_element(&pelem);
867dd416aa8Stshiozak token = parse_exp(&tcx, &pelem);
868dd416aa8Stshiozak
869dd416aa8Stshiozak if (token == T_EOF)
870dd416aa8Stshiozak printf("none");
871dd416aa8Stshiozak else if (T_IS_ERROR(token))
872dd416aa8Stshiozak printf("error: 0x%X", token);
873dd416aa8Stshiozak else {
874dd416aa8Stshiozak printf("plural = %lu",
875dd416aa8Stshiozak calculate_plural(&pelem, atoi(argv[2])));
876dd416aa8Stshiozak }
877dd416aa8Stshiozak printf("\n");
878dd416aa8Stshiozak
879dd416aa8Stshiozak uninit_parser_element(&pelem);
880dd416aa8Stshiozak
881dd416aa8Stshiozak return EXIT_SUCCESS;
882dd416aa8Stshiozak }
883dd416aa8Stshiozak #endif /* TEST_CALC_PLURAL */
884dd416aa8Stshiozak
885dd416aa8Stshiozak
886dd416aa8Stshiozak /* ----------------------------------------------------------------------
887dd416aa8Stshiozak * parse plural forms
888dd416aa8Stshiozak */
889dd416aa8Stshiozak
/*
 * Re-initialize `r' with the head pointer and length that
 * _bcs_skip_ws_len() produces from the region's current head and size.
 */
static void
region_skip_ws(struct _region *r)
{
	size_t remain = _region_size(r);
	const char *head = _region_head(r);

	head = _bcs_skip_ws_len(head, &remain);
	_region_init(r, __UNCONST(head), remain);
}
899dd416aa8Stshiozak
/*
 * Re-initialize `r' with its current head pointer and the length that
 * _bcs_trunc_rws_len() leaves behind; the head pointer is unchanged.
 */
static void
region_trunc_rws(struct _region *r)
{
	size_t remain = _region_size(r);
	const char *head = _region_head(r);

	_bcs_trunc_rws_len(head, &remain);
	_region_init(r, __UNCONST(head), remain);
}
909dd416aa8Stshiozak
/*
 * Test whether region `r' starts with the `prelen' bytes at `pre'.
 * The comparison ignores case when `ignorecase' is non-zero.
 * Returns 0 on a match and -1 otherwise, including when the region is
 * shorter than the prefix.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
    int ignorecase)
{
	int differs;

	if (prelen > _region_size(r))
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen) != 0;
	else
		differs = memcmp(_region_head(r), pre, prelen) != 0;

	return differs ? -1 : 0;
}
926dd416aa8Stshiozak
/*
 * Trim `r' with region_trunc_rws() and then remove a final ';'.
 * Returns 0 when the semicolon was found and removed, -1 when the
 * trimmed region is empty or does not end in ';'.
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);

	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len - 1) != ';')
		return -1;

	/* keep everything except the last byte */
	_region_get_subregion(r, r, 0, len - 1);
	return 0;
}
937dd416aa8Stshiozak
938dd416aa8Stshiozak static int
find_plural_forms(struct _region * r)939dd416aa8Stshiozak find_plural_forms(struct _region *r)
940dd416aa8Stshiozak {
941dd416aa8Stshiozak struct _memstream ms;
942dd416aa8Stshiozak struct _region rr;
943dd416aa8Stshiozak
944dd416aa8Stshiozak _memstream_bind(&ms, r);
945dd416aa8Stshiozak
946dd416aa8Stshiozak while (!_memstream_getln_region(&ms, &rr)) {
947dd416aa8Stshiozak if (!region_check_prefix(&rr,
948dd416aa8Stshiozak PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949dd416aa8Stshiozak _region_get_subregion(
950dd416aa8Stshiozak r, &rr, LEN_PLURAL_FORMS,
951dd416aa8Stshiozak _region_size(&rr)-LEN_PLURAL_FORMS);
952dd416aa8Stshiozak region_skip_ws(r);
953dd416aa8Stshiozak region_trunc_rws(r);
954dd416aa8Stshiozak return 0;
955dd416aa8Stshiozak }
956dd416aa8Stshiozak }
957dd416aa8Stshiozak return -1;
958dd416aa8Stshiozak }
959dd416aa8Stshiozak
/*
 * Consume "<sym> =" from the front of `r', calling region_skip_ws()
 * before the symbol, before the '=' and after it.  The symbol is
 * compared case-sensitively.  Returns 0 on success, leaving `r' at
 * the right-hand side of the assignment, and -1 when the symbol or
 * the '=' is missing.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;

	/* step over the symbol itself */
	_region_get_subregion(r, r, symlen, _region_size(r)-symlen);
	region_skip_ws(r);

	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;

	/* step over the '=' */
	_region_get_subregion(r, r, 1, _region_size(r)-1);
	region_skip_ws(r);

	return 0;
}
974dd416aa8Stshiozak
975dd416aa8Stshiozak static int
skip_nplurals(struct _region * r,unsigned long * rnp)976dd416aa8Stshiozak skip_nplurals(struct _region *r, unsigned long *rnp)
977dd416aa8Stshiozak {
978dd416aa8Stshiozak unsigned long np;
979dd416aa8Stshiozak char buf[MAX_LEN_ATOM+2], *endptr;
980dd416aa8Stshiozak const char *endptrconst;
981dd416aa8Stshiozak size_t ofs;
982dd416aa8Stshiozak
983dd416aa8Stshiozak if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984dd416aa8Stshiozak return -1;
985dd416aa8Stshiozak if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986dd416aa8Stshiozak return -1;
987dd416aa8Stshiozak strlcpy(buf, _region_head(r), sizeof (buf));
988dd416aa8Stshiozak np = strtoul(buf, &endptr, 0);
989dd416aa8Stshiozak endptrconst = _bcs_skip_ws(endptr);
990dd416aa8Stshiozak if (*endptrconst != ';')
991dd416aa8Stshiozak return -1;
992dd416aa8Stshiozak ofs = endptrconst+1-buf;
993dd416aa8Stshiozak if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994dd416aa8Stshiozak return -1;
995dd416aa8Stshiozak if (rnp)
996dd416aa8Stshiozak *rnp = np;
997dd416aa8Stshiozak return 0;
998dd416aa8Stshiozak }
999dd416aa8Stshiozak
1000dd416aa8Stshiozak static int
parse_plural_body(struct _region * r,struct parser_element ** rpe)1001dd416aa8Stshiozak parse_plural_body(struct _region *r, struct parser_element **rpe)
1002dd416aa8Stshiozak {
1003dd416aa8Stshiozak int token;
1004dd416aa8Stshiozak struct tokenizer_context tcx;
1005dd416aa8Stshiozak struct parser_element pelem, *ppe;
1006dd416aa8Stshiozak
1007dd416aa8Stshiozak init_tokenizer_context(&tcx);
1008dd416aa8Stshiozak _memstream_bind(&tcx.memstream, r);
1009dd416aa8Stshiozak
1010dd416aa8Stshiozak init_parser_element(&pelem);
1011dd416aa8Stshiozak token = parse_exp(&tcx, &pelem);
1012dd416aa8Stshiozak if (T_IS_ERROR(token))
1013dd416aa8Stshiozak return token;
1014dd416aa8Stshiozak
1015dd416aa8Stshiozak ppe = dup_parser_element(&pelem);
1016dd416aa8Stshiozak if (ppe == NULL) {
1017dd416aa8Stshiozak uninit_parser_element(&pelem);
1018dd416aa8Stshiozak return T_NOMEM;
1019dd416aa8Stshiozak }
1020dd416aa8Stshiozak
1021dd416aa8Stshiozak *rpe = ppe;
1022dd416aa8Stshiozak
1023dd416aa8Stshiozak return 0;
1024dd416aa8Stshiozak }
1025dd416aa8Stshiozak
1026dd416aa8Stshiozak static int
parse_plural(struct parser_element ** rpe,unsigned long * rnp,const char * str,size_t len)1027dd416aa8Stshiozak parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028dd416aa8Stshiozak const char *str, size_t len)
1029dd416aa8Stshiozak {
1030dd416aa8Stshiozak struct _region r;
1031dd416aa8Stshiozak
1032dd416aa8Stshiozak _region_init(&r, __UNCONST(str), len);
1033dd416aa8Stshiozak
1034dd416aa8Stshiozak if (find_plural_forms(&r))
1035dd416aa8Stshiozak return T_NOTFOUND;
1036dd416aa8Stshiozak if (skip_nplurals(&r, rnp))
1037dd416aa8Stshiozak return T_ILPLURAL;
1038dd416aa8Stshiozak if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039dd416aa8Stshiozak return T_ILPLURAL;
1040dd416aa8Stshiozak if (cut_trailing_semicolon(&r))
1041dd416aa8Stshiozak return T_ILPLURAL;
1042dd416aa8Stshiozak return parse_plural_body(&r, rpe);
1043dd416aa8Stshiozak }
1044dd416aa8Stshiozak
1045dd416aa8Stshiozak #ifdef TEST_PARSE_PLURAL
1046dd416aa8Stshiozak int
main(int argc,char ** argv)1047dd416aa8Stshiozak main(int argc, char **argv)
1048dd416aa8Stshiozak {
1049dd416aa8Stshiozak int ret;
1050dd416aa8Stshiozak struct parser_element *pelem;
1051dd416aa8Stshiozak unsigned long np;
1052dd416aa8Stshiozak
1053dd416aa8Stshiozak if (argc != 2 && argc != 3) {
1054dd416aa8Stshiozak fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
1055dd416aa8Stshiozak return EXIT_FAILURE;
1056dd416aa8Stshiozak }
1057dd416aa8Stshiozak
1058dd416aa8Stshiozak ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));
1059dd416aa8Stshiozak
1060dd416aa8Stshiozak if (ret == T_EOF)
1061dd416aa8Stshiozak printf("none");
1062dd416aa8Stshiozak else if (T_IS_ERROR(ret))
1063dd416aa8Stshiozak printf("error: 0x%X", ret);
1064dd416aa8Stshiozak else {
1065dd416aa8Stshiozak printf("syntax tree: ");
1066dd416aa8Stshiozak dump_elem(pelem);
1067dd416aa8Stshiozak printf("\nnplurals = %lu", np);
1068dd416aa8Stshiozak if (argv[2])
1069dd416aa8Stshiozak printf(", plural = %lu",
1070dd416aa8Stshiozak calculate_plural(pelem, atoi(argv[2])));
1071dd416aa8Stshiozak free_parser_element(pelem);
1072dd416aa8Stshiozak }
1073dd416aa8Stshiozak printf("\n");
1074dd416aa8Stshiozak
1075dd416aa8Stshiozak
1076dd416aa8Stshiozak return EXIT_SUCCESS;
1077dd416aa8Stshiozak }
1078dd416aa8Stshiozak #endif /* TEST_PARSE_PLURAL */
1079dd416aa8Stshiozak
1080dd416aa8Stshiozak /*
1081dd416aa8Stshiozak * external interface
1082dd416aa8Stshiozak */
1083dd416aa8Stshiozak
/*
 * Public entry point for parse_plural(): the gettext_plural handle
 * pointer is cast to the internal parser_element pointer type and the
 * result of parse_plural() is returned unchanged.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
    const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1090dd416aa8Stshiozak
/*
 * Public entry point for calculate_plural(): evaluates the plural
 * expression behind the handle `pe' for the count `n'.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1096dd416aa8Stshiozak
/*
 * Public entry point for free_parser_element(): passes the handle
 * `pe' on as a parser element.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	struct parser_element *tree = (struct parser_element *)pe;

	free_parser_element(tree);
}
1102dd416aa8Stshiozak
1103dd416aa8Stshiozak #ifdef TEST_PLURAL
1104dd416aa8Stshiozak #include <libintl.h>
1105dd416aa8Stshiozak #include <locale.h>
1106dd416aa8Stshiozak
/* Print the message dngettext() selects in domain "test" for count n. */
#define PR(n)	printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/*
 * TEST_PLURAL driver: bind the "test" domain to the current directory
 * and print the selected message for the counts 1 through 4.
 */
int
main(void)
{
	int count;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */

	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
1120dd416aa8Stshiozak #endif
1121