xref: /netbsd-src/usr.bin/m4/expr.c (revision d9158b13b5dfe46201430699a3f7a235ecf28df3)
1 /*  File   : expr.c
2     Authors: Mike Lutz & Bob Harper
3     Editors: Ozan Yigit & Richard A. O'Keefe
4     Updated: %G%
5     Purpose: arithmetic expression evaluator.
6 
7     expr() performs a standard recursive descent parse to evaluate any
8     expression permitted byf the following grammar:
9 
10       expr    :       query EOS
11       query   :       lor
12               |       lor "?" query ":" query
13       lor     :       land { "||" land }	or OR,  for Pascal
14       land    :       bor { "&&" bor }		or AND, for Pascal
15       bor     :       bxor { "|" bxor }
16       bxor    :       band { "^" band }
17       band    :       eql { "&" eql }
18       eql     :       relat { eqrel relat }
19       relat   :       shift { rel shift }
20       shift   :       primary { shop primary }
21       primary :       term { addop term }
22       term    :       unary { mulop unary }
23       unary   :       factor
24               |       unop unary
25       factor  :       constant
26               |       "(" query ")"
27       constant:       num
28               |       "'" CHAR "'"		or '"' CHAR '"'
29       num     :       DIGIT			full ANSI C syntax
30               |       DIGIT num
31       shop    :       "<<"
32               |       ">>"
33       eqlrel  :       "="
34               |       "=="
35               |       "!="
36       rel     :       "<"			or <>, Pascal not-equal
37               |       ">"
38               |       "<="			or =<, for Prolog users.
39               |       ">="
40 
41     This expression evaluator was lifted from a public-domain
42     C Pre-Processor included with the DECUS C Compiler distribution.
43     It has been hacked somewhat to be suitable for m4.
44 
45     26-Mar-1993		Changed to work in any of EBCDIC, ASCII, DEC MNCS,
46 			or ISO 8859/n.
47 
48     26-Mar-1993		Changed to use "long int" rather than int, so that
49 			we get the same 32-bit arithmetic on a PC as on a Sun.
50 			It isn't fully portable, of course, but then on a 64-
51 			bit machine we _want_ 64-bit arithmetic...
52 			Shifting rewritten (using LONG_BIT) to give signed
53 			shifts even when (long) >> (long) is unsigned.
54 
55     26-Mar-1993		I finally got sick of the fact that &&, ||, and ?:
56 			don't do conditional evaluation.  What is the good
57 			of having eval(0&&(1/0)) crash and dump core?  Now
58 			every function has a doit? argument.
59 
60     26-Mar-1993		charcon() didn't actually accept 'abcd', which it
61 			should have.  Fixed it.
62 
63     20-Apr-1993		eval(1/0) and eval(1%0) dumped core and crashed.
64 			This is also true of the System V r 3.2 m4, but
65 			it isn't good enough for ours!  Changed it so that
66 			x % 0 => x	as per Concrete Mathematics
67 			x / 0 => error and return 0 from expr().
68 */
69 
70 #define FALSE   0
71 #define	TRUE	1
72 
73 #include <stdio.h>
74 #include <setjmp.h>
75 static jmp_buf expjump;		/* Error exit point for expr() */
76 
77 static unsigned char *nxtchr;	/* Parser scan pointer */
78 
79 #define	deblank0	while ((unsigned)(*nxtchr-1) < ' ') nxtchr++
80 #define deblank1	while ((unsigned)(*++nxtchr - 1) < ' ')
81 #define deblank2	nxtchr++; deblank1
82 
83 #include "ourlims.h"
84 static char digval[1+UCHAR_MAX];
85 
86 /*  This file should work in any C implementation that doesn't have too
87     many characters to fit in one table.  We use a table to convert
88     (unsigned) characters to numeric codes:
89 	 0 to  9	for '0' to '9'
90 	10 to 35	for 'a' to 'z'
91 	10 to 35	for 'A' to 'Z'
92 	36		for '_'
93     Instead of asking whether tolower(c) == 'a' we ask whether
94     digval[c] == DIGIT_A, and so on.  This essentially duplicates the
95     chtype[] table in main.c; we should use just one table.
96 */
97 #define	DIGIT_A 10
98 #define	DIGIT_B 11
99 #define	DIGIT_C 12
100 #define	DIGIT_D 13
101 #define	DIGIT_E 14
102 #define	DIGIT_F 15
103 #define	DIGIT_G 16
104 #define DIGIT_H 17
105 #define	DIGIT_I	18
106 #define	DIGIT_J 19
107 #define DIGIT_K 20
108 #define	DIGIT_L	21
109 #define DIGIT_M 22
110 #define DIGIT_N 23
111 #define	DIGIT_O 24
112 #define	DIGIT_P 25
113 #define	DIGIT_Q 26
114 #define	DIGIT_R	27
115 #define	DIGIT_S 28
116 #define	DIGIT_T 29
117 #define	DIGIT_U 30
118 #define	DIGIT_V 31
119 #define	DIGIT_W 32
120 #define	DIGIT_X 33
121 #define	DIGIT_Y 34
122 #define	DIGIT_Z 35
123 
124 
125 #ifdef	__STDC__
126 static long int query(int);
127 #else
128 static long int query();
129 #endif
130 
131 
132 /*  experr(msg)
133     prints an error message, resets environment to expr(), and
134     forces expr() to return FALSE.
135 */
136 void experr(msg)
137     char *msg;
138     {
139 	(void) fprintf(stderr, "m4: %s\n", msg);
140 	longjmp(expjump, -1);	/* Force expr() to return FALSE */
141     }
142 
143 
144 /*  <numcon> ::= '0x' <hex> | '0X' <hex> | '0' <oct> | <dec>
145     For ANSI C, an integer may be followed by u, l, ul, or lu,
146     in any mix of cases.  We accept and ignore those letters;
147     all the numbers are treated as long.
148 */
149 static long int numcon(doit)
150     int doit;
151     {
152 	register long int v;	/* current value */
153 	register int b;		/* base (radix) */
154 	register int c;		/* character or digit value */
155 
156 	if (!doit) {
157 	    do nxtchr++; while (digval[*nxtchr] <= 36);
158 	    deblank0;
159 	    return 0;
160 	}
161 
162 	v = digval[*nxtchr++];	/* We already know it's a digit */
163 	if (v != 0) {
164 	    b = 10;		/* decimal number */
165 	} else
166 	if (digval[*nxtchr] == DIGIT_X) {
167 	    nxtchr++;
168 	    b = 16;		/* hexadecimal number */
169 	} else {
170 	    b = 8;		/* octal number */
171 	}
172 	do {
173 	    while (digval[c = *nxtchr++] < b) v = v*b + digval[c];
174 	} while (c == '_');
175 	while (digval[c] == DIGIT_L || digval[c] == DIGIT_U) c = *nxtchr++;
176 	nxtchr--;		/* unread c */
177 	if ((unsigned)(c-1) < ' ') { deblank1; }
178 	return v;
179     }
180 
181 
182 /*  <charcon> ::= <qt> { <char> } <qt>
183     Note: multibyte constants are accepted.
184     Note: BEL (\a) and ESC (\e) have the same values in EBCDIC and ASCII.
185 */
186 static long int charcon(doit)
187     int doit;
188     {
189 	register int i;
190 	long int value;
191 	register int c;
192 	int q;
193 	int v[sizeof value];
194 
195 	q = *nxtchr++;		/* the quote character */
196 	for (i = 0; ; i++) {
197 	    c = *nxtchr++;
198 	    if (c == q) {	/* end of literal, or doubled quote */
199 		if (*nxtchr != c) break;
200 		nxtchr++;	/* doubled quote stands for one quote */
201 	    }
202 	    if (i == sizeof value) experr("Unterminated character constant");
203 	    if (c == '\\') {
204 		switch (c = *nxtchr++) {
205 		    case '0': case '1': case '2': case '3':
206 		    case '4': case '5': case '6': case '7':
207 			c -= '0';
208 			if ((unsigned)(*nxtchr - '0') < 8)
209 			    c = (c << 3) | (*nxtchr++ - '0');
210 			if ((unsigned)(*nxtchr - '0') < 8)
211 			    c = (c << 3) | (*nxtchr++ - '0');
212 			break;
213 		    case 'n': case 'N': c = '\n'; break;
214 		    case 'r': case 'R': c = '\r'; break;
215 		    case 't': case 'T': c = '\t'; break;
216 		    case 'b': case 'B': c = '\b'; break;
217 		    case 'f': case 'F': c = '\f'; break;
218 		    case 'a': case 'A': c = 007;  break;
219 		    case 'e': case 'E': c = 033;  break;
220 #if	' ' == 64
221 		    case 'd': case 'D': c = 045;  break; /*EBCDIC DEL */
222 #else
223 		    case 'd': case 'D': c = 127;  break; /* ASCII DEL */
224 #endif
225 		    default :			  break;
226 		}
227 	    }
228 	    v[i] = c;
229 	}
230 	deblank0;
231 	if (!doit) return 0;
232 	for (value = 0; --i >= 0; ) value = (value << CHAR_BIT) | v[i];
233 	return value;
234     }
235 
236 
237 /*  <unary> ::= <unop> <unary> | <factor>
238     <unop> ::= '!' || '~' | '-'
239     <factor> ::= '(' <query> ')' | <'> <char> <'> | <"> <char> <"> | <num>
240 */
241 static long int unary(doit)
242     int doit;
243     {
244 	long int v;
245 
246 	switch (nxtchr[0]) {
247 	    case 'n': case 'N':
248 			if (digval[nxtchr[1]] != DIGIT_O
249 			||  digval[nxtchr[2]] != DIGIT_T)
250 			    experr("Bad 'not'");
251 			nxtchr += 2;
252 	    case '!':	deblank1; return !unary(doit);
253 	    case '~':	deblank1; return ~unary(doit);
254 	    case '-':	deblank1; return -unary(doit);
255 	    case '+':	deblank1; return  unary(doit);
256 	    case '(':	deblank1; v = query(doit);
257 			if (nxtchr[0] != ')') experr("Bad factor");
258 			deblank1; return v;
259 	    case '\'':
260 	    case '\"':	return charcon(doit);
261 	    case '0': case '1': case '2':
262 	    case '3': case '4': case '5':
263 	    case '6': case '7': case '8':
264 	    case '9':	return numcon(doit);
265 	    default :   experr("Bad constant");
266 	}
267 	return 0;	/*NOTREACHED*/
268     }
269 
270 
271 /*  <term> ::= <unary> { <mulop> <unary> }
272     <mulop> ::= '*' | '/' || '%'
273 */
274 static long int term(doit)
275     int doit;
276     {
277 	register long int vl, vr;
278 
279 	vl = unary(doit);
280 	for (;;)
281 	    switch (nxtchr[0]) {
282 		case '*':
283 		    deblank1;
284 		    vr = unary(doit);
285 		    if (doit) vl *= vr;
286 		    break;
287 		case 'd': case 'D':
288 		    if (digval[nxtchr[1]] != DIGIT_I
289 		    ||  digval[nxtchr[2]] != DIGIT_V)
290 			experr("Bad 'div'");
291 		    nxtchr += 2;
292 		    /*FALLTHROUGH*/
293 		case '/':
294 		    deblank1;
295 		    vr = unary(doit);
296 		    if (doit) {
297 			if (vr == 0) experr("Division by 0");
298 			vl /= vr;
299 		    }
300 		    break;
301 		case 'm': case 'M':
302 		    if (digval[nxtchr[1]] != DIGIT_O
303 		    ||  digval[nxtchr[2]] != DIGIT_D)
304 			experr("Bad 'mod'");
305 		    nxtchr += 2;
306 		    /*FALLTHROUGH*/
307 		case '%':
308 		    deblank1;
309 		    vr = unary(doit);
310 		    if (doit) {
311 			if (vr != 0) vl %= vr;
312 		    }
313 		    break;
314 		default:
315 		    return vl;
316 	    }
317 	/*NOTREACHED*/
318     }
319 
320 /*  <primary> ::= <term> { <addop> <term> }
321     <addop> ::= '+' | '-'
322 */
323 static long int primary(doit)
324     int doit;
325     {
326 	register long int vl;
327 
328 	vl = term(doit);
329 	for (;;)
330 	    if (nxtchr[0] == '+') {
331 		deblank1;
332 		if (doit) vl += term(doit); else (void)term(doit);
333 	    } else
334 	    if (nxtchr[0] == '-') {
335 		deblank1;
336 		if (doit) vl -= term(doit); else (void)term(doit);
337 	    } else
338 		return vl;
339 	/*NOTREACHED*/
340     }
341 
342 
343 /*  <shift> ::= <primary> { <shop> <primary> }
344     <shop> ::= '<<' | '>>'
345 */
346 static long int shift(doit)
347     int doit;
348     {
349 	register long int vl, vr;
350 
351 	vl = primary(doit);
352 	for (;;) {
353 	    if (nxtchr[0] == '<' && nxtchr[1] == '<') {
354 		deblank2;
355 		vr = primary(doit);
356 	    } else
357 	    if (nxtchr[0] == '>' && nxtchr[1] == '>') {
358 		deblank2;
359 		vr = -primary(doit);
360 	    } else {
361 		return vl;
362 	    }
363 	    /* The following code implements shifts portably */
364 	    /* Shifts are signed shifts, and the shift count */
365 	    /* acts like repeated one-bit shifts, not modulo anything */
366 	    if (doit) {
367 		if (vr >= LONG_BIT) {
368 		    vl = 0;
369 		} else
370 		if (vr <= -LONG_BIT) {
371 		    vl = -(vl < 0);
372 		} else
373 		if (vr > 0) {
374 		    vl <<= vr;
375 		} else
376 		if (vr < 0) {
377 		    vl = (vl >> -vr) | (-(vl < 0) << (LONG_BIT + vr));
378 		}
379 	    }
380 	}
381 	/*NOTREACHED*/
382     }
383 
384 
385 /*  <relat> ::= <shift> { <rel> <shift> }
386     <rel> ::= '<=' | '>=' | '=<' | '=>' | '<' | '>'
387     Here I rely on the fact that '<<' and '>>' are swallowed by <shift>
388 */
389 static long int relat(doit)
390     int doit;
391     {
392 	register long int vl;
393 
394 	vl = shift(doit);
395 	for (;;)
396 	    switch (nxtchr[0]) {
397 		case '=':
398 		    switch (nxtchr[1]) {
399 			case '<':			/* =<, take as <= */
400 			    deblank2;
401 			    vl = vl <= shift(doit);
402 			    break;
403 			case '>':			/* =>, take as >= */
404 			    deblank2;
405 			    vl = vl >= shift(doit);
406 			    break;
407 			default:			/* == or =; OOPS */
408 			    return vl;
409 		    }
410 		    break;
411 		case '<':
412 		    if (nxtchr[1] == '=') {		/* <= */
413 			deblank2;
414 			vl = vl <= shift(doit);
415 		    } else
416 		    if (nxtchr[1] == '>') {		/* <> (Pascal) */
417 			deblank2;
418 			vl = vl != shift(doit);
419 		    } else {				/* < */
420 			deblank1;
421 			vl = vl < shift(doit);
422 		    }
423 		    break;
424 		case '>':
425 		    if (nxtchr[1] == '=') {		/* >= */
426 			deblank2;
427 			vl = vl >= shift(doit);
428 		    } else {				/* > */
429 			deblank1;
430 			vl = vl > shift(doit);
431 		    }
432 		    break;
433 		default:
434 		    return vl;
435 	}
436 	/*NOTREACHED*/
437     }
438 
439 
440 /*  <eql> ::= <relat> { <eqrel> <relat> }
441     <eqlrel> ::= '!=' | '==' | '='
442 */
443 static long int eql(doit)
444     int doit;
445     {
446 	register long int vl;
447 
448 	vl = relat(doit);
449 	for (;;)
450 	    if (nxtchr[0] == '!' && nxtchr[1] == '=') {
451 		deblank2;
452 		vl = vl != relat(doit);
453 	    } else
454 	    if (nxtchr[0] == '=' && nxtchr[1] == '=') {
455 		deblank2;
456 		vl = vl == relat(doit);
457 	    } else
458 	    if (nxtchr[0] == '=') {
459 		deblank1;
460 		vl = vl == relat(doit);
461 	    } else
462 		return vl;
463 	/*NOTREACHED*/
464     }
465 
466 
467 /*  <band> ::= <eql> { '&' <eql> }
468 */
469 static long int band(doit)
470     int doit;
471     {
472 	register long int vl;
473 
474 	vl = eql(doit);
475 	while (nxtchr[0] == '&' && nxtchr[1] != '&') {
476 	    deblank1;
477 	    if (doit) vl &= eql(doit); else (void)eql(doit);
478 	}
479 	return vl;
480     }
481 
482 
483 /*  <bxor> ::= <band> { '^' <band> }
484 */
485 static long int bxor(doit)
486     int doit;
487     {
488 	register long int vl;
489 
490 	vl = band(doit);
491 	while (nxtchr[0] == '^') {
492 	    deblank1;
493 	    if (doit) vl ^= band(doit); else (void)band(doit);
494 	}
495 	return vl;
496     }
497 
498 
499 /*  <bor> ::= <bxor> { '|' <bxor> }
500 */
501 static long int bor(doit)
502     int doit;
503     {
504 	register long int vl;
505 
506 	vl = bxor(doit);
507 	while (nxtchr[0] == '|' && nxtchr[1] != '|') {
508 	    deblank1;
509 	    if (doit) vl |= bxor(doit); else (void)bxor(doit);
510 	}
511 	return vl;
512     }
513 
514 
515 /*  <land> ::= <bor> { '&&' <bor> }
516 */
517 static long int land(doit)
518     int doit;
519     {
520 	register long int vl;
521 
522 	vl = bor(doit);
523 	for (;;) {
524 	    if (nxtchr[0] == '&') {
525 		if (nxtchr[1] != '&') break;
526 		deblank2;
527 	    } else
528 	    if (digval[nxtchr[0]] == DIGIT_A) {
529 		if (digval[nxtchr[1]] != DIGIT_N) break;
530 		if (digval[nxtchr[2]] != DIGIT_D) break;
531 		nxtchr += 2; deblank1;
532 	    } else {
533 		/* neither && nor and */
534 		break;
535 	    }
536 	    vl = bor(doit && vl) != 0;
537 	}
538 	return vl;
539     }
540 
541 
542 /*  <lor> ::= <land> { '||' <land> }
543 */
544 static long int lor(doit)
545     int doit;
546     {
547 	register long int vl;
548 
549 	vl = land(doit);
550 	for (;;) {
551 	    if (nxtchr[0] == '|') {
552 		if (nxtchr[1] != '|') break;
553 	    } else
554 	    if (digval[nxtchr[0]] == DIGIT_O) {
555 		if (digval[nxtchr[1]] != DIGIT_R) break;
556 	    } else {
557 		/* neither || nor or */
558 		break;
559 	    }
560 	    deblank2;
561 	    vl = land(doit && !vl) != 0;
562 	}
563 	return vl;
564     }
565 
566 
567 /*  <query> ::= <lor> [ '?' <query> ':' <query> ]
568 */
569 static long int query(doit)
570     int doit;
571     {
572 	register long int bool, true_val, false_val;
573 
574 	bool = lor(doit);
575 	if (*nxtchr != '?') return bool;
576 	deblank1;
577 	true_val = query(doit && bool);
578 	if (*nxtchr != ':') experr("Bad query");
579 	deblank1;
580 	false_val = query(doit && !bool);
581 	return bool ? true_val : false_val;
582     }
583 
584 
585 static void initialise_digval()
586     {
587 	register unsigned char *s;
588 	register int c;
589 
590 	for (c = 0; c <= UCHAR_MAX; c++) digval[c] = 99;
591 	for (c =  0, s = (unsigned char *)"0123456789";
592 	/*while*/ *s;
593 	/*doing*/ digval[*s++] = c++) /* skip */;
594 	for (c = 10, s = (unsigned char *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
595 	/*while*/ *s;
596 	/*doing*/ digval[*s++] = c++) /* skip */;
597 	for (c = 10, s = (unsigned char *)"abcdefghijklmnopqrstuvwxyz";
598 	/*while*/ *s;
599 	/*doing*/ digval[*s++] = c++) /* skip */;
600 	digval['_'] = 36;
601     }
602 
603 
604 long int expr(expbuf)
605     char *expbuf;
606     {
607 	register int rval;
608 
609 	if (digval['1'] == 0) initialise_digval();
610 	nxtchr = (unsigned char *)expbuf;
611 	deblank0;
612 	if (setjmp(expjump) != 0) return FALSE;
613 	rval = query(TRUE);
614 	if (*nxtchr) experr("Ill-formed expression");
615 	return rval;
616     }
617 
618