xref: /netbsd-src/usr.bin/m4/expr.c (revision 0b9f50897e9a9c6709320fafb4c3787fddcc0a45)
1 /*  File   : expr.c
2     Authors: Mike Lutz & Bob Harper
3     Editors: Ozan Yigit & Richard A. O'Keefe
4     Updated: %G%
5     Purpose: arithmetic expression evaluator.
6 
7     expr() performs a standard recursive descent parse to evaluate any
8     expression permitted byf the following grammar:
9 
10       expr    :       query EOS
11       query   :       lor
12               |       lor "?" query ":" query
13       lor     :       land { "||" land }	or OR,  for Pascal
14       land    :       bor { "&&" bor }		or AND, for Pascal
15       bor     :       bxor { "|" bxor }
16       bxor    :       band { "^" band }
17       band    :       eql { "&" eql }
18       eql     :       relat { eqrel relat }
19       relat   :       shift { rel shift }
20       shift   :       primary { shop primary }
21       primary :       term { addop term }
22       term    :       unary { mulop unary }
23       unary   :       factor
24               |       unop unary
25       factor  :       constant
26               |       "(" query ")"
27       constant:       num
28               |       "'" CHAR "'"		or '"' CHAR '"'
29       num     :       DIGIT			full ANSI C syntax
30               |       DIGIT num
31       shop    :       "<<"
32               |       ">>"
33       eqlrel  :       "="
34               |       "=="
35               |       "!="
36       rel     :       "<"			or <>, Pascal not-equal
37               |       ">"
38               |       "<="			or =<, for Prolog users.
39               |       ">="
40 
41     This expression evaluator was lifted from a public-domain
42     C Pre-Processor included with the DECUS C Compiler distribution.
43     It has been hacked somewhat to be suitable for m4.
44 
45     26-Mar-1993		Changed to work in any of EBCDIC, ASCII, DEC MNCS,
46 			or ISO 8859/n.
47 
48     26-Mar-1993		Changed to use "long int" rather than int, so that
49 			we get the same 32-bit arithmetic on a PC as on a Sun.
50 			It isn't fully portable, of course, but then on a 64-
51 			bit machine we _want_ 64-bit arithmetic...
52 			Shifting rewritten (using LONG_BIT) to give signed
53 			shifts even when (long) >> (long) is unsigned.
54 
55     26-Mar-1993		I finally got sick of the fact that &&, ||, and ?:
56 			don't do conditional evaluation.  What is the good
57 			of having eval(0&&(1/0)) crash and dump core?  Now
58 			every function has a doit? argument.
59 
60     26-Mar-1993		charcon() didn't actually accept 'abcd', which it
61 			should have.  Fixed it.
62 
63     20-Apr-1993		eval(1/0) and eval(1%0) dumped core and crashed.
64 			This is also true of the System V r 3.2 m4, but
65 			it isn't good enough for ours!  Changed it so that
66 			x % 0 => x	as per Concrete Mathematics
67 			x / 0 => error and return 0 from expr().
68 */
69 
70 #ifndef lint
71 static char rcsid[] = "$Id: expr.c,v 1.3 1993/08/02 17:54:38 mycroft Exp $";
72 #endif /* not lint */
73 
74 #define FALSE   0
75 #define	TRUE	1
76 
77 #include <stdio.h>
78 #include <setjmp.h>
79 static jmp_buf expjump;		/* Error exit point for expr() */
80 
81 static unsigned char *nxtchr;	/* Parser scan pointer */
82 
83 #define	deblank0	while ((unsigned)(*nxtchr-1) < ' ') nxtchr++
84 #define deblank1	while ((unsigned)(*++nxtchr - 1) < ' ')
85 #define deblank2	nxtchr++; deblank1
86 
87 #include "ourlims.h"
88 static char digval[1+UCHAR_MAX];
89 
90 /*  This file should work in any C implementation that doesn't have too
91     many characters to fit in one table.  We use a table to convert
92     (unsigned) characters to numeric codes:
93 	 0 to  9	for '0' to '9'
94 	10 to 35	for 'a' to 'z'
95 	10 to 35	for 'A' to 'Z'
96 	36		for '_'
97     Instead of asking whether tolower(c) == 'a' we ask whether
98     digval[c] == DIGIT_A, and so on.  This essentially duplicates the
99     chtype[] table in main.c; we should use just one table.
100 */
101 #define	DIGIT_A 10
102 #define	DIGIT_B 11
103 #define	DIGIT_C 12
104 #define	DIGIT_D 13
105 #define	DIGIT_E 14
106 #define	DIGIT_F 15
107 #define	DIGIT_G 16
108 #define DIGIT_H 17
109 #define	DIGIT_I	18
110 #define	DIGIT_J 19
111 #define DIGIT_K 20
112 #define	DIGIT_L	21
113 #define DIGIT_M 22
114 #define DIGIT_N 23
115 #define	DIGIT_O 24
116 #define	DIGIT_P 25
117 #define	DIGIT_Q 26
118 #define	DIGIT_R	27
119 #define	DIGIT_S 28
120 #define	DIGIT_T 29
121 #define	DIGIT_U 30
122 #define	DIGIT_V 31
123 #define	DIGIT_W 32
124 #define	DIGIT_X 33
125 #define	DIGIT_Y 34
126 #define	DIGIT_Z 35
127 
128 
129 #ifdef	__STDC__
130 static long int query(int);
131 #else
132 static long int query();
133 #endif
134 
135 
136 /*  experr(msg)
137     prints an error message, resets environment to expr(), and
138     forces expr() to return FALSE.
139 */
140 void experr(msg)
141     char *msg;
142     {
143 	(void) fprintf(stderr, "m4: %s\n", msg);
144 	longjmp(expjump, -1);	/* Force expr() to return FALSE */
145     }
146 
147 
148 /*  <numcon> ::= '0x' <hex> | '0X' <hex> | '0' <oct> | <dec>
149     For ANSI C, an integer may be followed by u, l, ul, or lu,
150     in any mix of cases.  We accept and ignore those letters;
151     all the numbers are treated as long.
152 */
153 static long int numcon(doit)
154     int doit;
155     {
156 	register long int v;	/* current value */
157 	register int b;		/* base (radix) */
158 	register int c;		/* character or digit value */
159 
160 	if (!doit) {
161 	    do nxtchr++; while (digval[*nxtchr] <= 36);
162 	    deblank0;
163 	    return 0;
164 	}
165 
166 	v = digval[*nxtchr++];	/* We already know it's a digit */
167 	if (v != 0) {
168 	    b = 10;		/* decimal number */
169 	} else
170 	if (digval[*nxtchr] == DIGIT_X) {
171 	    nxtchr++;
172 	    b = 16;		/* hexadecimal number */
173 	} else {
174 	    b = 8;		/* octal number */
175 	}
176 	do {
177 	    while (digval[c = *nxtchr++] < b) v = v*b + digval[c];
178 	} while (c == '_');
179 	while (digval[c] == DIGIT_L || digval[c] == DIGIT_U) c = *nxtchr++;
180 	nxtchr--;		/* unread c */
181 	if ((unsigned)(c-1) < ' ') { deblank1; }
182 	return v;
183     }
184 
185 
186 /*  <charcon> ::= <qt> { <char> } <qt>
187     Note: multibyte constants are accepted.
188     Note: BEL (\a) and ESC (\e) have the same values in EBCDIC and ASCII.
189 */
190 static long int charcon(doit)
191     int doit;
192     {
193 	register int i;
194 	long int value;
195 	register int c;
196 	int q;
197 	int v[sizeof value];
198 
199 	q = *nxtchr++;		/* the quote character */
200 	for (i = 0; ; i++) {
201 	    c = *nxtchr++;
202 	    if (c == q) {	/* end of literal, or doubled quote */
203 		if (*nxtchr != c) break;
204 		nxtchr++;	/* doubled quote stands for one quote */
205 	    }
206 	    if (i == sizeof value) experr("Unterminated character constant");
207 	    if (c == '\\') {
208 		switch (c = *nxtchr++) {
209 		    case '0': case '1': case '2': case '3':
210 		    case '4': case '5': case '6': case '7':
211 			c -= '0';
212 			if ((unsigned)(*nxtchr - '0') < 8)
213 			    c = (c << 3) | (*nxtchr++ - '0');
214 			if ((unsigned)(*nxtchr - '0') < 8)
215 			    c = (c << 3) | (*nxtchr++ - '0');
216 			break;
217 		    case 'n': case 'N': c = '\n'; break;
218 		    case 'r': case 'R': c = '\r'; break;
219 		    case 't': case 'T': c = '\t'; break;
220 		    case 'b': case 'B': c = '\b'; break;
221 		    case 'f': case 'F': c = '\f'; break;
222 		    case 'a': case 'A': c = 007;  break;
223 		    case 'e': case 'E': c = 033;  break;
224 #if	' ' == 64
225 		    case 'd': case 'D': c = 045;  break; /*EBCDIC DEL */
226 #else
227 		    case 'd': case 'D': c = 127;  break; /* ASCII DEL */
228 #endif
229 		    default :			  break;
230 		}
231 	    }
232 	    v[i] = c;
233 	}
234 	deblank0;
235 	if (!doit) return 0;
236 	for (value = 0; --i >= 0; ) value = (value << CHAR_BIT) | v[i];
237 	return value;
238     }
239 
240 
241 /*  <unary> ::= <unop> <unary> | <factor>
242     <unop> ::= '!' || '~' | '-'
243     <factor> ::= '(' <query> ')' | <'> <char> <'> | <"> <char> <"> | <num>
244 */
245 static long int unary(doit)
246     int doit;
247     {
248 	long int v;
249 
250 	switch (nxtchr[0]) {
251 	    case 'n': case 'N':
252 			if (digval[nxtchr[1]] != DIGIT_O
253 			||  digval[nxtchr[2]] != DIGIT_T)
254 			    experr("Bad 'not'");
255 			nxtchr += 2;
256 	    case '!':	deblank1; return !unary(doit);
257 	    case '~':	deblank1; return ~unary(doit);
258 	    case '-':	deblank1; return -unary(doit);
259 	    case '+':	deblank1; return  unary(doit);
260 	    case '(':	deblank1; v = query(doit);
261 			if (nxtchr[0] != ')') experr("Bad factor");
262 			deblank1; return v;
263 	    case '\'':
264 	    case '\"':	return charcon(doit);
265 	    case '0': case '1': case '2':
266 	    case '3': case '4': case '5':
267 	    case '6': case '7': case '8':
268 	    case '9':	return numcon(doit);
269 	    default :   experr("Bad constant");
270 	}
271 	return 0;	/*NOTREACHED*/
272     }
273 
274 
275 /*  <term> ::= <unary> { <mulop> <unary> }
276     <mulop> ::= '*' | '/' || '%'
277 */
278 static long int term(doit)
279     int doit;
280     {
281 	register long int vl, vr;
282 
283 	vl = unary(doit);
284 	for (;;)
285 	    switch (nxtchr[0]) {
286 		case '*':
287 		    deblank1;
288 		    vr = unary(doit);
289 		    if (doit) vl *= vr;
290 		    break;
291 		case 'd': case 'D':
292 		    if (digval[nxtchr[1]] != DIGIT_I
293 		    ||  digval[nxtchr[2]] != DIGIT_V)
294 			experr("Bad 'div'");
295 		    nxtchr += 2;
296 		    /*FALLTHROUGH*/
297 		case '/':
298 		    deblank1;
299 		    vr = unary(doit);
300 		    if (doit) {
301 			if (vr == 0) experr("Division by 0");
302 			vl /= vr;
303 		    }
304 		    break;
305 		case 'm': case 'M':
306 		    if (digval[nxtchr[1]] != DIGIT_O
307 		    ||  digval[nxtchr[2]] != DIGIT_D)
308 			experr("Bad 'mod'");
309 		    nxtchr += 2;
310 		    /*FALLTHROUGH*/
311 		case '%':
312 		    deblank1;
313 		    vr = unary(doit);
314 		    if (doit) {
315 			if (vr != 0) vl %= vr;
316 		    }
317 		    break;
318 		default:
319 		    return vl;
320 	    }
321 	/*NOTREACHED*/
322     }
323 
324 /*  <primary> ::= <term> { <addop> <term> }
325     <addop> ::= '+' | '-'
326 */
327 static long int primary(doit)
328     int doit;
329     {
330 	register long int vl;
331 
332 	vl = term(doit);
333 	for (;;)
334 	    if (nxtchr[0] == '+') {
335 		deblank1;
336 		if (doit) vl += term(doit); else (void)term(doit);
337 	    } else
338 	    if (nxtchr[0] == '-') {
339 		deblank1;
340 		if (doit) vl -= term(doit); else (void)term(doit);
341 	    } else
342 		return vl;
343 	/*NOTREACHED*/
344     }
345 
346 
347 /*  <shift> ::= <primary> { <shop> <primary> }
348     <shop> ::= '<<' | '>>'
349 */
350 static long int shift(doit)
351     int doit;
352     {
353 	register long int vl, vr;
354 
355 	vl = primary(doit);
356 	for (;;) {
357 	    if (nxtchr[0] == '<' && nxtchr[1] == '<') {
358 		deblank2;
359 		vr = primary(doit);
360 	    } else
361 	    if (nxtchr[0] == '>' && nxtchr[1] == '>') {
362 		deblank2;
363 		vr = -primary(doit);
364 	    } else {
365 		return vl;
366 	    }
367 	    /* The following code implements shifts portably */
368 	    /* Shifts are signed shifts, and the shift count */
369 	    /* acts like repeated one-bit shifts, not modulo anything */
370 	    if (doit) {
371 		if (vr >= LONG_BIT) {
372 		    vl = 0;
373 		} else
374 		if (vr <= -LONG_BIT) {
375 		    vl = -(vl < 0);
376 		} else
377 		if (vr > 0) {
378 		    vl <<= vr;
379 		} else
380 		if (vr < 0) {
381 		    vl = (vl >> -vr) | (-(vl < 0) << (LONG_BIT + vr));
382 		}
383 	    }
384 	}
385 	/*NOTREACHED*/
386     }
387 
388 
389 /*  <relat> ::= <shift> { <rel> <shift> }
390     <rel> ::= '<=' | '>=' | '=<' | '=>' | '<' | '>'
391     Here I rely on the fact that '<<' and '>>' are swallowed by <shift>
392 */
393 static long int relat(doit)
394     int doit;
395     {
396 	register long int vl;
397 
398 	vl = shift(doit);
399 	for (;;)
400 	    switch (nxtchr[0]) {
401 		case '=':
402 		    switch (nxtchr[1]) {
403 			case '<':			/* =<, take as <= */
404 			    deblank2;
405 			    vl = vl <= shift(doit);
406 			    break;
407 			case '>':			/* =>, take as >= */
408 			    deblank2;
409 			    vl = vl >= shift(doit);
410 			    break;
411 			default:			/* == or =; OOPS */
412 			    return vl;
413 		    }
414 		    break;
415 		case '<':
416 		    if (nxtchr[1] == '=') {		/* <= */
417 			deblank2;
418 			vl = vl <= shift(doit);
419 		    } else
420 		    if (nxtchr[1] == '>') {		/* <> (Pascal) */
421 			deblank2;
422 			vl = vl != shift(doit);
423 		    } else {				/* < */
424 			deblank1;
425 			vl = vl < shift(doit);
426 		    }
427 		    break;
428 		case '>':
429 		    if (nxtchr[1] == '=') {		/* >= */
430 			deblank2;
431 			vl = vl >= shift(doit);
432 		    } else {				/* > */
433 			deblank1;
434 			vl = vl > shift(doit);
435 		    }
436 		    break;
437 		default:
438 		    return vl;
439 	}
440 	/*NOTREACHED*/
441     }
442 
443 
444 /*  <eql> ::= <relat> { <eqrel> <relat> }
445     <eqlrel> ::= '!=' | '==' | '='
446 */
447 static long int eql(doit)
448     int doit;
449     {
450 	register long int vl;
451 
452 	vl = relat(doit);
453 	for (;;)
454 	    if (nxtchr[0] == '!' && nxtchr[1] == '=') {
455 		deblank2;
456 		vl = vl != relat(doit);
457 	    } else
458 	    if (nxtchr[0] == '=' && nxtchr[1] == '=') {
459 		deblank2;
460 		vl = vl == relat(doit);
461 	    } else
462 	    if (nxtchr[0] == '=') {
463 		deblank1;
464 		vl = vl == relat(doit);
465 	    } else
466 		return vl;
467 	/*NOTREACHED*/
468     }
469 
470 
471 /*  <band> ::= <eql> { '&' <eql> }
472 */
473 static long int band(doit)
474     int doit;
475     {
476 	register long int vl;
477 
478 	vl = eql(doit);
479 	while (nxtchr[0] == '&' && nxtchr[1] != '&') {
480 	    deblank1;
481 	    if (doit) vl &= eql(doit); else (void)eql(doit);
482 	}
483 	return vl;
484     }
485 
486 
487 /*  <bxor> ::= <band> { '^' <band> }
488 */
489 static long int bxor(doit)
490     int doit;
491     {
492 	register long int vl;
493 
494 	vl = band(doit);
495 	while (nxtchr[0] == '^') {
496 	    deblank1;
497 	    if (doit) vl ^= band(doit); else (void)band(doit);
498 	}
499 	return vl;
500     }
501 
502 
503 /*  <bor> ::= <bxor> { '|' <bxor> }
504 */
505 static long int bor(doit)
506     int doit;
507     {
508 	register long int vl;
509 
510 	vl = bxor(doit);
511 	while (nxtchr[0] == '|' && nxtchr[1] != '|') {
512 	    deblank1;
513 	    if (doit) vl |= bxor(doit); else (void)bxor(doit);
514 	}
515 	return vl;
516     }
517 
518 
519 /*  <land> ::= <bor> { '&&' <bor> }
520 */
521 static long int land(doit)
522     int doit;
523     {
524 	register long int vl;
525 
526 	vl = bor(doit);
527 	for (;;) {
528 	    if (nxtchr[0] == '&') {
529 		if (nxtchr[1] != '&') break;
530 		deblank2;
531 	    } else
532 	    if (digval[nxtchr[0]] == DIGIT_A) {
533 		if (digval[nxtchr[1]] != DIGIT_N) break;
534 		if (digval[nxtchr[2]] != DIGIT_D) break;
535 		nxtchr += 2; deblank1;
536 	    } else {
537 		/* neither && nor and */
538 		break;
539 	    }
540 	    vl = bor(doit && vl) != 0;
541 	}
542 	return vl;
543     }
544 
545 
546 /*  <lor> ::= <land> { '||' <land> }
547 */
548 static long int lor(doit)
549     int doit;
550     {
551 	register long int vl;
552 
553 	vl = land(doit);
554 	for (;;) {
555 	    if (nxtchr[0] == '|') {
556 		if (nxtchr[1] != '|') break;
557 	    } else
558 	    if (digval[nxtchr[0]] == DIGIT_O) {
559 		if (digval[nxtchr[1]] != DIGIT_R) break;
560 	    } else {
561 		/* neither || nor or */
562 		break;
563 	    }
564 	    deblank2;
565 	    vl = land(doit && !vl) != 0;
566 	}
567 	return vl;
568     }
569 
570 
571 /*  <query> ::= <lor> [ '?' <query> ':' <query> ]
572 */
573 static long int query(doit)
574     int doit;
575     {
576 	register long int bool, true_val, false_val;
577 
578 	bool = lor(doit);
579 	if (*nxtchr != '?') return bool;
580 	deblank1;
581 	true_val = query(doit && bool);
582 	if (*nxtchr != ':') experr("Bad query");
583 	deblank1;
584 	false_val = query(doit && !bool);
585 	return bool ? true_val : false_val;
586     }
587 
588 
589 static void initialise_digval()
590     {
591 	register unsigned char *s;
592 	register int c;
593 
594 	for (c = 0; c <= UCHAR_MAX; c++) digval[c] = 99;
595 	for (c =  0, s = (unsigned char *)"0123456789";
596 	/*while*/ *s;
597 	/*doing*/ digval[*s++] = c++) /* skip */;
598 	for (c = 10, s = (unsigned char *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
599 	/*while*/ *s;
600 	/*doing*/ digval[*s++] = c++) /* skip */;
601 	for (c = 10, s = (unsigned char *)"abcdefghijklmnopqrstuvwxyz";
602 	/*while*/ *s;
603 	/*doing*/ digval[*s++] = c++) /* skip */;
604 	digval['_'] = 36;
605     }
606 
607 
608 long int expr(expbuf)
609     char *expbuf;
610     {
611 	register int rval;
612 
613 	if (digval['1'] == 0) initialise_digval();
614 	nxtchr = (unsigned char *)expbuf;
615 	deblank0;
616 	if (setjmp(expjump) != 0) return FALSE;
617 	rval = query(TRUE);
618 	if (*nxtchr) experr("Ill-formed expression");
619 	return rval;
620     }
621 
622