xref: /netbsd-src/external/bsd/pcc/dist/pcc/cc/cpp/token.c (revision 411dcbec990c8aa9c57d3bd2f4bcacadec0b1ab5)
1 /*	Id: token.c,v 1.157 2016/01/09 09:53:44 ragge Exp 	*/
2 /*	$NetBSD: token.c,v 1.1.1.6 2016/02/09 20:28:44 plunky Exp $	*/
3 
4 /*
5  * Copyright (c) 2004,2009 Anders Magnusson. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * Tokenizer for the C preprocessor.
 * There are two main routines:
31  *	- fastscan() loops over the input stream searching for magic
32  *		characters that may require actions.
33  *	- yylex() returns something from the input stream that
34  *		is suitable for yacc.
35  *
36  *	Other functions of common use:
37  *	- inpch() returns a raw character from the current input stream.
38  *	- inch() is like inpch but \\n and trigraphs are expanded.
39  *	- unch() pushes back a character to the input stream.
40  *
41  * Input data can be read from either stdio or a buffer.
42  * If a buffer is read, it will return EOF when ended and then jump back
43  * to the previous buffer.
44  *	- setibuf(usch *ptr). Buffer to read from, until NULL, return EOF.
45  *		When EOF returned, pop buffer.
46  *	- setobuf(usch *ptr).  Buffer to write to
47  *
48  * There are three places data is read:
49  *	- fastscan() which has a small loop that will scan over input data.
50  *	- flscan() where everything is skipped except directives (flslvl)
51  *	- inch() that everything else uses.
52  *
53  * 5.1.1.2 Translation phases:
54  *	1) Convert UCN to UTF-8 which is what pcc uses internally (chkucn).
55  *	   Remove \r (unwanted)
56  *	   Convert trigraphs (chktg)
57  *	2) Remove \\\n.  Need extra care for identifiers and #line.
58  *	3) Tokenize.
59  *	   Remove comments (fastcmnt)
60  */
61 
62 #include "config.h"
63 
64 #include <stdlib.h>
65 #include <string.h>
66 #ifdef HAVE_UNISTD_H
67 #include <unistd.h>
68 #endif
69 #include <fcntl.h>
70 
71 #include "compat.h"
72 #include "cpp.h"
73 
/*
 * Forward declarations of file-local functions that are referenced
 * before their definitions further down in this file.
 */

/* numeric helpers; cvtdig() and charcon() are called from yylex() */
static void cvtdig(usch **);
static int dig2num(int);
static int charcon(usch **);

/* directive handlers, dispatched by name through the ppd[] table */
static void elsestmt(void);
static void ifdefstmt(void);
static void ifndefstmt(void);
static void endifstmt(void);
static void ifstmt(void);
static void cpperror(void);
static void cppwarning(void);
static void undefstmt(void);
static void pragmastmt(void);
static void elifstmt(void);

/* raw character input, trigraph/UCN checks and pushback */
static int inpch(void);
static int chktg(void);
static int chkucn(void);
static void unch(int c);
92 
/*
 * Output ch through putch() unless flslvl is non-zero.
 * Wrapped in do/while(0) so the expansion is a single statement and
 * cannot capture a following "else" when used as an if-body.
 */
#define	PUTCH(ch) do { if (!flslvl) putch(ch); } while (0)
/* protection against recursion in #include */
#define MAX_INCLEVEL	100
/* number of files currently opened by name through pushfile() */
static int inclevel;

/* input file being read; pushfile() links the previous one via ->next */
struct includ *ifiles;
99 
/* some common special combos for init */
#define C_NL	(C_SPEC|C_WSNL)
#define C_DX	(C_SPEC|C_ID|C_DIGIT|C_HEX)
#define C_I	(C_SPEC|C_ID|C_ID0)
#define C_IP	(C_SPEC|C_ID|C_ID0|C_EP)
#define C_IX	(C_SPEC|C_ID|C_ID0|C_HEX)
#define C_IXE	(C_SPEC|C_ID|C_ID0|C_HEX|C_EP)

/*
 * Character classification table, indexed by character value (0-255).
 * Each entry is a mask of C_* bits (the individual bits are defined
 * outside this file).  The scanners in this file test:
 *	- C_SPEC: fastscan() leaves its tight copy loop on these characters.
 *	- C_2:    yylex() reads one more character for a two-char operator.
 *	- C_ID/C_ID0, C_DIGIT, C_HEX, C_EP: identifier, digit, hex digit
 *	  and exponent-letter tests.
 * All letters and digits carry C_SPEC through the combos above.
 * Entries 4 and 5 are C_SPEC; presumably these are the WARN and CONC
 * marker values that fastscan() rejects -- confirm against cpp.h.
 */
usch spechr[256] = {
/* 0x00 - 0x1f */
	0,	0,	0,	0,	C_SPEC,	C_SPEC,	0,	0,
	0,	C_WSNL,	C_NL,	0,	0,	C_WSNL,	0,	0,
	0,	0,	0,	0,	0,	0,	0,	0,
	0,	0,	0,	0,	0,	0,	0,	0,

/* 0x20 - 0x3f: space, punctuation, digits */
	C_WSNL,	C_2,	C_SPEC,	0,	0,	0,	C_2,	C_SPEC,
	0,	0,	0,	C_2,	0,	C_2,	0,	C_SPEC,
	C_DX,	C_DX,	C_DX,	C_DX,	C_DX,	C_DX,	C_DX,	C_DX,
	C_DX,	C_DX,	0,	0,	C_2,	C_2,	C_2,	C_SPEC,

/* 0x40 - 0x5f: upper-case letters, backslash, underscore */
	0,	C_IX,	C_IX,	C_IX,	C_IX,	C_IXE,	C_IX,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_IP,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	0,	C_SPEC,	0,	0,	C_I,

/* 0x60 - 0x7f: lower-case letters */
	0,	C_IX,	C_IX,	C_IX,	C_IX,	C_IXE,	C_IX,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_IP,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	0,	C_2,	0,	0,	0,

/* utf-8: every byte 0x80 - 0xff is classified as an identifier character */
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,

	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,

	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,

	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,	C_I,
};
150 
151 /*
152  * fill up the input buffer
153  */
154 static int
inpbuf(void)155 inpbuf(void)
156 {
157 	int len;
158 
159 	if (ifiles->infil == -1)
160 		return 0;
161 	len = read(ifiles->infil, ifiles->buffer, CPPBUF);
162 	if (len == -1)
163 		error("read error on file %s", ifiles->orgfn);
164 	if (len > 0) {
165 		ifiles->buffer[len] = 0;
166 		ifiles->curptr = ifiles->buffer;
167 		ifiles->maxread = ifiles->buffer + len;
168 	}
169 	return len;
170 }
171 
172 /*
173  * Fillup input buffer to contain at least minsz characters.
174  */
175 static int
refill(int minsz)176 refill(int minsz)
177 {
178 	usch *dp;
179 	int i, sz;
180 
181 	if (ifiles->curptr+minsz < ifiles->maxread)
182 		return 0; /* already enough in input buffer */
183 
184 	sz = ifiles->maxread - ifiles->curptr;
185 	dp = ifiles->buffer - sz;
186 	for (i = 0; i < sz; i++)
187 		dp[i] = ifiles->curptr[i];
188 	i = inpbuf();
189 	ifiles->curptr = dp;
190 	if (i == 0) {
191 		ifiles->maxread = ifiles->buffer;
192 		ifiles->buffer[0] = 0;
193 	}
194 	return 0;
195 }
196 #define	REFILL(x) if (ifiles->curptr+x >= ifiles->maxread) refill(x)
197 
198 /*
199  * return a raw character from the input stream
200  */
201 static inline int
inpch(void)202 inpch(void)
203 {
204 
205 	do {
206 		if (ifiles->curptr < ifiles->maxread)
207 			return *ifiles->curptr++;
208 	} while (inpbuf() > 0);
209 
210 	return -1;
211 }
212 
213 /*
214  * push a character back to the input stream
215  */
216 static void
unch(int c)217 unch(int c)
218 {
219 	if (c == -1)
220 		return;
221 
222 	ifiles->curptr--;
223 	if (ifiles->curptr < ifiles->bbuf)
224 		error("pushback buffer full");
225 	*ifiles->curptr = (usch)c;
226 }
227 
/*
 * Trigraph check; called after one '?' has already been consumed.
 * If the next two characters complete a trigraph they are consumed and
 * the replacement character is returned.  Otherwise everything read
 * here is pushed back and 0 is returned.
 */
static int
chktg(void)
{
	/* third[i] is the final trigraph character, repl[i] its meaning */
	static const char third[] = "=()<>/'!-";
	static const char repl[] = "#[]{}\\^|~";
	int c, i;

	c = inpch();
	if (c != '?') {
		unch(c);
		return 0;
	}

	c = inpch();
	for (i = 0; third[i] != '\0'; i++) {
		if (c == third[i])
			return repl[i];
	}

	/* not a trigraph: restore both characters read here */
	unch(c);
	unch('?');
	return 0;
}
257 
/*
 * 5.1.1.2 Translation phase 1.
 * Returns the next character with '\r' dropped, universal character
 * names converted (chkucn() pushes the replacement bytes back, so
 * reading simply restarts) and trigraphs replaced.
 */
static int
inc1(void)
{
	int c, tg;

	for (;;) {
		c = inpch();
		if (c == '\r')
			continue;
		if (c == '\\' && chkucn())
			continue;
		break;
	}
	if (c == '?') {
		tg = chktg();
		if (tg != 0)
			c = tg;
	}
	return c;
}
273 
274 
275 /*
276  * 5.1.1.2 Translation phase 2.
277  */
278 int
inc2(void)279 inc2(void)
280 {
281 	int ch, c2;
282 
283 	if ((ch = inc1()) != '\\')
284 		return ch;
285 	if ((c2 = inc1()) == '\n') {
286 		ifiles->escln++;
287 		ch = inc2();
288 	} else
289 		unch(c2);
290 	return ch;
291 }
292 
293 static int incmnt;
294 /*
295  * deal with comments in the fast scanner.
296  * ps prints out the initial '/' if failing to batch comment.
297  */
298 static int
fastcmnt(int ps)299 fastcmnt(int ps)
300 {
301 	int ch, rv = 1;
302 
303 	incmnt = 1;
304 	if ((ch = inc2()) == '/') { /* C++ comment */
305 		while ((ch = inc2()) != '\n')
306 			;
307 		unch(ch);
308 	} else if (ch == '*') {
309 		for (;;) {
310 			if ((ch = inc2()) < 0)
311 				break;
312 			if (ch == '*') {
313 				if ((ch = inc2()) == '/') {
314 					break;
315 				} else
316 					unch(ch);
317 			} else if (ch == '\n') {
318 				ifiles->lineno++;
319 				putch('\n');
320 			}
321 		}
322 	} else {
323 		if (ps) PUTCH('/'); /* XXX ? */
324 		unch(ch);
325 		rv = 0;
326         }
327 	if (ch < 0)
328 		error("file ends in comment");
329 	incmnt = 0;
330 	return rv;
331 }
332 
333 /*
334  * return next char, partly phase 3.
335  */
336 static int
inch(void)337 inch(void)
338 {
339 	int ch, n;
340 
341 	ch = inc2();
342 	n = ifiles->lineno;
343 	if (ch == '/' && Cflag == 0 && fastcmnt(0)) {
344 		/* Comments 5.1.1.2 p3 */
345 		/* no space if traditional or multiline */
346 		ch = (tflag || n != ifiles->lineno) ? inch() : ' ';
347 	}
348 	return ch;
349 }
350 
/*
 * check for universal-character-name on input, and
 * unput to the pushback buffer encoded as UTF-8.
 *
 * Called after a backslash has been read.  Does nothing and returns 0
 * while incmnt is set, at end of input, or if the next character is
 * neither 'u' nor 'U' (that character is pushed back).
 * Returns 1 after the encoded bytes have been pushed back.
 */
static int
chkucn(void)
{
	unsigned long cp, m;
	int ch, n;

	if (incmnt)
		return 0;
	if ((ch = inpch()) == -1)
		return 0;
	/* \u takes 4 hex digits, \U takes 8 */
	if (ch == 'u')
		n = 4;
	else if (ch == 'U')
		n = 8;
	else {
		unch(ch);
		return 0;
	}

	/* accumulate the code point from the hex digits */
	cp = 0;
	while (n-- > 0) {
		if ((ch = inpch()) == -1 || (spechr[ch] & C_HEX) == 0) {
			warning("invalid universal character name");
			// XXX should actually unput the chars and return 0
			unch(ch); // XXX eof
			/* the digits read so far are still used below */
			break;
		}
		cp = cp * 16 + dig2num(ch);
	}

	if ((cp < 0xa0 && cp != 0x24 && cp != 0x40 && cp != 0x60)
	    || (cp >= 0xd800 && cp <= 0xdfff))	/* 6.4.3.2 */
		error("universal character name cannot be used");

	if (cp > 0x7fffffff)
		error("universal character name out of range");

	/*
	 * Encode as UTF-8.  unch() prepends, so the continuation bytes
	 * are pushed lowest-order first and the lead byte last; they are
	 * then read back in the normal order.  m is the largest value
	 * that still fits in the lead byte for the current length.
	 */
	n = 0;
	m = 0x7f;
	while (cp > m) {
		unch(0x80 | (cp & 0x3f));
		cp >>= 6;
		m >>= (n++ ? 1 : 2);
	}
	/* lead byte: length prefix bits derived from m, plus the top bits */
	unch(((m << 1) ^ 0xfe) | cp);
	return 1;
}
402 
403 /*
404  * deal with comments when -C is active.
405  * Save comments in expanded macros???
406  */
407 int
Ccmnt(void (* d)(int))408 Ccmnt(void (*d)(int))
409 {
410 	int ch;
411 
412 	if ((ch = inch()) == '/') { /* C++ comment */
413 		d(ch);
414 		do {
415 			d(ch);
416 		} while ((ch = inch()) != '\n');
417 		unch(ch);
418 		return 1;
419 	} else if (ch == '*') {
420 		d('/');
421 		d('*');
422 		for (;;) {
423 			ch = inch();
424 			d(ch);
425 			if (ch == '*') {
426 				if ((ch = inch()) == '/') {
427 					d(ch);
428 					return 1;
429 				} else
430 					unch(ch);
431 			} else if (ch == '\n') {
432 				ifiles->lineno++;
433 			}
434 		}
435 	}
436 	d('/');
437         unch(ch);
438         return 0;
439 }
440 
/*
 * Skip characters from inch() for as long as ISWS() accepts them
 * (comments are already handled inside inch()).
 * Returns the first character that is not skipped.
 */
static int
fastspc(void)
{
	int c;

	do {
		c = inch();
	} while (ISWS(c));
	return c;
}
454 
455 /*
456  * As above but only between \n and #.
457  */
458 static int
fastspcg(void)459 fastspcg(void)
460 {
461 	int ch, c2;
462 
463 	while ((ch = inch()) == '/' || ch == '%' || ISWS(ch)) {
464 		if (ch == '%') {
465 			if ((c2 = inch()) == ':')
466 				ch = '#'; /* digraphs */
467 			else
468 				unch(c2);
469 			break;
470 		}
471 		if (ch == '/') {
472 			if (Cflag)
473 				return ch;
474 			if (fastcmnt(0) == 0)
475 				break;
476 			putch(' ');
477 		} else
478 			putch(ch);
479 	}
480 	return ch;
481 }
482 
483 /*
484  * readin chars and store on heap. Warn about too long names.
485  */
486 usch *
heapid(int ch)487 heapid(int ch)
488 {
489 	usch *bp = stringbuf;
490 	do {
491 		savch(ch);
492 	} while (spechr[ch = inch()] & C_ID);
493 	savch(0);
494 	unch(ch);
495 	return bp;
496 }
497 
498 /*
499  * get a string or character constant and save it as given by d.
500  */
501 void
faststr(int bc,void (* d)(int))502 faststr(int bc, void (*d)(int))
503 {
504 	int ch;
505 
506 	incmnt = 1;
507 	d(bc);
508 	while ((ch = inc2()) != bc) {
509 		if (ch == '\n') {
510 			warning("unterminated literal");
511 			incmnt = 0;
512 			unch(ch);
513 			return;
514 		}
515 		if (ch < 0)
516 			return;
517 		if (ch == '\\') {
518 			incmnt = 0;
519 			if (chkucn())
520 				continue;
521 			incmnt = 1;
522 			d(ch);
523 			ch = inc2();
524 		}
525 		d(ch);
526 	}
527 	d(ch);
528 	incmnt = 0;
529 }
530 
531 /*
532  * get a preprocessing number and save it as given by d.
533  * Initial char ch is always stored.
534  * returns first non-pp-number char.
535  *
536  *	pp-number:	digit
537  *			. digit
538  *			pp-number digit
539  *			pp-number identifier-nondigit
540  *			pp-number e sign
541  *			pp-number E sign
542  *			pp-number p sign
543  *			pp-number P sign
544  *			pp-number .
545  */
546 int
fastnum(int ch,void (* d)(int))547 fastnum(int ch, void (*d)(int))
548 {
549 	int c2;
550 
551 	if ((spechr[ch] & C_DIGIT) == 0) {
552 		/* not digit, dot */
553 		d(ch);
554 		ch = inch();
555 		if ((spechr[ch] & C_DIGIT) == 0)
556 			return ch;
557 	}
558 	for (;;) {
559 		d(ch);
560 		if ((ch = inch()) < 0)
561 			return -1;
562 		if ((spechr[ch] & C_EP)) {
563 			if ((c2 = inch()) != '-' && c2 != '+') {
564 				if (c2 >= 0)
565 					unch(c2);
566 				break;
567 			}
568 			d(ch);
569 			ch = c2;
570 		} else if (ch == '.' || (spechr[ch] & C_ID)) {
571 			continue;
572 		} else
573 			break;
574 	}
575 	return ch;
576 }
577 
/*
 * Scan quickly the input file searching for:
 *	- '#' directives
 *	- keywords (if not flslvl)
 *	- comments
 *
 *	Handle strings, numbers and trigraphs with care.
 *	Only data from pp files are scanned here, never any rescans.
 *	This loop is always at trulvl.
 *
 * Ordinary characters are copied straight to the output with putch().
 * Returns when the current input is exhausted.
 */
static void
fastscan(void)
{
	struct symtab *nl;
	int ch, c2, i, nch;
	usch *cp, *cp2;

	/* start in the "beginning of line" state to catch a '#' on line 1 */
	goto run;

	for (;;) {
		/* tight loop to find special chars */
		/* should use getchar/putchar here */
		for (;;) {
			if (ifiles->curptr < ifiles->maxread) {
				ch = *ifiles->curptr++;
			} else {
				if (inpbuf() > 0)
					continue;
				return;
			}
			/* entry point for a character obtained elsewhere */
xloop:			if (ch < 0)
				return; /* EOF */
			if ((spechr[ch] & C_SPEC) != 0)
				break;
			putch(ch);
		}

		/* make one character of lookahead available in nch */
		REFILL(2);
		nch = *ifiles->curptr;
		switch (ch) {
		case WARN:
		case CONC:
			error("bad char passed");
			break;

		case '/': /* Comments */
			if (nch != '/' && nch != '*') {
				putch(ch);
				continue;
			}
			if (Cflag == 0) {
				if (fastcmnt(1))
					putch(' '); /* 5.1.1.2 p3 */
			} else
				Ccmnt(putch);
			break;

		case '\n': /* newlines, for pp directives */
			/* take care of leftover \n */
			/* escln counts backslash-newlines swallowed on this line */
			i = ifiles->escln + 1;
			ifiles->lineno += i;
			ifiles->escln = 0;
			while (i-- > 0)
				putch('\n');

			/* search for a # */
run:			while ((ch = inch()) == '\t' || ch == ' ')
				putch(ch);
			/* the digraph %: is an alternative spelling of # */
			if (ch == '%') {
				if ((c2 = inch()) != ':')
					unch(c2);
				else
					ch = '#';
			}
			if (ch  == '#')
				ppdir();
			else
				goto xloop;
			break;

		case '?':
			/* possible trigraph; chktg() consumes it if so */
			if (nch == '?' && (ch = chktg()))
				goto xloop;
			putch('?');
			break;

		case '\'': /* character constant */
			if (tflag) {
				putch(ch);
				break;	/* character constants ignored */
			}
			/* FALLTHROUGH */
		case '\"': /* strings */
			faststr(ch, putch);
			break;

		case '.':  /* for pp-number */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* fastnum() returns the first char after the number */
			ch = fastnum(ch, putch);
			goto xloop;

		case 'u':
			/* u8"..." prefix: emit the 'u', the rest follows normally */
			if (nch == '8' && ifiles->curptr[1] == '\"') {
				putch(ch);
				break;
			}
			/* FALLTHROUGH */
		case 'L':
		case 'U':
			/* literal prefix: emit it instead of treating it as a name */
			if (nch == '\"' || nch == '\'') {
				putch(ch);
				break;
			}
			/* FALLTHROUGH */
		default:
#ifdef PCC_DEBUG
			if ((spechr[ch] & C_ID) == 0)
				error("fastscan");
#endif
		ident:
			if (flslvl)
				error("fastscan flslvl");
			/* collect the identifier on the heap, then release it */
			cp = stringbuf;
			heapid(ch);
			stringbuf = cp;
			if ((nl = lookup(cp, FIND))) {
				if (kfind(nl)) {
					/* pad with a space next to a leading/trailing sign */
					if (*cp == '-' || *cp == '+')
						putch(' ');
					putstr(cp);
					for (cp2 = cp; *cp2; cp2++)
						;
					if (cp2[-1] == '-' || cp2[-1] == '+')
						putch(' ');
				}
			} else
				putstr(cp);
			stringbuf = cp;
			break;

		case '\\':
			/* backslash-newline: count it and drop both characters */
			if (nch == '\n') {
				ifiles->escln++;
				ifiles->curptr++;
				break;
			}
			/* a UCN here starts an identifier */
			if (chkucn()) {
				ch = inch();
				goto ident;
			}
			putch('\\');
			break;
		}
	}

	/* NOTE(review): appears unreachable; the loop above only exits via return */
/*eof:*/	warning("unexpected EOF");
	putch('\n');
}
737 
738 /*
739  * Store an if/elif line on heap for parsing, evaluate macros and
740  * call yyparse().
741  */
742 static usch *yyinp;
743 int inexpr;
744 static int
exprline(void)745 exprline(void)
746 {
747 	struct symtab *nl;
748 	int oCflag = Cflag;
749 	usch *cp, *bp = stringbuf;
750 	int c, d, ifdef;
751 
752 	Cflag = ifdef = 0;
753 
754 	while ((c = inch()) != '\n') {
755 		if (c == '\'' || c == '\"') {
756 			faststr(c, savch);
757 			continue;
758 		}
759 		if (ISDIGIT(c) || c == '.') {
760 			c = fastnum(c, savch);
761 			if (c == '\n')
762 				break;
763 			unch(c);
764 			continue;
765 		}
766 		if (c == 'L' || c == 'u' || c == 'U') {
767 			unch(d = inch());
768 			if (d == '\'')	/* discard wide designator */
769 				continue;
770 		}
771 		if (ISID0(c)) {
772 			cp = heapid(c);
773 			stringbuf = cp;
774 			nl = lookup(cp, FIND);
775 			if (strcmp((char *)cp, "defined") == 0) {
776 				ifdef = 1;
777 			} else if (ifdef) {
778 				savch(nl ? '1' : '0');
779 				ifdef = 0;
780 			} else if (nl != NULL) {
781 				inexpr = 1;
782 				if (kfind(nl)) {
783 					while (*stringbuf)
784 						stringbuf++;
785 				} else
786 					savch('0');
787 				inexpr = 0;
788 			} else
789 				savch('0');
790 		} else
791 			savch(c);
792 	}
793 	savch(0);
794 	unch('\n');
795 	yyinp = bp;
796 	c = yyparse();
797 	stringbuf = bp;
798 	Cflag = oCflag;
799 	return c;
800 }
801 
/*
 * Lexer for the #if expression parser.  Reads from the line stored by
 * exprline() through yyinp.
 * Returns a token: NUMBER with the value in yynode, one of the
 * two-character operator tokens, WARN at the end of the line, or the
 * character itself.
 */
int
yylex(void)
{
	int ch, c2, t;

	/* skip blanks */
	while ((ch = *yyinp++) == ' ' || ch == '\t')
		;
	t = ISDIGIT(ch) ? NUMBER : ch;
	/* read one char ahead if ch may start a two-character operator */
	if (ch < 128 && (spechr[ch] & C_2))
		c2 = *yyinp++;
	else
		c2 = 0;

	switch (t) {
	case 0: return WARN;	/* terminating NUL of the stored line */
	case '=':
		if (c2 == '=') return EQ;
		break;
	case '!':
		if (c2 == '=') return NE;
		break;
	case '|':
		if (c2 == '|') return OROR;
		break;
	case '&':
		if (c2 == '&') return ANDAND;
		break;
	case '<':
		if (c2 == '<') return LS;
		if (c2 == '=') return LE;
		break;
	case '>':
		if (c2 == '>') return RS;
		if (c2 == '=') return GE;
		break;
	case '+':
	case '-':
		/* ++ and -- are rejected */
		if (ch == c2)
			error("invalid preprocessor operator %c%c", ch, c2);
		break;

	case '\'':
		yynode.op = NUMBER;
		yynode.nd_val = charcon(&yyinp);
		return NUMBER;

	case NUMBER:
		cvtdig(&yyinp);
		return NUMBER;

	default:
		/* an identifier left in the line evaluates to 0 */
		if (ISID0(t)) {
			yyinp--;
			while (ISID(*yyinp))
				yyinp++;
			yynode.nd_val = 0;
			return NUMBER;
		}
		return ch;
	}
	/* single-character operator: give back the lookahead character */
	yyinp--;
	return ch;
}
865 
866 /*
867  * Let the command-line args be faked defines at beginning of file.
868  */
869 static void
prinit(struct initar * it,struct includ * ic)870 prinit(struct initar *it, struct includ *ic)
871 {
872 	const char *pre, *post;
873 	char *a;
874 
875 	if (it->next)
876 		prinit(it->next, ic);
877 	pre = post = NULL; /* XXX gcc */
878 	switch (it->type) {
879 	case 'D':
880 		pre = "#define ";
881 		if ((a = strchr(it->str, '=')) != NULL) {
882 			*a = ' ';
883 			post = "\n";
884 		} else
885 			post = " 1\n";
886 		break;
887 	case 'U':
888 		pre = "#undef ";
889 		post = "\n";
890 		break;
891 	case 'i':
892 		pre = "#include \"";
893 		post = "\"\n";
894 		break;
895 	default:
896 		error("prinit");
897 	}
898 	strlcat((char *)ic->buffer, pre, CPPBUF+1);
899 	strlcat((char *)ic->buffer, it->str, CPPBUF+1);
900 	if (strlcat((char *)ic->buffer, post, CPPBUF+1) >= CPPBUF+1)
901 		error("line exceeds buffer size");
902 
903 	ic->lineno--;
904 	while (*ic->maxread)
905 		ic->maxread++;
906 }
907 
908 /*
909  * A new file included.
910  * If ifiles == NULL, this is the first file and already opened (stdin).
911  * Return 0 on success, -1 if file to be included is not found.
912  */
913 int
pushfile(const usch * file,const usch * fn,int idx,void * incs)914 pushfile(const usch *file, const usch *fn, int idx, void *incs)
915 {
916 	extern struct initar *initar;
917 	struct includ ibuf;
918 	struct includ *ic;
919 	int otrulvl;
920 
921 	ic = &ibuf;
922 	ic->next = ifiles;
923 
924 	if (file != NULL) {
925 		if ((ic->infil = open((const char *)file, O_RDONLY)) < 0)
926 			return -1;
927 		ic->orgfn = ic->fname = file;
928 		if (++inclevel > MAX_INCLEVEL)
929 			error("limit for nested includes exceeded");
930 	} else {
931 		ic->infil = 0;
932 		ic->orgfn = ic->fname = (const usch *)"<stdin>";
933 	}
934 #ifndef BUF_STACK
935 	ic->bbuf = malloc(BBUFSZ);
936 #endif
937 	ic->buffer = ic->bbuf+PBMAX;
938 	ic->curptr = ic->buffer;
939 	ifiles = ic;
940 	ic->lineno = 1;
941 	ic->escln = 0;
942 	ic->maxread = ic->curptr;
943 	ic->idx = idx;
944 	ic->incs = incs;
945 	ic->fn = fn;
946 	prtline(1);
947 	if (initar) {
948 		int oin = ic->infil;
949 		ic->infil = -1;
950 		*ic->maxread = 0;
951 		prinit(initar, ic);
952 		initar = NULL;
953 		if (dMflag)
954 			printf("%s", (char *)ic->buffer);
955 		fastscan();
956 		prtline(1);
957 		ic->infil = oin;
958 	}
959 
960 	otrulvl = trulvl;
961 
962 	fastscan();
963 
964 	if (otrulvl != trulvl || flslvl)
965 		error("unterminated conditional");
966 
967 #ifndef BUF_STACK
968 	free(ic->bbuf);
969 #endif
970 	ifiles = ic->next;
971 	close(ic->infil);
972 	inclevel--;
973 	return 0;
974 }
975 
976 /*
977  * Print current position to output file.
978  */
979 void
prtline(int nl)980 prtline(int nl)
981 {
982 	usch *sb = stringbuf;
983 
984 	if (Mflag) {
985 		if (dMflag)
986 			return; /* no output */
987 		if (ifiles->lineno == 1 &&
988 		    (MMDflag == 0 || ifiles->idx != SYSINC)) {
989 			printf("%s: %s\n", Mfile, ifiles->fname);
990 			if (MPflag &&
991 			    strcmp((const char *)ifiles->fname, (char *)MPfile))
992 				printf("%s:\n", ifiles->fname);
993 		}
994 	} else if (!Pflag) {
995 		sheap("\n# %d \"%s\"", ifiles->lineno, ifiles->fname);
996 		if (ifiles->idx == SYSINC)
997 			sheap(" 3");
998 		if (nl) sheap("\n");
999 		putstr(sb);
1000 	}
1001 	stringbuf = sb;
1002 }
1003 
/*
 * Externally visible wrapper around the file-local unch():
 * push character c back onto the current input.
 */
void
cunput(int c)
{
#ifdef PCC_DEBUG
//	if (dflag)printf(": '%c'(%d)\n", c > 31 ? c : ' ', c);
#endif
	unch(c);
}
1012 
/*
 * Convert a digit character to its numeric value:
 * '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15.
 * The argument is not validated; callers check it beforehand.
 */
static int
dig2num(int c)
{
	if (c < 'A')
		return c - '0';
	return c - (c < 'a' ? 'A' : 'a') + 10;
}
1024 
1025 /*
1026  * Convert string numbers to unsigned long long and check overflow.
1027  */
1028 static void
cvtdig(usch ** yyp)1029 cvtdig(usch **yyp)
1030 {
1031 	unsigned long long rv = 0;
1032 	unsigned long long rv2 = 0;
1033 	usch *y = *yyp;
1034 	int rad;
1035 
1036 	y--;
1037 	rad = *y != '0' ? 10 : y[1] == 'x' ||  y[1] == 'X' ? 16 : 8;
1038 	if (rad == 16)
1039 		y += 2;
1040 	while ((spechr[*y] & C_HEX)) {
1041 		rv = rv * rad + dig2num(*y);
1042 		/* check overflow */
1043 		if (rv / rad < rv2)
1044 			error("constant is out of range");
1045 		rv2 = rv;
1046 		y++;
1047 	}
1048 	yynode.op = NUMBER;
1049 	while (*y == 'l' || *y == 'L' || *y == 'u' || *y == 'U') {
1050 		if (*y == 'u' || *y == 'U')
1051 			yynode.op = UNUMBER;
1052 		y++;
1053 	}
1054 	yynode.nd_uval = rv;
1055 	if ((rad == 8 || rad == 16) && yynode.nd_val < 0)
1056 		yynode.op = UNUMBER;
1057 	if (yynode.op == NUMBER && yynode.nd_val < 0)
1058 		/* too large for signed, see 6.4.4.1 */
1059 		error("constant is out of range");
1060 	*yyp = y;
1061 }
1062 
1063 static int
charcon(usch ** yyp)1064 charcon(usch **yyp)
1065 {
1066 	int val, c;
1067 	usch *p = *yyp;
1068 
1069 	val = 0;
1070 	if (*p++ == '\\') {
1071 		switch (*p++) {
1072 		case 'a': val = '\a'; break;
1073 		case 'b': val = '\b'; break;
1074 		case 'f': val = '\f'; break;
1075 		case 'n': val = '\n'; break;
1076 		case 'r': val = '\r'; break;
1077 		case 't': val = '\t'; break;
1078 		case 'v': val = '\v'; break;
1079 		case '\"': val = '\"'; break;
1080 		case '\'': val = '\''; break;
1081 		case '\\': val = '\\'; break;
1082 		case 'x':
1083 			while ((spechr[c = *p] & C_HEX)) {
1084 				val = val * 16 + dig2num(c);
1085 				p++;
1086 			}
1087 			break;
1088 		case '0': case '1': case '2': case '3': case '4':
1089 		case '5': case '6': case '7':
1090 			p--;
1091 			while ((spechr[c = *p] & C_DIGIT)) {
1092 				val = val * 8 + (c - '0');
1093 				p++;
1094 			}
1095 			break;
1096 		default: val = p[-1];
1097 		}
1098 
1099 	} else
1100 		val = p[-1];
1101 	if (*p != '\'')
1102 		error("bad charcon");
1103 	*yyp = ++p;
1104 	return val;
1105 }
1106 
1107 static void
chknl(int ignore)1108 chknl(int ignore)
1109 {
1110 	void (*f)(const char *, ...);
1111 	int t;
1112 
1113 	f = ignore ? warning : error;
1114 	if ((t = fastspc()) != '\n') {
1115 		if (t) {
1116 			f("newline expected");
1117 			/* ignore rest of line */
1118 			while ((t = inch()) >= 0 && t != '\n')
1119 				;
1120 		} else
1121 			f("no newline at end of file");
1122 	}
1123 	unch(t);
1124 }
1125 
/*
 * Handle #else.
 * Works on the conditional counters used throughout this file:
 * trulvl is incremented for a condition that holds and flslvl for one
 * that does not (see ifdefstmt()).  elslvl remembers the level
 * (trulvl+flslvl) at which an #else was last seen, which is how a
 * second #else at the same level is detected.
 * NOTE(review): elflvl is set by elifstmt(); "elflvl > trulvl" looks
 * like "an earlier branch of this chain was taken" -- confirm.
 */
static void
elsestmt(void)
{
	if (flslvl) {
		if (elflvl > trulvl)
			;
		else if (--flslvl!=0)
			flslvl++;	/* still skipping an outer level: undo */
		else
			trulvl++;	/* innermost false branch turns true */
	} else if (trulvl) {
		/* the true branch ends here; skip the #else part */
		flslvl++;
		trulvl--;
	} else
		error("#else in non-conditional section");
	if (elslvl==trulvl+flslvl)
		error("too many #else");
	elslvl=trulvl+flslvl;
	chknl(1);
}
1146 
1147 static void
ifdefstmt(void)1148 ifdefstmt(void)
1149 {
1150 	usch *bp;
1151 	int ch;
1152 
1153 	if (!ISID0(ch = fastspc()))
1154 		error("bad #ifdef");
1155 	bp = heapid(ch);
1156 	stringbuf = bp;
1157 
1158 	if (lookup(bp, FIND) == NULL)
1159 		flslvl++;
1160 	else
1161 		trulvl++;
1162 	chknl(0);
1163 }
1164 
1165 static void
ifndefstmt(void)1166 ifndefstmt(void)
1167 {
1168 	usch *bp;
1169 	int ch;
1170 
1171 	if (!ISID0(ch = fastspc()))
1172 		error("bad #ifndef");
1173 	bp = heapid(ch);
1174 	stringbuf = bp;
1175 	if (lookup(bp, FIND) != NULL)
1176 		flslvl++;
1177 	else
1178 		trulvl++;
1179 	chknl(0);
1180 }
1181 
1182 static void
endifstmt(void)1183 endifstmt(void)
1184 {
1185 	if (flslvl)
1186 		flslvl--;
1187 	else if (trulvl)
1188 		trulvl--;
1189 	else
1190 		error("#endif in non-conditional section");
1191 	if (flslvl == 0)
1192 		elflvl = 0;
1193 	elslvl = 0;
1194 	chknl(1);
1195 }
1196 
1197 static void
ifstmt(void)1198 ifstmt(void)
1199 {
1200 	exprline() ? trulvl++ : flslvl++;
1201 }
1202 
/*
 * Handle #elif.
 * When reached while nothing is being skipped (flslvl == 0), the
 * current trulvl is recorded in elflvl before the branch is closed.
 * The expression is only evaluated when this #elif belongs to the
 * innermost skipped level and "elflvl > trulvl" does not hold.
 * NOTE(review): "elflvl > trulvl" looks like "a previous branch of
 * this chain was already taken" -- confirm.
 */
static void
elifstmt(void)
{
	if (flslvl == 0)
		elflvl = trulvl;
	if (flslvl) {
		if (elflvl > trulvl)
			;
		else if (--flslvl!=0)
			flslvl++;	/* still skipping an outer level: undo */
		else if (exprline())
			trulvl++;
		else
			flslvl++;
	} else if (trulvl) {
		/* the true branch ends here; skip the rest of the chain */
		flslvl++;
		trulvl--;
	} else
		error("#elif in non-conditional section");
}
1223 
1224 /* save line into stringbuf */
1225 static usch *
savln(void)1226 savln(void)
1227 {
1228 	int c;
1229 	usch *cp = stringbuf;
1230 
1231 	while ((c = inch()) != -1) {
1232 		if (c == '\n') {
1233 			unch(c);
1234 			break;
1235 		}
1236 		savch(c);
1237 	}
1238 	savch(0);
1239 
1240 	return cp;
1241 }
1242 
1243 static void
cpperror(void)1244 cpperror(void)
1245 {
1246 	usch *cp;
1247 
1248 	cp = savln();
1249 	error("#error %s", cp);
1250 	stringbuf = cp;
1251 }
1252 
1253 static void
cppwarning(void)1254 cppwarning(void)
1255 {
1256 	usch *cp;
1257 
1258 	cp = savln();
1259 	warning("#warning %s", cp);
1260 	stringbuf = cp;
1261 }
1262 
1263 static void
undefstmt(void)1264 undefstmt(void)
1265 {
1266 	struct symtab *np;
1267 	usch *bp;
1268 	int ch;
1269 
1270 	if (!ISID0(ch = fastspc()))
1271 		error("bad #undef");
1272 	bp = heapid(ch);
1273 	if ((np = lookup(bp, FIND)) != NULL)
1274 		np->value = 0;
1275 	stringbuf = bp;
1276 	chknl(0);
1277 }
1278 
1279 static void
identstmt(void)1280 identstmt(void)
1281 {
1282 	struct symtab *sp;
1283 	usch *bp;
1284 	int ch;
1285 
1286 	bp = stringbuf;
1287 	if (ISID0(ch = fastspc())) {
1288 		bp = heapid(ch);
1289 		stringbuf = bp;
1290 		if ((sp = lookup(bp, FIND)))
1291 			kfind(sp);
1292 		if (bp[0] != '\"')
1293 			goto bad;
1294 	} else if (ch == '\"') {
1295 		faststr(ch, savch);
1296 	} else
1297 		goto bad;
1298 	stringbuf = bp;
1299 	chknl(1);
1300 	return;
1301 bad:
1302 	error("bad #ident directive");
1303 }
1304 
1305 static void
pragmastmt(void)1306 pragmastmt(void)
1307 {
1308 	int ch;
1309 
1310 	putstr((const usch *)"\n#pragma");
1311 	while ((ch = inch()) != '\n' && ch > 0)
1312 		putch(ch);
1313 	unch(ch);
1314 	prtline(1);
1315 }
1316 
/*
 * Externally visible wrapper around the file-local inch():
 * return the next input character.
 */
int
cinput(void)
{

	return inch();
}
1323 
/*
 * Flags telling ppdir() what to do with a directive that is seen
 * while input is being skipped (flslvl != 0):
 *	DIR_FLSLVL - the handler is called anyway.
 *	DIR_FLSINC - the handler is not called; flslvl is incremented.
 * A directive with neither flag is ignored while skipping.
 */
#define	DIR_FLSLVL	001
#define	DIR_FLSINC	002
/* directive table: name, handler, and flags as described above */
static struct {
	const char *name;
	void (*fun)(void);
	int flags;
} ppd[] = {
	{ "ifndef", ifndefstmt, DIR_FLSINC },
	{ "ifdef", ifdefstmt, DIR_FLSINC },
	{ "if", ifstmt, DIR_FLSINC },
	{ "include", include, 0 },
	{ "else", elsestmt, DIR_FLSLVL },
	{ "endif", endifstmt, DIR_FLSLVL },
	{ "error", cpperror, 0 },
	{ "warning", cppwarning, 0 },
	{ "define", define, 0 },
	{ "undef", undefstmt, 0 },
	{ "line", line, 0 },
	{ "pragma", pragmastmt, 0 },
	{ "elif", elifstmt, DIR_FLSLVL },
	{ "ident", identstmt, 0 },
#ifdef GCC_COMPAT
	{ "include_next", include_next, 0 },
#endif
};
/* number of entries in ppd[] */
#define	NPPD	(int)(sizeof(ppd) / sizeof(ppd[0]))
1350 
/*
 * Discard the rest of the current line.  The newline itself is pushed
 * back for the caller; end of input also stops the scan.
 */
static void
skpln(void)
{
	int c;

	for (;;) {
		c = inch();
		if (c == -1)
			return;
		if (c == '\n') {
			unch('\n');
			return;
		}
	}
}
1364 
1365 /*
1366  * do an even faster scan than fastscan while at flslvl.
1367  * just search for a new directive.
1368  */
1369 static void
flscan(void)1370 flscan(void)
1371 {
1372 	int ch;
1373 
1374 	for (;;) {
1375 		switch (ch = inch()) {
1376 		case -1:
1377 			return;
1378 		case '\n':
1379 			ifiles->lineno++;
1380 			putch('\n');
1381 			if ((ch = fastspcg()) == '#')
1382 				return;
1383 			unch(ch);
1384 			break;
1385 		case '/':
1386 			fastcmnt(0);	/* may be around directives */
1387 			break;
1388 		}
1389         }
1390 }
1391 
1392 
1393 /*
1394  * Handle a preprocessor directive.
1395  * # is already found.
1396  */
1397 void
ppdir(void)1398 ppdir(void)
1399 {
1400 	int ch, i, oldC;
1401 	usch *bp;
1402 
1403 	oldC = Cflag;
1404 redo:	Cflag = 0;
1405 	if ((ch = fastspc()) == '\n') { /* empty directive */
1406 		unch(ch);
1407 		Cflag = oldC;
1408 		return;
1409 	}
1410 	Cflag = oldC;
1411 	if ((spechr[ch] & C_ID0) == 0)
1412 		goto out;
1413 	bp = heapid(ch);
1414 	stringbuf = bp;
1415 
1416 	/* got some keyword */
1417 	for (i = 0; i < NPPD; i++) {
1418 		if (bp[0] == ppd[i].name[0] &&
1419 		    strcmp((char *)bp, ppd[i].name) == 0) {
1420 			if (flslvl == 0) {
1421 				(*ppd[i].fun)();
1422 				if (flslvl == 0)
1423 					return;
1424 			} else {
1425 				if (ppd[i].flags & DIR_FLSLVL) {
1426 					(*ppd[i].fun)();
1427 					if (flslvl == 0)
1428 						return;
1429 				}else if (ppd[i].flags & DIR_FLSINC)
1430 					flslvl++;
1431 			}
1432 			flscan();
1433 			goto redo;
1434 		}
1435 	}
1436 
1437 out:
1438 	if (flslvl == 0 && Aflag == 0)
1439 		error("invalid preprocessor directive");
1440 
1441 	unch(ch);
1442 	skpln();
1443 }
1444