1 /* Id: token.c,v 1.157 2016/01/09 09:53:44 ragge Exp */
2 /* $NetBSD: token.c,v 1.1.1.6 2016/02/09 20:28:44 plunky Exp $ */
3
4 /*
5 * Copyright (c) 2004,2009 Anders Magnusson. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * Tokenizer for the C preprocessor.
30 * There are two main routines:
31 * - fastscan() loops over the input stream searching for magic
32 * characters that may require actions.
33 * - yylex() returns something from the input stream that
34 * is suitable for yacc.
35 *
36 * Other functions of common use:
37 * - inpch() returns a raw character from the current input stream.
38 * - inch() is like inpch but \\n and trigraphs are expanded.
39 * - unch() pushes back a character to the input stream.
40 *
41 * Input data can be read from either stdio or a buffer.
42 * If a buffer is read, it will return EOF when ended and then jump back
43 * to the previous buffer.
44 * - setibuf(usch *ptr). Buffer to read from, until NULL, return EOF.
45 * When EOF returned, pop buffer.
46 * - setobuf(usch *ptr). Buffer to write to
47 *
48 * There are three places data is read:
49 * - fastscan() which has a small loop that will scan over input data.
50 * - flscan() where everything is skipped except directives (flslvl)
51 * - inch() that everything else uses.
52 *
53 * 5.1.1.2 Translation phases:
54 * 1) Convert UCN to UTF-8 which is what pcc uses internally (chkucn).
55 * Remove \r (unwanted)
56 * Convert trigraphs (chktg)
57 * 2) Remove \\\n. Need extra care for identifiers and #line.
58 * 3) Tokenize.
59 * Remove comments (fastcmnt)
60 */
61
62 #include "config.h"
63
64 #include <stdlib.h>
65 #include <string.h>
66 #ifdef HAVE_UNISTD_H
67 #include <unistd.h>
68 #endif
69 #include <fcntl.h>
70
71 #include "compat.h"
72 #include "cpp.h"
73
/* Forward declarations: #if expression helpers and the directive handlers
 * that are referenced from the ppd[] dispatch table. */
74 static void cvtdig(usch **);
75 static int dig2num(int);
76 static int charcon(usch **);
77 static void elsestmt(void);
78 static void ifdefstmt(void);
79 static void ifndefstmt(void);
80 static void endifstmt(void);
81 static void ifstmt(void);
82 static void cpperror(void);
83 static void cppwarning(void);
84 static void undefstmt(void);
85 static void pragmastmt(void);
86 static void elifstmt(void);
87
/* Forward declarations: raw character input and pushback. */
88 static int inpch(void);
89 static int chktg(void);
90 static int chkucn(void);
91 static void unch(int c);
92
/* Emit ch only when not inside a false conditional (flslvl == 0).
 * NOTE(review): expands to a bare "if"; do not follow it with "else". */
93 #define PUTCH(ch) if (!flslvl) putch(ch)
94 /* protection against recursion in #include */
95 #define MAX_INCLEVEL 100
/* Current #include nesting depth; maintained by pushfile(). */
96 static int inclevel;
97
/* Stack of input files; the head is the file currently being read. */
98 struct includ *ifiles;
99
100 /* some common special combos for init */
101 #define C_NL (C_SPEC|C_WSNL)
102 #define C_DX (C_SPEC|C_ID|C_DIGIT|C_HEX)
103 #define C_I (C_SPEC|C_ID|C_ID0)
104 #define C_IP (C_SPEC|C_ID|C_ID0|C_EP)
105 #define C_IX (C_SPEC|C_ID|C_ID0|C_HEX)
106 #define C_IXE (C_SPEC|C_ID|C_ID0|C_HEX|C_EP)
107
/*
 * Character classification table, indexed by character value (0-255).
 * Each entry is a bitmask of C_* flags; every row below holds eight entries.
 */
108 usch spechr[256] = {
/* 0x00 - 0x1f */
109 0, 0, 0, 0, C_SPEC, C_SPEC, 0, 0,
110 0, C_WSNL, C_NL, 0, 0, C_WSNL, 0, 0,
111 0, 0, 0, 0, 0, 0, 0, 0,
112 0, 0, 0, 0, 0, 0, 0, 0,
113
/* 0x20 - 0x3f: space, punctuation and the digits */
114 C_WSNL, C_2, C_SPEC, 0, 0, 0, C_2, C_SPEC,
115 0, 0, 0, C_2, 0, C_2, 0, C_SPEC,
116 C_DX, C_DX, C_DX, C_DX, C_DX, C_DX, C_DX, C_DX,
117 C_DX, C_DX, 0, 0, C_2, C_2, C_2, C_SPEC,
118
/* 0x40 - 0x5f: upper-case letters, backslash and underscore */
119 0, C_IX, C_IX, C_IX, C_IX, C_IXE, C_IX, C_I,
120 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
121 C_IP, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
122 C_I, C_I, C_I, 0, C_SPEC, 0, 0, C_I,
123
/* 0x60 - 0x7f: lower-case letters */
124 0, C_IX, C_IX, C_IX, C_IX, C_IXE, C_IX, C_I,
125 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
126 C_IP, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
127 C_I, C_I, C_I, 0, C_2, 0, 0, 0,
128
/* 0x80 - 0xff: every byte is flagged as an identifier character */
129 /* utf-8 */
130 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
131 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
132 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
133 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
134
135 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
136 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
137 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
138 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
139
140 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
141 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
142 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
143 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
144
145 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
146 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
147 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
148 C_I, C_I, C_I, C_I, C_I, C_I, C_I, C_I,
149 };
150
151 /*
152 * fill up the input buffer
153 */
154 static int
inpbuf(void)155 inpbuf(void)
156 {
157 int len;
158
159 if (ifiles->infil == -1)
160 return 0;
161 len = read(ifiles->infil, ifiles->buffer, CPPBUF);
162 if (len == -1)
163 error("read error on file %s", ifiles->orgfn);
164 if (len > 0) {
165 ifiles->buffer[len] = 0;
166 ifiles->curptr = ifiles->buffer;
167 ifiles->maxread = ifiles->buffer + len;
168 }
169 return len;
170 }
171
172 /*
173 * Fillup input buffer to contain at least minsz characters.
174 */
175 static int
refill(int minsz)176 refill(int minsz)
177 {
178 usch *dp;
179 int i, sz;
180
181 if (ifiles->curptr+minsz < ifiles->maxread)
182 return 0; /* already enough in input buffer */
183
184 sz = ifiles->maxread - ifiles->curptr;
185 dp = ifiles->buffer - sz;
186 for (i = 0; i < sz; i++)
187 dp[i] = ifiles->curptr[i];
188 i = inpbuf();
189 ifiles->curptr = dp;
190 if (i == 0) {
191 ifiles->maxread = ifiles->buffer;
192 ifiles->buffer[0] = 0;
193 }
194 return 0;
195 }
196 #define REFILL(x) if (ifiles->curptr+x >= ifiles->maxread) refill(x)
197
198 /*
199 * return a raw character from the input stream
200 */
201 static inline int
inpch(void)202 inpch(void)
203 {
204
205 do {
206 if (ifiles->curptr < ifiles->maxread)
207 return *ifiles->curptr++;
208 } while (inpbuf() > 0);
209
210 return -1;
211 }
212
213 /*
214 * push a character back to the input stream
215 */
216 static void
unch(int c)217 unch(int c)
218 {
219 if (c == -1)
220 return;
221
222 ifiles->curptr--;
223 if (ifiles->curptr < ifiles->bbuf)
224 error("pushback buffer full");
225 *ifiles->curptr = (usch)c;
226 }
227
/*
 * Trigraph check.  The caller has already consumed the first '?'.
 * If the input continues with '?' plus one of the nine trigraph
 * characters, both are consumed and the replacement character is
 * returned.  Otherwise everything read here is pushed back and 0 is
 * returned.
 */
static int
chktg(void)
{
	static const char third[] = "=()<>/'!-";
	static const char repl[] = "#[]{}\\^|~";
	const char *hit;
	int c;

	c = inpch();
	if (c != '?') {
		unch(c);
		return 0;
	}

	c = inpch();
	/* c > 0 keeps EOF and NUL away from strchr()'s terminator match */
	if (c > 0 && (hit = strchr(third, c)) != NULL)
		return repl[hit - third];

	unch(c);
	unch('?');
	return 0;
}
257
/*
 * 5.1.1.2 Translation phase 1:
 * drop '\r', let chkucn() convert universal character names and
 * replace trigraphs.  Returns the next resulting character.
 */
static int
inc1(void)
{
	int c, tg;

	for (;;) {
		c = inpch();
		if (c == '\r')
			continue;
		if (c == '\\' && chkucn())
			continue;	/* UCN was converted and pushed back */
		break;
	}
	if (c == '?') {
		tg = chktg();
		if (tg != 0)
			c = tg;
	}
	return c;
}
273
274
275 /*
276 * 5.1.1.2 Translation phase 2.
277 */
278 int
inc2(void)279 inc2(void)
280 {
281 int ch, c2;
282
283 if ((ch = inc1()) != '\\')
284 return ch;
285 if ((c2 = inc1()) == '\n') {
286 ifiles->escln++;
287 ch = inc2();
288 } else
289 unch(c2);
290 return ch;
291 }
292
293 static int incmnt;
294 /*
295 * deal with comments in the fast scanner.
296 * ps prints out the initial '/' if failing to batch comment.
297 */
298 static int
fastcmnt(int ps)299 fastcmnt(int ps)
300 {
301 int ch, rv = 1;
302
303 incmnt = 1;
304 if ((ch = inc2()) == '/') { /* C++ comment */
305 while ((ch = inc2()) != '\n')
306 ;
307 unch(ch);
308 } else if (ch == '*') {
309 for (;;) {
310 if ((ch = inc2()) < 0)
311 break;
312 if (ch == '*') {
313 if ((ch = inc2()) == '/') {
314 break;
315 } else
316 unch(ch);
317 } else if (ch == '\n') {
318 ifiles->lineno++;
319 putch('\n');
320 }
321 }
322 } else {
323 if (ps) PUTCH('/'); /* XXX ? */
324 unch(ch);
325 rv = 0;
326 }
327 if (ch < 0)
328 error("file ends in comment");
329 incmnt = 0;
330 return rv;
331 }
332
333 /*
334 * return next char, partly phase 3.
335 */
336 static int
inch(void)337 inch(void)
338 {
339 int ch, n;
340
341 ch = inc2();
342 n = ifiles->lineno;
343 if (ch == '/' && Cflag == 0 && fastcmnt(0)) {
344 /* Comments 5.1.1.2 p3 */
345 /* no space if traditional or multiline */
346 ch = (tflag || n != ifiles->lineno) ? inch() : ' ';
347 }
348 return ch;
349 }
350
351 /*
352 * check for universal-character-name on input, and
353 * unput to the pushback buffer encoded as UTF-8.
354 */
355 static int
chkucn(void)356 chkucn(void)
357 {
358 unsigned long cp, m;
359 int ch, n;
360
361 if (incmnt)
362 return 0;
363 if ((ch = inpch()) == -1)
364 return 0;
365 if (ch == 'u')
366 n = 4;
367 else if (ch == 'U')
368 n = 8;
369 else {
370 unch(ch);
371 return 0;
372 }
373
374 cp = 0;
375 while (n-- > 0) {
376 if ((ch = inpch()) == -1 || (spechr[ch] & C_HEX) == 0) {
377 warning("invalid universal character name");
378 // XXX should actually unput the chars and return 0
379 unch(ch); // XXX eof
380 break;
381 }
382 cp = cp * 16 + dig2num(ch);
383 }
384
385 if ((cp < 0xa0 && cp != 0x24 && cp != 0x40 && cp != 0x60)
386 || (cp >= 0xd800 && cp <= 0xdfff)) /* 6.4.3.2 */
387 error("universal character name cannot be used");
388
389 if (cp > 0x7fffffff)
390 error("universal character name out of range");
391
392 n = 0;
393 m = 0x7f;
394 while (cp > m) {
395 unch(0x80 | (cp & 0x3f));
396 cp >>= 6;
397 m >>= (n++ ? 1 : 2);
398 }
399 unch(((m << 1) ^ 0xfe) | cp);
400 return 1;
401 }
402
403 /*
404 * deal with comments when -C is active.
405 * Save comments in expanded macros???
406 */
407 int
Ccmnt(void (* d)(int))408 Ccmnt(void (*d)(int))
409 {
410 int ch;
411
412 if ((ch = inch()) == '/') { /* C++ comment */
413 d(ch);
414 do {
415 d(ch);
416 } while ((ch = inch()) != '\n');
417 unch(ch);
418 return 1;
419 } else if (ch == '*') {
420 d('/');
421 d('*');
422 for (;;) {
423 ch = inch();
424 d(ch);
425 if (ch == '*') {
426 if ((ch = inch()) == '/') {
427 d(ch);
428 return 1;
429 } else
430 unch(ch);
431 } else if (ch == '\n') {
432 ifiles->lineno++;
433 }
434 }
435 }
436 d('/');
437 unch(ch);
438 return 0;
439 }
440
/*
 * Skip characters for which ISWS() holds (inch() has already dealt
 * with comments) and return the first character that is not one.
 */
static int
fastspc(void)
{
	int c;

	do {
		c = inch();
	} while (ISWS(c));
	return c;
}
454
455 /*
456 * As above but only between \n and #.
457 */
458 static int
fastspcg(void)459 fastspcg(void)
460 {
461 int ch, c2;
462
463 while ((ch = inch()) == '/' || ch == '%' || ISWS(ch)) {
464 if (ch == '%') {
465 if ((c2 = inch()) == ':')
466 ch = '#'; /* digraphs */
467 else
468 unch(c2);
469 break;
470 }
471 if (ch == '/') {
472 if (Cflag)
473 return ch;
474 if (fastcmnt(0) == 0)
475 break;
476 putch(' ');
477 } else
478 putch(ch);
479 }
480 return ch;
481 }
482
483 /*
484 * readin chars and store on heap. Warn about too long names.
485 */
486 usch *
heapid(int ch)487 heapid(int ch)
488 {
489 usch *bp = stringbuf;
490 do {
491 savch(ch);
492 } while (spechr[ch = inch()] & C_ID);
493 savch(0);
494 unch(ch);
495 return bp;
496 }
497
498 /*
499 * get a string or character constant and save it as given by d.
500 */
501 void
faststr(int bc,void (* d)(int))502 faststr(int bc, void (*d)(int))
503 {
504 int ch;
505
506 incmnt = 1;
507 d(bc);
508 while ((ch = inc2()) != bc) {
509 if (ch == '\n') {
510 warning("unterminated literal");
511 incmnt = 0;
512 unch(ch);
513 return;
514 }
515 if (ch < 0)
516 return;
517 if (ch == '\\') {
518 incmnt = 0;
519 if (chkucn())
520 continue;
521 incmnt = 1;
522 d(ch);
523 ch = inc2();
524 }
525 d(ch);
526 }
527 d(ch);
528 incmnt = 0;
529 }
530
531 /*
532 * get a preprocessing number and save it as given by d.
533 * Initial char ch is always stored.
534 * returns first non-pp-number char.
535 *
536 * pp-number: digit
537 * . digit
538 * pp-number digit
539 * pp-number identifier-nondigit
540 * pp-number e sign
541 * pp-number E sign
542 * pp-number p sign
543 * pp-number P sign
544 * pp-number .
545 */
546 int
fastnum(int ch,void (* d)(int))547 fastnum(int ch, void (*d)(int))
548 {
549 int c2;
550
551 if ((spechr[ch] & C_DIGIT) == 0) {
552 /* not digit, dot */
553 d(ch);
554 ch = inch();
555 if ((spechr[ch] & C_DIGIT) == 0)
556 return ch;
557 }
558 for (;;) {
559 d(ch);
560 if ((ch = inch()) < 0)
561 return -1;
562 if ((spechr[ch] & C_EP)) {
563 if ((c2 = inch()) != '-' && c2 != '+') {
564 if (c2 >= 0)
565 unch(c2);
566 break;
567 }
568 d(ch);
569 ch = c2;
570 } else if (ch == '.' || (spechr[ch] & C_ID)) {
571 continue;
572 } else
573 break;
574 }
575 return ch;
576 }
577
578 /*
579 * Scan quickly the input file searching for:
580 * - '#' directives
581 * - keywords (if not flslvl)
582 * - comments
583 *
584 * Handle strings, numbers and trigraphs with care.
585 * Only data from pp files are scanned here, never any rescans.
586 * This loop is always at trulvl.
587 */
/*
 * Returns when the current input is exhausted.  Ordinary characters are
 * copied straight to the output with putch(); characters flagged C_SPEC
 * in spechr[] are dispatched through the switch below.
 */
588 static void
fastscan(void)589 fastscan(void)
590 {
591 struct symtab *nl;
592 int ch, c2, i, nch;
593 usch *cp, *cp2;
594
/* start as if a newline had just been seen, so that a directive on the
 * very first line is found */
595 goto run;
596
597 for (;;) {
598 /* tight loop to find special chars */
599 /* should use getchar/putchar here */
600 for (;;) {
601 if (ifiles->curptr < ifiles->maxread) {
602 ch = *ifiles->curptr++;
603 } else {
604 if (inpbuf() > 0)
605 continue;
606 return;
607 }
/* xloop is also entered from the switch below with a character that
 * has already been fetched into ch */
608 xloop: if (ch < 0)
609 return; /* EOF */
610 if ((spechr[ch] & C_SPEC) != 0)
611 break;
612 putch(ch);
613 }
614
/* peek at the character after ch without consuming it */
615 REFILL(2);
616 nch = *ifiles->curptr;
617 switch (ch) {
618 case WARN:
619 case CONC:
620 error("bad char passed");
621 break;
622
623 case '/': /* Comments */
624 if (nch != '/' && nch != '*') {
625 putch(ch);
626 continue;
627 }
628 if (Cflag == 0) {
629 if (fastcmnt(1))
630 putch(' '); /* 5.1.1.2 p3 */
631 } else
632 Ccmnt(putch);
633 break;
634
635 case '\n': /* newlines, for pp directives */
636 /* take care of leftover \n */
/* one newline for this line plus one for every spliced line (escln) */
637 i = ifiles->escln + 1;
638 ifiles->lineno += i;
639 ifiles->escln = 0;
640 while (i-- > 0)
641 putch('\n');
642
643 /* search for a # */
644 run: while ((ch = inch()) == '\t' || ch == ' ')
645 putch(ch);
/* the digraph %: is an alternative spelling of # */
646 if (ch == '%') {
647 if ((c2 = inch()) != ':')
648 unch(c2);
649 else
650 ch = '#';
651 }
652 if (ch == '#')
653 ppdir();
654 else
655 goto xloop;
656 break;
657
658 case '?':
/* possible trigraph: chktg() returns the replacement or 0 */
659 if (nch == '?' && (ch = chktg()))
660 goto xloop;
661 putch('?');
662 break;
663
664 case '\'': /* character constant */
665 if (tflag) {
666 putch(ch);
667 break; /* character constants ignored */
668 }
669 /* FALLTHROUGH */
670 case '\"': /* strings */
671 faststr(ch, putch);
672 break;
673
674 case '.': /* for pp-number */
675 case '0': case '1': case '2': case '3': case '4':
676 case '5': case '6': case '7': case '8': case '9':
/* fastnum() returns the first character after the number */
677 ch = fastnum(ch, putch);
678 goto xloop;
679
680 case 'u':
/* u8"..." prefix: emit the 'u' as-is rather than as an identifier */
681 if (nch == '8' && ifiles->curptr[1] == '\"') {
682 putch(ch);
683 break;
684 }
685 /* FALLTHROUGH */
686 case 'L':
687 case 'U':
/* literal prefix directly followed by a quote: emit it as-is */
688 if (nch == '\"' || nch == '\'') {
689 putch(ch);
690 break;
691 }
692 /* FALLTHROUGH */
693 default:
694 #ifdef PCC_DEBUG
695 if ((spechr[ch] & C_ID) == 0)
696 error("fastscan");
697 #endif
698 ident:
699 if (flslvl)
700 error("fastscan flslvl");
/* collect the identifier at stringbuf, then release the space again */
701 cp = stringbuf;
702 heapid(ch);
703 stringbuf = cp;
704 if ((nl = lookup(cp, FIND))) {
/* NOTE(review): presumably kfind() leaves the expansion at cp when it
 * returns non-zero -- confirm against its definition */
705 if (kfind(nl)) {
/* surround a leading/trailing '-' or '+' with a space */
706 if (*cp == '-' || *cp == '+')
707 putch(' ');
708 putstr(cp);
709 for (cp2 = cp; *cp2; cp2++)
710 ;
711 if (cp2[-1] == '-' || cp2[-1] == '+')
712 putch(' ');
713 }
714 } else
715 putstr(cp);
716 stringbuf = cp;
717 break;
718
719 case '\\':
/* backslash-newline: count the spliced line and drop both characters */
720 if (nch == '\n') {
721 ifiles->escln++;
722 ifiles->curptr++;
723 break;
724 }
/* a universal character name starts an identifier */
725 if (chkucn()) {
726 ch = inch();
727 goto ident;
728 }
729 putch('\\');
730 break;
731 }
732 }
733
/* NOTE(review): the outer for (;;) is never left with break, so the two
 * statements below look unreachable */
734 /*eof:*/ warning("unexpected EOF");
735 putch('\n');
736 }
737
738 /*
739 * Store an if/elif line on heap for parsing, evaluate macros and
740 * call yyparse().
741 */
742 static usch *yyinp;
743 int inexpr;
744 static int
exprline(void)745 exprline(void)
746 {
747 struct symtab *nl;
748 int oCflag = Cflag;
749 usch *cp, *bp = stringbuf;
750 int c, d, ifdef;
751
752 Cflag = ifdef = 0;
753
754 while ((c = inch()) != '\n') {
755 if (c == '\'' || c == '\"') {
756 faststr(c, savch);
757 continue;
758 }
759 if (ISDIGIT(c) || c == '.') {
760 c = fastnum(c, savch);
761 if (c == '\n')
762 break;
763 unch(c);
764 continue;
765 }
766 if (c == 'L' || c == 'u' || c == 'U') {
767 unch(d = inch());
768 if (d == '\'') /* discard wide designator */
769 continue;
770 }
771 if (ISID0(c)) {
772 cp = heapid(c);
773 stringbuf = cp;
774 nl = lookup(cp, FIND);
775 if (strcmp((char *)cp, "defined") == 0) {
776 ifdef = 1;
777 } else if (ifdef) {
778 savch(nl ? '1' : '0');
779 ifdef = 0;
780 } else if (nl != NULL) {
781 inexpr = 1;
782 if (kfind(nl)) {
783 while (*stringbuf)
784 stringbuf++;
785 } else
786 savch('0');
787 inexpr = 0;
788 } else
789 savch('0');
790 } else
791 savch(c);
792 }
793 savch(0);
794 unch('\n');
795 yyinp = bp;
796 c = yyparse();
797 stringbuf = bp;
798 Cflag = oCflag;
799 return c;
800 }
801
802 int
yylex(void)803 yylex(void)
804 {
805 int ch, c2, t;
806
807 while ((ch = *yyinp++) == ' ' || ch == '\t')
808 ;
809 t = ISDIGIT(ch) ? NUMBER : ch;
810 if (ch < 128 && (spechr[ch] & C_2))
811 c2 = *yyinp++;
812 else
813 c2 = 0;
814
815 switch (t) {
816 case 0: return WARN;
817 case '=':
818 if (c2 == '=') return EQ;
819 break;
820 case '!':
821 if (c2 == '=') return NE;
822 break;
823 case '|':
824 if (c2 == '|') return OROR;
825 break;
826 case '&':
827 if (c2 == '&') return ANDAND;
828 break;
829 case '<':
830 if (c2 == '<') return LS;
831 if (c2 == '=') return LE;
832 break;
833 case '>':
834 if (c2 == '>') return RS;
835 if (c2 == '=') return GE;
836 break;
837 case '+':
838 case '-':
839 if (ch == c2)
840 error("invalid preprocessor operator %c%c", ch, c2);
841 break;
842
843 case '\'':
844 yynode.op = NUMBER;
845 yynode.nd_val = charcon(&yyinp);
846 return NUMBER;
847
848 case NUMBER:
849 cvtdig(&yyinp);
850 return NUMBER;
851
852 default:
853 if (ISID0(t)) {
854 yyinp--;
855 while (ISID(*yyinp))
856 yyinp++;
857 yynode.nd_val = 0;
858 return NUMBER;
859 }
860 return ch;
861 }
862 yyinp--;
863 return ch;
864 }
865
866 /*
867 * Let the command-line args be faked defines at beginning of file.
868 */
869 static void
prinit(struct initar * it,struct includ * ic)870 prinit(struct initar *it, struct includ *ic)
871 {
872 const char *pre, *post;
873 char *a;
874
875 if (it->next)
876 prinit(it->next, ic);
877 pre = post = NULL; /* XXX gcc */
878 switch (it->type) {
879 case 'D':
880 pre = "#define ";
881 if ((a = strchr(it->str, '=')) != NULL) {
882 *a = ' ';
883 post = "\n";
884 } else
885 post = " 1\n";
886 break;
887 case 'U':
888 pre = "#undef ";
889 post = "\n";
890 break;
891 case 'i':
892 pre = "#include \"";
893 post = "\"\n";
894 break;
895 default:
896 error("prinit");
897 }
898 strlcat((char *)ic->buffer, pre, CPPBUF+1);
899 strlcat((char *)ic->buffer, it->str, CPPBUF+1);
900 if (strlcat((char *)ic->buffer, post, CPPBUF+1) >= CPPBUF+1)
901 error("line exceeds buffer size");
902
903 ic->lineno--;
904 while (*ic->maxread)
905 ic->maxread++;
906 }
907
908 /*
909 * A new file included.
910 * If ifiles == NULL, this is the first file and already opened (stdin).
911 * Return 0 on success, -1 if file to be included is not found.
912 */
913 int
pushfile(const usch * file,const usch * fn,int idx,void * incs)914 pushfile(const usch *file, const usch *fn, int idx, void *incs)
915 {
916 extern struct initar *initar;
917 struct includ ibuf;
918 struct includ *ic;
919 int otrulvl;
920
921 ic = &ibuf;
922 ic->next = ifiles;
923
924 if (file != NULL) {
925 if ((ic->infil = open((const char *)file, O_RDONLY)) < 0)
926 return -1;
927 ic->orgfn = ic->fname = file;
928 if (++inclevel > MAX_INCLEVEL)
929 error("limit for nested includes exceeded");
930 } else {
931 ic->infil = 0;
932 ic->orgfn = ic->fname = (const usch *)"<stdin>";
933 }
934 #ifndef BUF_STACK
935 ic->bbuf = malloc(BBUFSZ);
936 #endif
937 ic->buffer = ic->bbuf+PBMAX;
938 ic->curptr = ic->buffer;
939 ifiles = ic;
940 ic->lineno = 1;
941 ic->escln = 0;
942 ic->maxread = ic->curptr;
943 ic->idx = idx;
944 ic->incs = incs;
945 ic->fn = fn;
946 prtline(1);
947 if (initar) {
948 int oin = ic->infil;
949 ic->infil = -1;
950 *ic->maxread = 0;
951 prinit(initar, ic);
952 initar = NULL;
953 if (dMflag)
954 printf("%s", (char *)ic->buffer);
955 fastscan();
956 prtline(1);
957 ic->infil = oin;
958 }
959
960 otrulvl = trulvl;
961
962 fastscan();
963
964 if (otrulvl != trulvl || flslvl)
965 error("unterminated conditional");
966
967 #ifndef BUF_STACK
968 free(ic->bbuf);
969 #endif
970 ifiles = ic->next;
971 close(ic->infil);
972 inclevel--;
973 return 0;
974 }
975
976 /*
977 * Print current position to output file.
978 */
979 void
prtline(int nl)980 prtline(int nl)
981 {
982 usch *sb = stringbuf;
983
984 if (Mflag) {
985 if (dMflag)
986 return; /* no output */
987 if (ifiles->lineno == 1 &&
988 (MMDflag == 0 || ifiles->idx != SYSINC)) {
989 printf("%s: %s\n", Mfile, ifiles->fname);
990 if (MPflag &&
991 strcmp((const char *)ifiles->fname, (char *)MPfile))
992 printf("%s:\n", ifiles->fname);
993 }
994 } else if (!Pflag) {
995 sheap("\n# %d \"%s\"", ifiles->lineno, ifiles->fname);
996 if (ifiles->idx == SYSINC)
997 sheap(" 3");
998 if (nl) sheap("\n");
999 putstr(sb);
1000 }
1001 stringbuf = sb;
1002 }
1003
/*
 * Externally visible wrapper around unch(): push c back onto the input.
 */
void
cunput(int c)
{
	unch(c);
}
1012
/*
 * Convert a hexadecimal digit character to its numeric value.
 * The caller must already know that c is a hex digit.
 */
static int
dig2num(int c)
{
	if (c < 'A')
		return c - '0';
	if (c < 'a')
		return c - 'A' + 10;
	return c - 'a' + 10;
}
1024
1025 /*
1026 * Convert string numbers to unsigned long long and check overflow.
1027 */
1028 static void
cvtdig(usch ** yyp)1029 cvtdig(usch **yyp)
1030 {
1031 unsigned long long rv = 0;
1032 unsigned long long rv2 = 0;
1033 usch *y = *yyp;
1034 int rad;
1035
1036 y--;
1037 rad = *y != '0' ? 10 : y[1] == 'x' || y[1] == 'X' ? 16 : 8;
1038 if (rad == 16)
1039 y += 2;
1040 while ((spechr[*y] & C_HEX)) {
1041 rv = rv * rad + dig2num(*y);
1042 /* check overflow */
1043 if (rv / rad < rv2)
1044 error("constant is out of range");
1045 rv2 = rv;
1046 y++;
1047 }
1048 yynode.op = NUMBER;
1049 while (*y == 'l' || *y == 'L' || *y == 'u' || *y == 'U') {
1050 if (*y == 'u' || *y == 'U')
1051 yynode.op = UNUMBER;
1052 y++;
1053 }
1054 yynode.nd_uval = rv;
1055 if ((rad == 8 || rad == 16) && yynode.nd_val < 0)
1056 yynode.op = UNUMBER;
1057 if (yynode.op == NUMBER && yynode.nd_val < 0)
1058 /* too large for signed, see 6.4.4.1 */
1059 error("constant is out of range");
1060 *yyp = y;
1061 }
1062
1063 static int
charcon(usch ** yyp)1064 charcon(usch **yyp)
1065 {
1066 int val, c;
1067 usch *p = *yyp;
1068
1069 val = 0;
1070 if (*p++ == '\\') {
1071 switch (*p++) {
1072 case 'a': val = '\a'; break;
1073 case 'b': val = '\b'; break;
1074 case 'f': val = '\f'; break;
1075 case 'n': val = '\n'; break;
1076 case 'r': val = '\r'; break;
1077 case 't': val = '\t'; break;
1078 case 'v': val = '\v'; break;
1079 case '\"': val = '\"'; break;
1080 case '\'': val = '\''; break;
1081 case '\\': val = '\\'; break;
1082 case 'x':
1083 while ((spechr[c = *p] & C_HEX)) {
1084 val = val * 16 + dig2num(c);
1085 p++;
1086 }
1087 break;
1088 case '0': case '1': case '2': case '3': case '4':
1089 case '5': case '6': case '7':
1090 p--;
1091 while ((spechr[c = *p] & C_DIGIT)) {
1092 val = val * 8 + (c - '0');
1093 p++;
1094 }
1095 break;
1096 default: val = p[-1];
1097 }
1098
1099 } else
1100 val = p[-1];
1101 if (*p != '\'')
1102 error("bad charcon");
1103 *yyp = ++p;
1104 return val;
1105 }
1106
1107 static void
chknl(int ignore)1108 chknl(int ignore)
1109 {
1110 void (*f)(const char *, ...);
1111 int t;
1112
1113 f = ignore ? warning : error;
1114 if ((t = fastspc()) != '\n') {
1115 if (t) {
1116 f("newline expected");
1117 /* ignore rest of line */
1118 while ((t = inch()) >= 0 && t != '\n')
1119 ;
1120 } else
1121 f("no newline at end of file");
1122 }
1123 unch(t);
1124 }
1125
1126 static void
elsestmt(void)1127 elsestmt(void)
1128 {
1129 if (flslvl) {
1130 if (elflvl > trulvl)
1131 ;
1132 else if (--flslvl!=0)
1133 flslvl++;
1134 else
1135 trulvl++;
1136 } else if (trulvl) {
1137 flslvl++;
1138 trulvl--;
1139 } else
1140 error("#else in non-conditional section");
1141 if (elslvl==trulvl+flslvl)
1142 error("too many #else");
1143 elslvl=trulvl+flslvl;
1144 chknl(1);
1145 }
1146
1147 static void
ifdefstmt(void)1148 ifdefstmt(void)
1149 {
1150 usch *bp;
1151 int ch;
1152
1153 if (!ISID0(ch = fastspc()))
1154 error("bad #ifdef");
1155 bp = heapid(ch);
1156 stringbuf = bp;
1157
1158 if (lookup(bp, FIND) == NULL)
1159 flslvl++;
1160 else
1161 trulvl++;
1162 chknl(0);
1163 }
1164
1165 static void
ifndefstmt(void)1166 ifndefstmt(void)
1167 {
1168 usch *bp;
1169 int ch;
1170
1171 if (!ISID0(ch = fastspc()))
1172 error("bad #ifndef");
1173 bp = heapid(ch);
1174 stringbuf = bp;
1175 if (lookup(bp, FIND) != NULL)
1176 flslvl++;
1177 else
1178 trulvl++;
1179 chknl(0);
1180 }
1181
1182 static void
endifstmt(void)1183 endifstmt(void)
1184 {
1185 if (flslvl)
1186 flslvl--;
1187 else if (trulvl)
1188 trulvl--;
1189 else
1190 error("#endif in non-conditional section");
1191 if (flslvl == 0)
1192 elflvl = 0;
1193 elslvl = 0;
1194 chknl(1);
1195 }
1196
1197 static void
ifstmt(void)1198 ifstmt(void)
1199 {
1200 exprline() ? trulvl++ : flslvl++;
1201 }
1202
1203 static void
elifstmt(void)1204 elifstmt(void)
1205 {
1206 if (flslvl == 0)
1207 elflvl = trulvl;
1208 if (flslvl) {
1209 if (elflvl > trulvl)
1210 ;
1211 else if (--flslvl!=0)
1212 flslvl++;
1213 else if (exprline())
1214 trulvl++;
1215 else
1216 flslvl++;
1217 } else if (trulvl) {
1218 flslvl++;
1219 trulvl--;
1220 } else
1221 error("#elif in non-conditional section");
1222 }
1223
1224 /* save line into stringbuf */
1225 static usch *
savln(void)1226 savln(void)
1227 {
1228 int c;
1229 usch *cp = stringbuf;
1230
1231 while ((c = inch()) != -1) {
1232 if (c == '\n') {
1233 unch(c);
1234 break;
1235 }
1236 savch(c);
1237 }
1238 savch(0);
1239
1240 return cp;
1241 }
1242
1243 static void
cpperror(void)1244 cpperror(void)
1245 {
1246 usch *cp;
1247
1248 cp = savln();
1249 error("#error %s", cp);
1250 stringbuf = cp;
1251 }
1252
1253 static void
cppwarning(void)1254 cppwarning(void)
1255 {
1256 usch *cp;
1257
1258 cp = savln();
1259 warning("#warning %s", cp);
1260 stringbuf = cp;
1261 }
1262
1263 static void
undefstmt(void)1264 undefstmt(void)
1265 {
1266 struct symtab *np;
1267 usch *bp;
1268 int ch;
1269
1270 if (!ISID0(ch = fastspc()))
1271 error("bad #undef");
1272 bp = heapid(ch);
1273 if ((np = lookup(bp, FIND)) != NULL)
1274 np->value = 0;
1275 stringbuf = bp;
1276 chknl(0);
1277 }
1278
1279 static void
identstmt(void)1280 identstmt(void)
1281 {
1282 struct symtab *sp;
1283 usch *bp;
1284 int ch;
1285
1286 bp = stringbuf;
1287 if (ISID0(ch = fastspc())) {
1288 bp = heapid(ch);
1289 stringbuf = bp;
1290 if ((sp = lookup(bp, FIND)))
1291 kfind(sp);
1292 if (bp[0] != '\"')
1293 goto bad;
1294 } else if (ch == '\"') {
1295 faststr(ch, savch);
1296 } else
1297 goto bad;
1298 stringbuf = bp;
1299 chknl(1);
1300 return;
1301 bad:
1302 error("bad #ident directive");
1303 }
1304
1305 static void
pragmastmt(void)1306 pragmastmt(void)
1307 {
1308 int ch;
1309
1310 putstr((const usch *)"\n#pragma");
1311 while ((ch = inch()) != '\n' && ch > 0)
1312 putch(ch);
1313 unch(ch);
1314 prtline(1);
1315 }
1316
/*
 * Externally visible wrapper around inch(): return the next input
 * character.
 */
int
cinput(void)
{
	int ch = inch();

	return ch;
}
1323
/*
 * Directive dispatch table used by ppdir().
 * Flags tell ppdir() what to do with a directive found while skipping
 * (flslvl != 0):
 *   DIR_FLSLVL - the handler is still called;
 *   DIR_FLSINC - the handler is not called, flslvl is just incremented;
 *   neither    - the directive is ignored.
 */
1324 #define DIR_FLSLVL 001
1325 #define DIR_FLSINC 002
1326 static struct {
/* directive name as written after the '#' */
1327 const char *name;
/* handler, called with the input positioned right after the name */
1328 void (*fun)(void);
1329 int flags;
1330 } ppd[] = {
1331 { "ifndef", ifndefstmt, DIR_FLSINC },
1332 { "ifdef", ifdefstmt, DIR_FLSINC },
1333 { "if", ifstmt, DIR_FLSINC },
1334 { "include", include, 0 },
1335 { "else", elsestmt, DIR_FLSLVL },
1336 { "endif", endifstmt, DIR_FLSLVL },
1337 { "error", cpperror, 0 },
1338 { "warning", cppwarning, 0 },
1339 { "define", define, 0 },
1340 { "undef", undefstmt, 0 },
1341 { "line", line, 0 },
1342 { "pragma", pragmastmt, 0 },
1343 { "elif", elifstmt, DIR_FLSLVL },
1344 { "ident", identstmt, 0 },
1345 #ifdef GCC_COMPAT
1346 { "include_next", include_next, 0 },
1347 #endif
1348 };
/* number of entries in ppd[] */
1349 #define NPPD (int)(sizeof(ppd) / sizeof(ppd[0]))
1350
/*
 * Discard the rest of the current line.  The newline is pushed back;
 * at end of file nothing is pushed back.
 */
static void
skpln(void)
{
	int ch;

	do {
		ch = inch();
	} while (ch != -1 && ch != '\n');

	if (ch == '\n')
		unch('\n');
}
1364
1365 /*
1366 * do an even faster scan than fastscan while at flslvl.
1367 * just search for a new directive.
1368 */
1369 static void
flscan(void)1370 flscan(void)
1371 {
1372 int ch;
1373
1374 for (;;) {
1375 switch (ch = inch()) {
1376 case -1:
1377 return;
1378 case '\n':
1379 ifiles->lineno++;
1380 putch('\n');
1381 if ((ch = fastspcg()) == '#')
1382 return;
1383 unch(ch);
1384 break;
1385 case '/':
1386 fastcmnt(0); /* may be around directives */
1387 break;
1388 }
1389 }
1390 }
1391
1392
1393 /*
1394 * Handle a preprocessor directive.
1395 * # is already found.
1396 */
1397 void
ppdir(void)1398 ppdir(void)
1399 {
1400 int ch, i, oldC;
1401 usch *bp;
1402
1403 oldC = Cflag;
1404 redo: Cflag = 0;
1405 if ((ch = fastspc()) == '\n') { /* empty directive */
1406 unch(ch);
1407 Cflag = oldC;
1408 return;
1409 }
1410 Cflag = oldC;
1411 if ((spechr[ch] & C_ID0) == 0)
1412 goto out;
1413 bp = heapid(ch);
1414 stringbuf = bp;
1415
1416 /* got some keyword */
1417 for (i = 0; i < NPPD; i++) {
1418 if (bp[0] == ppd[i].name[0] &&
1419 strcmp((char *)bp, ppd[i].name) == 0) {
1420 if (flslvl == 0) {
1421 (*ppd[i].fun)();
1422 if (flslvl == 0)
1423 return;
1424 } else {
1425 if (ppd[i].flags & DIR_FLSLVL) {
1426 (*ppd[i].fun)();
1427 if (flslvl == 0)
1428 return;
1429 }else if (ppd[i].flags & DIR_FLSINC)
1430 flslvl++;
1431 }
1432 flscan();
1433 goto redo;
1434 }
1435 }
1436
1437 out:
1438 if (flslvl == 0 && Aflag == 0)
1439 error("invalid preprocessor directive");
1440
1441 unch(ch);
1442 skpln();
1443 }
1444