xref: /openbsd-src/usr.bin/unifdef/unifdef.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*
2  * Copyright (c) 2002 - 2014 Tony Finch <dot@dotat.at>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * unifdef - remove ifdef'ed lines
28  *
29  * This code was derived from software contributed to Berkeley by Dave Yost.
30  * It was rewritten to support ANSI C by Tony Finch. The original version
31  * of unifdef carried the 4-clause BSD copyright licence. None of its code
32  * remains in this version (though some of the names remain) so it now
33  * carries a more liberal licence.
34  *
35  *  Wishlist:
36  *      provide an option which will append the name of the
37  *        appropriate symbol after #else's and #endif's
38  *      provide an option which will check symbols after
39  *        #else's and #endif's to see that they match their
40  *        corresponding #ifdef or #ifndef
41  *
42  *   These require better buffer handling, which would also make
43  *   it possible to handle all "dodgy" directives correctly.
44  */
45 
46 #include "unifdef.h"
47 
/*
 * Identification text. version() walks this string and prints only the
 * spans enclosed by a pair of '$' characters, one per line; text outside
 * such pairs is skipped. The first piece comes from version.h
 * (presumably a string literal -- that header is not shown here).
 */
static const char copyright[] =
    #include "version.h"
    "@(#) $Author: jmc $\n"
    "@(#) $URL: http://dotat.at/prog/unifdef $\n"
;
53 
/*
 * Types of input lines, as returned by parseline().
 *
 * The numeric order matters:
 *  - trans_table[][] columns and linetype_name[] are indexed by these
 *    values, so all three must be kept in step;
 *  - LT_IF, LT_TRUE, LT_FALSE are laid out in the same order as
 *    LT_ELIF, LT_ELTRUE, LT_ELFALSE so linetype_if2elif() can convert
 *    with a constant offset;
 *  - adding LT_DODGY to any of the directive types LT_TRUEI..LT_ENDIF
 *    yields the "dodgy" (multi-line) variant of that type.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
	"DODGY TRUEI", "DODGY FALSEI",
	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
	"DODGY ELSE", "DODGY ENDIF",
	"PLAIN", "EOF", "ERROR"
};

/*
 * Convert an #if line type (LT_IF, LT_TRUE, LT_FALSE) to the matching
 * #elif type, and a directive type to its dodgy variant. The argument
 * is parenthesized so that any expression may be passed safely.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
86 
/*
 * State of #if processing for one nesting level; ifstate[depth] holds
 * the state of the group currently being processed.
 *
 * The values index the rows of trans_table[][] and the entries of
 * ifstate_name[], so the order here must match both.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	"OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
	"PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
	"PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
	"FALSE_TRAILER"
};
108 
/*
 * State of comment parser, kept in the global incomment between lines.
 * NO_COMMENT is deliberately zero so that the state can be tested as a
 * boolean, as done() does with "if (incomment)".
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	"NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
};
123 
/*
 * State of preprocessor line parser, kept in the global linestate.
 * It records how much of the current line has been seen, which decides
 * whether the line can still turn out to be a preprocessor directive.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	"START", "HASH", "DIRTY"
};
134 
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * MAXDEPTH sizes the per-level state arrays; level 0 is "outside any
 * #if" and nest() refuses to go past MAXDEPTH-1, so at most
 * MAXDEPTH-1 nested #if levels are accepted.
 * NOTE(review): MAXSYMS looks larger than the minimum that clause
 * requires -- presumably chosen for headroom; confirm.
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */
#define	MAXSYMS         16384			/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 * (keywordedit() writes the replacement keyword plus a newline in place.)
 */
#define	EDITSLOP        10
147 
/*
 * Globals.
 */

/* Command-line option flags, set by main() while parsing options. */
static bool             compblank;		/* -B: compress blank lines */
static bool             lnblank;		/* -b: blank deleted lines */
static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             inplace;		/* -m: modify in place */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             strictlogic;		/* -K: keep ambiguous #ifs */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnnum;			/* -n: add #line directives */
static bool             symlist;		/* -s: output symbol list */
static bool             symdepth;		/* -S: output symbol depth */
static bool             text;			/* -t: this is a text file */

/*
 * The symbol table: parallel arrays indexed by symbol number.
 * parseline() treats a symbol whose value[] entry is NULL as undefined.
 */
static const char      *symname[MAXSYMS];	/* symbol name */
static const char      *value[MAXSYMS];		/* -Dsym=value */
static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
static int              nsyms;			/* number of symbols */

/* Input and output streams and names, set up by processinout(). */
static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name */
static int              linenum;		/* current line number */
static const char      *linefile;		/* file name for #line */
static FILE            *output;			/* output file pointer */
static const char      *ofilename;		/* output file name */
static const char      *backext;		/* backup extension */
static char            *tempname;		/* avoid splatting input */

static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's */

/*
 * When processing a file, the output's newline style will match the
 * input's, and unifdef correctly handles CRLF or LF endings whatever
 * the platform's native style. The stdio streams are opened in binary
 * mode to accommodate platforms whose native newline style is CRLF.
 * When the output isn't a processed input file (when it is error /
 * debug / diagnostic messages) then unifdef uses native line endings.
 */

/* newline is NULL until parseline() has seen the first line of a file. */
static const char      *newline;		/* input file format */
static const char       newline_unix[] = "\n";
static const char       newline_crlf[] = "\r\n";

/*
 * Parser and state machine state. ifstate[], ignoring[] and stifline[]
 * are stacks indexed by the current nesting depth.
 * NOTE(review): "constexpr" is a keyword in C23, so this identifier
 * will not compile in that language mode; renaming it needs a change
 * to every user of the variable.
 */
static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static int              delcount;		/* count of deleted lines */
static unsigned         blankcount;		/* count of blank lines */
static unsigned         blankmax;		/* maximum recent blankcount */
static bool             constexpr;		/* constant #if expression */
static bool             zerosyms;		/* to format symdepth output */
static bool             firstsym;		/* ditto */

/*
 * exitmode is the -x argument (0, 1 or 2); exitstat becomes 1 once
 * flushline() has dropped a line. main() combines the two at exit.
 */
static int              exitmode;		/* exit status mode */
static int              exitstat;		/* program exit status */
210 
/*
 * Forward declarations of the file-local functions, in alphabetical
 * order. They are all static: nothing here is exported.
 */
static void             addsym1(bool, bool, char *);
static void             addsym2(bool, const char *, const char *);
static char            *astrcat(const char *, const char *);
static void             cleantemp(void);
static void             closeio(void);
static void             debug(const char *, ...);
static void             debugsym(const char *, int);
static bool             defundef(void);
static void             defundefile(const char *);
static void             done(void);
static void             error(const char *);
static int              findsym(const char **);
static void             flushline(bool);
static void             hashline(void);
static void             help(void);
static Linetype         ifeval(const char **);
static void             ignoreoff(void);
static void             ignoreon(void);
static void             indirectsym(void);
static void             keywordedit(const char *);
static const char      *matchsym(const char *, const char *);
static void             nest(void);
static Linetype         parseline(void);
static void             process(void);
static void             processinout(const char *, const char *);
static const char      *skipargs(const char *);
static const char      *skipcomment(const char *);
static const char      *skiphash(void);
static const char      *skipline(const char *);
static const char      *skipsym(const char *);
static void             state(Ifstate);
static void             unnest(void);
static void             usage(void);
static void             version(void);
static const char      *xstrdup(const char *, const char *);
246 
/*
 * True if c cannot be part of an identifier, i.e. it terminates a symbol
 * name. The argument is parenthesized so that any expression may be
 * passed, but it is still evaluated twice and must be free of side
 * effects. The cast keeps negative char values away from isalnum().
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
248 
249 /*
250  * The main program.
251  */
252 int
253 main(int argc, char *argv[])
254 {
255 	int opt;
256 
257 	while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
258 		switch (opt) {
259 		case 'i': /* treat stuff controlled by these symbols as text */
260 			/*
261 			 * For strict backwards-compatibility the U or D
262 			 * should be immediately after the -i but it doesn't
263 			 * matter much if we relax that requirement.
264 			 */
265 			opt = *optarg++;
266 			if (opt == 'D')
267 				addsym1(true, true, optarg);
268 			else if (opt == 'U')
269 				addsym1(true, false, optarg);
270 			else
271 				usage();
272 			break;
273 		case 'D': /* define a symbol */
274 			addsym1(false, true, optarg);
275 			break;
276 		case 'U': /* undef a symbol */
277 			addsym1(false, false, optarg);
278 			break;
279 		case 'I': /* no-op for compatibility with cpp */
280 			break;
281 		case 'b': /* blank deleted lines instead of omitting them */
282 		case 'l': /* backwards compatibility */
283 			lnblank = true;
284 			break;
285 		case 'B': /* compress blank lines around removed section */
286 			compblank = true;
287 			break;
288 		case 'c': /* treat -D as -U and vice versa */
289 			complement = true;
290 			break;
291 		case 'd':
292 			debugging = true;
293 			break;
294 		case 'e': /* fewer errors from dodgy lines */
295 			iocccok = true;
296 			break;
297 		case 'f': /* definitions file */
298 			defundefile(optarg);
299 			break;
300 		case 'h':
301 			help();
302 			break;
303 		case 'K': /* keep ambiguous #ifs */
304 			strictlogic = true;
305 			break;
306 		case 'k': /* process constant #ifs */
307 			killconsts = true;
308 			break;
309 		case 'm': /* modify in place */
310 			inplace = true;
311 			break;
312 		case 'M': /* modify in place and keep backup */
313 			inplace = true;
314 			backext = optarg;
315 			break;
316 		case 'n': /* add #line directive after deleted lines */
317 			lnnum = true;
318 			break;
319 		case 'o': /* output to a file */
320 			ofilename = optarg;
321 			break;
322 		case 's': /* only output list of symbols that control #ifs */
323 			symlist = true;
324 			break;
325 		case 'S': /* list symbols with their nesting depth */
326 			symlist = symdepth = true;
327 			break;
328 		case 't': /* don't parse C comments */
329 			text = true;
330 			break;
331 		case 'V':
332 			version();
333 			break;
334 		case 'x':
335 			exitmode = atoi(optarg);
336 			if(exitmode < 0 || exitmode > 2)
337 				usage();
338 			break;
339 		default:
340 			usage();
341 		}
342 	argc -= optind;
343 	argv += optind;
344 	if (compblank && lnblank)
345 		errx(2, "-B and -b are mutually exclusive");
346 	if (symlist && (ofilename != NULL || inplace || argc > 1))
347 		errx(2, "-s only works with one input file");
348 	if (argc > 1 && ofilename != NULL)
349 		errx(2, "-o cannot be used with multiple input files");
350 	if (argc > 1 && !inplace)
351 		errx(2, "multiple input files require -m or -M");
352 	if (argc == 0)
353 		argc = 1;
354 	if (argc == 1 && !inplace && ofilename == NULL)
355 		ofilename = "-";
356 	indirectsym();
357 
358 	atexit(cleantemp);
359 	if (ofilename != NULL)
360 		processinout(*argv, ofilename);
361 	else while (argc-- > 0) {
362 		processinout(*argv, *argv);
363 		argv++;
364 	}
365 	switch(exitmode) {
366 	case(0): exit(exitstat);
367 	case(1): exit(!exitstat);
368 	case(2): exit(0);
369 	default: abort(); /* bug */
370 	}
371 }
372 
373 /*
374  * File logistics.
375  */
376 static void
377 processinout(const char *ifn, const char *ofn)
378 {
379 	struct stat st;
380 
381 	if (ifn == NULL || strcmp(ifn, "-") == 0) {
382 		filename = "[stdin]";
383 		linefile = NULL;
384 		input = fbinmode(stdin);
385 	} else {
386 		filename = ifn;
387 		linefile = ifn;
388 		input = fopen(ifn, "rb");
389 		if (input == NULL)
390 			err(2, "can't open %s", ifn);
391 	}
392 	if (strcmp(ofn, "-") == 0) {
393 		output = fbinmode(stdout);
394 		process();
395 		return;
396 	}
397 	if (stat(ofn, &st) < 0) {
398 		output = fopen(ofn, "wb");
399 		if (output == NULL)
400 			err(2, "can't create %s", ofn);
401 		process();
402 		return;
403 	}
404 
405 	tempname = astrcat(ofn, ".XXXXXX");
406 	output = mktempmode(tempname, st.st_mode);
407 	if (output == NULL)
408 		err(2, "can't create %s", tempname);
409 
410 	process();
411 
412 	if (backext != NULL) {
413 		char *backname = astrcat(ofn, backext);
414 		if (rename(ofn, backname) < 0)
415 			err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
416 		free(backname);
417 	}
418 	if (replace(tempname, ofn) < 0)
419 		err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
420 	free(tempname);
421 	tempname = NULL;
422 }
423 
424 /*
425  * For cleaning up if there is an error.
426  */
427 static void
428 cleantemp(void)
429 {
430 	if (tempname != NULL)
431 		remove(tempname);
432 }
433 
434 /*
435  * Self-identification functions.
436  */
437 
438 static void
439 version(void)
440 {
441 	const char *c = copyright;
442 	for (;;) {
443 		while (*++c != '$')
444 			if (*c == '\0')
445 				exit(0);
446 		while (*++c != '$')
447 			putc(*c, stderr);
448 		putc('\n', stderr);
449 	}
450 }
451 
/*
 * Write the two-line command synopsis to fp.
 */
static void
synopsis(FILE *fp)
{
	static const char text[] =
	    "usage:	unifdef [-BbcdehKkmnSstV] [-[i]Dsym[=val]] [-[i]Usym] [-f defile]\n"
	    "		[-M backext] [-o outfile] [-x 0 | 1 | 2] file ...\n";

	fputs(text, fp);
}
459 
/*
 * Report a command-line error: print the synopsis on stderr and
 * exit with status 2. Does not return.
 */
static void
usage(void)
{
	FILE *dest = stderr;

	synopsis(dest);
	exit(2);
}
466 
/*
 * Handle -h: print the synopsis followed by a one-line description of
 * every option on stdout, then exit with status 0. Does not return.
 */
static void
help(void)
{
	static const char options[] =
	    "	-Dsym=val  define preprocessor symbol with given value\n"
	    "	-Dsym      define preprocessor symbol with value 1\n"
	    "	-Usym	   preprocessor symbol is undefined\n"
	    "	-iDsym=val \\  ignore C strings and comments\n"
	    "	-iDsym      ) in sections controlled by these\n"
	    "	-iUsym	   /  preprocessor symbols\n"
	    "	-fpath	file containing #define and #undef directives\n"
	    "	-b	blank lines instead of deleting them\n"
	    "	-B	compress blank lines around deleted section\n"
	    "	-c	complement (invert) keep vs. delete\n"
	    "	-d	debugging mode\n"
	    "	-e	ignore multiline preprocessor directives\n"
	    "	-h	print help\n"
	    "	-Ipath	extra include file path (ignored)\n"
	    "	-K	disable && and || short-circuiting\n"
	    "	-k	process constant #if expressions\n"
	    "	-Mext	modify in place and keep backups\n"
	    "	-m	modify input files in place\n"
	    "	-n	add #line directives to output\n"
	    "	-opath	output file name\n"
	    "	-S	list #if control symbols with nesting\n"
	    "	-s	list #if control symbols\n"
	    "	-t	ignore C strings and comments\n"
	    "	-V	print version\n"
	    "	-x{012}	exit status mode\n";

	synopsis(stdout);
	fputs(options, stdout);
	exit(0);
}
500 
501 /*
502  * A state transition function alters the global #if processing state
503  * in a particular way. The table below is indexed by the current
504  * processing state and the type of the current line.
505  *
506  * Nesting is handled by keeping a stack of states; some transition
507  * functions increase or decrease the depth. They also maintain the
508  * ignore state on a stack. In some complicated cases they have to
509  * alter the preprocessor directive, as follows.
510  *
511  * When we have processed a group that starts off with a known-false
512  * #if/#elif sequence (which has therefore been deleted) followed by a
513  * #elif that we don't understand and therefore must keep, we edit the
514  * latter into a #if to keep the nesting correct. We use memcpy() to
515  * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
516  *
517  * When we find a true #elif in a group, the following block will
518  * always be kept and the rest of the sequence after the next #elif or
519  * #else will be discarded. We edit the #elif into a #else and the
520  * following directive to #endif since this has the desired behaviour.
521  *
522  * "Dodgy" directives are split across multiple lines, the most common
523  * example being a multi-line comment hanging off the right of the
524  * directive. We can handle them correctly only if there is no change
525  * from printing to dropping (or vice versa) caused by that directive.
526  * If the directive is the first of a group we have a choice between
527  * failing with an error, or passing it through unchanged instead of
528  * evaluating it. The latter is not the default to avoid questions from
529  * users about unifdef unexpectedly leaving behind preprocessor directives.
530  */
/*
 * Type of a state transition function. The functions below are the
 * entries of trans_table[][]; each one handles the line currently held
 * in tline and updates the #if state for the current depth. Their
 * names are kept short so the table stays readable.
 */
typedef void state_fn(void);

/* report an error */
static void Eelif (void) { error("Inappropriate #elif"); }
static void Eelse (void) { error("Inappropriate #else"); }
static void Eendif(void) { error("Inappropriate #endif"); }
static void Eeof  (void) { error("Premature EOF"); }
static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
/* plain line handling */
static void print (void) { flushline(true); }
static void drop  (void) { flushline(false); }
/* output lacks group's start line */
static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
/* print/pass this block */
static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
static void Pelse (void) { print();              state(IS_PASS_ELSE); }
static void Pendif(void) { print(); unnest(); }
/* discard this block */
static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
static void Dendif(void) { drop();  unnest(); }
/* first line of group: nest() first, so the state lands on the new level */
static void Fdrop (void) { nest();  Dfalse(); }
static void Fpass (void) { nest();  Pelif(); }
static void Ftrue (void) { nest();  Strue(); }
static void Ffalse(void) { nest();  Sfalse(); }
/* variable pedantry for obfuscated lines: an error unless -e was given */
static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
/* ignore comments in this block; ignoreon() must follow the F* call,
   because that call ends in ignoreoff() which resets the flag */
static void Idrop (void) { Fdrop();  ignoreon(); }
static void Itrue (void) { Ftrue();  ignoreon(); }
static void Ifalse(void) { Ffalse(); ignoreon(); }
/* modify this line: Mpass overwrites "elif" with the equally long
   "if  "; the others rewrite the keyword via keywordedit(), which also
   prints the edited line */
static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
573 
/*
 * The state transition table. process() calls
 * trans_table[ifstate[depth]][lineval]() for every line: the row is the
 * current Ifstate and the column is the Linetype of the line. Each row
 * has LT_COUNT entries laid out as three source lines: the ten
 * directive types, their ten dodgy variants, then PLAIN, EOF and ERROR
 * (see the column legend at the bottom of the table).
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
  print, done,  abort },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof,  abort },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof,  abort },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof,  abort },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop,  Eeof,  abort }
/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
  PLAIN  EOF    ERROR */
};
619 
620 /*
621  * State machine utility functions
622  */
623 static void
624 ignoreoff(void)
625 {
626 	if (depth == 0)
627 		abort(); /* bug */
628 	ignoring[depth] = ignoring[depth-1];
629 }
630 static void
631 ignoreon(void)
632 {
633 	ignoring[depth] = true;
634 }
635 static void
636 keywordedit(const char *replacement)
637 {
638 	snprintf(keyword, tline + sizeof(tline) - keyword,
639 	    "%s%s", replacement, newline);
640 	print();
641 }
642 static void
643 nest(void)
644 {
645 	if (depth > MAXDEPTH-1)
646 		abort(); /* bug */
647 	if (depth == MAXDEPTH-1)
648 		error("Too many levels of nesting");
649 	depth += 1;
650 	stifline[depth] = linenum;
651 }
652 static void
653 unnest(void)
654 {
655 	if (depth == 0)
656 		abort(); /* bug */
657 	depth -= 1;
658 }
659 static void
660 state(Ifstate is)
661 {
662 	ifstate[depth] = is;
663 }
664 
665 /*
666  * The last state transition function. When this is called,
667  * lineval == LT_EOF, so the process() loop will terminate.
668  */
669 static void
670 done(void)
671 {
672 	if (incomment)
673 		error("EOF in comment");
674 	closeio();
675 }
676 
677 /*
678  * Write a line to the output or not, according to command line options.
679  * If writing fails, closeio() will print the error and exit.
680  */
681 static void
682 flushline(bool keep)
683 {
684 	if (symlist)
685 		return;
686 	if (keep ^ complement) {
687 		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
688 		if (blankline && compblank && blankcount != blankmax) {
689 			delcount += 1;
690 			blankcount += 1;
691 		} else {
692 			if (lnnum && delcount > 0)
693 				hashline();
694 			if (fputs(tline, output) == EOF)
695 				closeio();
696 			delcount = 0;
697 			blankmax = blankcount = blankline ? blankcount + 1 : 0;
698 		}
699 	} else {
700 		if (lnblank && fputs(newline, output) == EOF)
701 			closeio();
702 		exitstat = 1;
703 		delcount += 1;
704 		blankcount = 0;
705 	}
706 	if (debugging && fflush(output) == EOF)
707 		closeio();
708 }
709 
710 /*
711  * Format of #line directives depends on whether we know the input filename.
712  */
713 static void
714 hashline(void)
715 {
716 	int e;
717 
718 	if (linefile == NULL)
719 		e = fprintf(output, "#line %d%s", linenum, newline);
720 	else
721 		e = fprintf(output, "#line %d \"%s\"%s",
722 		    linenum, linefile, newline);
723 	if (e < 0)
724 		closeio();
725 }
726 
727 /*
728  * Flush the output and handle errors.
729  */
730 static void
731 closeio(void)
732 {
733 	/* Tidy up after findsym(). */
734 	if (symdepth && !zerosyms)
735 		printf("\n");
736 	if (output != NULL && (ferror(output) || fclose(output) == EOF))
737 			err(2, "%s: can't write to output", filename);
738 	fclose(input);
739 }
740 
741 /*
742  * The driver for the state machine.
743  */
744 static void
745 process(void)
746 {
747 	Linetype lineval = LT_PLAIN;
748 	/* When compressing blank lines, act as if the file
749 	   is preceded by a large number of blank lines. */
750 	blankmax = blankcount = 1000;
751 	zerosyms = true;
752 	newline = NULL;
753 	linenum = 0;
754 	while (lineval != LT_EOF) {
755 		lineval = parseline();
756 		trans_table[ifstate[depth]][lineval]();
757 		debug("process line %d %s -> %s depth %d",
758 		    linenum, linetype_name[lineval],
759 		    ifstate_name[ifstate[depth]], depth);
760 	}
761 }
762 
763 /*
764  * Parse a line and determine its type. We keep the preprocessor line
765  * parser state between calls in the global variable linestate, with
766  * help from skipcomment().
767  */
768 static Linetype
769 parseline(void)
770 {
771 	const char *cp;
772 	int cursym;
773 	Linetype retval;
774 	Comment_state wascomment;
775 
776 	wascomment = incomment;
777 	cp = skiphash();
778 	if (cp == NULL)
779 		return (LT_EOF);
780 	if (newline == NULL) {
781 		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
782 			newline = newline_crlf;
783 		else
784 			newline = newline_unix;
785 	}
786 	if (*cp == '\0') {
787 		retval = LT_PLAIN;
788 		goto done;
789 	}
790 	keyword = tline + (cp - tline);
791 	if ((cp = matchsym("ifdef", keyword)) != NULL ||
792 	    (cp = matchsym("ifndef", keyword)) != NULL) {
793 		cp = skipcomment(cp);
794 		if ((cursym = findsym(&cp)) < 0)
795 			retval = LT_IF;
796 		else {
797 			retval = (keyword[2] == 'n')
798 			    ? LT_FALSE : LT_TRUE;
799 			if (value[cursym] == NULL)
800 				retval = (retval == LT_TRUE)
801 				    ? LT_FALSE : LT_TRUE;
802 			if (ignore[cursym])
803 				retval = (retval == LT_TRUE)
804 				    ? LT_TRUEI : LT_FALSEI;
805 		}
806 	} else if ((cp = matchsym("if", keyword)) != NULL)
807 		retval = ifeval(&cp);
808 	else if ((cp = matchsym("elif", keyword)) != NULL)
809 		retval = linetype_if2elif(ifeval(&cp));
810 	else if ((cp = matchsym("else", keyword)) != NULL)
811 		retval = LT_ELSE;
812 	else if ((cp = matchsym("endif", keyword)) != NULL)
813 		retval = LT_ENDIF;
814 	else {
815 		cp = skipsym(keyword);
816 		/* no way can we deal with a continuation inside a keyword */
817 		if (strncmp(cp, "\\\r\n", 3) == 0 ||
818 		    strncmp(cp, "\\\n", 2) == 0)
819 			Eioccc();
820 		cp = skipline(cp);
821 		retval = LT_PLAIN;
822 		goto done;
823 	}
824 	cp = skipcomment(cp);
825 	if (*cp != '\0') {
826 		cp = skipline(cp);
827 		if (retval == LT_TRUE || retval == LT_FALSE ||
828 		    retval == LT_TRUEI || retval == LT_FALSEI)
829 			retval = LT_IF;
830 		if (retval == LT_ELTRUE || retval == LT_ELFALSE)
831 			retval = LT_ELIF;
832 	}
833 	/* the following can happen if the last line of the file lacks a
834 	   newline or if there is too much whitespace in a directive */
835 	if (linestate == LS_HASH) {
836 		long len = cp - tline;
837 		if (fgets(tline + len, MAXLINE - len, input) == NULL) {
838 			if (ferror(input))
839 				err(2, "can't read %s", filename);
840 			/* append the missing newline at eof */
841 			strcpy(tline + len, newline);
842 			cp += strlen(newline);
843 			linestate = LS_START;
844 		} else {
845 			linestate = LS_DIRTY;
846 		}
847 	}
848 	if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
849 		retval = linetype_2dodgy(retval);
850 		linestate = LS_DIRTY;
851 	}
852 done:
853 	debug("parser line %d state %s comment %s line", linenum,
854 	    comment_name[incomment], linestate_name[linestate]);
855 	return (retval);
856 }
857 
858 /*
859  * These are the binary operators that are supported by the expression
860  * evaluator.
861  */
862 static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
863 	if(at == LT_IF || bt == LT_IF) return (LT_IF);
864 	return (*p = v, v ? LT_TRUE : LT_FALSE);
865 }
866 static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) {
867 	return op_strict(p, a < b, at, bt);
868 }
869 static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) {
870 	return op_strict(p, a > b, at, bt);
871 }
872 static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) {
873 	return op_strict(p, a <= b, at, bt);
874 }
875 static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) {
876 	return op_strict(p, a >= b, at, bt);
877 }
878 static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) {
879 	return op_strict(p, a == b, at, bt);
880 }
881 static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) {
882 	return op_strict(p, a != b, at, bt);
883 }
884 static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) {
885 	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
886 		return (*p = 1, LT_TRUE);
887 	return op_strict(p, a || b, at, bt);
888 }
889 static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) {
890 	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
891 		return (*p = 0, LT_FALSE);
892 	return op_strict(p, a && b, at, bt);
893 }
894 
895 /*
896  * An evaluation function takes three arguments, as follows: (1) a pointer to
897  * an element of the precedence table which lists the operators at the current
898  * level of precedence; (2) a pointer to an integer which will receive the
899  * value of the expression; and (3) a pointer to a char* that points to the
900  * expression to be evaluated and that is updated to the end of the expression
901  * when evaluation is complete. The function returns LT_FALSE if the value of
902  * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
903  * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
904  */
905 struct ops;
906 
907 typedef Linetype eval_fn(const struct ops *, long *, const char **);
908 
909 static eval_fn eval_table, eval_unary;
910 
911 /*
912  * The precedence table. Expressions involving binary operators are evaluated
913  * in a table-driven way by eval_table. When it evaluates a subexpression it
914  * calls the inner function with its first argument pointing to the next
915  * element of the table. Innermost expressions have special non-table-driven
916  * handling.
917  */
918 struct op {
919 	const char *str;
920 	Linetype (*fn)(long *, Linetype, long, Linetype, long);
921 };
922 struct ops {
923 	eval_fn *inner;
924 	struct op op[5];
925 };
926 static const struct ops eval_ops[] = {
927 	{ eval_table, { { "||", op_or } } },
928 	{ eval_table, { { "&&", op_and } } },
929 	{ eval_table, { { "==", op_eq },
930 			{ "!=", op_ne } } },
931 	{ eval_unary, { { "<=", op_le },
932 			{ ">=", op_ge },
933 			{ "<", op_lt },
934 			{ ">", op_gt } } }
935 };
936 
937 /* Current operator precedence level */
938 static long prec(const struct ops *ops)
939 {
940 	return (ops - eval_ops);
941 }
942 
943 /*
944  * Function for evaluating the innermost parts of expressions,
945  * viz. !expr (expr) number defined(symbol) symbol
946  * We reset the constexpr flag in the last two cases.
947  */
948 static Linetype
949 eval_unary(const struct ops *ops, long *valp, const char **cpp)
950 {
951 	const char *cp;
952 	char *ep;
953 	int sym;
954 	bool defparen;
955 	Linetype lt;
956 
957 	cp = skipcomment(*cpp);
958 	if (*cp == '!') {
959 		debug("eval%d !", prec(ops));
960 		cp++;
961 		lt = eval_unary(ops, valp, &cp);
962 		if (lt == LT_ERROR)
963 			return (LT_ERROR);
964 		if (lt != LT_IF) {
965 			*valp = !*valp;
966 			lt = *valp ? LT_TRUE : LT_FALSE;
967 		}
968 	} else if (*cp == '(') {
969 		cp++;
970 		debug("eval%d (", prec(ops));
971 		lt = eval_table(eval_ops, valp, &cp);
972 		if (lt == LT_ERROR)
973 			return (LT_ERROR);
974 		cp = skipcomment(cp);
975 		if (*cp++ != ')')
976 			return (LT_ERROR);
977 	} else if (isdigit((unsigned char)*cp)) {
978 		debug("eval%d number", prec(ops));
979 		*valp = strtol(cp, &ep, 0);
980 		if (ep == cp)
981 			return (LT_ERROR);
982 		lt = *valp ? LT_TRUE : LT_FALSE;
983 		cp = ep;
984 	} else if (matchsym("defined", cp) != NULL) {
985 		cp = skipcomment(cp+7);
986 		if (*cp == '(') {
987 			cp = skipcomment(cp+1);
988 			defparen = true;
989 		} else {
990 			defparen = false;
991 		}
992 		sym = findsym(&cp);
993 		cp = skipcomment(cp);
994 		if (defparen && *cp++ != ')') {
995 			debug("eval%d defined missing ')'", prec(ops));
996 			return (LT_ERROR);
997 		}
998 		if (sym < 0) {
999 			debug("eval%d defined unknown", prec(ops));
1000 			lt = LT_IF;
1001 		} else {
1002 			debug("eval%d defined %s", prec(ops), symname[sym]);
1003 			*valp = (value[sym] != NULL);
1004 			lt = *valp ? LT_TRUE : LT_FALSE;
1005 		}
1006 		constexpr = false;
1007 	} else if (!endsym(*cp)) {
1008 		debug("eval%d symbol", prec(ops));
1009 		sym = findsym(&cp);
1010 		if (sym < 0) {
1011 			lt = LT_IF;
1012 			cp = skipargs(cp);
1013 		} else if (value[sym] == NULL) {
1014 			*valp = 0;
1015 			lt = LT_FALSE;
1016 		} else {
1017 			*valp = strtol(value[sym], &ep, 0);
1018 			if (*ep != '\0' || ep == value[sym])
1019 				return (LT_ERROR);
1020 			lt = *valp ? LT_TRUE : LT_FALSE;
1021 			cp = skipargs(cp);
1022 		}
1023 		constexpr = false;
1024 	} else {
1025 		debug("eval%d bad expr", prec(ops));
1026 		return (LT_ERROR);
1027 	}
1028 
1029 	*cpp = cp;
1030 	debug("eval%d = %d", prec(ops), *valp);
1031 	return (lt);
1032 }
1033 
1034 /*
1035  * Table-driven evaluation of binary operators.
1036  */
1037 static Linetype
1038 eval_table(const struct ops *ops, long *valp, const char **cpp)
1039 {
1040 	const struct op *op;
1041 	const char *cp;
1042 	long val;
1043 	Linetype lt, rt;
1044 
1045 	debug("eval%d", prec(ops));
1046 	cp = *cpp;
1047 	lt = ops->inner(ops+1, valp, &cp);
1048 	if (lt == LT_ERROR)
1049 		return (LT_ERROR);
1050 	for (;;) {
1051 		cp = skipcomment(cp);
1052 		for (op = ops->op; op->str != NULL; op++)
1053 			if (strncmp(cp, op->str, strlen(op->str)) == 0)
1054 				break;
1055 		if (op->str == NULL)
1056 			break;
1057 		cp += strlen(op->str);
1058 		debug("eval%d %s", prec(ops), op->str);
1059 		rt = ops->inner(ops+1, &val, &cp);
1060 		if (rt == LT_ERROR)
1061 			return (LT_ERROR);
1062 		lt = op->fn(valp, lt, *valp, rt, val);
1063 	}
1064 
1065 	*cpp = cp;
1066 	debug("eval%d = %d", prec(ops), *valp);
1067 	debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1068 	return (lt);
1069 }
1070 
1071 /*
1072  * Evaluate the expression on a #if or #elif line. If we can work out
1073  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1074  * return just a generic LT_IF.
1075  */
1076 static Linetype
1077 ifeval(const char **cpp)
1078 {
1079 	Linetype ret;
1080 	long val = 0;
1081 
1082 	debug("eval %s", *cpp);
1083 	constexpr = killconsts ? false : true;
1084 	ret = eval_table(eval_ops, &val, cpp);
1085 	debug("eval = %d", val);
1086 	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1087 }
1088 
1089 /*
1090  * Read a line and examine its initial part to determine if it is a
1091  * preprocessor directive. Returns NULL on EOF, or a pointer to a
1092  * preprocessor directive name, or a pointer to the zero byte at the
1093  * end of the line.
1094  */
1095 static const char *
1096 skiphash(void)
1097 {
1098 	const char *cp;
1099 
1100 	linenum++;
1101 	if (fgets(tline, MAXLINE, input) == NULL) {
1102 		if (ferror(input))
1103 			err(2, "can't read %s", filename);
1104 		else
1105 			return (NULL);
1106 	}
1107 	cp = skipcomment(tline);
1108 	if (linestate == LS_START && *cp == '#') {
1109 		linestate = LS_HASH;
1110 		return (skipcomment(cp + 1));
1111 	} else if (*cp == '\0') {
1112 		return (cp);
1113 	} else {
1114 		return (skipline(cp));
1115 	}
1116 }
1117 
1118 /*
1119  * Mark a line dirty and consume the rest of it, keeping track of the
1120  * lexical state.
1121  */
1122 static const char *
1123 skipline(const char *cp)
1124 {
1125 	if (*cp != '\0')
1126 		linestate = LS_DIRTY;
1127 	while (*cp != '\0')
1128 		cp = skipcomment(cp + 1);
1129 	return (cp);
1130 }
1131 
1132 /*
1133  * Skip over comments, strings, and character literals and stop at the
1134  * next character position that is not whitespace. Between calls we keep
1135  * the comment state in the global variable incomment, and we also adjust
1136  * the global variable linestate when we see a newline.
1137  * XXX: doesn't cope with the buffer splitting inside a state transition.
1138  */
1139 static const char *
1140 skipcomment(const char *cp)
1141 {
1142 	if (text || ignoring[depth]) {
1143 		for (; isspace((unsigned char)*cp); cp++)
1144 			if (*cp == '\n')
1145 				linestate = LS_START;
1146 		return (cp);
1147 	}
1148 	while (*cp != '\0')
1149 		/* don't reset to LS_START after a line continuation */
1150 		if (strncmp(cp, "\\\r\n", 3) == 0)
1151 			cp += 3;
1152 		else if (strncmp(cp, "\\\n", 2) == 0)
1153 			cp += 2;
1154 		else switch (incomment) {
1155 		case NO_COMMENT:
1156 			if (strncmp(cp, "/\\\r\n", 4) == 0) {
1157 				incomment = STARTING_COMMENT;
1158 				cp += 4;
1159 			} else if (strncmp(cp, "/\\\n", 3) == 0) {
1160 				incomment = STARTING_COMMENT;
1161 				cp += 3;
1162 			} else if (strncmp(cp, "/*", 2) == 0) {
1163 				incomment = C_COMMENT;
1164 				cp += 2;
1165 			} else if (strncmp(cp, "//", 2) == 0) {
1166 				incomment = CXX_COMMENT;
1167 				cp += 2;
1168 			} else if (strncmp(cp, "\'", 1) == 0) {
1169 				incomment = CHAR_LITERAL;
1170 				linestate = LS_DIRTY;
1171 				cp += 1;
1172 			} else if (strncmp(cp, "\"", 1) == 0) {
1173 				incomment = STRING_LITERAL;
1174 				linestate = LS_DIRTY;
1175 				cp += 1;
1176 			} else if (strncmp(cp, "\n", 1) == 0) {
1177 				linestate = LS_START;
1178 				cp += 1;
1179 			} else if (strchr(" \r\t", *cp) != NULL) {
1180 				cp += 1;
1181 			} else
1182 				return (cp);
1183 			continue;
1184 		case CXX_COMMENT:
1185 			if (strncmp(cp, "\n", 1) == 0) {
1186 				incomment = NO_COMMENT;
1187 				linestate = LS_START;
1188 			}
1189 			cp += 1;
1190 			continue;
1191 		case CHAR_LITERAL:
1192 		case STRING_LITERAL:
1193 			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1194 			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
1195 				incomment = NO_COMMENT;
1196 				cp += 1;
1197 			} else if (cp[0] == '\\') {
1198 				if (cp[1] == '\0')
1199 					cp += 1;
1200 				else
1201 					cp += 2;
1202 			} else if (strncmp(cp, "\n", 1) == 0) {
1203 				if (incomment == CHAR_LITERAL)
1204 					error("unterminated char literal");
1205 				else
1206 					error("unterminated string literal");
1207 			} else
1208 				cp += 1;
1209 			continue;
1210 		case C_COMMENT:
1211 			if (strncmp(cp, "*\\\r\n", 4) == 0) {
1212 				incomment = FINISHING_COMMENT;
1213 				cp += 4;
1214 			} else if (strncmp(cp, "*\\\n", 3) == 0) {
1215 				incomment = FINISHING_COMMENT;
1216 				cp += 3;
1217 			} else if (strncmp(cp, "*/", 2) == 0) {
1218 				incomment = NO_COMMENT;
1219 				cp += 2;
1220 			} else
1221 				cp += 1;
1222 			continue;
1223 		case STARTING_COMMENT:
1224 			if (*cp == '*') {
1225 				incomment = C_COMMENT;
1226 				cp += 1;
1227 			} else if (*cp == '/') {
1228 				incomment = CXX_COMMENT;
1229 				cp += 1;
1230 			} else {
1231 				incomment = NO_COMMENT;
1232 				linestate = LS_DIRTY;
1233 			}
1234 			continue;
1235 		case FINISHING_COMMENT:
1236 			if (*cp == '/') {
1237 				incomment = NO_COMMENT;
1238 				cp += 1;
1239 			} else
1240 				incomment = C_COMMENT;
1241 			continue;
1242 		default:
1243 			abort(); /* bug */
1244 		}
1245 	return (cp);
1246 }
1247 
/*
 * Skip macro arguments: if the next non-blank character is '(' step
 * over everything up to the matching ')'.  When the parentheses never
 * balance the original position is returned, so that the syntax error
 * is detected again later.
 */
static const char *
skipargs(const char *cp)
{
	const char *const start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	for (;;) {
		switch (*cp) {
		case '(':
			nesting++;
			break;
		case ')':
			nesting--;
			break;
		default:
			break;
		}
		cp = skipcomment(cp + 1);
		if (nesting == 0 || *cp == '\0')
			break;
	}
	return (nesting == 0 ? cp : start);
}
1272 
/*
 * Step over an identifier: advance until endsym() accepts the current
 * character and return that position.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1283 
/*
 * Skip whitespace and comments, then return a newly allocated copy of
 * the identifier that follows, advancing *cpp past it.  If there is no
 * identifier, NULL is returned and *cpp is not changed.
 */
static const char *
getsym(const char **cpp)
{
	const char *first, *past;

	first = skipcomment(*cpp);
	past = skipsym(first);
	if (past == first)
		return NULL;
	*cpp = past;
	return (xstrdup(first, past));
}
1299 
/*
 * Test whether s (a symbol) is a prefix of t and the character that
 * follows it in t ends a symbol.  On a match the result points at
 * that following character of t; otherwise it is NULL.
 */
static const char *
matchsym(const char *s, const char *t)
{
	for (; *s != '\0' && *t != '\0'; s++, t++) {
		if (*s != *t)
			return (NULL);
	}
	/* s must be used up, and t must not continue the identifier */
	if (*s != '\0' || !endsym(*t))
		return (NULL);
	return (t);
}
1319 
1320 /*
1321  * Look for the symbol in the symbol table. If it is found, we return
1322  * the symbol table index, else we return -1.
1323  */
1324 static int
1325 findsym(const char **strp)
1326 {
1327 	const char *str;
1328 	int symind;
1329 
1330 	str = *strp;
1331 	*strp = skipsym(str);
1332 	if (symlist) {
1333 		if (*strp == str)
1334 			return (-1);
1335 		if (symdepth && firstsym)
1336 			printf("%s%3d", zerosyms ? "" : "\n", depth);
1337 		firstsym = zerosyms = false;
1338 		printf("%s%.*s%s",
1339 		       symdepth ? " " : "",
1340 		       (int)(*strp-str), str,
1341 		       symdepth ? "" : "\n");
1342 		/* we don't care about the value of the symbol */
1343 		return (0);
1344 	}
1345 	for (symind = 0; symind < nsyms; ++symind) {
1346 		if (matchsym(symname[symind], str) != NULL) {
1347 			debugsym("findsym", symind);
1348 			return (symind);
1349 		}
1350 	}
1351 	return (-1);
1352 }
1353 
1354 /*
1355  * Resolve indirect symbol values to their final definitions.
1356  */
1357 static void
1358 indirectsym(void)
1359 {
1360 	const char *cp;
1361 	int changed, sym, ind;
1362 
1363 	do {
1364 		changed = 0;
1365 		for (sym = 0; sym < nsyms; ++sym) {
1366 			if (value[sym] == NULL)
1367 				continue;
1368 			cp = value[sym];
1369 			ind = findsym(&cp);
1370 			if (ind == -1 || ind == sym ||
1371 			    *cp != '\0' ||
1372 			    value[ind] == NULL ||
1373 			    value[ind] == value[sym])
1374 				continue;
1375 			debugsym("indir...", sym);
1376 			value[sym] = value[ind];
1377 			debugsym("...ectsym", sym);
1378 			changed++;
1379 		}
1380 	} while (changed);
1381 }
1382 
/*
 * Add a symbol to the symbol table, given in the form sym or sym=val.
 * With definethis set a missing value defaults to "1"; without it no
 * value is stored and an '=' is not accepted.  Anything else after the
 * name ends up in usage().  symval is split in place at the '='.
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *val = skipsym(symval);

	if (*val == '\0') {
		val = definethis ? "1" : NULL;
	} else if (definethis && *val == '=') {
		/* terminate the name where the '=' was */
		symval[val - symval] = '\0';
		val++;
	} else {
		usage();
	}
	addsym2(ignorethis, symval, val);
}
1403 
1404 /*
1405  * Add a symbol to the symbol table.
1406  */
1407 static void
1408 addsym2(bool ignorethis, const char *sym, const char *val)
1409 {
1410 	const char *cp = sym;
1411 	int symind;
1412 
1413 	symind = findsym(&cp);
1414 	if (symind < 0) {
1415 		if (nsyms >= MAXSYMS)
1416 			errx(2, "too many symbols");
1417 		symind = nsyms++;
1418 	}
1419 	ignore[symind] = ignorethis;
1420 	symname[symind] = sym;
1421 	value[symind] = val;
1422 	debugsym("addsym", symind);
1423 }
1424 
1425 static void
1426 debugsym(const char *why, int symind)
1427 {
1428 	debug("%s %s%c%s", why, symname[symind],
1429 	    value[symind] ? '=' : ' ',
1430 	    value[symind] ? value[symind] : "undef");
1431 }
1432 
1433 /*
1434  * Add symbols to the symbol table from a file containing
1435  * #define and #undef preprocessor directives.
1436  */
1437 static void
1438 defundefile(const char *fn)
1439 {
1440 	filename = fn;
1441 	input = fopen(fn, "rb");
1442 	if (input == NULL)
1443 		err(2, "can't open %s", fn);
1444 	linenum = 0;
1445 	while (defundef())
1446 		;
1447 	if (ferror(input))
1448 		err(2, "can't read %s", filename);
1449 	else
1450 		fclose(input);
1451 	if (incomment)
1452 		error("EOF in comment");
1453 }
1454 
1455 /*
1456  * Read and process one #define or #undef directive
1457  */
1458 static bool
1459 defundef(void)
1460 {
1461 	const char *cp, *kw, *sym, *val, *end;
1462 
1463 	cp = skiphash();
1464 	if (cp == NULL)
1465 		return (false);
1466 	if (*cp == '\0')
1467 		goto done;
1468 	/* strip trailing whitespace, and do a fairly rough check to
1469 	   avoid unsupported multi-line preprocessor directives */
1470 	end = cp + strlen(cp);
1471 	while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1472 		--end;
1473 	if (end > tline && end[-1] == '\\')
1474 		Eioccc();
1475 
1476 	kw = cp;
1477 	if ((cp = matchsym("define", kw)) != NULL) {
1478 		sym = getsym(&cp);
1479 		if (sym == NULL)
1480 			error("missing macro name in #define");
1481 		if (*cp == '(') {
1482 			val = "1";
1483 		} else {
1484 			cp = skipcomment(cp);
1485 			val = (cp < end) ? xstrdup(cp, end) : "";
1486 		}
1487 		debug("#define");
1488 		addsym2(false, sym, val);
1489 	} else if ((cp = matchsym("undef", kw)) != NULL) {
1490 		sym = getsym(&cp);
1491 		if (sym == NULL)
1492 			error("missing macro name in #undef");
1493 		cp = skipcomment(cp);
1494 		debug("#undef");
1495 		addsym2(false, sym, NULL);
1496 	} else {
1497 		error("unrecognized preprocessor directive");
1498 	}
1499 	skipline(cp);
1500 done:
1501 	debug("parser line %d state %s comment %s line", linenum,
1502 	    comment_name[incomment], linestate_name[linestate]);
1503 	return (true);
1504 }
1505 
/*
 * Return s1 followed by s2 in freshly allocated memory.  Any failure
 * (formatting or allocation) terminates the program via err().
 */
static char *
astrcat(const char *s1, const char *s2)
{
	char *joined;
	size_t need;
	int n;

	/* first pass only measures the result */
	n = snprintf(NULL, 0, "%s%s", s1, s2);
	if (n < 0)
		err(2, "snprintf");
	need = (size_t)n + 1;
	if ((joined = (char *)malloc(need)) == NULL)
		err(2, "malloc");
	snprintf(joined, need, "%s%s", s1, s2);
	return (joined);
}
1526 
/*
 * Return a freshly allocated copy of the text from start up to (not
 * including) end.  Allocation failure terminates the program via
 * err(); an end that lies before start is treated as a bug.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	char *copy;
	size_t size;

	if (start > end)
		abort(); /* bug */
	/* room for the segment plus the terminating zero byte */
	size = (size_t)(end - start) + 1;
	copy = malloc(size);
	if (copy == NULL)
		err(2, "malloc");
	snprintf(copy, size, "%s", start);
	return (copy);
}
1544 
1545 /*
1546  * Diagnostics.
1547  */
1548 static void
1549 debug(const char *msg, ...)
1550 {
1551 	va_list ap;
1552 
1553 	if (debugging) {
1554 		va_start(ap, msg);
1555 		vwarnx(msg, ap);
1556 		va_end(ap);
1557 	}
1558 }
1559 
1560 static void
1561 error(const char *msg)
1562 {
1563 	if (depth == 0)
1564 		warnx("%s: %d: %s", filename, linenum, msg);
1565 	else
1566 		warnx("%s: %d: %s (#if line %d depth %d)",
1567 		    filename, linenum, msg, stifline[depth], depth);
1568 	closeio();
1569 	errx(2, "output may be truncated");
1570 }
1571