xref: /netbsd-src/usr.bin/unifdef/unifdef.c (revision d48f14661dda8638fee055ba15d35bdfb29b9fa8)
1 /*	$NetBSD: unifdef.c,v 1.13 2006/04/30 23:56:42 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1985, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. All advertising materials mentioning features or use of this software
50  *    must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Berkeley and its contributors.
53  * 4. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  */
69 
70 #include <sys/cdefs.h>
71 
72 #ifndef lint
73 #if 0
74 static const char copyright[] =
75 "@(#) Copyright (c) 1985, 1993\n\
76 	The Regents of the University of California.  All rights reserved.\n";
77 #endif
78 #ifdef __IDSTRING
79 __IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
80 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.13 2006/04/30 23:56:42 christos Exp $");
81 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
82 #endif
83 #endif /* not lint */
84 #ifdef __FBSDID
85 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
86 #endif
87 
88 /*
89  * unifdef - remove ifdef'ed lines
90  *
91  *  Wishlist:
92  *      provide an option which will append the name of the
93  *        appropriate symbol after #else's and #endif's
94  *      provide an option which will check symbols after
95  *        #else's and #endif's to see that they match their
96  *        corresponding #ifdef or #ifndef
97  *      generate #line directives in place of deleted code
98  *
99  *   The first two items above require better buffer handling, which would
100  *     also make it possible to handle all "dodgy" directives correctly.
101  */
102 
103 #include <ctype.h>
104 #include <err.h>
105 #include <stdarg.h>
106 #include <stdio.h>
107 #include <stdlib.h>
108 #include <string.h>
109 #include <unistd.h>
110 
111 #include "stdbool.h"
112 
/*
 * Classification of an input line, as returned by the line parser.
 *
 * The values LT_TRUEI..LT_ENDIF are the directive types proper.  Adding
 * LT_DODGY to one of them gives the "dodgy" variant of the same
 * directive, so the dodgy range runs from LT_DODGY to LT_DODGY_LAST.
 * LT_COUNT is the number of distinct values.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT
} Linetype;

/* Printable name of each Linetype value, indexed by the value itself. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
141 
/*
 * State of the #if processor for one nesting level.  IS_COUNT is the
 * number of states.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable name of each Ifstate value, indexed by the value itself. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
163 
/*
 * State of the comment parser.  NO_COMMENT is zero, so a Comment_state
 * can be tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable name of each Comment_state value, indexed by the value. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING"
};
176 
/*
 * State of the preprocessor line parser: how much of a potential
 * directive has been seen so far on the current logical line.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable name of each Line_state value, indexed by the value. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
187 
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * These size the fixed arrays used below: the per-nesting-level state
 * stacks (MAXDEPTH), the input line buffer (MAXLINE) and the symbol
 * table (MAXSYMS).
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */
#define	MAXSYMS         4096			/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 * (Lines are read with a limit of MAXLINE, while the buffer itself is
 * MAXLINE+EDITSLOP bytes.)
 */
#define	EDITSLOP        10
200 
/*
 * Globals.
 *
 * All state is file-local.  The first group holds the command line
 * flags set in main(); the second is the symbol table filled in by
 * addsym(); the rest is the parser and #if state machine state.
 */

static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnblank;		/* -l: blank deleted lines */
static bool             symlist;		/* -s: output symbol list */
static bool             text;			/* -t: this is a text file */

/* parallel arrays indexed by symbol number; value[] is NULL for -U */
static const char      *symname[MAXSYMS];	/* symbol name */
static const char      *value[MAXSYMS];		/* -Dsym=value */
static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
static int              nsyms;			/* number of symbols */

static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name */
static int              linenum;		/* current line number */

static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's */

/* the three arrays below are stacks indexed by the nesting depth */
static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static bool             keepthis;		/* don't delete constant #if */

static int              exitstat;		/* program exit status */
234 
/*
 * Forward declarations of the file-local functions.
 *
 * NOTE(review): getline() here has the same name as the POSIX
 * getline() that some <stdio.h> implementations declare with a
 * different signature; on such systems this declaration may conflict.
 * Confirm against the target platform.
 */
static void             addsym(bool, bool, char *);
static void             debug(const char *, ...);
static void             done(void);
static void             error(const char *);
static int              findsym(const char *);
static void             flushline(bool);
static Linetype         getline(void);
static Linetype         ifeval(const char **);
static void             ignoreoff(void);
static void             ignoreon(void);
static void             keywordedit(const char *);
static void             nest(void);
static void             process(void);
static const char      *skipcomment(const char *);
static const char      *skipsym(const char *);
static void             state(Ifstate);
static int              strlcmp(const char *, const char *, size_t);
static void             usage(void);
253 
/*
 * True if c cannot be part of an identifier, i.e. it is not a letter,
 * a digit or an underscore.  The argument is parenthesized at each use
 * so that an expression argument is not re-associated by the casts and
 * the comparison; it is still evaluated more than once, so it must not
 * have side effects.
 */
#define endsym(c) (!isalpha((unsigned char)(c)) && !isdigit((unsigned char)(c)) && (c) != '_')
255 
256 /*
257  * The main program.
258  */
259 int
260 main(int argc, char *argv[])
261 {
262 	int opt;
263 
264 	while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
265 		switch (opt) {
266 		case 'i': /* treat stuff controlled by these symbols as text */
267 			/*
268 			 * For strict backwards-compatibility the U or D
269 			 * should be immediately after the -i but it doesn't
270 			 * matter much if we relax that requirement.
271 			 */
272 			opt = *optarg++;
273 			if (opt == 'D')
274 				addsym(true, true, optarg);
275 			else if (opt == 'U')
276 				addsym(true, false, optarg);
277 			else
278 				usage();
279 			break;
280 		case 'D': /* define a symbol */
281 			addsym(false, true, optarg);
282 			break;
283 		case 'U': /* undef a symbol */
284 			addsym(false, false, optarg);
285 			break;
286 		case 'I':
287 			/* no-op for compatibility with cpp */
288 			break;
289 		case 'c': /* treat -D as -U and vice versa */
290 			complement = true;
291 			break;
292 		case 'd':
293 			debugging = true;
294 			break;
295 		case 'e': /* fewer errors from dodgy lines */
296 			iocccok = true;
297 			break;
298 		case 'k': /* process constant #ifs */
299 			killconsts = true;
300 			break;
301 		case 'l': /* blank deleted lines instead of omitting them */
302 			lnblank = true;
303 			break;
304 		case 's': /* only output list of symbols that control #ifs */
305 			symlist = true;
306 			break;
307 		case 't': /* don't parse C comments */
308 			text = true;
309 			break;
310 		default:
311 			usage();
312 		}
313 	argc -= optind;
314 	argv += optind;
315 	if (nsyms == 0 && !symlist) {
316 		warnx("must -D or -U at least one symbol");
317 		usage();
318 	}
319 	if (argc > 1) {
320 		errx(2, "can only do one file");
321 	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
322 		filename = *argv;
323 		input = fopen(filename, "r");
324 		if (input == NULL)
325 			err(2, "can't open %s", filename);
326 	} else {
327 		filename = "[stdin]";
328 		input = stdin;
329 	}
330 	process();
331 	abort(); /* bug */
332 }
333 
/*
 * Print the command synopsis on standard error and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
341 
342 /*
343  * A state transition function alters the global #if processing state
344  * in a particular way. The table below is indexed by the current
345  * processing state and the type of the current line.
346  *
347  * Nesting is handled by keeping a stack of states; some transition
348  * functions increase or decrease the depth. They also maintain the
349  * ignore state on a stack. In some complicated cases they have to
350  * alter the preprocessor directive, as follows.
351  *
352  * When we have processed a group that starts off with a known-false
353  * #if/#elif sequence (which has therefore been deleted) followed by a
354  * #elif that we don't understand and therefore must keep, we edit the
355  * latter into a #if to keep the nesting correct.
356  *
357  * When we find a true #elif in a group, the following block will
358  * always be kept and the rest of the sequence after the next #elif or
359  * #else will be discarded. We edit the #elif into a #else and the
360  * following directive to #endif since this has the desired behaviour.
361  *
362  * "Dodgy" directives are split across multiple lines, the most common
363  * example being a multi-line comment hanging off the right of the
364  * directive. We can handle them correctly only if there is no change
365  * from printing to dropping (or vice versa) caused by that directive.
366  * If the directive is the first of a group we have a choice between
367  * failing with an error, or passing it through unchanged instead of
368  * evaluating it. The latter is not the default to avoid questions from
369  * users about unifdef unexpectedly leaving behind preprocessor directives.
370  */
/*
 * Type of a state transition function.  Each one below is a short
 * composition of the primitives print/drop (emit or discard the current
 * line via flushline), ignoreon/ignoreoff, nest, state() and the error
 * reporters.  They are referenced from trans_table.
 */
typedef void state_fn(void);

/* report an error */
static void Eelif (void) { error("Inappropriate #elif"); }
static void Eelse (void) { error("Inappropriate #else"); }
static void Eendif(void) { error("Inappropriate #endif"); }
static void Eeof  (void) { error("Premature EOF"); }
static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
/* plain line handling */
static void print (void) { flushline(true); }
static void drop  (void) { flushline(false); }
/* output lacks group's start line */
static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
/* print/pass this block */
static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
static void Pelse (void) { print();              state(IS_PASS_ELSE); }
static void Pendif(void) { print(); --depth; }
/* discard this block */
static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
static void Dendif(void) { drop();  --depth; }
/* first line of group: push a nesting level, then act as above */
static void Fdrop (void) { nest();  Dfalse(); }
static void Fpass (void) { nest();  Pelif(); }
static void Ftrue (void) { nest();  Strue(); }
static void Ffalse(void) { nest();  Sfalse(); }
/*
 * variable pedantry for obfuscated lines: with -e (iocccok) the
 * directive is passed through unevaluated, otherwise it is an error
 */
static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
/* ignore comments in this block */
static void Idrop (void) { Fdrop();  ignoreon(); }
static void Itrue (void) { Ftrue();  ignoreon(); }
static void Ifalse(void) { Ffalse(); ignoreon(); }
/*
 * edit this line: Mpass overwrites the four bytes at keyword with
 * "if  " (no terminator is written, so the rest of the line is kept);
 * the others replace everything from keyword onwards via keywordedit()
 */
static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
413 
/*
 * The transition table: trans_table[current Ifstate][Linetype] is the
 * function to run for a line of that type in that state.  Each row
 * lists the ten plain directive types, then the same ten in their
 * "dodgy" form, then PLAIN and EOF; the column legend is repeated at
 * the bottom of the table.
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
  print, done },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop,  Eeof },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop,  Eeof }
/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
  PLAIN  EOF */
};
459 
460 /*
461  * State machine utility functions
462  */
463 static void
464 done(void)
465 {
466 	if (incomment)
467 		error("EOF in comment");
468 	exit(exitstat);
469 }
470 static void
471 ignoreoff(void)
472 {
473 	ignoring[depth] = ignoring[depth-1];
474 }
/*
 * Set the "ignore comments" flag for the current nesting level;
 * skipcomment() then only skips whitespace while at this level.
 */
static void
ignoreon(void)
{
	ignoring[depth] = true;
}
480 static void
481 keywordedit(const char *replacement)
482 {
483 	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
484 	print();
485 }
486 static void
487 nest(void)
488 {
489 	depth += 1;
490 	if (depth >= MAXDEPTH)
491 		error("Too many levels of nesting");
492 	stifline[depth] = linenum;
493 }
/*
 * Set the #if processing state of the current nesting level.
 */
static void
state(Ifstate is)
{
	ifstate[depth] = is;
}
499 
500 /*
501  * Write a line to the output or not, according to command line options.
502  */
503 static void
504 flushline(bool keep)
505 {
506 	if (symlist)
507 		return;
508 	if (keep ^ complement)
509 		fputs(tline, stdout);
510 	else {
511 		if (lnblank)
512 			putc('\n', stdout);
513 		exitstat = 1;
514 	}
515 }
516 
517 /*
518  * The driver for the state machine.
519  */
520 static void
521 process(void)
522 {
523 	Linetype lineval;
524 
525 	for (;;) {
526 		linenum++;
527 		lineval = getline();
528 		trans_table[ifstate[depth]][lineval]();
529 		debug("process %s -> %s depth %d",
530 		    linetype_name[lineval],
531 		    ifstate_name[ifstate[depth]], depth);
532 	}
533 }
534 
/*
 * Parse a line and determine its type. We keep the preprocessor line
 * parser state between calls in the global variable linestate, with
 * help from skipcomment().
 *
 * Reads the next chunk of input into tline and returns its Linetype:
 * LT_EOF when fgets() reports no more input, LT_PLAIN for anything that
 * is not a recognised conditional directive, otherwise the directive
 * type, with LT_DODGY added when the directive starts or ends inside a
 * comment.  As a side effect, keyword is left pointing at the directive
 * name within tline.
 */
static Linetype
getline(void)
{
	const char *cp;
	int cursym;
	int kwlen;
	Linetype retval;
	Comment_state wascomment;

	if (fgets(tline, MAXLINE, input) == NULL)
		return (LT_EOF);
	retval = LT_PLAIN;
	/* remember whether this line began inside a comment */
	wascomment = incomment;
	cp = skipcomment(tline);
	if (linestate == LS_START) {
		if (*cp == '#') {
			linestate = LS_HASH;
			cp = skipcomment(cp + 1);
		} else if (*cp != '\0')
			linestate = LS_DIRTY;
	}
	if (!incomment && linestate == LS_HASH) {
		/* same position as cp, but as a writable pointer */
		keyword = tline + (cp - tline);
		cp = skipsym(cp);
		kwlen = cp - keyword;
		/* no way can we deal with a continuation inside a keyword */
		if (strncmp(cp, "\\\n", 2) == 0)
			Eioccc();
		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
		    strlcmp("ifndef", keyword, kwlen) == 0) {
			cp = skipcomment(cp);
			if ((cursym = findsym(cp)) < 0)
				retval = LT_IF;
			else {
				/* keyword[2] is 'n' only for "ifndef" */
				retval = (keyword[2] == 'n')
				    ? LT_FALSE : LT_TRUE;
				/* an undefined (-U) symbol inverts the sense */
				if (value[cursym] == NULL)
					retval = (retval == LT_TRUE)
					    ? LT_FALSE : LT_TRUE;
				if (ignore[cursym])
					retval = (retval == LT_TRUE)
					    ? LT_TRUEI : LT_FALSEI;
			}
			cp = skipsym(cp);
		} else if (strlcmp("if", keyword, kwlen) == 0)
			retval = ifeval(&cp);
		else if (strlcmp("elif", keyword, kwlen) == 0)
			/* map LT_IF/LT_TRUE/LT_FALSE onto the #elif types */
			retval = ifeval(&cp) - LT_IF + LT_ELIF;
		else if (strlcmp("else", keyword, kwlen) == 0)
			retval = LT_ELSE;
		else if (strlcmp("endif", keyword, kwlen) == 0)
			retval = LT_ENDIF;
		else {
			linestate = LS_DIRTY;
			retval = LT_PLAIN;
		}
		cp = skipcomment(cp);
		if (*cp != '\0') {
			/*
			 * Something follows what we parsed, so a known
			 * result is downgraded to the "unknown" type.
			 */
			linestate = LS_DIRTY;
			if (retval == LT_TRUE || retval == LT_FALSE ||
			    retval == LT_TRUEI || retval == LT_FALSEI)
				retval = LT_IF;
			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
				retval = LT_ELIF;
		}
		/* directive began or ended inside a comment: mark it dodgy */
		if (retval != LT_PLAIN && (wascomment || incomment)) {
			retval += LT_DODGY;
			if (incomment)
				linestate = LS_DIRTY;
		}
		/* skipcomment should have changed the state */
		if (linestate == LS_HASH)
			abort(); /* bug */
	}
	if (linestate == LS_DIRTY) {
		/* scan the rest so the comment state stays up to date */
		while (*cp != '\0')
			cp = skipcomment(cp + 1);
	}
	debug("parser %s comment %s line",
	    comment_name[incomment], linestate_name[linestate]);
	return (retval);
}
622 
/*
 * The binary operators supported by the expression evaluator.  Each
 * takes the two operand values and returns 1 if the relation holds and
 * 0 otherwise.  Note that if support for division is added then we also
 * need short-circuiting booleans because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return (a < b) ? 1 : 0;
}

static int
op_gt(int a, int b)
{
	return (a > b) ? 1 : 0;
}

static int
op_le(int a, int b)
{
	return (a <= b) ? 1 : 0;
}

static int
op_ge(int a, int b)
{
	return (a >= b) ? 1 : 0;
}

static int
op_eq(int a, int b)
{
	return (a == b) ? 1 : 0;
}

static int
op_ne(int a, int b)
{
	return (a != b) ? 1 : 0;
}

static int
op_or(int a, int b)
{
	return (a != 0 || b != 0) ? 1 : 0;
}

static int
op_and(int a, int b)
{
	return (a != 0 && b != 0) ? 1 : 0;
}
636 
/*
 * An evaluation function takes three arguments, as follows: (1) a pointer to
 * an element of the precedence table which lists the operators at the current
 * level of precedence; (2) a pointer to an integer which will receive the
 * value of the expression; and (3) a pointer to a char* that points to the
 * expression to be evaluated and that is updated to the end of the expression
 * when evaluation is complete. The function returns LT_FALSE if the value of
 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
 * expression could not be evaluated.
 *
 * When LT_IF is returned the char* is left where it was on entry.
 */
struct ops;

typedef Linetype eval_fn(const struct ops *, int *, const char **);

/* forward declarations; both are needed by the precedence table below */
static eval_fn eval_table, eval_unary;
652 
/*
 * The precedence table. Expressions involving binary operators are evaluated
 * in a table-driven way by eval_table. When it evaluates a subexpression it
 * calls the inner function with its first argument pointing to the next
 * element of the table. Innermost expressions have special non-table-driven
 * handling.
 *
 * Rows are ordered from the loosest-binding operators to the tightest.
 * Each row names fewer than five operators, so the remaining op[]
 * slots are zero-initialized and their NULL str ends eval_table's
 * search.  Within the last row the two-character operators come before
 * their one-character prefixes because matching is by prefix.
 */
static const struct ops {
	eval_fn *inner;
	struct op {
		const char *str;
		int (*fn)(int, int);
	} op[5];
} eval_ops[] = {
	{ eval_table, { { "||", op_or } } },
	{ eval_table, { { "&&", op_and } } },
	{ eval_table, { { "==", op_eq },
			{ "!=", op_ne } } },
	{ eval_unary, { { "<=", op_le },
			{ ">=", op_ge },
			{ "<", op_lt },
			{ ">", op_gt } } }
};
676 
677 /*
678  * Function for evaluating the innermost parts of expressions,
679  * viz. !expr (expr) defined(symbol) symbol number
680  * We reset the keepthis flag when we find a non-constant subexpression.
681  */
682 static Linetype
683 eval_unary(const struct ops *ops, int *valp, const char **cpp)
684 {
685 	const char *cp;
686 	char *ep;
687 	int sym;
688 
689 	cp = skipcomment(*cpp);
690 	if (*cp == '!') {
691 		debug("eval%d !", ops - eval_ops);
692 		cp++;
693 		if (eval_unary(ops, valp, &cp) == LT_IF)
694 			return (LT_IF);
695 		*valp = !*valp;
696 	} else if (*cp == '(') {
697 		cp++;
698 		debug("eval%d (", ops - eval_ops);
699 		if (eval_table(eval_ops, valp, &cp) == LT_IF)
700 			return (LT_IF);
701 		cp = skipcomment(cp);
702 		if (*cp++ != ')')
703 			return (LT_IF);
704 	} else if (isdigit((unsigned char)*cp)) {
705 		debug("eval%d number", ops - eval_ops);
706 		*valp = strtol(cp, &ep, 0);
707 		cp = skipsym(cp);
708 	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
709 		cp = skipcomment(cp+7);
710 		debug("eval%d defined", ops - eval_ops);
711 		if (*cp++ != '(')
712 			return (LT_IF);
713 		cp = skipcomment(cp);
714 		sym = findsym(cp);
715 		if (sym < 0 || !symlist)
716 			return (LT_IF);
717 		*valp = (value[sym] != NULL);
718 		cp = skipsym(cp);
719 		cp = skipcomment(cp);
720 		if (*cp++ != ')')
721 			return (LT_IF);
722 		keepthis = false;
723 	} else if (!endsym(*cp)) {
724 		debug("eval%d symbol", ops - eval_ops);
725 		sym = findsym(cp);
726 		if (sym < 0 || !symlist)
727 			return (LT_IF);
728 		if (value[sym] == NULL)
729 			*valp = 0;
730 		else {
731 			*valp = strtol(value[sym], &ep, 0);
732 			if (*ep != '\0' || ep == value[sym])
733 				return (LT_IF);
734 		}
735 		cp = skipsym(cp);
736 		keepthis = false;
737 	} else {
738 		debug("eval%d bad expr", ops - eval_ops);
739 		return (LT_IF);
740 	}
741 
742 	*cpp = cp;
743 	debug("eval%d = %d", ops - eval_ops, *valp);
744 	return (*valp ? LT_TRUE : LT_FALSE);
745 }
746 
747 /*
748  * Table-driven evaluation of binary operators.
749  */
750 static Linetype
751 eval_table(const struct ops *ops, int *valp, const char **cpp)
752 {
753 	const struct op *op;
754 	const char *cp;
755 	int val;
756 
757 	debug("eval%d", ops - eval_ops);
758 	cp = *cpp;
759 	if (ops->inner(ops+1, valp, &cp) == LT_IF)
760 		return (LT_IF);
761 	for (;;) {
762 		cp = skipcomment(cp);
763 		for (op = ops->op; op->str != NULL; op++)
764 			if (strncmp(cp, op->str, strlen(op->str)) == 0)
765 				break;
766 		if (op->str == NULL)
767 			break;
768 		cp += strlen(op->str);
769 		debug("eval%d %s", ops - eval_ops, op->str);
770 		if (ops->inner(ops+1, &val, &cp) == LT_IF)
771 			return (LT_IF);
772 		*valp = op->fn(*valp, val);
773 	}
774 
775 	*cpp = cp;
776 	debug("eval%d = %d", ops - eval_ops, *valp);
777 	return (*valp ? LT_TRUE : LT_FALSE);
778 }
779 
780 /*
781  * Evaluate the expression on a #if or #elif line. If we can work out
782  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
783  * return just a generic LT_IF.
784  */
785 static Linetype
786 ifeval(const char **cpp)
787 {
788 	int ret;
789 	int val;
790 
791 	debug("eval %s", *cpp);
792 	keepthis = killconsts ? false : true;
793 	ret = eval_table(eval_ops, &val, cpp);
794 	debug("eval = %d", val);
795 	return (keepthis ? LT_IF : ret);
796 }
797 
/*
 * Skip over comments and stop at the next character position that is
 * not whitespace. Between calls we keep the comment state in the
 * global variable incomment, and we also adjust the global variable
 * linestate when we see a newline.
 * XXX: doesn't cope with the buffer splitting inside a state transition.
 *
 * Returns a pointer to the first character that is neither whitespace
 * nor part of a comment, or to the terminating NUL if there is none.
 */
static const char *
skipcomment(const char *cp)
{
	/* in text mode or an ignored group, only whitespace is skipped */
	if (text || ignoring[depth]) {
		for (; isspace((unsigned char)*cp); cp++)
			if (*cp == '\n')
				linestate = LS_START;
		return (cp);
	}
	while (*cp != '\0')
		/* don't reset to LS_START after a line continuation */
		if (strncmp(cp, "\\\n", 2) == 0)
			cp += 2;
		else switch (incomment) {
		case NO_COMMENT:
			if (strncmp(cp, "/\\\n", 3) == 0) {
				/* a slash split from what follows it */
				incomment = STARTING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "/*", 2) == 0) {
				incomment = C_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "//", 2) == 0) {
				incomment = CXX_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "\n", 1) == 0) {
				linestate = LS_START;
				cp += 1;
			} else if (strchr(" \t", *cp) != NULL) {
				cp += 1;
			} else
				/* first significant character */
				return (cp);
			continue;
		case CXX_COMMENT:
			/* a // comment lasts until the newline */
			if (strncmp(cp, "\n", 1) == 0) {
				incomment = NO_COMMENT;
				linestate = LS_START;
			}
			cp += 1;
			continue;
		case C_COMMENT:
			if (strncmp(cp, "*\\\n", 3) == 0) {
				/* a star split from what follows it */
				incomment = FINISHING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "*/", 2) == 0) {
				incomment = NO_COMMENT;
				cp += 2;
			} else
				cp += 1;
			continue;
		case STARTING_COMMENT:
			if (*cp == '*') {
				incomment = C_COMMENT;
				cp += 1;
			} else if (*cp == '/') {
				incomment = CXX_COMMENT;
				cp += 1;
			} else {
				/* the slash did not start a comment after all */
				incomment = NO_COMMENT;
				linestate = LS_DIRTY;
			}
			continue;
		case FINISHING_COMMENT:
			if (*cp == '/') {
				incomment = NO_COMMENT;
				cp += 1;
			} else
				/* the star did not end the comment */
				incomment = C_COMMENT;
			continue;
		default:
			abort(); /* bug */
		}
	return (cp);
}
878 
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	const char *end;

	for (end = cp; !endsym(*end); end++)
		continue;
	return (end);
}
889 
890 /*
891  * Look for the symbol in the symbol table. If is is found, we return
892  * the symbol table index, else we return -1.
893  */
894 static int
895 findsym(const char *str)
896 {
897 	const char *cp;
898 	int symind;
899 
900 	cp = skipsym(str);
901 	if (cp == str)
902 		return (-1);
903 	if (symlist)
904 		printf("%.*s\n", (int)(cp-str), str);
905 	for (symind = 0; symind < nsyms; ++symind) {
906 		if (strlcmp(symname[symind], str, cp-str) == 0) {
907 			debug("findsym %s %s", symname[symind],
908 			    value[symind] ? value[symind] : "");
909 			return (symind);
910 		}
911 	}
912 	return (-1);
913 }
914 
915 /*
916  * Add a symbol to the symbol table.
917  */
918 static void
919 addsym(bool ignorethis, bool definethis, char *sym)
920 {
921 	int symind;
922 	char *val;
923 
924 	symind = findsym(sym);
925 	if (symind < 0) {
926 		if (nsyms >= MAXSYMS)
927 			errx(2, "too many symbols");
928 		symind = nsyms++;
929 	}
930 	symname[symind] = sym;
931 	ignore[symind] = ignorethis;
932 	val = sym + (skipsym(sym) - sym);
933 	if (definethis) {
934 		if (*val == '=') {
935 			value[symind] = val+1;
936 			*val = '\0';
937 		} else if (*val == '\0')
938 			value[symind] = "";
939 		else
940 			usage();
941 	} else {
942 		if (*val != '\0')
943 			usage();
944 		value[symind] = NULL;
945 	}
946 }
947 
/*
 * Compare the string s with the first n characters of t.
 * Like strncmp(), except that for a match s must also end there:
 * the result is zero only if s is exactly the compared prefix of t.
 * A non-zero result is the difference of the first mismatching
 * characters (as unsigned char), or the next character of s when s is
 * longer than the compared part of t.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	return ((unsigned char)s[i]);
}
962 
963 /*
964  * Diagnostics.
965  */
966 static void
967 debug(const char *msg, ...)
968 {
969 	va_list ap;
970 
971 	if (debugging) {
972 		va_start(ap, msg);
973 		vwarnx(msg, ap);
974 		va_end(ap);
975 	}
976 }
977 
978 static void
979 error(const char *msg)
980 {
981 	if (depth == 0)
982 		warnx("%s: %d: %s", filename, linenum, msg);
983 	else
984 		warnx("%s: %d: %s (#if line %d depth %d)",
985 		    filename, linenum, msg, stifline[depth], depth);
986 	errx(2, "output may be truncated");
987 }
988