xref: /netbsd-src/usr.bin/unifdef/unifdef.c (revision 83d4b28a8e34d47cdc13d59fe5c6789e86a4da5c)
1 /*	$NetBSD: unifdef.c,v 1.22 2012/10/13 18:26:03 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1985, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. All advertising materials mentioning features or use of this software
50  *    must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Berkeley and its contributors.
53  * 4. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  */
69 
70 #include <sys/cdefs.h>
71 
72 #ifndef lint
73 #if 0
74 static const char copyright[] =
75 "@(#) Copyright (c) 1985, 1993\n\
76 	The Regents of the University of California.  All rights reserved.\n";
77 #endif
78 #ifdef __IDSTRING
79 __IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
80 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.22 2012/10/13 18:26:03 christos Exp $");
81 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
82 #endif
83 #endif /* not lint */
84 #ifdef __FBSDID
85 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
86 #endif
87 
88 /*
89  * unifdef - remove ifdef'ed lines
90  *
91  *  Wishlist:
92  *      provide an option which will append the name of the
93  *        appropriate symbol after #else's and #endif's
94  *      provide an option which will check symbols after
95  *        #else's and #endif's to see that they match their
96  *        corresponding #ifdef or #ifndef
97  *      generate #line directives in place of deleted code
98  *
99  *   The first two items above require better buffer handling, which would
100  *     also make it possible to handle all "dodgy" directives correctly.
101  */
102 
103 #include <ctype.h>
104 #include <err.h>
105 #include <libgen.h>
106 #include <stdarg.h>
107 #include <stdio.h>
108 #include <stdlib.h>
109 #include <string.h>
110 #include <unistd.h>
111 
112 #include <sys/param.h>
113 #include <sys/stat.h>
114 
115 #include "stdbool.h"
116 
117 /* types of input lines: */
118 typedef enum {
119 	LT_TRUEI,		/* a true #if with ignore flag */
120 	LT_FALSEI,		/* a false #if with ignore flag */
121 	LT_IF,			/* an unknown #if */
122 	LT_TRUE,		/* a true #if */
123 	LT_FALSE,		/* a false #if */
124 	LT_ELIF,		/* an unknown #elif */
125 	LT_ELTRUE,		/* a true #elif */
126 	LT_ELFALSE,		/* a false #elif */
127 	LT_ELSE,		/* #else */
128 	LT_ENDIF,		/* #endif */
129 	LT_DODGY,		/* flag: directive is not on one line */
130 	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
131 	LT_PLAIN,		/* ordinary line */
132 	LT_EOF,			/* end of file */
133 	LT_COUNT
134 } Linetype;
135 
/*
 * Printable names for Linetype values, used in debugging output.
 * The order must match the Linetype enumeration: the ten directive
 * types, the same ten again in their "dodgy" form, then PLAIN and EOF.
 */
static char const * const linetype_name[] = {
	/* directives that fit on one line */
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	/* the same directives offset by LT_DODGY */
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	/* everything else */
	"PLAIN",
	"EOF"
};
145 
146 /* state of #if processing */
147 typedef enum {
148 	IS_OUTSIDE,
149 	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
150 	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
151 	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
152 	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
153 	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
154 	IS_PASS_ELSE,		/* an else after a pass state */
155 	IS_FALSE_ELSE,		/* an else after a true state */
156 	IS_TRUE_ELSE,		/* an else after only false states */
157 	IS_FALSE_TRAILER,	/* #elifs after a true are false */
158 	IS_COUNT
159 } Ifstate;
160 
/*
 * Printable names for Ifstate values, used in debugging output.
 * The order must match the Ifstate enumeration.
 */
static char const * const ifstate_name[] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
167 
168 /* state of comment parser */
169 typedef enum {
170 	NO_COMMENT = false,	/* outside a comment */
171 	C_COMMENT,		/* in a comment like this one */
172 	CXX_COMMENT,		/* between // and end of line */
173 	STARTING_COMMENT,	/* just after slash-backslash-newline */
174 	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
175 } Comment_state;
176 
/*
 * Printable names for Comment_state values, used in debugging output.
 * The order must match the Comment_state enumeration.
 */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING"
};
180 
181 /* state of preprocessor line parser */
182 typedef enum {
183 	LS_START,		/* only space and comments on this line */
184 	LS_HASH,		/* only space, comments, and a hash */
185 	LS_DIRTY		/* this line can't be a preprocessor line */
186 } Line_state;
187 
/*
 * Printable names for Line_state values, used in debugging output.
 * The order must match the Line_state enumeration.
 */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
191 
192 /*
193  * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
194  */
195 #define	MAXDEPTH        64			/* maximum #if nesting */
196 #define	MAXLINE         4096			/* maximum length of line */
197 #define	MAXSYMS         4096			/* maximum number of symbols */
198 
199 /*
200  * Sometimes when editing a keyword the replacement text is longer, so
201  * we leave some space at the end of the tline buffer to accommodate this.
202  */
203 #define	EDITSLOP        10
204 
205 /*
206  * Globals.
207  */
208 
209 static bool             complement;		/* -c: do the complement */
210 static bool             debugging;		/* -d: debugging reports */
211 static bool             iocccok;		/* -e: fewer IOCCC errors */
212 static bool             killconsts;		/* -k: eval constant #ifs */
213 static bool             lnblank;		/* -l: blank deleted lines */
214 static bool             symlist;		/* -s: output symbol list */
215 static bool             text;			/* -t: this is a text file */
216 
217 static const char      *symname[MAXSYMS];	/* symbol name */
218 static const char      *value[MAXSYMS];		/* -Dsym=value */
219 static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
220 static int              nsyms;			/* number of symbols */
221 
222 static FILE            *input;			/* input file pointer */
223 static FILE            *output;			/* output file pointer */
224 static const char      *filename;		/* input file name */
225 static char            *ofilename;		/* output file name */
226 static char             tmpname[MAXPATHLEN];	/* used when overwriting */
227 static int              linenum;		/* current line number */
228 static int              overwriting;		/* output overwrites input */
229 
230 static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
231 static char            *keyword;		/* used for editing #elif's */
232 
233 static Comment_state    incomment;		/* comment parser state */
234 static Line_state       linestate;		/* #if line parser state */
235 static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
236 static bool             ignoring[MAXDEPTH];	/* ignore comments state */
237 static int              stifline[MAXDEPTH];	/* start of current #if */
238 static int              depth;			/* current #if nesting */
239 static bool             keepthis;		/* don't delete constant #if */
240 
241 static int              exitstat;		/* program exit status */
242 
243 static void             addsym(bool, bool, char *);
244 static void             debug(const char *, ...) __printflike(1, 2);
245 __dead static void      done(void);
246 __dead static void      error(const char *);
247 static int              findsym(const char *);
248 static void             flushline(bool);
249 static Linetype         get_line(void);
250 static Linetype         ifeval(const char **);
251 static void             ignoreoff(void);
252 static void             ignoreon(void);
253 static void             keywordedit(const char *);
254 static void             nest(void);
255 __dead static void      process(void);
256 static const char      *skipcomment(const char *);
257 static const char      *skipsym(const char *);
258 static void             state(Ifstate);
259 static int              strlcmp(const char *, const char *, size_t);
260 __dead static void      usage(void);
261 
/*
 * True when c cannot be part of a C identifier, i.e. it is neither a
 * letter, a digit nor an underscore and therefore ends a symbol.
 * The argument is parenthesised so that any expression may be passed,
 * but it is still evaluated more than once: do not pass an expression
 * with side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
263 
264 /*
265  * The main program.
266  */
267 int
main(int argc,char * argv[])268 main(int argc, char *argv[])
269 {
270 	int opt;
271 	struct stat isb, osb;
272 
273 	while ((opt = getopt(argc, argv, "i:D:U:I:o:cdeklst")) != -1)
274 		switch (opt) {
275 		case 'i': /* treat stuff controlled by these symbols as text */
276 			/*
277 			 * For strict backwards-compatibility the U or D
278 			 * should be immediately after the -i but it doesn't
279 			 * matter much if we relax that requirement.
280 			 */
281 			opt = *optarg++;
282 			if (opt == 'D')
283 				addsym(true, true, optarg);
284 			else if (opt == 'U')
285 				addsym(true, false, optarg);
286 			else
287 				usage();
288 			break;
289 		case 'D': /* define a symbol */
290 			addsym(false, true, optarg);
291 			break;
292 		case 'U': /* undef a symbol */
293 			addsym(false, false, optarg);
294 			break;
295 		case 'I':
296 			/* no-op for compatibility with cpp */
297 			break;
298 		case 'c': /* treat -D as -U and vice versa */
299 			complement = true;
300 			break;
301 		case 'd':
302 			debugging = true;
303 			break;
304 		case 'e': /* fewer errors from dodgy lines */
305 			iocccok = true;
306 			break;
307 		case 'k': /* process constant #ifs */
308 			killconsts = true;
309 			break;
310 		case 'l': /* blank deleted lines instead of omitting them */
311 			lnblank = true;
312 			break;
313 		case 'o': /* output to a file */
314 			ofilename = optarg;
315 			break;
316 		case 's': /* only output list of symbols that control #ifs */
317 			symlist = true;
318 			break;
319 		case 't': /* don't parse C comments */
320 			text = true;
321 			break;
322 		default:
323 			usage();
324 		}
325 	argc -= optind;
326 	argv += optind;
327 	if (nsyms == 0 && !symlist) {
328 		warnx("must -D or -U at least one symbol");
329 		usage();
330 	}
331 	if (argc > 1) {
332 		errx(2, "can only do one file");
333 	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
334 		filename = *argv;
335 		input = fopen(filename, "r");
336 		if (input == NULL)
337 			err(2, "can't open %s", filename);
338 	} else {
339 		filename = "[stdin]";
340 		input = stdin;
341 	}
342 	if (ofilename == NULL) {
343 		output = stdout;
344 	} else {
345 		if (stat(ofilename, &osb) == 0) {
346 			if (fstat(fileno(input), &isb) != 0)
347 				err(2, "can't fstat %s", filename);
348 
349 			overwriting = (osb.st_dev == isb.st_dev &&
350 			    osb.st_ino == isb.st_ino);
351 		}
352 		if (overwriting) {
353 			int ofd;
354 
355 			snprintf(tmpname, sizeof(tmpname), "%s/unifdef.XXXXXX",
356 				 dirname(ofilename));
357 			if ((ofd = mkstemp(tmpname)) != -1)
358 				output = fdopen(ofd, "w+");
359 			if (output == NULL)
360 				err(2, "can't create temporary file");
361 			fchmod(ofd, isb.st_mode & ACCESSPERMS);
362 		} else {
363 			output = fopen(ofilename, "w");
364 			if (output == NULL)
365 				err(2, "can't open %s", ofilename);
366 		}
367 	}
368 	process();
369 	abort(); /* bug */
370 }
371 
/*
 * Print the command-line synopsis on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklst] [-o output]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
379 
380 /*
381  * A state transition function alters the global #if processing state
382  * in a particular way. The table below is indexed by the current
383  * processing state and the type of the current line.
384  *
385  * Nesting is handled by keeping a stack of states; some transition
386  * functions increase or decrease the depth. They also maintain the
387  * ignore state on a stack. In some complicated cases they have to
388  * alter the preprocessor directive, as follows.
389  *
390  * When we have processed a group that starts off with a known-false
391  * #if/#elif sequence (which has therefore been deleted) followed by a
392  * #elif that we don't understand and therefore must keep, we edit the
393  * latter into a #if to keep the nesting correct.
394  *
395  * When we find a true #elif in a group, the following block will
396  * always be kept and the rest of the sequence after the next #elif or
397  * #else will be discarded. We edit the #elif into a #else and the
398  * following directive to #endif since this has the desired behaviour.
399  *
400  * "Dodgy" directives are split across multiple lines, the most common
401  * example being a multi-line comment hanging off the right of the
402  * directive. We can handle them correctly only if there is no change
403  * from printing to dropping (or vice versa) caused by that directive.
404  * If the directive is the first of a group we have a choice between
405  * failing with an error, or passing it through unchanged instead of
406  * evaluating it. The latter is not the default to avoid questions from
407  * users about unifdef unexpectedly leaving behind preprocessor directives.
408  */
409 typedef void state_fn(void);
410 
411 /* report an error */
Eelif(void)412 __dead static void Eelif (void) { error("Inappropriate #elif"); }
Eelse(void)413 __dead static void Eelse (void) { error("Inappropriate #else"); }
Eendif(void)414 __dead static void Eendif(void) { error("Inappropriate #endif"); }
Eeof(void)415 __dead static void Eeof  (void) { error("Premature EOF"); }
Eioccc(void)416 __dead static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
417 /* plain line handling */
print(void)418 static void print (void) { flushline(true); }
drop(void)419 static void drop  (void) { flushline(false); }
420 /* output lacks group's start line */
Strue(void)421 static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
Sfalse(void)422 static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
Selse(void)423 static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
424 /* print/pass this block */
Pelif(void)425 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
Pelse(void)426 static void Pelse (void) { print();              state(IS_PASS_ELSE); }
Pendif(void)427 static void Pendif(void) { print(); --depth; }
428 /* discard this block */
Dfalse(void)429 static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
Delif(void)430 static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
Delse(void)431 static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
Dendif(void)432 static void Dendif(void) { drop();  --depth; }
433 /* first line of group */
Fdrop(void)434 static void Fdrop (void) { nest();  Dfalse(); }
Fpass(void)435 static void Fpass (void) { nest();  Pelif(); }
Ftrue(void)436 static void Ftrue (void) { nest();  Strue(); }
Ffalse(void)437 static void Ffalse(void) { nest();  Sfalse(); }
438 /* variable pedantry for obfuscated lines */
Oiffy(void)439 static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
Oif(void)440 static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
Oelif(void)441 static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
442 /* ignore comments in this block */
Idrop(void)443 static void Idrop (void) { Fdrop();  ignoreon(); }
Itrue(void)444 static void Itrue (void) { Ftrue();  ignoreon(); }
Ifalse(void)445 static void Ifalse(void) { Ffalse(); ignoreon(); }
446 /* edit this line */
Mpass(void)447 static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
Mtrue(void)448 static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
Melif(void)449 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
Melse(void)450 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
451 
452 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
453 /* IS_OUTSIDE */
454 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
455   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
456   print, done },
457 /* IS_FALSE_PREFIX */
458 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
459   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
460   drop,  Eeof },
461 /* IS_TRUE_PREFIX */
462 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
463   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
464   print, Eeof },
465 /* IS_PASS_MIDDLE */
466 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
467   Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
468   print, Eeof },
469 /* IS_FALSE_MIDDLE */
470 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
471   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
472   drop,  Eeof },
473 /* IS_TRUE_MIDDLE */
474 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
475   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
476   print, Eeof },
477 /* IS_PASS_ELSE */
478 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
479   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
480   print, Eeof },
481 /* IS_FALSE_ELSE */
482 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
483   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
484   drop,  Eeof },
485 /* IS_TRUE_ELSE */
486 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
487   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
488   print, Eeof },
489 /* IS_FALSE_TRAILER */
490 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
491   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
492   drop,  Eeof }
493 /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
494   TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
495   PLAIN  EOF */
496 };
497 
498 /*
499  * State machine utility functions
500  */
501 static void
done(void)502 done(void)
503 {
504 	if (incomment)
505 		error("EOF in comment");
506 	if (fclose(output)) {
507 		if (overwriting) {
508 			unlink(tmpname);
509 			errx(2, "%s unchanged", ofilename);
510 		}
511 	}
512 	if (overwriting && rename(tmpname, ofilename)) {
513 		unlink(tmpname);
514 		errx(2, "%s unchanged", ofilename);
515 	}
516 	exit(exitstat);
517 }
518 static void
ignoreoff(void)519 ignoreoff(void)
520 {
521 	ignoring[depth] = ignoring[depth-1];
522 }
523 static void
ignoreon(void)524 ignoreon(void)
525 {
526 	ignoring[depth] = true;
527 }
528 static void
keywordedit(const char * replacement)529 keywordedit(const char *replacement)
530 {
531 	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
532 	print();
533 }
534 static void
nest(void)535 nest(void)
536 {
537 	depth += 1;
538 	if (depth >= MAXDEPTH)
539 		error("Too many levels of nesting");
540 	stifline[depth] = linenum;
541 }
542 static void
state(Ifstate is)543 state(Ifstate is)
544 {
545 	ifstate[depth] = is;
546 }
547 
548 /*
549  * Write a line to the output or not, according to command line options.
550  */
551 static void
flushline(bool keep)552 flushline(bool keep)
553 {
554 	if (symlist)
555 		return;
556 	if (keep ^ complement)
557 		fputs(tline, output);
558 	else {
559 		if (lnblank)
560 			putc('\n', output);
561 		exitstat = 1;
562 	}
563 }
564 
565 /*
566  * The driver for the state machine.
567  */
568 static void
process(void)569 process(void)
570 {
571 	Linetype lineval;
572 
573 	for (;;) {
574 		linenum++;
575 		lineval = get_line();
576 		trans_table[ifstate[depth]][lineval]();
577 		debug("process %s -> %s depth %d",
578 		    linetype_name[lineval],
579 		    ifstate_name[ifstate[depth]], depth);
580 	}
581 }
582 
583 /*
584  * Parse a line and determine its type. We keep the preprocessor line
585  * parser state between calls in the global variable linestate, with
586  * help from skipcomment().
587  */
588 static Linetype
get_line(void)589 get_line(void)
590 {
591 	const char *cp;
592 	int cursym;
593 	int kwlen;
594 	Linetype retval;
595 	Comment_state wascomment;
596 
597 	if (fgets(tline, MAXLINE, input) == NULL)
598 		return (LT_EOF);
599 	retval = LT_PLAIN;
600 	wascomment = incomment;
601 	cp = skipcomment(tline);
602 	if (linestate == LS_START) {
603 		if (*cp == '#') {
604 			linestate = LS_HASH;
605 			cp = skipcomment(cp + 1);
606 		} else if (*cp != '\0')
607 			linestate = LS_DIRTY;
608 	}
609 	if (!incomment && linestate == LS_HASH) {
610 		keyword = tline + (cp - tline);
611 		cp = skipsym(cp);
612 		kwlen = cp - keyword;
613 		/* no way can we deal with a continuation inside a keyword */
614 		if (strncmp(cp, "\\\n", 2) == 0)
615 			Eioccc();
616 		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
617 		    strlcmp("ifndef", keyword, kwlen) == 0) {
618 			cp = skipcomment(cp);
619 			if ((cursym = findsym(cp)) < 0)
620 				retval = LT_IF;
621 			else {
622 				retval = (keyword[2] == 'n')
623 				    ? LT_FALSE : LT_TRUE;
624 				if (value[cursym] == NULL)
625 					retval = (retval == LT_TRUE)
626 					    ? LT_FALSE : LT_TRUE;
627 				if (ignore[cursym])
628 					retval = (retval == LT_TRUE)
629 					    ? LT_TRUEI : LT_FALSEI;
630 			}
631 			cp = skipsym(cp);
632 		} else if (strlcmp("if", keyword, kwlen) == 0)
633 			retval = ifeval(&cp);
634 		else if (strlcmp("elif", keyword, kwlen) == 0)
635 			retval = ifeval(&cp) - LT_IF + LT_ELIF;
636 		else if (strlcmp("else", keyword, kwlen) == 0)
637 			retval = LT_ELSE;
638 		else if (strlcmp("endif", keyword, kwlen) == 0)
639 			retval = LT_ENDIF;
640 		else {
641 			linestate = LS_DIRTY;
642 			retval = LT_PLAIN;
643 		}
644 		cp = skipcomment(cp);
645 		if (*cp != '\0') {
646 			linestate = LS_DIRTY;
647 			if (retval == LT_TRUE || retval == LT_FALSE ||
648 			    retval == LT_TRUEI || retval == LT_FALSEI)
649 				retval = LT_IF;
650 			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
651 				retval = LT_ELIF;
652 		}
653 		if (retval != LT_PLAIN && (wascomment || incomment)) {
654 			retval += LT_DODGY;
655 			if (incomment)
656 				linestate = LS_DIRTY;
657 		}
658 	}
659 	if (linestate == LS_DIRTY) {
660 		while (*cp != '\0')
661 			cp = skipcomment(cp + 1);
662 	}
663 	debug("parser %s comment %s line",
664 	    comment_name[incomment], linestate_name[linestate]);
665 	return (retval);
666 }
667 
/*
 * The binary operators supported by the expression evaluator; each
 * returns 1 when the relation holds and 0 otherwise. Note that if
 * support for division is added then we also need short-circuiting
 * booleans because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return (a < b);
}

static int
op_gt(int a, int b)
{
	return (a > b);
}

static int
op_le(int a, int b)
{
	return (a <= b);
}

static int
op_ge(int a, int b)
{
	return (a >= b);
}

static int
op_eq(int a, int b)
{
	return (a == b);
}

static int
op_ne(int a, int b)
{
	return (a != b);
}

static int
op_or(int a, int b)
{
	return (a || b);
}

static int
op_and(int a, int b)
{
	return (a && b);
}
681 
682 /*
683  * An evaluation function takes three arguments, as follows: (1) a pointer to
684  * an element of the precedence table which lists the operators at the current
685  * level of precedence; (2) a pointer to an integer which will receive the
686  * value of the expression; and (3) a pointer to a char* that points to the
687  * expression to be evaluated and that is updated to the end of the expression
688  * when evaluation is complete. The function returns LT_FALSE if the value of
689  * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
690  * expression could not be evaluated.
691  */
692 struct ops;
693 
694 typedef Linetype eval_fn(const struct ops *, int *, const char **);
695 
696 static eval_fn eval_table, eval_unary;
697 
698 /*
699  * The precedence table. Expressions involving binary operators are evaluated
700  * in a table-driven way by eval_table. When it evaluates a subexpression it
701  * calls the inner function with its first argument pointing to the next
702  * element of the table. Innermost expressions have special non-table-driven
703  * handling.
704  */
705 static const struct ops {
706 	eval_fn *inner;
707 	struct op {
708 		const char *str;
709 		int (*fn)(int, int);
710 	} op[5];
711 } eval_ops[] = {
712 	{ eval_table, { { "||", op_or } } },
713 	{ eval_table, { { "&&", op_and } } },
714 	{ eval_table, { { "==", op_eq },
715 			{ "!=", op_ne } } },
716 	{ eval_unary, { { "<=", op_le },
717 			{ ">=", op_ge },
718 			{ "<", op_lt },
719 			{ ">", op_gt } } }
720 };
721 
722 /*
723  * Function for evaluating the innermost parts of expressions,
724  * viz. !expr (expr) defined(symbol) symbol number
725  * We reset the keepthis flag when we find a non-constant subexpression.
726  */
727 static Linetype
eval_unary(const struct ops * ops,int * valp,const char ** cpp)728 eval_unary(const struct ops *ops, int *valp, const char **cpp)
729 {
730 	const char *cp;
731 	char *ep;
732 	int sym;
733 
734 	cp = skipcomment(*cpp);
735 	if (*cp == '!') {
736 		debug("eval%td !", ops - eval_ops);
737 		cp++;
738 		if (eval_unary(ops, valp, &cp) == LT_IF)
739 			return (LT_IF);
740 		*valp = !*valp;
741 	} else if (*cp == '(') {
742 		cp++;
743 		debug("eval%td (", ops - eval_ops);
744 		if (eval_table(eval_ops, valp, &cp) == LT_IF)
745 			return (LT_IF);
746 		cp = skipcomment(cp);
747 		if (*cp++ != ')')
748 			return (LT_IF);
749 	} else if (isdigit((unsigned char)*cp)) {
750 		debug("eval%td number", ops - eval_ops);
751 		*valp = strtol(cp, &ep, 0);
752 		cp = skipsym(cp);
753 	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
754 		cp = skipcomment(cp+7);
755 		debug("eval%td defined", ops - eval_ops);
756 		if (*cp++ != '(')
757 			return (LT_IF);
758 		cp = skipcomment(cp);
759 		sym = findsym(cp);
760 		if (sym < 0 || symlist)
761 			return (LT_IF);
762 		*valp = (value[sym] != NULL);
763 		cp = skipsym(cp);
764 		cp = skipcomment(cp);
765 		if (*cp++ != ')')
766 			return (LT_IF);
767 		keepthis = false;
768 	} else if (!endsym(*cp)) {
769 		debug("eval%td symbol", ops - eval_ops);
770 		sym = findsym(cp);
771 		if (sym < 0 || symlist)
772 			return (LT_IF);
773 		if (value[sym] == NULL)
774 			*valp = 0;
775 		else {
776 			*valp = strtol(value[sym], &ep, 0);
777 			if (*ep != '\0' || ep == value[sym])
778 				return (LT_IF);
779 		}
780 		cp = skipsym(cp);
781 		keepthis = false;
782 	} else {
783 		debug("eval%td bad expr", ops - eval_ops);
784 		return (LT_IF);
785 	}
786 
787 	*cpp = cp;
788 	debug("eval%td = %d", ops - eval_ops, *valp);
789 	return (*valp ? LT_TRUE : LT_FALSE);
790 }
791 
792 /*
793  * Table-driven evaluation of binary operators.
794  */
795 static Linetype
eval_table(const struct ops * ops,int * valp,const char ** cpp)796 eval_table(const struct ops *ops, int *valp, const char **cpp)
797 {
798 	const struct op *op;
799 	const char *cp;
800 	int val;
801 
802 	debug("eval%td", ops - eval_ops);
803 	cp = *cpp;
804 	if (ops->inner(ops+1, valp, &cp) == LT_IF)
805 		return (LT_IF);
806 	for (;;) {
807 		cp = skipcomment(cp);
808 		for (op = ops->op; op->str != NULL; op++)
809 			if (strncmp(cp, op->str, strlen(op->str)) == 0)
810 				break;
811 		if (op->str == NULL)
812 			break;
813 		cp += strlen(op->str);
814 		debug("eval%td %s", ops - eval_ops, op->str);
815 		if (ops->inner(ops+1, &val, &cp) == LT_IF)
816 			return (LT_IF);
817 		*valp = op->fn(*valp, val);
818 	}
819 
820 	*cpp = cp;
821 	debug("eval%td = %d", ops - eval_ops, *valp);
822 	return (*valp ? LT_TRUE : LT_FALSE);
823 }
824 
825 /*
826  * Evaluate the expression on a #if or #elif line. If we can work out
827  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
828  * return just a generic LT_IF.
829  */
830 static Linetype
ifeval(const char ** cpp)831 ifeval(const char **cpp)
832 {
833 	int ret;
834 	int val;
835 
836 	debug("eval %s", *cpp);
837 	keepthis = killconsts ? false : true;
838 	ret = eval_table(eval_ops, &val, cpp);
839 	debug("eval = %d", val);
840 	return (keepthis ? LT_IF : ret);
841 }
842 
843 /*
844  * Skip over comments and stop at the next character position that is
845  * not whitespace. Between calls we keep the comment state in the
846  * global variable incomment, and we also adjust the global variable
847  * linestate when we see a newline.
848  * XXX: doesn't cope with the buffer splitting inside a state transition.
849  */
850 static const char *
skipcomment(const char * cp)851 skipcomment(const char *cp)
852 {
853 	if (text || ignoring[depth]) {
854 		for (; isspace((unsigned char)*cp); cp++)
855 			if (*cp == '\n')
856 				linestate = LS_START;
857 		return (cp);
858 	}
859 	while (*cp != '\0')
860 		/* don't reset to LS_START after a line continuation */
861 		if (strncmp(cp, "\\\n", 2) == 0)
862 			cp += 2;
863 		else switch (incomment) {
864 		case NO_COMMENT:
865 			if (strncmp(cp, "/\\\n", 3) == 0) {
866 				incomment = STARTING_COMMENT;
867 				cp += 3;
868 			} else if (strncmp(cp, "/*", 2) == 0) {
869 				incomment = C_COMMENT;
870 				cp += 2;
871 			} else if (strncmp(cp, "//", 2) == 0) {
872 				incomment = CXX_COMMENT;
873 				cp += 2;
874 			} else if (strncmp(cp, "\n", 1) == 0) {
875 				linestate = LS_START;
876 				cp += 1;
877 			} else if (strchr(" \t", *cp) != NULL) {
878 				cp += 1;
879 			} else
880 				return (cp);
881 			continue;
882 		case CXX_COMMENT:
883 			if (strncmp(cp, "\n", 1) == 0) {
884 				incomment = NO_COMMENT;
885 				linestate = LS_START;
886 			}
887 			cp += 1;
888 			continue;
889 		case C_COMMENT:
890 			if (strncmp(cp, "*\\\n", 3) == 0) {
891 				incomment = FINISHING_COMMENT;
892 				cp += 3;
893 			} else if (strncmp(cp, "*/", 2) == 0) {
894 				incomment = NO_COMMENT;
895 				cp += 2;
896 			} else
897 				cp += 1;
898 			continue;
899 		case STARTING_COMMENT:
900 			if (*cp == '*') {
901 				incomment = C_COMMENT;
902 				cp += 1;
903 			} else if (*cp == '/') {
904 				incomment = CXX_COMMENT;
905 				cp += 1;
906 			} else {
907 				incomment = NO_COMMENT;
908 				linestate = LS_DIRTY;
909 			}
910 			continue;
911 		case FINISHING_COMMENT:
912 			if (*cp == '/') {
913 				incomment = NO_COMMENT;
914 				cp += 1;
915 			} else
916 				incomment = C_COMMENT;
917 			continue;
918 		default:
919 			abort(); /* bug */
920 		}
921 	return (cp);
922 }
923 
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
934 
935 /*
936  * Look for the symbol in the symbol table. If it is found, we return
937  * the symbol table index, else we return -1.
938  */
939 static int
findsym(const char * str)940 findsym(const char *str)
941 {
942 	const char *cp;
943 	int symind;
944 
945 	cp = skipsym(str);
946 	if (cp == str)
947 		return (-1);
948 	if (symlist)
949 		printf("%.*s\n", (int)(cp-str), str);
950 	for (symind = 0; symind < nsyms; ++symind) {
951 		if (strlcmp(symname[symind], str, cp-str) == 0) {
952 			debug("findsym %s %s", symname[symind],
953 			    value[symind] ? value[symind] : "");
954 			return (symind);
955 		}
956 	}
957 	return (-1);
958 }
959 
960 /*
961  * Add a symbol to the symbol table.
962  */
963 static void
addsym(bool ignorethis,bool definethis,char * sym)964 addsym(bool ignorethis, bool definethis, char *sym)
965 {
966 	int symind;
967 	char *val;
968 
969 	symind = findsym(sym);
970 	if (symind < 0) {
971 		if (nsyms >= MAXSYMS)
972 			errx(2, "too many symbols");
973 		symind = nsyms++;
974 	}
975 	symname[symind] = sym;
976 	ignore[symind] = ignorethis;
977 	val = sym + (skipsym(sym) - sym);
978 	if (definethis) {
979 		if (*val == '=') {
980 			value[symind] = val+1;
981 			*val = '\0';
982 		} else if (*val == '\0')
983 			value[symind] = "";
984 		else
985 			usage();
986 	} else {
987 		if (*val != '\0')
988 			usage();
989 		value[symind] = NULL;
990 	}
991 }
992 
/*
 * Compare s with the first n characters of t (or with all of t if it is
 * shorter than n).  Like strncmp(), except that s must also end where
 * the compared prefix ends for the result to be 0.
 *
 * Returns the difference of the first mismatching bytes (taken as
 * unsigned char), or else the byte of s that follows the matched prefix,
 * which is 0 exactly when s ends there.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	return ((unsigned char)s[i]);
}
1007 
1008 /*
1009  * Diagnostics.
1010  */
1011 static void
debug(const char * msg,...)1012 debug(const char *msg, ...)
1013 {
1014 	va_list ap;
1015 
1016 	if (debugging) {
1017 		va_start(ap, msg);
1018 		vwarnx(msg, ap);
1019 		va_end(ap);
1020 	}
1021 }
1022 
1023 static void
error(const char * msg)1024 error(const char *msg)
1025 {
1026 	if (depth == 0)
1027 		warnx("%s: %d: %s", filename, linenum, msg);
1028 	else
1029 		warnx("%s: %d: %s (#if line %d depth %d)",
1030 		    filename, linenum, msg, stifline[depth], depth);
1031 	fclose(output);
1032 	if (overwriting) {
1033 		unlink(tmpname);
1034 		errx(2, "%s unchanged", ofilename);
1035 	}
1036 	errx(2, "output may be truncated");
1037 }
1038