1 /* $NetBSD: unifdef.c,v 1.13 2006/04/30 23:56:42 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1985, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Dave Yost. It was rewritten to support ANSI C by Tony Finch. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at> 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Dave Yost. It was rewritten to support ANSI C by Tony Finch. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Berkeley and its contributors. 53 * 4. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 */ 69 70 #include <sys/cdefs.h> 71 72 #ifndef lint 73 #if 0 74 static const char copyright[] = 75 "@(#) Copyright (c) 1985, 1993\n\ 76 The Regents of the University of California. All rights reserved.\n"; 77 #endif 78 #ifdef __IDSTRING 79 __IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"); 80 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.13 2006/04/30 23:56:42 christos Exp $"); 81 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $"); 82 #endif 83 #endif /* not lint */ 84 #ifdef __FBSDID 85 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $"); 86 #endif 87 88 /* 89 * unifdef - remove ifdef'ed lines 90 * 91 * Wishlist: 92 * provide an option which will append the name of the 93 * appropriate symbol after #else's and #endif's 94 * provide an option which will check symbols after 95 * #else's and #endif's to see that they match their 96 * corresponding #ifdef or #ifndef 97 * generate #line directives in place of deleted code 98 * 99 * The first two items above require better buffer handling, which would 100 * also make it possible to handle all "dodgy" directives correctly. 101 */ 102 103 #include <ctype.h> 104 #include <err.h> 105 #include <stdarg.h> 106 #include <stdio.h> 107 #include <stdlib.h> 108 #include <string.h> 109 #include <unistd.h> 110 111 #include "stdbool.h" 112 113 /* types of input lines: */ 114 typedef enum { 115 LT_TRUEI, /* a true #if with ignore flag */ 116 LT_FALSEI, /* a false #if with ignore flag */ 117 LT_IF, /* an unknown #if */ 118 LT_TRUE, /* a true #if */ 119 LT_FALSE, /* a false #if */ 120 LT_ELIF, /* an unknown #elif */ 121 LT_ELTRUE, /* a true #elif */ 122 LT_ELFALSE, /* a false #elif */ 123 LT_ELSE, /* #else */ 124 LT_ENDIF, /* #endif */ 125 LT_DODGY, /* flag: directive is not on one line */ 126 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 127 LT_PLAIN, /* ordinary line */ 128 LT_EOF, /* end of file */ 129 LT_COUNT 130 } Linetype; 131 132 static char const * const linetype_name[] = { 133 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 134 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 135 "DODGY TRUEI", "DODGY FALSEI", 136 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 137 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 138 "DODGY ELSE", "DODGY ENDIF", 139 "PLAIN", "EOF" 140 }; 141 142 /* state of #if processing */ 143 typedef enum { 144 IS_OUTSIDE, 145 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 146 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 147 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 148 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 149 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 150 IS_PASS_ELSE, /* an else after a pass state */ 151 IS_FALSE_ELSE, /* an else after a true state */ 152 IS_TRUE_ELSE, /* an else after only false states */ 153 IS_FALSE_TRAILER, /* #elifs after a true are false */ 154 IS_COUNT 155 } Ifstate; 156 157 static char const * const ifstate_name[] = { 158 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 159 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 160 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 161 "FALSE_TRAILER" 162 }; 163 164 /* state of comment parser */ 165 typedef enum { 166 NO_COMMENT = false, /* outside a comment */ 167 C_COMMENT, /* in a comment like this one */ 168 CXX_COMMENT, /* between // and end of line */ 169 STARTING_COMMENT, /* just after slash-backslash-newline */ 170 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 171 } Comment_state; 172 173 static char const * const comment_name[] = { 174 "NO", "C", "CXX", "STARTING", "FINISHING" 175 }; 176 177 /* state of preprocessor line parser */ 178 typedef enum { 179 LS_START, /* only space and comments on this line */ 180 LS_HASH, /* only space, comments, and a hash */ 181 LS_DIRTY /* this line can't be a preprocessor line */ 182 } Line_state; 183 184 static char const * const linestate_name[] = { 185 "START", "HASH", "DIRTY" 186 }; 187 188 /* 189 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 190 */ 191 #define MAXDEPTH 64 /* maximum #if nesting */ 192 #define MAXLINE 4096 /* maximum length of line */ 193 #define MAXSYMS 4096 /* maximum number of symbols */ 194 195 /* 196 * Sometimes when editing a keyword the replacement text is longer, so 197 * we leave some space at the end of the tline buffer to accommodate this. 198 */ 199 #define EDITSLOP 10 200 201 /* 202 * Globals. 203 */ 204 205 static bool complement; /* -c: do the complement */ 206 static bool debugging; /* -d: debugging reports */ 207 static bool iocccok; /* -e: fewer IOCCC errors */ 208 static bool killconsts; /* -k: eval constant #ifs */ 209 static bool lnblank; /* -l: blank deleted lines */ 210 static bool symlist; /* -s: output symbol list */ 211 static bool text; /* -t: this is a text file */ 212 213 static const char *symname[MAXSYMS]; /* symbol name */ 214 static const char *value[MAXSYMS]; /* -Dsym=value */ 215 static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 216 static int nsyms; /* number of symbols */ 217 218 static FILE *input; /* input file pointer */ 219 static const char *filename; /* input file name */ 220 static int linenum; /* current line number */ 221 222 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 223 static char *keyword; /* used for editing #elif's */ 224 225 static Comment_state incomment; /* comment parser state */ 226 static Line_state linestate; /* #if line parser state */ 227 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 228 static bool ignoring[MAXDEPTH]; /* ignore comments state */ 229 static int stifline[MAXDEPTH]; /* start of current #if */ 230 static int depth; /* current #if nesting */ 231 static bool keepthis; /* don't delete constant #if */ 232 233 static int exitstat; /* program exit status */ 234 235 static void addsym(bool, bool, char *); 236 static void debug(const char *, ...); 237 static void done(void); 238 static void error(const char *); 239 static int findsym(const char *); 240 static void flushline(bool); 241 static Linetype getline(void); 242 static Linetype ifeval(const char **); 243 static void ignoreoff(void); 244 static void ignoreon(void); 245 static void keywordedit(const char *); 246 static void nest(void); 247 static void process(void); 248 static const char *skipcomment(const char *); 249 static const char *skipsym(const char *); 250 static void state(Ifstate); 251 static int strlcmp(const char *, const char *, size_t); 252 static void usage(void); 253 254 #define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 255 256 /* 257 * The main program. 258 */ 259 int 260 main(int argc, char *argv[]) 261 { 262 int opt; 263 264 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1) 265 switch (opt) { 266 case 'i': /* treat stuff controlled by these symbols as text */ 267 /* 268 * For strict backwards-compatibility the U or D 269 * should be immediately after the -i but it doesn't 270 * matter much if we relax that requirement. 271 */ 272 opt = *optarg++; 273 if (opt == 'D') 274 addsym(true, true, optarg); 275 else if (opt == 'U') 276 addsym(true, false, optarg); 277 else 278 usage(); 279 break; 280 case 'D': /* define a symbol */ 281 addsym(false, true, optarg); 282 break; 283 case 'U': /* undef a symbol */ 284 addsym(false, false, optarg); 285 break; 286 case 'I': 287 /* no-op for compatibility with cpp */ 288 break; 289 case 'c': /* treat -D as -U and vice versa */ 290 complement = true; 291 break; 292 case 'd': 293 debugging = true; 294 break; 295 case 'e': /* fewer errors from dodgy lines */ 296 iocccok = true; 297 break; 298 case 'k': /* process constant #ifs */ 299 killconsts = true; 300 break; 301 case 'l': /* blank deleted lines instead of omitting them */ 302 lnblank = true; 303 break; 304 case 's': /* only output list of symbols that control #ifs */ 305 symlist = true; 306 break; 307 case 't': /* don't parse C comments */ 308 text = true; 309 break; 310 default: 311 usage(); 312 } 313 argc -= optind; 314 argv += optind; 315 if (nsyms == 0 && !symlist) { 316 warnx("must -D or -U at least one symbol"); 317 usage(); 318 } 319 if (argc > 1) { 320 errx(2, "can only do one file"); 321 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 322 filename = *argv; 323 input = fopen(filename, "r"); 324 if (input == NULL) 325 err(2, "can't open %s", filename); 326 } else { 327 filename = "[stdin]"; 328 input = stdin; 329 } 330 process(); 331 abort(); /* bug */ 332 } 333 334 static void 335 usage(void) 336 { 337 fprintf(stderr, "usage: unifdef [-cdeklst]" 338 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n"); 339 exit(2); 340 } 341 342 /* 343 * A state transition function alters the global #if processing state 344 * in a particular way. The table below is indexed by the current 345 * processing state and the type of the current line. 346 * 347 * Nesting is handled by keeping a stack of states; some transition 348 * functions increase or decrease the depth. They also maintain the 349 * ignore state on a stack. In some complicated cases they have to 350 * alter the preprocessor directive, as follows. 351 * 352 * When we have processed a group that starts off with a known-false 353 * #if/#elif sequence (which has therefore been deleted) followed by a 354 * #elif that we don't understand and therefore must keep, we edit the 355 * latter into a #if to keep the nesting correct. 356 * 357 * When we find a true #elif in a group, the following block will 358 * always be kept and the rest of the sequence after the next #elif or 359 * #else will be discarded. We edit the #elif into a #else and the 360 * following directive to #endif since this has the desired behaviour. 361 * 362 * "Dodgy" directives are split across multiple lines, the most common 363 * example being a multi-line comment hanging off the right of the 364 * directive. We can handle them correctly only if there is no change 365 * from printing to dropping (or vice versa) caused by that directive. 366 * If the directive is the first of a group we have a choice between 367 * failing with an error, or passing it through unchanged instead of 368 * evaluating it. The latter is not the default to avoid questions from 369 * users about unifdef unexpectedly leaving behind preprocessor directives. 370 */ 371 typedef void state_fn(void); 372 373 /* report an error */ 374 static void Eelif (void) { error("Inappropriate #elif"); } 375 static void Eelse (void) { error("Inappropriate #else"); } 376 static void Eendif(void) { error("Inappropriate #endif"); } 377 static void Eeof (void) { error("Premature EOF"); } 378 static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 379 /* plain line handling */ 380 static void print (void) { flushline(true); } 381 static void drop (void) { flushline(false); } 382 /* output lacks group's start line */ 383 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 384 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 385 static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 386 /* print/pass this block */ 387 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 388 static void Pelse (void) { print(); state(IS_PASS_ELSE); } 389 static void Pendif(void) { print(); --depth; } 390 /* discard this block */ 391 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 392 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 393 static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 394 static void Dendif(void) { drop(); --depth; } 395 /* first line of group */ 396 static void Fdrop (void) { nest(); Dfalse(); } 397 static void Fpass (void) { nest(); Pelif(); } 398 static void Ftrue (void) { nest(); Strue(); } 399 static void Ffalse(void) { nest(); Sfalse(); } 400 /* variable pedantry for obfuscated lines */ 401 static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); } 402 static void Oif (void) { if (iocccok) Fpass(); else Eioccc(); } 403 static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); } 404 /* ignore comments in this block */ 405 static void Idrop (void) { Fdrop(); ignoreon(); } 406 static void Itrue (void) { Ftrue(); ignoreon(); } 407 static void Ifalse(void) { Ffalse(); ignoreon(); } 408 /* edit this line */ 409 static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } 410 static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } 411 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } 412 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } 413 414 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 415 /* IS_OUTSIDE */ 416 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 417 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 418 print, done }, 419 /* IS_FALSE_PREFIX */ 420 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 421 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 422 drop, Eeof }, 423 /* IS_TRUE_PREFIX */ 424 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 425 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 426 print, Eeof }, 427 /* IS_PASS_MIDDLE */ 428 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 429 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 430 print, Eeof }, 431 /* IS_FALSE_MIDDLE */ 432 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 433 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 434 drop, Eeof }, 435 /* IS_TRUE_MIDDLE */ 436 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 437 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 438 print, Eeof }, 439 /* IS_PASS_ELSE */ 440 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 441 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 442 print, Eeof }, 443 /* IS_FALSE_ELSE */ 444 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 445 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 446 drop, Eeof }, 447 /* IS_TRUE_ELSE */ 448 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 449 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 450 print, Eeof }, 451 /* IS_FALSE_TRAILER */ 452 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 453 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 454 drop, Eeof } 455 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 456 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 457 PLAIN EOF */ 458 }; 459 460 /* 461 * State machine utility functions 462 */ 463 static void 464 done(void) 465 { 466 if (incomment) 467 error("EOF in comment"); 468 exit(exitstat); 469 } 470 static void 471 ignoreoff(void) 472 { 473 ignoring[depth] = ignoring[depth-1]; 474 } 475 static void 476 ignoreon(void) 477 { 478 ignoring[depth] = true; 479 } 480 static void 481 keywordedit(const char *replacement) 482 { 483 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 484 print(); 485 } 486 static void 487 nest(void) 488 { 489 depth += 1; 490 if (depth >= MAXDEPTH) 491 error("Too many levels of nesting"); 492 stifline[depth] = linenum; 493 } 494 static void 495 state(Ifstate is) 496 { 497 ifstate[depth] = is; 498 } 499 500 /* 501 * Write a line to the output or not, according to command line options. 502 */ 503 static void 504 flushline(bool keep) 505 { 506 if (symlist) 507 return; 508 if (keep ^ complement) 509 fputs(tline, stdout); 510 else { 511 if (lnblank) 512 putc('\n', stdout); 513 exitstat = 1; 514 } 515 } 516 517 /* 518 * The driver for the state machine. 519 */ 520 static void 521 process(void) 522 { 523 Linetype lineval; 524 525 for (;;) { 526 linenum++; 527 lineval = getline(); 528 trans_table[ifstate[depth]][lineval](); 529 debug("process %s -> %s depth %d", 530 linetype_name[lineval], 531 ifstate_name[ifstate[depth]], depth); 532 } 533 } 534 535 /* 536 * Parse a line and determine its type. We keep the preprocessor line 537 * parser state between calls in the global variable linestate, with 538 * help from skipcomment(). 539 */ 540 static Linetype 541 getline(void) 542 { 543 const char *cp; 544 int cursym; 545 int kwlen; 546 Linetype retval; 547 Comment_state wascomment; 548 549 if (fgets(tline, MAXLINE, input) == NULL) 550 return (LT_EOF); 551 retval = LT_PLAIN; 552 wascomment = incomment; 553 cp = skipcomment(tline); 554 if (linestate == LS_START) { 555 if (*cp == '#') { 556 linestate = LS_HASH; 557 cp = skipcomment(cp + 1); 558 } else if (*cp != '\0') 559 linestate = LS_DIRTY; 560 } 561 if (!incomment && linestate == LS_HASH) { 562 keyword = tline + (cp - tline); 563 cp = skipsym(cp); 564 kwlen = cp - keyword; 565 /* no way can we deal with a continuation inside a keyword */ 566 if (strncmp(cp, "\\\n", 2) == 0) 567 Eioccc(); 568 if (strlcmp("ifdef", keyword, kwlen) == 0 || 569 strlcmp("ifndef", keyword, kwlen) == 0) { 570 cp = skipcomment(cp); 571 if ((cursym = findsym(cp)) < 0) 572 retval = LT_IF; 573 else { 574 retval = (keyword[2] == 'n') 575 ? LT_FALSE : LT_TRUE; 576 if (value[cursym] == NULL) 577 retval = (retval == LT_TRUE) 578 ? LT_FALSE : LT_TRUE; 579 if (ignore[cursym]) 580 retval = (retval == LT_TRUE) 581 ? LT_TRUEI : LT_FALSEI; 582 } 583 cp = skipsym(cp); 584 } else if (strlcmp("if", keyword, kwlen) == 0) 585 retval = ifeval(&cp); 586 else if (strlcmp("elif", keyword, kwlen) == 0) 587 retval = ifeval(&cp) - LT_IF + LT_ELIF; 588 else if (strlcmp("else", keyword, kwlen) == 0) 589 retval = LT_ELSE; 590 else if (strlcmp("endif", keyword, kwlen) == 0) 591 retval = LT_ENDIF; 592 else { 593 linestate = LS_DIRTY; 594 retval = LT_PLAIN; 595 } 596 cp = skipcomment(cp); 597 if (*cp != '\0') { 598 linestate = LS_DIRTY; 599 if (retval == LT_TRUE || retval == LT_FALSE || 600 retval == LT_TRUEI || retval == LT_FALSEI) 601 retval = LT_IF; 602 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 603 retval = LT_ELIF; 604 } 605 if (retval != LT_PLAIN && (wascomment || incomment)) { 606 retval += LT_DODGY; 607 if (incomment) 608 linestate = LS_DIRTY; 609 } 610 /* skipcomment should have changed the state */ 611 if (linestate == LS_HASH) 612 abort(); /* bug */ 613 } 614 if (linestate == LS_DIRTY) { 615 while (*cp != '\0') 616 cp = skipcomment(cp + 1); 617 } 618 debug("parser %s comment %s line", 619 comment_name[incomment], linestate_name[linestate]); 620 return (retval); 621 } 622 623 /* 624 * These are the binary operators that are supported by the expression 625 * evaluator. Note that if support for division is added then we also 626 * need short-circuiting booleans because of divide-by-zero. 627 */ 628 static int op_lt(int a, int b) { return (a < b); } 629 static int op_gt(int a, int b) { return (a > b); } 630 static int op_le(int a, int b) { return (a <= b); } 631 static int op_ge(int a, int b) { return (a >= b); } 632 static int op_eq(int a, int b) { return (a == b); } 633 static int op_ne(int a, int b) { return (a != b); } 634 static int op_or(int a, int b) { return (a || b); } 635 static int op_and(int a, int b) { return (a && b); } 636 637 /* 638 * An evaluation function takes three arguments, as follows: (1) a pointer to 639 * an element of the precedence table which lists the operators at the current 640 * level of precedence; (2) a pointer to an integer which will receive the 641 * value of the expression; and (3) a pointer to a char* that points to the 642 * expression to be evaluated and that is updated to the end of the expression 643 * when evaluation is complete. The function returns LT_FALSE if the value of 644 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 645 * expression could not be evaluated. 646 */ 647 struct ops; 648 649 typedef Linetype eval_fn(const struct ops *, int *, const char **); 650 651 static eval_fn eval_table, eval_unary; 652 653 /* 654 * The precedence table. Expressions involving binary operators are evaluated 655 * in a table-driven way by eval_table. When it evaluates a subexpression it 656 * calls the inner function with its first argument pointing to the next 657 * element of the table. Innermost expressions have special non-table-driven 658 * handling. 659 */ 660 static const struct ops { 661 eval_fn *inner; 662 struct op { 663 const char *str; 664 int (*fn)(int, int); 665 } op[5]; 666 } eval_ops[] = { 667 { eval_table, { { "||", op_or } } }, 668 { eval_table, { { "&&", op_and } } }, 669 { eval_table, { { "==", op_eq }, 670 { "!=", op_ne } } }, 671 { eval_unary, { { "<=", op_le }, 672 { ">=", op_ge }, 673 { "<", op_lt }, 674 { ">", op_gt } } } 675 }; 676 677 /* 678 * Function for evaluating the innermost parts of expressions, 679 * viz. !expr (expr) defined(symbol) symbol number 680 * We reset the keepthis flag when we find a non-constant subexpression. 681 */ 682 static Linetype 683 eval_unary(const struct ops *ops, int *valp, const char **cpp) 684 { 685 const char *cp; 686 char *ep; 687 int sym; 688 689 cp = skipcomment(*cpp); 690 if (*cp == '!') { 691 debug("eval%d !", ops - eval_ops); 692 cp++; 693 if (eval_unary(ops, valp, &cp) == LT_IF) 694 return (LT_IF); 695 *valp = !*valp; 696 } else if (*cp == '(') { 697 cp++; 698 debug("eval%d (", ops - eval_ops); 699 if (eval_table(eval_ops, valp, &cp) == LT_IF) 700 return (LT_IF); 701 cp = skipcomment(cp); 702 if (*cp++ != ')') 703 return (LT_IF); 704 } else if (isdigit((unsigned char)*cp)) { 705 debug("eval%d number", ops - eval_ops); 706 *valp = strtol(cp, &ep, 0); 707 cp = skipsym(cp); 708 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 709 cp = skipcomment(cp+7); 710 debug("eval%d defined", ops - eval_ops); 711 if (*cp++ != '(') 712 return (LT_IF); 713 cp = skipcomment(cp); 714 sym = findsym(cp); 715 if (sym < 0 || !symlist) 716 return (LT_IF); 717 *valp = (value[sym] != NULL); 718 cp = skipsym(cp); 719 cp = skipcomment(cp); 720 if (*cp++ != ')') 721 return (LT_IF); 722 keepthis = false; 723 } else if (!endsym(*cp)) { 724 debug("eval%d symbol", ops - eval_ops); 725 sym = findsym(cp); 726 if (sym < 0 || !symlist) 727 return (LT_IF); 728 if (value[sym] == NULL) 729 *valp = 0; 730 else { 731 *valp = strtol(value[sym], &ep, 0); 732 if (*ep != '\0' || ep == value[sym]) 733 return (LT_IF); 734 } 735 cp = skipsym(cp); 736 keepthis = false; 737 } else { 738 debug("eval%d bad expr", ops - eval_ops); 739 return (LT_IF); 740 } 741 742 *cpp = cp; 743 debug("eval%d = %d", ops - eval_ops, *valp); 744 return (*valp ? LT_TRUE : LT_FALSE); 745 } 746 747 /* 748 * Table-driven evaluation of binary operators. 749 */ 750 static Linetype 751 eval_table(const struct ops *ops, int *valp, const char **cpp) 752 { 753 const struct op *op; 754 const char *cp; 755 int val; 756 757 debug("eval%d", ops - eval_ops); 758 cp = *cpp; 759 if (ops->inner(ops+1, valp, &cp) == LT_IF) 760 return (LT_IF); 761 for (;;) { 762 cp = skipcomment(cp); 763 for (op = ops->op; op->str != NULL; op++) 764 if (strncmp(cp, op->str, strlen(op->str)) == 0) 765 break; 766 if (op->str == NULL) 767 break; 768 cp += strlen(op->str); 769 debug("eval%d %s", ops - eval_ops, op->str); 770 if (ops->inner(ops+1, &val, &cp) == LT_IF) 771 return (LT_IF); 772 *valp = op->fn(*valp, val); 773 } 774 775 *cpp = cp; 776 debug("eval%d = %d", ops - eval_ops, *valp); 777 return (*valp ? LT_TRUE : LT_FALSE); 778 } 779 780 /* 781 * Evaluate the expression on a #if or #elif line. If we can work out 782 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 783 * return just a generic LT_IF. 784 */ 785 static Linetype 786 ifeval(const char **cpp) 787 { 788 int ret; 789 int val; 790 791 debug("eval %s", *cpp); 792 keepthis = killconsts ? false : true; 793 ret = eval_table(eval_ops, &val, cpp); 794 debug("eval = %d", val); 795 return (keepthis ? LT_IF : ret); 796 } 797 798 /* 799 * Skip over comments and stop at the next character position that is 800 * not whitespace. Between calls we keep the comment state in the 801 * global variable incomment, and we also adjust the global variable 802 * linestate when we see a newline. 803 * XXX: doesn't cope with the buffer splitting inside a state transition. 804 */ 805 static const char * 806 skipcomment(const char *cp) 807 { 808 if (text || ignoring[depth]) { 809 for (; isspace((unsigned char)*cp); cp++) 810 if (*cp == '\n') 811 linestate = LS_START; 812 return (cp); 813 } 814 while (*cp != '\0') 815 /* don't reset to LS_START after a line continuation */ 816 if (strncmp(cp, "\\\n", 2) == 0) 817 cp += 2; 818 else switch (incomment) { 819 case NO_COMMENT: 820 if (strncmp(cp, "/\\\n", 3) == 0) { 821 incomment = STARTING_COMMENT; 822 cp += 3; 823 } else if (strncmp(cp, "/*", 2) == 0) { 824 incomment = C_COMMENT; 825 cp += 2; 826 } else if (strncmp(cp, "//", 2) == 0) { 827 incomment = CXX_COMMENT; 828 cp += 2; 829 } else if (strncmp(cp, "\n", 1) == 0) { 830 linestate = LS_START; 831 cp += 1; 832 } else if (strchr(" \t", *cp) != NULL) { 833 cp += 1; 834 } else 835 return (cp); 836 continue; 837 case CXX_COMMENT: 838 if (strncmp(cp, "\n", 1) == 0) { 839 incomment = NO_COMMENT; 840 linestate = LS_START; 841 } 842 cp += 1; 843 continue; 844 case C_COMMENT: 845 if (strncmp(cp, "*\\\n", 3) == 0) { 846 incomment = FINISHING_COMMENT; 847 cp += 3; 848 } else if (strncmp(cp, "*/", 2) == 0) { 849 incomment = NO_COMMENT; 850 cp += 2; 851 } else 852 cp += 1; 853 continue; 854 case STARTING_COMMENT: 855 if (*cp == '*') { 856 incomment = C_COMMENT; 857 cp += 1; 858 } else if (*cp == '/') { 859 incomment = CXX_COMMENT; 860 cp += 1; 861 } else { 862 incomment = NO_COMMENT; 863 linestate = LS_DIRTY; 864 } 865 continue; 866 case FINISHING_COMMENT: 867 if (*cp == '/') { 868 incomment = NO_COMMENT; 869 cp += 1; 870 } else 871 incomment = C_COMMENT; 872 continue; 873 default: 874 abort(); /* bug */ 875 } 876 return (cp); 877 } 878 879 /* 880 * Skip over an identifier. 881 */ 882 static const char * 883 skipsym(const char *cp) 884 { 885 while (!endsym(*cp)) 886 ++cp; 887 return (cp); 888 } 889 890 /* 891 * Look for the symbol in the symbol table. If is is found, we return 892 * the symbol table index, else we return -1. 893 */ 894 static int 895 findsym(const char *str) 896 { 897 const char *cp; 898 int symind; 899 900 cp = skipsym(str); 901 if (cp == str) 902 return (-1); 903 if (symlist) 904 printf("%.*s\n", (int)(cp-str), str); 905 for (symind = 0; symind < nsyms; ++symind) { 906 if (strlcmp(symname[symind], str, cp-str) == 0) { 907 debug("findsym %s %s", symname[symind], 908 value[symind] ? value[symind] : ""); 909 return (symind); 910 } 911 } 912 return (-1); 913 } 914 915 /* 916 * Add a symbol to the symbol table. 917 */ 918 static void 919 addsym(bool ignorethis, bool definethis, char *sym) 920 { 921 int symind; 922 char *val; 923 924 symind = findsym(sym); 925 if (symind < 0) { 926 if (nsyms >= MAXSYMS) 927 errx(2, "too many symbols"); 928 symind = nsyms++; 929 } 930 symname[symind] = sym; 931 ignore[symind] = ignorethis; 932 val = sym + (skipsym(sym) - sym); 933 if (definethis) { 934 if (*val == '=') { 935 value[symind] = val+1; 936 *val = '\0'; 937 } else if (*val == '\0') 938 value[symind] = ""; 939 else 940 usage(); 941 } else { 942 if (*val != '\0') 943 usage(); 944 value[symind] = NULL; 945 } 946 } 947 948 /* 949 * Compare s with n characters of t. 950 * The same as strncmp() except that it checks that s[n] == '\0'. 951 */ 952 static int 953 strlcmp(const char *s, const char *t, size_t n) 954 { 955 while (n-- && *t != '\0') 956 if (*s != *t) 957 return ((unsigned char)*s - (unsigned char)*t); 958 else 959 ++s, ++t; 960 return ((unsigned char)*s); 961 } 962 963 /* 964 * Diagnostics. 965 */ 966 static void 967 debug(const char *msg, ...) 968 { 969 va_list ap; 970 971 if (debugging) { 972 va_start(ap, msg); 973 vwarnx(msg, ap); 974 va_end(ap); 975 } 976 } 977 978 static void 979 error(const char *msg) 980 { 981 if (depth == 0) 982 warnx("%s: %d: %s", filename, linenum, msg); 983 else 984 warnx("%s: %d: %s (#if line %d depth %d)", 985 filename, linenum, msg, stifline[depth], depth); 986 errx(2, "output may be truncated"); 987 } 988