1 /* $NetBSD: checknr.c,v 1.9 2002/01/21 16:46:37 wiz Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 45 #else 46 __RCSID("$NetBSD: checknr.c,v 1.9 2002/01/21 16:46:37 wiz Exp $"); 47 #endif 48 #endif /* not lint */ 49 50 /* 51 * checknr: check an nroff/troff input file for matching macro calls. 52 * we also attempt to match size and font changes, but only the embedded 53 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 54 * later but for now think of these restrictions as contributions to 55 * structured typesetting. 56 */ 57 #include <ctype.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 62 #define MAXSTK 100 /* Stack size */ 63 #define MAXBR 100 /* Max number of bracket pairs known */ 64 #define MAXCMDS 500 /* Max number of commands known */ 65 66 /* 67 * The stack on which we remember what we've seen so far. 68 */ 69 struct stkstr { 70 int opno; /* number of opening bracket */ 71 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 72 int parm; /* parm to size, font, etc */ 73 int lno; /* line number the thing came in in */ 74 } stk[MAXSTK]; 75 int stktop; 76 77 /* 78 * The kinds of opening and closing brackets. 79 */ 80 struct brstr { 81 char *opbr; 82 char *clbr; 83 } br[MAXBR] = { 84 /* A few bare bones troff commands */ 85 #define SZ 0 86 { "sz", "sz"}, /* also \s */ 87 #define FT 1 88 { "ft", "ft"}, /* also \f */ 89 /* the -mm package */ 90 {"AL", "LE"}, 91 {"AS", "AE"}, 92 {"BL", "LE"}, 93 {"BS", "BE"}, 94 {"DF", "DE"}, 95 {"DL", "LE"}, 96 {"DS", "DE"}, 97 {"FS", "FE"}, 98 {"ML", "LE"}, 99 {"NS", "NE"}, 100 {"RL", "LE"}, 101 {"VL", "LE"}, 102 /* the -ms package */ 103 {"AB", "AE"}, 104 {"BD", "DE"}, 105 {"CD", "DE"}, 106 {"DS", "DE"}, 107 {"FS", "FE"}, 108 {"ID", "DE"}, 109 {"KF", "KE"}, 110 {"KS", "KE"}, 111 {"LD", "DE"}, 112 {"LG", "NL"}, 113 {"QS", "QE"}, 114 {"RS", "RE"}, 115 {"SM", "NL"}, 116 {"XA", "XE"}, 117 {"XS", "XE"}, 118 /* The -me package */ 119 {"(b", ")b"}, 120 {"(c", ")c"}, 121 {"(d", ")d"}, 122 {"(f", ")f"}, 123 {"(l", ")l"}, 124 {"(q", ")q"}, 125 {"(x", ")x"}, 126 {"(z", ")z"}, 127 /* Things needed by preprocessors */ 128 {"EQ", "EN"}, 129 {"TS", "TE"}, 130 /* Refer */ 131 {"[", "]"}, 132 {0, 0}, 133 }; 134 135 /* 136 * All commands known to nroff, plus macro packages. 137 * Used so we can complain about unrecognized commands. 138 */ 139 char *knowncmds[MAXCMDS] = { 140 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", 141 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", 142 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", 143 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", 144 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2", 145 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", 146 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", 147 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", 148 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID", 149 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", 150 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", 151 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", 152 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA", 153 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA", 154 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE", 155 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", 156 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", 157 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", 158 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", 159 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", 160 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", 161 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", 162 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", 163 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 164 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", 165 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", 166 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", 167 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", 168 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", 169 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", 170 "yr", 0 171 }; 172 173 int lineno; /* current line number in input file */ 174 char *cfilename; /* name of current file */ 175 int nfiles; /* number of files to process */ 176 int fflag; /* -f: ignore \f */ 177 int sflag; /* -s: ignore \s */ 178 int ncmds; /* size of knowncmds */ 179 int slot; /* slot in knowncmds found by binsrch */ 180 181 void addcmd(char *); 182 void addmac(char *); 183 int binsrch(char *); 184 void checkknown(char *); 185 void chkcmd(char *, char *); 186 void complain(int); 187 int eq(const void *, const void *); 188 int main(int, char **); 189 void nomatch(char *); 190 void pe(int); 191 void process(FILE *); 192 void prop(int); 193 void usage(void); 194 195 int 196 main(int argc, char **argv) 197 { 198 FILE *f; 199 int i; 200 char *cp; 201 char b1[4]; 202 203 /* Figure out how many known commands there are */ 204 while (knowncmds[ncmds]) 205 ncmds++; 206 while (argc > 1 && argv[1][0] == '-') { 207 switch(argv[1][1]) { 208 209 /* -a: add pairs of macros */ 210 case 'a': 211 i = strlen(argv[1]) - 2; 212 if (i % 6 != 0) 213 usage(); 214 /* look for empty macro slots */ 215 for (i=0; br[i].opbr; i++) 216 ; 217 for (cp=argv[1]+3; cp[-1]; cp += 6) { 218 br[i].opbr = malloc(3); 219 strncpy(br[i].opbr, cp, 2); 220 br[i].clbr = malloc(3); 221 strncpy(br[i].clbr, cp+3, 2); 222 addmac(br[i].opbr); /* knows pairs are also known cmds */ 223 addmac(br[i].clbr); 224 i++; 225 } 226 break; 227 228 /* -c: add known commands */ 229 case 'c': 230 i = strlen(argv[1]) - 2; 231 if (i % 3 != 0) 232 usage(); 233 for (cp=argv[1]+3; cp[-1]; cp += 3) { 234 if (cp[2] && cp[2] != '.') 235 usage(); 236 strncpy(b1, cp, 2); 237 addmac(b1); 238 } 239 break; 240 241 /* -f: ignore font changes */ 242 case 'f': 243 fflag = 1; 244 break; 245 246 /* -s: ignore size changes */ 247 case 's': 248 sflag = 1; 249 break; 250 default: 251 usage(); 252 } 253 argc--; argv++; 254 } 255 256 nfiles = argc - 1; 257 258 if (nfiles > 0) { 259 for (i=1; i<argc; i++) { 260 cfilename = argv[i]; 261 f = fopen(cfilename, "r"); 262 if (f == NULL) 263 perror(cfilename); 264 else 265 process(f); 266 } 267 } else { 268 cfilename = "stdin"; 269 process(stdin); 270 } 271 exit(0); 272 } 273 274 void 275 usage(void) 276 { 277 printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n"); 278 exit(1); 279 } 280 281 void 282 process(FILE *f) 283 { 284 int i, n; 285 char line[256]; /* the current line */ 286 char mac[5]; /* The current macro or nroff command */ 287 int pl; 288 289 stktop = -1; 290 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 291 if (line[0] == '.') { 292 /* 293 * find and isolate the macro/command name. 294 */ 295 strncpy(mac, line+1, 4); 296 if (isspace((unsigned char)mac[0])) { 297 pe(lineno); 298 printf("Empty command\n"); 299 } else if (isspace((unsigned char)mac[1])) { 300 mac[1] = 0; 301 } else if (isspace((unsigned char)mac[2])) { 302 mac[2] = 0; 303 } else if (mac[0] != '\\' || mac[1] != '\"') { 304 pe(lineno); 305 printf("Command too long\n"); 306 } 307 308 /* 309 * Is it a known command? 310 */ 311 checkknown(mac); 312 313 /* 314 * Should we add it? 315 */ 316 if (eq(mac, "de")) 317 addcmd(line); 318 319 chkcmd(line, mac); 320 } 321 322 /* 323 * At this point we process the line looking 324 * for \s and \f. 325 */ 326 for (i=0; line[i]; i++) 327 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 328 if (!sflag && line[++i]=='s') { 329 pl = line[++i]; 330 if (isdigit((unsigned char)pl)) { 331 n = pl - '0'; 332 pl = ' '; 333 } else 334 n = 0; 335 while (isdigit((unsigned char)line[++i])) 336 n = 10 * n + line[i] - '0'; 337 i--; 338 if (n == 0) { 339 if (stk[stktop].opno == SZ) { 340 stktop--; 341 } else { 342 pe(lineno); 343 printf("unmatched \\s0\n"); 344 } 345 } else { 346 stk[++stktop].opno = SZ; 347 stk[stktop].pl = pl; 348 stk[stktop].parm = n; 349 stk[stktop].lno = lineno; 350 } 351 } else if (!fflag && line[i]=='f') { 352 n = line[++i]; 353 if (n == 'P') { 354 if (stk[stktop].opno == FT) { 355 stktop--; 356 } else { 357 pe(lineno); 358 printf("unmatched \\fP\n"); 359 } 360 } else { 361 stk[++stktop].opno = FT; 362 stk[stktop].pl = 1; 363 stk[stktop].parm = n; 364 stk[stktop].lno = lineno; 365 } 366 } 367 } 368 } 369 /* 370 * We've hit the end and look at all this stuff that hasn't been 371 * matched yet! Complain, complain. 372 */ 373 for (i=stktop; i>=0; i--) { 374 complain(i); 375 } 376 } 377 378 void 379 complain(int i) 380 { 381 pe(stk[i].lno); 382 printf("Unmatched "); 383 prop(i); 384 printf("\n"); 385 } 386 387 void 388 prop(int i) 389 { 390 if (stk[i].pl == 0) 391 printf(".%s", br[stk[i].opno].opbr); 392 else switch(stk[i].opno) { 393 case SZ: 394 printf("\\s%c%d", stk[i].pl, stk[i].parm); 395 break; 396 case FT: 397 printf("\\f%c", stk[i].parm); 398 break; 399 default: 400 printf("Bug: stk[%d].opno = %d = .%s, .%s", 401 i, stk[i].opno, br[stk[i].opno].opbr, 402 br[stk[i].opno].clbr); 403 } 404 } 405 406 void 407 chkcmd(char *line, char *mac) 408 { 409 int i; 410 411 /* 412 * Check to see if it matches top of stack. 413 */ 414 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 415 stktop--; /* OK. Pop & forget */ 416 else { 417 /* No. Maybe it's an opener */ 418 for (i=0; br[i].opbr; i++) { 419 if (eq(mac, br[i].opbr)) { 420 /* Found. Push it. */ 421 stktop++; 422 stk[stktop].opno = i; 423 stk[stktop].pl = 0; 424 stk[stktop].parm = 0; 425 stk[stktop].lno = lineno; 426 break; 427 } 428 /* 429 * Maybe it's an unmatched closer. 430 * NOTE: this depends on the fact 431 * that none of the closers can be 432 * openers too. 433 */ 434 if (eq(mac, br[i].clbr)) { 435 nomatch(mac); 436 break; 437 } 438 } 439 } 440 } 441 442 void 443 nomatch(char *mac) 444 { 445 int i, j; 446 447 /* 448 * Look for a match further down on stack 449 * If we find one, it suggests that the stuff in 450 * between is supposed to match itself. 451 */ 452 for (j=stktop; j>=0; j--) 453 if (eq(mac,br[stk[j].opno].clbr)) { 454 /* Found. Make a good diagnostic. */ 455 if (j == stktop-2) { 456 /* 457 * Check for special case \fx..\fR and don't 458 * complain. 459 */ 460 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 461 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 462 stktop = j -1; 463 return; 464 } 465 /* 466 * We have two unmatched frobs. Chances are 467 * they were intended to match, so we mention 468 * them together. 469 */ 470 pe(stk[j+1].lno); 471 prop(j+1); 472 printf(" does not match %d: ", stk[j+2].lno); 473 prop(j+2); 474 printf("\n"); 475 } else for (i=j+1; i <= stktop; i++) { 476 complain(i); 477 } 478 stktop = j-1; 479 return; 480 } 481 /* Didn't find one. Throw this away. */ 482 pe(lineno); 483 printf("Unmatched .%s\n", mac); 484 } 485 486 /* eq: are two strings equal? */ 487 int 488 eq(const void *s1, const void *s2) 489 { 490 return (strcmp((char *)s1, (char *)s2) == 0); 491 } 492 493 /* print the first part of an error message, given the line number */ 494 void 495 pe(int pelineno) 496 { 497 if (nfiles > 1) 498 printf("%s: ", cfilename); 499 printf("%d: ", pelineno); 500 } 501 502 void 503 checkknown(char *mac) 504 { 505 506 if (eq(mac, ".")) 507 return; 508 if (binsrch(mac) >= 0) 509 return; 510 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 511 return; 512 513 pe(lineno); 514 printf("Unknown command: .%s\n", mac); 515 } 516 517 /* 518 * We have a .de xx line in "line". Add xx to the list of known commands. 519 */ 520 void 521 addcmd(char *line) 522 { 523 char *mac; 524 525 /* grab the macro being defined */ 526 mac = line+4; 527 while (isspace((unsigned char)*mac)) 528 mac++; 529 if (*mac == 0) { 530 pe(lineno); 531 printf("illegal define: %s\n", line); 532 return; 533 } 534 mac[2] = 0; 535 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 536 mac[1] = 0; 537 if (ncmds >= MAXCMDS) { 538 printf("Only %d known commands allowed\n", MAXCMDS); 539 exit(1); 540 } 541 addmac(mac); 542 } 543 544 /* 545 * Add mac to the list. We should really have some kind of tree 546 * structure here but this is a quick-and-dirty job and I just don't 547 * have time to mess with it. (I wonder if this will come back to haunt 548 * me someday?) Anyway, I claim that .de is fairly rare in user 549 * nroff programs, and the register loop below is pretty fast. 550 */ 551 void 552 addmac(char *mac) 553 { 554 char **src, **dest, **loc; 555 556 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 557 #ifdef DEBUG 558 printf("binsrch(%s) -> already in table\n", mac); 559 #endif /* DEBUG */ 560 return; 561 } 562 /* binsrch sets slot as a side effect */ 563 #ifdef DEBUG 564 printf("binsrch(%s) -> %d\n", mac, slot); 565 #endif 566 loc = &knowncmds[slot]; 567 src = &knowncmds[ncmds-1]; 568 dest = src+1; 569 while (dest > loc) 570 *dest-- = *src--; 571 *loc = malloc(3); 572 strcpy(*loc, mac); 573 ncmds++; 574 #ifdef DEBUG 575 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 576 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 577 knowncmds[slot+2], ncmds); 578 #endif 579 } 580 581 /* 582 * Do a binary search in knowncmds for mac. 583 * If found, return the index. If not, return -1. 584 */ 585 int 586 binsrch(char *mac) 587 { 588 char *p; /* pointer to current cmd in list */ 589 int d; /* difference if any */ 590 int mid; /* mid point in binary search */ 591 int top, bot; /* boundaries of bin search, inclusive */ 592 593 top = ncmds-1; 594 bot = 0; 595 while (top >= bot) { 596 mid = (top+bot)/2; 597 p = knowncmds[mid]; 598 d = p[0] - mac[0]; 599 if (d == 0) 600 d = p[1] - mac[1]; 601 if (d == 0) 602 return mid; 603 if (d < 0) 604 bot = mid + 1; 605 else 606 top = mid - 1; 607 } 608 slot = bot; /* place it would have gone */ 609 return -1; 610 } 611