1 /* 2 * Copyright (c) 1980 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
*/

#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 The Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*static char sccsid[] = "from: @(#)checknr.c	5.4 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: checknr.c,v 1.2 1993/08/01 18:18:04 mycroft Exp $";
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <stdio.h>
#include <ctype.h>

#define MAXSTK	100	/* Stack size (number of entries in stk[]) */
#define	MAXBR	100	/* Max number of bracket pairs known (size of br[]) */
#define MAXCMDS	500	/* Max number of commands known (size of knowncmds[]) */

/*
 * The stack on which we remember what we've seen so far.
 * One entry is pushed for every opening macro, \s size change and
 * \f font change that has not been closed yet.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number on which the opener was seen */
} stk[MAXSTK];
int stktop;		/* index of the top entry of stk[]; -1 when empty */

/*
 * The kinds of opening and closing brackets.
72 */ 73 struct brstr { 74 char *opbr; 75 char *clbr; 76 } br[MAXBR] = { 77 /* A few bare bones troff commands */ 78 #define SZ 0 79 "sz", "sz", /* also \s */ 80 #define FT 1 81 "ft", "ft", /* also \f */ 82 /* the -mm package */ 83 "AL", "LE", 84 "AS", "AE", 85 "BL", "LE", 86 "BS", "BE", 87 "DF", "DE", 88 "DL", "LE", 89 "DS", "DE", 90 "FS", "FE", 91 "ML", "LE", 92 "NS", "NE", 93 "RL", "LE", 94 "VL", "LE", 95 /* the -ms package */ 96 "AB", "AE", 97 "BD", "DE", 98 "CD", "DE", 99 "DS", "DE", 100 "FS", "FE", 101 "ID", "DE", 102 "KF", "KE", 103 "KS", "KE", 104 "LD", "DE", 105 "LG", "NL", 106 "QS", "QE", 107 "RS", "RE", 108 "SM", "NL", 109 "XA", "XE", 110 "XS", "XE", 111 /* The -me package */ 112 "(b", ")b", 113 "(c", ")c", 114 "(d", ")d", 115 "(f", ")f", 116 "(l", ")l", 117 "(q", ")q", 118 "(x", ")x", 119 "(z", ")z", 120 /* Things needed by preprocessors */ 121 "EQ", "EN", 122 "TS", "TE", 123 /* Refer */ 124 "[", "]", 125 0, 0 126 }; 127 128 /* 129 * All commands known to nroff, plus macro packages. 130 * Used so we can complain about unrecognized commands. 
131 */ 132 char *knowncmds[MAXCMDS] = { 133 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", 134 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", 135 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", 136 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", 137 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2", 138 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", 139 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", 140 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", 141 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID", 142 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", 143 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", 144 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", 145 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA", 146 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA", 147 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE", 148 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", 149 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", 150 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", 151 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", 152 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", 153 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", 154 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", 155 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", 156 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 157 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", 158 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", 
"nx", 159 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", 160 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", 161 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", 162 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", 163 "yr", 0 164 }; 165 166 int lineno; /* current line number in input file */ 167 char line[256]; /* the current line */ 168 char *cfilename; /* name of current file */ 169 int nfiles; /* number of files to process */ 170 int fflag; /* -f: ignore \f */ 171 int sflag; /* -s: ignore \s */ 172 int ncmds; /* size of knowncmds */ 173 int slot; /* slot in knowncmds found by binsrch */ 174 175 char *malloc(); 176 177 main(argc, argv) 178 int argc; 179 char **argv; 180 { 181 FILE *f; 182 int i; 183 char *cp; 184 char b1[4]; 185 186 /* Figure out how many known commands there are */ 187 while (knowncmds[ncmds]) 188 ncmds++; 189 while (argc > 1 && argv[1][0] == '-') { 190 switch(argv[1][1]) { 191 192 /* -a: add pairs of macros */ 193 case 'a': 194 i = strlen(argv[1]) - 2; 195 if (i % 6 != 0) 196 usage(); 197 /* look for empty macro slots */ 198 for (i=0; br[i].opbr; i++) 199 ; 200 for (cp=argv[1]+3; cp[-1]; cp += 6) { 201 br[i].opbr = malloc(3); 202 strncpy(br[i].opbr, cp, 2); 203 br[i].clbr = malloc(3); 204 strncpy(br[i].clbr, cp+3, 2); 205 addmac(br[i].opbr); /* knows pairs are also known cmds */ 206 addmac(br[i].clbr); 207 i++; 208 } 209 break; 210 211 /* -c: add known commands */ 212 case 'c': 213 i = strlen(argv[1]) - 2; 214 if (i % 3 != 0) 215 usage(); 216 for (cp=argv[1]+3; cp[-1]; cp += 3) { 217 if (cp[2] && cp[2] != '.') 218 usage(); 219 strncpy(b1, cp, 2); 220 addmac(b1); 221 } 222 break; 223 224 /* -f: ignore font changes */ 225 case 'f': 226 fflag = 1; 227 break; 228 229 /* -s: ignore size changes */ 230 case 's': 231 sflag = 1; 232 break; 233 default: 234 usage(); 235 } 236 argc--; argv++; 237 } 238 239 nfiles = argc - 1; 240 241 if (nfiles > 0) { 242 for 
(i=1; i<argc; i++) { 243 cfilename = argv[i]; 244 f = fopen(cfilename, "r"); 245 if (f == NULL) 246 perror(cfilename); 247 else 248 process(f); 249 } 250 } else { 251 cfilename = "stdin"; 252 process(stdin); 253 } 254 exit(0); 255 } 256 257 usage() 258 { 259 printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n"); 260 exit(1); 261 } 262 263 process(f) 264 FILE *f; 265 { 266 register int i, n; 267 char mac[5]; /* The current macro or nroff command */ 268 int pl; 269 270 stktop = -1; 271 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 272 if (line[0] == '.') { 273 /* 274 * find and isolate the macro/command name. 275 */ 276 strncpy(mac, line+1, 4); 277 if (isspace(mac[0])) { 278 pe(lineno); 279 printf("Empty command\n"); 280 } else if (isspace(mac[1])) { 281 mac[1] = 0; 282 } else if (isspace(mac[2])) { 283 mac[2] = 0; 284 } else if (mac[0] != '\\' || mac[1] != '\"') { 285 pe(lineno); 286 printf("Command too long\n"); 287 } 288 289 /* 290 * Is it a known command? 291 */ 292 checkknown(mac); 293 294 /* 295 * Should we add it? 296 */ 297 if (eq(mac, "de")) 298 addcmd(line); 299 300 chkcmd(line, mac); 301 } 302 303 /* 304 * At this point we process the line looking 305 * for \s and \f. 
306 */ 307 for (i=0; line[i]; i++) 308 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 309 if (!sflag && line[++i]=='s') { 310 pl = line[++i]; 311 if (isdigit(pl)) { 312 n = pl - '0'; 313 pl = ' '; 314 } else 315 n = 0; 316 while (isdigit(line[++i])) 317 n = 10 * n + line[i] - '0'; 318 i--; 319 if (n == 0) { 320 if (stk[stktop].opno == SZ) { 321 stktop--; 322 } else { 323 pe(lineno); 324 printf("unmatched \\s0\n"); 325 } 326 } else { 327 stk[++stktop].opno = SZ; 328 stk[stktop].pl = pl; 329 stk[stktop].parm = n; 330 stk[stktop].lno = lineno; 331 } 332 } else if (!fflag && line[i]=='f') { 333 n = line[++i]; 334 if (n == 'P') { 335 if (stk[stktop].opno == FT) { 336 stktop--; 337 } else { 338 pe(lineno); 339 printf("unmatched \\fP\n"); 340 } 341 } else { 342 stk[++stktop].opno = FT; 343 stk[stktop].pl = 1; 344 stk[stktop].parm = n; 345 stk[stktop].lno = lineno; 346 } 347 } 348 } 349 } 350 /* 351 * We've hit the end and look at all this stuff that hasn't been 352 * matched yet! Complain, complain. 353 */ 354 for (i=stktop; i>=0; i--) { 355 complain(i); 356 } 357 } 358 359 complain(i) 360 { 361 pe(stk[i].lno); 362 printf("Unmatched "); 363 prop(i); 364 printf("\n"); 365 } 366 367 prop(i) 368 { 369 if (stk[i].pl == 0) 370 printf(".%s", br[stk[i].opno].opbr); 371 else switch(stk[i].opno) { 372 case SZ: 373 printf("\\s%c%d", stk[i].pl, stk[i].parm); 374 break; 375 case FT: 376 printf("\\f%c", stk[i].parm); 377 break; 378 default: 379 printf("Bug: stk[%d].opno = %d = .%s, .%s", 380 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); 381 } 382 } 383 384 chkcmd(line, mac) 385 char *line; 386 char *mac; 387 { 388 register int i, n; 389 390 /* 391 * Check to see if it matches top of stack. 392 */ 393 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 394 stktop--; /* OK. Pop & forget */ 395 else { 396 /* No. Maybe it's an opener */ 397 for (i=0; br[i].opbr; i++) { 398 if (eq(mac, br[i].opbr)) { 399 /* Found. Push it. 
*/ 400 stktop++; 401 stk[stktop].opno = i; 402 stk[stktop].pl = 0; 403 stk[stktop].parm = 0; 404 stk[stktop].lno = lineno; 405 break; 406 } 407 /* 408 * Maybe it's an unmatched closer. 409 * NOTE: this depends on the fact 410 * that none of the closers can be 411 * openers too. 412 */ 413 if (eq(mac, br[i].clbr)) { 414 nomatch(mac); 415 break; 416 } 417 } 418 } 419 } 420 421 nomatch(mac) 422 char *mac; 423 { 424 register int i, j; 425 426 /* 427 * Look for a match further down on stack 428 * If we find one, it suggests that the stuff in 429 * between is supposed to match itself. 430 */ 431 for (j=stktop; j>=0; j--) 432 if (eq(mac,br[stk[j].opno].clbr)) { 433 /* Found. Make a good diagnostic. */ 434 if (j == stktop-2) { 435 /* 436 * Check for special case \fx..\fR and don't 437 * complain. 438 */ 439 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 440 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 441 stktop = j -1; 442 return; 443 } 444 /* 445 * We have two unmatched frobs. Chances are 446 * they were intended to match, so we mention 447 * them together. 448 */ 449 pe(stk[j+1].lno); 450 prop(j+1); 451 printf(" does not match %d: ", stk[j+2].lno); 452 prop(j+2); 453 printf("\n"); 454 } else for (i=j+1; i <= stktop; i++) { 455 complain(i); 456 } 457 stktop = j-1; 458 return; 459 } 460 /* Didn't find one. Throw this away. */ 461 pe(lineno); 462 printf("Unmatched .%s\n", mac); 463 } 464 465 /* eq: are two strings equal? 
*/ 466 eq(s1, s2) 467 char *s1, *s2; 468 { 469 return (strcmp(s1, s2) == 0); 470 } 471 472 /* print the first part of an error message, given the line number */ 473 pe(lineno) 474 int lineno; 475 { 476 if (nfiles > 1) 477 printf("%s: ", cfilename); 478 printf("%d: ", lineno); 479 } 480 481 checkknown(mac) 482 char *mac; 483 { 484 485 if (eq(mac, ".")) 486 return; 487 if (binsrch(mac) >= 0) 488 return; 489 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 490 return; 491 492 pe(lineno); 493 printf("Unknown command: .%s\n", mac); 494 } 495 496 /* 497 * We have a .de xx line in "line". Add xx to the list of known commands. 498 */ 499 addcmd(line) 500 char *line; 501 { 502 char *mac; 503 504 /* grab the macro being defined */ 505 mac = line+4; 506 while (isspace(*mac)) 507 mac++; 508 if (*mac == 0) { 509 pe(lineno); 510 printf("illegal define: %s\n", line); 511 return; 512 } 513 mac[2] = 0; 514 if (isspace(mac[1]) || mac[1] == '\\') 515 mac[1] = 0; 516 if (ncmds >= MAXCMDS) { 517 printf("Only %d known commands allowed\n", MAXCMDS); 518 exit(1); 519 } 520 addmac(mac); 521 } 522 523 /* 524 * Add mac to the list. We should really have some kind of tree 525 * structure here but this is a quick-and-dirty job and I just don't 526 * have time to mess with it. (I wonder if this will come back to haunt 527 * me someday?) Anyway, I claim that .de is fairly rare in user 528 * nroff programs, and the register loop below is pretty fast. 
529 */ 530 addmac(mac) 531 char *mac; 532 { 533 register char **src, **dest, **loc; 534 535 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 536 #ifdef DEBUG 537 printf("binsrch(%s) -> already in table\n", mac); 538 #endif DEBUG 539 return; 540 } 541 /* binsrch sets slot as a side effect */ 542 #ifdef DEBUG 543 printf("binsrch(%s) -> %d\n", mac, slot); 544 #endif 545 loc = &knowncmds[slot]; 546 src = &knowncmds[ncmds-1]; 547 dest = src+1; 548 while (dest > loc) 549 *dest-- = *src--; 550 *loc = malloc(3); 551 strcpy(*loc, mac); 552 ncmds++; 553 #ifdef DEBUG 554 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); 555 #endif 556 } 557 558 /* 559 * Do a binary search in knowncmds for mac. 560 * If found, return the index. If not, return -1. 561 */ 562 binsrch(mac) 563 char *mac; 564 { 565 register char *p; /* pointer to current cmd in list */ 566 register int d; /* difference if any */ 567 register int mid; /* mid point in binary search */ 568 register int top, bot; /* boundaries of bin search, inclusive */ 569 570 top = ncmds-1; 571 bot = 0; 572 while (top >= bot) { 573 mid = (top+bot)/2; 574 p = knowncmds[mid]; 575 d = p[0] - mac[0]; 576 if (d == 0) 577 d = p[1] - mac[1]; 578 if (d == 0) 579 return mid; 580 if (d < 0) 581 bot = mid + 1; 582 else 583 top = mid - 1; 584 } 585 slot = bot; /* place it would have gone */ 586 return -1; 587 } 588