/*
 * Copyright (c) 1980 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
" All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static char sccsid[] = "@(#)checknr.c	5.4 (Berkeley) 6/1/90";
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry, -1 when empty */

/* Indices of the two entries of br[] that double as \s and \f. */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.  The table is terminated
 * by an entry whose opbr is a null pointer; -a appends to it.
 */
struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table, comparing the first two characters only,
 * so entries must stay in that order; addmac() inserts in place.
 */
const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

static void	usage(void);
static void	process(FILE *f);
static void	scanescapes(void);
static void	push(int opno, int pl, int parm);
static void	complain(int i);
static void	prop(int i);
static void	chkcmd(const char *line, const char *mac);
static void	nomatch(const char *mac);
static int	eq(const char *s1, const char *s2);
static void	pe(int lineno);
static void	checkknown(const char *mac);
static void	addcmd(char *line);
static void	addmac(const char *mac);
static int	binsrch(const char *mac);
static char	*savename(const char *s);

/*
 * Parse the options, then check each named file (or stdin when no file
 * is named).  Always exits 0 after processing; option errors exit 1
 * through usage().
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	size_t len;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros, written -a.xx.yy.xx.yy... */
		case 'a':
			len = strlen(argv[1]) - 2;
			if (len % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last entry must remain the 0,0 terminator */
				if (i >= MAXBR - 1) {
					fprintf(stderr,
					    "Only %d known pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savename(cp);
				br[i].clbr = savename(cp + 3);
				/* pairs are also known cmds */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands, written -c.xx.xx.xx... */
		case 'c':
			len = strlen(argv[1]) - 2;
			if (len % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				/* copy by hand: strncpy would not terminate */
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return 0;
}

/* Print the usage message on stderr and exit with status 1. */
static void
usage(void)
{
	fprintf(stderr,
	    "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Check one input stream: read it line by line into the global "line",
 * validate every request/macro line, track \s and \f escapes, and report
 * whatever is still open at end of input.
 *
 * Input is read in pieces of at most sizeof line - 1 characters; a longer
 * physical line is seen (and counted) as several lines.
 */
static void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy zero-pads a short name but does not
			 * terminate a long one, so terminate explicitly.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (isspace((unsigned char)mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isspace((unsigned char)mac[1])) {
				mac[1] = 0;
			} else if (isspace((unsigned char)mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scanescapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--)
		complain(i);
}

/*
 * Scan the global "line" for embedded \s (size) and \f (font) escapes.
 * \s0 and \fP pop the matching entry; any other size or font pushes one.
 * A backslash that directly follows another backslash is not an escape.
 */
static void
scanescapes(void)
{
	int i, n, pl;

	for (i = 0; line[i]; i++) {
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		/*
		 * Step onto the escape name unconditionally, so that -s
		 * does not also disable \f.  Stop if the buffer ends here.
		 */
		i++;
		if (line[i] == '\0')
			break;
		if (!sflag && line[i] == 's') {
			if (line[i + 1] == '\0')
				break;	/* incomplete escape at end of buffer */
			pl = (unsigned char)line[++i];
			if (isdigit(pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			/* collect digits, leaving i on the last one used */
			while (isdigit((unsigned char)line[i + 1])) {
				i++;
				if (n <= (INT_MAX - 9) / 10)
					n = 10 * n + (line[i] - '0');
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (!fflag && line[i] == 'f') {
			if (line[i + 1] == '\0')
				break;	/* incomplete escape at end of buffer */
			n = (unsigned char)line[++i];
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/*
 * Push an entry for the current line onto stk[].  Exits with status 1
 * when the stack is full, since nothing sensible can be reported after
 * losing track of the nesting.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop + 1 >= MAXSTK) {
		pe(lineno);
		printf("Nesting too deep (more than %d unmatched items)\n",
		    MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/* Report stack entry i as unmatched. */
static void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/* Print stack entry i the way it appeared in the input. */
static void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match command "mac" against the bracket table: pop if it closes the
 * top of the stack, push if it is an opener, and diagnose it if it is a
 * closer that does not match the top.  "line" is not used.
 */
static void
chkcmd(const char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}
	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * "mac" is a closer that does not match the top of the stack.  Report
 * it, discarding the entries it implicitly closes.
 */
static void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found.  Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R' &&
			    stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
static int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * Print the first part of an error message, given the line number.
 * The file name is included only when several files are being checked.
 */
static void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/* Complain unless "mac" is ".", a known command, or a \" comment. */
static void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * As a side effect "line" is cut off right after the macro name.
 */
static void
addcmd(char *line)
{
	char *mac;
	int i;

	/* grab the macro being defined: skip ".de" and its separator */
	mac = line;
	for (i = 0; i < 4 && *mac != '\0'; i++)
		mac++;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/*
	 * Names are one or two characters.  Never write past an existing
	 * terminator: it may be the last byte of the buffer.
	 */
	if (mac[1] != '\0') {
		if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
			mac[1] = 0;
		else
			mac[2] = 0;
	}
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the insertion below is pretty fast.
 *
 * Only the first two characters of mac are stored.  Exits with status 1
 * if the table is full.
 */
static void
addmac(const char *mac)
{
	const char **loc;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif /* DEBUG */
	if (ncmds >= MAXCMDS) {
		fprintf(stderr, "Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a hole at slot by moving the tail up one place */
	loc = &knowncmds[slot];
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof *loc);
	*loc = savename(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: %s at slot %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif /* DEBUG */
}

/*
 * Do a binary search in knowncmds for mac, comparing the first two
 * characters only.
 * If found, return the index.  If not, return -1 and leave in "slot"
 * the index at which mac would have to be inserted.
 */
static int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */
	char m1;	/* second character of mac, 0 if mac is empty */

	m1 = mac[0] != '\0' ? mac[1] : '\0';
	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - m1;
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}

/*
 * Return a freshly allocated, NUL-terminated copy of the first two
 * characters of s (fewer if s is shorter).  Exits with status 1 when
 * out of memory.  The copy is kept for the life of the program.
 */
static char *
savename(const char *s)
{
	char *p;

	p = malloc(3);
	if (p == NULL) {
		fprintf(stderr, "checknr: out of memory\n");
		exit(1);
	}
	p[0] = s[0];
	p[1] = s[0] != '\0' ? s[1] : '\0';
	p[2] = '\0';
	return p;
}