/*
 * Copyright (c) 1980 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
" All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*static char sccsid[] = "from: @(#)checknr.c	5.4 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: checknr.c,v 1.3 1994/12/24 15:57:02 cgd Exp $";
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/* Indices in br[] of the two built-in pairs that also have escape forms. */
#define SZ	0	/* .sz, also \s */
#define FT	1	/* .ft, also \f */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry, -1 when the stack is empty */

/*
 * The kinds of opening and closing brackets.  The table is terminated by
 * an entry whose opbr is null; -a appends further pairs before it.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() relies on this list being sorted by its first two characters.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

int	main(int argc, char **argv);
void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(const char *line, const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(const char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

/*
 * Return a freshly allocated, NUL-terminated copy of at most the first
 * two characters of name.  Exits if memory cannot be obtained.
 */
static char *
save_name(const char *name)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL) {
		perror("checknr");
		exit(1);
	}
	copy[0] = name[0];
	copy[1] = (name[0] != '\0') ? name[1] : '\0';
	copy[2] = '\0';
	return copy;
}

/*
 * Push a new entry for the current line onto stk[].  The stack is a fixed
 * array, so nesting deeper than MAXSTK is reported and ends the run
 * instead of writing past the end of the array.
 */
static void
push_item(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("Nesting too deep: more than %d unmatched items\n",
		    MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * True if c ends a command name: white space, or the end of the string
 * (the last line of a file need not end in a newline).
 */
static int
is_name_end(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * Parse the command line arguments, then check each named file, or the
 * standard input if no files are named.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			/* each pair is written as the six characters .xx.yy */
			if ((strlen(argv[1]) - 2) % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay empty: it ends the table */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = save_name(cp);
				br[i].clbr = save_name(cp + 3);
				/* knows pairs are also known cmds */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			/* each command is written as the three characters .xx */
			if ((strlen(argv[1]) - 2) % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the usage message and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Handle a line in the global "line" buffer that starts with '.': isolate
 * the command name, complain if it is empty, too long or unknown, record
 * a .de definition, and match it against the bracket stack.
 */
static void
check_command(void)
{
	char mac[5];	/* The current macro or nroff command */

	/*
	 * find and isolate the macro/command name.
	 * strncpy does not terminate when it copies four characters, so
	 * terminate explicitly.
	 */
	strncpy(mac, line + 1, 4);
	mac[4] = '\0';
	if (is_name_end(mac[0])) {
		pe(lineno);
		printf("Empty command\n");
		/* no name to look up or match */
		return;
	}
	if (is_name_end(mac[1])) {
		mac[1] = 0;
	} else if (is_name_end(mac[2])) {
		mac[2] = 0;
	} else if (mac[0] != '\\' || mac[1] != '\"') {
		pe(lineno);
		printf("Command too long\n");
	}

	/*
	 * Is it a known command?
	 */
	checkknown(mac);

	/*
	 * Should we add it?
	 */
	if (eq(mac, "de"))
		addcmd(line);

	chkcmd(line, mac);
}

/*
 * Scan the global "line" buffer for embedded \s and \f escapes.  A size
 * or font change is pushed on the stack; \s0 and \fP pop the matching
 * entry, or are reported if the top of the stack is something else.
 */
static void
scan_escapes(void)
{
	int i, n, pl;

	for (i = 0; line[i]; i++) {
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		if (line[i + 1] == '\0')
			break;		/* nothing follows the backslash */
		/*
		 * Step onto the escape name whether or not -s was given,
		 * so that the \f test below looks at the right character.
		 */
		i++;
		if (!sflag && line[i] == 's') {
			if (line[i + 1] == '\0')
				break;	/* truncated escape */
			pl = (unsigned char)line[++i];
			if (isdigit(pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			while (isdigit((unsigned char)line[i + 1])) {
				i++;
				/* stop accumulating before int overflows */
				if (n <= (INT_MAX - 9) / 10)
					n = 10 * n + (line[i] - '0');
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push_item(SZ, pl, n);
			}
		} else if (!fflag && line[i] == 'f') {
			if (line[i + 1] == '\0')
				break;	/* truncated escape */
			n = (unsigned char)line[++i];
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push_item(FT, 1, n);
			}
		}
	}
}

/*
 * Check one input stream.  Every line is scanned for \s and \f escapes;
 * lines starting with '.' are also checked as commands.  Whatever is
 * still on the stack at end of input is reported as unmatched.
 */
void
process(FILE *f)
{
	int i;
	int continued;		/* previous read stopped before a newline */
	int is_continuation;
	size_t len;

	stktop = -1;
	lineno = 0;
	continued = 0;
	while (fgets(line, sizeof line, f) != NULL) {
		/*
		 * An input line longer than the buffer arrives in several
		 * pieces.  Only the first piece starts a new line: later
		 * pieces neither bump the line number nor count as commands.
		 */
		is_continuation = continued;
		len = strlen(line);
		continued = (len == sizeof line - 1 && line[len - 1] != '\n');

		if (!is_continuation) {
			lineno++;
			if (line[0] == '.')
				check_command();
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scan_escapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* Report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/* Print stack entry i the way it appeared in the input. */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket stack: pop if it closes the top
 * entry, push if it is an opener, and hand an out-of-place closer to
 * nomatch().  The line argument is not used.
 */
void
chkcmd(const char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}

	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push_item(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * mac is a closer that does not match the top of the stack.  Report it,
 * together with whatever it leaves unmatched.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found.  Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R'
			    && stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/* print the first part of an error message, given the line number */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * Complain if mac is not a known command.  "." (the second dot of a ".."
 * line) and comments are accepted without a lookup hit.
 */
void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * line must start with ".de"; it is not modified.
 */
void
addcmd(char *line)
{
	char name[3];
	char *mac;

	/*
	 * grab the macro being defined; start right after ".de" so that a
	 * line holding nothing else is not overrun
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* keep at most two characters, dropping white space or a backslash */
	name[0] = mac[0];
	name[1] = mac[1];
	name[2] = 0;
	if (isspace((unsigned char)name[1]) || name[1] == '\\')
		name[1] = 0;
	addmac(name);
}

/*
 * Add mac to the list, keeping it sorted.  Only the first two characters
 * of mac are stored.  We should really have some kind of tree structure
 * here but this is a quick-and-dirty job and I just don't have time to
 * mess with it.  (I wonder if this will come back to haunt me someday?)
 * Anyway, I claim that .de is fairly rare in user nroff programs, and
 * the shifting below is pretty fast.
 */
void
addmac(const char *mac)
{
	char **loc;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	/* checked here so that -a, -c and .de are all bounded */
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail of the list up by one */
	loc = &knowncmds[slot];
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof *loc);
	*loc = save_name(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: %s in slot %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.  Only the first two characters
 * are compared.  If found, return the index.  If not, return -1 and leave
 * in slot the index at which mac would have to be inserted.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}