/*	$NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $	*/

/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1980, 1993\n"
" The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $";
#endif
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in on */
} stk[MAXSTK];
int stktop;		/* index of the top entry, -1 when the stack is empty */

/* Fixed slots in br[] that are also used for the embedded \s and \f. */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.  The table is terminated
 * by an entry whose opbr is a null pointer; -a appends in front of it.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ NULL, NULL }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted on its first two characters: binsrch()
 * does a binary search over it and addmac() inserts in order.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(const char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

/*
 * savemac: return a freshly allocated, NUL-terminated copy of at most
 * the first two characters of src.  Exits if memory cannot be obtained.
 */
static char *
savemac(const char *src)
{
	char *copy = malloc(3);

	if (copy == NULL) {
		perror("malloc");
		exit(1);
	}
	copy[0] = src[0];
	copy[1] = (src[0] != '\0') ? src[1] : '\0';
	copy[2] = '\0';
	return copy;
}

/*
 * isend: does c end a command name?  True for white space and for the
 * NUL that ends a final line which has no trailing newline.
 */
static int
isend(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * push: put a new entry for the current line on the bracket stack.
 * Gives up with a diagnostic rather than writing past stk[MAXSTK-1].
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop + 1 >= MAXSTK) {
		pe(lineno);
		printf("Nesting deeper than %d levels\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * main: parse the leading option arguments, then check every named file
 * (or the standard input when none is named).
 *
 *	-a.xx.yy...	add xx/yy as an opening/closing macro pair
 *	-c.xx...	add xx as a known command
 *	-f		ignore \f font changes
 *	-s		ignore \s size changes
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay empty: it ends the table */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				strncpy(b1, cp, 2);
				b1[2] = '\0';	/* strncpy does not terminate */
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return 0;
}

/*
 * usage: print the synopsis and exit with status 1.
 */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * process: check one input stream.  Each line starting with '.' is
 * looked up as a command and matched against the bracket table; every
 * line is then scanned for embedded \s and \f.  Whatever is still on
 * the stack at end of input is reported as unmatched.
 */
void
process(FILE *f)
{
	int i, n;
	char mac[5];	/* The current macro or nroff command */
	int pl;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';	/* strncpy does not terminate */
			if (isend(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isend(mac[1])) {
				mac[1] = 0;
			} else if (isend(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i] != '\0'; i++) {
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;
			/*
			 * Step onto the escape character whatever the
			 * flags are, so that -s does not hide \f.
			 */
			if (line[++i] == '\0')
				break;
			if (!sflag && line[i] == 's') {
				pl = (unsigned char)line[++i];
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				/* gather the remaining digits; i stays on the last one */
				while (line[i] != '\0' &&
				    isdigit((unsigned char)line[i + 1])) {
					i++;
					if (n < 100000)	/* no int overflow on silly sizes */
						n = 10 * n + line[i] - '0';
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n);
				}
			} else if (!fflag && line[i] == 'f') {
				n = line[++i];
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n);
				}
			}
			/* an escape cut short by end of input: stop here */
			if (line[i] == '\0')
				break;
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * complain: report stack entry i as unmatched.
 */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * prop: print stack entry i the way it appeared in the input: as a
 * macro call, a \s size change or a \f font change.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * chkcmd: match the command mac against the bracket stack.  A closer
 * for the top of stack pops it, an opener is pushed, and any other
 * closer is handed to nomatch().  The line argument is not used.
 */
void
chkcmd(char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}
	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * nomatch: mac is a closer that does not match the top of stack.
 * Report it, discarding the entries it skips over if a matching opener
 * exists further down.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found. Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R' &&
			    stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one. Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/* print the first part of an error message, given the line number */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * checkknown: complain unless mac is ".", is in knowncmds, or starts
 * a comment (\").
 */
void
checkknown(const char *mac)
{
	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line is cut off in place right after the macro name.
 */
void
addcmd(char *line)
{
	char *mac;

	/*
	 * grab the macro being defined.  Start right behind "de": for
	 * ".de" at the very end of the input there is nothing beyond.
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	if (mac[1] != '\0') {
		/* mac[1] is not the terminator, so mac[2] is still inside the string */
		mac[2] = 0;
		if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
			mac[1] = 0;
	}
	addmac(mac);
}

/*
 * Add mac to the list, keeping the list sorted; only the first two
 * characters of mac are stored.  Does nothing when mac is already
 * known, and exits when all MAXCMDS slots are taken.
 *
 * We should really have some kind of tree structure here but this is
 * a quick-and-dirty job and I just don't have time to mess with it.
 * (I wonder if this will come back to haunt me someday?)  Anyway, I
 * claim that .de is fairly rare in user nroff programs, and the loop
 * below is pretty fast.
 */
void
addmac(const char *mac)
{
	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail of the list up by one */
	memmove(&knowncmds[slot + 1], &knowncmds[slot],
	    (size_t)(ncmds - slot) * sizeof knowncmds[0]);
	knowncmds[slot] = savemac(mac);
	ncmds++;
#ifdef DEBUG
	if (slot >= 2 && slot + 2 < ncmds)
		printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac; only the first two
 * characters are compared.
 * If found, return the index.  If not, return -1 and leave in slot
 * the index at which mac would have to be inserted.
 */
int
binsrch(const char *mac)
{
	char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}