1 /* $NetBSD: checknr.c,v 1.5 1997/10/18 12:38:13 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 45 #else 46 __RCSID("$NetBSD: checknr.c,v 1.5 1997/10/18 12:38:13 lukem Exp $"); 47 #endif 48 #endif /* not lint */ 49 50 /* 51 * checknr: check an nroff/troff input file for matching macro calls. 52 * we also attempt to match size and font changes, but only the embedded 53 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 54 * later but for now think of these restrictions as contributions to 55 * structured typesetting. 56 */ 57 #include <ctype.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 62 #define MAXSTK 100 /* Stack size */ 63 #define MAXBR 100 /* Max number of bracket pairs known */ 64 #define MAXCMDS 500 /* Max number of commands known */ 65 66 /* 67 * The stack on which we remember what we've seen so far. 68 */ 69 struct stkstr { 70 int opno; /* number of opening bracket */ 71 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 72 int parm; /* parm to size, font, etc */ 73 int lno; /* line number the thing came in in */ 74 } stk[MAXSTK]; 75 int stktop; 76 77 /* 78 * The kinds of opening and closing brackets. 79 */ 80 struct brstr { 81 char *opbr; 82 char *clbr; 83 } br[MAXBR] = { 84 /* A few bare bones troff commands */ 85 #define SZ 0 86 { "sz", "sz"}, /* also \s */ 87 #define FT 1 88 { "ft", "ft"}, /* also \f */ 89 /* the -mm package */ 90 {"AL", "LE"}, 91 {"AS", "AE"}, 92 {"BL", "LE"}, 93 {"BS", "BE"}, 94 {"DF", "DE"}, 95 {"DL", "LE"}, 96 {"DS", "DE"}, 97 {"FS", "FE"}, 98 {"ML", "LE"}, 99 {"NS", "NE"}, 100 {"RL", "LE"}, 101 {"VL", "LE"}, 102 /* the -ms package */ 103 {"AB", "AE"}, 104 {"BD", "DE"}, 105 {"CD", "DE"}, 106 {"DS", "DE"}, 107 {"FS", "FE"}, 108 {"ID", "DE"}, 109 {"KF", "KE"}, 110 {"KS", "KE"}, 111 {"LD", "DE"}, 112 {"LG", "NL"}, 113 {"QS", "QE"}, 114 {"RS", "RE"}, 115 {"SM", "NL"}, 116 {"XA", "XE"}, 117 {"XS", "XE"}, 118 /* The -me package */ 119 {"(b", ")b"}, 120 {"(c", ")c"}, 121 {"(d", ")d"}, 122 {"(f", ")f"}, 123 {"(l", ")l"}, 124 {"(q", ")q"}, 125 {"(x", ")x"}, 126 {"(z", ")z"}, 127 /* Things needed by preprocessors */ 128 {"EQ", "EN"}, 129 {"TS", "TE"}, 130 /* Refer */ 131 {"[", "]"}, 132 {0, 0}, 133 }; 134 135 /* 136 * All commands known to nroff, plus macro packages. 137 * Used so we can complain about unrecognized commands. 138 */ 139 char *knowncmds[MAXCMDS] = { 140 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", 141 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", 142 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", 143 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", 144 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2", 145 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", 146 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", 147 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", 148 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID", 149 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", 150 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", 151 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", 152 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA", 153 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA", 154 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE", 155 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", 156 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", 157 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", 158 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", 159 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", 160 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", 161 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", 162 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", 163 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 164 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", 165 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", 166 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", 167 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", 168 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", 169 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", 170 "yr", 0 171 }; 172 173 int lineno; /* current line number in input file */ 174 char line[256]; /* the current line */ 175 char *cfilename; /* name of current file */ 176 int nfiles; /* number of files to process */ 177 int fflag; /* -f: ignore \f */ 178 int sflag; /* -s: ignore \s */ 179 int ncmds; /* size of knowncmds */ 180 int slot; /* slot in knowncmds found by binsrch */ 181 182 void addcmd __P((char *)); 183 void addmac __P((char *)); 184 int binsrch __P((char *)); 185 void checkknown __P((char *)); 186 void chkcmd __P((char *, char *)); 187 void complain __P((int)); 188 int eq __P((const void *, const void *)); 189 int main __P((int, char **)); 190 void nomatch __P((char *)); 191 void pe __P((int)); 192 void process __P((FILE *)); 193 void prop __P((int)); 194 void usage __P((void)); 195 196 int 197 main(argc, argv) 198 int argc; 199 char **argv; 200 { 201 FILE *f; 202 int i; 203 char *cp; 204 char b1[4]; 205 206 /* Figure out how many known commands there are */ 207 while (knowncmds[ncmds]) 208 ncmds++; 209 while (argc > 1 && argv[1][0] == '-') { 210 switch(argv[1][1]) { 211 212 /* -a: add pairs of macros */ 213 case 'a': 214 i = strlen(argv[1]) - 2; 215 if (i % 6 != 0) 216 usage(); 217 /* look for empty macro slots */ 218 for (i=0; br[i].opbr; i++) 219 ; 220 for (cp=argv[1]+3; cp[-1]; cp += 6) { 221 br[i].opbr = malloc(3); 222 strncpy(br[i].opbr, cp, 2); 223 br[i].clbr = malloc(3); 224 strncpy(br[i].clbr, cp+3, 2); 225 addmac(br[i].opbr); /* knows pairs are also known cmds */ 226 addmac(br[i].clbr); 227 i++; 228 } 229 break; 230 231 /* -c: add known commands */ 232 case 'c': 233 i = strlen(argv[1]) - 2; 234 if (i % 3 != 0) 235 usage(); 236 for (cp=argv[1]+3; cp[-1]; cp += 3) { 237 if (cp[2] && cp[2] != '.') 238 usage(); 239 strncpy(b1, cp, 2); 240 addmac(b1); 241 } 242 break; 243 244 /* -f: ignore font changes */ 245 case 'f': 246 fflag = 1; 247 break; 248 249 /* -s: ignore size changes */ 250 case 's': 251 sflag = 1; 252 break; 253 default: 254 usage(); 255 } 256 argc--; argv++; 257 } 258 259 nfiles = argc - 1; 260 261 if (nfiles > 0) { 262 for (i=1; i<argc; i++) { 263 cfilename = argv[i]; 264 f = fopen(cfilename, "r"); 265 if (f == NULL) 266 perror(cfilename); 267 else 268 process(f); 269 } 270 } else { 271 cfilename = "stdin"; 272 process(stdin); 273 } 274 exit(0); 275 } 276 277 void 278 usage() 279 { 280 printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n"); 281 exit(1); 282 } 283 284 void 285 process(f) 286 FILE *f; 287 { 288 int i, n; 289 char mac[5]; /* The current macro or nroff command */ 290 int pl; 291 292 stktop = -1; 293 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 294 if (line[0] == '.') { 295 /* 296 * find and isolate the macro/command name. 297 */ 298 strncpy(mac, line+1, 4); 299 if (isspace(mac[0])) { 300 pe(lineno); 301 printf("Empty command\n"); 302 } else if (isspace(mac[1])) { 303 mac[1] = 0; 304 } else if (isspace(mac[2])) { 305 mac[2] = 0; 306 } else if (mac[0] != '\\' || mac[1] != '\"') { 307 pe(lineno); 308 printf("Command too long\n"); 309 } 310 311 /* 312 * Is it a known command? 313 */ 314 checkknown(mac); 315 316 /* 317 * Should we add it? 318 */ 319 if (eq(mac, "de")) 320 addcmd(line); 321 322 chkcmd(line, mac); 323 } 324 325 /* 326 * At this point we process the line looking 327 * for \s and \f. 328 */ 329 for (i=0; line[i]; i++) 330 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 331 if (!sflag && line[++i]=='s') { 332 pl = line[++i]; 333 if (isdigit(pl)) { 334 n = pl - '0'; 335 pl = ' '; 336 } else 337 n = 0; 338 while (isdigit(line[++i])) 339 n = 10 * n + line[i] - '0'; 340 i--; 341 if (n == 0) { 342 if (stk[stktop].opno == SZ) { 343 stktop--; 344 } else { 345 pe(lineno); 346 printf("unmatched \\s0\n"); 347 } 348 } else { 349 stk[++stktop].opno = SZ; 350 stk[stktop].pl = pl; 351 stk[stktop].parm = n; 352 stk[stktop].lno = lineno; 353 } 354 } else if (!fflag && line[i]=='f') { 355 n = line[++i]; 356 if (n == 'P') { 357 if (stk[stktop].opno == FT) { 358 stktop--; 359 } else { 360 pe(lineno); 361 printf("unmatched \\fP\n"); 362 } 363 } else { 364 stk[++stktop].opno = FT; 365 stk[stktop].pl = 1; 366 stk[stktop].parm = n; 367 stk[stktop].lno = lineno; 368 } 369 } 370 } 371 } 372 /* 373 * We've hit the end and look at all this stuff that hasn't been 374 * matched yet! Complain, complain. 375 */ 376 for (i=stktop; i>=0; i--) { 377 complain(i); 378 } 379 } 380 381 void 382 complain(i) 383 int i; 384 { 385 pe(stk[i].lno); 386 printf("Unmatched "); 387 prop(i); 388 printf("\n"); 389 } 390 391 void 392 prop(i) 393 int i; 394 { 395 if (stk[i].pl == 0) 396 printf(".%s", br[stk[i].opno].opbr); 397 else switch(stk[i].opno) { 398 case SZ: 399 printf("\\s%c%d", stk[i].pl, stk[i].parm); 400 break; 401 case FT: 402 printf("\\f%c", stk[i].parm); 403 break; 404 default: 405 printf("Bug: stk[%d].opno = %d = .%s, .%s", 406 i, stk[i].opno, br[stk[i].opno].opbr, 407 br[stk[i].opno].clbr); 408 } 409 } 410 411 void 412 chkcmd(line, mac) 413 char *line; 414 char *mac; 415 { 416 int i; 417 418 /* 419 * Check to see if it matches top of stack. 420 */ 421 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 422 stktop--; /* OK. Pop & forget */ 423 else { 424 /* No. Maybe it's an opener */ 425 for (i=0; br[i].opbr; i++) { 426 if (eq(mac, br[i].opbr)) { 427 /* Found. Push it. */ 428 stktop++; 429 stk[stktop].opno = i; 430 stk[stktop].pl = 0; 431 stk[stktop].parm = 0; 432 stk[stktop].lno = lineno; 433 break; 434 } 435 /* 436 * Maybe it's an unmatched closer. 437 * NOTE: this depends on the fact 438 * that none of the closers can be 439 * openers too. 440 */ 441 if (eq(mac, br[i].clbr)) { 442 nomatch(mac); 443 break; 444 } 445 } 446 } 447 } 448 449 void 450 nomatch(mac) 451 char *mac; 452 { 453 int i, j; 454 455 /* 456 * Look for a match further down on stack 457 * If we find one, it suggests that the stuff in 458 * between is supposed to match itself. 459 */ 460 for (j=stktop; j>=0; j--) 461 if (eq(mac,br[stk[j].opno].clbr)) { 462 /* Found. Make a good diagnostic. */ 463 if (j == stktop-2) { 464 /* 465 * Check for special case \fx..\fR and don't 466 * complain. 467 */ 468 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 469 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 470 stktop = j -1; 471 return; 472 } 473 /* 474 * We have two unmatched frobs. Chances are 475 * they were intended to match, so we mention 476 * them together. 477 */ 478 pe(stk[j+1].lno); 479 prop(j+1); 480 printf(" does not match %d: ", stk[j+2].lno); 481 prop(j+2); 482 printf("\n"); 483 } else for (i=j+1; i <= stktop; i++) { 484 complain(i); 485 } 486 stktop = j-1; 487 return; 488 } 489 /* Didn't find one. Throw this away. */ 490 pe(lineno); 491 printf("Unmatched .%s\n", mac); 492 } 493 494 /* eq: are two strings equal? */ 495 int 496 eq(s1, s2) 497 const void *s1, *s2; 498 { 499 return (strcmp((char *)s1, (char *)s2) == 0); 500 } 501 502 /* print the first part of an error message, given the line number */ 503 void 504 pe(lineno) 505 int lineno; 506 { 507 if (nfiles > 1) 508 printf("%s: ", cfilename); 509 printf("%d: ", lineno); 510 } 511 512 void 513 checkknown(mac) 514 char *mac; 515 { 516 517 if (eq(mac, ".")) 518 return; 519 if (binsrch(mac) >= 0) 520 return; 521 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 522 return; 523 524 pe(lineno); 525 printf("Unknown command: .%s\n", mac); 526 } 527 528 /* 529 * We have a .de xx line in "line". Add xx to the list of known commands. 530 */ 531 void 532 addcmd(line) 533 char *line; 534 { 535 char *mac; 536 537 /* grab the macro being defined */ 538 mac = line+4; 539 while (isspace(*mac)) 540 mac++; 541 if (*mac == 0) { 542 pe(lineno); 543 printf("illegal define: %s\n", line); 544 return; 545 } 546 mac[2] = 0; 547 if (isspace(mac[1]) || mac[1] == '\\') 548 mac[1] = 0; 549 if (ncmds >= MAXCMDS) { 550 printf("Only %d known commands allowed\n", MAXCMDS); 551 exit(1); 552 } 553 addmac(mac); 554 } 555 556 /* 557 * Add mac to the list. We should really have some kind of tree 558 * structure here but this is a quick-and-dirty job and I just don't 559 * have time to mess with it. (I wonder if this will come back to haunt 560 * me someday?) Anyway, I claim that .de is fairly rare in user 561 * nroff programs, and the register loop below is pretty fast. 562 */ 563 void 564 addmac(mac) 565 char *mac; 566 { 567 char **src, **dest, **loc; 568 569 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 570 #ifdef DEBUG 571 printf("binsrch(%s) -> already in table\n", mac); 572 #endif DEBUG 573 return; 574 } 575 /* binsrch sets slot as a side effect */ 576 #ifdef DEBUG 577 printf("binsrch(%s) -> %d\n", mac, slot); 578 #endif 579 loc = &knowncmds[slot]; 580 src = &knowncmds[ncmds-1]; 581 dest = src+1; 582 while (dest > loc) 583 *dest-- = *src--; 584 *loc = malloc(3); 585 strcpy(*loc, mac); 586 ncmds++; 587 #ifdef DEBUG 588 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); 589 #endif 590 } 591 592 /* 593 * Do a binary search in knowncmds for mac. 594 * If found, return the index. If not, return -1. 595 */ 596 int 597 binsrch(mac) 598 char *mac; 599 { 600 char *p; /* pointer to current cmd in list */ 601 int d; /* difference if any */ 602 int mid; /* mid point in binary search */ 603 int top, bot; /* boundaries of bin search, inclusive */ 604 605 top = ncmds-1; 606 bot = 0; 607 while (top >= bot) { 608 mid = (top+bot)/2; 609 p = knowncmds[mid]; 610 d = p[0] - mac[0]; 611 if (d == 0) 612 d = p[1] - mac[1]; 613 if (d == 0) 614 return mid; 615 if (d < 0) 616 bot = mid + 1; 617 else 618 top = mid - 1; 619 } 620 slot = bot; /* place it would have gone */ 621 return -1; 622 } 623