/*	$NetBSD: checknr.c,v 1.8 2002/01/21 16:40:19 wiz Exp $	*/

/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>

/*
 * Fallbacks so the file still builds where <sys/cdefs.h> does not provide
 * the ident macros; they expand to a harmless forward declaration.
 */
#ifndef __COPYRIGHT
#define __COPYRIGHT(x)	struct checknr_ident_unused
#endif
#ifndef __RCSID
#define __RCSID(x)	struct checknr_ident_unused
#endif

#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n");
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: checknr.c,v 1.8 2002/01/21 16:40:19 wiz Exp $");
#endif
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */
#define MAXPARM	10000	/* Stop accumulating \s digits here (no int overflow) */

/* True when c ends a command name: whitespace or the end of the string. */
#define ENDNAME(c)	((c) == '\0' || isspace((unsigned char)(c)))

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/*
 * The kinds of opening and closing brackets.
 * The list ends at the first entry whose opbr is a null pointer.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz", "sz"},	/* also \s */
#define FT	1
	{ "ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"BD", "DE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	{"XA", "XE"},
	{"XS", "XE"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0},
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * The table must stay sorted on its first two characters: binsrch()
 * searches it and addmac() inserts into it in that order.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	addcmd(char *);
void	addmac(char *);
int	binsrch(char *);
void	checkknown(char *);
void	chkcmd(char *, char *);
void	complain(int);
int	eq(const void *, const void *);
int	main(int, char **);
void	nomatch(char *);
void	pe(int);
void	process(FILE *);
void	prop(int);
void	usage(void);

static void	push(int, int, int);
static char	*savename(const char *);
static void	scanescapes(void);

/*
 * Parse the options, then check every file named on the command line,
 * or standard input when no file is named.  A file that cannot be opened
 * is reported with perror() and skipped.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			/* the argument must be whole ".xx.yy" groups */
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay empty: it ends the list */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savename(cp);
				br[i].clbr = savename(cp + 3);
				/* knows pairs are also known cmds */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			/* the argument must be whole ".xx" groups */
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				/* the length check above guarantees two chars */
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				perror(cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the usage message and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Return a freshly allocated copy of at most the first two characters
 * of name, always NUL-terminated.  Exits if memory cannot be obtained.
 */
static char *
savename(const char *name)
{
	char *copy;
	size_t len;

	copy = malloc(3);
	if (copy == NULL) {
		perror("malloc");
		exit(1);
	}
	for (len = 0; len < 2 && name[len] != '\0'; len++)
		copy[len] = name[len];
	copy[len] = '\0';
	return copy;
}

/*
 * Push an entry for the current input line onto stk.
 * Exits with a diagnostic instead of writing past the end of the stack.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("More than %d unmatched items\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * Check one input stream: each line starting with '.' is treated as a
 * command and checked, every line is scanned for \s and \f, and whatever
 * is still on the stack at end of input is reported as unmatched.
 */
void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate a full-length copy,
			 * so do it by hand.
			 */
			strncpy(mac, line + 1, sizeof mac - 1);
			mac[sizeof mac - 1] = '\0';
			if (ENDNAME(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (ENDNAME(mac[1])) {
				mac[1] = 0;
			} else if (ENDNAME(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scanescapes();
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * Scan the current line for embedded \s (size) and \f (font) escapes.
 * \sN with N != 0 and \fX with X != 'P' are pushed; \s0 and \fP pop a
 * matching entry from the top of the stack or are reported as unmatched.
 * -s and -f each disable only their own kind of check.  The scan never
 * moves past the terminating NUL; an escape cut off there is ignored.
 */
static void
scanescapes(void)
{
	int i, n, pl;

	for (i = 0; line[i] != '\0'; i++) {
		/* only a backslash that is not itself escaped counts */
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		if (line[i + 1] == '\0')
			break;
		i++;		/* now at the escape letter */
		if (!sflag && line[i] == 's') {
			if (line[i + 1] == '\0')
				break;
			pl = line[++i];
			if (isdigit((unsigned char)pl)) {
				n = pl - '0';
				pl = ' ';
			} else
				n = 0;
			/* consume the digits, leaving i on the last one */
			while (isdigit((unsigned char)line[i + 1])) {
				i++;
				if (n < MAXPARM)
					n = 10 * n + (line[i] - '0');
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (!fflag && line[i] == 'f') {
			if (line[i + 1] == '\0')
				break;
			n = line[++i];
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/* Report stack entry i as unmatched, prefixed by the line it came from. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it was written in the input: ".xx" for a
 * command (pl == 0), otherwise the \s or \f escape with its parameter.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket table: pop if it closes the top
 * of the stack, push if it is an opener, and hand an out-of-place closer
 * to nomatch().  The line argument is not used.
 */
void
chkcmd(char *line, char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * Closer mac did not match the top of the stack.  Report the problem and
 * resynchronize the stack if a matching opener lies further down.
 */
void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop - 2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
				    && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j+2);
				printf("\n");
			} else for (i = j + 1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j - 1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal?  Returns nonzero when they are. */
int
eq(const void *s1, const void *s2)
{
	return (strcmp((char *)s1, (char *)s2) == 0);
}

/*
 * print the first part of an error message, given the line number;
 * the file name is included only when more than one file is processed
 */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * Complain if mac is not in knowncmds.  The name "." and anything
 * starting with \" (a comment) are accepted silently.
 */
void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The name is terminated in place, so the line is cut short after it.
 */
void
addcmd(char *line)
{
	char *mac;

	/* grab the macro being defined; a bare ".de" has nothing at line+4 */
	mac = line + (strlen(line) < 4 ? strlen(line) : 4);
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/*
	 * Keep one or two characters.  mac[2] is only touched when mac[1]
	 * is a real character, so the store stays inside the string.
	 */
	if (mac[1] == '\0' || isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	else
		mac[2] = 0;
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the shift below is pretty fast.
 *
 * At most the first two characters of mac are stored.  A name that is
 * already present is left alone; a full table ends the program.
 */
void
addmac(char *mac)
{
	char *copy;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	copy = savename(mac);
	/* open a hole at slot by moving the tail of the table up by one */
	memmove(&knowncmds[slot + 1], &knowncmds[slot],
	    (size_t)(ncmds - slot) * sizeof knowncmds[0]);
	knowncmds[slot] = copy;
	ncmds++;
#ifdef DEBUG
	printf("after: slot %d = %s, %d cmds\n", slot, knowncmds[slot], ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * Only the first two characters are compared.
 * If found, return the index.  If not, return -1 and leave in slot the
 * index at which mac would have to be inserted.
 */
int
binsrch(char *mac)
{
	char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}