1 /* $NetBSD: checknr.c,v 1.13 2004/01/05 23:23:34 jmmv Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 35 The Regents of the University of California. All rights reserved.\n"); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 41 #else 42 __RCSID("$NetBSD: checknr.c,v 1.13 2004/01/05 23:23:34 jmmv Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 /* 47 * checknr: check an nroff/troff input file for matching macro calls. 48 * we also attempt to match size and font changes, but only the embedded 49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 50 * later but for now think of these restrictions as contributions to 51 * structured typesetting. 52 */ 53 #include <ctype.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 58 #define MAXSTK 100 /* Stack size */ 59 #define MAXBR 100 /* Max number of bracket pairs known */ 60 #define MAXCMDS 500 /* Max number of commands known */ 61 62 /* 63 * The stack on which we remember what we've seen so far. 64 */ 65 struct stkstr { 66 int opno; /* number of opening bracket */ 67 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 68 int parm; /* parm to size, font, etc */ 69 int lno; /* line number the thing came in in */ 70 } stk[MAXSTK]; 71 int stktop; 72 73 /* 74 * The kinds of opening and closing brackets. 75 */ 76 struct brstr { 77 char *opbr; 78 char *clbr; 79 } br[MAXBR] = { 80 /* A few bare bones troff commands */ 81 #define SZ 0 82 { "sz", "sz"}, /* also \s */ 83 #define FT 1 84 { "ft", "ft"}, /* also \f */ 85 /* the -mm package */ 86 {"AL", "LE"}, 87 {"AS", "AE"}, 88 {"BL", "LE"}, 89 {"BS", "BE"}, 90 {"DF", "DE"}, 91 {"DL", "LE"}, 92 {"DS", "DE"}, 93 {"FS", "FE"}, 94 {"ML", "LE"}, 95 {"NS", "NE"}, 96 {"RL", "LE"}, 97 {"VL", "LE"}, 98 /* the -ms package */ 99 {"AB", "AE"}, 100 {"BD", "DE"}, 101 {"CD", "DE"}, 102 {"DS", "DE"}, 103 {"FS", "FE"}, 104 {"ID", "DE"}, 105 {"KF", "KE"}, 106 {"KS", "KE"}, 107 {"LD", "DE"}, 108 {"LG", "NL"}, 109 {"QS", "QE"}, 110 {"RS", "RE"}, 111 {"SM", "NL"}, 112 {"XA", "XE"}, 113 {"XS", "XE"}, 114 /* The -me package */ 115 {"(b", ")b"}, 116 {"(c", ")c"}, 117 {"(d", ")d"}, 118 {"(f", ")f"}, 119 {"(l", ")l"}, 120 {"(q", ")q"}, 121 {"(x", ")x"}, 122 {"(z", ")z"}, 123 /* The -mdoc package */ 124 {"Ao", "Ac"}, 125 {"Bd", "Ed"}, 126 {"Bk", "Ek"}, 127 {"Bo", "Bc"}, 128 {"Do", "Dc"}, 129 {"Fo", "Fc"}, 130 {"Oo", "Oc"}, 131 {"Po", "Pc"}, 132 {"Qo", "Qc"}, 133 {"Rs", "Re"}, 134 {"So", "Sc"}, 135 {"Xo", "Xc"}, 136 /* Things needed by preprocessors */ 137 {"EQ", "EN"}, 138 {"TS", "TE"}, 139 /* Refer */ 140 {"[", "]"}, 141 {0, 0}, 142 }; 143 144 /* 145 * All commands known to nroff, plus macro packages. 146 * Used so we can complain about unrecognized commands. 147 */ 148 char *knowncmds[MAXCMDS] = { 149 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", 150 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", 151 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", 152 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", 153 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", 154 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", 155 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1", 156 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", 157 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", 158 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", 159 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", 160 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", 161 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", 162 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", 163 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM", 164 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB", 165 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", 166 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", 167 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", 168 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", 169 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", 170 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", 171 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", 172 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", 173 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", 174 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 175 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", 176 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", 177 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", 178 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", 179 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", 180 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", 181 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", 182 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie", 183 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 184 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 185 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", 186 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", 187 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro", 188 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", 189 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", 190 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 191 }; 192 193 int lineno; /* current line number in input file */ 194 char *cfilename; /* name of current file */ 195 int nfiles; /* number of files to process */ 196 int fflag; /* -f: ignore \f */ 197 int sflag; /* -s: ignore \s */ 198 int ncmds; /* size of knowncmds */ 199 int slot; /* slot in knowncmds found by binsrch */ 200 201 void addcmd(char *); 202 void addmac(char *); 203 int binsrch(char *); 204 void checkknown(char *); 205 void chkcmd(char *, char *); 206 void complain(int); 207 int eq(const void *, const void *); 208 int main(int, char **); 209 void nomatch(char *); 210 void pe(int); 211 void process(FILE *); 212 void prop(int); 213 void usage(void); 214 215 int 216 main(int argc, char **argv) 217 { 218 FILE *f; 219 int i; 220 char *cp; 221 char b1[4]; 222 223 /* Figure out how many known commands there are */ 224 while (knowncmds[ncmds]) 225 ncmds++; 226 while (argc > 1 && argv[1][0] == '-') { 227 switch(argv[1][1]) { 228 229 /* -a: add pairs of macros */ 230 case 'a': 231 i = strlen(argv[1]) - 2; 232 if (i % 6 != 0) 233 usage(); 234 /* look for empty macro slots */ 235 for (i=0; br[i].opbr; i++) 236 ; 237 for (cp=argv[1]+3; cp[-1]; cp += 6) { 238 br[i].opbr = malloc(3); 239 strncpy(br[i].opbr, cp, 2); 240 br[i].clbr = malloc(3); 241 strncpy(br[i].clbr, cp+3, 2); 242 addmac(br[i].opbr); /* knows pairs are also known cmds */ 243 addmac(br[i].clbr); 244 i++; 245 } 246 break; 247 248 /* -c: add known commands */ 249 case 'c': 250 i = strlen(argv[1]) - 2; 251 if (i % 3 != 0) 252 usage(); 253 for (cp=argv[1]+3; cp[-1]; cp += 3) { 254 if (cp[2] && cp[2] != '.') 255 usage(); 256 strncpy(b1, cp, 2); 257 addmac(b1); 258 } 259 break; 260 261 /* -f: ignore font changes */ 262 case 'f': 263 fflag = 1; 264 break; 265 266 /* -s: ignore size changes */ 267 case 's': 268 sflag = 1; 269 break; 270 default: 271 usage(); 272 } 273 argc--; argv++; 274 } 275 276 nfiles = argc - 1; 277 278 if (nfiles > 0) { 279 for (i=1; i<argc; i++) { 280 cfilename = argv[i]; 281 f = fopen(cfilename, "r"); 282 if (f == NULL) 283 perror(cfilename); 284 else { 285 process(f); 286 fclose(f); 287 } 288 } 289 } else { 290 cfilename = "stdin"; 291 process(stdin); 292 } 293 exit(0); 294 } 295 296 void 297 usage(void) 298 { 299 printf("usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n"); 300 exit(1); 301 } 302 303 void 304 process(FILE *f) 305 { 306 int i, n; 307 char line[256]; /* the current line */ 308 char mac[5]; /* The current macro or nroff command */ 309 int pl; 310 311 stktop = -1; 312 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 313 if (line[0] == '.') { 314 /* 315 * find and isolate the macro/command name. 316 */ 317 strncpy(mac, line+1, 4); 318 if (isspace((unsigned char)mac[0])) { 319 pe(lineno); 320 printf("Empty command\n"); 321 } else if (isspace((unsigned char)mac[1])) { 322 mac[1] = 0; 323 } else if (isspace((unsigned char)mac[2])) { 324 mac[2] = 0; 325 } else if (mac[0] != '\\' || mac[1] != '\"') { 326 pe(lineno); 327 printf("Command too long\n"); 328 } 329 330 /* 331 * Is it a known command? 332 */ 333 checkknown(mac); 334 335 /* 336 * Should we add it? 337 */ 338 if (eq(mac, "de")) 339 addcmd(line); 340 341 chkcmd(line, mac); 342 } 343 344 /* 345 * At this point we process the line looking 346 * for \s and \f. 347 */ 348 for (i=0; line[i]; i++) 349 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 350 if (!sflag && line[++i]=='s') { 351 pl = line[++i]; 352 if (isdigit((unsigned char)pl)) { 353 n = pl - '0'; 354 pl = ' '; 355 } else 356 n = 0; 357 while (isdigit((unsigned char)line[++i])) 358 n = 10 * n + line[i] - '0'; 359 i--; 360 if (n == 0) { 361 if (stk[stktop].opno == SZ) { 362 stktop--; 363 } else { 364 pe(lineno); 365 printf("unmatched \\s0\n"); 366 } 367 } else { 368 stk[++stktop].opno = SZ; 369 stk[stktop].pl = pl; 370 stk[stktop].parm = n; 371 stk[stktop].lno = lineno; 372 } 373 } else if (!fflag && line[i]=='f') { 374 n = line[++i]; 375 if (n == 'P') { 376 if (stk[stktop].opno == FT) { 377 stktop--; 378 } else { 379 pe(lineno); 380 printf("unmatched \\fP\n"); 381 } 382 } else { 383 stk[++stktop].opno = FT; 384 stk[stktop].pl = 1; 385 stk[stktop].parm = n; 386 stk[stktop].lno = lineno; 387 } 388 } 389 } 390 } 391 /* 392 * We've hit the end and look at all this stuff that hasn't been 393 * matched yet! Complain, complain. 394 */ 395 for (i=stktop; i>=0; i--) { 396 complain(i); 397 } 398 } 399 400 void 401 complain(int i) 402 { 403 pe(stk[i].lno); 404 printf("Unmatched "); 405 prop(i); 406 printf("\n"); 407 } 408 409 void 410 prop(int i) 411 { 412 if (stk[i].pl == 0) 413 printf(".%s", br[stk[i].opno].opbr); 414 else switch(stk[i].opno) { 415 case SZ: 416 printf("\\s%c%d", stk[i].pl, stk[i].parm); 417 break; 418 case FT: 419 printf("\\f%c", stk[i].parm); 420 break; 421 default: 422 printf("Bug: stk[%d].opno = %d = .%s, .%s", 423 i, stk[i].opno, br[stk[i].opno].opbr, 424 br[stk[i].opno].clbr); 425 } 426 } 427 428 void 429 chkcmd(char *line, char *mac) 430 { 431 int i; 432 433 /* 434 * Check to see if it matches top of stack. 435 */ 436 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 437 stktop--; /* OK. Pop & forget */ 438 else { 439 /* No. Maybe it's an opener */ 440 for (i=0; br[i].opbr; i++) { 441 if (eq(mac, br[i].opbr)) { 442 /* Found. Push it. */ 443 stktop++; 444 stk[stktop].opno = i; 445 stk[stktop].pl = 0; 446 stk[stktop].parm = 0; 447 stk[stktop].lno = lineno; 448 break; 449 } 450 /* 451 * Maybe it's an unmatched closer. 452 * NOTE: this depends on the fact 453 * that none of the closers can be 454 * openers too. 455 */ 456 if (eq(mac, br[i].clbr)) { 457 nomatch(mac); 458 break; 459 } 460 } 461 } 462 } 463 464 void 465 nomatch(char *mac) 466 { 467 int i, j; 468 469 /* 470 * Look for a match further down on stack 471 * If we find one, it suggests that the stuff in 472 * between is supposed to match itself. 473 */ 474 for (j=stktop; j>=0; j--) 475 if (eq(mac,br[stk[j].opno].clbr)) { 476 /* Found. Make a good diagnostic. */ 477 if (j == stktop-2) { 478 /* 479 * Check for special case \fx..\fR and don't 480 * complain. 481 */ 482 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 483 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 484 stktop = j -1; 485 return; 486 } 487 /* 488 * We have two unmatched frobs. Chances are 489 * they were intended to match, so we mention 490 * them together. 491 */ 492 pe(stk[j+1].lno); 493 prop(j+1); 494 printf(" does not match %d: ", stk[j+2].lno); 495 prop(j+2); 496 printf("\n"); 497 } else for (i=j+1; i <= stktop; i++) { 498 complain(i); 499 } 500 stktop = j-1; 501 return; 502 } 503 /* Didn't find one. Throw this away. */ 504 pe(lineno); 505 printf("Unmatched .%s\n", mac); 506 } 507 508 /* eq: are two strings equal? */ 509 int 510 eq(const void *s1, const void *s2) 511 { 512 return (strcmp((char *)s1, (char *)s2) == 0); 513 } 514 515 /* print the first part of an error message, given the line number */ 516 void 517 pe(int pelineno) 518 { 519 if (nfiles > 1) 520 printf("%s: ", cfilename); 521 printf("%d: ", pelineno); 522 } 523 524 void 525 checkknown(char *mac) 526 { 527 528 if (eq(mac, ".")) 529 return; 530 if (binsrch(mac) >= 0) 531 return; 532 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 533 return; 534 535 pe(lineno); 536 printf("Unknown command: .%s\n", mac); 537 } 538 539 /* 540 * We have a .de xx line in "line". Add xx to the list of known commands. 541 */ 542 void 543 addcmd(char *line) 544 { 545 char *mac; 546 547 /* grab the macro being defined */ 548 mac = line+4; 549 while (isspace((unsigned char)*mac)) 550 mac++; 551 if (*mac == 0) { 552 pe(lineno); 553 printf("illegal define: %s\n", line); 554 return; 555 } 556 mac[2] = 0; 557 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 558 mac[1] = 0; 559 if (ncmds >= MAXCMDS) { 560 printf("Only %d known commands allowed\n", MAXCMDS); 561 exit(1); 562 } 563 addmac(mac); 564 } 565 566 /* 567 * Add mac to the list. We should really have some kind of tree 568 * structure here but this is a quick-and-dirty job and I just don't 569 * have time to mess with it. (I wonder if this will come back to haunt 570 * me someday?) Anyway, I claim that .de is fairly rare in user 571 * nroff programs, and the register loop below is pretty fast. 572 */ 573 void 574 addmac(char *mac) 575 { 576 char **src, **dest, **loc; 577 578 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 579 #ifdef DEBUG 580 printf("binsrch(%s) -> already in table\n", mac); 581 #endif /* DEBUG */ 582 return; 583 } 584 /* binsrch sets slot as a side effect */ 585 #ifdef DEBUG 586 printf("binsrch(%s) -> %d\n", mac, slot); 587 #endif 588 loc = &knowncmds[slot]; 589 src = &knowncmds[ncmds-1]; 590 dest = src+1; 591 while (dest > loc) 592 *dest-- = *src--; 593 *loc = malloc(3); 594 strcpy(*loc, mac); 595 ncmds++; 596 #ifdef DEBUG 597 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 598 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 599 knowncmds[slot+2], ncmds); 600 #endif 601 } 602 603 /* 604 * Do a binary search in knowncmds for mac. 605 * If found, return the index. If not, return -1. 606 */ 607 int 608 binsrch(char *mac) 609 { 610 char *p; /* pointer to current cmd in list */ 611 int d; /* difference if any */ 612 int mid; /* mid point in binary search */ 613 int top, bot; /* boundaries of bin search, inclusive */ 614 615 top = ncmds-1; 616 bot = 0; 617 while (top >= bot) { 618 mid = (top+bot)/2; 619 p = knowncmds[mid]; 620 d = p[0] - mac[0]; 621 if (d == 0) 622 d = p[1] - mac[1]; 623 if (d == 0) 624 return mid; 625 if (d < 0) 626 bot = mid + 1; 627 else 628 top = mid - 1; 629 } 630 slot = bot; /* place it would have gone */ 631 return -1; 632 } 633