/*	$NetBSD: checknr.c,v 1.11 2003/05/09 08:44:57 wiz Exp $	*/

/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>

/*
 * Fallbacks for systems whose <sys/cdefs.h> does not provide the BSD
 * identification macros.  Each expands to a harmless forward declaration
 * so that the trailing semicolon at the use site stays valid.
 */
#ifndef __COPYRIGHT
#define __COPYRIGHT(x)	struct checknr_copyright_unused
#endif
#ifndef __RCSID
#define __RCSID(x)	struct checknr_rcsid_unused
#endif

#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n");
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: checknr.c,v 1.11 2003/05/09 08:44:57 wiz Exp $");
#endif
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry of stk, -1 when empty */

/*
 * The kinds of opening and closing brackets.  The table ends at the first
 * entry whose opbr is a null pointer; -a appends pairs in front of that
 * terminator.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz"},	/* also \s */
#define FT	1
	{ "ft",	"ft"},	/* also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/* the -ms package */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* The -mdoc package */
	{"Ao",	"Ac"},
	{"Bd",	"Ed"},
	{"Bk",	"Ek"},
	{"Bo",	"Bc"},
	{"Do",	"Dc"},
	{"Fo",	"Fc"},
	{"Oo",	"Oc"},
	{"Po",	"Pc"},
	{"Qo",	"Qc"},
	{"Rs",	"Re"},
	{"So",	"Sc"},
	{"Xo",	"Xc"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	{0,	0},
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table on the first two characters of each
 * entry, so the entries must stay sorted in that order.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
"%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
"(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
"@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
"@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
"AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
"B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
"Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
"Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
"Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
"Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
"FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
"H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
"IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
"MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
"Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
"Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
"Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
"Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
"Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
"SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
"T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
"TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
"Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
"[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
"ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
"di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
"em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
"fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
"if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
"ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
"po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
"rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
"sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
"uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
};

int	lineno;		/* current line number in input file */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	addcmd(char *);
void	addmac(char *);
int	binsrch(char *);
void	checkknown(char *);
void	chkcmd(char *, char *);
void	complain(int);
int	eq(const void *, const void *);
int	main(int, char **);
void	nomatch(char *);
void	pe(int);
void	process(FILE *);
void	prop(int);
void	usage(void);

static void	push(int, int, int);
static char	*savename(const char *);

/*
 * Parse the option arguments, then check every named file, or the
 * standard input when no file is named.
 *
 * Options: -a.xx.yy... adds opener/closer pairs, -c.xx... adds known
 * commands, -f ignores font changes, -s ignores size changes.
 * Always exits with status 0 unless an option is malformed or a table
 * limit is exceeded.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[3];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/*
				 * The last entry must stay zeroed: every
				 * scan of br stops at a null opbr.
				 */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savename(cp);
				br[i].clbr = savename(cp + 3);
				/* knows pairs are also known cmds */
				addmac(br[i].opbr);
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				/* two-character name, explicitly terminated */
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				perror(cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the usage summary and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Return a freshly allocated copy of at most the first two characters of
 * s, always NUL-terminated.  Exits with status 1 if memory is exhausted.
 */
static char *
savename(const char *s)
{
	char *p;

	p = malloc(3);
	if (p == NULL) {
		perror("checknr");
		exit(1);
	}
	p[0] = s[0];
	p[1] = (s[0] != '\0') ? s[1] : '\0';
	p[2] = '\0';
	return p;
}

/*
 * Push a new entry, stamped with the current line number, on the bracket
 * stack.  Gives up with a diagnostic when the stack is already full.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("More than %d unmatched openers; giving up\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * Check one input stream.  Lines starting with '.' are treated as macro
 * or command calls; every line is also scanned for embedded \s and \f
 * escapes.  Whatever is still on the stack at end of input is reported as
 * unmatched.
 */
void
process(FILE *f)
{
	int i, n;
	char line[256];	/* the current line */
	char mac[5];	/* The current macro or nroff command */
	int pl;
	int c;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate when it copies four
			 * characters, so do it ourselves.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (isspace((unsigned char)mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isspace((unsigned char)mac[1])) {
				mac[1] = 0;
			} else if (isspace((unsigned char)mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;

			/*
			 * Step onto the escape character unconditionally, so
			 * that -s does not also disable the \f check.
			 */
			c = line[++i];
			if (c == '\0')
				break;	/* backslash was the last character */

			if (c == 's' && !sflag) {
				pl = line[++i];
				if (pl == '\0')
					break;	/* "\s" with nothing after it */
				if (isdigit((unsigned char)pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				/* line[i] is not NUL, so line[i + 1] exists */
				while (isdigit((unsigned char)line[i + 1])) {
					i++;
					/* saturate instead of overflowing */
					if (n <= (INT_MAX - 9) / 10)
						n = 10 * n + (line[i] - '0');
				}
				if (n == 0) {
					if (stktop >= 0 &&
					    stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n);
				}
			} else if (c == 'f' && !fflag) {
				n = line[++i];
				if (n == '\0')
					break;	/* "\f" with nothing after it */
				if (n == 'P') {
					if (stktop >= 0 &&
					    stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* Report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro call, "\s..." or "\f." for an embedded size or font change.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match the command mac against the bracket stack: pop when it closes the
 * top entry, push when it is an opener, and hand it to nomatch() when it
 * is a closer that does not fit the top.  line is not examined.
 */
void
chkcmd(char *line, char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * mac is a closer that does not match the top of the stack.  Report what
 * went wrong and unwind the stack past the opener it does match, if any.
 */
void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop - 2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
				    && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j+2);
				printf("\n");
			} else for (i = j + 1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j - 1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const void *s1, const void *s2)
{
	const char *a = s1;
	const char *b = s2;

	return (strcmp(a, b) == 0);
}

/*
 * Print the first part of an error message, given the line number.  The
 * file name is included only when more than one file is being checked.
 */
void
pe(int pelineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", pelineno);
}

/*
 * Complain if mac is not a known command.  A lone "." and the comment
 * escape \" are accepted silently.
 */
void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The caller guarantees that line starts with ".de"; the name is the
 * first one or two non-blank characters after it.  The name is copied
 * into a local buffer, so line itself is left untouched.
 */
void
addcmd(char *line)
{
	char *mac;
	char name[3];

	/*
	 * grab the macro being defined; start right after ".de" so that a
	 * line consisting of just ".de" is never read past its terminator
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	name[0] = mac[0];
	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
		name[1] = '\0';
	else
		name[1] = mac[1];
	name[2] = '\0';
	addmac(name);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the shift below is pretty fast.
 *
 * Only the first two characters of mac are stored.  A name that is
 * already present is left alone.  Exits with status 1 when the table
 * is full.
 */
void
addmac(char *mac)
{
	char **loc;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail up one position */
	loc = &knowncmds[slot];
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof(*loc));
	*loc = savename(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: inserted %s at %d, %d cmds\n", knowncmds[slot],
	    slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.  Only the first two
 * characters are compared.
 * If found, return the index.  If not, return -1 and leave in slot the
 * index at which mac would have to be inserted.
 */
int
binsrch(char *mac)
{
	char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = (unsigned char)p[0] - (unsigned char)mac[0];
		/* look at the second character only if there is one */
		if (d == 0 && p[0] != '\0')
			d = (unsigned char)p[1] - (unsigned char)mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}