1 /* $NetBSD: checknr.c,v 1.17 2005/03/30 18:01:32 xtraeme Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 35 The Regents of the University of California. All rights reserved.\n"); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 41 #else 42 __RCSID("$NetBSD: checknr.c,v 1.17 2005/03/30 18:01:32 xtraeme Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 /* 47 * checknr: check an nroff/troff input file for matching macro calls. 48 * we also attempt to match size and font changes, but only the embedded 49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 50 * later but for now think of these restrictions as contributions to 51 * structured typesetting. 52 */ 53 #include <ctype.h> 54 #include <err.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 59 #define MAXSTK 100 /* Stack size */ 60 #define MAXBR 100 /* Max number of bracket pairs known */ 61 #define MAXCMDS 500 /* Max number of commands known */ 62 63 /* 64 * The stack on which we remember what we've seen so far. 65 */ 66 struct stkstr { 67 int opno; /* number of opening bracket */ 68 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 69 int parm; /* parm to size, font, etc */ 70 int lno; /* line number the thing came in in */ 71 } stk[MAXSTK]; 72 int stktop; 73 74 /* 75 * The kinds of opening and closing brackets. 76 */ 77 struct brstr { 78 char *opbr; 79 char *clbr; 80 } br[MAXBR] = { 81 /* A few bare bones troff commands */ 82 #define SZ 0 83 { "sz", "sz"}, /* also \s */ 84 #define FT 1 85 { "ft", "ft"}, /* also \f */ 86 /* the -mm package */ 87 {"AL", "LE"}, 88 {"AS", "AE"}, 89 {"BL", "LE"}, 90 {"BS", "BE"}, 91 {"DF", "DE"}, 92 {"DL", "LE"}, 93 {"DS", "DE"}, 94 {"FS", "FE"}, 95 {"ML", "LE"}, 96 {"NS", "NE"}, 97 {"RL", "LE"}, 98 {"VL", "LE"}, 99 /* the -ms package */ 100 {"AB", "AE"}, 101 {"BD", "DE"}, 102 {"CD", "DE"}, 103 {"DS", "DE"}, 104 {"FS", "FE"}, 105 {"ID", "DE"}, 106 {"KF", "KE"}, 107 {"KS", "KE"}, 108 {"LD", "DE"}, 109 {"LG", "NL"}, 110 {"QS", "QE"}, 111 {"RS", "RE"}, 112 {"SM", "NL"}, 113 {"XA", "XE"}, 114 {"XS", "XE"}, 115 /* The -me package */ 116 {"(b", ")b"}, 117 {"(c", ")c"}, 118 {"(d", ")d"}, 119 {"(f", ")f"}, 120 {"(l", ")l"}, 121 {"(q", ")q"}, 122 {"(x", ")x"}, 123 {"(z", ")z"}, 124 /* The -mdoc package */ 125 {"Ao", "Ac"}, 126 {"Bd", "Ed"}, 127 {"Bk", "Ek"}, 128 {"Bo", "Bc"}, 129 {"Do", "Dc"}, 130 {"Fo", "Fc"}, 131 {"Oo", "Oc"}, 132 {"Po", "Pc"}, 133 {"Qo", "Qc"}, 134 {"Rs", "Re"}, 135 {"So", "Sc"}, 136 {"Xo", "Xc"}, 137 /* Things needed by preprocessors */ 138 {"EQ", "EN"}, 139 {"TS", "TE"}, 140 /* Refer */ 141 {"[", "]"}, 142 {0, 0} 143 }; 144 145 /* 146 * All commands known to nroff, plus macro packages. 147 * Used so we can complain about unrecognized commands. 148 */ 149 char *knowncmds[MAXCMDS] = { 150 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", 151 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", 152 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", 153 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", 154 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", 155 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", 156 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1", 157 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", 158 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", 159 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", 160 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", 161 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", 162 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", 163 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", 164 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM", 165 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB", 166 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", 167 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", 168 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", 169 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", 170 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", 171 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", 172 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", 173 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", 174 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", 175 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 176 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", 177 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", 178 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", 179 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", 180 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", 181 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", 182 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", 183 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie", 184 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 185 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 186 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", 187 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", 188 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro", 189 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", 190 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", 191 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 192 }; 193 194 int lineno; /* current line number in input file */ 195 char *cfilename; /* name of current file */ 196 int nfiles; /* number of files to process */ 197 int fflag; /* -f: ignore \f */ 198 int sflag; /* -s: ignore \s */ 199 int ncmds; /* size of knowncmds */ 200 int slot; /* slot in knowncmds found by binsrch */ 201 202 void addcmd(char *); 203 void addmac(char *); 204 int binsrch(char *); 205 void checkknown(char *); 206 void chkcmd(char *, char *); 207 void complain(int); 208 int eq(const void *, const void *); 209 int main(int, char **); 210 void nomatch(char *); 211 void pe(int); 212 void process(FILE *); 213 void prop(int); 214 void usage(void); 215 216 int 217 main(int argc, char **argv) 218 { 219 FILE *f; 220 int i; 221 char *cp; 222 char b1[4]; 223 224 /* Figure out how many known commands there are */ 225 while (knowncmds[ncmds]) 226 ncmds++; 227 while (argc > 1 && argv[1][0] == '-') { 228 switch(argv[1][1]) { 229 230 /* -a: add pairs of macros */ 231 case 'a': 232 i = strlen(argv[1]) - 2; 233 if (i % 6 != 0) 234 usage(); 235 /* look for empty macro slots */ 236 for (i=0; br[i].opbr; i++) 237 ; 238 for (cp=argv[1]+3; cp[-1]; cp += 6) { 239 if (i >= MAXBR) 240 errx(1, "too many pairs"); 241 if ((br[i].opbr = malloc(3)) == NULL) 242 err(1, "malloc"); 243 strlcpy(br[i].opbr, cp, 3); 244 if ((br[i].clbr = malloc(3)) == NULL) 245 err(1, "malloc"); 246 strlcpy(br[i].clbr, cp+3, 3); 247 addmac(br[i].opbr); /* knows pairs are also known cmds */ 248 addmac(br[i].clbr); 249 i++; 250 } 251 break; 252 253 /* -c: add known commands */ 254 case 'c': 255 i = strlen(argv[1]) - 2; 256 if (i % 3 != 0) 257 usage(); 258 for (cp=argv[1]+3; cp[-1]; cp += 3) { 259 if (cp[2] && cp[2] != '.') 260 usage(); 261 strncpy(b1, cp, 2); 262 addmac(b1); 263 } 264 break; 265 266 /* -f: ignore font changes */ 267 case 'f': 268 fflag = 1; 269 break; 270 271 /* -s: ignore size changes */ 272 case 's': 273 sflag = 1; 274 break; 275 default: 276 usage(); 277 } 278 argc--; argv++; 279 } 280 281 nfiles = argc - 1; 282 283 if (nfiles > 0) { 284 for (i=1; i<argc; i++) { 285 cfilename = argv[i]; 286 f = fopen(cfilename, "r"); 287 if (f == NULL) 288 perror(cfilename); 289 else { 290 process(f); 291 fclose(f); 292 } 293 } 294 } else { 295 cfilename = "stdin"; 296 process(stdin); 297 } 298 exit(0); 299 } 300 301 void 302 usage(void) 303 { 304 (void)fprintf(stderr, 305 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n", 306 getprogname()); 307 exit(1); 308 } 309 310 void 311 process(FILE *f) 312 { 313 int i, n; 314 char line[256]; /* the current line */ 315 char mac[5]; /* The current macro or nroff command */ 316 int pl; 317 318 stktop = -1; 319 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 320 if (line[0] == '.') { 321 /* 322 * find and isolate the macro/command name. 323 */ 324 strncpy(mac, line+1, 4); 325 if (isspace((unsigned char)mac[0])) { 326 pe(lineno); 327 printf("Empty command\n"); 328 } else if (isspace((unsigned char)mac[1])) { 329 mac[1] = 0; 330 } else if (isspace((unsigned char)mac[2])) { 331 mac[2] = 0; 332 } else if (mac[0] != '\\' || mac[1] != '\"') { 333 pe(lineno); 334 printf("Command too long\n"); 335 } 336 337 /* 338 * Is it a known command? 339 */ 340 checkknown(mac); 341 342 /* 343 * Should we add it? 344 */ 345 if (eq(mac, "de")) 346 addcmd(line); 347 348 chkcmd(line, mac); 349 } 350 351 /* 352 * At this point we process the line looking 353 * for \s and \f. 354 */ 355 for (i=0; line[i]; i++) 356 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 357 if (!sflag && line[++i]=='s') { 358 pl = line[++i]; 359 if (isdigit((unsigned char)pl)) { 360 n = pl - '0'; 361 pl = ' '; 362 } else 363 n = 0; 364 while (isdigit((unsigned char)line[++i])) 365 n = 10 * n + line[i] - '0'; 366 i--; 367 if (n == 0) { 368 if (stk[stktop].opno == SZ) { 369 stktop--; 370 } else { 371 pe(lineno); 372 printf("unmatched \\s0\n"); 373 } 374 } else { 375 stk[++stktop].opno = SZ; 376 stk[stktop].pl = pl; 377 stk[stktop].parm = n; 378 stk[stktop].lno = lineno; 379 } 380 } else if (!fflag && line[i]=='f') { 381 n = line[++i]; 382 if (n == 'P') { 383 if (stk[stktop].opno == FT) { 384 stktop--; 385 } else { 386 pe(lineno); 387 printf("unmatched \\fP\n"); 388 } 389 } else { 390 stk[++stktop].opno = FT; 391 stk[stktop].pl = 1; 392 stk[stktop].parm = n; 393 stk[stktop].lno = lineno; 394 } 395 } 396 } 397 } 398 /* 399 * We've hit the end and look at all this stuff that hasn't been 400 * matched yet! Complain, complain. 401 */ 402 for (i=stktop; i>=0; i--) { 403 complain(i); 404 } 405 } 406 407 void 408 complain(int i) 409 { 410 pe(stk[i].lno); 411 printf("Unmatched "); 412 prop(i); 413 printf("\n"); 414 } 415 416 void 417 prop(int i) 418 { 419 if (stk[i].pl == 0) 420 printf(".%s", br[stk[i].opno].opbr); 421 else switch(stk[i].opno) { 422 case SZ: 423 printf("\\s%c%d", stk[i].pl, stk[i].parm); 424 break; 425 case FT: 426 printf("\\f%c", stk[i].parm); 427 break; 428 default: 429 printf("Bug: stk[%d].opno = %d = .%s, .%s", 430 i, stk[i].opno, br[stk[i].opno].opbr, 431 br[stk[i].opno].clbr); 432 } 433 } 434 435 void 436 chkcmd(char *line, char *mac) 437 { 438 int i; 439 440 /* 441 * Check to see if it matches top of stack. 442 */ 443 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 444 stktop--; /* OK. Pop & forget */ 445 else { 446 /* No. Maybe it's an opener */ 447 for (i=0; br[i].opbr; i++) { 448 if (eq(mac, br[i].opbr)) { 449 /* Found. Push it. */ 450 stktop++; 451 stk[stktop].opno = i; 452 stk[stktop].pl = 0; 453 stk[stktop].parm = 0; 454 stk[stktop].lno = lineno; 455 break; 456 } 457 /* 458 * Maybe it's an unmatched closer. 459 * NOTE: this depends on the fact 460 * that none of the closers can be 461 * openers too. 462 */ 463 if (eq(mac, br[i].clbr)) { 464 nomatch(mac); 465 break; 466 } 467 } 468 } 469 } 470 471 void 472 nomatch(char *mac) 473 { 474 int i, j; 475 476 /* 477 * Look for a match further down on stack 478 * If we find one, it suggests that the stuff in 479 * between is supposed to match itself. 480 */ 481 for (j=stktop; j>=0; j--) 482 if (eq(mac,br[stk[j].opno].clbr)) { 483 /* Found. Make a good diagnostic. */ 484 if (j == stktop-2) { 485 /* 486 * Check for special case \fx..\fR and don't 487 * complain. 488 */ 489 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 490 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 491 stktop = j -1; 492 return; 493 } 494 /* 495 * We have two unmatched frobs. Chances are 496 * they were intended to match, so we mention 497 * them together. 498 */ 499 pe(stk[j+1].lno); 500 prop(j+1); 501 printf(" does not match %d: ", stk[j+2].lno); 502 prop(j+2); 503 printf("\n"); 504 } else for (i=j+1; i <= stktop; i++) { 505 complain(i); 506 } 507 stktop = j-1; 508 return; 509 } 510 /* Didn't find one. Throw this away. */ 511 pe(lineno); 512 printf("Unmatched .%s\n", mac); 513 } 514 515 /* eq: are two strings equal? */ 516 int 517 eq(const void *s1, const void *s2) 518 { 519 return (strcmp((char *)s1, (char *)s2) == 0); 520 } 521 522 /* print the first part of an error message, given the line number */ 523 void 524 pe(int pelineno) 525 { 526 if (nfiles > 1) 527 printf("%s: ", cfilename); 528 printf("%d: ", pelineno); 529 } 530 531 void 532 checkknown(char *mac) 533 { 534 535 if (eq(mac, ".")) 536 return; 537 if (binsrch(mac) >= 0) 538 return; 539 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 540 return; 541 542 pe(lineno); 543 printf("Unknown command: .%s\n", mac); 544 } 545 546 /* 547 * We have a .de xx line in "line". Add xx to the list of known commands. 548 */ 549 void 550 addcmd(char *line) 551 { 552 char *mac; 553 554 /* grab the macro being defined */ 555 mac = line+4; 556 while (isspace((unsigned char)*mac)) 557 mac++; 558 if (*mac == 0) { 559 pe(lineno); 560 printf("illegal define: %s\n", line); 561 return; 562 } 563 mac[2] = 0; 564 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 565 mac[1] = 0; 566 if (ncmds >= MAXCMDS) { 567 printf("Only %d known commands allowed\n", MAXCMDS); 568 exit(1); 569 } 570 addmac(mac); 571 } 572 573 /* 574 * Add mac to the list. We should really have some kind of tree 575 * structure here but this is a quick-and-dirty job and I just don't 576 * have time to mess with it. (I wonder if this will come back to haunt 577 * me someday?) Anyway, I claim that .de is fairly rare in user 578 * nroff programs, and the register loop below is pretty fast. 579 */ 580 void 581 addmac(char *mac) 582 { 583 char **src, **dest, **loc; 584 585 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 586 #ifdef DEBUG 587 printf("binsrch(%s) -> already in table\n", mac); 588 #endif /* DEBUG */ 589 return; 590 } 591 /* binsrch sets slot as a side effect */ 592 #ifdef DEBUG 593 printf("binsrch(%s) -> %d\n", mac, slot); 594 #endif 595 loc = &knowncmds[slot]; 596 src = &knowncmds[ncmds-1]; 597 dest = src+1; 598 while (dest > loc) 599 *dest-- = *src--; 600 if ((*loc = strdup(mac)) == NULL) 601 err(1, "strdup"); 602 ncmds++; 603 #ifdef DEBUG 604 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 605 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 606 knowncmds[slot+2], ncmds); 607 #endif 608 } 609 610 /* 611 * Do a binary search in knowncmds for mac. 612 * If found, return the index. If not, return -1. 613 */ 614 int 615 binsrch(char *mac) 616 { 617 char *p; /* pointer to current cmd in list */ 618 int d; /* difference if any */ 619 int mid; /* mid point in binary search */ 620 int top, bot; /* boundaries of bin search, inclusive */ 621 622 top = ncmds-1; 623 bot = 0; 624 while (top >= bot) { 625 mid = (top+bot)/2; 626 p = knowncmds[mid]; 627 d = p[0] - mac[0]; 628 if (d == 0) 629 d = p[1] - mac[1]; 630 if (d == 0) 631 return mid; 632 if (d < 0) 633 bot = mid + 1; 634 else 635 top = mid - 1; 636 } 637 slot = bot; /* place it would have gone */ 638 return -1; 639 } 640