1 /* $NetBSD: checknr.c,v 1.14 2004/07/09 11:41:26 wiz Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 35 The Regents of the University of California. All rights reserved.\n"); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 41 #else 42 __RCSID("$NetBSD: checknr.c,v 1.14 2004/07/09 11:41:26 wiz Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 /* 47 * checknr: check an nroff/troff input file for matching macro calls. 48 * we also attempt to match size and font changes, but only the embedded 49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 50 * later but for now think of these restrictions as contributions to 51 * structured typesetting. 52 */ 53 #include <ctype.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 58 #define MAXSTK 100 /* Stack size */ 59 #define MAXBR 100 /* Max number of bracket pairs known */ 60 #define MAXCMDS 500 /* Max number of commands known */ 61 62 /* 63 * The stack on which we remember what we've seen so far. 64 */ 65 struct stkstr { 66 int opno; /* number of opening bracket */ 67 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 68 int parm; /* parm to size, font, etc */ 69 int lno; /* line number the thing came in in */ 70 } stk[MAXSTK]; 71 int stktop; 72 73 /* 74 * The kinds of opening and closing brackets. 75 */ 76 struct brstr { 77 char *opbr; 78 char *clbr; 79 } br[MAXBR] = { 80 /* A few bare bones troff commands */ 81 #define SZ 0 82 { "sz", "sz"}, /* also \s */ 83 #define FT 1 84 { "ft", "ft"}, /* also \f */ 85 /* the -mm package */ 86 {"AL", "LE"}, 87 {"AS", "AE"}, 88 {"BL", "LE"}, 89 {"BS", "BE"}, 90 {"DF", "DE"}, 91 {"DL", "LE"}, 92 {"DS", "DE"}, 93 {"FS", "FE"}, 94 {"ML", "LE"}, 95 {"NS", "NE"}, 96 {"RL", "LE"}, 97 {"VL", "LE"}, 98 /* the -ms package */ 99 {"AB", "AE"}, 100 {"BD", "DE"}, 101 {"CD", "DE"}, 102 {"DS", "DE"}, 103 {"FS", "FE"}, 104 {"ID", "DE"}, 105 {"KF", "KE"}, 106 {"KS", "KE"}, 107 {"LD", "DE"}, 108 {"LG", "NL"}, 109 {"QS", "QE"}, 110 {"RS", "RE"}, 111 {"SM", "NL"}, 112 {"XA", "XE"}, 113 {"XS", "XE"}, 114 /* The -me package */ 115 {"(b", ")b"}, 116 {"(c", ")c"}, 117 {"(d", ")d"}, 118 {"(f", ")f"}, 119 {"(l", ")l"}, 120 {"(q", ")q"}, 121 {"(x", ")x"}, 122 {"(z", ")z"}, 123 /* The -mdoc package */ 124 {"Ao", "Ac"}, 125 {"Bd", "Ed"}, 126 {"Bk", "Ek"}, 127 {"Bo", "Bc"}, 128 {"Do", "Dc"}, 129 {"Fo", "Fc"}, 130 {"Oo", "Oc"}, 131 {"Po", "Pc"}, 132 {"Qo", "Qc"}, 133 {"Rs", "Re"}, 134 {"So", "Sc"}, 135 {"Xo", "Xc"}, 136 /* Things needed by preprocessors */ 137 {"EQ", "EN"}, 138 {"TS", "TE"}, 139 /* Refer */ 140 {"[", "]"}, 141 {0, 0}, 142 }; 143 144 /* 145 * All commands known to nroff, plus macro packages. 146 * Used so we can complain about unrecognized commands. 147 */ 148 char *knowncmds[MAXCMDS] = { 149 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", 150 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", 151 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", 152 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", 153 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", 154 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", 155 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1", 156 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", 157 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", 158 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", 159 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", 160 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", 161 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", 162 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", 163 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM", 164 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB", 165 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", 166 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", 167 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", 168 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", 169 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", 170 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", 171 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", 172 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", 173 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", 174 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 175 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", 176 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", 177 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", 178 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", 179 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", 180 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", 181 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", 182 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie", 183 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 184 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 185 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", 186 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", 187 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro", 188 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", 189 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", 190 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 191 }; 192 193 int lineno; /* current line number in input file */ 194 char *cfilename; /* name of current file */ 195 int nfiles; /* number of files to process */ 196 int fflag; /* -f: ignore \f */ 197 int sflag; /* -s: ignore \s */ 198 int ncmds; /* size of knowncmds */ 199 int slot; /* slot in knowncmds found by binsrch */ 200 201 void addcmd(char *); 202 void addmac(char *); 203 int binsrch(char *); 204 void checkknown(char *); 205 void chkcmd(char *, char *); 206 void complain(int); 207 int eq(const void *, const void *); 208 int main(int, char **); 209 void nomatch(char *); 210 void pe(int); 211 void process(FILE *); 212 void prop(int); 213 void usage(void); 214 215 int 216 main(int argc, char **argv) 217 { 218 FILE *f; 219 int i; 220 char *cp; 221 char b1[4]; 222 223 /* Figure out how many known commands there are */ 224 while (knowncmds[ncmds]) 225 ncmds++; 226 while (argc > 1 && argv[1][0] == '-') { 227 switch(argv[1][1]) { 228 229 /* -a: add pairs of macros */ 230 case 'a': 231 i = strlen(argv[1]) - 2; 232 if (i % 6 != 0) 233 usage(); 234 /* look for empty macro slots */ 235 for (i=0; br[i].opbr; i++) 236 ; 237 for (cp=argv[1]+3; cp[-1]; cp += 6) { 238 br[i].opbr = malloc(3); 239 strncpy(br[i].opbr, cp, 2); 240 br[i].clbr = malloc(3); 241 strncpy(br[i].clbr, cp+3, 2); 242 addmac(br[i].opbr); /* knows pairs are also known cmds */ 243 addmac(br[i].clbr); 244 i++; 245 } 246 break; 247 248 /* -c: add known commands */ 249 case 'c': 250 i = strlen(argv[1]) - 2; 251 if (i % 3 != 0) 252 usage(); 253 for (cp=argv[1]+3; cp[-1]; cp += 3) { 254 if (cp[2] && cp[2] != '.') 255 usage(); 256 strncpy(b1, cp, 2); 257 addmac(b1); 258 } 259 break; 260 261 /* -f: ignore font changes */ 262 case 'f': 263 fflag = 1; 264 break; 265 266 /* -s: ignore size changes */ 267 case 's': 268 sflag = 1; 269 break; 270 default: 271 usage(); 272 } 273 argc--; argv++; 274 } 275 276 nfiles = argc - 1; 277 278 if (nfiles > 0) { 279 for (i=1; i<argc; i++) { 280 cfilename = argv[i]; 281 f = fopen(cfilename, "r"); 282 if (f == NULL) 283 perror(cfilename); 284 else { 285 process(f); 286 fclose(f); 287 } 288 } 289 } else { 290 cfilename = "stdin"; 291 process(stdin); 292 } 293 exit(0); 294 } 295 296 void 297 usage(void) 298 { 299 (void)fprintf(stderr, 300 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n", 301 getprogname()); 302 exit(1); 303 } 304 305 void 306 process(FILE *f) 307 { 308 int i, n; 309 char line[256]; /* the current line */ 310 char mac[5]; /* The current macro or nroff command */ 311 int pl; 312 313 stktop = -1; 314 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 315 if (line[0] == '.') { 316 /* 317 * find and isolate the macro/command name. 318 */ 319 strncpy(mac, line+1, 4); 320 if (isspace((unsigned char)mac[0])) { 321 pe(lineno); 322 printf("Empty command\n"); 323 } else if (isspace((unsigned char)mac[1])) { 324 mac[1] = 0; 325 } else if (isspace((unsigned char)mac[2])) { 326 mac[2] = 0; 327 } else if (mac[0] != '\\' || mac[1] != '\"') { 328 pe(lineno); 329 printf("Command too long\n"); 330 } 331 332 /* 333 * Is it a known command? 334 */ 335 checkknown(mac); 336 337 /* 338 * Should we add it? 339 */ 340 if (eq(mac, "de")) 341 addcmd(line); 342 343 chkcmd(line, mac); 344 } 345 346 /* 347 * At this point we process the line looking 348 * for \s and \f. 349 */ 350 for (i=0; line[i]; i++) 351 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 352 if (!sflag && line[++i]=='s') { 353 pl = line[++i]; 354 if (isdigit((unsigned char)pl)) { 355 n = pl - '0'; 356 pl = ' '; 357 } else 358 n = 0; 359 while (isdigit((unsigned char)line[++i])) 360 n = 10 * n + line[i] - '0'; 361 i--; 362 if (n == 0) { 363 if (stk[stktop].opno == SZ) { 364 stktop--; 365 } else { 366 pe(lineno); 367 printf("unmatched \\s0\n"); 368 } 369 } else { 370 stk[++stktop].opno = SZ; 371 stk[stktop].pl = pl; 372 stk[stktop].parm = n; 373 stk[stktop].lno = lineno; 374 } 375 } else if (!fflag && line[i]=='f') { 376 n = line[++i]; 377 if (n == 'P') { 378 if (stk[stktop].opno == FT) { 379 stktop--; 380 } else { 381 pe(lineno); 382 printf("unmatched \\fP\n"); 383 } 384 } else { 385 stk[++stktop].opno = FT; 386 stk[stktop].pl = 1; 387 stk[stktop].parm = n; 388 stk[stktop].lno = lineno; 389 } 390 } 391 } 392 } 393 /* 394 * We've hit the end and look at all this stuff that hasn't been 395 * matched yet! Complain, complain. 396 */ 397 for (i=stktop; i>=0; i--) { 398 complain(i); 399 } 400 } 401 402 void 403 complain(int i) 404 { 405 pe(stk[i].lno); 406 printf("Unmatched "); 407 prop(i); 408 printf("\n"); 409 } 410 411 void 412 prop(int i) 413 { 414 if (stk[i].pl == 0) 415 printf(".%s", br[stk[i].opno].opbr); 416 else switch(stk[i].opno) { 417 case SZ: 418 printf("\\s%c%d", stk[i].pl, stk[i].parm); 419 break; 420 case FT: 421 printf("\\f%c", stk[i].parm); 422 break; 423 default: 424 printf("Bug: stk[%d].opno = %d = .%s, .%s", 425 i, stk[i].opno, br[stk[i].opno].opbr, 426 br[stk[i].opno].clbr); 427 } 428 } 429 430 void 431 chkcmd(char *line, char *mac) 432 { 433 int i; 434 435 /* 436 * Check to see if it matches top of stack. 437 */ 438 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 439 stktop--; /* OK. Pop & forget */ 440 else { 441 /* No. Maybe it's an opener */ 442 for (i=0; br[i].opbr; i++) { 443 if (eq(mac, br[i].opbr)) { 444 /* Found. Push it. */ 445 stktop++; 446 stk[stktop].opno = i; 447 stk[stktop].pl = 0; 448 stk[stktop].parm = 0; 449 stk[stktop].lno = lineno; 450 break; 451 } 452 /* 453 * Maybe it's an unmatched closer. 454 * NOTE: this depends on the fact 455 * that none of the closers can be 456 * openers too. 457 */ 458 if (eq(mac, br[i].clbr)) { 459 nomatch(mac); 460 break; 461 } 462 } 463 } 464 } 465 466 void 467 nomatch(char *mac) 468 { 469 int i, j; 470 471 /* 472 * Look for a match further down on stack 473 * If we find one, it suggests that the stuff in 474 * between is supposed to match itself. 475 */ 476 for (j=stktop; j>=0; j--) 477 if (eq(mac,br[stk[j].opno].clbr)) { 478 /* Found. Make a good diagnostic. */ 479 if (j == stktop-2) { 480 /* 481 * Check for special case \fx..\fR and don't 482 * complain. 483 */ 484 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 485 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 486 stktop = j -1; 487 return; 488 } 489 /* 490 * We have two unmatched frobs. Chances are 491 * they were intended to match, so we mention 492 * them together. 493 */ 494 pe(stk[j+1].lno); 495 prop(j+1); 496 printf(" does not match %d: ", stk[j+2].lno); 497 prop(j+2); 498 printf("\n"); 499 } else for (i=j+1; i <= stktop; i++) { 500 complain(i); 501 } 502 stktop = j-1; 503 return; 504 } 505 /* Didn't find one. Throw this away. */ 506 pe(lineno); 507 printf("Unmatched .%s\n", mac); 508 } 509 510 /* eq: are two strings equal? */ 511 int 512 eq(const void *s1, const void *s2) 513 { 514 return (strcmp((char *)s1, (char *)s2) == 0); 515 } 516 517 /* print the first part of an error message, given the line number */ 518 void 519 pe(int pelineno) 520 { 521 if (nfiles > 1) 522 printf("%s: ", cfilename); 523 printf("%d: ", pelineno); 524 } 525 526 void 527 checkknown(char *mac) 528 { 529 530 if (eq(mac, ".")) 531 return; 532 if (binsrch(mac) >= 0) 533 return; 534 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 535 return; 536 537 pe(lineno); 538 printf("Unknown command: .%s\n", mac); 539 } 540 541 /* 542 * We have a .de xx line in "line". Add xx to the list of known commands. 543 */ 544 void 545 addcmd(char *line) 546 { 547 char *mac; 548 549 /* grab the macro being defined */ 550 mac = line+4; 551 while (isspace((unsigned char)*mac)) 552 mac++; 553 if (*mac == 0) { 554 pe(lineno); 555 printf("illegal define: %s\n", line); 556 return; 557 } 558 mac[2] = 0; 559 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 560 mac[1] = 0; 561 if (ncmds >= MAXCMDS) { 562 printf("Only %d known commands allowed\n", MAXCMDS); 563 exit(1); 564 } 565 addmac(mac); 566 } 567 568 /* 569 * Add mac to the list. We should really have some kind of tree 570 * structure here but this is a quick-and-dirty job and I just don't 571 * have time to mess with it. (I wonder if this will come back to haunt 572 * me someday?) Anyway, I claim that .de is fairly rare in user 573 * nroff programs, and the register loop below is pretty fast. 574 */ 575 void 576 addmac(char *mac) 577 { 578 char **src, **dest, **loc; 579 580 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 581 #ifdef DEBUG 582 printf("binsrch(%s) -> already in table\n", mac); 583 #endif /* DEBUG */ 584 return; 585 } 586 /* binsrch sets slot as a side effect */ 587 #ifdef DEBUG 588 printf("binsrch(%s) -> %d\n", mac, slot); 589 #endif 590 loc = &knowncmds[slot]; 591 src = &knowncmds[ncmds-1]; 592 dest = src+1; 593 while (dest > loc) 594 *dest-- = *src--; 595 *loc = malloc(3); 596 strcpy(*loc, mac); 597 ncmds++; 598 #ifdef DEBUG 599 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 600 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 601 knowncmds[slot+2], ncmds); 602 #endif 603 } 604 605 /* 606 * Do a binary search in knowncmds for mac. 607 * If found, return the index. If not, return -1. 608 */ 609 int 610 binsrch(char *mac) 611 { 612 char *p; /* pointer to current cmd in list */ 613 int d; /* difference if any */ 614 int mid; /* mid point in binary search */ 615 int top, bot; /* boundaries of bin search, inclusive */ 616 617 top = ncmds-1; 618 bot = 0; 619 while (top >= bot) { 620 mid = (top+bot)/2; 621 p = knowncmds[mid]; 622 d = p[0] - mac[0]; 623 if (d == 0) 624 d = p[1] - mac[1]; 625 if (d == 0) 626 return mid; 627 if (d < 0) 628 bot = mid + 1; 629 else 630 top = mid - 1; 631 } 632 slot = bot; /* place it would have gone */ 633 return -1; 634 } 635