121548Sdist /*
2*61936Sbostic * Copyright (c) 1980, 1993
3*61936Sbostic * The Regents of the University of California. All rights reserved.
436182Sbostic *
542721Sbostic * %sccs.include.redist.c%
621548Sdist */
721548Sdist
815069Ssam #ifndef lint
9*61936Sbostic static char copyright[] =
10*61936Sbostic "@(#) Copyright (c) 1980, 1993\n\
11*61936Sbostic The Regents of the University of California. All rights reserved.\n";
1236182Sbostic #endif /* not lint */
1321548Sdist
1421548Sdist #ifndef lint
15*61936Sbostic static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 06/06/93";
1636182Sbostic #endif /* not lint */
1721548Sdist
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
25970Sbill #include <stdio.h>
26970Sbill #include <ctype.h>
27970Sbill
28970Sbill #define MAXSTK 100 /* Stack size */
29970Sbill #define MAXBR 100 /* Max number of bracket pairs known */
30970Sbill #define MAXCMDS 500 /* Max number of commands known */
31970Sbill
/*
 * Stack of constructs that have been opened but not yet closed:
 * bracketing macros as well as embedded \s and \f changes.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro line */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line number where the opener was seen */
} stk[MAXSTK];
int stktop;		/* index of the top entry in stk[] */
42970Sbill
/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is null; unused
 * trailing slots are available for pairs added at run time.
 */
#define SZ	0	/* slot of the size-change pair (.sz, also \s) */
#define FT	1	/* slot of the font-change pair (.ft, also \f) */

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};
100970Sbill
101970Sbill /*
1023574Sroot * All commands known to nroff, plus macro packages.
103970Sbill * Used so we can complain about unrecognized commands.
104970Sbill */
105970Sbill char *knowncmds[MAXCMDS] = {
1063574Sroot "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
1073574Sroot "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
1083574Sroot "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
1093574Sroot "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
11015069Ssam "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
11115069Ssam "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
11215069Ssam "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
11315069Ssam "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
11415069Ssam "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
11515069Ssam "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
11615069Ssam "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
11715069Ssam "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
11815069Ssam "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
11915069Ssam "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
12025561Sbloom "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
12125561Sbloom "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
12225561Sbloom "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
12325561Sbloom "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
12425561Sbloom "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
12525561Sbloom "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
12625561Sbloom "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
12725561Sbloom "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
12825561Sbloom "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
12925561Sbloom "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
13025561Sbloom "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
13125561Sbloom "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
13225561Sbloom "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
13325561Sbloom "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
13425561Sbloom "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
13525561Sbloom "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
13625561Sbloom "yr", 0
137970Sbill };
138970Sbill
/* Input state */
char line[256];		/* text of the line being examined */
int lineno;		/* its line number within the current file */
char *cfilename;	/* name of the file being read */
int nfiles;		/* how many files were named on the command line */

/* Command line flags */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for knowncmds[] */
int ncmds;		/* number of entries currently in knowncmds */
int slot;		/* insertion point left behind by a failed binsrch */

char *malloc();
149970Sbill
main(argc,argv)150970Sbill main(argc, argv)
151970Sbill int argc;
152970Sbill char **argv;
153970Sbill {
154970Sbill FILE *f;
155970Sbill int i;
156970Sbill char *cp;
1571406Smark char b1[4];
158970Sbill
1591406Smark /* Figure out how many known commands there are */
1601406Smark while (knowncmds[ncmds])
1611406Smark ncmds++;
162970Sbill while (argc > 1 && argv[1][0] == '-') {
163970Sbill switch(argv[1][1]) {
1641406Smark
1651406Smark /* -a: add pairs of macros */
166970Sbill case 'a':
167970Sbill i = strlen(argv[1]) - 2;
1681406Smark if (i % 6 != 0)
1691406Smark usage();
170970Sbill /* look for empty macro slots */
171970Sbill for (i=0; br[i].opbr; i++)
172970Sbill ;
173970Sbill for (cp=argv[1]+3; cp[-1]; cp += 6) {
1741406Smark br[i].opbr = malloc(3);
1751406Smark strncpy(br[i].opbr, cp, 2);
1761406Smark br[i].clbr = malloc(3);
1771406Smark strncpy(br[i].clbr, cp+3, 2);
1781406Smark addmac(br[i].opbr); /* knows pairs are also known cmds */
1791406Smark addmac(br[i].clbr);
180970Sbill i++;
181970Sbill }
182970Sbill break;
1831406Smark
1841406Smark /* -c: add known commands */
1851406Smark case 'c':
1861406Smark i = strlen(argv[1]) - 2;
1871406Smark if (i % 3 != 0)
1881406Smark usage();
1891406Smark for (cp=argv[1]+3; cp[-1]; cp += 3) {
1901406Smark if (cp[2] && cp[2] != '.')
1911406Smark usage();
1921406Smark strncpy(b1, cp, 2);
1931406Smark addmac(b1);
1941406Smark }
1951406Smark break;
1961406Smark
1971406Smark /* -f: ignore font changes */
198970Sbill case 'f':
199970Sbill fflag = 1;
200970Sbill break;
2011406Smark
2021406Smark /* -s: ignore size changes */
203970Sbill case 's':
204970Sbill sflag = 1;
205970Sbill break;
206970Sbill default:
2071406Smark usage();
208970Sbill }
209970Sbill argc--; argv++;
210970Sbill }
211970Sbill
212970Sbill nfiles = argc - 1;
213970Sbill
214970Sbill if (nfiles > 0) {
215970Sbill for (i=1; i<argc; i++) {
216970Sbill cfilename = argv[i];
217970Sbill f = fopen(cfilename, "r");
218970Sbill if (f == NULL)
219970Sbill perror(cfilename);
220970Sbill else
221970Sbill process(f);
222970Sbill }
223970Sbill } else {
224970Sbill cfilename = "stdin";
225970Sbill process(stdin);
226970Sbill }
227970Sbill exit(0);
228970Sbill }
229970Sbill
/* usage: print the command synopsis on standard output and exit with status 1 */
int
usage(void)
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
2351406Smark
process(f)236970Sbill process(f)
237970Sbill FILE *f;
238970Sbill {
239970Sbill register int i, n;
240970Sbill char mac[5]; /* The current macro or nroff command */
241970Sbill int pl;
242970Sbill
243970Sbill stktop = -1;
244970Sbill for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
245970Sbill if (line[0] == '.') {
246970Sbill /*
247970Sbill * find and isolate the macro/command name.
248970Sbill */
249970Sbill strncpy(mac, line+1, 4);
250970Sbill if (isspace(mac[0])) {
251970Sbill pe(lineno);
252970Sbill printf("Empty command\n");
253970Sbill } else if (isspace(mac[1])) {
254970Sbill mac[1] = 0;
255970Sbill } else if (isspace(mac[2])) {
256970Sbill mac[2] = 0;
2573574Sroot } else if (mac[0] != '\\' || mac[1] != '\"') {
258970Sbill pe(lineno);
259970Sbill printf("Command too long\n");
260970Sbill }
261970Sbill
262970Sbill /*
263970Sbill * Is it a known command?
264970Sbill */
265970Sbill checkknown(mac);
266970Sbill
267970Sbill /*
268970Sbill * Should we add it?
269970Sbill */
270970Sbill if (eq(mac, "de"))
271970Sbill addcmd(line);
272970Sbill
273970Sbill chkcmd(line, mac);
274970Sbill }
275970Sbill
276970Sbill /*
277970Sbill * At this point we process the line looking
278970Sbill * for \s and \f.
279970Sbill */
280970Sbill for (i=0; line[i]; i++)
281970Sbill if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
282970Sbill if (!sflag && line[++i]=='s') {
283970Sbill pl = line[++i];
284970Sbill if (isdigit(pl)) {
285970Sbill n = pl - '0';
286970Sbill pl = ' ';
287970Sbill } else
288970Sbill n = 0;
289970Sbill while (isdigit(line[++i]))
290970Sbill n = 10 * n + line[i] - '0';
291970Sbill i--;
292970Sbill if (n == 0) {
293970Sbill if (stk[stktop].opno == SZ) {
294970Sbill stktop--;
295970Sbill } else {
296970Sbill pe(lineno);
297970Sbill printf("unmatched \\s0\n");
298970Sbill }
299970Sbill } else {
300970Sbill stk[++stktop].opno = SZ;
301970Sbill stk[stktop].pl = pl;
302970Sbill stk[stktop].parm = n;
303970Sbill stk[stktop].lno = lineno;
304970Sbill }
305970Sbill } else if (!fflag && line[i]=='f') {
306970Sbill n = line[++i];
307970Sbill if (n == 'P') {
308970Sbill if (stk[stktop].opno == FT) {
309970Sbill stktop--;
310970Sbill } else {
311970Sbill pe(lineno);
312970Sbill printf("unmatched \\fP\n");
313970Sbill }
314970Sbill } else {
315970Sbill stk[++stktop].opno = FT;
316970Sbill stk[stktop].pl = 1;
317970Sbill stk[stktop].parm = n;
318970Sbill stk[stktop].lno = lineno;
319970Sbill }
320970Sbill }
321970Sbill }
322970Sbill }
323970Sbill /*
324970Sbill * We've hit the end and look at all this stuff that hasn't been
325970Sbill * matched yet! Complain, complain.
326970Sbill */
327970Sbill for (i=stktop; i>=0; i--) {
328970Sbill complain(i);
329970Sbill }
330970Sbill }
331970Sbill
complain(i)332970Sbill complain(i)
333970Sbill {
334970Sbill pe(stk[i].lno);
335970Sbill printf("Unmatched ");
336970Sbill prop(i);
337970Sbill printf("\n");
338970Sbill }
339970Sbill
prop(i)340970Sbill prop(i)
341970Sbill {
342970Sbill if (stk[i].pl == 0)
343970Sbill printf(".%s", br[stk[i].opno].opbr);
344970Sbill else switch(stk[i].opno) {
345970Sbill case SZ:
346970Sbill printf("\\s%c%d", stk[i].pl, stk[i].parm);
347970Sbill break;
348970Sbill case FT:
349970Sbill printf("\\f%c", stk[i].parm);
350970Sbill break;
351970Sbill default:
352970Sbill printf("Bug: stk[%d].opno = %d = .%s, .%s",
353970Sbill i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
354970Sbill }
355970Sbill }
356970Sbill
chkcmd(line,mac)357970Sbill chkcmd(line, mac)
358970Sbill char *line;
359970Sbill char *mac;
360970Sbill {
361970Sbill register int i, n;
362970Sbill
363970Sbill /*
364970Sbill * Check to see if it matches top of stack.
365970Sbill */
366970Sbill if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
367970Sbill stktop--; /* OK. Pop & forget */
368970Sbill else {
369970Sbill /* No. Maybe it's an opener */
370970Sbill for (i=0; br[i].opbr; i++) {
371970Sbill if (eq(mac, br[i].opbr)) {
372970Sbill /* Found. Push it. */
373970Sbill stktop++;
374970Sbill stk[stktop].opno = i;
375970Sbill stk[stktop].pl = 0;
376970Sbill stk[stktop].parm = 0;
377970Sbill stk[stktop].lno = lineno;
378970Sbill break;
379970Sbill }
380970Sbill /*
381970Sbill * Maybe it's an unmatched closer.
382970Sbill * NOTE: this depends on the fact
383970Sbill * that none of the closers can be
384970Sbill * openers too.
385970Sbill */
386970Sbill if (eq(mac, br[i].clbr)) {
387970Sbill nomatch(mac);
388970Sbill break;
389970Sbill }
390970Sbill }
391970Sbill }
392970Sbill }
393970Sbill
nomatch(mac)394970Sbill nomatch(mac)
395970Sbill char *mac;
396970Sbill {
397970Sbill register int i, j;
398970Sbill
399970Sbill /*
400970Sbill * Look for a match further down on stack
401970Sbill * If we find one, it suggests that the stuff in
402970Sbill * between is supposed to match itself.
403970Sbill */
404970Sbill for (j=stktop; j>=0; j--)
405970Sbill if (eq(mac,br[stk[j].opno].clbr)) {
406970Sbill /* Found. Make a good diagnostic. */
407970Sbill if (j == stktop-2) {
408970Sbill /*
409970Sbill * Check for special case \fx..\fR and don't
410970Sbill * complain.
411970Sbill */
412970Sbill if (stk[j+1].opno==FT && stk[j+1].parm!='R'
413970Sbill && stk[j+2].opno==FT && stk[j+2].parm=='R') {
414970Sbill stktop = j -1;
415970Sbill return;
416970Sbill }
417970Sbill /*
418970Sbill * We have two unmatched frobs. Chances are
419970Sbill * they were intended to match, so we mention
420970Sbill * them together.
421970Sbill */
422970Sbill pe(stk[j+1].lno);
423970Sbill prop(j+1);
424970Sbill printf(" does not match %d: ", stk[j+2].lno);
425970Sbill prop(j+2);
426970Sbill printf("\n");
427970Sbill } else for (i=j+1; i <= stktop; i++) {
428970Sbill complain(i);
429970Sbill }
430970Sbill stktop = j-1;
431970Sbill return;
432970Sbill }
433970Sbill /* Didn't find one. Throw this away. */
434970Sbill pe(lineno);
435970Sbill printf("Unmatched .%s\n", mac);
436970Sbill }
437970Sbill
/* eq: return 1 if the two strings are identical, 0 otherwise */
int
eq(char *s1, char *s2)
{
	return strcmp(s1, s2) ? 0 : 1;
}
444970Sbill
445970Sbill /* print the first part of an error message, given the line number */
pe(lineno)446970Sbill pe(lineno)
447970Sbill int lineno;
448970Sbill {
449970Sbill if (nfiles > 1)
450970Sbill printf("%s: ", cfilename);
451970Sbill printf("%d: ", lineno);
452970Sbill }
453970Sbill
checkknown(mac)454970Sbill checkknown(mac)
455970Sbill char *mac;
456970Sbill {
457970Sbill
458970Sbill if (eq(mac, "."))
459970Sbill return;
460970Sbill if (binsrch(mac) >= 0)
461970Sbill return;
4623574Sroot if (mac[0] == '\\' && mac[1] == '"') /* comments */
4633574Sroot return;
464970Sbill
465970Sbill pe(lineno);
466970Sbill printf("Unknown command: .%s\n", mac);
467970Sbill }
468970Sbill
469970Sbill /*
470970Sbill * We have a .de xx line in "line". Add xx to the list of known commands.
471970Sbill */
addcmd(line)472970Sbill addcmd(line)
473970Sbill char *line;
474970Sbill {
475970Sbill char *mac;
476970Sbill
477970Sbill /* grab the macro being defined */
478970Sbill mac = line+4;
479970Sbill while (isspace(*mac))
480970Sbill mac++;
481970Sbill if (*mac == 0) {
482970Sbill pe(lineno);
483970Sbill printf("illegal define: %s\n", line);
484970Sbill return;
485970Sbill }
486970Sbill mac[2] = 0;
487970Sbill if (isspace(mac[1]) || mac[1] == '\\')
488970Sbill mac[1] = 0;
489970Sbill if (ncmds >= MAXCMDS) {
490970Sbill printf("Only %d known commands allowed\n", MAXCMDS);
491970Sbill exit(1);
492970Sbill }
4931406Smark addmac(mac);
4941406Smark }
4951406Smark
4961406Smark /*
4971406Smark * Add mac to the list. We should really have some kind of tree
4981406Smark * structure here but this is a quick-and-dirty job and I just don't
4991406Smark * have time to mess with it. (I wonder if this will come back to haunt
5001406Smark * me someday?) Anyway, I claim that .de is fairly rare in user
5011406Smark * nroff programs, and the register loop below is pretty fast.
5021406Smark */
addmac(mac)5031406Smark addmac(mac)
5041406Smark char *mac;
5051406Smark {
5061406Smark register char **src, **dest, **loc;
5071406Smark
50815915Srrh if (binsrch(mac) >= 0){ /* it's OK to redefine something */
50915915Srrh #ifdef DEBUG
51015915Srrh printf("binsrch(%s) -> already in table\n", mac);
51115915Srrh #endif DEBUG
51215915Srrh return;
51315915Srrh }
514970Sbill /* binsrch sets slot as a side effect */
5151406Smark #ifdef DEBUG
5161406Smark printf("binsrch(%s) -> %d\n", mac, slot);
5171406Smark #endif
518970Sbill loc = &knowncmds[slot];
519970Sbill src = &knowncmds[ncmds-1];
520970Sbill dest = src+1;
521970Sbill while (dest > loc)
522970Sbill *dest-- = *src--;
523970Sbill *loc = malloc(3);
524970Sbill strcpy(*loc, mac);
525970Sbill ncmds++;
5261406Smark #ifdef DEBUG
5271406Smark printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
5281406Smark #endif
529970Sbill }
530970Sbill
531970Sbill /*
532970Sbill * Do a binary search in knowncmds for mac.
533970Sbill * If found, return the index. If not, return -1.
534970Sbill */
binsrch(mac)535970Sbill binsrch(mac)
536970Sbill char *mac;
537970Sbill {
538970Sbill register char *p; /* pointer to current cmd in list */
539970Sbill register int d; /* difference if any */
540970Sbill register int mid; /* mid point in binary search */
541970Sbill register int top, bot; /* boundaries of bin search, inclusive */
542970Sbill
543970Sbill top = ncmds-1;
544970Sbill bot = 0;
545970Sbill while (top >= bot) {
546970Sbill mid = (top+bot)/2;
547970Sbill p = knowncmds[mid];
548970Sbill d = p[0] - mac[0];
549970Sbill if (d == 0)
550970Sbill d = p[1] - mac[1];
551970Sbill if (d == 0)
552970Sbill return mid;
553970Sbill if (d < 0)
554970Sbill bot = mid + 1;
555970Sbill else
556970Sbill top = mid - 1;
557970Sbill }
558970Sbill slot = bot; /* place it would have gone */
559970Sbill return -1;
560970Sbill }
561