xref: /netbsd-src/usr.bin/checknr/checknr.c (revision b78fc2203001647f0afe30bd3671ea8ab8183534)
1 /*	$NetBSD: checknr.c,v 1.9 2002/01/21 16:46:37 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.9 2002/01/21 16:46:37 wiz Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 
62 #define MAXSTK	100	/* Stack size */
63 #define MAXBR	100	/* Max number of bracket pairs known */
64 #define MAXCMDS	500	/* Max number of commands known */
65 
/*
 * The stack on which we remember what we've seen so far.
 *
 * Every opener that has not been closed yet (a macro from br[], an
 * embedded \s size change or an embedded \f font change) occupies one
 * entry.  stktop is the index of the most recent entry; process() sets
 * it to -1 (empty stack) before reading each file.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
76 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro with the macro that closes it.  The
 * table ends at the first entry whose opbr is null; main() appends the
 * pairs given with -a at that position.  Entries 0 (SZ) and 1 (FT) are
 * also used as the opno of embedded \s and \f escapes.  DS/DE and FS/FE
 * are listed under both -mm and -ms; chkcmd() stops at the first
 * matching entry.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz"},	/* also \s */
#define FT	1
	{ "ft",	"ft"},	/* also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/* the -ms package */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	{0,	0},
};
134 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The table is searched with binsrch(), which compares the first two
 * characters of each name, so the entries must stay sorted by those two
 * characters in character-code order.  The null pointer ends the initial
 * list; main() counts the entries up to it to initialise ncmds, and
 * addmac() inserts new names at their sorted position.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
172 
/* Program-wide state shared by the routines below. */
int	lineno;		/* current line number in input file */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process; pe() prints the file name when > 1 */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* insertion index in knowncmds left by a failed binsrch */
180 
/* Forward declarations of the functions defined in this file. */
void	addcmd(char *);
void	addmac(char *);
int	binsrch(char *);
void	checkknown(char *);
void	chkcmd(char *, char *);
void	complain(int);
int	eq(const void *, const void *);
int	main(int, char **);
void	nomatch(char *);
void	pe(int);
void	process(FILE *);
void	prop(int);
void	usage(void);
194 
195 int
196 main(int argc, char **argv)
197 {
198 	FILE *f;
199 	int i;
200 	char *cp;
201 	char b1[4];
202 
203 	/* Figure out how many known commands there are */
204 	while (knowncmds[ncmds])
205 		ncmds++;
206 	while (argc > 1 && argv[1][0] == '-') {
207 		switch(argv[1][1]) {
208 
209 		/* -a: add pairs of macros */
210 		case 'a':
211 			i = strlen(argv[1]) - 2;
212 			if (i % 6 != 0)
213 				usage();
214 			/* look for empty macro slots */
215 			for (i=0; br[i].opbr; i++)
216 				;
217 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
218 				br[i].opbr = malloc(3);
219 				strncpy(br[i].opbr, cp, 2);
220 				br[i].clbr = malloc(3);
221 				strncpy(br[i].clbr, cp+3, 2);
222 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
223 				addmac(br[i].clbr);
224 				i++;
225 			}
226 			break;
227 
228 		/* -c: add known commands */
229 		case 'c':
230 			i = strlen(argv[1]) - 2;
231 			if (i % 3 != 0)
232 				usage();
233 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
234 				if (cp[2] && cp[2] != '.')
235 					usage();
236 				strncpy(b1, cp, 2);
237 				addmac(b1);
238 			}
239 			break;
240 
241 		/* -f: ignore font changes */
242 		case 'f':
243 			fflag = 1;
244 			break;
245 
246 		/* -s: ignore size changes */
247 		case 's':
248 			sflag = 1;
249 			break;
250 		default:
251 			usage();
252 		}
253 		argc--; argv++;
254 	}
255 
256 	nfiles = argc - 1;
257 
258 	if (nfiles > 0) {
259 		for (i=1; i<argc; i++) {
260 			cfilename = argv[i];
261 			f = fopen(cfilename, "r");
262 			if (f == NULL)
263 				perror(cfilename);
264 			else
265 				process(f);
266 		}
267 	} else {
268 		cfilename = "stdin";
269 		process(stdin);
270 	}
271 	exit(0);
272 }
273 
/*
 * usage: print the command-line synopsis on the standard output and
 * leave the program with exit status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n";

	fputs(synopsis, stdout);
	exit(1);
}
280 
281 void
282 process(FILE *f)
283 {
284 	int i, n;
285 	char line[256];	/* the current line */
286 	char mac[5];	/* The current macro or nroff command */
287 	int pl;
288 
289 	stktop = -1;
290 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
291 		if (line[0] == '.') {
292 			/*
293 			 * find and isolate the macro/command name.
294 			 */
295 			strncpy(mac, line+1, 4);
296 			if (isspace((unsigned char)mac[0])) {
297 				pe(lineno);
298 				printf("Empty command\n");
299 			} else if (isspace((unsigned char)mac[1])) {
300 				mac[1] = 0;
301 			} else if (isspace((unsigned char)mac[2])) {
302 				mac[2] = 0;
303 			} else if (mac[0] != '\\' || mac[1] != '\"') {
304 				pe(lineno);
305 				printf("Command too long\n");
306 			}
307 
308 			/*
309 			 * Is it a known command?
310 			 */
311 			checkknown(mac);
312 
313 			/*
314 			 * Should we add it?
315 			 */
316 			if (eq(mac, "de"))
317 				addcmd(line);
318 
319 			chkcmd(line, mac);
320 		}
321 
322 		/*
323 		 * At this point we process the line looking
324 		 * for \s and \f.
325 		 */
326 		for (i=0; line[i]; i++)
327 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
328 				if (!sflag && line[++i]=='s') {
329 					pl = line[++i];
330 					if (isdigit((unsigned char)pl)) {
331 						n = pl - '0';
332 						pl = ' ';
333 					} else
334 						n = 0;
335 					while (isdigit((unsigned char)line[++i]))
336 						n = 10 * n + line[i] - '0';
337 					i--;
338 					if (n == 0) {
339 						if (stk[stktop].opno == SZ) {
340 							stktop--;
341 						} else {
342 							pe(lineno);
343 							printf("unmatched \\s0\n");
344 						}
345 					} else {
346 						stk[++stktop].opno = SZ;
347 						stk[stktop].pl = pl;
348 						stk[stktop].parm = n;
349 						stk[stktop].lno = lineno;
350 					}
351 				} else if (!fflag && line[i]=='f') {
352 					n = line[++i];
353 					if (n == 'P') {
354 						if (stk[stktop].opno == FT) {
355 							stktop--;
356 						} else {
357 							pe(lineno);
358 							printf("unmatched \\fP\n");
359 						}
360 					} else {
361 						stk[++stktop].opno = FT;
362 						stk[stktop].pl = 1;
363 						stk[stktop].parm = n;
364 						stk[stktop].lno = lineno;
365 					}
366 				}
367 			}
368 	}
369 	/*
370 	 * We've hit the end and look at all this stuff that hasn't been
371 	 * matched yet!  Complain, complain.
372 	 */
373 	for (i=stktop; i>=0; i--) {
374 		complain(i);
375 	}
376 }
377 
378 void
379 complain(int i)
380 {
381 	pe(stk[i].lno);
382 	printf("Unmatched ");
383 	prop(i);
384 	printf("\n");
385 }
386 
387 void
388 prop(int i)
389 {
390 	if (stk[i].pl == 0)
391 		printf(".%s", br[stk[i].opno].opbr);
392 	else switch(stk[i].opno) {
393 	case SZ:
394 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
395 		break;
396 	case FT:
397 		printf("\\f%c", stk[i].parm);
398 		break;
399 	default:
400 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
401 			i, stk[i].opno, br[stk[i].opno].opbr,
402 			br[stk[i].opno].clbr);
403 	}
404 }
405 
406 void
407 chkcmd(char *line, char *mac)
408 {
409 	int i;
410 
411 	/*
412 	 * Check to see if it matches top of stack.
413 	 */
414 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415 		stktop--;	/* OK. Pop & forget */
416 	else {
417 		/* No. Maybe it's an opener */
418 		for (i=0; br[i].opbr; i++) {
419 			if (eq(mac, br[i].opbr)) {
420 				/* Found. Push it. */
421 				stktop++;
422 				stk[stktop].opno = i;
423 				stk[stktop].pl = 0;
424 				stk[stktop].parm = 0;
425 				stk[stktop].lno = lineno;
426 				break;
427 			}
428 			/*
429 			 * Maybe it's an unmatched closer.
430 			 * NOTE: this depends on the fact
431 			 * that none of the closers can be
432 			 * openers too.
433 			 */
434 			if (eq(mac, br[i].clbr)) {
435 				nomatch(mac);
436 				break;
437 			}
438 		}
439 	}
440 }
441 
442 void
443 nomatch(char *mac)
444 {
445 	int i, j;
446 
447 	/*
448 	 * Look for a match further down on stack
449 	 * If we find one, it suggests that the stuff in
450 	 * between is supposed to match itself.
451 	 */
452 	for (j=stktop; j>=0; j--)
453 		if (eq(mac,br[stk[j].opno].clbr)) {
454 			/* Found.  Make a good diagnostic. */
455 			if (j == stktop-2) {
456 				/*
457 				 * Check for special case \fx..\fR and don't
458 				 * complain.
459 				 */
460 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
461 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
462 					stktop = j -1;
463 					return;
464 				}
465 				/*
466 				 * We have two unmatched frobs.  Chances are
467 				 * they were intended to match, so we mention
468 				 * them together.
469 				 */
470 				pe(stk[j+1].lno);
471 				prop(j+1);
472 				printf(" does not match %d: ", stk[j+2].lno);
473 				prop(j+2);
474 				printf("\n");
475 			} else for (i=j+1; i <= stktop; i++) {
476 				complain(i);
477 			}
478 			stktop = j-1;
479 			return;
480 		}
481 	/* Didn't find one.  Throw this away. */
482 	pe(lineno);
483 	printf("Unmatched .%s\n", mac);
484 }
485 
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *lhs = s1;
	const char *rhs = s2;

	return !strcmp(lhs, rhs);
}
492 
493 /* print the first part of an error message, given the line number */
494 void
495 pe(int pelineno)
496 {
497 	if (nfiles > 1)
498 		printf("%s: ", cfilename);
499 	printf("%d: ", pelineno);
500 }
501 
502 void
503 checkknown(char *mac)
504 {
505 
506 	if (eq(mac, "."))
507 		return;
508 	if (binsrch(mac) >= 0)
509 		return;
510 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
511 		return;
512 
513 	pe(lineno);
514 	printf("Unknown command: .%s\n", mac);
515 }
516 
517 /*
518  * We have a .de xx line in "line".  Add xx to the list of known commands.
519  */
520 void
521 addcmd(char *line)
522 {
523 	char *mac;
524 
525 	/* grab the macro being defined */
526 	mac = line+4;
527 	while (isspace((unsigned char)*mac))
528 		mac++;
529 	if (*mac == 0) {
530 		pe(lineno);
531 		printf("illegal define: %s\n", line);
532 		return;
533 	}
534 	mac[2] = 0;
535 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
536 		mac[1] = 0;
537 	if (ncmds >= MAXCMDS) {
538 		printf("Only %d known commands allowed\n", MAXCMDS);
539 		exit(1);
540 	}
541 	addmac(mac);
542 }
543 
544 /*
545  * Add mac to the list.  We should really have some kind of tree
546  * structure here but this is a quick-and-dirty job and I just don't
547  * have time to mess with it.  (I wonder if this will come back to haunt
548  * me someday?)  Anyway, I claim that .de is fairly rare in user
549  * nroff programs, and the register loop below is pretty fast.
550  */
551 void
552 addmac(char *mac)
553 {
554 	char **src, **dest, **loc;
555 
556 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
557 #ifdef DEBUG
558 		printf("binsrch(%s) -> already in table\n", mac);
559 #endif /* DEBUG */
560 		return;
561 	}
562 	/* binsrch sets slot as a side effect */
563 #ifdef DEBUG
564 	printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566 	loc = &knowncmds[slot];
567 	src = &knowncmds[ncmds-1];
568 	dest = src+1;
569 	while (dest > loc)
570 		*dest-- = *src--;
571 	*loc = malloc(3);
572 	strcpy(*loc, mac);
573 	ncmds++;
574 #ifdef DEBUG
575 	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
576 	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
577 	    knowncmds[slot+2], ncmds);
578 #endif
579 }
580 
581 /*
582  * Do a binary search in knowncmds for mac.
583  * If found, return the index.  If not, return -1.
584  */
585 int
586 binsrch(char *mac)
587 {
588 	char *p;	/* pointer to current cmd in list */
589 	int d;		/* difference if any */
590 	int mid;	/* mid point in binary search */
591 	int top, bot;	/* boundaries of bin search, inclusive */
592 
593 	top = ncmds-1;
594 	bot = 0;
595 	while (top >= bot) {
596 		mid = (top+bot)/2;
597 		p = knowncmds[mid];
598 		d = p[0] - mac[0];
599 		if (d == 0)
600 			d = p[1] - mac[1];
601 		if (d == 0)
602 			return mid;
603 		if (d < 0)
604 			bot = mid + 1;
605 		else
606 			top = mid - 1;
607 	}
608 	slot = bot;	/* place it would have gone */
609 	return -1;
610 }
611