xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 2a399c6883d870daece976daec6ffa7bb7f934ce)
1 /*	$NetBSD: checknr.c,v 1.5 1997/10/18 12:38:13 lukem Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.5 1997/10/18 12:38:13 lukem Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 
/* Fixed capacities of the statically sized tables in this file. */
#define MAXSTK	100	/* entries in the stack of pending openers */
#define MAXBR	100	/* rows in the opener/closer table */
#define MAXCMDS	500	/* rows in the table of known commands */

/*
 * One opener that has been seen but not yet closed.
 */
struct stkstr {
	int opno;	/* row of br[] describing this opener */
	int pl;		/* 0 for a macro/request, 1 for \f; for \s the
			   sign character, or ' ' for an unsigned size */
	int parm;	/* the size for \s, the font letter for \f */
	int lno;	/* input line on which the opener appeared */
};

/*
 * The stack on which we remember what we've seen so far; stktop
 * indexes the most recent entry and process() resets it to -1.
 */
struct stkstr stk[MAXSTK];
int stktop;
76 
77 /*
78  * The kinds of opening and closing brackets.
79  */
/*
 * One kind of bracket: the request that opens it and the request
 * that closes it.
 */
struct brstr {
	char *opbr;	/* name of the opener */
	char *clbr;	/* name of the matching closer */
};

/* Rows of br[] that also stand for the \s and \f escapes. */
#define SZ	0
#define FT	1

/*
 * The known bracket pairs.  The first row with a null opener marks the
 * end of the table; main() appends the pairs given with -a there.
 */
struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* row SZ, also \s */
	{ "ft", "ft" },		/* row FT, also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of table */
	{ NULL, NULL },
};
134 
135 /*
136  * All commands known to nroff, plus macro packages.
137  * Used so we can complain about unrecognized commands.
138  */
139 char *knowncmds[MAXCMDS] = {
140 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
141 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
142 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
143 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
144 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
145 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
146 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
147 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
148 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
149 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
150 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
151 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
152 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
153 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
154 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
155 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
156 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
157 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
158 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
159 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
160 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
161 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
162 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
163 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
164 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
165 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
166 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
167 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
168 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
169 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
170 "yr", 0
171 };
172 
/* Where we are in the input. */
char	*cfilename;	/* name of the file being read */
int	nfiles;		/* how many files were named; pe() prints the
			   file name only when there are several */
int	lineno;		/* number of the line held in line[] */
char	line[256];	/* text of the line being examined */

/* Command line switches. */
int	fflag;		/* -f given: do not check \f */
int	sflag;		/* -s given: do not check \s */

/* Bookkeeping for knowncmds[]. */
int	ncmds;		/* number of names currently in the table */
int	slot;		/* where binsrch() says a missing name belongs */
181 
/* Entry point and command line help. */
int	main __P((int, char **));
void	usage __P((void));

/* Scanning a file and matching openers with closers. */
void	process __P((FILE *));
void	chkcmd __P((char *, char *));
void	nomatch __P((char *));

/* Printing diagnostics. */
void	pe __P((int));
void	prop __P((int));
void	complain __P((int));

/* The table of known commands. */
void	checkknown __P((char *));
void	addcmd __P((char *));
void	addmac __P((char *));
int	binsrch __P((char *));

/* String comparison helper. */
int	eq __P((const void *, const void *));
195 
196 int
197 main(argc, argv)
198 	int argc;
199 	char **argv;
200 {
201 	FILE *f;
202 	int i;
203 	char *cp;
204 	char b1[4];
205 
206 	/* Figure out how many known commands there are */
207 	while (knowncmds[ncmds])
208 		ncmds++;
209 	while (argc > 1 && argv[1][0] == '-') {
210 		switch(argv[1][1]) {
211 
212 		/* -a: add pairs of macros */
213 		case 'a':
214 			i = strlen(argv[1]) - 2;
215 			if (i % 6 != 0)
216 				usage();
217 			/* look for empty macro slots */
218 			for (i=0; br[i].opbr; i++)
219 				;
220 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
221 				br[i].opbr = malloc(3);
222 				strncpy(br[i].opbr, cp, 2);
223 				br[i].clbr = malloc(3);
224 				strncpy(br[i].clbr, cp+3, 2);
225 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
226 				addmac(br[i].clbr);
227 				i++;
228 			}
229 			break;
230 
231 		/* -c: add known commands */
232 		case 'c':
233 			i = strlen(argv[1]) - 2;
234 			if (i % 3 != 0)
235 				usage();
236 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
237 				if (cp[2] && cp[2] != '.')
238 					usage();
239 				strncpy(b1, cp, 2);
240 				addmac(b1);
241 			}
242 			break;
243 
244 		/* -f: ignore font changes */
245 		case 'f':
246 			fflag = 1;
247 			break;
248 
249 		/* -s: ignore size changes */
250 		case 's':
251 			sflag = 1;
252 			break;
253 		default:
254 			usage();
255 		}
256 		argc--; argv++;
257 	}
258 
259 	nfiles = argc - 1;
260 
261 	if (nfiles > 0) {
262 		for (i=1; i<argc; i++) {
263 			cfilename = argv[i];
264 			f = fopen(cfilename, "r");
265 			if (f == NULL)
266 				perror(cfilename);
267 			else
268 				process(f);
269 		}
270 	} else {
271 		cfilename = "stdin";
272 		process(stdin);
273 	}
274 	exit(0);
275 }
276 
/*
 * usage: print the synopsis and exit with status 1.
 *
 * The synopsis goes to standard error so that it cannot be mistaken
 * for, or mixed into, the report that checknr writes on standard
 * output.
 */
void
usage()
{
	fprintf(stderr,
	    "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
283 
284 void
285 process(f)
286 	FILE *f;
287 {
288 	int i, n;
289 	char mac[5];	/* The current macro or nroff command */
290 	int pl;
291 
292 	stktop = -1;
293 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
294 		if (line[0] == '.') {
295 			/*
296 			 * find and isolate the macro/command name.
297 			 */
298 			strncpy(mac, line+1, 4);
299 			if (isspace(mac[0])) {
300 				pe(lineno);
301 				printf("Empty command\n");
302 			} else if (isspace(mac[1])) {
303 				mac[1] = 0;
304 			} else if (isspace(mac[2])) {
305 				mac[2] = 0;
306 			} else if (mac[0] != '\\' || mac[1] != '\"') {
307 				pe(lineno);
308 				printf("Command too long\n");
309 			}
310 
311 			/*
312 			 * Is it a known command?
313 			 */
314 			checkknown(mac);
315 
316 			/*
317 			 * Should we add it?
318 			 */
319 			if (eq(mac, "de"))
320 				addcmd(line);
321 
322 			chkcmd(line, mac);
323 		}
324 
325 		/*
326 		 * At this point we process the line looking
327 		 * for \s and \f.
328 		 */
329 		for (i=0; line[i]; i++)
330 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
331 				if (!sflag && line[++i]=='s') {
332 					pl = line[++i];
333 					if (isdigit(pl)) {
334 						n = pl - '0';
335 						pl = ' ';
336 					} else
337 						n = 0;
338 					while (isdigit(line[++i]))
339 						n = 10 * n + line[i] - '0';
340 					i--;
341 					if (n == 0) {
342 						if (stk[stktop].opno == SZ) {
343 							stktop--;
344 						} else {
345 							pe(lineno);
346 							printf("unmatched \\s0\n");
347 						}
348 					} else {
349 						stk[++stktop].opno = SZ;
350 						stk[stktop].pl = pl;
351 						stk[stktop].parm = n;
352 						stk[stktop].lno = lineno;
353 					}
354 				} else if (!fflag && line[i]=='f') {
355 					n = line[++i];
356 					if (n == 'P') {
357 						if (stk[stktop].opno == FT) {
358 							stktop--;
359 						} else {
360 							pe(lineno);
361 							printf("unmatched \\fP\n");
362 						}
363 					} else {
364 						stk[++stktop].opno = FT;
365 						stk[stktop].pl = 1;
366 						stk[stktop].parm = n;
367 						stk[stktop].lno = lineno;
368 					}
369 				}
370 			}
371 	}
372 	/*
373 	 * We've hit the end and look at all this stuff that hasn't been
374 	 * matched yet!  Complain, complain.
375 	 */
376 	for (i=stktop; i>=0; i--) {
377 		complain(i);
378 	}
379 }
380 
381 void
382 complain(i)
383 	int i;
384 {
385 	pe(stk[i].lno);
386 	printf("Unmatched ");
387 	prop(i);
388 	printf("\n");
389 }
390 
391 void
392 prop(i)
393 	int i;
394 {
395 	if (stk[i].pl == 0)
396 		printf(".%s", br[stk[i].opno].opbr);
397 	else switch(stk[i].opno) {
398 	case SZ:
399 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
400 		break;
401 	case FT:
402 		printf("\\f%c", stk[i].parm);
403 		break;
404 	default:
405 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
406 			i, stk[i].opno, br[stk[i].opno].opbr,
407 			br[stk[i].opno].clbr);
408 	}
409 }
410 
411 void
412 chkcmd(line, mac)
413 	char *line;
414 	char *mac;
415 {
416 	int i;
417 
418 	/*
419 	 * Check to see if it matches top of stack.
420 	 */
421 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
422 		stktop--;	/* OK. Pop & forget */
423 	else {
424 		/* No. Maybe it's an opener */
425 		for (i=0; br[i].opbr; i++) {
426 			if (eq(mac, br[i].opbr)) {
427 				/* Found. Push it. */
428 				stktop++;
429 				stk[stktop].opno = i;
430 				stk[stktop].pl = 0;
431 				stk[stktop].parm = 0;
432 				stk[stktop].lno = lineno;
433 				break;
434 			}
435 			/*
436 			 * Maybe it's an unmatched closer.
437 			 * NOTE: this depends on the fact
438 			 * that none of the closers can be
439 			 * openers too.
440 			 */
441 			if (eq(mac, br[i].clbr)) {
442 				nomatch(mac);
443 				break;
444 			}
445 		}
446 	}
447 }
448 
449 void
450 nomatch(mac)
451 	char *mac;
452 {
453 	int i, j;
454 
455 	/*
456 	 * Look for a match further down on stack
457 	 * If we find one, it suggests that the stuff in
458 	 * between is supposed to match itself.
459 	 */
460 	for (j=stktop; j>=0; j--)
461 		if (eq(mac,br[stk[j].opno].clbr)) {
462 			/* Found.  Make a good diagnostic. */
463 			if (j == stktop-2) {
464 				/*
465 				 * Check for special case \fx..\fR and don't
466 				 * complain.
467 				 */
468 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
469 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
470 					stktop = j -1;
471 					return;
472 				}
473 				/*
474 				 * We have two unmatched frobs.  Chances are
475 				 * they were intended to match, so we mention
476 				 * them together.
477 				 */
478 				pe(stk[j+1].lno);
479 				prop(j+1);
480 				printf(" does not match %d: ", stk[j+2].lno);
481 				prop(j+2);
482 				printf("\n");
483 			} else for (i=j+1; i <= stktop; i++) {
484 				complain(i);
485 			}
486 			stktop = j-1;
487 			return;
488 		}
489 	/* Didn't find one.  Throw this away. */
490 	pe(lineno);
491 	printf("Unmatched .%s\n", mac);
492 }
493 
/*
 * eq: compare two NUL-terminated strings; returns 1 when they are
 * identical and 0 otherwise.
 */
int
eq(s1, s2)
	const void *s1, *s2;
{
	const char *left = s1;
	const char *right = s2;

	return (strcmp(left, right) == 0);
}
501 
502 /* print the first part of an error message, given the line number */
503 void
504 pe(lineno)
505 	int lineno;
506 {
507 	if (nfiles > 1)
508 		printf("%s: ", cfilename);
509 	printf("%d: ", lineno);
510 }
511 
512 void
513 checkknown(mac)
514 	char *mac;
515 {
516 
517 	if (eq(mac, "."))
518 		return;
519 	if (binsrch(mac) >= 0)
520 		return;
521 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
522 		return;
523 
524 	pe(lineno);
525 	printf("Unknown command: .%s\n", mac);
526 }
527 
528 /*
529  * We have a .de xx line in "line".  Add xx to the list of known commands.
530  */
531 void
532 addcmd(line)
533 	char *line;
534 {
535 	char *mac;
536 
537 	/* grab the macro being defined */
538 	mac = line+4;
539 	while (isspace(*mac))
540 		mac++;
541 	if (*mac == 0) {
542 		pe(lineno);
543 		printf("illegal define: %s\n", line);
544 		return;
545 	}
546 	mac[2] = 0;
547 	if (isspace(mac[1]) || mac[1] == '\\')
548 		mac[1] = 0;
549 	if (ncmds >= MAXCMDS) {
550 		printf("Only %d known commands allowed\n", MAXCMDS);
551 		exit(1);
552 	}
553 	addmac(mac);
554 }
555 
556 /*
557  * Add mac to the list.  We should really have some kind of tree
558  * structure here but this is a quick-and-dirty job and I just don't
559  * have time to mess with it.  (I wonder if this will come back to haunt
560  * me someday?)  Anyway, I claim that .de is fairly rare in user
561  * nroff programs, and the register loop below is pretty fast.
562  */
563 void
564 addmac(mac)
565 	char *mac;
566 {
567 	char **src, **dest, **loc;
568 
569 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
570 #ifdef DEBUG
571 		printf("binsrch(%s) -> already in table\n", mac);
572 #endif DEBUG
573 		return;
574 	}
575 	/* binsrch sets slot as a side effect */
576 #ifdef DEBUG
577 printf("binsrch(%s) -> %d\n", mac, slot);
578 #endif
579 	loc = &knowncmds[slot];
580 	src = &knowncmds[ncmds-1];
581 	dest = src+1;
582 	while (dest > loc)
583 		*dest-- = *src--;
584 	*loc = malloc(3);
585 	strcpy(*loc, mac);
586 	ncmds++;
587 #ifdef DEBUG
588 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
589 #endif
590 }
591 
592 /*
593  * Do a binary search in knowncmds for mac.
594  * If found, return the index.  If not, return -1.
595  */
596 int
597 binsrch(mac)
598 	char *mac;
599 {
600 	char *p;	/* pointer to current cmd in list */
601 	int d;		/* difference if any */
602 	int mid;	/* mid point in binary search */
603 	int top, bot;	/* boundaries of bin search, inclusive */
604 
605 	top = ncmds-1;
606 	bot = 0;
607 	while (top >= bot) {
608 		mid = (top+bot)/2;
609 		p = knowncmds[mid];
610 		d = p[0] - mac[0];
611 		if (d == 0)
612 			d = p[1] - mac[1];
613 		if (d == 0)
614 			return mid;
615 		if (d < 0)
616 			bot = mid + 1;
617 		else
618 			top = mid - 1;
619 	}
620 	slot = bot;	/* place it would have gone */
621 	return -1;
622 }
623