xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 8e6ab8837d8d6b9198e67c1c445300b483e2f304)
1 /*	$NetBSD: checknr.c,v 1.11 2003/05/09 08:44:57 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.11 2003/05/09 08:44:57 wiz Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 
62 #define MAXSTK	100	/* Stack size */
63 #define MAXBR	100	/* Max number of bracket pairs known */
64 #define MAXCMDS	500	/* Max number of commands known */
65 
/*
 * The stack on which we remember what we've seen so far.
 * Each entry is an opener (a macro, a \s or a \f) that has not yet met
 * its closer.  stktop indexes the top entry; process() resets it to -1,
 * meaning the stack is empty, at the start of every file.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro request */
	int parm;	/* parm to size, font, etc (0 for a macro request) */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;
76 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening request with the request that closes it.
 * The first entry whose opbr is a null pointer ends the table; the -a
 * option in main() appends new pairs starting at that entry.
 * Entries SZ and FT are referenced by index for the \s and \f escapes.
 * A few pairs (DS/DE, FS/FE) are listed under both -mm and -ms; chkcmd()
 * scans from the top and stops at the first match.
 */
struct brstr {
	char *opbr;	/* name of the opening request */
	char *clbr;	/* name of the request that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz"},	/* also \s */
#define FT	1
	{ "ft",	"ft"},	/* also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/* the -ms package */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* The -mdoc package */
	{"Ao",  "Ac"},
	{"Bd",  "Ed"},
	{"Bk",  "Ek"},
	{"Bo",  "Bc"},
	{"Do",  "Dc"},
	{"Fo",  "Fc"},
	{"Oo",  "Oc"},
	{"Po",  "Pc"},
	{"Qo",  "Qc"},
	{"Rs",  "Re"},
	{"So",  "Sc"},
	{"Xo",  "Xc"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	{0,	0},	/* end-of-table marker */
};
147 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search on the first two characters of each
 * name, so the entries must stay sorted in ascending character order.
 * addmac() inserts new names at the position binsrch() reports, which
 * keeps the order intact.  The trailing 0 lets main() count the initial
 * entries; the rest of the MAXCMDS slots are room for additions.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
"%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
"(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
"@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
"@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
"AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" ,  "B1",
"B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
"Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
"Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
"Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
"Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
"FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
"H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
"IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
"MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
"Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
"Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
"Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
"Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
"Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
"SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
"T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
"TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
"Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
"[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
"ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
"di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
"em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
"fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
"if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
"ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
"po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
"rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
"sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
"uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
};
196 
/* State shared by the routines below. */
int	lineno;		/* current line number in input file */
char	*cfilename;	/* name of current file ("stdin" when no files given) */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* where binsrch says a missing name belongs in knowncmds */
204 
/* Program entry and option handling. */
int	main(int argc, char **argv);
void	usage(void);

/* Input scanning and bracket matching. */
void	process(FILE *f);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);

/* Table of known commands. */
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
void	checkknown(char *mac);

/* Diagnostics and small helpers. */
void	complain(int i);
void	prop(int i);
void	pe(int pelineno);
int	eq(const void *s1, const void *s2);
218 
219 int
220 main(int argc, char **argv)
221 {
222 	FILE *f;
223 	int i;
224 	char *cp;
225 	char b1[4];
226 
227 	/* Figure out how many known commands there are */
228 	while (knowncmds[ncmds])
229 		ncmds++;
230 	while (argc > 1 && argv[1][0] == '-') {
231 		switch(argv[1][1]) {
232 
233 		/* -a: add pairs of macros */
234 		case 'a':
235 			i = strlen(argv[1]) - 2;
236 			if (i % 6 != 0)
237 				usage();
238 			/* look for empty macro slots */
239 			for (i=0; br[i].opbr; i++)
240 				;
241 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
242 				br[i].opbr = malloc(3);
243 				strncpy(br[i].opbr, cp, 2);
244 				br[i].clbr = malloc(3);
245 				strncpy(br[i].clbr, cp+3, 2);
246 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
247 				addmac(br[i].clbr);
248 				i++;
249 			}
250 			break;
251 
252 		/* -c: add known commands */
253 		case 'c':
254 			i = strlen(argv[1]) - 2;
255 			if (i % 3 != 0)
256 				usage();
257 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
258 				if (cp[2] && cp[2] != '.')
259 					usage();
260 				strncpy(b1, cp, 2);
261 				addmac(b1);
262 			}
263 			break;
264 
265 		/* -f: ignore font changes */
266 		case 'f':
267 			fflag = 1;
268 			break;
269 
270 		/* -s: ignore size changes */
271 		case 's':
272 			sflag = 1;
273 			break;
274 		default:
275 			usage();
276 		}
277 		argc--; argv++;
278 	}
279 
280 	nfiles = argc - 1;
281 
282 	if (nfiles > 0) {
283 		for (i=1; i<argc; i++) {
284 			cfilename = argv[i];
285 			f = fopen(cfilename, "r");
286 			if (f == NULL)
287 				perror(cfilename);
288 			else {
289 				process(f);
290 				fclose(f);
291 			}
292 		}
293 	} else {
294 		cfilename = "stdin";
295 		process(stdin);
296 	}
297 	exit(0);
298 }
299 
/*
 * usage: print the command synopsis on standard error, so it does not
 * mix with the diagnostics written to standard output, and exit with
 * status 1.
 */
void
usage(void)
{
	fprintf(stderr,
	    "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
306 
307 void
308 process(FILE *f)
309 {
310 	int i, n;
311 	char line[256];	/* the current line */
312 	char mac[5];	/* The current macro or nroff command */
313 	int pl;
314 
315 	stktop = -1;
316 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
317 		if (line[0] == '.') {
318 			/*
319 			 * find and isolate the macro/command name.
320 			 */
321 			strncpy(mac, line+1, 4);
322 			if (isspace((unsigned char)mac[0])) {
323 				pe(lineno);
324 				printf("Empty command\n");
325 			} else if (isspace((unsigned char)mac[1])) {
326 				mac[1] = 0;
327 			} else if (isspace((unsigned char)mac[2])) {
328 				mac[2] = 0;
329 			} else if (mac[0] != '\\' || mac[1] != '\"') {
330 				pe(lineno);
331 				printf("Command too long\n");
332 			}
333 
334 			/*
335 			 * Is it a known command?
336 			 */
337 			checkknown(mac);
338 
339 			/*
340 			 * Should we add it?
341 			 */
342 			if (eq(mac, "de"))
343 				addcmd(line);
344 
345 			chkcmd(line, mac);
346 		}
347 
348 		/*
349 		 * At this point we process the line looking
350 		 * for \s and \f.
351 		 */
352 		for (i=0; line[i]; i++)
353 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
354 				if (!sflag && line[++i]=='s') {
355 					pl = line[++i];
356 					if (isdigit((unsigned char)pl)) {
357 						n = pl - '0';
358 						pl = ' ';
359 					} else
360 						n = 0;
361 					while (isdigit((unsigned char)line[++i]))
362 						n = 10 * n + line[i] - '0';
363 					i--;
364 					if (n == 0) {
365 						if (stk[stktop].opno == SZ) {
366 							stktop--;
367 						} else {
368 							pe(lineno);
369 							printf("unmatched \\s0\n");
370 						}
371 					} else {
372 						stk[++stktop].opno = SZ;
373 						stk[stktop].pl = pl;
374 						stk[stktop].parm = n;
375 						stk[stktop].lno = lineno;
376 					}
377 				} else if (!fflag && line[i]=='f') {
378 					n = line[++i];
379 					if (n == 'P') {
380 						if (stk[stktop].opno == FT) {
381 							stktop--;
382 						} else {
383 							pe(lineno);
384 							printf("unmatched \\fP\n");
385 						}
386 					} else {
387 						stk[++stktop].opno = FT;
388 						stk[stktop].pl = 1;
389 						stk[stktop].parm = n;
390 						stk[stktop].lno = lineno;
391 					}
392 				}
393 			}
394 	}
395 	/*
396 	 * We've hit the end and look at all this stuff that hasn't been
397 	 * matched yet!  Complain, complain.
398 	 */
399 	for (i=stktop; i>=0; i--) {
400 		complain(i);
401 	}
402 }
403 
404 void
405 complain(int i)
406 {
407 	pe(stk[i].lno);
408 	printf("Unmatched ");
409 	prop(i);
410 	printf("\n");
411 }
412 
413 void
414 prop(int i)
415 {
416 	if (stk[i].pl == 0)
417 		printf(".%s", br[stk[i].opno].opbr);
418 	else switch(stk[i].opno) {
419 	case SZ:
420 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
421 		break;
422 	case FT:
423 		printf("\\f%c", stk[i].parm);
424 		break;
425 	default:
426 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
427 			i, stk[i].opno, br[stk[i].opno].opbr,
428 			br[stk[i].opno].clbr);
429 	}
430 }
431 
432 void
433 chkcmd(char *line, char *mac)
434 {
435 	int i;
436 
437 	/*
438 	 * Check to see if it matches top of stack.
439 	 */
440 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
441 		stktop--;	/* OK. Pop & forget */
442 	else {
443 		/* No. Maybe it's an opener */
444 		for (i=0; br[i].opbr; i++) {
445 			if (eq(mac, br[i].opbr)) {
446 				/* Found. Push it. */
447 				stktop++;
448 				stk[stktop].opno = i;
449 				stk[stktop].pl = 0;
450 				stk[stktop].parm = 0;
451 				stk[stktop].lno = lineno;
452 				break;
453 			}
454 			/*
455 			 * Maybe it's an unmatched closer.
456 			 * NOTE: this depends on the fact
457 			 * that none of the closers can be
458 			 * openers too.
459 			 */
460 			if (eq(mac, br[i].clbr)) {
461 				nomatch(mac);
462 				break;
463 			}
464 		}
465 	}
466 }
467 
468 void
469 nomatch(char *mac)
470 {
471 	int i, j;
472 
473 	/*
474 	 * Look for a match further down on stack
475 	 * If we find one, it suggests that the stuff in
476 	 * between is supposed to match itself.
477 	 */
478 	for (j=stktop; j>=0; j--)
479 		if (eq(mac,br[stk[j].opno].clbr)) {
480 			/* Found.  Make a good diagnostic. */
481 			if (j == stktop-2) {
482 				/*
483 				 * Check for special case \fx..\fR and don't
484 				 * complain.
485 				 */
486 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
487 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
488 					stktop = j -1;
489 					return;
490 				}
491 				/*
492 				 * We have two unmatched frobs.  Chances are
493 				 * they were intended to match, so we mention
494 				 * them together.
495 				 */
496 				pe(stk[j+1].lno);
497 				prop(j+1);
498 				printf(" does not match %d: ", stk[j+2].lno);
499 				prop(j+2);
500 				printf("\n");
501 			} else for (i=j+1; i <= stktop; i++) {
502 				complain(i);
503 			}
504 			stktop = j-1;
505 			return;
506 		}
507 	/* Didn't find one.  Throw this away. */
508 	pe(lineno);
509 	printf("Unmatched .%s\n", mac);
510 }
511 
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical, 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *a = s1;
	const char *b = s2;

	return !strcmp(a, b);
}
518 
519 /* print the first part of an error message, given the line number */
520 void
521 pe(int pelineno)
522 {
523 	if (nfiles > 1)
524 		printf("%s: ", cfilename);
525 	printf("%d: ", pelineno);
526 }
527 
528 void
529 checkknown(char *mac)
530 {
531 
532 	if (eq(mac, "."))
533 		return;
534 	if (binsrch(mac) >= 0)
535 		return;
536 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
537 		return;
538 
539 	pe(lineno);
540 	printf("Unknown command: .%s\n", mac);
541 }
542 
543 /*
544  * We have a .de xx line in "line".  Add xx to the list of known commands.
545  */
546 void
547 addcmd(char *line)
548 {
549 	char *mac;
550 
551 	/* grab the macro being defined */
552 	mac = line+4;
553 	while (isspace((unsigned char)*mac))
554 		mac++;
555 	if (*mac == 0) {
556 		pe(lineno);
557 		printf("illegal define: %s\n", line);
558 		return;
559 	}
560 	mac[2] = 0;
561 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
562 		mac[1] = 0;
563 	if (ncmds >= MAXCMDS) {
564 		printf("Only %d known commands allowed\n", MAXCMDS);
565 		exit(1);
566 	}
567 	addmac(mac);
568 }
569 
570 /*
571  * Add mac to the list.  We should really have some kind of tree
572  * structure here but this is a quick-and-dirty job and I just don't
573  * have time to mess with it.  (I wonder if this will come back to haunt
574  * me someday?)  Anyway, I claim that .de is fairly rare in user
575  * nroff programs, and the register loop below is pretty fast.
576  */
577 void
578 addmac(char *mac)
579 {
580 	char **src, **dest, **loc;
581 
582 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
583 #ifdef DEBUG
584 		printf("binsrch(%s) -> already in table\n", mac);
585 #endif /* DEBUG */
586 		return;
587 	}
588 	/* binsrch sets slot as a side effect */
589 #ifdef DEBUG
590 	printf("binsrch(%s) -> %d\n", mac, slot);
591 #endif
592 	loc = &knowncmds[slot];
593 	src = &knowncmds[ncmds-1];
594 	dest = src+1;
595 	while (dest > loc)
596 		*dest-- = *src--;
597 	*loc = malloc(3);
598 	strcpy(*loc, mac);
599 	ncmds++;
600 #ifdef DEBUG
601 	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
602 	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
603 	    knowncmds[slot+2], ncmds);
604 #endif
605 }
606 
607 /*
608  * Do a binary search in knowncmds for mac.
609  * If found, return the index.  If not, return -1.
610  */
611 int
612 binsrch(char *mac)
613 {
614 	char *p;	/* pointer to current cmd in list */
615 	int d;		/* difference if any */
616 	int mid;	/* mid point in binary search */
617 	int top, bot;	/* boundaries of bin search, inclusive */
618 
619 	top = ncmds-1;
620 	bot = 0;
621 	while (top >= bot) {
622 		mid = (top+bot)/2;
623 		p = knowncmds[mid];
624 		d = p[0] - mac[0];
625 		if (d == 0)
626 			d = p[1] - mac[1];
627 		if (d == 0)
628 			return mid;
629 		if (d < 0)
630 			bot = mid + 1;
631 		else
632 			top = mid - 1;
633 	}
634 	slot = bot;	/* place it would have gone */
635 	return -1;
636 }
637