xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 21e37cc72a480a47828990a439cde7ac9ffaf0c6)
1 /*	$NetBSD: checknr.c,v 1.13 2004/01/05 23:23:34 jmmv Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 	The Regents of the University of California.  All rights reserved.\n");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: checknr.c,v 1.13 2004/01/05 23:23:34 jmmv Exp $");
43 #endif
44 #endif /* not lint */
45 
46 /*
47  * checknr: check an nroff/troff input file for matching macro calls.
48  * we also attempt to match size and font changes, but only the embedded
49  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
50  * later but for now think of these restrictions as contributions to
51  * structured typesetting.
52  */
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 
58 #define MAXSTK	100	/* Stack size */
59 #define MAXBR	100	/* Max number of bracket pairs known */
60 #define MAXCMDS	500	/* Max number of commands known */
61 
/*
 * One remembered opener: a macro from br[], or an embedded \s / \f
 * escape.  Entries are pushed when an opener is seen and popped when
 * the matching closer turns up.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack of unmatched openers */
int stktop;			/* index of the top entry; process() starts it at -1 */
72 
/*
 * Table of bracket pairs: each opener together with the closer that
 * ends it.  The first two slots are reserved for size and font changes
 * and are addressed through SZ and FT; the table ends at the first
 * entry whose opbr is a null pointer.  The -a option appends to it.
 */
#define SZ	0	/* slot of the size-change pair (also \s) */
#define FT	1	/* slot of the font-change pair (also \f) */

struct brstr {
	char *opbr;	/* opening macro */
	char *clbr;	/* closing macro */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ */
	{ "ft", "ft" },		/* FT */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* The -mdoc package */
	{ "Ao", "Ac" },
	{ "Bd", "Ed" },
	{ "Bk", "Ek" },
	{ "Bo", "Bc" },
	{ "Do", "Dc" },
	{ "Fo", "Fc" },
	{ "Oo", "Oc" },
	{ "Po", "Pc" },
	{ "Qo", "Qc" },
	{ "Rs", "Re" },
	{ "So", "Sc" },
	{ "Xo", "Xc" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of table */
	{ NULL, NULL },
};
143 
144 /*
145  * All commands known to nroff, plus macro packages.
146  * Used so we can complain about unrecognized commands.
147  */
148 char *knowncmds[MAXCMDS] = {
149 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
150 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
151 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
152 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
153 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
154 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
155 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" ,  "B1",
156 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
157 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
158 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
159 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
160 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
161 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
162 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
163 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
164 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
165 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
166 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
167 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
168 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
169 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
170 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
171 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
172 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
173 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
174 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
175 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
176 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
177 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
178 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
179 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
180 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
181 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
182 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
183 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
184 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
185 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
186 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
187 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
188 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
189 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
190 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
191 };
192 
/* State shared by the checker routines. */
int	lineno;		/* line number of the input line being examined */
char	*cfilename;	/* name of the file being checked ("stdin" if none) */
int	nfiles;		/* how many file operands were given */
int	fflag;		/* set by -f: do not check \f font changes */
int	sflag;		/* set by -s: do not check \s size changes */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* insertion point left behind by a failed binsrch */

/* Forward declarations. */
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
void	checkknown(char *mac);
void	chkcmd(char *line, char *mac);
void	complain(int i);
int	eq(const void *s1, const void *s2);
int	main(int argc, char **argv);
void	nomatch(char *mac);
void	pe(int pelineno);
void	process(FILE *f);
void	prop(int i);
void	usage(void);
214 
215 int
216 main(int argc, char **argv)
217 {
218 	FILE *f;
219 	int i;
220 	char *cp;
221 	char b1[4];
222 
223 	/* Figure out how many known commands there are */
224 	while (knowncmds[ncmds])
225 		ncmds++;
226 	while (argc > 1 && argv[1][0] == '-') {
227 		switch(argv[1][1]) {
228 
229 		/* -a: add pairs of macros */
230 		case 'a':
231 			i = strlen(argv[1]) - 2;
232 			if (i % 6 != 0)
233 				usage();
234 			/* look for empty macro slots */
235 			for (i=0; br[i].opbr; i++)
236 				;
237 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
238 				br[i].opbr = malloc(3);
239 				strncpy(br[i].opbr, cp, 2);
240 				br[i].clbr = malloc(3);
241 				strncpy(br[i].clbr, cp+3, 2);
242 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
243 				addmac(br[i].clbr);
244 				i++;
245 			}
246 			break;
247 
248 		/* -c: add known commands */
249 		case 'c':
250 			i = strlen(argv[1]) - 2;
251 			if (i % 3 != 0)
252 				usage();
253 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
254 				if (cp[2] && cp[2] != '.')
255 					usage();
256 				strncpy(b1, cp, 2);
257 				addmac(b1);
258 			}
259 			break;
260 
261 		/* -f: ignore font changes */
262 		case 'f':
263 			fflag = 1;
264 			break;
265 
266 		/* -s: ignore size changes */
267 		case 's':
268 			sflag = 1;
269 			break;
270 		default:
271 			usage();
272 		}
273 		argc--; argv++;
274 	}
275 
276 	nfiles = argc - 1;
277 
278 	if (nfiles > 0) {
279 		for (i=1; i<argc; i++) {
280 			cfilename = argv[i];
281 			f = fopen(cfilename, "r");
282 			if (f == NULL)
283 				perror(cfilename);
284 			else {
285 				process(f);
286 				fclose(f);
287 			}
288 		}
289 	} else {
290 		cfilename = "stdin";
291 		process(stdin);
292 	}
293 	exit(0);
294 }
295 
/*
 * usage: print the command synopsis and exit with status 1.
 * The message goes to standard error so that it does not get mixed
 * into (or redirected along with) the report written to standard output.
 */
void
usage(void)
{
	fprintf(stderr,
	    "usage: checknr [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [file ...]\n");
	exit(1);
}
302 
303 void
304 process(FILE *f)
305 {
306 	int i, n;
307 	char line[256];	/* the current line */
308 	char mac[5];	/* The current macro or nroff command */
309 	int pl;
310 
311 	stktop = -1;
312 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
313 		if (line[0] == '.') {
314 			/*
315 			 * find and isolate the macro/command name.
316 			 */
317 			strncpy(mac, line+1, 4);
318 			if (isspace((unsigned char)mac[0])) {
319 				pe(lineno);
320 				printf("Empty command\n");
321 			} else if (isspace((unsigned char)mac[1])) {
322 				mac[1] = 0;
323 			} else if (isspace((unsigned char)mac[2])) {
324 				mac[2] = 0;
325 			} else if (mac[0] != '\\' || mac[1] != '\"') {
326 				pe(lineno);
327 				printf("Command too long\n");
328 			}
329 
330 			/*
331 			 * Is it a known command?
332 			 */
333 			checkknown(mac);
334 
335 			/*
336 			 * Should we add it?
337 			 */
338 			if (eq(mac, "de"))
339 				addcmd(line);
340 
341 			chkcmd(line, mac);
342 		}
343 
344 		/*
345 		 * At this point we process the line looking
346 		 * for \s and \f.
347 		 */
348 		for (i=0; line[i]; i++)
349 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
350 				if (!sflag && line[++i]=='s') {
351 					pl = line[++i];
352 					if (isdigit((unsigned char)pl)) {
353 						n = pl - '0';
354 						pl = ' ';
355 					} else
356 						n = 0;
357 					while (isdigit((unsigned char)line[++i]))
358 						n = 10 * n + line[i] - '0';
359 					i--;
360 					if (n == 0) {
361 						if (stk[stktop].opno == SZ) {
362 							stktop--;
363 						} else {
364 							pe(lineno);
365 							printf("unmatched \\s0\n");
366 						}
367 					} else {
368 						stk[++stktop].opno = SZ;
369 						stk[stktop].pl = pl;
370 						stk[stktop].parm = n;
371 						stk[stktop].lno = lineno;
372 					}
373 				} else if (!fflag && line[i]=='f') {
374 					n = line[++i];
375 					if (n == 'P') {
376 						if (stk[stktop].opno == FT) {
377 							stktop--;
378 						} else {
379 							pe(lineno);
380 							printf("unmatched \\fP\n");
381 						}
382 					} else {
383 						stk[++stktop].opno = FT;
384 						stk[stktop].pl = 1;
385 						stk[stktop].parm = n;
386 						stk[stktop].lno = lineno;
387 					}
388 				}
389 			}
390 	}
391 	/*
392 	 * We've hit the end and look at all this stuff that hasn't been
393 	 * matched yet!  Complain, complain.
394 	 */
395 	for (i=stktop; i>=0; i--) {
396 		complain(i);
397 	}
398 }
399 
400 void
401 complain(int i)
402 {
403 	pe(stk[i].lno);
404 	printf("Unmatched ");
405 	prop(i);
406 	printf("\n");
407 }
408 
409 void
410 prop(int i)
411 {
412 	if (stk[i].pl == 0)
413 		printf(".%s", br[stk[i].opno].opbr);
414 	else switch(stk[i].opno) {
415 	case SZ:
416 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
417 		break;
418 	case FT:
419 		printf("\\f%c", stk[i].parm);
420 		break;
421 	default:
422 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
423 			i, stk[i].opno, br[stk[i].opno].opbr,
424 			br[stk[i].opno].clbr);
425 	}
426 }
427 
428 void
429 chkcmd(char *line, char *mac)
430 {
431 	int i;
432 
433 	/*
434 	 * Check to see if it matches top of stack.
435 	 */
436 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
437 		stktop--;	/* OK. Pop & forget */
438 	else {
439 		/* No. Maybe it's an opener */
440 		for (i=0; br[i].opbr; i++) {
441 			if (eq(mac, br[i].opbr)) {
442 				/* Found. Push it. */
443 				stktop++;
444 				stk[stktop].opno = i;
445 				stk[stktop].pl = 0;
446 				stk[stktop].parm = 0;
447 				stk[stktop].lno = lineno;
448 				break;
449 			}
450 			/*
451 			 * Maybe it's an unmatched closer.
452 			 * NOTE: this depends on the fact
453 			 * that none of the closers can be
454 			 * openers too.
455 			 */
456 			if (eq(mac, br[i].clbr)) {
457 				nomatch(mac);
458 				break;
459 			}
460 		}
461 	}
462 }
463 
464 void
465 nomatch(char *mac)
466 {
467 	int i, j;
468 
469 	/*
470 	 * Look for a match further down on stack
471 	 * If we find one, it suggests that the stuff in
472 	 * between is supposed to match itself.
473 	 */
474 	for (j=stktop; j>=0; j--)
475 		if (eq(mac,br[stk[j].opno].clbr)) {
476 			/* Found.  Make a good diagnostic. */
477 			if (j == stktop-2) {
478 				/*
479 				 * Check for special case \fx..\fR and don't
480 				 * complain.
481 				 */
482 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
483 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
484 					stktop = j -1;
485 					return;
486 				}
487 				/*
488 				 * We have two unmatched frobs.  Chances are
489 				 * they were intended to match, so we mention
490 				 * them together.
491 				 */
492 				pe(stk[j+1].lno);
493 				prop(j+1);
494 				printf(" does not match %d: ", stk[j+2].lno);
495 				prop(j+2);
496 				printf("\n");
497 			} else for (i=j+1; i <= stktop; i++) {
498 				complain(i);
499 			}
500 			stktop = j-1;
501 			return;
502 		}
503 	/* Didn't find one.  Throw this away. */
504 	pe(lineno);
505 	printf("Unmatched .%s\n", mac);
506 }
507 
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *left = s1;
	const char *right = s2;

	return !strcmp(left, right);
}
514 
515 /* print the first part of an error message, given the line number */
516 void
517 pe(int pelineno)
518 {
519 	if (nfiles > 1)
520 		printf("%s: ", cfilename);
521 	printf("%d: ", pelineno);
522 }
523 
524 void
525 checkknown(char *mac)
526 {
527 
528 	if (eq(mac, "."))
529 		return;
530 	if (binsrch(mac) >= 0)
531 		return;
532 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
533 		return;
534 
535 	pe(lineno);
536 	printf("Unknown command: .%s\n", mac);
537 }
538 
539 /*
540  * We have a .de xx line in "line".  Add xx to the list of known commands.
541  */
542 void
543 addcmd(char *line)
544 {
545 	char *mac;
546 
547 	/* grab the macro being defined */
548 	mac = line+4;
549 	while (isspace((unsigned char)*mac))
550 		mac++;
551 	if (*mac == 0) {
552 		pe(lineno);
553 		printf("illegal define: %s\n", line);
554 		return;
555 	}
556 	mac[2] = 0;
557 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
558 		mac[1] = 0;
559 	if (ncmds >= MAXCMDS) {
560 		printf("Only %d known commands allowed\n", MAXCMDS);
561 		exit(1);
562 	}
563 	addmac(mac);
564 }
565 
566 /*
567  * Add mac to the list.  We should really have some kind of tree
568  * structure here but this is a quick-and-dirty job and I just don't
569  * have time to mess with it.  (I wonder if this will come back to haunt
570  * me someday?)  Anyway, I claim that .de is fairly rare in user
571  * nroff programs, and the register loop below is pretty fast.
572  */
573 void
574 addmac(char *mac)
575 {
576 	char **src, **dest, **loc;
577 
578 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
579 #ifdef DEBUG
580 		printf("binsrch(%s) -> already in table\n", mac);
581 #endif /* DEBUG */
582 		return;
583 	}
584 	/* binsrch sets slot as a side effect */
585 #ifdef DEBUG
586 	printf("binsrch(%s) -> %d\n", mac, slot);
587 #endif
588 	loc = &knowncmds[slot];
589 	src = &knowncmds[ncmds-1];
590 	dest = src+1;
591 	while (dest > loc)
592 		*dest-- = *src--;
593 	*loc = malloc(3);
594 	strcpy(*loc, mac);
595 	ncmds++;
596 #ifdef DEBUG
597 	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
598 	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
599 	    knowncmds[slot+2], ncmds);
600 #endif
601 }
602 
603 /*
604  * Do a binary search in knowncmds for mac.
605  * If found, return the index.  If not, return -1.
606  */
607 int
608 binsrch(char *mac)
609 {
610 	char *p;	/* pointer to current cmd in list */
611 	int d;		/* difference if any */
612 	int mid;	/* mid point in binary search */
613 	int top, bot;	/* boundaries of bin search, inclusive */
614 
615 	top = ncmds-1;
616 	bot = 0;
617 	while (top >= bot) {
618 		mid = (top+bot)/2;
619 		p = knowncmds[mid];
620 		d = p[0] - mac[0];
621 		if (d == 0)
622 			d = p[1] - mac[1];
623 		if (d == 0)
624 			return mid;
625 		if (d < 0)
626 			bot = mid + 1;
627 		else
628 			top = mid - 1;
629 	}
630 	slot = bot;	/* place it would have gone */
631 	return -1;
632 }
633