xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: checknr.c,v 1.14 2004/07/09 11:41:26 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 	The Regents of the University of California.  All rights reserved.\n");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: checknr.c,v 1.14 2004/07/09 11:41:26 wiz Exp $");
43 #endif
44 #endif /* not lint */
45 
46 /*
47  * checknr: check an nroff/troff input file for matching macro calls.
48  * we also attempt to match size and font changes, but only the embedded
49  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
50  * later but for now think of these restrictions as contributions to
51  * structured typesetting.
52  */
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 
#define MAXSTK	100	/* number of entries in stk[] */
#define MAXBR	100	/* number of rows in the bracket pair table */
#define MAXCMDS	500	/* number of slots in the known command table */

/*
 * One opener that has been seen and is still waiting for its closer.
 */
struct stkstr {
	int opno;	/* which bracket pair: an index into br[] */
	int pl;		/* '+', '-' or ' ' for \s; 1 for \f; 0 for a macro */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line on which the opener appeared */
};

/* The stack of pending openers and the index of its top entry. */
struct stkstr stk[MAXSTK];
int stktop;
72 
/*
 * Opener/closer pairs.  Rows SZ and FT stand for the embedded \s and \f
 * escapes and are referred to by index, so they must stay first.  The
 * row of null pointers marks the end of the table; pairs given with -a
 * are appended there at run time.
 */
#define SZ	0	/* row for size changes (\s) */
#define FT	1	/* row for font changes (\f) */

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the closing macro */
};

struct brstr br[MAXBR] = {
	/* bare troff */
	{ "sz", "sz" },
	{ "ft", "ft" },

	/* -mm */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* -ms */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* -me */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* -mdoc */
	{ "Ao", "Ac" },
	{ "Bd", "Ed" },
	{ "Bk", "Ek" },
	{ "Bo", "Bc" },
	{ "Do", "Dc" },
	{ "Fo", "Fc" },
	{ "Oo", "Oc" },
	{ "Po", "Pc" },
	{ "Qo", "Qc" },
	{ "Rs", "Re" },
	{ "So", "Sc" },
	{ "Xo", "Xc" },

	/* preprocessors: eqn and tbl */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* refer */
	{ "[", "]" },

	/* end of table */
	{ 0, 0 },
};
143 
144 /*
145  * All commands known to nroff, plus macro packages.
146  * Used so we can complain about unrecognized commands.
147  */
148 char *knowncmds[MAXCMDS] = {
149 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
150 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
151 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
152 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
153 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
154 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
155 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" ,  "B1",
156 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
157 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
158 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
159 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
160 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
161 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
162 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
163 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
164 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
165 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
166 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
167 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
168 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
169 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
170 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
171 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
172 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
173 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
174 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
175 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
176 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
177 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
178 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
179 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
180 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
181 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
182 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
183 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
184 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
185 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
186 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
187 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
188 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
189 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
190 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
191 };
192 
/* Position in the input, used when printing diagnostics. */
char *cfilename;	/* name of the file being read */
int lineno;		/* number of the line being read */
int nfiles;		/* how many files were named on the command line */

/* Command line switches. */
int fflag;		/* -f given: do not check \f */
int sflag;		/* -s given: do not check \s */

/* State of the known command table. */
int ncmds;		/* number of entries in knowncmds */
int slot;		/* insertion point left behind by a failed binsrch */
200 
/* Forward declarations for everything defined in this file. */
int	main(int argc, char **argv);
void	usage(void);
void	process(FILE *f);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);
void	complain(int i);
void	prop(int i);
void	pe(int pelineno);
int	eq(const void *s1, const void *s2);
void	checkknown(char *mac);
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
214 
215 int
216 main(int argc, char **argv)
217 {
218 	FILE *f;
219 	int i;
220 	char *cp;
221 	char b1[4];
222 
223 	/* Figure out how many known commands there are */
224 	while (knowncmds[ncmds])
225 		ncmds++;
226 	while (argc > 1 && argv[1][0] == '-') {
227 		switch(argv[1][1]) {
228 
229 		/* -a: add pairs of macros */
230 		case 'a':
231 			i = strlen(argv[1]) - 2;
232 			if (i % 6 != 0)
233 				usage();
234 			/* look for empty macro slots */
235 			for (i=0; br[i].opbr; i++)
236 				;
237 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
238 				br[i].opbr = malloc(3);
239 				strncpy(br[i].opbr, cp, 2);
240 				br[i].clbr = malloc(3);
241 				strncpy(br[i].clbr, cp+3, 2);
242 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
243 				addmac(br[i].clbr);
244 				i++;
245 			}
246 			break;
247 
248 		/* -c: add known commands */
249 		case 'c':
250 			i = strlen(argv[1]) - 2;
251 			if (i % 3 != 0)
252 				usage();
253 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
254 				if (cp[2] && cp[2] != '.')
255 					usage();
256 				strncpy(b1, cp, 2);
257 				addmac(b1);
258 			}
259 			break;
260 
261 		/* -f: ignore font changes */
262 		case 'f':
263 			fflag = 1;
264 			break;
265 
266 		/* -s: ignore size changes */
267 		case 's':
268 			sflag = 1;
269 			break;
270 		default:
271 			usage();
272 		}
273 		argc--; argv++;
274 	}
275 
276 	nfiles = argc - 1;
277 
278 	if (nfiles > 0) {
279 		for (i=1; i<argc; i++) {
280 			cfilename = argv[i];
281 			f = fopen(cfilename, "r");
282 			if (f == NULL)
283 				perror(cfilename);
284 			else {
285 				process(f);
286 				fclose(f);
287 			}
288 		}
289 	} else {
290 		cfilename = "stdin";
291 		process(stdin);
292 	}
293 	exit(0);
294 }
295 
/*
 * Print a one-line synopsis on standard error and exit with status 1.
 */
void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
	    prog);
	exit(1);
}
304 
305 void
306 process(FILE *f)
307 {
308 	int i, n;
309 	char line[256];	/* the current line */
310 	char mac[5];	/* The current macro or nroff command */
311 	int pl;
312 
313 	stktop = -1;
314 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
315 		if (line[0] == '.') {
316 			/*
317 			 * find and isolate the macro/command name.
318 			 */
319 			strncpy(mac, line+1, 4);
320 			if (isspace((unsigned char)mac[0])) {
321 				pe(lineno);
322 				printf("Empty command\n");
323 			} else if (isspace((unsigned char)mac[1])) {
324 				mac[1] = 0;
325 			} else if (isspace((unsigned char)mac[2])) {
326 				mac[2] = 0;
327 			} else if (mac[0] != '\\' || mac[1] != '\"') {
328 				pe(lineno);
329 				printf("Command too long\n");
330 			}
331 
332 			/*
333 			 * Is it a known command?
334 			 */
335 			checkknown(mac);
336 
337 			/*
338 			 * Should we add it?
339 			 */
340 			if (eq(mac, "de"))
341 				addcmd(line);
342 
343 			chkcmd(line, mac);
344 		}
345 
346 		/*
347 		 * At this point we process the line looking
348 		 * for \s and \f.
349 		 */
350 		for (i=0; line[i]; i++)
351 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
352 				if (!sflag && line[++i]=='s') {
353 					pl = line[++i];
354 					if (isdigit((unsigned char)pl)) {
355 						n = pl - '0';
356 						pl = ' ';
357 					} else
358 						n = 0;
359 					while (isdigit((unsigned char)line[++i]))
360 						n = 10 * n + line[i] - '0';
361 					i--;
362 					if (n == 0) {
363 						if (stk[stktop].opno == SZ) {
364 							stktop--;
365 						} else {
366 							pe(lineno);
367 							printf("unmatched \\s0\n");
368 						}
369 					} else {
370 						stk[++stktop].opno = SZ;
371 						stk[stktop].pl = pl;
372 						stk[stktop].parm = n;
373 						stk[stktop].lno = lineno;
374 					}
375 				} else if (!fflag && line[i]=='f') {
376 					n = line[++i];
377 					if (n == 'P') {
378 						if (stk[stktop].opno == FT) {
379 							stktop--;
380 						} else {
381 							pe(lineno);
382 							printf("unmatched \\fP\n");
383 						}
384 					} else {
385 						stk[++stktop].opno = FT;
386 						stk[stktop].pl = 1;
387 						stk[stktop].parm = n;
388 						stk[stktop].lno = lineno;
389 					}
390 				}
391 			}
392 	}
393 	/*
394 	 * We've hit the end and look at all this stuff that hasn't been
395 	 * matched yet!  Complain, complain.
396 	 */
397 	for (i=stktop; i>=0; i--) {
398 		complain(i);
399 	}
400 }
401 
402 void
403 complain(int i)
404 {
405 	pe(stk[i].lno);
406 	printf("Unmatched ");
407 	prop(i);
408 	printf("\n");
409 }
410 
411 void
412 prop(int i)
413 {
414 	if (stk[i].pl == 0)
415 		printf(".%s", br[stk[i].opno].opbr);
416 	else switch(stk[i].opno) {
417 	case SZ:
418 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
419 		break;
420 	case FT:
421 		printf("\\f%c", stk[i].parm);
422 		break;
423 	default:
424 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
425 			i, stk[i].opno, br[stk[i].opno].opbr,
426 			br[stk[i].opno].clbr);
427 	}
428 }
429 
430 void
431 chkcmd(char *line, char *mac)
432 {
433 	int i;
434 
435 	/*
436 	 * Check to see if it matches top of stack.
437 	 */
438 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
439 		stktop--;	/* OK. Pop & forget */
440 	else {
441 		/* No. Maybe it's an opener */
442 		for (i=0; br[i].opbr; i++) {
443 			if (eq(mac, br[i].opbr)) {
444 				/* Found. Push it. */
445 				stktop++;
446 				stk[stktop].opno = i;
447 				stk[stktop].pl = 0;
448 				stk[stktop].parm = 0;
449 				stk[stktop].lno = lineno;
450 				break;
451 			}
452 			/*
453 			 * Maybe it's an unmatched closer.
454 			 * NOTE: this depends on the fact
455 			 * that none of the closers can be
456 			 * openers too.
457 			 */
458 			if (eq(mac, br[i].clbr)) {
459 				nomatch(mac);
460 				break;
461 			}
462 		}
463 	}
464 }
465 
466 void
467 nomatch(char *mac)
468 {
469 	int i, j;
470 
471 	/*
472 	 * Look for a match further down on stack
473 	 * If we find one, it suggests that the stuff in
474 	 * between is supposed to match itself.
475 	 */
476 	for (j=stktop; j>=0; j--)
477 		if (eq(mac,br[stk[j].opno].clbr)) {
478 			/* Found.  Make a good diagnostic. */
479 			if (j == stktop-2) {
480 				/*
481 				 * Check for special case \fx..\fR and don't
482 				 * complain.
483 				 */
484 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
485 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
486 					stktop = j -1;
487 					return;
488 				}
489 				/*
490 				 * We have two unmatched frobs.  Chances are
491 				 * they were intended to match, so we mention
492 				 * them together.
493 				 */
494 				pe(stk[j+1].lno);
495 				prop(j+1);
496 				printf(" does not match %d: ", stk[j+2].lno);
497 				prop(j+2);
498 				printf("\n");
499 			} else for (i=j+1; i <= stktop; i++) {
500 				complain(i);
501 			}
502 			stktop = j-1;
503 			return;
504 		}
505 	/* Didn't find one.  Throw this away. */
506 	pe(lineno);
507 	printf("Unmatched .%s\n", mac);
508 }
509 
/*
 * eq: return 1 if the two NUL-terminated strings are identical,
 * 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *left = s1;
	const char *right = s2;

	return !strcmp(left, right);
}
516 
517 /* print the first part of an error message, given the line number */
518 void
519 pe(int pelineno)
520 {
521 	if (nfiles > 1)
522 		printf("%s: ", cfilename);
523 	printf("%d: ", pelineno);
524 }
525 
526 void
527 checkknown(char *mac)
528 {
529 
530 	if (eq(mac, "."))
531 		return;
532 	if (binsrch(mac) >= 0)
533 		return;
534 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
535 		return;
536 
537 	pe(lineno);
538 	printf("Unknown command: .%s\n", mac);
539 }
540 
541 /*
542  * We have a .de xx line in "line".  Add xx to the list of known commands.
543  */
544 void
545 addcmd(char *line)
546 {
547 	char *mac;
548 
549 	/* grab the macro being defined */
550 	mac = line+4;
551 	while (isspace((unsigned char)*mac))
552 		mac++;
553 	if (*mac == 0) {
554 		pe(lineno);
555 		printf("illegal define: %s\n", line);
556 		return;
557 	}
558 	mac[2] = 0;
559 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
560 		mac[1] = 0;
561 	if (ncmds >= MAXCMDS) {
562 		printf("Only %d known commands allowed\n", MAXCMDS);
563 		exit(1);
564 	}
565 	addmac(mac);
566 }
567 
568 /*
569  * Add mac to the list.  We should really have some kind of tree
570  * structure here but this is a quick-and-dirty job and I just don't
571  * have time to mess with it.  (I wonder if this will come back to haunt
572  * me someday?)  Anyway, I claim that .de is fairly rare in user
573  * nroff programs, and the register loop below is pretty fast.
574  */
575 void
576 addmac(char *mac)
577 {
578 	char **src, **dest, **loc;
579 
580 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
581 #ifdef DEBUG
582 		printf("binsrch(%s) -> already in table\n", mac);
583 #endif /* DEBUG */
584 		return;
585 	}
586 	/* binsrch sets slot as a side effect */
587 #ifdef DEBUG
588 	printf("binsrch(%s) -> %d\n", mac, slot);
589 #endif
590 	loc = &knowncmds[slot];
591 	src = &knowncmds[ncmds-1];
592 	dest = src+1;
593 	while (dest > loc)
594 		*dest-- = *src--;
595 	*loc = malloc(3);
596 	strcpy(*loc, mac);
597 	ncmds++;
598 #ifdef DEBUG
599 	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
600 	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
601 	    knowncmds[slot+2], ncmds);
602 #endif
603 }
604 
605 /*
606  * Do a binary search in knowncmds for mac.
607  * If found, return the index.  If not, return -1.
608  */
609 int
610 binsrch(char *mac)
611 {
612 	char *p;	/* pointer to current cmd in list */
613 	int d;		/* difference if any */
614 	int mid;	/* mid point in binary search */
615 	int top, bot;	/* boundaries of bin search, inclusive */
616 
617 	top = ncmds-1;
618 	bot = 0;
619 	while (top >= bot) {
620 		mid = (top+bot)/2;
621 		p = knowncmds[mid];
622 		d = p[0] - mac[0];
623 		if (d == 0)
624 			d = p[1] - mac[1];
625 		if (d == 0)
626 			return mid;
627 		if (d < 0)
628 			bot = mid + 1;
629 		else
630 			top = mid - 1;
631 	}
632 	slot = bot;	/* place it would have gone */
633 	return -1;
634 }
635