xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 64204053df9953d295721f9be8c2cc6f75e7bb3f)
1 /*	$NetBSD: checknr.c,v 1.8 2002/01/21 16:40:19 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.8 2002/01/21 16:40:19 wiz Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 
62 #define MAXSTK	100	/* Stack size */
63 #define MAXBR	100	/* Max number of bracket pairs known */
64 #define MAXCMDS	500	/* Max number of commands known */
65 
/*
 * The stack on which we remember what we've seen so far.
 * Each entry describes one opener (a macro from br[], an embedded \s
 * or an embedded \f) that has not been matched by its closer yet.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
			/* (0 is used for every opener pushed as a macro) */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the newest entry in stk; -1 when empty */
76 
/*
 * The kinds of opening and closing brackets.
 * The list ends at the first entry whose opbr is 0; the -a option
 * appends further pairs at that position.  SZ and FT name the first
 * two entries, which are also used for the embedded \s and \f escapes.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz"},	/* also \s */
#define FT	1
	{ "ft",	"ft"},	/* also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/* the -ms package */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	{0,	0},	/* end of list marker */
};
134 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The entries must stay in ascending order of their first two
 * characters: binsrch() does a binary search on exactly those two
 * characters, and addmac() inserts new names at the position that
 * search reports.  The initial list ends with a 0 entry, which main()
 * uses to count it.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
172 
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line, as filled in by fgets */
char	*cfilename;	/* name of current file ("stdin" if none given) */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds (entries in use) */
int	slot;		/* slot in knowncmds found by binsrch: where a */
			/* name that was not found would be inserted */
181 
182 void	addcmd(char *);
183 void	addmac(char *);
184 int	binsrch(char *);
185 void	checkknown(char *);
186 void	chkcmd(char *, char *);
187 void	complain(int);
188 int	eq(const void *, const void *);
189 int	main(int, char **);
190 void	nomatch(char *);
191 void	pe(int);
192 void	process(FILE *);
193 void	prop(int);
194 void	usage(void);
195 
196 int
197 main(int argc, char **argv)
198 {
199 	FILE *f;
200 	int i;
201 	char *cp;
202 	char b1[4];
203 
204 	/* Figure out how many known commands there are */
205 	while (knowncmds[ncmds])
206 		ncmds++;
207 	while (argc > 1 && argv[1][0] == '-') {
208 		switch(argv[1][1]) {
209 
210 		/* -a: add pairs of macros */
211 		case 'a':
212 			i = strlen(argv[1]) - 2;
213 			if (i % 6 != 0)
214 				usage();
215 			/* look for empty macro slots */
216 			for (i=0; br[i].opbr; i++)
217 				;
218 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
219 				br[i].opbr = malloc(3);
220 				strncpy(br[i].opbr, cp, 2);
221 				br[i].clbr = malloc(3);
222 				strncpy(br[i].clbr, cp+3, 2);
223 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
224 				addmac(br[i].clbr);
225 				i++;
226 			}
227 			break;
228 
229 		/* -c: add known commands */
230 		case 'c':
231 			i = strlen(argv[1]) - 2;
232 			if (i % 3 != 0)
233 				usage();
234 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
235 				if (cp[2] && cp[2] != '.')
236 					usage();
237 				strncpy(b1, cp, 2);
238 				addmac(b1);
239 			}
240 			break;
241 
242 		/* -f: ignore font changes */
243 		case 'f':
244 			fflag = 1;
245 			break;
246 
247 		/* -s: ignore size changes */
248 		case 's':
249 			sflag = 1;
250 			break;
251 		default:
252 			usage();
253 		}
254 		argc--; argv++;
255 	}
256 
257 	nfiles = argc - 1;
258 
259 	if (nfiles > 0) {
260 		for (i=1; i<argc; i++) {
261 			cfilename = argv[i];
262 			f = fopen(cfilename, "r");
263 			if (f == NULL)
264 				perror(cfilename);
265 			else
266 				process(f);
267 		}
268 	} else {
269 		cfilename = "stdin";
270 		process(stdin);
271 	}
272 	exit(0);
273 }
274 
/*
 * usage: print a summary of the command line syntax on the standard
 * error stream and exit with status 1.  Never returns.
 */
void
usage(void)
{
	fprintf(stderr,
	    "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
281 
282 void
283 process(FILE *f)
284 {
285 	int i, n;
286 	char mac[5];	/* The current macro or nroff command */
287 	int pl;
288 
289 	stktop = -1;
290 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
291 		if (line[0] == '.') {
292 			/*
293 			 * find and isolate the macro/command name.
294 			 */
295 			strncpy(mac, line+1, 4);
296 			if (isspace((unsigned char)mac[0])) {
297 				pe(lineno);
298 				printf("Empty command\n");
299 			} else if (isspace((unsigned char)mac[1])) {
300 				mac[1] = 0;
301 			} else if (isspace((unsigned char)mac[2])) {
302 				mac[2] = 0;
303 			} else if (mac[0] != '\\' || mac[1] != '\"') {
304 				pe(lineno);
305 				printf("Command too long\n");
306 			}
307 
308 			/*
309 			 * Is it a known command?
310 			 */
311 			checkknown(mac);
312 
313 			/*
314 			 * Should we add it?
315 			 */
316 			if (eq(mac, "de"))
317 				addcmd(line);
318 
319 			chkcmd(line, mac);
320 		}
321 
322 		/*
323 		 * At this point we process the line looking
324 		 * for \s and \f.
325 		 */
326 		for (i=0; line[i]; i++)
327 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
328 				if (!sflag && line[++i]=='s') {
329 					pl = line[++i];
330 					if (isdigit((unsigned char)pl)) {
331 						n = pl - '0';
332 						pl = ' ';
333 					} else
334 						n = 0;
335 					while (isdigit((unsigned char)line[++i]))
336 						n = 10 * n + line[i] - '0';
337 					i--;
338 					if (n == 0) {
339 						if (stk[stktop].opno == SZ) {
340 							stktop--;
341 						} else {
342 							pe(lineno);
343 							printf("unmatched \\s0\n");
344 						}
345 					} else {
346 						stk[++stktop].opno = SZ;
347 						stk[stktop].pl = pl;
348 						stk[stktop].parm = n;
349 						stk[stktop].lno = lineno;
350 					}
351 				} else if (!fflag && line[i]=='f') {
352 					n = line[++i];
353 					if (n == 'P') {
354 						if (stk[stktop].opno == FT) {
355 							stktop--;
356 						} else {
357 							pe(lineno);
358 							printf("unmatched \\fP\n");
359 						}
360 					} else {
361 						stk[++stktop].opno = FT;
362 						stk[stktop].pl = 1;
363 						stk[stktop].parm = n;
364 						stk[stktop].lno = lineno;
365 					}
366 				}
367 			}
368 	}
369 	/*
370 	 * We've hit the end and look at all this stuff that hasn't been
371 	 * matched yet!  Complain, complain.
372 	 */
373 	for (i=stktop; i>=0; i--) {
374 		complain(i);
375 	}
376 }
377 
378 void
379 complain(int i)
380 {
381 	pe(stk[i].lno);
382 	printf("Unmatched ");
383 	prop(i);
384 	printf("\n");
385 }
386 
387 void
388 prop(int i)
389 {
390 	if (stk[i].pl == 0)
391 		printf(".%s", br[stk[i].opno].opbr);
392 	else switch(stk[i].opno) {
393 	case SZ:
394 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
395 		break;
396 	case FT:
397 		printf("\\f%c", stk[i].parm);
398 		break;
399 	default:
400 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
401 			i, stk[i].opno, br[stk[i].opno].opbr,
402 			br[stk[i].opno].clbr);
403 	}
404 }
405 
406 void
407 chkcmd(char *line, char *mac)
408 {
409 	int i;
410 
411 	/*
412 	 * Check to see if it matches top of stack.
413 	 */
414 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415 		stktop--;	/* OK. Pop & forget */
416 	else {
417 		/* No. Maybe it's an opener */
418 		for (i=0; br[i].opbr; i++) {
419 			if (eq(mac, br[i].opbr)) {
420 				/* Found. Push it. */
421 				stktop++;
422 				stk[stktop].opno = i;
423 				stk[stktop].pl = 0;
424 				stk[stktop].parm = 0;
425 				stk[stktop].lno = lineno;
426 				break;
427 			}
428 			/*
429 			 * Maybe it's an unmatched closer.
430 			 * NOTE: this depends on the fact
431 			 * that none of the closers can be
432 			 * openers too.
433 			 */
434 			if (eq(mac, br[i].clbr)) {
435 				nomatch(mac);
436 				break;
437 			}
438 		}
439 	}
440 }
441 
442 void
443 nomatch(char *mac)
444 {
445 	int i, j;
446 
447 	/*
448 	 * Look for a match further down on stack
449 	 * If we find one, it suggests that the stuff in
450 	 * between is supposed to match itself.
451 	 */
452 	for (j=stktop; j>=0; j--)
453 		if (eq(mac,br[stk[j].opno].clbr)) {
454 			/* Found.  Make a good diagnostic. */
455 			if (j == stktop-2) {
456 				/*
457 				 * Check for special case \fx..\fR and don't
458 				 * complain.
459 				 */
460 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
461 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
462 					stktop = j -1;
463 					return;
464 				}
465 				/*
466 				 * We have two unmatched frobs.  Chances are
467 				 * they were intended to match, so we mention
468 				 * them together.
469 				 */
470 				pe(stk[j+1].lno);
471 				prop(j+1);
472 				printf(" does not match %d: ", stk[j+2].lno);
473 				prop(j+2);
474 				printf("\n");
475 			} else for (i=j+1; i <= stktop; i++) {
476 				complain(i);
477 			}
478 			stktop = j-1;
479 			return;
480 		}
481 	/* Didn't find one.  Throw this away. */
482 	pe(lineno);
483 	printf("Unmatched .%s\n", mac);
484 }
485 
/* eq: return 1 if the two NUL-terminated strings are identical, else 0 */
int
eq(const void *s1, const void *s2)
{
	const char *a = s1;
	const char *b = s2;

	return strcmp(a, b) == 0;
}
492 
493 /* print the first part of an error message, given the line number */
494 void
495 pe(int lineno)
496 {
497 	if (nfiles > 1)
498 		printf("%s: ", cfilename);
499 	printf("%d: ", lineno);
500 }
501 
502 void
503 checkknown(char *mac)
504 {
505 
506 	if (eq(mac, "."))
507 		return;
508 	if (binsrch(mac) >= 0)
509 		return;
510 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
511 		return;
512 
513 	pe(lineno);
514 	printf("Unknown command: .%s\n", mac);
515 }
516 
517 /*
518  * We have a .de xx line in "line".  Add xx to the list of known commands.
519  */
520 void
521 addcmd(char *line)
522 {
523 	char *mac;
524 
525 	/* grab the macro being defined */
526 	mac = line+4;
527 	while (isspace((unsigned char)*mac))
528 		mac++;
529 	if (*mac == 0) {
530 		pe(lineno);
531 		printf("illegal define: %s\n", line);
532 		return;
533 	}
534 	mac[2] = 0;
535 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
536 		mac[1] = 0;
537 	if (ncmds >= MAXCMDS) {
538 		printf("Only %d known commands allowed\n", MAXCMDS);
539 		exit(1);
540 	}
541 	addmac(mac);
542 }
543 
544 /*
545  * Add mac to the list.  We should really have some kind of tree
546  * structure here but this is a quick-and-dirty job and I just don't
547  * have time to mess with it.  (I wonder if this will come back to haunt
548  * me someday?)  Anyway, I claim that .de is fairly rare in user
549  * nroff programs, and the register loop below is pretty fast.
550  */
551 void
552 addmac(char *mac)
553 {
554 	char **src, **dest, **loc;
555 
556 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
557 #ifdef DEBUG
558 		printf("binsrch(%s) -> already in table\n", mac);
559 #endif /* DEBUG */
560 		return;
561 	}
562 	/* binsrch sets slot as a side effect */
563 #ifdef DEBUG
564 printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566 	loc = &knowncmds[slot];
567 	src = &knowncmds[ncmds-1];
568 	dest = src+1;
569 	while (dest > loc)
570 		*dest-- = *src--;
571 	*loc = malloc(3);
572 	strcpy(*loc, mac);
573 	ncmds++;
574 #ifdef DEBUG
575 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
576 #endif
577 }
578 
579 /*
580  * Do a binary search in knowncmds for mac.
581  * If found, return the index.  If not, return -1.
582  */
583 int
584 binsrch(char *mac)
585 {
586 	char *p;	/* pointer to current cmd in list */
587 	int d;		/* difference if any */
588 	int mid;	/* mid point in binary search */
589 	int top, bot;	/* boundaries of bin search, inclusive */
590 
591 	top = ncmds-1;
592 	bot = 0;
593 	while (top >= bot) {
594 		mid = (top+bot)/2;
595 		p = knowncmds[mid];
596 		d = p[0] - mac[0];
597 		if (d == 0)
598 			d = p[1] - mac[1];
599 		if (d == 0)
600 			return mid;
601 		if (d < 0)
602 			bot = mid + 1;
603 		else
604 			top = mid - 1;
605 	}
606 	slot = bot;	/* place it would have gone */
607 	return -1;
608 }
609