xref: /netbsd-src/usr.bin/checknr/checknr.c (revision 1f2744e6e4915c9da2a3f980279398c4cf7d5e6d)
1 /*	$NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
#ifndef lint
/* Copyright notice carried as a string; left out when `lint' is defined. */
static char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*
 * Version identification.  The `#if 0' branch disables the original
 * Berkeley SCCS id, so only the NetBSD RCS id below is compiled.
 */
#if 0
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$NetBSD: checknr.c,v 1.4 1995/03/26 04:10:19 glass Exp $";
#endif
#endif /* not lint */
49 
50 /*
51  * checknr: check an nroff/troff input file for matching macro calls.
52  * we also attempt to match size and font changes, but only the embedded
53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
54  * later but for now think of these restrictions as contributions to
55  * structured typesetting.
56  */
57 #include <stdio.h>
58 #include <string.h>
59 #include <ctype.h>
60 
/* Fixed capacities of the three static tables used by this program. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * Each entry records one opener (macro, \s or \f) that has not been
 * closed yet.  stktop is the index of the top entry; process() sets it
 * to -1 to mean "empty" before reading a file.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro pushed by chkcmd */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number on which the opener was seen */
} stk[MAXSTK];
int stktop;
75 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro name with the macro that closes it.
 * The list ends at the first entry whose opbr is 0; the -a option in
 * main() appends further pairs at that position.  The first two entries
 * are placeholders whose indices (SZ and FT) also tag the embedded \s
 * and \f escapes on the stack.  Some pairs are listed more than once
 * because several macro packages share them; chkcmd() stops at the
 * first entry that matches.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	"sz",	"sz",	/* also \s */
#define FT	1
	"ft",	"ft",	/* also \f */
	/* the -mm package */
	"AL",	"LE",
	"AS",	"AE",
	"BL",	"LE",
	"BS",	"BE",
	"DF",	"DE",
	"DL",	"LE",
	"DS",	"DE",
	"FS",	"FE",
	"ML",	"LE",
	"NS",	"NE",
	"RL",	"LE",
	"VL",	"LE",
	/* the -ms package */
	"AB",	"AE",
	"BD",	"DE",
	"CD",	"DE",
	"DS",	"DE",
	"FS",	"FE",
	"ID",	"DE",
	"KF",	"KE",
	"KS",	"KE",
	"LD",	"DE",
	"LG",	"NL",
	"QS",	"QE",
	"RS",	"RE",
	"SM",	"NL",
	"XA",	"XE",
	"XS",	"XE",
	/* The -me package */
	"(b",	")b",
	"(c",	")c",
	"(d",	")d",
	"(f",	")f",
	"(l",	")l",
	"(q",	")q",
	"(x",	")x",
	"(z",	")z",
	/* Things needed by preprocessors */
	"EQ",	"EN",
	"TS",	"TE",
	/* Refer */
	"[",	"]",
	0,	0	/* end-of-table marker */
};
133 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search over this array comparing the first
 * two characters of each name, so the entries must stay sorted in
 * ascending character-code order.  addmac() inserts new names at the
 * position binsrch() reports, which keeps the order intact.  The
 * trailing 0 lets main() count the initial entries into ncmds.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
171 
/* Program-wide state shared by the routines below. */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line (filled by fgets in process) */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries currently in knowncmds */
int	slot;		/* slot in knowncmds where binsrch's last failed lookup would insert */

/* Old-style declaration; this file does not include <stdlib.h>. */
char	*malloc();
182 
183 main(argc, argv)
184 int argc;
185 char **argv;
186 {
187 	FILE *f;
188 	int i;
189 	char *cp;
190 	char b1[4];
191 
192 	/* Figure out how many known commands there are */
193 	while (knowncmds[ncmds])
194 		ncmds++;
195 	while (argc > 1 && argv[1][0] == '-') {
196 		switch(argv[1][1]) {
197 
198 		/* -a: add pairs of macros */
199 		case 'a':
200 			i = strlen(argv[1]) - 2;
201 			if (i % 6 != 0)
202 				usage();
203 			/* look for empty macro slots */
204 			for (i=0; br[i].opbr; i++)
205 				;
206 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
207 				br[i].opbr = malloc(3);
208 				strncpy(br[i].opbr, cp, 2);
209 				br[i].clbr = malloc(3);
210 				strncpy(br[i].clbr, cp+3, 2);
211 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
212 				addmac(br[i].clbr);
213 				i++;
214 			}
215 			break;
216 
217 		/* -c: add known commands */
218 		case 'c':
219 			i = strlen(argv[1]) - 2;
220 			if (i % 3 != 0)
221 				usage();
222 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
223 				if (cp[2] && cp[2] != '.')
224 					usage();
225 				strncpy(b1, cp, 2);
226 				addmac(b1);
227 			}
228 			break;
229 
230 		/* -f: ignore font changes */
231 		case 'f':
232 			fflag = 1;
233 			break;
234 
235 		/* -s: ignore size changes */
236 		case 's':
237 			sflag = 1;
238 			break;
239 		default:
240 			usage();
241 		}
242 		argc--; argv++;
243 	}
244 
245 	nfiles = argc - 1;
246 
247 	if (nfiles > 0) {
248 		for (i=1; i<argc; i++) {
249 			cfilename = argv[i];
250 			f = fopen(cfilename, "r");
251 			if (f == NULL)
252 				perror(cfilename);
253 			else
254 				process(f);
255 		}
256 	} else {
257 		cfilename = "stdin";
258 		process(stdin);
259 	}
260 	exit(0);
261 }
262 
/*
 * usage: print the one-line synopsis on standard output and terminate
 * the program with exit status 1.
 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
268 
269 process(f)
270 FILE *f;
271 {
272 	register int i, n;
273 	char mac[5];	/* The current macro or nroff command */
274 	int pl;
275 
276 	stktop = -1;
277 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
278 		if (line[0] == '.') {
279 			/*
280 			 * find and isolate the macro/command name.
281 			 */
282 			strncpy(mac, line+1, 4);
283 			if (isspace(mac[0])) {
284 				pe(lineno);
285 				printf("Empty command\n");
286 			} else if (isspace(mac[1])) {
287 				mac[1] = 0;
288 			} else if (isspace(mac[2])) {
289 				mac[2] = 0;
290 			} else if (mac[0] != '\\' || mac[1] != '\"') {
291 				pe(lineno);
292 				printf("Command too long\n");
293 			}
294 
295 			/*
296 			 * Is it a known command?
297 			 */
298 			checkknown(mac);
299 
300 			/*
301 			 * Should we add it?
302 			 */
303 			if (eq(mac, "de"))
304 				addcmd(line);
305 
306 			chkcmd(line, mac);
307 		}
308 
309 		/*
310 		 * At this point we process the line looking
311 		 * for \s and \f.
312 		 */
313 		for (i=0; line[i]; i++)
314 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
315 				if (!sflag && line[++i]=='s') {
316 					pl = line[++i];
317 					if (isdigit(pl)) {
318 						n = pl - '0';
319 						pl = ' ';
320 					} else
321 						n = 0;
322 					while (isdigit(line[++i]))
323 						n = 10 * n + line[i] - '0';
324 					i--;
325 					if (n == 0) {
326 						if (stk[stktop].opno == SZ) {
327 							stktop--;
328 						} else {
329 							pe(lineno);
330 							printf("unmatched \\s0\n");
331 						}
332 					} else {
333 						stk[++stktop].opno = SZ;
334 						stk[stktop].pl = pl;
335 						stk[stktop].parm = n;
336 						stk[stktop].lno = lineno;
337 					}
338 				} else if (!fflag && line[i]=='f') {
339 					n = line[++i];
340 					if (n == 'P') {
341 						if (stk[stktop].opno == FT) {
342 							stktop--;
343 						} else {
344 							pe(lineno);
345 							printf("unmatched \\fP\n");
346 						}
347 					} else {
348 						stk[++stktop].opno = FT;
349 						stk[stktop].pl = 1;
350 						stk[stktop].parm = n;
351 						stk[stktop].lno = lineno;
352 					}
353 				}
354 			}
355 	}
356 	/*
357 	 * We've hit the end and look at all this stuff that hasn't been
358 	 * matched yet!  Complain, complain.
359 	 */
360 	for (i=stktop; i>=0; i--) {
361 		complain(i);
362 	}
363 }
364 
365 complain(i)
366 {
367 	pe(stk[i].lno);
368 	printf("Unmatched ");
369 	prop(i);
370 	printf("\n");
371 }
372 
373 prop(i)
374 {
375 	if (stk[i].pl == 0)
376 		printf(".%s", br[stk[i].opno].opbr);
377 	else switch(stk[i].opno) {
378 	case SZ:
379 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
380 		break;
381 	case FT:
382 		printf("\\f%c", stk[i].parm);
383 		break;
384 	default:
385 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
386 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
387 	}
388 }
389 
390 chkcmd(line, mac)
391 char *line;
392 char *mac;
393 {
394 	register int i, n;
395 
396 	/*
397 	 * Check to see if it matches top of stack.
398 	 */
399 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
400 		stktop--;	/* OK. Pop & forget */
401 	else {
402 		/* No. Maybe it's an opener */
403 		for (i=0; br[i].opbr; i++) {
404 			if (eq(mac, br[i].opbr)) {
405 				/* Found. Push it. */
406 				stktop++;
407 				stk[stktop].opno = i;
408 				stk[stktop].pl = 0;
409 				stk[stktop].parm = 0;
410 				stk[stktop].lno = lineno;
411 				break;
412 			}
413 			/*
414 			 * Maybe it's an unmatched closer.
415 			 * NOTE: this depends on the fact
416 			 * that none of the closers can be
417 			 * openers too.
418 			 */
419 			if (eq(mac, br[i].clbr)) {
420 				nomatch(mac);
421 				break;
422 			}
423 		}
424 	}
425 }
426 
427 nomatch(mac)
428 char *mac;
429 {
430 	register int i, j;
431 
432 	/*
433 	 * Look for a match further down on stack
434 	 * If we find one, it suggests that the stuff in
435 	 * between is supposed to match itself.
436 	 */
437 	for (j=stktop; j>=0; j--)
438 		if (eq(mac,br[stk[j].opno].clbr)) {
439 			/* Found.  Make a good diagnostic. */
440 			if (j == stktop-2) {
441 				/*
442 				 * Check for special case \fx..\fR and don't
443 				 * complain.
444 				 */
445 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
446 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
447 					stktop = j -1;
448 					return;
449 				}
450 				/*
451 				 * We have two unmatched frobs.  Chances are
452 				 * they were intended to match, so we mention
453 				 * them together.
454 				 */
455 				pe(stk[j+1].lno);
456 				prop(j+1);
457 				printf(" does not match %d: ", stk[j+2].lno);
458 				prop(j+2);
459 				printf("\n");
460 			} else for (i=j+1; i <= stktop; i++) {
461 				complain(i);
462 			}
463 			stktop = j-1;
464 			return;
465 		}
466 	/* Didn't find one.  Throw this away. */
467 	pe(lineno);
468 	printf("Unmatched .%s\n", mac);
469 }
470 
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 contain the same characters, 0 otherwise.
 * Neither string is modified.
 */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}
477 
478 /* print the first part of an error message, given the line number */
479 pe(lineno)
480 int lineno;
481 {
482 	if (nfiles > 1)
483 		printf("%s: ", cfilename);
484 	printf("%d: ", lineno);
485 }
486 
487 checkknown(mac)
488 char *mac;
489 {
490 
491 	if (eq(mac, "."))
492 		return;
493 	if (binsrch(mac) >= 0)
494 		return;
495 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
496 		return;
497 
498 	pe(lineno);
499 	printf("Unknown command: .%s\n", mac);
500 }
501 
502 /*
503  * We have a .de xx line in "line".  Add xx to the list of known commands.
504  */
505 addcmd(line)
506 char *line;
507 {
508 	char *mac;
509 
510 	/* grab the macro being defined */
511 	mac = line+4;
512 	while (isspace(*mac))
513 		mac++;
514 	if (*mac == 0) {
515 		pe(lineno);
516 		printf("illegal define: %s\n", line);
517 		return;
518 	}
519 	mac[2] = 0;
520 	if (isspace(mac[1]) || mac[1] == '\\')
521 		mac[1] = 0;
522 	if (ncmds >= MAXCMDS) {
523 		printf("Only %d known commands allowed\n", MAXCMDS);
524 		exit(1);
525 	}
526 	addmac(mac);
527 }
528 
529 /*
530  * Add mac to the list.  We should really have some kind of tree
531  * structure here but this is a quick-and-dirty job and I just don't
532  * have time to mess with it.  (I wonder if this will come back to haunt
533  * me someday?)  Anyway, I claim that .de is fairly rare in user
534  * nroff programs, and the register loop below is pretty fast.
535  */
536 addmac(mac)
537 char *mac;
538 {
539 	register char **src, **dest, **loc;
540 
541 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
542 #ifdef DEBUG
543 		printf("binsrch(%s) -> already in table\n", mac);
544 #endif DEBUG
545 		return;
546 	}
547 	/* binsrch sets slot as a side effect */
548 #ifdef DEBUG
549 printf("binsrch(%s) -> %d\n", mac, slot);
550 #endif
551 	loc = &knowncmds[slot];
552 	src = &knowncmds[ncmds-1];
553 	dest = src+1;
554 	while (dest > loc)
555 		*dest-- = *src--;
556 	*loc = malloc(3);
557 	strcpy(*loc, mac);
558 	ncmds++;
559 #ifdef DEBUG
560 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
561 #endif
562 }
563 
564 /*
565  * Do a binary search in knowncmds for mac.
566  * If found, return the index.  If not, return -1.
567  */
568 binsrch(mac)
569 char *mac;
570 {
571 	register char *p;	/* pointer to current cmd in list */
572 	register int d;		/* difference if any */
573 	register int mid;	/* mid point in binary search */
574 	register int top, bot;	/* boundaries of bin search, inclusive */
575 
576 	top = ncmds-1;
577 	bot = 0;
578 	while (top >= bot) {
579 		mid = (top+bot)/2;
580 		p = knowncmds[mid];
581 		d = p[0] - mac[0];
582 		if (d == 0)
583 			d = p[1] - mac[1];
584 		if (d == 0)
585 			return mid;
586 		if (d < 0)
587 			bot = mid + 1;
588 		else
589 			top = mid - 1;
590 	}
591 	slot = bot;	/* place it would have gone */
592 	return -1;
593 }
594