xref: /netbsd-src/usr.bin/checknr/checknr.c (revision ce63d6c20fc4ec8ddc95c84bb229e3c4ecf82b69)
1 /*
2  * Copyright (c) 1980 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 char copyright[] =
36 "@(#) Copyright (c) 1980 The Regents of the University of California.\n\
37  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #ifndef lint
41 static char sccsid[] = "@(#)checknr.c	5.4 (Berkeley) 6/1/90";
42 #endif /* not lint */
43 
44 /*
45  * checknr: check an nroff/troff input file for matching macro calls.
46  * we also attempt to match size and font changes, but only the embedded
47  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
48  * later but for now think of these restrictions as contributions to
49  * structured typesetting.
50  */
51 #include <stdio.h>
52 #include <ctype.h>
53 
54 #define MAXSTK	100	/* Stack size */
55 #define MAXBR	100	/* Max number of bracket pairs known */
56 #define MAXCMDS	500	/* Max number of commands known */
57 
/*
 * The stack on which we remember what we've seen so far.
 *
 * Each entry describes one opener that has not been matched yet: either
 * a bracket macro pushed by chkcmd(), or an embedded \s / \f escape
 * pushed by process().
 */
struct stkstr {
	int opno;	/* index into br[] of the opener (SZ for \s, FT for \f) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro request */
	int parm;	/* size for \s, font letter for \f, 0 for a macro request */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;		/* index of the top entry in stk[]; -1 when empty */
68 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening request with the request that closes it.
 * The list ends at the first entry whose opbr is a null pointer; the
 * unused tail of the array is where main() stores pairs given with -a.
 * SZ and FT name the two leading slots, which the \s and \f escape
 * handling also uses.
 */
struct brstr {
	char *opbr;	/* name of the opening request */
	char *clbr;	/* name of the request that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz" },	/* also \s */
#define FT	1
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end-of-list marker */
	{ NULL,	NULL }
};
126 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search over this table comparing only the
 * first two characters of each name, so entries must stay in ascending
 * order of those characters; addmac() inserts new names at the position
 * binsrch() reports.  The initial list ends with a null pointer, which
 * main() scans for to compute ncmds.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
164 
/*
 * Global state shared by all the routines below.
 */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries in use in knowncmds */
int	slot;		/* where binsrch says a missing name would go in knowncmds */

/* Old-style declaration; this file does not include <stdlib.h>. */
char	*malloc();
175 
176 main(argc, argv)
177 int argc;
178 char **argv;
179 {
180 	FILE *f;
181 	int i;
182 	char *cp;
183 	char b1[4];
184 
185 	/* Figure out how many known commands there are */
186 	while (knowncmds[ncmds])
187 		ncmds++;
188 	while (argc > 1 && argv[1][0] == '-') {
189 		switch(argv[1][1]) {
190 
191 		/* -a: add pairs of macros */
192 		case 'a':
193 			i = strlen(argv[1]) - 2;
194 			if (i % 6 != 0)
195 				usage();
196 			/* look for empty macro slots */
197 			for (i=0; br[i].opbr; i++)
198 				;
199 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
200 				br[i].opbr = malloc(3);
201 				strncpy(br[i].opbr, cp, 2);
202 				br[i].clbr = malloc(3);
203 				strncpy(br[i].clbr, cp+3, 2);
204 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
205 				addmac(br[i].clbr);
206 				i++;
207 			}
208 			break;
209 
210 		/* -c: add known commands */
211 		case 'c':
212 			i = strlen(argv[1]) - 2;
213 			if (i % 3 != 0)
214 				usage();
215 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
216 				if (cp[2] && cp[2] != '.')
217 					usage();
218 				strncpy(b1, cp, 2);
219 				addmac(b1);
220 			}
221 			break;
222 
223 		/* -f: ignore font changes */
224 		case 'f':
225 			fflag = 1;
226 			break;
227 
228 		/* -s: ignore size changes */
229 		case 's':
230 			sflag = 1;
231 			break;
232 		default:
233 			usage();
234 		}
235 		argc--; argv++;
236 	}
237 
238 	nfiles = argc - 1;
239 
240 	if (nfiles > 0) {
241 		for (i=1; i<argc; i++) {
242 			cfilename = argv[i];
243 			f = fopen(cfilename, "r");
244 			if (f == NULL)
245 				perror(cfilename);
246 			else
247 				process(f);
248 		}
249 	} else {
250 		cfilename = "stdin";
251 		process(stdin);
252 	}
253 	exit(0);
254 }
255 
/*
 * usage: print the command synopsis and exit with status 1.
 *
 * The message goes to stderr so that it cannot be mistaken for, or
 * piped along with, the diagnostics checknr writes to stdout.
 * Does not return.
 */
int
usage(void)
{
	fprintf(stderr, "Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}
261 
262 process(f)
263 FILE *f;
264 {
265 	register int i, n;
266 	char mac[5];	/* The current macro or nroff command */
267 	int pl;
268 
269 	stktop = -1;
270 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
271 		if (line[0] == '.') {
272 			/*
273 			 * find and isolate the macro/command name.
274 			 */
275 			strncpy(mac, line+1, 4);
276 			if (isspace(mac[0])) {
277 				pe(lineno);
278 				printf("Empty command\n");
279 			} else if (isspace(mac[1])) {
280 				mac[1] = 0;
281 			} else if (isspace(mac[2])) {
282 				mac[2] = 0;
283 			} else if (mac[0] != '\\' || mac[1] != '\"') {
284 				pe(lineno);
285 				printf("Command too long\n");
286 			}
287 
288 			/*
289 			 * Is it a known command?
290 			 */
291 			checkknown(mac);
292 
293 			/*
294 			 * Should we add it?
295 			 */
296 			if (eq(mac, "de"))
297 				addcmd(line);
298 
299 			chkcmd(line, mac);
300 		}
301 
302 		/*
303 		 * At this point we process the line looking
304 		 * for \s and \f.
305 		 */
306 		for (i=0; line[i]; i++)
307 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
308 				if (!sflag && line[++i]=='s') {
309 					pl = line[++i];
310 					if (isdigit(pl)) {
311 						n = pl - '0';
312 						pl = ' ';
313 					} else
314 						n = 0;
315 					while (isdigit(line[++i]))
316 						n = 10 * n + line[i] - '0';
317 					i--;
318 					if (n == 0) {
319 						if (stk[stktop].opno == SZ) {
320 							stktop--;
321 						} else {
322 							pe(lineno);
323 							printf("unmatched \\s0\n");
324 						}
325 					} else {
326 						stk[++stktop].opno = SZ;
327 						stk[stktop].pl = pl;
328 						stk[stktop].parm = n;
329 						stk[stktop].lno = lineno;
330 					}
331 				} else if (!fflag && line[i]=='f') {
332 					n = line[++i];
333 					if (n == 'P') {
334 						if (stk[stktop].opno == FT) {
335 							stktop--;
336 						} else {
337 							pe(lineno);
338 							printf("unmatched \\fP\n");
339 						}
340 					} else {
341 						stk[++stktop].opno = FT;
342 						stk[stktop].pl = 1;
343 						stk[stktop].parm = n;
344 						stk[stktop].lno = lineno;
345 					}
346 				}
347 			}
348 	}
349 	/*
350 	 * We've hit the end and look at all this stuff that hasn't been
351 	 * matched yet!  Complain, complain.
352 	 */
353 	for (i=stktop; i>=0; i--) {
354 		complain(i);
355 	}
356 }
357 
358 complain(i)
359 {
360 	pe(stk[i].lno);
361 	printf("Unmatched ");
362 	prop(i);
363 	printf("\n");
364 }
365 
366 prop(i)
367 {
368 	if (stk[i].pl == 0)
369 		printf(".%s", br[stk[i].opno].opbr);
370 	else switch(stk[i].opno) {
371 	case SZ:
372 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
373 		break;
374 	case FT:
375 		printf("\\f%c", stk[i].parm);
376 		break;
377 	default:
378 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
379 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
380 	}
381 }
382 
383 chkcmd(line, mac)
384 char *line;
385 char *mac;
386 {
387 	register int i, n;
388 
389 	/*
390 	 * Check to see if it matches top of stack.
391 	 */
392 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
393 		stktop--;	/* OK. Pop & forget */
394 	else {
395 		/* No. Maybe it's an opener */
396 		for (i=0; br[i].opbr; i++) {
397 			if (eq(mac, br[i].opbr)) {
398 				/* Found. Push it. */
399 				stktop++;
400 				stk[stktop].opno = i;
401 				stk[stktop].pl = 0;
402 				stk[stktop].parm = 0;
403 				stk[stktop].lno = lineno;
404 				break;
405 			}
406 			/*
407 			 * Maybe it's an unmatched closer.
408 			 * NOTE: this depends on the fact
409 			 * that none of the closers can be
410 			 * openers too.
411 			 */
412 			if (eq(mac, br[i].clbr)) {
413 				nomatch(mac);
414 				break;
415 			}
416 		}
417 	}
418 }
419 
420 nomatch(mac)
421 char *mac;
422 {
423 	register int i, j;
424 
425 	/*
426 	 * Look for a match further down on stack
427 	 * If we find one, it suggests that the stuff in
428 	 * between is supposed to match itself.
429 	 */
430 	for (j=stktop; j>=0; j--)
431 		if (eq(mac,br[stk[j].opno].clbr)) {
432 			/* Found.  Make a good diagnostic. */
433 			if (j == stktop-2) {
434 				/*
435 				 * Check for special case \fx..\fR and don't
436 				 * complain.
437 				 */
438 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
439 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
440 					stktop = j -1;
441 					return;
442 				}
443 				/*
444 				 * We have two unmatched frobs.  Chances are
445 				 * they were intended to match, so we mention
446 				 * them together.
447 				 */
448 				pe(stk[j+1].lno);
449 				prop(j+1);
450 				printf(" does not match %d: ", stk[j+2].lno);
451 				prop(j+2);
452 				printf("\n");
453 			} else for (i=j+1; i <= stktop; i++) {
454 				complain(i);
455 			}
456 			stktop = j-1;
457 			return;
458 		}
459 	/* Didn't find one.  Throw this away. */
460 	pe(lineno);
461 	printf("Unmatched .%s\n", mac);
462 }
463 
/*
 * eq: return 1 when the strings s1 and s2 have identical contents,
 * 0 otherwise.
 */
int
eq(char *s1, char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
470 
471 /* print the first part of an error message, given the line number */
472 pe(lineno)
473 int lineno;
474 {
475 	if (nfiles > 1)
476 		printf("%s: ", cfilename);
477 	printf("%d: ", lineno);
478 }
479 
480 checkknown(mac)
481 char *mac;
482 {
483 
484 	if (eq(mac, "."))
485 		return;
486 	if (binsrch(mac) >= 0)
487 		return;
488 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
489 		return;
490 
491 	pe(lineno);
492 	printf("Unknown command: .%s\n", mac);
493 }
494 
495 /*
496  * We have a .de xx line in "line".  Add xx to the list of known commands.
497  */
498 addcmd(line)
499 char *line;
500 {
501 	char *mac;
502 
503 	/* grab the macro being defined */
504 	mac = line+4;
505 	while (isspace(*mac))
506 		mac++;
507 	if (*mac == 0) {
508 		pe(lineno);
509 		printf("illegal define: %s\n", line);
510 		return;
511 	}
512 	mac[2] = 0;
513 	if (isspace(mac[1]) || mac[1] == '\\')
514 		mac[1] = 0;
515 	if (ncmds >= MAXCMDS) {
516 		printf("Only %d known commands allowed\n", MAXCMDS);
517 		exit(1);
518 	}
519 	addmac(mac);
520 }
521 
522 /*
523  * Add mac to the list.  We should really have some kind of tree
524  * structure here but this is a quick-and-dirty job and I just don't
525  * have time to mess with it.  (I wonder if this will come back to haunt
526  * me someday?)  Anyway, I claim that .de is fairly rare in user
527  * nroff programs, and the register loop below is pretty fast.
528  */
529 addmac(mac)
530 char *mac;
531 {
532 	register char **src, **dest, **loc;
533 
534 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
535 #ifdef DEBUG
536 		printf("binsrch(%s) -> already in table\n", mac);
537 #endif DEBUG
538 		return;
539 	}
540 	/* binsrch sets slot as a side effect */
541 #ifdef DEBUG
542 printf("binsrch(%s) -> %d\n", mac, slot);
543 #endif
544 	loc = &knowncmds[slot];
545 	src = &knowncmds[ncmds-1];
546 	dest = src+1;
547 	while (dest > loc)
548 		*dest-- = *src--;
549 	*loc = malloc(3);
550 	strcpy(*loc, mac);
551 	ncmds++;
552 #ifdef DEBUG
553 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
554 #endif
555 }
556 
557 /*
558  * Do a binary search in knowncmds for mac.
559  * If found, return the index.  If not, return -1.
560  */
561 binsrch(mac)
562 char *mac;
563 {
564 	register char *p;	/* pointer to current cmd in list */
565 	register int d;		/* difference if any */
566 	register int mid;	/* mid point in binary search */
567 	register int top, bot;	/* boundaries of bin search, inclusive */
568 
569 	top = ncmds-1;
570 	bot = 0;
571 	while (top >= bot) {
572 		mid = (top+bot)/2;
573 		p = knowncmds[mid];
574 		d = p[0] - mac[0];
575 		if (d == 0)
576 			d = p[1] - mac[1];
577 		if (d == 0)
578 			return mid;
579 		if (d < 0)
580 			bot = mid + 1;
581 		else
582 			top = mid - 1;
583 	}
584 	slot = bot;	/* place it would have gone */
585 	return -1;
586 }
587