xref: /netbsd-src/usr.bin/checknr/checknr.c (revision d9158b13b5dfe46201430699a3f7a235ecf28df3)
1 /*
2  * Copyright (c) 1980 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
#ifndef lint
/* Copyright notice kept as a string in the object file (omitted for lint). */
char copyright[] =
	"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
	" All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*static char sccsid[] = "from: @(#)checknr.c	5.4 (Berkeley) 6/1/90";*/
/* Revision identification string for this file (omitted for lint). */
static char rcsid[] = "$Id: checknr.c,v 1.2 1993/08/01 18:18:04 mycroft Exp $";
#endif /* not lint */
44 
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
52 #include <stdio.h>
53 #include <ctype.h>
54 
#define MAXSTK	100	/* Stack size: number of entries in stk[] */
#define MAXBR	100	/* Max number of bracket pairs known: number of entries in br[] */
#define MAXCMDS	500	/* Max number of commands known: number of entries in knowncmds[] */
58 
/*
 * The stack on which we remember what we've seen so far.
 * Each entry describes one opener (a macro from br[], or an embedded
 * \s or \f escape) that has not yet been closed.
 */
struct stkstr {
	int opno;	/* number of opening bracket: index into br[] */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for entries pushed by a macro command */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;	/* index of the top entry in stk[]; process() starts each file at -1 (empty) */
69 
/*
 * The kinds of opening and closing brackets.
 * Each entry pairs an opening macro with the macro that closes it; the
 * table ends at the first entry whose opbr is null.  SZ and FT name the
 * entries that embedded \s and \f escapes share with .sz and .ft.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{ "sz",	"sz" },	/* also \s */
#define FT	1
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	{ 0,	0 }
};
127 
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search over this table comparing the first
 * two characters of each name, so the entries must stay in ascending
 * order; addmac() inserts new names at the position binsrch() reports.
 * The trailing 0 ends the initial list; main() counts the entries
 * before it into ncmds.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
165 
/* State shared by the routines below. */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line, as read by fgets() in process() */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds: number of entries in use */
int	slot;		/* slot in knowncmds found by binsrch; set only when the search fails */

/* Old-style declaration; this file does not include a header for malloc. */
char	*malloc();
176 
177 main(argc, argv)
178 int argc;
179 char **argv;
180 {
181 	FILE *f;
182 	int i;
183 	char *cp;
184 	char b1[4];
185 
186 	/* Figure out how many known commands there are */
187 	while (knowncmds[ncmds])
188 		ncmds++;
189 	while (argc > 1 && argv[1][0] == '-') {
190 		switch(argv[1][1]) {
191 
192 		/* -a: add pairs of macros */
193 		case 'a':
194 			i = strlen(argv[1]) - 2;
195 			if (i % 6 != 0)
196 				usage();
197 			/* look for empty macro slots */
198 			for (i=0; br[i].opbr; i++)
199 				;
200 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
201 				br[i].opbr = malloc(3);
202 				strncpy(br[i].opbr, cp, 2);
203 				br[i].clbr = malloc(3);
204 				strncpy(br[i].clbr, cp+3, 2);
205 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
206 				addmac(br[i].clbr);
207 				i++;
208 			}
209 			break;
210 
211 		/* -c: add known commands */
212 		case 'c':
213 			i = strlen(argv[1]) - 2;
214 			if (i % 3 != 0)
215 				usage();
216 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
217 				if (cp[2] && cp[2] != '.')
218 					usage();
219 				strncpy(b1, cp, 2);
220 				addmac(b1);
221 			}
222 			break;
223 
224 		/* -f: ignore font changes */
225 		case 'f':
226 			fflag = 1;
227 			break;
228 
229 		/* -s: ignore size changes */
230 		case 's':
231 			sflag = 1;
232 			break;
233 		default:
234 			usage();
235 		}
236 		argc--; argv++;
237 	}
238 
239 	nfiles = argc - 1;
240 
241 	if (nfiles > 0) {
242 		for (i=1; i<argc; i++) {
243 			cfilename = argv[i];
244 			f = fopen(cfilename, "r");
245 			if (f == NULL)
246 				perror(cfilename);
247 			else
248 				process(f);
249 		}
250 	} else {
251 		cfilename = "stdin";
252 		process(stdin);
253 	}
254 	exit(0);
255 }
256 
/* Print the command synopsis on standard output and exit with status 1. */
int
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
262 
263 process(f)
264 FILE *f;
265 {
266 	register int i, n;
267 	char mac[5];	/* The current macro or nroff command */
268 	int pl;
269 
270 	stktop = -1;
271 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
272 		if (line[0] == '.') {
273 			/*
274 			 * find and isolate the macro/command name.
275 			 */
276 			strncpy(mac, line+1, 4);
277 			if (isspace(mac[0])) {
278 				pe(lineno);
279 				printf("Empty command\n");
280 			} else if (isspace(mac[1])) {
281 				mac[1] = 0;
282 			} else if (isspace(mac[2])) {
283 				mac[2] = 0;
284 			} else if (mac[0] != '\\' || mac[1] != '\"') {
285 				pe(lineno);
286 				printf("Command too long\n");
287 			}
288 
289 			/*
290 			 * Is it a known command?
291 			 */
292 			checkknown(mac);
293 
294 			/*
295 			 * Should we add it?
296 			 */
297 			if (eq(mac, "de"))
298 				addcmd(line);
299 
300 			chkcmd(line, mac);
301 		}
302 
303 		/*
304 		 * At this point we process the line looking
305 		 * for \s and \f.
306 		 */
307 		for (i=0; line[i]; i++)
308 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
309 				if (!sflag && line[++i]=='s') {
310 					pl = line[++i];
311 					if (isdigit(pl)) {
312 						n = pl - '0';
313 						pl = ' ';
314 					} else
315 						n = 0;
316 					while (isdigit(line[++i]))
317 						n = 10 * n + line[i] - '0';
318 					i--;
319 					if (n == 0) {
320 						if (stk[stktop].opno == SZ) {
321 							stktop--;
322 						} else {
323 							pe(lineno);
324 							printf("unmatched \\s0\n");
325 						}
326 					} else {
327 						stk[++stktop].opno = SZ;
328 						stk[stktop].pl = pl;
329 						stk[stktop].parm = n;
330 						stk[stktop].lno = lineno;
331 					}
332 				} else if (!fflag && line[i]=='f') {
333 					n = line[++i];
334 					if (n == 'P') {
335 						if (stk[stktop].opno == FT) {
336 							stktop--;
337 						} else {
338 							pe(lineno);
339 							printf("unmatched \\fP\n");
340 						}
341 					} else {
342 						stk[++stktop].opno = FT;
343 						stk[stktop].pl = 1;
344 						stk[stktop].parm = n;
345 						stk[stktop].lno = lineno;
346 					}
347 				}
348 			}
349 	}
350 	/*
351 	 * We've hit the end and look at all this stuff that hasn't been
352 	 * matched yet!  Complain, complain.
353 	 */
354 	for (i=stktop; i>=0; i--) {
355 		complain(i);
356 	}
357 }
358 
359 complain(i)
360 {
361 	pe(stk[i].lno);
362 	printf("Unmatched ");
363 	prop(i);
364 	printf("\n");
365 }
366 
367 prop(i)
368 {
369 	if (stk[i].pl == 0)
370 		printf(".%s", br[stk[i].opno].opbr);
371 	else switch(stk[i].opno) {
372 	case SZ:
373 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
374 		break;
375 	case FT:
376 		printf("\\f%c", stk[i].parm);
377 		break;
378 	default:
379 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
380 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
381 	}
382 }
383 
384 chkcmd(line, mac)
385 char *line;
386 char *mac;
387 {
388 	register int i, n;
389 
390 	/*
391 	 * Check to see if it matches top of stack.
392 	 */
393 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
394 		stktop--;	/* OK. Pop & forget */
395 	else {
396 		/* No. Maybe it's an opener */
397 		for (i=0; br[i].opbr; i++) {
398 			if (eq(mac, br[i].opbr)) {
399 				/* Found. Push it. */
400 				stktop++;
401 				stk[stktop].opno = i;
402 				stk[stktop].pl = 0;
403 				stk[stktop].parm = 0;
404 				stk[stktop].lno = lineno;
405 				break;
406 			}
407 			/*
408 			 * Maybe it's an unmatched closer.
409 			 * NOTE: this depends on the fact
410 			 * that none of the closers can be
411 			 * openers too.
412 			 */
413 			if (eq(mac, br[i].clbr)) {
414 				nomatch(mac);
415 				break;
416 			}
417 		}
418 	}
419 }
420 
421 nomatch(mac)
422 char *mac;
423 {
424 	register int i, j;
425 
426 	/*
427 	 * Look for a match further down on stack
428 	 * If we find one, it suggests that the stuff in
429 	 * between is supposed to match itself.
430 	 */
431 	for (j=stktop; j>=0; j--)
432 		if (eq(mac,br[stk[j].opno].clbr)) {
433 			/* Found.  Make a good diagnostic. */
434 			if (j == stktop-2) {
435 				/*
436 				 * Check for special case \fx..\fR and don't
437 				 * complain.
438 				 */
439 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
440 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
441 					stktop = j -1;
442 					return;
443 				}
444 				/*
445 				 * We have two unmatched frobs.  Chances are
446 				 * they were intended to match, so we mention
447 				 * them together.
448 				 */
449 				pe(stk[j+1].lno);
450 				prop(j+1);
451 				printf(" does not match %d: ", stk[j+2].lno);
452 				prop(j+2);
453 				printf("\n");
454 			} else for (i=j+1; i <= stktop; i++) {
455 				complain(i);
456 			}
457 			stktop = j-1;
458 			return;
459 		}
460 	/* Didn't find one.  Throw this away. */
461 	pe(lineno);
462 	printf("Unmatched .%s\n", mac);
463 }
464 
/* eq: report whether s1 and s2 hold the same string (1 if so, else 0) */
int
eq(s1, s2)
char *s1, *s2;
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
471 
472 /* print the first part of an error message, given the line number */
473 pe(lineno)
474 int lineno;
475 {
476 	if (nfiles > 1)
477 		printf("%s: ", cfilename);
478 	printf("%d: ", lineno);
479 }
480 
481 checkknown(mac)
482 char *mac;
483 {
484 
485 	if (eq(mac, "."))
486 		return;
487 	if (binsrch(mac) >= 0)
488 		return;
489 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
490 		return;
491 
492 	pe(lineno);
493 	printf("Unknown command: .%s\n", mac);
494 }
495 
496 /*
497  * We have a .de xx line in "line".  Add xx to the list of known commands.
498  */
499 addcmd(line)
500 char *line;
501 {
502 	char *mac;
503 
504 	/* grab the macro being defined */
505 	mac = line+4;
506 	while (isspace(*mac))
507 		mac++;
508 	if (*mac == 0) {
509 		pe(lineno);
510 		printf("illegal define: %s\n", line);
511 		return;
512 	}
513 	mac[2] = 0;
514 	if (isspace(mac[1]) || mac[1] == '\\')
515 		mac[1] = 0;
516 	if (ncmds >= MAXCMDS) {
517 		printf("Only %d known commands allowed\n", MAXCMDS);
518 		exit(1);
519 	}
520 	addmac(mac);
521 }
522 
523 /*
524  * Add mac to the list.  We should really have some kind of tree
525  * structure here but this is a quick-and-dirty job and I just don't
526  * have time to mess with it.  (I wonder if this will come back to haunt
527  * me someday?)  Anyway, I claim that .de is fairly rare in user
528  * nroff programs, and the register loop below is pretty fast.
529  */
530 addmac(mac)
531 char *mac;
532 {
533 	register char **src, **dest, **loc;
534 
535 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
536 #ifdef DEBUG
537 		printf("binsrch(%s) -> already in table\n", mac);
538 #endif DEBUG
539 		return;
540 	}
541 	/* binsrch sets slot as a side effect */
542 #ifdef DEBUG
543 printf("binsrch(%s) -> %d\n", mac, slot);
544 #endif
545 	loc = &knowncmds[slot];
546 	src = &knowncmds[ncmds-1];
547 	dest = src+1;
548 	while (dest > loc)
549 		*dest-- = *src--;
550 	*loc = malloc(3);
551 	strcpy(*loc, mac);
552 	ncmds++;
553 #ifdef DEBUG
554 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
555 #endif
556 }
557 
558 /*
559  * Do a binary search in knowncmds for mac.
560  * If found, return the index.  If not, return -1.
561  */
562 binsrch(mac)
563 char *mac;
564 {
565 	register char *p;	/* pointer to current cmd in list */
566 	register int d;		/* difference if any */
567 	register int mid;	/* mid point in binary search */
568 	register int top, bot;	/* boundaries of bin search, inclusive */
569 
570 	top = ncmds-1;
571 	bot = 0;
572 	while (top >= bot) {
573 		mid = (top+bot)/2;
574 		p = knowncmds[mid];
575 		d = p[0] - mac[0];
576 		if (d == 0)
577 			d = p[1] - mac[1];
578 		if (d == 0)
579 			return mid;
580 		if (d < 0)
581 			bot = mid + 1;
582 		else
583 			top = mid - 1;
584 	}
585 	slot = bot;	/* place it would have gone */
586 	return -1;
587 }
588