xref: /minix3/usr.bin/xstr/xstr.c (revision 3179b9b918b79627288341afcb230649990f9e9c)
1 /*	$NetBSD: xstr.c,v 1.25 2011/09/16 15:39:31 joerg Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35  The Regents of the University of California.  All rights reserved.");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
41 #else
42 __RCSID("$NetBSD: xstr.c,v 1.25 2011/09/16 15:39:31 joerg Exp $");
43 #endif
44 #endif /* not lint */
45 
46 #include <sys/param.h>
47 #include <signal.h>
48 #include <errno.h>
49 #include <unistd.h>
50 #include <stdio.h>
51 #include <ctype.h>
52 #include <string.h>
53 #include <stdlib.h>
54 #include <err.h>
55 #include "pathnames.h"
56 
57 /*
58  * xstr - extract and hash strings in a C program
59  *
60  * Bill Joy UCB
61  * November, 1978
62  */
63 
64 static off_t	hashit(const char *, int);
65 __dead static void	onintr(int);
66 static off_t	yankstr(char **);
67 static int	octdigit(char);
68 static void	inithash(void);
69 static int	fgetNUL(char *, int, FILE *);
70 static int	xgetc(FILE *);
71 static void	flushsh(void);
72 static void	found(int, off_t, const char *);
73 static void	prstr(const char *);
74 static void	xsdotc(void);
75 static char	lastchr(const char *);
76 static int	istail(const char *, const char *);
77 static void	process(const char *);
78 __dead static void	usage(void);
79 
80 static off_t	tellpt;
81 static off_t	mesgpt;
82 static char	stringtmpfile[MAXPATHLEN];
83 static const char *strings =	"strings";
84 static const char *array =	0;
85 static int	cflg;
86 static int	vflg;
87 static int	readstd;
88 static char	linebuf[8192];
89 
#define	BUCKETS	128

/*
 * One interned string.  Entries are chained off bucket[]; hashit() picks
 * the chain from the low seven bits of the string's last character.
 */
struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* strdup'd copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: flushsh() still has to write it */
};

/* Chain heads: only the hnext member of each element is used. */
static struct hash bucket[BUCKETS];
98 
99 int
main(int argc,char * argv[])100 main(int argc, char *argv[])
101 {
102 	int c;
103 
104 	while ((c = getopt(argc, argv, "-cvl:")) != -1)
105 		switch (c) {
106 		case '-':
107 			readstd++;
108 			break;
109 		case 'c':
110 			cflg++;
111 			break;
112 		case 'v':
113 			vflg++;
114 			break;
115 		case 'l':
116 			array = optarg;
117 			break;
118 		default:
119 			usage();
120 		}
121 	argc -= optind;
122 	argv += optind;
123 
124 	if (array == 0)
125 		array = "xstr";
126 
127 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
128 		(void)signal(SIGINT, onintr);
129 	if (cflg || (argc == 0 && !readstd))
130 		inithash();
131 	else {
132 		int	fd;
133 
134 		snprintf(stringtmpfile, sizeof(stringtmpfile),
135 		    "%s%s.XXXXXX", _PATH_TMP, "xstr");
136 		strings = stringtmpfile;
137 		fd = mkstemp(stringtmpfile);
138 		if (fd == -1)
139 			err(1, "mkstemp failed");
140 		close(fd);
141 	}
142 	while (readstd || argc > 0) {
143 		if (freopen("x.c", "w", stdout) == NULL)
144 			err(1, "Cannot open `%s'", "x.c");
145 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
146 			err(1, "Cannot open `%s'", argv[0]);
147 		process("x.c");
148 		if (readstd == 0)
149 			argc--, argv++;
150 		else
151 			readstd = 0;
152 	};
153 	flushsh();
154 	if (cflg == 0)
155 		xsdotc();
156 	if (strings[0] == '/')
157 		(void)unlink(strings);
158 	exit(0);
159 }
160 
161 static void
process(const char * name)162 process(const char *name)
163 {
164 	char *cp;
165 	int c;
166 	int incomm = 0;
167 	int inasm = 0;
168 	int asmparnest = 0;
169 	int ret;
170 
171 	printf("extern char\t%s[];\n", array);
172 	for (;;) {
173 		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
174 			if (ferror(stdin))
175 				err(1, "Error reading `%s'", name);
176 			break;
177 		}
178 		if (linebuf[0] == '#') {
179 			printf("%s", linebuf);
180 			continue;
181 		}
182 		for (cp = linebuf; (c = *cp++);)
183 			switch (c) {
184 
185 			case '"':
186 				if (incomm || inasm)
187 					goto def;
188 				if ((ret = (int) yankstr(&cp)) == -1)
189 					goto out;
190 				printf("(&%s[%d])", array, ret);
191 				break;
192 
193 			case '\'':
194 				if (incomm || inasm)
195 					goto def;
196 				putchar(c);
197 				if (*cp)
198 					putchar(*cp++);
199 				break;
200 
201 			case '/':
202 				if (incomm || *cp != '*')
203 					goto def;
204 				incomm = 1;
205 				cp++;
206 				printf("/*");
207 				continue;
208 
209 			case '*':
210 				if (incomm && *cp == '/') {
211 					incomm = 0;
212 					cp++;
213 					printf("*/");
214 					continue;
215 				}
216 				goto def;
217 
218 			case '(':
219 				if (!incomm && inasm)
220 					asmparnest++;
221 				goto def;
222 
223 			case ')':
224 				if (!incomm && inasm && !--asmparnest)
225 					inasm = 0;
226 				goto def;
227 
228 			case '_':
229 				if (incomm || inasm)
230 					goto def;
231 				if (!strncmp(cp, "_asm", 4)) {
232 					cp += 4;
233 					printf("__asm");
234 					if (!strncmp(cp, "__", 2)) {
235 						cp += 2;
236 						printf("__");
237 					}
238 					if (isalnum((unsigned char)*cp) ||
239 					    *cp == '_')
240 						goto def;
241 					asmparnest = 0;
242 					inasm = 1;
243 				} else
244 					goto def;
245 				break;
246 def:
247 			default:
248 				putchar(c);
249 				break;
250 			}
251 	}
252 out:
253 	if (ferror(stdout)) {
254 		warn("Error reading `%s'", "x.c");
255 		onintr(1);
256 	}
257 }
258 
259 static off_t
yankstr(char ** cpp)260 yankstr(char **cpp)
261 {
262 	char *cp = *cpp;
263 	int c, ch;
264 	char *dbuf, *dp, *edp;
265 	const char *tp;
266 	off_t hash;
267 	size_t bsiz = BUFSIZ;
268 
269 	if ((dp = dbuf = malloc(bsiz)) == NULL)
270 		err(1, "malloc");
271 	edp = dbuf + bsiz;
272 
273 	while ((c = *cp++) != '\0') {
274 		switch (c) {
275 
276 		case '"':
277 			/* Look for a concatenated string */
278 			for (;;) {
279 				while (isspace((unsigned char)*cp))
280 					cp++;
281 				if (*cp == '\0') {
282 					if (fgets(linebuf,
283 					    sizeof linebuf, stdin) == NULL) {
284 						if (ferror(stdin))
285 							err(1,
286 							"Error reading `x.c'");
287 						goto out;
288 					}
289 					cp = linebuf;
290 				} else {
291 					if (*cp == '"') {
292 						cp++;
293 						if (*cp == '"') {
294 							cp++;
295 							continue;
296 						} else {
297 							c = *cp++;
298 							goto gotc;
299 						}
300 					} else {
301 						cp++;
302 						goto out;
303 					}
304 				}
305 			}
306 			/*NOTREACHED*/
307 		case '\\':
308 			c = *cp++;
309 			if (c == 0)
310 				break;
311 			if (c == '\n') {
312 				if (fgets(linebuf, sizeof linebuf, stdin)
313 				    == NULL) {
314 					if (ferror(stdin))
315 						err(1, "Error reading `x.c'");
316 					return(-1);
317 				}
318 				cp = linebuf;
319 				continue;
320 			}
321 			for (tp = "b\bt\tr\rn\nf\f\\\\\"\""; (ch = *tp++); tp++)
322 				if (c == ch) {
323 					c = *tp;
324 					goto gotc;
325 				}
326 			if (!octdigit(c)) {
327 				*dp++ = '\\';
328 				break;
329 			}
330 			c -= '0';
331 			if (!octdigit(*cp))
332 				break;
333 			c <<= 3, c += *cp++ - '0';
334 			if (!octdigit(*cp))
335 				break;
336 			c <<= 3, c += *cp++ - '0';
337 			break;
338 		}
339 gotc:
340 		if (dp >= edp - 1) {
341 			char *nbuf;
342 			bsiz += BUFSIZ;
343 			if ((nbuf = realloc(dbuf, bsiz)) == NULL) {
344 				free(dbuf);
345 				err(1, "realloc");
346 			}
347 			dp = nbuf + (dp - dbuf);
348 			edp = nbuf + bsiz;
349 			dbuf = nbuf;
350 		}
351 		*dp++ = c;
352 	}
353 out:
354 	*cpp = --cp;
355 	*dp = '\0';
356 	hash = hashit(dbuf, 1);
357 	free(dbuf);
358 	return hash;
359 }
360 
/*
 * octdigit - return 1 if c is one of the octal digits '0'..'7',
 * otherwise 0.
 */
static int
octdigit(char c)
{
	if (c < '0' || c > '7')
		return 0;
	return 1;
}
367 
368 static void
inithash(void)369 inithash(void)
370 {
371 	char buf[BUFSIZ];
372 	FILE *mesgread = fopen(strings, "r");
373 
374 	if (mesgread == NULL)
375 		return;
376 	for (;;) {
377 		mesgpt = tellpt;
378 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
379 			break;
380 		(void)hashit(buf, 0);
381 	}
382 	(void)fclose(mesgread);
383 }
384 
/*
 * fgetNUL - read one NUL-terminated record from file into obuf.
 *
 * At most rmdr - 1 bytes are stored, followed by a terminating NUL;
 * reading stops at a NUL byte, at end of file, or when the buffer is
 * full.  Bytes are fetched with xgetc() so that tellpt tracks the read
 * position.
 *
 * Returns 0 once end of file or a read error has been seen on file,
 * 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *buf = obuf;
	int c;

	/*
	 * The assignment needs its own parentheses: without them c would
	 * receive the result of the comparison rather than the byte read,
	 * and the buffer would be filled with 1s instead of the data.
	 */
	while (--rmdr > 0 && (c = xgetc(file)) != 0 && c != EOF)
		*buf++ = c;
	*buf = 0;
	return (feof(file) || ferror(file)) ? 0 : 1;
}
398 
399 static int
xgetc(FILE * file)400 xgetc(FILE *file)
401 {
402 
403 	tellpt++;
404 	return getc(file);
405 }
406 
407 
408 static off_t
hashit(const char * str,int new)409 hashit(const char *str, int new)
410 {
411 	int i;
412 	struct hash *hp, *hp0;
413 
414 	hp = hp0 = &bucket[lastchr(str) & 0177];
415 	while (hp->hnext) {
416 		hp = hp->hnext;
417 		i = istail(str, hp->hstr);
418 		if (i >= 0)
419 			return (hp->hpt + i);
420 	}
421 	if ((hp = calloc(1, sizeof (*hp))) == NULL)
422 		err(1, NULL);
423 	hp->hpt = mesgpt;
424 	if ((hp->hstr = strdup(str)) == NULL)
425 		err(1, NULL);
426 	mesgpt += strlen(hp->hstr) + 1;
427 	hp->hnext = hp0->hnext;
428 	hp->hnew = new;
429 	hp0->hnext = hp;
430 	return (hp->hpt);
431 }
432 
433 static void
flushsh(void)434 flushsh(void)
435 {
436 	int i;
437 	struct hash *hp;
438 	FILE *mesgwrit;
439 	int old = 0, new = 0;
440 
441 	for (i = 0; i < BUCKETS; i++)
442 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
443 			if (hp->hnew)
444 				new++;
445 			else
446 				old++;
447 	if (new == 0 && old != 0)
448 		return;
449 	mesgwrit = fopen(strings, old ? "r+" : "w");
450 	if (mesgwrit == NULL)
451 		err(1, "Cannot open `%s'", strings);
452 	for (i = 0; i < BUCKETS; i++)
453 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
454 			found(hp->hnew, hp->hpt, hp->hstr);
455 			if (hp->hnew) {
456 				(void)fseek(mesgwrit, hp->hpt, 0);
457 				(void)fwrite(hp->hstr, strlen(hp->hstr) + 1, 1,
458 				    mesgwrit);
459 				if (ferror(mesgwrit))
460 					err(1, "Error writing `%s'", strings);
461 			}
462 		}
463 	if (fclose(mesgwrit) == EOF)
464 		err(1, "Error closing `%s'", strings);
465 }
466 
467 static void
found(int new,off_t off,const char * str)468 found(int new, off_t off, const char *str)
469 {
470 	if (vflg == 0)
471 		return;
472 	if (!new)
473 		(void)fprintf(stderr, "found at %d:", (int) off);
474 	else
475 		(void)fprintf(stderr, "new at %d:", (int) off);
476 	prstr(str);
477 	(void)fprintf(stderr, "\n");
478 }
479 
/*
 * prstr - write the string cp to stderr in printable form.
 *
 * Bytes below space are shown as '^' followed by the byte plus '`'
 * (so 1 prints as "^a"), DEL as "^?", bytes from 0200 upwards as a
 * backslash and three octal digits, and everything else as is.
 */
static void
prstr(const char *cp)
{
	int c;

	while ((c = (unsigned char)*cp++) != '\0') {
		if (c < ' ')
			(void)fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			(void)fprintf(stderr, "^?");
		else if (c >= 0200)	/* 0200 itself is not printable either */
			(void)fprintf(stderr, "\\%03o", c);
		else
			(void)fputc(c, stderr);
	}
}
495 
496 static void
xsdotc(void)497 xsdotc(void)
498 {
499 	FILE *strf = fopen(strings, "r");
500 	FILE *xdotcf;
501 
502 	if (strf == NULL)
503 		err(1, "Cannot open `%s'", strings);
504 	xdotcf = fopen("xs.c", "w");
505 	if (xdotcf == NULL)
506 		err(1, "Cannot open `%s'", "xs.c");
507 	(void)fprintf(xdotcf, "char\t%s[] = {\n", array);
508 	for (;;) {
509 		int i, c;
510 
511 		for (i = 0; i < 8; i++) {
512 			c = getc(strf);
513 			if (ferror(strf)) {
514 				warn("Error reading `%s'", strings);
515 				onintr(1);
516 			}
517 			if (feof(strf)) {
518 				(void)fprintf(xdotcf, "\n");
519 				goto out;
520 			}
521 			(void)fprintf(xdotcf, "0x%02x,", c);
522 		}
523 		(void)fprintf(xdotcf, "\n");
524 	}
525 out:
526 	(void)fprintf(xdotcf, "};\n");
527 	(void)fclose(xdotcf);
528 	(void)fclose(strf);
529 }
530 
/*
 * lastchr - return the last character of the string cp, or '\0' when
 * the string is empty.
 */
static char
lastchr(const char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
539 
/*
 * istail - test whether str is a tail (suffix) of `of'.
 *
 * Returns the index in `of' at which str starts, or -1 if str is
 * longer than `of' or does not match its end.
 */
static int
istail(const char *str, const char *of)
{
	int start = strlen(of) - strlen(str);

	if (start >= 0 && strcmp(of + start, str) == 0)
		return (start);
	return (-1);
}
549 
550 static void
onintr(int dummy)551 onintr(int dummy)
552 {
553 
554 	(void)signal(SIGINT, SIG_IGN);
555 	if (strings[0] == '/')
556 		(void)unlink(strings);
557 	(void)unlink("x.c");
558 	(void)unlink("xs.c");
559 	exit(dummy);
560 }
561 
/*
 * usage - print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "usage: %s [-cv] [-l array] [-] [<name> ...]\n",
	    prog);
	exit(1);
}
570