1 /* $NetBSD: xstr.c,v 1.25 2011/09/16 15:39:31 joerg Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\ 35 The Regents of the University of California. All rights reserved."); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)xstr.c 8.1 (Berkeley) 6/9/93"; 41 #else 42 __RCSID("$NetBSD: xstr.c,v 1.25 2011/09/16 15:39:31 joerg Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 #include <sys/param.h> 47 #include <signal.h> 48 #include <errno.h> 49 #include <unistd.h> 50 #include <stdio.h> 51 #include <ctype.h> 52 #include <string.h> 53 #include <stdlib.h> 54 #include <err.h> 55 #include "pathnames.h" 56 57 /* 58 * xstr - extract and hash strings in a C program 59 * 60 * Bill Joy UCB 61 * November, 1978 62 */ 63 64 static off_t hashit(const char *, int); 65 __dead static void onintr(int); 66 static off_t yankstr(char **); 67 static int octdigit(char); 68 static void inithash(void); 69 static int fgetNUL(char *, int, FILE *); 70 static int xgetc(FILE *); 71 static void flushsh(void); 72 static void found(int, off_t, const char *); 73 static void prstr(const char *); 74 static void xsdotc(void); 75 static char lastchr(const char *); 76 static int istail(const char *, const char *); 77 static void process(const char *); 78 __dead static void usage(void); 79 80 static off_t tellpt; 81 static off_t mesgpt; 82 static char stringtmpfile[MAXPATHLEN]; 83 static const char *strings = "strings"; 84 static const char *array = 0; 85 static int cflg; 86 static int vflg; 87 static int readstd; 88 static char linebuf[8192]; 89 90 #define BUCKETS 128 91 92 static struct hash { 93 off_t hpt; 94 char *hstr; 95 struct hash *hnext; 96 short hnew; 97 } bucket[BUCKETS]; 98 99 int 100 main(int argc, char *argv[]) 101 { 102 int c; 103 104 while ((c = getopt(argc, argv, "-cvl:")) != -1) 105 switch (c) { 106 case '-': 107 readstd++; 108 break; 109 case 'c': 110 cflg++; 111 break; 112 case 'v': 113 vflg++; 114 break; 115 case 'l': 116 array = optarg; 117 break; 118 default: 119 usage(); 120 } 121 argc -= optind; 122 argv += optind; 123 124 if (array == 0) 125 array = "xstr"; 126 127 if (signal(SIGINT, SIG_IGN) == SIG_DFL) 128 (void)signal(SIGINT, onintr); 129 if (cflg || (argc == 0 && !readstd)) 130 inithash(); 131 else { 132 int fd; 133 134 snprintf(stringtmpfile, sizeof(stringtmpfile), 135 "%s%s.XXXXXX", _PATH_TMP, "xstr"); 136 strings = stringtmpfile; 137 fd = mkstemp(stringtmpfile); 138 if (fd == -1) 139 err(1, "mkstemp failed"); 140 close(fd); 141 } 142 while (readstd || argc > 0) { 143 if (freopen("x.c", "w", stdout) == NULL) 144 err(1, "Cannot open `%s'", "x.c"); 145 if (!readstd && freopen(argv[0], "r", stdin) == NULL) 146 err(1, "Cannot open `%s'", argv[0]); 147 process("x.c"); 148 if (readstd == 0) 149 argc--, argv++; 150 else 151 readstd = 0; 152 }; 153 flushsh(); 154 if (cflg == 0) 155 xsdotc(); 156 if (strings[0] == '/') 157 (void)unlink(strings); 158 exit(0); 159 } 160 161 static void 162 process(const char *name) 163 { 164 char *cp; 165 int c; 166 int incomm = 0; 167 int inasm = 0; 168 int asmparnest = 0; 169 int ret; 170 171 printf("extern char\t%s[];\n", array); 172 for (;;) { 173 if (fgets(linebuf, sizeof linebuf, stdin) == NULL) { 174 if (ferror(stdin)) 175 err(1, "Error reading `%s'", name); 176 break; 177 } 178 if (linebuf[0] == '#') { 179 printf("%s", linebuf); 180 continue; 181 } 182 for (cp = linebuf; (c = *cp++);) 183 switch (c) { 184 185 case '"': 186 if (incomm || inasm) 187 goto def; 188 if ((ret = (int) yankstr(&cp)) == -1) 189 goto out; 190 printf("(&%s[%d])", array, ret); 191 break; 192 193 case '\'': 194 if (incomm || inasm) 195 goto def; 196 putchar(c); 197 if (*cp) 198 putchar(*cp++); 199 break; 200 201 case '/': 202 if (incomm || *cp != '*') 203 goto def; 204 incomm = 1; 205 cp++; 206 printf("/*"); 207 continue; 208 209 case '*': 210 if (incomm && *cp == '/') { 211 incomm = 0; 212 cp++; 213 printf("*/"); 214 continue; 215 } 216 goto def; 217 218 case '(': 219 if (!incomm && inasm) 220 asmparnest++; 221 goto def; 222 223 case ')': 224 if (!incomm && inasm && !--asmparnest) 225 inasm = 0; 226 goto def; 227 228 case '_': 229 if (incomm || inasm) 230 goto def; 231 if (!strncmp(cp, "_asm", 4)) { 232 cp += 4; 233 printf("__asm"); 234 if (!strncmp(cp, "__", 2)) { 235 cp += 2; 236 printf("__"); 237 } 238 if (isalnum((unsigned char)*cp) || 239 *cp == '_') 240 goto def; 241 asmparnest = 0; 242 inasm = 1; 243 } else 244 goto def; 245 break; 246 def: 247 default: 248 putchar(c); 249 break; 250 } 251 } 252 out: 253 if (ferror(stdout)) { 254 warn("Error reading `%s'", "x.c"); 255 onintr(1); 256 } 257 } 258 259 static off_t 260 yankstr(char **cpp) 261 { 262 char *cp = *cpp; 263 int c, ch; 264 char *dbuf, *dp, *edp; 265 const char *tp; 266 off_t hash; 267 size_t bsiz = BUFSIZ; 268 269 if ((dp = dbuf = malloc(bsiz)) == NULL) 270 err(1, "malloc"); 271 edp = dbuf + bsiz; 272 273 while ((c = *cp++) != '\0') { 274 switch (c) { 275 276 case '"': 277 /* Look for a concatenated string */ 278 for (;;) { 279 while (isspace((unsigned char)*cp)) 280 cp++; 281 if (*cp == '\0') { 282 if (fgets(linebuf, 283 sizeof linebuf, stdin) == NULL) { 284 if (ferror(stdin)) 285 err(1, 286 "Error reading `x.c'"); 287 goto out; 288 } 289 cp = linebuf; 290 } else { 291 if (*cp == '"') { 292 cp++; 293 if (*cp == '"') { 294 cp++; 295 continue; 296 } else { 297 c = *cp++; 298 goto gotc; 299 } 300 } else { 301 cp++; 302 goto out; 303 } 304 } 305 } 306 /*NOTREACHED*/ 307 case '\\': 308 c = *cp++; 309 if (c == 0) 310 break; 311 if (c == '\n') { 312 if (fgets(linebuf, sizeof linebuf, stdin) 313 == NULL) { 314 if (ferror(stdin)) 315 err(1, "Error reading `x.c'"); 316 return(-1); 317 } 318 cp = linebuf; 319 continue; 320 } 321 for (tp = "b\bt\tr\rn\nf\f\\\\\"\""; (ch = *tp++); tp++) 322 if (c == ch) { 323 c = *tp; 324 goto gotc; 325 } 326 if (!octdigit(c)) { 327 *dp++ = '\\'; 328 break; 329 } 330 c -= '0'; 331 if (!octdigit(*cp)) 332 break; 333 c <<= 3, c += *cp++ - '0'; 334 if (!octdigit(*cp)) 335 break; 336 c <<= 3, c += *cp++ - '0'; 337 break; 338 } 339 gotc: 340 if (dp >= edp - 1) { 341 char *nbuf; 342 bsiz += BUFSIZ; 343 if ((nbuf = realloc(dbuf, bsiz)) == NULL) { 344 free(dbuf); 345 err(1, "realloc"); 346 } 347 dp = nbuf + (dp - dbuf); 348 edp = nbuf + bsiz; 349 dbuf = nbuf; 350 } 351 *dp++ = c; 352 } 353 out: 354 *cpp = --cp; 355 *dp = '\0'; 356 hash = hashit(dbuf, 1); 357 free(dbuf); 358 return hash; 359 } 360 361 static int 362 octdigit(char c) 363 { 364 365 return (isdigit((unsigned char)c) && c != '8' && c != '9'); 366 } 367 368 static void 369 inithash(void) 370 { 371 char buf[BUFSIZ]; 372 FILE *mesgread = fopen(strings, "r"); 373 374 if (mesgread == NULL) 375 return; 376 for (;;) { 377 mesgpt = tellpt; 378 if (fgetNUL(buf, sizeof buf, mesgread) == 0) 379 break; 380 (void)hashit(buf, 0); 381 } 382 (void)fclose(mesgread); 383 } 384 385 static int 386 fgetNUL(char *obuf, int rmdr, FILE *file) 387 { 388 int c; 389 char *buf = obuf; 390 391 c = 0; /* XXXGCC -Wuninitialized */ 392 393 while (--rmdr > 0 && (c = xgetc(file) != 0 && c != EOF)) 394 *buf++ = c; 395 *buf++ = 0; 396 return (feof(file) || ferror(file)) ? 0 : 1; 397 } 398 399 static int 400 xgetc(FILE *file) 401 { 402 403 tellpt++; 404 return getc(file); 405 } 406 407 408 static off_t 409 hashit(const char *str, int new) 410 { 411 int i; 412 struct hash *hp, *hp0; 413 414 hp = hp0 = &bucket[lastchr(str) & 0177]; 415 while (hp->hnext) { 416 hp = hp->hnext; 417 i = istail(str, hp->hstr); 418 if (i >= 0) 419 return (hp->hpt + i); 420 } 421 if ((hp = calloc(1, sizeof (*hp))) == NULL) 422 err(1, NULL); 423 hp->hpt = mesgpt; 424 if ((hp->hstr = strdup(str)) == NULL) 425 err(1, NULL); 426 mesgpt += strlen(hp->hstr) + 1; 427 hp->hnext = hp0->hnext; 428 hp->hnew = new; 429 hp0->hnext = hp; 430 return (hp->hpt); 431 } 432 433 static void 434 flushsh(void) 435 { 436 int i; 437 struct hash *hp; 438 FILE *mesgwrit; 439 int old = 0, new = 0; 440 441 for (i = 0; i < BUCKETS; i++) 442 for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) 443 if (hp->hnew) 444 new++; 445 else 446 old++; 447 if (new == 0 && old != 0) 448 return; 449 mesgwrit = fopen(strings, old ? "r+" : "w"); 450 if (mesgwrit == NULL) 451 err(1, "Cannot open `%s'", strings); 452 for (i = 0; i < BUCKETS; i++) 453 for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) { 454 found(hp->hnew, hp->hpt, hp->hstr); 455 if (hp->hnew) { 456 (void)fseek(mesgwrit, hp->hpt, 0); 457 (void)fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, 458 mesgwrit); 459 if (ferror(mesgwrit)) 460 err(1, "Error writing `%s'", strings); 461 } 462 } 463 if (fclose(mesgwrit) == EOF) 464 err(1, "Error closing `%s'", strings); 465 } 466 467 static void 468 found(int new, off_t off, const char *str) 469 { 470 if (vflg == 0) 471 return; 472 if (!new) 473 (void)fprintf(stderr, "found at %d:", (int) off); 474 else 475 (void)fprintf(stderr, "new at %d:", (int) off); 476 prstr(str); 477 (void)fprintf(stderr, "\n"); 478 } 479 480 static void 481 prstr(const char *cp) 482 { 483 int c; 484 485 while ((c = (*cp++ & 0377)) != '\0') 486 if (c < ' ') 487 (void)fprintf(stderr, "^%c", c + '`'); 488 else if (c == 0177) 489 (void)fprintf(stderr, "^?"); 490 else if (c > 0200) 491 (void)fprintf(stderr, "\\%03o", c); 492 else 493 (void)fprintf(stderr, "%c", c); 494 } 495 496 static void 497 xsdotc(void) 498 { 499 FILE *strf = fopen(strings, "r"); 500 FILE *xdotcf; 501 502 if (strf == NULL) 503 err(1, "Cannot open `%s'", strings); 504 xdotcf = fopen("xs.c", "w"); 505 if (xdotcf == NULL) 506 err(1, "Cannot open `%s'", "xs.c"); 507 (void)fprintf(xdotcf, "char\t%s[] = {\n", array); 508 for (;;) { 509 int i, c; 510 511 for (i = 0; i < 8; i++) { 512 c = getc(strf); 513 if (ferror(strf)) { 514 warn("Error reading `%s'", strings); 515 onintr(1); 516 } 517 if (feof(strf)) { 518 (void)fprintf(xdotcf, "\n"); 519 goto out; 520 } 521 (void)fprintf(xdotcf, "0x%02x,", c); 522 } 523 (void)fprintf(xdotcf, "\n"); 524 } 525 out: 526 (void)fprintf(xdotcf, "};\n"); 527 (void)fclose(xdotcf); 528 (void)fclose(strf); 529 } 530 531 static char 532 lastchr(const char *cp) 533 { 534 535 while (cp[0] && cp[1]) 536 cp++; 537 return (*cp); 538 } 539 540 static int 541 istail(const char *str, const char *of) 542 { 543 int d = strlen(of) - strlen(str); 544 545 if (d < 0 || strcmp(&of[d], str) != 0) 546 return (-1); 547 return (d); 548 } 549 550 static void 551 onintr(int dummy) 552 { 553 554 (void)signal(SIGINT, SIG_IGN); 555 if (strings[0] == '/') 556 (void)unlink(strings); 557 (void)unlink("x.c"); 558 (void)unlink("xs.c"); 559 exit(dummy); 560 } 561 562 static void 563 usage(void) 564 { 565 566 (void)fprintf(stderr, "usage: %s [-cv] [-l array] [-] [<name> ...]\n", 567 getprogname()); 568 exit(1); 569 } 570