1 /* $NetBSD: strfile.c,v 1.34 2011/08/31 16:24:55 plunky Exp $ */ 2 3 /*- 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Ken Arnold. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifdef __NetBSD__ 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 39 The Regents of the University of California. All rights reserved."); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; 45 #else 46 __RCSID("$NetBSD: strfile.c,v 1.34 2011/08/31 16:24:55 plunky Exp $"); 47 #endif 48 #endif /* not lint */ 49 #endif /* __NetBSD__ */ 50 51 /* n.b.: this file is used at build-time - i.e. during build.sh. */ 52 53 #include <sys/types.h> 54 #include <sys/param.h> 55 #include <ctype.h> 56 #include <stdio.h> 57 #include <stdlib.h> 58 #include <string.h> 59 #include <time.h> 60 #include <unistd.h> 61 #include <inttypes.h> 62 #include <err.h> 63 64 #include "strfile.h" 65 66 #ifndef MAXPATHLEN 67 #define MAXPATHLEN 1024 68 #endif /* MAXPATHLEN */ 69 70 #if defined(__NetBSD__) || defined(__dead) 71 #define NORETURN __dead 72 #elif defined __GNUC__ 73 #define NORETURN __attribute__((__noreturn__)) 74 #else 75 #define NORETURN 76 #endif 77 78 /* 79 * This program takes a file composed of strings separated by 80 * lines starting with two consecutive delimiting character (default 81 * character is '%') and creates another file which consists of a table 82 * describing the file (structure from "strfile.h"), a table of seek 83 * pointers to the start of the strings, and the strings, each terminated 84 * by a null byte. Usage: 85 * 86 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 87 * 88 * c - Change delimiting character from '%' to 'C' 89 * s - Silent. Give no summary of data processed at the end of 90 * the run. 91 * o - order the strings in alphabetic order 92 * i - if ordering, ignore case 93 * r - randomize the order of the strings 94 * x - set rotated bit 95 * 96 * Ken Arnold Sept. 7, 1978 -- 97 * 98 * Added ordering options. 99 */ 100 101 # define TRUE 1 102 # define FALSE 0 103 104 # define STORING_PTRS (Oflag || Rflag) 105 # define CHUNKSIZE 512 106 107 # define ALLOC(ptr,sz) do { \ 108 if (ptr == NULL) \ 109 ptr = malloc(CHUNKSIZE * sizeof *ptr); \ 110 else if (((sz) + 1) % CHUNKSIZE == 0) \ 111 ptr = realloc(ptr, ((sz) + CHUNKSIZE) * sizeof *ptr); \ 112 if (ptr == NULL) \ 113 err(1, "out of space"); \ 114 } while (0) 115 116 typedef struct { 117 char first; 118 off_t pos; 119 } STR; 120 121 static char *Infile = NULL; /* input file name */ 122 static char Outfile[MAXPATHLEN] = ""; /* output file name */ 123 static char Delimch = '%'; /* delimiting character */ 124 125 static int Sflag = FALSE; /* silent run flag */ 126 static int Oflag = FALSE; /* ordering flag */ 127 static int Iflag = FALSE; /* ignore case flag */ 128 static int Rflag = FALSE; /* randomize order flag */ 129 static int Xflag = FALSE; /* set rotated bit */ 130 static long Num_pts = 0; /* number of pointers/strings */ 131 132 static off_t *Seekpts; 133 134 static FILE *Sort_1, *Sort_2; /* pointers for sorting */ 135 136 static STRFILE Tbl; /* statistics table */ 137 138 static STR *Firstch; /* first chars of each string */ 139 140 141 static uint32_t h2nl(uint32_t h); 142 static void getargs(int argc, char **argv); 143 static void usage(void) NORETURN; 144 static void add_offset(FILE *fp, off_t off); 145 static void do_order(void); 146 static int cmp_str(const void *vp1, const void *vp2); 147 static void randomize(void); 148 static void fwrite_be_offt(off_t off, FILE *f); 149 150 151 /* 152 * main: 153 * Drive the sucker. There are two main modes -- either we store 154 * the seek pointers, if the table is to be sorted or randomized, 155 * or we write the pointer directly to the file, if we are to stay 156 * in file order. If the former, we allocate and re-allocate in 157 * CHUNKSIZE blocks; if the latter, we just write each pointer, 158 * and then seek back to the beginning to write in the table. 159 */ 160 int 161 main(int ac, char **av) 162 { 163 char *sp, dc; 164 FILE *inf, *outf; 165 off_t last_off, length, pos, *p; 166 int first, cnt; 167 char *nsp; 168 STR *fp; 169 static char string[257]; 170 171 /* sanity test */ 172 if (sizeof(uint32_t) != 4) 173 errx(1, "sizeof(uint32_t) != 4"); 174 175 getargs(ac, av); /* evalute arguments */ 176 dc = Delimch; 177 if ((inf = fopen(Infile, "r")) == NULL) 178 err(1, "open `%s'", Infile); 179 180 if ((outf = fopen(Outfile, "w")) == NULL) 181 err(1, "open `%s'", Outfile); 182 if (!STORING_PTRS) 183 (void) fseek(outf, sizeof Tbl, SEEK_SET); 184 185 /* 186 * Write the strings onto the file 187 */ 188 189 Tbl.str_longlen = 0; 190 Tbl.str_shortlen = (unsigned int) 0x7fffffff; 191 Tbl.str_delim = dc; 192 Tbl.str_version = VERSION; 193 first = Oflag; 194 add_offset(outf, ftell(inf)); 195 last_off = 0; 196 do { 197 sp = fgets(string, 256, inf); 198 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 199 pos = ftell(inf); 200 length = pos - last_off - (sp ? strlen(sp) : 0); 201 last_off = pos; 202 if (!length) 203 continue; 204 add_offset(outf, pos); 205 if ((off_t)Tbl.str_longlen < length) 206 Tbl.str_longlen = length; 207 if ((off_t)Tbl.str_shortlen > length) 208 Tbl.str_shortlen = length; 209 first = Oflag; 210 } 211 else if (first) { 212 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 213 continue; 214 ALLOC(Firstch, Num_pts); 215 fp = &Firstch[Num_pts - 1]; 216 if (Iflag && isupper((unsigned char)*nsp)) 217 fp->first = tolower((unsigned char)*nsp); 218 else 219 fp->first = *nsp; 220 fp->pos = Seekpts[Num_pts - 1]; 221 first = FALSE; 222 } 223 } while (sp != NULL); 224 225 /* 226 * write the tables in 227 */ 228 229 (void) fclose(inf); 230 231 if (Oflag) 232 do_order(); 233 else if (Rflag) 234 randomize(); 235 236 if (Xflag) 237 Tbl.str_flags |= STR_ROTATED; 238 239 if (!Sflag) { 240 printf("\"%s\" created\n", Outfile); 241 if (Num_pts == 2) 242 puts("There was 1 string"); 243 else 244 printf("There were %d strings\n", (int)(Num_pts - 1)); 245 printf("Longest string: %lu byte%s\n", (unsigned long)Tbl.str_longlen, 246 Tbl.str_longlen == 1 ? "" : "s"); 247 printf("Shortest string: %lu byte%s\n", (unsigned long)Tbl.str_shortlen, 248 Tbl.str_shortlen == 1 ? "" : "s"); 249 } 250 251 (void) fseek(outf, (off_t) 0, SEEK_SET); 252 Tbl.str_version = h2nl(Tbl.str_version); 253 Tbl.str_numstr = h2nl(Num_pts - 1); 254 Tbl.str_longlen = h2nl(Tbl.str_longlen); 255 Tbl.str_shortlen = h2nl(Tbl.str_shortlen); 256 Tbl.str_flags = h2nl(Tbl.str_flags); 257 (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); 258 if (STORING_PTRS) { 259 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 260 fwrite_be_offt(*p, outf); 261 } 262 fflush(outf); 263 if (ferror(outf)) 264 err(1, "fwrite %s", Outfile); 265 (void) fclose(outf); 266 exit(0); 267 } 268 269 /* 270 * This routine evaluates arguments from the command line 271 */ 272 static void 273 getargs(int argc, char **argv) 274 { 275 int ch; 276 extern int optind; 277 extern char *optarg; 278 279 while ((ch = getopt(argc, argv, "c:iorsx")) != -1) 280 switch(ch) { 281 case 'c': /* new delimiting char */ 282 Delimch = *optarg; 283 if (!isascii(Delimch)) { 284 printf("bad delimiting character: '\\%o\n'", 285 Delimch); 286 } 287 break; 288 case 'i': /* ignore case in ordering */ 289 Iflag++; 290 break; 291 case 'o': /* order strings */ 292 Oflag++; 293 break; 294 case 'r': /* randomize pointers */ 295 Rflag++; 296 break; 297 case 's': /* silent */ 298 Sflag++; 299 break; 300 case 'x': /* set the rotated bit */ 301 Xflag++; 302 break; 303 case '?': 304 default: 305 usage(); 306 } 307 argv += optind; 308 309 if (*argv) { 310 Infile = *argv; 311 if (*++argv) 312 (void) strcpy(Outfile, *argv); 313 } 314 if (!Infile) { 315 puts("No input file name"); 316 usage(); 317 } 318 if (*Outfile == '\0') { 319 (void) strcpy(Outfile, Infile); 320 (void) strcat(Outfile, ".dat"); 321 } 322 } 323 324 static void 325 usage(void) 326 { 327 (void) fprintf(stderr, 328 "Usage: %s [-iorsx] [-c char] sourcefile [datafile]\n", 329 getprogname()); 330 exit(1); 331 } 332 333 /* 334 * add_offset: 335 * Add an offset to the list, or write it out, as appropriate. 336 */ 337 static void 338 add_offset(FILE *fp, off_t off) 339 { 340 341 if (!STORING_PTRS) { 342 fwrite_be_offt(off, fp); 343 } else { 344 ALLOC(Seekpts, Num_pts + 1); 345 Seekpts[Num_pts] = off; 346 } 347 Num_pts++; 348 } 349 350 /* 351 * do_order: 352 * Order the strings alphabetically (possibly ignoring case). 353 */ 354 static void 355 do_order(void) 356 { 357 int i; 358 off_t *lp; 359 STR *fp; 360 361 Sort_1 = fopen(Infile, "r"); 362 Sort_2 = fopen(Infile, "r"); 363 qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); 364 i = Tbl.str_numstr; 365 lp = Seekpts; 366 fp = Firstch; 367 while (i--) 368 *lp++ = fp++->pos; 369 (void) fclose(Sort_1); 370 (void) fclose(Sort_2); 371 Tbl.str_flags |= STR_ORDERED; 372 } 373 374 static int 375 cmp_str(const void *vp1, const void *vp2) 376 { 377 const STR *p1, *p2; 378 int c1, c2; 379 int n1, n2; 380 381 p1 = (const STR *)vp1; 382 p2 = (const STR *)vp2; 383 384 # define SET_N(nf,ch) (nf = (ch == '\n')) 385 # define IS_END(ch,nf) (ch == Delimch && nf) 386 387 c1 = p1->first; 388 c2 = p2->first; 389 if (c1 != c2) 390 return c1 - c2; 391 392 (void) fseek(Sort_1, p1->pos, SEEK_SET); 393 (void) fseek(Sort_2, p2->pos, SEEK_SET); 394 395 n1 = FALSE; 396 n2 = FALSE; 397 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') 398 SET_N(n1, c1); 399 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') 400 SET_N(n2, c2); 401 402 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 403 if (Iflag) { 404 if (isupper(c1)) 405 c1 = tolower(c1); 406 if (isupper(c2)) 407 c2 = tolower(c2); 408 } 409 if (c1 != c2) 410 return c1 - c2; 411 SET_N(n1, c1); 412 SET_N(n2, c2); 413 c1 = getc(Sort_1); 414 c2 = getc(Sort_2); 415 } 416 if (IS_END(c1, n1)) 417 c1 = 0; 418 if (IS_END(c2, n2)) 419 c2 = 0; 420 return c1 - c2; 421 } 422 423 /* 424 * randomize: 425 * Randomize the order of the string table. We must be careful 426 * not to randomize across delimiter boundaries. All 427 * randomization is done within each block. 428 */ 429 static void 430 randomize(void) 431 { 432 int cnt, i; 433 off_t tmp; 434 off_t *sp; 435 436 srandom((int)(time(NULL) + getpid())); 437 438 Tbl.str_flags |= STR_RANDOM; 439 cnt = Tbl.str_numstr; 440 441 /* 442 * move things around randomly 443 */ 444 445 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 446 i = random() % cnt; 447 tmp = sp[0]; 448 sp[0] = sp[i]; 449 sp[i] = tmp; 450 } 451 } 452 453 /* 454 * fwrite_be_offt: 455 * Write out the off paramater as a 64 bit big endian number 456 */ 457 458 static void 459 fwrite_be_offt(off_t off, FILE *f) 460 { 461 int i; 462 unsigned char c[8]; 463 464 for (i = 7; i >= 0; i--) { 465 c[i] = off & 0xff; 466 off >>= 8; 467 } 468 fwrite(c, sizeof(c), 1, f); 469 } 470 471 static uint32_t 472 h2nl(uint32_t h) 473 { 474 unsigned char c[4]; 475 uint32_t rv; 476 477 c[0] = (h >> 24) & 0xff; 478 c[1] = (h >> 16) & 0xff; 479 c[2] = (h >> 8) & 0xff; 480 c[3] = (h >> 0) & 0xff; 481 memcpy(&rv, c, sizeof rv); 482 483 return (rv); 484 } 485