1 /* $OpenBSD: strfile.c,v 1.28 2016/03/07 12:07:56 mestre Exp $ */ 2 /* $NetBSD: strfile.c,v 1.4 1995/04/24 12:23:09 cgd Exp $ */ 3 4 /*- 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Ken Arnold. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "strfile.h" 45 46 /* 47 * This program takes a file composed of strings separated by 48 * lines starting with two consecutive delimiting character (default 49 * character is '%') and creates another file which consists of a table 50 * describing the file (structure from "strfile.h"), a table of seek 51 * pointers to the start of the strings, and the strings, each terminated 52 * by a null byte. Usage: 53 * 54 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 55 * 56 * c - Change delimiting character from '%' to 'C' 57 * s - Silent. Give no summary of data processed at the end of 58 * the run. 59 * o - order the strings in alphabetic order 60 * i - if ordering, ignore case 61 * r - randomize the order of the strings 62 * x - set rotated bit 63 * 64 * Ken Arnold Sept. 7, 1978 -- 65 * 66 * Added ordering options. 67 */ 68 69 #define TRUE 1 70 #define FALSE 0 71 72 #define STORING_PTRS (Oflag || Rflag) 73 #define CHUNKSIZE 512 74 75 # define ALLOC(ptr,sz) do { \ 76 if (ptr == NULL) \ 77 ptr = calloc(CHUNKSIZE, sizeof *ptr); \ 78 else if (((sz) + 1) % CHUNKSIZE == 0) \ 79 ptr = reallocarray(ptr, \ 80 (sz) + CHUNKSIZE, \ 81 sizeof(*ptr)); \ 82 if (ptr == NULL) \ 83 err(1, NULL); \ 84 } while (0) 85 86 typedef struct { 87 char first; 88 int32_t pos; 89 } STR; 90 91 char *Infile = NULL, /* input file name */ 92 Outfile[PATH_MAX] = "", /* output file name */ 93 Delimch = '%'; /* delimiting character */ 94 95 int Sflag = FALSE; /* silent run flag */ 96 int Oflag = FALSE; /* ordering flag */ 97 int Iflag = FALSE; /* ignore case flag */ 98 int Rflag = FALSE; /* randomize order flag */ 99 int Xflag = FALSE; /* set rotated bit */ 100 long Num_pts = 0; /* number of pointers/strings */ 101 102 int32_t *Seekpts; 103 104 FILE *Sort_1, *Sort_2; /* pointers for sorting */ 105 106 STRFILE Tbl; /* statistics table */ 107 108 STR *Firstch; /* first chars of each string */ 109 110 111 void add_offset(FILE *, int32_t); 112 int cmp_str(const void *, const void *); 113 void do_order(void); 114 void getargs(int, char **); 115 void randomize(void); 116 char *unctrl(char); 117 __dead void usage(void); 118 119 /* 120 * main: 121 * Drive the sucker. There are two main modes -- either we store 122 * the seek pointers, if the table is to be sorted or randomized, 123 * or we write the pointer directly to the file, if we are to stay 124 * in file order. If the former, we allocate and re-allocate in 125 * CHUNKSIZE blocks; if the latter, we just write each pointer, 126 * and then seek back to the beginning to write in the table. 127 */ 128 int 129 main(int ac, char *av[]) 130 { 131 char *sp, dc; 132 FILE *inf, *outf; 133 int32_t last_off, length, pos; 134 int32_t *p; 135 int first, cnt; 136 char *nsp; 137 STR *fp; 138 static char string[257]; 139 140 if (pledge("stdio rpath wpath cpath", NULL) == -1) 141 err(1, "pledge"); 142 143 getargs(ac, av); /* evalute arguments */ 144 dc = Delimch; 145 if ((inf = fopen(Infile, "r")) == NULL) 146 err(1, "%s", Infile); 147 148 if ((outf = fopen(Outfile, "w")) == NULL) 149 err(1, "%s", Outfile); 150 151 if (pledge("stdio", NULL) == -1) 152 err(1, "pledge"); 153 154 if (!STORING_PTRS) 155 (void) fseek(outf, sizeof Tbl, SEEK_SET); 156 157 /* 158 * Write the strings onto the file 159 */ 160 161 Tbl.str_longlen = 0; 162 Tbl.str_shortlen = (unsigned int) 0xffffffff; 163 Tbl.str_delim = dc; 164 Tbl.str_version = VERSION; 165 first = Oflag; 166 add_offset(outf, ftell(inf)); 167 last_off = 0; 168 do { 169 sp = fgets(string, sizeof(string), inf); 170 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 171 pos = ftell(inf); 172 length = pos - last_off - (sp ? strlen(sp) : 0); 173 last_off = pos; 174 if (!length) 175 continue; 176 add_offset(outf, pos); 177 if (Tbl.str_longlen < (u_int32_t)length) 178 Tbl.str_longlen = length; 179 if (Tbl.str_shortlen > (u_int32_t)length) 180 Tbl.str_shortlen = length; 181 first = Oflag; 182 } else if (first) { 183 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 184 continue; 185 ALLOC(Firstch, Num_pts); 186 fp = &Firstch[Num_pts - 1]; 187 if (Iflag && isupper((unsigned char)*nsp)) 188 fp->first = tolower((unsigned char)*nsp); 189 else 190 fp->first = *nsp; 191 fp->pos = Seekpts[Num_pts - 1]; 192 first = FALSE; 193 } 194 } while (sp != NULL); 195 196 /* 197 * write the tables in 198 */ 199 200 (void) fclose(inf); 201 Tbl.str_numstr = Num_pts - 1; 202 if (Tbl.str_numstr == 0) 203 Tbl.str_shortlen = 0; 204 205 if (Oflag) 206 do_order(); 207 else if (Rflag) 208 randomize(); 209 210 if (Xflag) 211 Tbl.str_flags |= STR_ROTATED; 212 213 if (!Sflag) { 214 printf("\"%s\" created\n", Outfile); 215 if (Tbl.str_numstr == 1) 216 puts("There was 1 string"); 217 else 218 printf("There were %u strings\n", Tbl.str_numstr); 219 printf("Longest string: %lu byte%s\n", 220 (unsigned long) Tbl.str_longlen, 221 Tbl.str_longlen == 1 ? "" : "s"); 222 printf("Shortest string: %lu byte%s\n", 223 (unsigned long) Tbl.str_shortlen, 224 Tbl.str_shortlen == 1 ? "" : "s"); 225 } 226 227 (void) fseek(outf, 0, SEEK_SET); 228 Tbl.str_version = htonl(Tbl.str_version); 229 Tbl.str_numstr = htonl(Tbl.str_numstr); 230 Tbl.str_longlen = htonl(Tbl.str_longlen); 231 Tbl.str_shortlen = htonl(Tbl.str_shortlen); 232 Tbl.str_flags = htonl(Tbl.str_flags); 233 (void) fwrite(&Tbl.str_version, sizeof(Tbl.str_version), 1, outf); 234 (void) fwrite(&Tbl.str_numstr, sizeof(Tbl.str_numstr), 1, outf); 235 (void) fwrite(&Tbl.str_longlen, sizeof(Tbl.str_longlen), 1, outf); 236 (void) fwrite(&Tbl.str_shortlen, sizeof(Tbl.str_shortlen), 1, outf); 237 (void) fwrite(&Tbl.str_flags, sizeof(Tbl.str_flags), 1, outf); 238 (void) fwrite( Tbl.stuff, sizeof(Tbl.stuff), 1, outf); 239 if (STORING_PTRS) { 240 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) { 241 *p = htonl(*p); 242 (void) fwrite(p, sizeof(*p), 1, outf); 243 } 244 } 245 if (fclose(outf)) 246 err(1, "fclose `%s'", Outfile); 247 return 0; 248 } 249 250 /* 251 * This routine evaluates arguments from the command line 252 */ 253 void 254 getargs(int argc, char *argv[]) 255 { 256 extern char *optarg; 257 extern int optind; 258 int ch; 259 260 while ((ch = getopt(argc, argv, "c:hiorsx")) != -1) { 261 switch(ch) { 262 case 'c': /* new delimiting char */ 263 Delimch = *optarg; 264 if (!isascii((unsigned char)Delimch)) { 265 printf("bad delimiting character: '\\%o\n'", 266 Delimch); 267 } 268 break; 269 case 'i': /* ignore case in ordering */ 270 Iflag++; 271 break; 272 case 'o': /* order strings */ 273 Oflag++; 274 break; 275 case 'r': /* randomize pointers */ 276 Rflag++; 277 break; 278 case 's': /* silent */ 279 Sflag++; 280 break; 281 case 'x': /* set the rotated bit */ 282 Xflag++; 283 break; 284 case 'h': 285 default: 286 usage(); 287 } 288 } 289 argv += optind; 290 291 if (*argv) { 292 Infile = *argv; 293 if (*++argv) 294 (void) strlcpy(Outfile, *argv, sizeof Outfile); 295 } 296 if (!Infile) { 297 puts("No input file name"); 298 usage(); 299 } 300 if (*Outfile == '\0') { 301 (void) strlcpy(Outfile, Infile, sizeof(Outfile)); 302 if (strlcat(Outfile, ".dat", sizeof(Outfile)) >= sizeof(Outfile)) 303 errx(1, "`%s': name too long", Infile); 304 } 305 } 306 307 void 308 usage(void) 309 { 310 (void) fprintf(stderr, 311 "%s [-iorsx] [-c char] sourcefile [datafile]\n", getprogname()); 312 exit(1); 313 } 314 315 /* 316 * add_offset: 317 * Add an offset to the list, or write it out, as appropriate. 318 */ 319 void 320 add_offset(FILE *fp, int32_t off) 321 { 322 int32_t net; 323 324 if (!STORING_PTRS) { 325 net = htonl(off); 326 fwrite(&net, 1, sizeof net, fp); 327 } else { 328 ALLOC(Seekpts, Num_pts + 1); 329 Seekpts[Num_pts] = off; 330 } 331 Num_pts++; 332 } 333 334 /* 335 * do_order: 336 * Order the strings alphabetically (possibly ignoring case). 337 */ 338 void 339 do_order(void) 340 { 341 int i; 342 int32_t *lp; 343 STR *fp; 344 345 Sort_1 = fopen(Infile, "r"); 346 Sort_2 = fopen(Infile, "r"); 347 qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); 348 i = Tbl.str_numstr; 349 lp = Seekpts; 350 fp = Firstch; 351 while (i--) 352 *lp++ = fp++->pos; 353 (void) fclose(Sort_1); 354 (void) fclose(Sort_2); 355 Tbl.str_flags |= STR_ORDERED; 356 } 357 358 /* 359 * cmp_str: 360 * Compare two strings in the file 361 */ 362 char * 363 unctrl(char c) 364 { 365 static char buf[3]; 366 367 if (isprint((unsigned char)c)) { 368 buf[0] = c; 369 buf[1] = '\0'; 370 } else if (c == 0177) { 371 buf[0] = '^'; 372 buf[1] = '?'; 373 } else { 374 buf[0] = '^'; 375 buf[1] = c + 'A' - 1; 376 } 377 return buf; 378 } 379 380 int 381 cmp_str(const void *p1, const void *p2) 382 { 383 int c1, c2; 384 int n1, n2; 385 386 # define SET_N(nf,ch) (nf = (ch == '\n')) 387 # define IS_END(ch,nf) (ch == Delimch && nf) 388 389 c1 = ((STR *)p1)->first; 390 c2 = ((STR *)p2)->first; 391 if (c1 != c2) 392 return c1 - c2; 393 394 (void) fseek(Sort_1, ((STR *)p1)->pos, SEEK_SET); 395 (void) fseek(Sort_2, ((STR *)p2)->pos, SEEK_SET); 396 397 n1 = FALSE; 398 n2 = FALSE; 399 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0') 400 SET_N(n1, c1); 401 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0') 402 SET_N(n2, c2); 403 404 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 405 if (Iflag) { 406 if (isupper(c1)) 407 c1 = tolower(c1); 408 if (isupper(c2)) 409 c2 = tolower(c2); 410 } 411 if (c1 != c2) 412 return c1 - c2; 413 SET_N(n1, c1); 414 SET_N(n2, c2); 415 c1 = getc(Sort_1); 416 c2 = getc(Sort_2); 417 } 418 if (IS_END(c1, n1)) 419 c1 = 0; 420 if (IS_END(c2, n2)) 421 c2 = 0; 422 return c1 - c2; 423 } 424 425 /* 426 * randomize: 427 * Randomize the order of the string table. We must be careful 428 * not to randomize across delimiter boundaries. All 429 * randomization is done within each block. 430 */ 431 void 432 randomize(void) 433 { 434 int cnt, i; 435 int32_t tmp; 436 int32_t *sp; 437 438 Tbl.str_flags |= STR_RANDOM; 439 cnt = Tbl.str_numstr; 440 441 /* 442 * move things around randomly 443 */ 444 445 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 446 i = arc4random_uniform(cnt); 447 tmp = sp[0]; 448 sp[0] = sp[i]; 449 sp[i] = tmp; 450 } 451 } 452