1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * Phong Vo <kpv@research.att.com> * 20*4887Schin * * 21*4887Schin ***********************************************************************/ 22*4887Schin #pragma prototyped 23*4887Schin /* 24*4887Schin * original code 25*4887Schin * 26*4887Schin * James A. Woods, Informatics General Corporation, 27*4887Schin * NASA Ames Research Center, 6/81. 28*4887Schin * Usenix ;login:, February/March, 1983, p. 8. 29*4887Schin * 30*4887Schin * discipline/method interface 31*4887Schin * 32*4887Schin * Glenn Fowler 33*4887Schin * AT&T Research 34*4887Schin * modified from the original BSD source 35*4887Schin * 36*4887Schin * 'fastfind' scans a file list for the full pathname of a file 37*4887Schin * given only a piece of the name. The list is processed with 38*4887Schin * with "front-compression" and bigram coding. Front compression reduces 39*4887Schin * space by a factor of 4-5, bigram coding by a further 20-25%. 40*4887Schin * 41*4887Schin * there are 4 methods: 42*4887Schin * 43*4887Schin * FF_old original with 7 bit bigram encoding (no magic) 44*4887Schin * FF_gnu 8 bit clean front compression (FF_gnu_magic) 45*4887Schin * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic) 46*4887Schin * FF_typ FF_dir with (mime) types (FF_typ_magic) 47*4887Schin * 48*4887Schin * the bigram encoding steals the eighth bit (that's why its FF_old) 49*4887Schin * maybe one day we'll limit it to readonly: 50*4887Schin * 51*4887Schin * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative 52*4887Schin * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows 53*4887Schin * FF_MIN-FF_MAX ascii residue 54*4887Schin * >=FF_MAX bigram codes 55*4887Schin * 56*4887Schin * a two-tiered string search technique is employed 57*4887Schin * 58*4887Schin * a metacharacter-free subpattern and partial pathname is matched 59*4887Schin * backwards to avoid full expansion of the pathname list 60*4887Schin * 61*4887Schin * then the actual shell glob-style regular expression (if in this form) 62*4887Schin * is matched against the candidate pathnames using the slower regexec() 63*4887Schin * 64*4887Schin * The original BSD code is covered by the BSD license: 65*4887Schin * 66*4887Schin * Copyright (c) 1985, 1993, 1999 67*4887Schin * The Regents of the University of California. All rights reserved. 68*4887Schin * 69*4887Schin * Redistribution and use in source and binary forms, with or without 70*4887Schin * modification, are permitted provided that the following conditions 71*4887Schin * are met: 72*4887Schin * 1. Redistributions of source code must retain the above copyright 73*4887Schin * notice, this list of conditions and the following disclaimer. 74*4887Schin * 2. Redistributions in binary form must reproduce the above copyright 75*4887Schin * notice, this list of conditions and the following disclaimer in the 76*4887Schin * documentation and/or other materials provided with the distribution. 77*4887Schin * 3. Neither the name of the University nor the names of its contributors 78*4887Schin * may be used to endorse or promote products derived from this software 79*4887Schin * without specific prior written permission. 80*4887Schin * 81*4887Schin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 82*4887Schin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 83*4887Schin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 84*4887Schin * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 85*4887Schin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 86*4887Schin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 87*4887Schin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 88*4887Schin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 89*4887Schin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 90*4887Schin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 91*4887Schin * SUCH DAMAGE. 92*4887Schin */ 93*4887Schin 94*4887Schin static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n"; 95*4887Schin 96*4887Schin static const char lib[] = "libast:fastfind"; 97*4887Schin 98*4887Schin #include "findlib.h" 99*4887Schin 100*4887Schin #define FIND_MATCH "*/(find|locate)/*" 101*4887Schin 102*4887Schin /* 103*4887Schin * this db could be anywhere 104*4887Schin * findcodes[] directories are checked for findnames[i] 105*4887Schin */ 106*4887Schin 107*4887Schin static char* findcodes[] = 108*4887Schin { 109*4887Schin 0, 110*4887Schin 0, 111*4887Schin FIND_CODES, 112*4887Schin "/usr/local/share/lib", 113*4887Schin "/usr/local/lib", 114*4887Schin "/usr/share/lib", 115*4887Schin "/usr/lib", 116*4887Schin "/var/spool", 117*4887Schin "/usr/local/var", 118*4887Schin "/var/lib", 119*4887Schin "/var/lib/slocate", 120*4887Schin "/var/db", 121*4887Schin }; 122*4887Schin 123*4887Schin static char* findnames[] = 124*4887Schin { 125*4887Schin "find/codes", 126*4887Schin "find/find.codes", 127*4887Schin "locate/locatedb", 128*4887Schin "locatedb", 129*4887Schin "locate.database", 130*4887Schin "slocate.db", 131*4887Schin }; 132*4887Schin 133*4887Schin /* 134*4887Schin * convert t to lower case and drop leading x- and x- after / 135*4887Schin * converted value copied to b of size n 136*4887Schin */ 137*4887Schin 138*4887Schin char* 139*4887Schin typefix(char* buf, size_t n, register const char* t) 140*4887Schin { 141*4887Schin register int c; 142*4887Schin register char* b = buf; 143*4887Schin 144*4887Schin if ((*t == 'x' || *t == 'X') && *(t + 1) == '-') 145*4887Schin t += 2; 146*4887Schin while (c = *t++) 147*4887Schin { 148*4887Schin if (isupper(c)) 149*4887Schin c = tolower(c); 150*4887Schin if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-') 151*4887Schin t += 2; 152*4887Schin } 153*4887Schin *b = 0; 154*4887Schin return buf; 155*4887Schin } 156*4887Schin 157*4887Schin /* 158*4887Schin * return a fastfind stream handle for pattern 159*4887Schin */ 160*4887Schin 161*4887Schin Find_t* 162*4887Schin findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc) 163*4887Schin { 164*4887Schin register Find_t* fp; 165*4887Schin register char* p; 166*4887Schin register char* s; 167*4887Schin register char* b; 168*4887Schin register int i; 169*4887Schin register int j; 170*4887Schin char* path; 171*4887Schin int brace = 0; 172*4887Schin int paren = 0; 173*4887Schin int k; 174*4887Schin int q; 175*4887Schin int fd; 176*4887Schin int uid; 177*4887Schin Vmalloc_t* vm; 178*4887Schin Type_t* tp; 179*4887Schin struct stat st; 180*4887Schin 181*4887Schin 182*4887Schin if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 183*4887Schin goto nospace; 184*4887Schin 185*4887Schin /* 186*4887Schin * NOTE: searching for FIND_CODES would be much simpler if we 187*4887Schin * just stuck with our own, but we also support GNU 188*4887Schin * locate codes and have to search for the one of a 189*4887Schin * bazillion possible names for that file 190*4887Schin */ 191*4887Schin 192*4887Schin if (!findcodes[1]) 193*4887Schin findcodes[1] = getenv(FIND_CODES_ENV); 194*4887Schin if (disc->flags & FIND_GENERATE) 195*4887Schin { 196*4887Schin if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t)))) 197*4887Schin goto nospace; 198*4887Schin fp->vm = vm; 199*4887Schin fp->id = lib; 200*4887Schin fp->disc = disc; 201*4887Schin fp->generate = 1; 202*4887Schin if (file && (!*file || streq(file, "-"))) 203*4887Schin file = 0; 204*4887Schin uid = geteuid(); 205*4887Schin j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 206*4887Schin 207*4887Schin /* 208*4887Schin * look for the codes file, but since it may not exist yet, 209*4887Schin * also look for the containing directory if i<2 or if 210*4887Schin * it is sufficiently qualified (FIND_MATCH) 211*4887Schin */ 212*4887Schin 213*4887Schin for (i = 0; i < j; i++) 214*4887Schin if (path = findcodes[i]) 215*4887Schin { 216*4887Schin if (*path == '/') 217*4887Schin { 218*4887Schin if (!stat(path, &st)) 219*4887Schin { 220*4887Schin if (S_ISDIR(st.st_mode)) 221*4887Schin { 222*4887Schin for (k = 0; k < elementsof(findnames); k++) 223*4887Schin { 224*4887Schin sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]); 225*4887Schin if (!eaccess(fp->encode.file, R_OK|W_OK)) 226*4887Schin { 227*4887Schin path = fp->encode.file; 228*4887Schin break; 229*4887Schin } 230*4887Schin if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/'))) 231*4887Schin { 232*4887Schin *b = 0; 233*4887Schin if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 234*4887Schin { 235*4887Schin *b = '/'; 236*4887Schin path = fp->encode.file; 237*4887Schin break; 238*4887Schin } 239*4887Schin } 240*4887Schin } 241*4887Schin if (k < elementsof(findnames)) 242*4887Schin break; 243*4887Schin } 244*4887Schin else if (st.st_uid == uid && (st.st_mode & S_IWUSR)) 245*4887Schin { 246*4887Schin sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 247*4887Schin path = fp->encode.file; 248*4887Schin break; 249*4887Schin } 250*4887Schin } 251*4887Schin else if (i < 2 || strmatch(path, FIND_MATCH)) 252*4887Schin { 253*4887Schin sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 254*4887Schin if (b = strrchr(fp->encode.file, '/')) 255*4887Schin { 256*4887Schin *b = 0; 257*4887Schin if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 258*4887Schin { 259*4887Schin *b = '/'; 260*4887Schin path = fp->encode.file; 261*4887Schin break; 262*4887Schin } 263*4887Schin } 264*4887Schin } 265*4887Schin } 266*4887Schin else if (pathpath(fp->encode.file, path, "", PATH_REGULAR|PATH_READ|PATH_WRITE)) 267*4887Schin { 268*4887Schin path = fp->encode.file; 269*4887Schin break; 270*4887Schin } 271*4887Schin else if (b = strrchr(path, '/')) 272*4887Schin { 273*4887Schin sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path); 274*4887Schin if (pathpath(fp->encode.temp, fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE) && 275*4887Schin !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 276*4887Schin { 277*4887Schin sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b); 278*4887Schin path = fp->encode.file; 279*4887Schin break; 280*4887Schin } 281*4887Schin } 282*4887Schin } 283*4887Schin if (i >= j) 284*4887Schin { 285*4887Schin if (fp->disc->errorf) 286*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 287*4887Schin goto drop; 288*4887Schin } 289*4887Schin if (fp->disc->flags & FIND_OLD) 290*4887Schin { 291*4887Schin /* 292*4887Schin * FF_old generates temp data that is read 293*4887Schin * in a second pass to generate the real codes 294*4887Schin */ 295*4887Schin 296*4887Schin fp->method = FF_old; 297*4887Schin if (!(fp->fp = sftmp(32 * PATH_MAX))) 298*4887Schin { 299*4887Schin if (fp->disc->errorf) 300*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file"); 301*4887Schin goto drop; 302*4887Schin } 303*4887Schin } 304*4887Schin else 305*4887Schin { 306*4887Schin /* 307*4887Schin * the rest generate into a temp file that 308*4887Schin * is simply renamed on completion 309*4887Schin */ 310*4887Schin 311*4887Schin if (s = strrchr(path, '/')) 312*4887Schin { 313*4887Schin *s = 0; 314*4887Schin p = path; 315*4887Schin } 316*4887Schin else 317*4887Schin p = "."; 318*4887Schin if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd)) 319*4887Schin { 320*4887Schin if (fp->disc->errorf) 321*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : "."); 322*4887Schin goto drop; 323*4887Schin } 324*4887Schin if (s) 325*4887Schin *s = '/'; 326*4887Schin if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE))) 327*4887Schin { 328*4887Schin if (fp->disc->errorf) 329*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp); 330*4887Schin close(fd); 331*4887Schin goto drop; 332*4887Schin } 333*4887Schin if (fp->disc->flags & FIND_TYPE) 334*4887Schin { 335*4887Schin fp->method = FF_typ; 336*4887Schin fp->encode.namedisc.key = offsetof(Type_t, name); 337*4887Schin fp->encode.namedisc.link = offsetof(Type_t, byname); 338*4887Schin fp->encode.indexdisc.key = offsetof(Type_t, index); 339*4887Schin fp->encode.indexdisc.size = sizeof(unsigned long); 340*4887Schin fp->encode.indexdisc.link = offsetof(Type_t, byindex); 341*4887Schin s = "system/dir"; 342*4887Schin if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1))) 343*4887Schin { 344*4887Schin if (fp->encode.namedict) 345*4887Schin dtclose(fp->encode.namedict); 346*4887Schin if (fp->disc->errorf) 347*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table"); 348*4887Schin goto drop; 349*4887Schin } 350*4887Schin 351*4887Schin /* 352*4887Schin * type index 1 is always system/dir 353*4887Schin */ 354*4887Schin 355*4887Schin tp->index = ++fp->types; 356*4887Schin strcpy(tp->name, s); 357*4887Schin dtinsert(fp->encode.namedict, tp); 358*4887Schin dtinsert(fp->encode.indexdict, tp); 359*4887Schin } 360*4887Schin else if (fp->disc->flags & FIND_GNU) 361*4887Schin { 362*4887Schin fp->method = FF_gnu; 363*4887Schin sfputc(fp->fp, 0); 364*4887Schin sfputr(fp->fp, FF_gnu_magic, 0); 365*4887Schin } 366*4887Schin else 367*4887Schin { 368*4887Schin fp->method = FF_dir; 369*4887Schin sfputc(fp->fp, 0); 370*4887Schin sfputr(fp->fp, FF_dir_magic, 0); 371*4887Schin } 372*4887Schin } 373*4887Schin } 374*4887Schin else 375*4887Schin { 376*4887Schin i = sizeof(Decode_t) + sizeof(Code_t); 377*4887Schin if (!pattern || !*pattern) 378*4887Schin pattern = "*"; 379*4887Schin i += (j = 2 * (strlen(pattern) + 1)); 380*4887Schin if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i))) 381*4887Schin { 382*4887Schin vmclose(vm); 383*4887Schin return 0; 384*4887Schin } 385*4887Schin fp->vm = vm; 386*4887Schin fp->id = lib; 387*4887Schin fp->disc = disc; 388*4887Schin if (disc->flags & FIND_ICASE) 389*4887Schin fp->decode.ignorecase = 1; 390*4887Schin j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 391*4887Schin for (i = 0; i < j; i++) 392*4887Schin if (path = findcodes[i]) 393*4887Schin { 394*4887Schin if (*path == '/') 395*4887Schin { 396*4887Schin if (!stat(path, &st)) 397*4887Schin { 398*4887Schin if (S_ISDIR(st.st_mode)) 399*4887Schin { 400*4887Schin for (k = 0; k < elementsof(findnames); k++) 401*4887Schin { 402*4887Schin sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]); 403*4887Schin if (fp->fp = sfopen(NiL, fp->decode.path, "r")) 404*4887Schin { 405*4887Schin path = fp->decode.path; 406*4887Schin break; 407*4887Schin } 408*4887Schin } 409*4887Schin if (fp->fp) 410*4887Schin break; 411*4887Schin } 412*4887Schin else if (fp->fp = sfopen(NiL, path, "r")) 413*4887Schin break; 414*4887Schin } 415*4887Schin } 416*4887Schin else if ((path = pathpath(fp->decode.path, path, "", PATH_REGULAR|PATH_READ)) && (fp->fp = sfopen(NiL, path, "r"))) 417*4887Schin break; 418*4887Schin } 419*4887Schin if (!fp->fp) 420*4887Schin { 421*4887Schin if (fp->disc->errorf) 422*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 423*4887Schin goto drop; 424*4887Schin } 425*4887Schin if (fstat(sffileno(fp->fp), &st)) 426*4887Schin { 427*4887Schin if (fp->disc->errorf) 428*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path); 429*4887Schin goto drop; 430*4887Schin } 431*4887Schin if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid()) 432*4887Schin setgid(getgid()); 433*4887Schin fp->stamp = st.st_mtime; 434*4887Schin b = (s = fp->decode.temp) + 1; 435*4887Schin for (i = 0; i < elementsof(fp->decode.bigram1); i++) 436*4887Schin { 437*4887Schin if ((j = sfgetc(fp->fp)) == EOF) 438*4887Schin goto invalid; 439*4887Schin if (!(*s++ = fp->decode.bigram1[i] = j) && i) 440*4887Schin { 441*4887Schin i = -i; 442*4887Schin break; 443*4887Schin } 444*4887Schin if ((j = sfgetc(fp->fp)) == EOF) 445*4887Schin goto invalid; 446*4887Schin if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1')) 447*4887Schin break; 448*4887Schin } 449*4887Schin if (streq(b, FF_typ_magic)) 450*4887Schin { 451*4887Schin if (type) 452*4887Schin { 453*4887Schin type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type); 454*4887Schin memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1)); 455*4887Schin } 456*4887Schin fp->method = FF_typ; 457*4887Schin for (j = 0, i = 1;; i++) 458*4887Schin { 459*4887Schin if (!(s = sfgetr(fp->fp, 0, 0))) 460*4887Schin goto invalid; 461*4887Schin if (!*s) 462*4887Schin break; 463*4887Schin if (type && strmatch(s, type)) 464*4887Schin { 465*4887Schin FF_SET_TYPE(fp, i); 466*4887Schin j++; 467*4887Schin } 468*4887Schin } 469*4887Schin if (type && !j) 470*4887Schin goto drop; 471*4887Schin fp->types = j; 472*4887Schin } 473*4887Schin else if (streq(b, FF_dir_magic)) 474*4887Schin fp->method = FF_dir; 475*4887Schin else if (streq(b, FF_gnu_magic)) 476*4887Schin fp->method = FF_gnu; 477*4887Schin else if (!*b && *--b >= '0' && *b <= '1') 478*4887Schin { 479*4887Schin fp->method = FF_gnu; 480*4887Schin while (j = sfgetc(fp->fp)) 481*4887Schin { 482*4887Schin if (j == EOF || fp->decode.count >= sizeof(fp->decode.path)) 483*4887Schin goto invalid; 484*4887Schin fp->decode.path[fp->decode.count++] = j; 485*4887Schin } 486*4887Schin } 487*4887Schin else 488*4887Schin { 489*4887Schin fp->method = FF_old; 490*4887Schin if (i < 0) 491*4887Schin { 492*4887Schin if ((j = sfgetc(fp->fp)) == EOF) 493*4887Schin goto invalid; 494*4887Schin fp->decode.bigram2[i = -i] = j; 495*4887Schin } 496*4887Schin while (++i < elementsof(fp->decode.bigram1)) 497*4887Schin { 498*4887Schin if ((j = sfgetc(fp->fp)) == EOF) 499*4887Schin goto invalid; 500*4887Schin fp->decode.bigram1[i] = j; 501*4887Schin if ((j = sfgetc(fp->fp)) == EOF) 502*4887Schin goto invalid; 503*4887Schin fp->decode.bigram2[i] = j; 504*4887Schin } 505*4887Schin if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF) 506*4887Schin goto invalid; 507*4887Schin } 508*4887Schin 509*4887Schin /* 510*4887Schin * set up the physical dir table 511*4887Schin */ 512*4887Schin 513*4887Schin if (disc->version >= 19980301L) 514*4887Schin { 515*4887Schin fp->verifyf = disc->verifyf; 516*4887Schin if (disc->dirs && *disc->dirs) 517*4887Schin { 518*4887Schin for (k = 0; disc->dirs[k]; k++); 519*4887Schin if (k == 1 && streq(disc->dirs[0], "/")) 520*4887Schin k = 0; 521*4887Schin if (k) 522*4887Schin { 523*4887Schin if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0))) 524*4887Schin goto drop; 525*4887Schin if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0))) 526*4887Schin goto drop; 527*4887Schin p = 0; 528*4887Schin b = fp->decode.temp; 529*4887Schin j = fp->method == FF_old || fp->method == FF_gnu; 530*4887Schin 531*4887Schin /* 532*4887Schin * fill the dir list with logical and 533*4887Schin * physical names since we don't know 534*4887Schin * which way the db was encoded (it 535*4887Schin * could be *both* ways) 536*4887Schin */ 537*4887Schin 538*4887Schin for (i = q = 0; i < k; i++) 539*4887Schin { 540*4887Schin if (*(s = disc->dirs[i]) == '/') 541*4887Schin sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s); 542*4887Schin else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path)))) 543*4887Schin goto nospace; 544*4887Schin else 545*4887Schin sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s); 546*4887Schin s = pathcanon(b, 0); 547*4887Schin *s = '/'; 548*4887Schin *(s + 1) = 0; 549*4887Schin if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 550*4887Schin goto nospace; 551*4887Schin if (j) 552*4887Schin (fp->dirs[q])[s - b] = 0; 553*4887Schin q++; 554*4887Schin *s = 0; 555*4887Schin s = pathcanon(b, PATH_PHYSICAL); 556*4887Schin *s = '/'; 557*4887Schin *(s + 1) = 0; 558*4887Schin if (!strneq(b, fp->dirs[q - 1], s - b)) 559*4887Schin { 560*4887Schin if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 561*4887Schin goto nospace; 562*4887Schin if (j) 563*4887Schin (fp->dirs[q])[s - b] = 0; 564*4887Schin q++; 565*4887Schin } 566*4887Schin } 567*4887Schin strsort(fp->dirs, q, strcasecmp); 568*4887Schin for (i = 0; i < q; i++) 569*4887Schin fp->lens[i] = strlen(fp->dirs[i]); 570*4887Schin } 571*4887Schin } 572*4887Schin } 573*4887Schin if (fp->verifyf || (disc->flags & FIND_VERIFY)) 574*4887Schin { 575*4887Schin if (fp->method != FF_dir && fp->method != FF_typ) 576*4887Schin { 577*4887Schin if (fp->disc->errorf) 578*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM"); 579*4887Schin goto drop; 580*4887Schin } 581*4887Schin fp->verify = 1; 582*4887Schin } 583*4887Schin 584*4887Schin /* 585*4887Schin * extract last glob-free subpattern in name for fast pre-match 586*4887Schin * prepend 0 for backwards match 587*4887Schin */ 588*4887Schin 589*4887Schin if (p = s = (char*)pattern) 590*4887Schin { 591*4887Schin b = fp->decode.pattern; 592*4887Schin for (;;) 593*4887Schin { 594*4887Schin switch (*b++ = *p++) 595*4887Schin { 596*4887Schin case 0: 597*4887Schin break; 598*4887Schin case '\\': 599*4887Schin s = p; 600*4887Schin if (!*p++) 601*4887Schin break; 602*4887Schin continue; 603*4887Schin case '[': 604*4887Schin if (!brace) 605*4887Schin { 606*4887Schin brace++; 607*4887Schin if (*p == ']') 608*4887Schin p++; 609*4887Schin } 610*4887Schin continue; 611*4887Schin case ']': 612*4887Schin if (brace) 613*4887Schin { 614*4887Schin brace--; 615*4887Schin s = p; 616*4887Schin } 617*4887Schin continue; 618*4887Schin case '(': 619*4887Schin if (!brace) 620*4887Schin paren++; 621*4887Schin continue; 622*4887Schin case ')': 623*4887Schin if (!brace && paren > 0 && !--paren) 624*4887Schin s = p; 625*4887Schin continue; 626*4887Schin case '|': 627*4887Schin case '&': 628*4887Schin if (!brace && !paren) 629*4887Schin { 630*4887Schin s = ""; 631*4887Schin break; 632*4887Schin } 633*4887Schin continue; 634*4887Schin case '*': 635*4887Schin case '?': 636*4887Schin s = p; 637*4887Schin continue; 638*4887Schin default: 639*4887Schin continue; 640*4887Schin } 641*4887Schin break; 642*4887Schin } 643*4887Schin if (s != pattern && !streq(pattern, "*")) 644*4887Schin { 645*4887Schin fp->decode.match = 1; 646*4887Schin if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0))) 647*4887Schin { 648*4887Schin if (disc->errorf) 649*4887Schin { 650*4887Schin regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 651*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp); 652*4887Schin } 653*4887Schin goto drop; 654*4887Schin } 655*4887Schin } 656*4887Schin if (*s) 657*4887Schin { 658*4887Schin *b++ = 0; 659*4887Schin while (i = *s++) 660*4887Schin *b++ = i; 661*4887Schin *b-- = 0; 662*4887Schin fp->decode.end = b; 663*4887Schin if (fp->decode.ignorecase) 664*4887Schin for (s = fp->decode.pattern; s <= b; s++) 665*4887Schin if (isupper(*s)) 666*4887Schin *s = tolower(*s); 667*4887Schin } 668*4887Schin } 669*4887Schin } 670*4887Schin return fp; 671*4887Schin nospace: 672*4887Schin if (disc->errorf) 673*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "out of space"); 674*4887Schin if (!vm) 675*4887Schin return 0; 676*4887Schin if (!fp) 677*4887Schin { 678*4887Schin vmclose(vm); 679*4887Schin return 0; 680*4887Schin } 681*4887Schin goto drop; 682*4887Schin invalid: 683*4887Schin if (fp->disc->errorf) 684*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path); 685*4887Schin drop: 686*4887Schin if (!fp->generate && fp->decode.match) 687*4887Schin regfree(&fp->decode.re); 688*4887Schin if (fp->fp) 689*4887Schin sfclose(fp->fp); 690*4887Schin vmclose(fp->vm); 691*4887Schin return 0; 692*4887Schin } 693*4887Schin 694*4887Schin /* 695*4887Schin * return the next fastfind path 696*4887Schin * 0 returned when list exhausted 697*4887Schin */ 698*4887Schin 699*4887Schin char* 700*4887Schin findread(register Find_t* fp) 701*4887Schin { 702*4887Schin register char* p; 703*4887Schin register char* q; 704*4887Schin register char* s; 705*4887Schin register char* b; 706*4887Schin register char* e; 707*4887Schin register int c; 708*4887Schin register int n; 709*4887Schin register int m; 710*4887Schin int ignorecase; 711*4887Schin int t; 712*4887Schin unsigned char w[4]; 713*4887Schin struct stat st; 714*4887Schin 715*4887Schin if (fp->generate) 716*4887Schin return 0; 717*4887Schin if (fp->decode.restore) 718*4887Schin { 719*4887Schin *fp->decode.restore = '/'; 720*4887Schin fp->decode.restore = 0; 721*4887Schin } 722*4887Schin ignorecase = fp->decode.ignorecase ? STR_ICASE : 0; 723*4887Schin c = fp->decode.peek; 724*4887Schin next: 725*4887Schin for (;;) 726*4887Schin { 727*4887Schin switch (fp->method) 728*4887Schin { 729*4887Schin case FF_dir: 730*4887Schin t = 0; 731*4887Schin n = sfgetl(fp->fp); 732*4887Schin goto grab; 733*4887Schin case FF_gnu: 734*4887Schin if ((c = sfgetc(fp->fp)) == EOF) 735*4887Schin return 0; 736*4887Schin if (c == 0x80) 737*4887Schin { 738*4887Schin if ((c = sfgetc(fp->fp)) == EOF) 739*4887Schin return 0; 740*4887Schin n = c << 8; 741*4887Schin if ((c = sfgetc(fp->fp)) == EOF) 742*4887Schin return 0; 743*4887Schin n |= c; 744*4887Schin if (n & 0x8000) 745*4887Schin n = (n - 0xffff) - 1; 746*4887Schin } 747*4887Schin else if ((n = c) & 0x80) 748*4887Schin n = (n - 0xff) - 1; 749*4887Schin t = 0; 750*4887Schin goto grab; 751*4887Schin case FF_typ: 752*4887Schin t = sfgetu(fp->fp); 753*4887Schin n = sfgetl(fp->fp); 754*4887Schin grab: 755*4887Schin p = fp->decode.path + (fp->decode.count += n); 756*4887Schin do 757*4887Schin { 758*4887Schin if ((c = sfgetc(fp->fp)) == EOF) 759*4887Schin return 0; 760*4887Schin } while (*p++ = c); 761*4887Schin p -= 2; 762*4887Schin break; 763*4887Schin case FF_old: 764*4887Schin if (c == EOF) 765*4887Schin { 766*4887Schin fp->decode.peek = c; 767*4887Schin return 0; 768*4887Schin } 769*4887Schin if (c == FF_ESC) 770*4887Schin { 771*4887Schin if (sfread(fp->fp, w, sizeof(w)) != sizeof(w)) 772*4887Schin return 0; 773*4887Schin if (fp->decode.swap >= 0) 774*4887Schin { 775*4887Schin c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]); 776*4887Schin if (!fp->decode.swap) 777*4887Schin { 778*4887Schin /* 779*4887Schin * the old format uses machine 780*4887Schin * byte order; this test uses 781*4887Schin * the smallest magnitude of 782*4887Schin * both byte orders on the 783*4887Schin * first encoded path motion 784*4887Schin * to determine the original 785*4887Schin * byte order 786*4887Schin */ 787*4887Schin 788*4887Schin m = c; 789*4887Schin if (m < 0) 790*4887Schin m = -m; 791*4887Schin n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 792*4887Schin if (n < 0) 793*4887Schin n = -n; 794*4887Schin if (m < n) 795*4887Schin fp->decode.swap = 1; 796*4887Schin else 797*4887Schin { 798*4887Schin fp->decode.swap = -1; 799*4887Schin c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 800*4887Schin } 801*4887Schin } 802*4887Schin } 803*4887Schin else 804*4887Schin c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 805*4887Schin } 806*4887Schin fp->decode.count += c - FF_OFF; 807*4887Schin for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;) 808*4887Schin if (c & (1<<(CHAR_BIT-1))) 809*4887Schin { 810*4887Schin *p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)]; 811*4887Schin *p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)]; 812*4887Schin } 813*4887Schin else 814*4887Schin *p++ = c; 815*4887Schin *p-- = 0; 816*4887Schin t = 0; 817*4887Schin break; 818*4887Schin } 819*4887Schin b = fp->decode.path; 820*4887Schin if (fp->decode.found) 821*4887Schin fp->decode.found = 0; 822*4887Schin else 823*4887Schin b += fp->decode.count; 824*4887Schin if (fp->dirs) 825*4887Schin for (;;) 826*4887Schin { 827*4887Schin if (!*fp->dirs) 828*4887Schin return 0; 829*4887Schin 830*4887Schin /* 831*4887Schin * use the ordering and lengths to prune 832*4887Schin * comparison function calls 833*4887Schin * (*fp->dirs)[*fp->lens]=='/' if its 834*4887Schin * already been matched 835*4887Schin */ 836*4887Schin 837*4887Schin if ((n = p - fp->decode.path + 1) > (m = *fp->lens)) 838*4887Schin { 839*4887Schin if (!(*fp->dirs)[m]) 840*4887Schin goto next; 841*4887Schin if (!strncasecmp(*fp->dirs, fp->decode.path, m)) 842*4887Schin break; 843*4887Schin } 844*4887Schin else if (n == m) 845*4887Schin { 846*4887Schin if (!(*fp->dirs)[m]) 847*4887Schin { 848*4887Schin if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path))) 849*4887Schin { 850*4887Schin if (m > 0) 851*4887Schin { 852*4887Schin (*fp->dirs)[m] = '/'; 853*4887Schin if ((*fp->dirs)[m - 1] != '/') 854*4887Schin (*fp->dirs)[++(*fp->lens)] = '/'; 855*4887Schin } 856*4887Schin break; 857*4887Schin } 858*4887Schin if (n >= 0) 859*4887Schin goto next; 860*4887Schin } 861*4887Schin } 862*4887Schin else if (!(*fp->dirs)[m]) 863*4887Schin goto next; 864*4887Schin fp->dirs++; 865*4887Schin fp->lens++; 866*4887Schin } 867*4887Schin if (fp->verify && (*p == '/' || t == 1)) 868*4887Schin { 869*4887Schin if ((n = p - fp->decode.path)) 870*4887Schin *p = 0; 871*4887Schin else 872*4887Schin n = 1; 873*4887Schin if (fp->verifyf) 874*4887Schin n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc); 875*4887Schin else if (stat(fp->decode.path, &st)) 876*4887Schin n = -1; 877*4887Schin else if ((unsigned long)st.st_mtime > fp->stamp) 878*4887Schin n = 1; 879*4887Schin else 880*4887Schin n = 0; 881*4887Schin *p = '/'; 882*4887Schin 883*4887Schin /* 884*4887Schin * n<0 skip this subtree 885*4887Schin * n==0 keep as is 886*4887Schin * n>0 read this dir now 887*4887Schin */ 888*4887Schin 889*4887Schin /* NOT IMPLEMENTED YET */ 890*4887Schin } 891*4887Schin if (FF_OK_TYPE(fp, t)) 892*4887Schin { 893*4887Schin if (fp->decode.end) 894*4887Schin { 895*4887Schin if (*(s = p) == '/') 896*4887Schin s--; 897*4887Schin if (*fp->decode.pattern == '/' && b > fp->decode.path) 898*4887Schin b--; 899*4887Schin for (; s >= b; s--) 900*4887Schin if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end) 901*4887Schin { 902*4887Schin if (ignorecase) 903*4887Schin for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--); 904*4887Schin else 905*4887Schin for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--); 906*4887Schin if (!*e) 907*4887Schin { 908*4887Schin fp->decode.found = 1; 909*4887Schin if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase)) 910*4887Schin { 911*4887Schin fp->decode.peek = c; 912*4887Schin if (*p == '/') 913*4887Schin *(fp->decode.restore = p) = 0; 914*4887Schin if (!fp->secure || !access(fp->decode.path, F_OK)) 915*4887Schin return fp->decode.path; 916*4887Schin } 917*4887Schin break; 918*4887Schin } 919*4887Schin } 920*4887Schin } 921*4887Schin else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0))) 922*4887Schin { 923*4887Schin fp->decode.peek = c; 924*4887Schin if (*p == '/' && p > fp->decode.path) 925*4887Schin *(fp->decode.restore = p) = 0; 926*4887Schin if (!fp->secure || !access(fp->decode.path, F_OK)) 927*4887Schin return fp->decode.path; 928*4887Schin } 929*4887Schin else if (n != REG_NOMATCH) 930*4887Schin { 931*4887Schin if (fp->disc->errorf) 932*4887Schin { 933*4887Schin regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 934*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp); 935*4887Schin } 936*4887Schin return 0; 937*4887Schin } 938*4887Schin } 939*4887Schin } 940*4887Schin } 941*4887Schin 942*4887Schin /* 943*4887Schin * add path to the code table 944*4887Schin * paths are assumed to be in sort order 945*4887Schin */ 946*4887Schin 947*4887Schin int 948*4887Schin findwrite(register Find_t* fp, const char* path, size_t len, const char* type) 949*4887Schin { 950*4887Schin register unsigned char* s; 951*4887Schin register unsigned char* e; 952*4887Schin register unsigned char* p; 953*4887Schin register int n; 954*4887Schin register int d; 955*4887Schin register Type_t* x; 956*4887Schin register unsigned long u; 957*4887Schin 958*4887Schin if (!fp->generate) 959*4887Schin return -1; 960*4887Schin if (type && fp->method == FF_dir) 961*4887Schin { 962*4887Schin len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path); 963*4887Schin path = fp->encode.mark; 964*4887Schin } 965*4887Schin s = (unsigned char*)path; 966*4887Schin if (len <= 0) 967*4887Schin len = strlen(path); 968*4887Schin if (len < sizeof(fp->encode.path)) 969*4887Schin e = s + len++; 970*4887Schin else 971*4887Schin { 972*4887Schin len = sizeof(fp->encode.path) - 1; 973*4887Schin e = s + len; 974*4887Schin } 975*4887Schin p = (unsigned char*)fp->encode.path; 976*4887Schin while (s < e) 977*4887Schin { 978*4887Schin if (*s != *p++) 979*4887Schin break; 980*4887Schin s++; 981*4887Schin } 982*4887Schin n = s - (unsigned char*)path; 983*4887Schin switch (fp->method) 984*4887Schin { 985*4887Schin case FF_gnu: 986*4887Schin d = n - fp->encode.prefix; 987*4887Schin if (d >= -127 && d <= 127) 988*4887Schin sfputc(fp->fp, d & 0xff); 989*4887Schin else 990*4887Schin { 991*4887Schin sfputc(fp->fp, 0x80); 992*4887Schin sfputc(fp->fp, (d >> 8) & 0xff); 993*4887Schin sfputc(fp->fp, d & 0xff); 994*4887Schin } 995*4887Schin fp->encode.prefix = n; 996*4887Schin sfputr(fp->fp, (char*)s, 0); 997*4887Schin break; 998*4887Schin case FF_old: 999*4887Schin sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF); 1000*4887Schin fp->encode.prefix = n; 1001*4887Schin sfputc(fp->fp, ' '); 1002*4887Schin p = s; 1003*4887Schin while (s < e) 1004*4887Schin { 1005*4887Schin n = *s++; 1006*4887Schin if (s >= e) 1007*4887Schin break; 1008*4887Schin fp->encode.code[n][*s++]++; 1009*4887Schin } 1010*4887Schin while (p < e) 1011*4887Schin { 1012*4887Schin if ((n = *p++) < FF_MIN || n >= FF_MAX) 1013*4887Schin n = '?'; 1014*4887Schin sfputc(fp->fp, n); 1015*4887Schin } 1016*4887Schin sfputc(fp->fp, 0); 1017*4887Schin break; 1018*4887Schin case FF_typ: 1019*4887Schin if (type) 1020*4887Schin { 1021*4887Schin type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type); 1022*4887Schin if (x = (Type_t*)dtmatch(fp->encode.namedict, type)) 1023*4887Schin u = x->index; 1024*4887Schin else if (!(x = newof(0, Type_t, 1, strlen(type) + 1))) 1025*4887Schin u = 0; 1026*4887Schin else 1027*4887Schin { 1028*4887Schin u = x->index = ++fp->types; 1029*4887Schin strcpy(x->name, type); 1030*4887Schin dtinsert(fp->encode.namedict, x); 1031*4887Schin dtinsert(fp->encode.indexdict, x); 1032*4887Schin } 1033*4887Schin } 1034*4887Schin else 1035*4887Schin u = 0; 1036*4887Schin sfputu(fp->fp, u); 1037*4887Schin /*FALLTHROUGH...*/ 1038*4887Schin case FF_dir: 1039*4887Schin d = n - fp->encode.prefix; 1040*4887Schin sfputl(fp->fp, d); 1041*4887Schin fp->encode.prefix = n; 1042*4887Schin sfputr(fp->fp, (char*)s, 0); 1043*4887Schin break; 1044*4887Schin } 1045*4887Schin memcpy(fp->encode.path, path, len); 1046*4887Schin return 0; 1047*4887Schin } 1048*4887Schin 1049*4887Schin /* 1050*4887Schin * findsync() helper 1051*4887Schin */ 1052*4887Schin 1053*4887Schin static int 1054*4887Schin finddone(register Find_t* fp) 1055*4887Schin { 1056*4887Schin int r; 1057*4887Schin 1058*4887Schin if (sfsync(fp->fp)) 1059*4887Schin { 1060*4887Schin if (fp->disc->errorf) 1061*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file); 1062*4887Schin return -1; 1063*4887Schin } 1064*4887Schin if (sferror(fp->fp)) 1065*4887Schin { 1066*4887Schin if (fp->disc->errorf) 1067*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file); 1068*4887Schin return -1; 1069*4887Schin } 1070*4887Schin r = sfclose(fp->fp); 1071*4887Schin fp->fp = 0; 1072*4887Schin if (r) 1073*4887Schin { 1074*4887Schin if (fp->disc->errorf) 1075*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file); 1076*4887Schin return -1; 1077*4887Schin } 1078*4887Schin return 0; 1079*4887Schin } 1080*4887Schin 1081*4887Schin /* 1082*4887Schin * finish the code table 1083*4887Schin */ 1084*4887Schin 1085*4887Schin static int 1086*4887Schin findsync(register Find_t* fp) 1087*4887Schin { 1088*4887Schin register char* s; 1089*4887Schin register int n; 1090*4887Schin register int m; 1091*4887Schin register int d; 1092*4887Schin register Type_t* x; 1093*4887Schin char* t; 1094*4887Schin int b; 1095*4887Schin long z; 1096*4887Schin Sfio_t* sp; 1097*4887Schin 1098*4887Schin switch (fp->method) 1099*4887Schin { 1100*4887Schin case FF_dir: 1101*4887Schin case FF_gnu: 1102*4887Schin /* 1103*4887Schin * replace the real file with the temp file 1104*4887Schin */ 1105*4887Schin 1106*4887Schin if (finddone(fp)) 1107*4887Schin goto bad; 1108*4887Schin remove(fp->encode.file); 1109*4887Schin if (rename(fp->encode.temp, fp->encode.file)) 1110*4887Schin { 1111*4887Schin if (fp->disc->errorf) 1112*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp); 1113*4887Schin remove(fp->encode.temp); 1114*4887Schin return -1; 1115*4887Schin } 1116*4887Schin break; 1117*4887Schin case FF_old: 1118*4887Schin /* 1119*4887Schin * determine the top FF_MAX bigrams 1120*4887Schin */ 1121*4887Schin 1122*4887Schin for (n = 0; n < FF_MAX; n++) 1123*4887Schin for (m = 0; m < FF_MAX; m++) 1124*4887Schin fp->encode.hits[fp->encode.code[n][m]]++; 1125*4887Schin fp->encode.hits[0] = 0; 1126*4887Schin m = 1; 1127*4887Schin for (n = USHRT_MAX; n >= 0; n--) 1128*4887Schin if (d = fp->encode.hits[n]) 1129*4887Schin { 1130*4887Schin fp->encode.hits[n] = m; 1131*4887Schin if ((m += d) > FF_MAX) 1132*4887Schin break; 1133*4887Schin } 1134*4887Schin while (--n >= 0) 1135*4887Schin fp->encode.hits[n] = 0; 1136*4887Schin for (n = FF_MAX - 1; n >= 0; n--) 1137*4887Schin for (m = FF_MAX - 1; m >= 0; m--) 1138*4887Schin if (fp->encode.hits[fp->encode.code[n][m]]) 1139*4887Schin { 1140*4887Schin d = fp->encode.code[n][m]; 1141*4887Schin b = fp->encode.hits[d] - 1; 1142*4887Schin fp->encode.code[n][m] = b + FF_MAX; 1143*4887Schin if (fp->encode.hits[d]++ >= FF_MAX) 1144*4887Schin fp->encode.hits[d] = 0; 1145*4887Schin fp->encode.bigram[b *= 2] = n; 1146*4887Schin fp->encode.bigram[b + 1] = m; 1147*4887Schin } 1148*4887Schin else 1149*4887Schin fp->encode.code[n][m] = 0; 1150*4887Schin 1151*4887Schin /* 1152*4887Schin * commit the real file 1153*4887Schin */ 1154*4887Schin 1155*4887Schin if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET)) 1156*4887Schin { 1157*4887Schin if (fp->disc->errorf) 1158*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file"); 1159*4887Schin return -1; 1160*4887Schin } 1161*4887Schin if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1162*4887Schin goto badcreate; 1163*4887Schin 1164*4887Schin /* 1165*4887Schin * dump the bigrams 1166*4887Schin */ 1167*4887Schin 1168*4887Schin sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram)); 1169*4887Schin 1170*4887Schin /* 1171*4887Schin * encode the massaged paths 1172*4887Schin */ 1173*4887Schin 1174*4887Schin while (s = sfgetr(fp->fp, 0, 0)) 1175*4887Schin { 1176*4887Schin z = strtol(s, &t, 0); 1177*4887Schin s = t; 1178*4887Schin if (z < 0 || z > 2 * FF_OFF) 1179*4887Schin { 1180*4887Schin sfputc(sp, FF_ESC); 1181*4887Schin sfputc(sp, (z >> 24)); 1182*4887Schin sfputc(sp, (z >> 16)); 1183*4887Schin sfputc(sp, (z >> 8)); 1184*4887Schin sfputc(sp, z); 1185*4887Schin } 1186*4887Schin else 1187*4887Schin sfputc(sp, z); 1188*4887Schin while (n = *s++) 1189*4887Schin { 1190*4887Schin if (!(m = *s++)) 1191*4887Schin { 1192*4887Schin sfputc(sp, n); 1193*4887Schin break; 1194*4887Schin } 1195*4887Schin if (d = fp->encode.code[n][m]) 1196*4887Schin sfputc(sp, d); 1197*4887Schin else 1198*4887Schin { 1199*4887Schin sfputc(sp, n); 1200*4887Schin sfputc(sp, m); 1201*4887Schin } 1202*4887Schin } 1203*4887Schin } 1204*4887Schin sfclose(fp->fp); 1205*4887Schin fp->fp = sp; 1206*4887Schin if (finddone(fp)) 1207*4887Schin goto bad; 1208*4887Schin break; 1209*4887Schin case FF_typ: 1210*4887Schin if (finddone(fp)) 1211*4887Schin goto bad; 1212*4887Schin if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r"))) 1213*4887Schin { 1214*4887Schin if (fp->disc->errorf) 1215*4887Schin (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp); 1216*4887Schin remove(fp->encode.temp); 1217*4887Schin return -1; 1218*4887Schin } 1219*4887Schin 1220*4887Schin /* 1221*4887Schin * commit the output file 1222*4887Schin */ 1223*4887Schin 1224*4887Schin if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1225*4887Schin goto badcreate; 1226*4887Schin 1227*4887Schin /* 1228*4887Schin * write the header magic 1229*4887Schin */ 1230*4887Schin 1231*4887Schin sfputc(sp, 0); 1232*4887Schin sfputr(sp, FF_typ_magic, 0); 1233*4887Schin 1234*4887Schin /* 1235*4887Schin * write the type table in index order starting with 1 1236*4887Schin */ 1237*4887Schin 1238*4887Schin for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x)) 1239*4887Schin sfputr(sp, x->name, 0); 1240*4887Schin sfputc(sp, 0); 1241*4887Schin 1242*4887Schin /* 1243*4887Schin * append the front compressed strings 1244*4887Schin */ 1245*4887Schin 1246*4887Schin if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp)) 1247*4887Schin { 1248*4887Schin sfclose(sp); 1249*4887Schin if (fp->disc->errorf) 1250*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file); 1251*4887Schin goto bad; 1252*4887Schin } 1253*4887Schin sfclose(fp->fp); 1254*4887Schin fp->fp = sp; 1255*4887Schin if (finddone(fp)) 1256*4887Schin goto bad; 1257*4887Schin remove(fp->encode.temp); 1258*4887Schin break; 1259*4887Schin } 1260*4887Schin return 0; 1261*4887Schin badcreate: 1262*4887Schin if (fp->disc->errorf) 1263*4887Schin (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file); 1264*4887Schin bad: 1265*4887Schin if (fp->fp) 1266*4887Schin { 1267*4887Schin sfclose(fp->fp); 1268*4887Schin fp->fp = 0; 1269*4887Schin } 1270*4887Schin remove(fp->encode.temp); 1271*4887Schin return -1; 1272*4887Schin } 1273*4887Schin 1274*4887Schin /* 1275*4887Schin * close an open fastfind stream 1276*4887Schin */ 1277*4887Schin 1278*4887Schin int 1279*4887Schin findclose(register Find_t* fp) 1280*4887Schin { 1281*4887Schin int n = 0; 1282*4887Schin 1283*4887Schin if (!fp) 1284*4887Schin return -1; 1285*4887Schin if (fp->generate) 1286*4887Schin { 1287*4887Schin n = findsync(fp); 1288*4887Schin if (fp->encode.indexdict) 1289*4887Schin dtclose(fp->encode.indexdict); 1290*4887Schin if (fp->encode.namedict) 1291*4887Schin dtclose(fp->encode.namedict); 1292*4887Schin } 1293*4887Schin else 1294*4887Schin { 1295*4887Schin if (fp->decode.match) 1296*4887Schin regfree(&fp->decode.re); 1297*4887Schin n = 0; 1298*4887Schin } 1299*4887Schin if (fp->fp) 1300*4887Schin sfclose(fp->fp); 1301*4887Schin vmclose(fp->vm); 1302*4887Schin return n; 1303*4887Schin } 1304