1*40087Sbostic /* 2*40087Sbostic * Copyright (c) 1989 The Regents of the University of California. 3*40087Sbostic * All rights reserved. 4*40087Sbostic * 5*40087Sbostic * This code is derived from software contributed to Berkeley by 6*40087Sbostic * Guido van Rossum. 7*40087Sbostic * 8*40087Sbostic * Redistribution and use in source and binary forms are permitted 9*40087Sbostic * provided that the above copyright notice and this paragraph are 10*40087Sbostic * duplicated in all such forms and that any documentation, 11*40087Sbostic * advertising materials, and other materials related to such 12*40087Sbostic * distribution and use acknowledge that the software was developed 13*40087Sbostic * by the University of California, Berkeley. The name of the 14*40087Sbostic * University may not be used to endorse or promote products derived 15*40087Sbostic * from this software without specific prior written permission. 16*40087Sbostic * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 17*40087Sbostic * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 18*40087Sbostic * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 19*40087Sbostic */ 20*40087Sbostic 21*40087Sbostic #if defined(LIBC_SCCS) && !defined(lint) 22*40087Sbostic static char sccsid[] = "@(#)glob.c 5.1 (Berkeley) 02/15/90"; 23*40087Sbostic #endif /* LIBC_SCCS and not lint */ 24*40087Sbostic 25*40087Sbostic /* 26*40087Sbostic * Glob: the interface is a superset of the one defined in POSIX 1003.2, 27*40087Sbostic * draft 9. 28*40087Sbostic * 29*40087Sbostic * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 30*40087Sbostic * 31*40087Sbostic * Optional extra services, controlled by flags not defined by POSIX: 32*40087Sbostic * GLOB_QUOTE: escaping convention: \ inhibits any special meaning 33*40087Sbostic the following character might have (except \ at end of 34*40087Sbostic * string is kept); 35*40087Sbostic */ 36*40087Sbostic 37*40087Sbostic #include <sys/param.h> 38*40087Sbostic #include <sys/stat.h> 39*40087Sbostic #include <dirent.h> 40*40087Sbostic #include <glob.h> 41*40087Sbostic #include <ctype.h> 42*40087Sbostic #include <errno.h> 43*40087Sbostic #include <string.h> 44*40087Sbostic #include <stdio.h> 45*40087Sbostic 46*40087Sbostic char *malloc(), *realloc(); 47*40087Sbostic 48*40087Sbostic typedef int bool_t; 49*40087Sbostic 50*40087Sbostic #define DOLLAR '$' 51*40087Sbostic #define DOT '.' 52*40087Sbostic #define EOS '\0' 53*40087Sbostic #define LBRACKET '[' 54*40087Sbostic #define NOT '!' 55*40087Sbostic #define QUESTION '?' 56*40087Sbostic #define QUOTE '\\' 57*40087Sbostic #define RANGE '-' 58*40087Sbostic #define RBRACKET ']' 59*40087Sbostic #define SEP '/' 60*40087Sbostic #define STAR '*' 61*40087Sbostic #define TILDE '~' 62*40087Sbostic #define UNDERSCORE '_' 63*40087Sbostic 64*40087Sbostic #define METABIT 0x80 65*40087Sbostic #define META(c) ((c)|METABIT) 66*40087Sbostic #define M_ALL META('*') 67*40087Sbostic #define M_END META(']') 68*40087Sbostic #define M_NOT META('!') 69*40087Sbostic #define M_ONE META('?') 70*40087Sbostic #define M_RNG META('-') 71*40087Sbostic #define M_SET META('[') 72*40087Sbostic #define ismeta(c) (((c)&METABIT) != 0) 73*40087Sbostic 74*40087Sbostic static 75*40087Sbostic compare(p, q) 76*40087Sbostic char **p, **q; 77*40087Sbostic { 78*40087Sbostic return(strcmp(*p, *q)); 79*40087Sbostic } 80*40087Sbostic 81*40087Sbostic 82*40087Sbostic /* 83*40087Sbostic * The main glob() routine: compiles the pattern (optionally processing 84*40087Sbostic * quotes), calls glob1() to do the real pattern matching, and finally 85*40087Sbostic * sorts the list (unless unsorted operation is requested). Returns 0 86*40087Sbostic * if things went well, nonzero if errors occurred. It is not an error 87*40087Sbostic * to find no matches. 88*40087Sbostic */ 89*40087Sbostic glob(pattern, flags, errfunc, pglob) 90*40087Sbostic char *pattern; 91*40087Sbostic int flags, (*errfunc)(); 92*40087Sbostic glob_t *pglob; 93*40087Sbostic { 94*40087Sbostic int err, oldpathc; 95*40087Sbostic char *bufnext, *bufend, *compilebuf, *compilepat, *patnext; 96*40087Sbostic char c, patbuf[MAXPATHLEN+1]; 97*40087Sbostic 98*40087Sbostic patnext = pattern; 99*40087Sbostic if (!(flags & GLOB_APPEND)) { 100*40087Sbostic pglob->gl_pathc = 0; 101*40087Sbostic pglob->gl_pathv = NULL; 102*40087Sbostic if (!(flags & GLOB_DOOFFS)) 103*40087Sbostic pglob->gl_offs = 0; 104*40087Sbostic } 105*40087Sbostic pglob->gl_flags = flags; 106*40087Sbostic pglob->gl_errfunc = errfunc; 107*40087Sbostic oldpathc = pglob->gl_pathc; 108*40087Sbostic 109*40087Sbostic bufnext = patbuf; 110*40087Sbostic bufend = bufnext+MAXPATHLEN; 111*40087Sbostic 112*40087Sbostic compilebuf = bufnext; 113*40087Sbostic compilepat = patnext; 114*40087Sbostic while (bufnext < bufend && (c = *patnext++) != EOS) { 115*40087Sbostic switch (c) { 116*40087Sbostic case LBRACKET: 117*40087Sbostic c = *patnext; 118*40087Sbostic if (c == NOT) 119*40087Sbostic ++patnext; 120*40087Sbostic if (*patnext == EOS || 121*40087Sbostic strchr(patnext+1, RBRACKET) == NULL) { 122*40087Sbostic *bufnext++ = LBRACKET; 123*40087Sbostic if (c == NOT) 124*40087Sbostic --patnext; 125*40087Sbostic break; 126*40087Sbostic } 127*40087Sbostic *bufnext++ = M_SET; 128*40087Sbostic if (c == NOT) 129*40087Sbostic *bufnext++ = M_NOT; 130*40087Sbostic c = *patnext++; 131*40087Sbostic do { 132*40087Sbostic /* todo: quoting */ 133*40087Sbostic *bufnext++ = c; 134*40087Sbostic if (*patnext == RANGE && 135*40087Sbostic (c = patnext[1]) != RBRACKET) { 136*40087Sbostic *bufnext++ = M_RNG; 137*40087Sbostic *bufnext++ = c; 138*40087Sbostic patnext += 2; 139*40087Sbostic } 140*40087Sbostic } while ((c = *patnext++) != RBRACKET); 141*40087Sbostic *bufnext++ = M_END; 142*40087Sbostic break; 143*40087Sbostic case QUESTION: 144*40087Sbostic *bufnext++ = M_ONE; 145*40087Sbostic break; 146*40087Sbostic case QUOTE: 147*40087Sbostic if (!(flags & GLOB_QUOTE)) 148*40087Sbostic *bufnext++ = QUOTE; 149*40087Sbostic else { 150*40087Sbostic if ((c = *patnext++) == EOS) { 151*40087Sbostic c = QUOTE; 152*40087Sbostic --patnext; 153*40087Sbostic } 154*40087Sbostic *bufnext++ = c; 155*40087Sbostic } 156*40087Sbostic break; 157*40087Sbostic case STAR: 158*40087Sbostic *bufnext++ = M_ALL; 159*40087Sbostic break; 160*40087Sbostic default: 161*40087Sbostic *bufnext++ = c; 162*40087Sbostic break; 163*40087Sbostic } 164*40087Sbostic } 165*40087Sbostic *bufnext = EOS; 166*40087Sbostic 167*40087Sbostic if ((err = glob1(patbuf, pglob)) != 0) 168*40087Sbostic return(err); 169*40087Sbostic 170*40087Sbostic if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 171*40087Sbostic if (!(flags & GLOB_QUOTE)) 172*40087Sbostic (void)strcpy(compilebuf, compilepat); 173*40087Sbostic else { 174*40087Sbostic /* 175*40087Sbostic * copy pattern, interpreting quotes; this is slightly 176*40087Sbostic * different than the interpretation of quotes above 177*40087Sbostic * -- which should prevail? 178*40087Sbostic */ 179*40087Sbostic while (*compilepat != EOS) { 180*40087Sbostic if (*compilepat == QUOTE) { 181*40087Sbostic if (*++compilepat == EOS) 182*40087Sbostic --compilepat; 183*40087Sbostic } 184*40087Sbostic *compilebuf++ = *compilepat++; 185*40087Sbostic } 186*40087Sbostic *compilebuf = EOS; 187*40087Sbostic } 188*40087Sbostic return(globextend(patbuf, pglob)); 189*40087Sbostic } else if (!(flags & GLOB_NOSORT)) 190*40087Sbostic qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc), 191*40087Sbostic pglob->gl_pathc - oldpathc, sizeof(char*), compare); 192*40087Sbostic return(0); 193*40087Sbostic } 194*40087Sbostic 195*40087Sbostic static 196*40087Sbostic glob1(pattern, pglob) 197*40087Sbostic char *pattern; 198*40087Sbostic glob_t *pglob; 199*40087Sbostic { 200*40087Sbostic char pathbuf[MAXPATHLEN+1]; 201*40087Sbostic 202*40087Sbostic /* 203*40087Sbostic * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 204*40087Sbostic if (*pattern == EOS) 205*40087Sbostic return(0); 206*40087Sbostic return(glob2(pathbuf, pathbuf, pattern, pglob)); 207*40087Sbostic } 208*40087Sbostic 209*40087Sbostic /* 210*40087Sbostic * functions glob2 and glob3 are mutually recursive; there is one level 211*40087Sbostic * of recursion for each segment in the pattern that contains one or 212*40087Sbostic * more meta characters. 213*40087Sbostic */ 214*40087Sbostic static 215*40087Sbostic glob2(pathbuf, pathend, pattern, pglob) 216*40087Sbostic char *pathbuf, *pathend, *pattern; 217*40087Sbostic glob_t *pglob; 218*40087Sbostic { 219*40087Sbostic struct stat sbuf; 220*40087Sbostic bool_t anymeta = 0; 221*40087Sbostic char *p, *q; 222*40087Sbostic 223*40087Sbostic /* 224*40087Sbostic * loop over pattern segments until end of pattern or until 225*40087Sbostic * segment with meta character found. 226*40087Sbostic */ 227*40087Sbostic for (;;) { 228*40087Sbostic if (*pattern == EOS) { /* end of pattern? */ 229*40087Sbostic *pathend = EOS; 230*40087Sbostic if (stat(pathbuf, &sbuf) != 0) 231*40087Sbostic return(0); /* need error call here? */ 232*40087Sbostic if ((pglob->gl_flags & GLOB_MARK) && 233*40087Sbostic pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) { 234*40087Sbostic *pathend++ = SEP; 235*40087Sbostic *pathend = EOS; 236*40087Sbostic } 237*40087Sbostic return(globextend(pathbuf, pglob)); 238*40087Sbostic } 239*40087Sbostic 240*40087Sbostic /* find end of next segment, copy tentatively to pathend */ 241*40087Sbostic q = pathend; 242*40087Sbostic p = pattern; 243*40087Sbostic while (*p != EOS && *p != SEP) { 244*40087Sbostic if (ismeta(*p)) 245*40087Sbostic anymeta = 1; 246*40087Sbostic *q++ = *p++; 247*40087Sbostic } 248*40087Sbostic 249*40087Sbostic if (!anymeta) { /* no expansion, do next segment */ 250*40087Sbostic pathend = q; 251*40087Sbostic pattern = p; 252*40087Sbostic while (*pattern == SEP) 253*40087Sbostic *pathend++ = *pattern++; 254*40087Sbostic } else /* need expansion, recurse */ 255*40087Sbostic return(glob3(pathbuf, pathend, pattern, p, pglob)); 256*40087Sbostic } 257*40087Sbostic /* NOTREACHED */ 258*40087Sbostic } 259*40087Sbostic 260*40087Sbostic static 261*40087Sbostic glob3(pathbuf, pathend, pattern, restpattern, pglob) 262*40087Sbostic char *pathbuf, *pathend, *pattern, *restpattern; 263*40087Sbostic glob_t *pglob; 264*40087Sbostic { 265*40087Sbostic extern int errno; 266*40087Sbostic DIR *dirp; 267*40087Sbostic struct dirent *dp; 268*40087Sbostic int len, err; 269*40087Sbostic 270*40087Sbostic *pathend = EOS; 271*40087Sbostic errno = 0; 272*40087Sbostic if (!(dirp = opendir(pathbuf))) 273*40087Sbostic /* todo: don't call for ENOENT or ENOTDIR? */ 274*40087Sbostic if (pglob->gl_errfunc && 275*40087Sbostic (*pglob->gl_errfunc)(pathbuf, errno) || 276*40087Sbostic (pglob->gl_flags & GLOB_ERR)) 277*40087Sbostic return(GLOB_ABEND); 278*40087Sbostic else 279*40087Sbostic return(0); 280*40087Sbostic 281*40087Sbostic err = 0; 282*40087Sbostic 283*40087Sbostic /* search directory for matching names */ 284*40087Sbostic while ((dp = readdir(dirp))) { 285*40087Sbostic /* initial DOT must be matched literally */ 286*40087Sbostic if (dp->d_name[0] == DOT && *pattern != DOT) 287*40087Sbostic continue; 288*40087Sbostic if (!match(dp->d_name, pattern, restpattern)) 289*40087Sbostic continue; 290*40087Sbostic len = dp->d_namlen; 291*40087Sbostic (void)strcpy(pathend, dp->d_name); 292*40087Sbostic err = glob2(pathbuf, pathend+len, restpattern, pglob); 293*40087Sbostic if (err) 294*40087Sbostic break; 295*40087Sbostic } 296*40087Sbostic /* todo: check error from readdir? */ 297*40087Sbostic (void)closedir(dirp); 298*40087Sbostic return(err); 299*40087Sbostic } 300*40087Sbostic 301*40087Sbostic 302*40087Sbostic /* 303*40087Sbostic * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 304*40087Sbostic * add the new item, and update gl_pathc. 305*40087Sbostic * 306*40087Sbostic * This assumes the BSD realloc, which only copies the block when its size 307*40087Sbostic * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 308*40087Sbostic * behavior. 309*40087Sbostic * 310*40087Sbostic * Return 0 if new item added, error code if memory couldn't be allocated. 311*40087Sbostic * 312*40087Sbostic * Invariant of the glob_t structure: 313*40087Sbostic * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 314*40087Sbostic * gl_pathv points to (gl_offs + gl_pathc + 1) items. 315*40087Sbostic */ 316*40087Sbostic static 317*40087Sbostic globextend(path, pglob) 318*40087Sbostic char *path; 319*40087Sbostic glob_t *pglob; 320*40087Sbostic { 321*40087Sbostic register char **pathv; 322*40087Sbostic register int i; 323*40087Sbostic u_int copysize, newsize; 324*40087Sbostic char *copy; 325*40087Sbostic 326*40087Sbostic newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 327*40087Sbostic pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize); 328*40087Sbostic if (pathv == NULL) 329*40087Sbostic return(GLOB_NOSPACE); 330*40087Sbostic 331*40087Sbostic if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 332*40087Sbostic /* first time around -- clear initial gl_offs items */ 333*40087Sbostic pathv += pglob->gl_offs; 334*40087Sbostic for (i = pglob->gl_offs; --i >= 0; ) 335*40087Sbostic *--pathv = NULL; 336*40087Sbostic } 337*40087Sbostic pglob->gl_pathv = pathv; 338*40087Sbostic 339*40087Sbostic copysize = strlen(path) + 1; 340*40087Sbostic if ((copy = malloc(copysize)) != NULL) { 341*40087Sbostic (void)strcpy(copy, path); 342*40087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 343*40087Sbostic } 344*40087Sbostic pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 345*40087Sbostic return((copy == NULL) ? GLOB_NOSPACE : 0); 346*40087Sbostic } 347*40087Sbostic 348*40087Sbostic 349*40087Sbostic /* 350*40087Sbostic * pattern matching function for filenames. Each occurrence of the * 351*40087Sbostic * pattern causes a recursion level. 352*40087Sbostic */ 353*40087Sbostic static bool_t 354*40087Sbostic match(name, pat, patend) 355*40087Sbostic register char *name, *pat, *patend; 356*40087Sbostic { 357*40087Sbostic bool_t ok, negate_range; 358*40087Sbostic char c, k; 359*40087Sbostic 360*40087Sbostic while (pat < patend) { 361*40087Sbostic c = *pat++; 362*40087Sbostic switch (c & 0xff) { 363*40087Sbostic case M_ALL: 364*40087Sbostic if (pat == patend) 365*40087Sbostic return(1); 366*40087Sbostic for (; *name != EOS; ++name) { 367*40087Sbostic if (match(name, pat, patend)) 368*40087Sbostic return(1); 369*40087Sbostic } 370*40087Sbostic return(0); 371*40087Sbostic case M_ONE: 372*40087Sbostic if (*name++ == EOS) 373*40087Sbostic return(0); 374*40087Sbostic break; 375*40087Sbostic case M_SET: 376*40087Sbostic ok = 0; 377*40087Sbostic k = *name++; 378*40087Sbostic if (negate_range = (*pat & 0xff) == M_NOT) 379*40087Sbostic ++pat; 380*40087Sbostic while (((c = *pat++) & 0xff) != M_END) { 381*40087Sbostic if ((*pat & 0xff) == M_RNG) { 382*40087Sbostic if (c <= k && k <= pat[1]) 383*40087Sbostic ok = 1; 384*40087Sbostic pat += 2; 385*40087Sbostic } 386*40087Sbostic else if (c == k) 387*40087Sbostic ok = 1; 388*40087Sbostic } 389*40087Sbostic if (ok == negate_range) 390*40087Sbostic return(0); 391*40087Sbostic break; 392*40087Sbostic default: 393*40087Sbostic if (*name++ != c) 394*40087Sbostic return(0); 395*40087Sbostic break; 396*40087Sbostic } 397*40087Sbostic } 398*40087Sbostic return(*name == EOS); 399*40087Sbostic } 400*40087Sbostic 401*40087Sbostic /* free allocated data belonging to a glob_t structure */ 402*40087Sbostic void 403*40087Sbostic globfree(pglob) 404*40087Sbostic glob_t *pglob; 405*40087Sbostic { 406*40087Sbostic register int i; 407*40087Sbostic register char **pp; 408*40087Sbostic 409*40087Sbostic if (pglob->gl_pathv != NULL) { 410*40087Sbostic pp = pglob->gl_pathv + pglob->gl_offs; 411*40087Sbostic for (i = pglob->gl_pathc; i--; ++pp) 412*40087Sbostic if (*pp) 413*40087Sbostic (void)free(*pp); 414*40087Sbostic (void)free((char *)pp); 415*40087Sbostic } 416*40087Sbostic } 417