xref: /openbsd-src/usr.bin/make/dir.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenPackages$ */
2 /*	$OpenBSD: dir.c,v 1.34 2001/05/29 12:53:39 espie Exp $ */
3 /*	$NetBSD: dir.c,v 1.14 1997/03/29 16:51:26 christos Exp $	*/
4 
5 /*
6  * Copyright (c) 1999 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
33  * Copyright (c) 1988, 1989 by Adam de Boor
34  * Copyright (c) 1989 by Berkeley Softworks
35  * All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Adam de Boor.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed by the University of
51  *	California, Berkeley and its contributors.
52  * 4. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  */
68 
69 #include <sys/param.h>
70 #include <sys/types.h>
71 #include <sys/stat.h>
72 #include <dirent.h>
73 #include <limits.h>
74 #include <stddef.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include "config.h"
79 #include "defines.h"
80 #include "ohash.h"
81 #include "dir.h"
82 #include "lst.h"
83 #include "memory.h"
84 #include "buf.h"
85 #include "gnode.h"
86 #include "arch.h"
87 #include "targ.h"
88 #include "error.h"
89 #include "str.h"
90 #include "timestamp.h"
91 
92 
93 typedef struct Path_ {
94     int 	  refCount;	/* Number of paths with this directory */
95 #ifdef DEBUG_DIRECTORY_CACHE
96     int 	  hits; 	/* the number of times a file in this
97 				 * directory has been found */
98 #endif
99     struct ohash   files;	/* Hash table of files in directory */
100     char	  name[1];	/* Name of directory */
101 } Path;
102 
103 /*	A search path consists of a Lst of Path structures. A Path structure
104  *	has in it the name of the directory and a hash table of all the files
105  *	in the directory. This is used to cut down on the number of system
106  *	calls necessary to find implicit dependents and their like. Since
107  *	these searches are made before any actions are taken, we need not
108  *	worry about the directory changing due to creation commands. If this
109  *	hampers the style of some makefiles, they must be changed.
110  *
111  *	A list of all previously-read directories is kept in the
112  *	openDirectories cache.
113  *
114  *	The need for the caching of whole directories is brought about by
115  *	the multi-level transformation code in suff.c, which tends to search
116  *	for far more files than regular make does. In the initial
117  *	implementation, the amount of time spent performing "stat" calls was
118  *	truly astronomical. The problem with hashing at the start is,
119  *	of course, that pmake doesn't then detect changes to these directories
120  *	during the course of the make. Three possibilities suggest themselves:
121  *
122  *	    1) just use stat to test for a file's existence. As mentioned
123  *	       above, this is very inefficient due to the number of checks
124  *	       engendered by the multi-level transformation code.
125  *	    2) use readdir() and company to search the directories, keeping
126  *	       them open between checks. I have tried this and while it
127  *	       didn't slow down the process too much, it could severely
128  *	       affect the amount of parallelism available as each directory
129  *	       open would take another file descriptor out of play for
130  *	       handling I/O for another job. Given that it is only recently
131  *	       that UNIX OS's have taken to allowing more than 20 or 32
132  *	       file descriptors for a process, this doesn't seem acceptable
133  *	       to me.
134  *	    3) record the mtime of the directory in the Path structure and
135  *	       verify the directory hasn't changed since the contents were
136  *	       hashed. This will catch the creation or deletion of files,
137  *	       but not the updating of files. However, since it is the
138  *	       creation and deletion that is the problem, this could be
139  *	       a good thing to do. Unfortunately, if the directory (say ".")
140  *	       were fairly large and changed fairly frequently, the constant
141  *	       rehashing could seriously degrade performance. It might be
142  *	       good in such cases to keep track of the number of rehashes
143  *	       and if the number goes over a (small) limit, resort to using
144  *	       stat in its place.
145  *
146  *	An additional thing to consider is that pmake is used primarily
147  *	to create C programs and until recently pcc-based compilers refused
148  *	to allow you to specify where the resulting object file should be
149  *	placed. This forced all objects to be created in the current
150  *	directory. This isn't meant as a full excuse, just an explanation of
151  *	some of the reasons for the caching used here.
152  *
153  *	One more note: the location of a target's file is only performed
154  *	on the downward traversal of the graph and then only for terminal
155  *	nodes in the graph. This could be construed as wrong in some cases,
156  *	but prevents inadvertent modification of files when the "installed"
157  *	directory for a file is provided in the search path.
158  *
159  *	Another data structure maintained by this module is an mtime
160  *	cache used when the searching of cached directories fails to find
161  *	a file. In the past, Dir_FindFile would simply perform an access()
162  *	call in such a case to determine if the file could be found using
163  *	just the name given. When this hit, however, all that was gained
164  *	was the knowledge that the file existed. Given that an access() is
165  *	essentially a stat() without the copyout() call, and that the same
166  *	filesystem overhead would have to be incurred in Dir_MTime, it made
167  *	sense to replace the access() with a stat() and record the mtime
168  *	in a cache for when Dir_MTime was actually called.  */
169 
170 static LIST   thedirSearchPath;		/* main search path */
171 Lst	      dirSearchPath= &thedirSearchPath;
172 
173 #ifdef DEBUG_DIRECTORY_CACHE
174 /* Variables for gathering statistics on the efficiency of the hashing
175  * mechanism.  */
176 static int    hits,			/* Found in directory cache */
177 	      misses,			/* Sad, but not evil misses */
178 	      nearmisses,		/* Found under search path */
179 	      bigmisses;		/* Sought by itself */
180 #endif
181 
182 static Path	  *dot; 		/* contents of current directory */
183 
184 struct file_stamp {
185 	TIMESTAMP mtime;		/* time stamp... */
186 	char name[1];			/* ...for that file.  */
187 };
188 
189 static struct ohash   openDirectories;	/* cache all open directories */
190 
191 /* Global structure used to cache mtimes.  XXX We don't cache an mtime
192  * before a caller actually looks up for the given time, because of the
193  * possibility a caller might update the file and invalidate the cache
194  * entry, and we don't look up in this cache except as a last resort.
195  */
196 static struct ohash mtimes;
197 
198 
199 /* There are three distinct hash structures:
200  * - to collate files's last modification times (global mtimes)
201  * - to collate file names (in each Path structure)
202  * - to collate known directories (global openDirectories).  */
203 static struct ohash_info stamp_info = { offsetof(struct file_stamp, name),
204     NULL, hash_alloc, hash_free, element_alloc };
205 
206 static struct ohash_info file_info = { 0,
207     NULL, hash_alloc, hash_free, element_alloc };
208 
209 static struct ohash_info dir_info = { offsetof(Path, name),
210     NULL, hash_alloc, hash_free, element_alloc };
211 
212 /* add_file(path, name): add a file name to a path hash structure. */
213 static void add_file(Path *, const char *);
214 /* n = find_file_hashi(p, name, end, hv): retrieve name in a path hash
215  * 	structure. */
216 static char *find_file_hashi(Path *, const char *, const char *, u_int32_t);
217 
218 /* stamp = find_stampi(name, end): look for (name, end) in the global
219  *	cache. */
220 static struct file_stamp *find_stampi(const char *, const char *);
221 /* record_stamp(name, timestamp): record timestamp for name in the global
222  * 	cache. */
223 static void record_stamp(const char *, TIMESTAMP);
224 
225 /* free_hash(o): free a ohash structure, where each element can be free'd. */
226 static void free_hash(struct ohash *);
227 
228 /* p = DirReaddiri(name, end): read an actual directory, caching results
229  * 	as we go.  */
230 static Path *DirReaddiri(const char *, const char *);
231 /* Handles wildcard expansion on a given directory. */
232 static void DirMatchFilesi(const char *, const char *, Path *, Lst);
233 /* Handles simple wildcard expansion on a path. */
234 static void PathMatchFilesi(const char *, const char *, Lst, Lst);
235 /* Handles wildcards expansion except for curly braces. */
236 static void DirExpandWildi(const char *, const char *, Lst, Lst);
237 #define DirExpandWild(s, l1, l2) DirExpandWildi(s, strchr(s, '\0'), l1, l2)
238 /* Handles wildcard expansion including curly braces. */
239 static void DirExpandCurlyi(const char *, const char *, Lst, Lst);
240 
241 /* Debugging: show each word in an expansion list. */
242 static void DirPrintWord(void *);
243 /* Debugging: show a dir name in a path. */
244 static void DirPrintDir(void *);
245 
246 static void
247 record_stamp(file, t)
248     const char		*file;
249     TIMESTAMP		t;
250 {
251     unsigned		slot;
252     const char		*end = NULL;
253     struct file_stamp	*n;
254 
255     slot = ohash_qlookupi(&mtimes, file, &end);
256     n = ohash_find(&mtimes, slot);
257     if (n)
258 	n->mtime = t;
259     else {
260 	n = ohash_create_entry(&stamp_info, file, &end);
261 	n->mtime = t;
262 	ohash_insert(&mtimes, slot, n);
263     }
264 }
265 
266 static struct file_stamp *
267 find_stampi(file, end)
268     const char	*file;
269     const char	*end;
270 {
271     return ohash_find(&mtimes, ohash_qlookupi(&mtimes, file, &end));
272 }
273 
274 static void
275 add_file(p, file)
276     Path		*p;
277     const char		*file;
278 {
279     unsigned		slot;
280     const char		*end = NULL;
281     char		*n;
282     struct ohash 	*h = &p->files;
283 
284     slot = ohash_qlookupi(h, file, &end);
285     n = ohash_find(h, slot);
286     if (n == NULL) {
287 	n = ohash_create_entry(&file_info, file, &end);
288 	ohash_insert(h, slot, n);
289     }
290 }
291 
292 static char *
293 find_file_hashi(p, file, e, hv)
294     Path		*p;
295     const char		*file;
296     const char		*e;
297     u_int32_t		hv;
298 {
299     struct ohash 	*h = &p->files;
300 
301     return ohash_find(h, ohash_lookup_interval(h, file, e, hv));
302 }
303 
304 static void
305 free_hash(h)
306     struct ohash 	*h;
307 {
308     void		*e;
309     unsigned		i;
310 
311     for (e = ohash_first(h, &i); e != NULL; e = ohash_next(h, &i))
312 	free(e);
313     ohash_delete(h);
314 }
315 
316 
317 /* Side Effects: cache the current directory */
318 void
319 Dir_Init()
320 {
321     char *dotname = ".";
322 
323     Lst_Init(dirSearchPath);
324     ohash_init(&openDirectories, 4, &dir_info);
325     ohash_init(&mtimes, 4, &stamp_info);
326 
327 
328     dot = DirReaddiri(dotname, dotname+1);
329 
330     if (!dot)
331     	Error("Can't access current directory");
332 
333     /* We always need to have dot around, so we increment its reference count
334      * to make sure it won't be destroyed.  */
335     dot->refCount++;
336 }
337 
338 #ifdef CLEANUP
339 void
340 Dir_End()
341 {
342     struct Path *p;
343     unsigned int i;
344 
345     dot->refCount--;
346     Dir_Destroy(dot);
347     Lst_Destroy(dirSearchPath, Dir_Destroy);
348     for (p = ohash_first(&openDirectories, &i); p != NULL;
349 	p = ohash_next(&openDirectories, &i))
350 	    Dir_Destroy(p);
351     ohash_delete(&openDirectories);
352     free_hash(&mtimes);
353 }
354 #endif
355 
356 
357 /* XXX: This code is not 100% correct ([^]] fails) */
358 bool
359 Dir_HasWildcardsi(name, end)
360     const char		*name;
361     const char 		*end;
362 {
363     const char		*cp;
364     bool		wild = false;
365     unsigned long	brace = 0, bracket = 0;
366 
367     for (cp = name; cp != end; cp++) {
368 	switch (*cp) {
369 	case '{':
370 	    brace++;
371 	    wild = true;
372 	    break;
373 	case '}':
374 	    if (brace == 0)
375 		return false;
376 	    brace--;
377 	    break;
378 	case '[':
379 	    bracket++;
380 	    wild = true;
381 	    break;
382 	case ']':
383 	    if (bracket == 0)
384 		return false;
385 	    bracket--;
386 	    break;
387 	case '?':
388 	case '*':
389 	    wild = true;
390 	    break;
391 	default:
392 	    break;
393 	}
394     }
395     return wild && bracket == 0 && brace == 0;
396 }
397 
398 /*-
399  *-----------------------------------------------------------------------
400  * DirMatchFilesi --
401  *	Given a pattern and a Path structure, see if any files
402  *	match the pattern and add their names to the 'expansions' list if
403  *	any do. This is incomplete -- it doesn't take care of patterns like
404  *	src / *src / *.c properly (just *.c on any of the directories), but it
405  *	will do for now.
406  *-----------------------------------------------------------------------
407  */
408 static void
409 DirMatchFilesi(pattern, end, p, expansions)
410     const char		*pattern;	/* Pattern to look for */
411     const char		*end;		/* End of pattern */
412     Path		*p;		/* Directory to search */
413     Lst 		expansions;	/* Place to store the results */
414 {
415     unsigned int	search; 	/* Index into the directory's table */
416     const char		*entry; 	/* Current entry in the table */
417     bool		isDot;		/* Is the directory "." ? */
418 
419     isDot = p->name[0] == '.' && p->name[1] == '\0';
420 
421     for (entry = ohash_first(&p->files, &search); entry != NULL;
422 	 entry = ohash_next(&p->files, &search)) {
423 	/* See if the file matches the given pattern. We follow the UNIX
424 	 * convention that dot files will only be found if the pattern
425 	 * begins with a dot (the hashing scheme doesn't hash . or ..,
426 	 * so they won't match `.*'.  */
427 	if (*pattern != '.' && *entry == '.')
428 	    continue;
429 	if (Str_Matchi(entry, strchr(entry, '\0'), pattern, end))
430 	    Lst_AtEnd(expansions,
431 		isDot ? estrdup(entry) : Str_concat(p->name, entry, '/'));
432     }
433 }
434 
435 /*-
436  *-----------------------------------------------------------------------
437  * PathMatchFilesi --
438  *	Traverse directories in the path, calling DirMatchFiles for each.
439  *	NOTE: This doesn't handle patterns in directories.
440  *-----------------------------------------------------------------------
441  */
442 static void
443 PathMatchFilesi(word, end, path, expansions)
444     const char	*word;		/* Word to expand */
445     const char  *end;		/* End of word */
446     Lst 	path;		/* Path on which to look */
447     Lst 	expansions;	/* Place to store the result */
448 {
449     LstNode	ln;		/* Current node */
450 
451     for (ln = Lst_First(path); ln != NULL; ln = Lst_Adv(ln))
452 	DirMatchFilesi(word, end, (Path *)Lst_Datum(ln), expansions);
453 }
454 
455 static void
456 DirPrintWord(word)
457     void	*word;
458 {
459     printf("%s ", (char *)word);
460 }
461 
462 /*-
463  *-----------------------------------------------------------------------
464  * DirExpandWildi:
465  *	Expand all wild cards in a fully qualified name, except for
466  *	curly braces.
467  * Side-effect:
468  *	Will hash any directory in which a file is found, and add it to
469  *	the path, on the assumption that future lookups will find files
470  *	there as well.
471  *-----------------------------------------------------------------------
472  */
473 static void
474 DirExpandWildi(word, end, path, expansions)
475     const char	*word;		/* the word to expand */
476     const char  *end;		/* end of word */
477     Lst 	path;		/* the list of directories in which to find
478 				 * the resulting files */
479     Lst 	expansions;	/* the list on which to place the results */
480 {
481     const char	*cp;
482     const char	*slash; 	/* keep track of first slash before wildcard */
483 
484     slash = memchr(word, '/', end - word);
485     if (slash == NULL) {
486 	/* First the files in dot.  */
487 	DirMatchFilesi(word, end, dot, expansions);
488 
489 	/* Then the files in every other directory on the path.  */
490 	PathMatchFilesi(word, end, path, expansions);
491 	return;
492     }
493     /* The thing has a directory component -- find the first wildcard
494      * in the string.  */
495     slash = word;
496     for (cp = word; cp != end; cp++) {
497 	if (*cp == '/')
498 	    slash = cp;
499 	if (*cp == '?' || *cp == '[' || *cp == '*') {
500 
501 	    if (slash != word) {
502 		char	*dirpath;
503 
504 		/* If the glob isn't in the first component, try and find
505 		 * all the components up to the one with a wildcard.  */
506 		dirpath = Dir_FindFilei(word, slash+1, path);
507 		/* dirpath is null if we can't find the leading component
508 		 * XXX: Dir_FindFile won't find internal components.
509 		 * i.e. if the path contains ../Etc/Object and we're
510 		 * looking for Etc, it won't be found. */
511 		if (dirpath != NULL) {
512 		    char *dp;
513 		    LIST temp;
514 
515 		    dp = strchr(dirpath, '\0');
516 		    while (dp > dirpath && dp[-1] == '/')
517 		    	dp--;
518 
519 		    Lst_Init(&temp);
520 		    Dir_AddDiri(&temp, dirpath, dp);
521 		    PathMatchFilesi(slash+1, end, &temp, expansions);
522 		    Lst_Destroy(&temp, NOFREE);
523 		}
524 	    } else
525 		/* Start the search from the local directory.  */
526 		PathMatchFilesi(word, end, path, expansions);
527 	    return;
528 	}
529     }
530     /* Return the file -- this should never happen.  */
531     PathMatchFilesi(word, end, path, expansions);
532 }
533 
534 /*-
535  *-----------------------------------------------------------------------
536  * DirExpandCurly --
537  *	Expand curly braces like the C shell, and other wildcards as per
538  *	Str_Match.
539  *	XXX: if curly expansion yields a result with
540  *	no wildcards, the result is placed on the list WITHOUT CHECKING
541  *	FOR ITS EXISTENCE.
542  *-----------------------------------------------------------------------
543  */
544 static void
545 DirExpandCurlyi(word, endw, path, expansions)
546     const char	*word;		/* Entire word to expand */
547     const char  *endw;		/* End of word */
548     Lst 	path;		/* Search path to use */
549     Lst 	expansions;	/* Place to store the expansions */
550 {
551     const char	*cp2;		/* Pointer for checking for wildcards in
552 				 * expansion before calling Dir_Expand */
553     LIST	curled; 	/* Queue of words to expand */
554     char	*toexpand;	/* Current word to expand */
555     bool	dowild; 	/* Wildcard left after curlies ? */
556 
557     /* Determine once and for all if there is something else going on */
558     dowild = false;
559     for (cp2 = word; cp2 != endw; cp2++)
560 	if (*cp2 == '*' || *cp2 == '?' || *cp2 == '[') {
561 		dowild = true;
562 		break;
563 	}
564 
565     /* Prime queue with copy of initial word */
566     Lst_Init(&curled);
567     Lst_EnQueue(&curled, Str_dupi(word, endw));
568     while ((toexpand = (char *)Lst_DeQueue(&curled)) != NULL) {
569 	const char	*brace;
570 	const char	*start; /* Start of current chunk of brace clause */
571 	const char	*end;	/* Character after the closing brace */
572 	int		bracelevel;
573 				/* Keep track of nested braces. If we hit
574 				 * the right brace with bracelevel == 0,
575 				 * this is the end of the clause. */
576 	size_t		otherLen;
577 				/* The length of the non-curlied part of
578 				 * the current expansion */
579 
580 	/* End case: no curly left to expand */
581 	brace = strchr(toexpand, '{');
582 	if (brace == NULL) {
583 	    if (dowild) {
584 		DirExpandWild(toexpand, path, expansions);
585 		free(toexpand);
586 	    } else
587 		Lst_AtEnd(expansions, toexpand);
588 	    continue;
589 	}
590 
591 	start = brace+1;
592 
593 	/* Find the end of the brace clause first, being wary of nested brace
594 	 * clauses.  */
595 	for (end = start, bracelevel = 0;; end++) {
596 	    if (*end == '{')
597 		bracelevel++;
598 	    else if (*end == '\0') {
599 		Error("Unterminated {} clause \"%s\"", start);
600 		return;
601 	    } else if (*end == '}' && bracelevel-- == 0)
602 		break;
603 	}
604 	end++;
605 	otherLen = brace - toexpand + strlen(end);
606 
607 	for (;;) {
608 	    char	*file;	/* To hold current expansion */
609 	    const char	*cp;	/* Current position in brace clause */
610 
611 	    /* Find the end of the current expansion */
612 	    for (bracelevel = 0, cp = start;
613 	    	bracelevel != 0 || (*cp != '}' && *cp != ','); cp++) {
614 		if (*cp == '{')
615 		    bracelevel++;
616 		else if (*cp == '}')
617 		    bracelevel--;
618 	    }
619 
620 	    /* Build the current combination and enqueue it.  */
621 	    file = emalloc(otherLen + cp - start + 1);
622 	    if (brace != toexpand)
623 	    	memcpy(file, toexpand, brace-toexpand);
624 	    if (cp != start)
625 	    	memcpy(file+(brace-toexpand), start, cp-start);
626 	    strcpy(file+(brace-toexpand)+(cp-start), end);
627 	    Lst_EnQueue(&curled, file);
628 	    if (*cp == '}')
629 	    	break;
630 	    start = cp+1;
631 	}
632 	free(toexpand);
633     }
634 }
635 
636 /* Side effects:
637  * 	Dir_Expandi will hash directories that were not yet visited */
638 void
639 Dir_Expandi(word, end, path, expansions)
640     const char	*word;		/* the word to expand */
641     const char  *end;		/* end of word */
642     Lst 	path;		/* the list of directories in which to find
643 				 * the resulting files */
644     Lst 	expansions;	/* the list on which to place the results */
645 {
646     const char	*cp;
647 
648     if (DEBUG(DIR)) {
649     	char *s = Str_dupi(word, end);
650 	printf("expanding \"%s\"...", s);
651 	free(s);
652     }
653 
654     cp = memchr(word, '{', end - word);
655     if (cp)
656 	DirExpandCurlyi(word, end, path, expansions);
657     else
658 	DirExpandWildi(word, end, path, expansions);
659 
660     if (DEBUG(DIR)) {
661 	Lst_Every(expansions, DirPrintWord);
662 	fputc('\n', stdout);
663     }
664 }
665 
666 
667 /*-
668  * Side Effects:
669  *	If the file is found in a directory which is not on the path
670  *	already (either 'name' is absolute or it is a relative path
671  *	[ dir1/.../dirn/file ] which exists below one of the directories
672  *	already on the search path), its directory is added to the end
673  *	of the path on the assumption that there will be more files in
674  *	that directory later on.
675  */
676 char *
677 Dir_FindFilei(name, end, path)
678     const char		*name;
679     const char		*end;
680     Lst 		path;
681 {
682     Path		*p;	/* current path member */
683     char		*p1;	/* pointer into p->name */
684     const char		*p2;	/* pointer into name */
685     LstNode		ln;	/* a list element */
686     char		*file;	/* the current filename to check */
687     char		*temp;	/* index into file */
688     const char		*cp;	/* index of first slash, if any */
689     bool		hasSlash;
690     struct stat 	stb;	/* Buffer for stat, if necessary */
691     struct file_stamp	*entry; /* Entry for mtimes table */
692     u_int32_t		hv;	/* hash value for last component in file name */
693     char		*q;	/* Str_dupi(name, end) */
694 
695     /* Find the final component of the name and note whether name has a
696      * slash in it */
697     cp = Str_rchri(name, end, '/');
698     if (cp) {
699 	hasSlash = true;
700 	cp++;
701     } else {
702 	hasSlash = false;
703 	cp = name;
704     }
705 
706     hv = ohash_interval(cp, &end);
707 
708     if (DEBUG(DIR))
709 	printf("Searching for %s...", name);
710     /* No matter what, we always look for the file in the current directory
711      * before anywhere else and we always return exactly what the caller
712      * specified. */
713     if ((!hasSlash || (cp - name == 2 && *name == '.')) &&
714 	find_file_hashi(dot, cp, end, hv) != NULL) {
715 	    if (DEBUG(DIR))
716 		printf("in '.'\n");
717 #ifdef DEBUG_DIRECTORY_CACHE
718 	    hits++;
719 	    dot->hits++;
720 #endif
721 	    return Str_dupi(name, end);
722     }
723 
724     /* Then, we look through all the directories on path, seeking one
725      * containing the final component of name and whose final
726      * component(s) match name's initial component(s).
727      * If found, we concatenate the directory name and the
728      * final component and return the resulting string.  */
729     for (ln = Lst_First(path); ln != NULL; ln = Lst_Adv(ln)) {
730 	p = (Path *)Lst_Datum(ln);
731 	if (DEBUG(DIR))
732 	    printf("%s...", p->name);
733 	if (find_file_hashi(p, cp, end, hv) != NULL) {
734 	    if (DEBUG(DIR))
735 		printf("here...");
736 	    if (hasSlash) {
737 		/* If the name had a slash, its initial components and p's
738 		 * final components must match. This is false if a mismatch
739 		 * is encountered before all of the initial components
740 		 * have been checked (p2 > name at the end of the loop), or
741 		 * we matched only part of one of the components of p
742 		 * along with all the rest of them (*p1 != '/').  */
743 		p1 = p->name + strlen(p->name) - 1;
744 		p2 = cp - 2;
745 		while (p2 >= name && p1 >= p->name && *p1 == *p2) {
746 		    p1--;
747 		    p2--;
748 		}
749 		if (p2 >= name || (p1 >= p->name && *p1 != '/')) {
750 		    if (DEBUG(DIR))
751 			printf("component mismatch -- continuing...");
752 		    continue;
753 		}
754 	    }
755 	    file = Str_concati(p->name, strchr(p->name, '\0'), cp, end, '/');
756 	    if (DEBUG(DIR))
757 		printf("returning %s\n", file);
758 #ifdef DEBUG_DIRECTORY_CACHE
759 	    p->hits++;
760 	    hits++;
761 #endif
762 	    return file;
763 	} else if (hasSlash) {
764 	    /* If the file has a leading path component and that component
765 	     * exactly matches the entire name of the current search
766 	     * directory, we assume the file doesn't exist and return NULL.  */
767 	    for (p1 = p->name, p2 = name; *p1 && *p1 == *p2; p1++, p2++)
768 		continue;
769 	    if (*p1 == '\0' && p2 == cp - 1) {
770 		if (DEBUG(DIR))
771 		    printf("has to be here but isn't -- returning NULL\n");
772 		return NULL;
773 	    }
774 	}
775     }
776 
777     /* We didn't find the file on any existing member of the path.
778      * If the name doesn't contain a slash, end of story.
779      * If it does contain a slash, however, it could be in a subdirectory
780      * of one of the members of the search path. (eg., for path=/usr/include
781      * and name=sys/types.h, the above search fails to turn up types.h
782      * in /usr/include, even though /usr/include/sys/types.h exists).
783      *
784      * We only perform this look-up for non-absolute file names.
785      *
786      * Whenever we score a hit, we assume there will be more matches from
787      * that directory, and append all but the last component of the
788      * resulting name onto the search path. */
789     if (!hasSlash) {
790 	if (DEBUG(DIR))
791 	    printf("failed.\n");
792 #ifdef DEBUG_DIRECTORY_CACHE
793 	misses++;
794 #endif
795 	return NULL;
796     }
797 
798     if (*name != '/') {
799 	bool checkedDot = false;
800 
801 	if (DEBUG(DIR))
802 	    printf("failed. Trying subdirectories...");
803 	for (ln = Lst_First(path); ln != NULL; ln = Lst_Adv(ln)) {
804 	    p = (Path *)Lst_Datum(ln);
805 	    if (p != dot)
806 		file = Str_concati(p->name, strchr(p->name, '\0'), name, end, '/');
807 	    else {
808 		/* Checking in dot -- DON'T put a leading ./ on the thing.  */
809 		file = Str_dupi(name, end);
810 		checkedDot = true;
811 	    }
812 	    if (DEBUG(DIR))
813 		printf("checking %s...", file);
814 
815 	    if (stat(file, &stb) == 0) {
816 		TIMESTAMP mtime;
817 
818 		ts_set_from_stat(stb, mtime);
819 		if (DEBUG(DIR))
820 		    printf("got it.\n");
821 
822 		/* We've found another directory to search. We know there
823 		 * is a slash in 'file'. We call Dir_AddDiri to add the
824 		 * new directory onto the existing search path. Once
825 		 * that's done, we return the file name, knowing that
826 		 * should a file in this directory ever be referenced again
827 		 * in such a manner, we will find it without having to do
828 		 * numerous access calls.  */
829 		temp = strrchr(file, '/');
830 		Dir_AddDiri(path, file, temp);
831 
832 		/* Save the modification time so if it's needed, we don't have
833 		 * to fetch it again.  */
834 		if (DEBUG(DIR))
835 		    printf("Caching %s for %s\n", Targ_FmtTime(mtime),
836 			    file);
837 		record_stamp(file, mtime);
838 #ifdef DEBUG_DIRECTORY_CACHE
839 		nearmisses++;
840 #endif
841 		return file;
842 	    } else
843 		free(file);
844 	}
845 
846 	if (DEBUG(DIR))
847 	    printf("failed. ");
848 
849 	if (checkedDot) {
850 	    /* Already checked by the given name, since . was in the path,
851 	     * so no point in proceeding...  */
852 	    if (DEBUG(DIR))
853 		printf("Checked . already, returning NULL\n");
854 	    return NULL;
855 	}
856     }
857 
858     /* Didn't find it that way, either. Last resort: look for the file
859      * in the global mtime cache, then on the disk.
860      * If this doesn't succeed, we finally return a NULL pointer.
861      *
862      * We cannot add this directory onto the search path because
863      * of this amusing case:
864      * $(INSTALLDIR)/$(FILE): $(FILE)
865      *
866      * $(FILE) exists in $(INSTALLDIR) but not in the current one.
867      * When searching for $(FILE), we will find it in $(INSTALLDIR)
868      * b/c we added it here. This is not good...  */
869     q = Str_dupi(name, end);
870     if (DEBUG(DIR))
871 	printf("Looking for \"%s\"...", q);
872 
873 #ifdef DEBUG_DIRECTORY_CACHE
874     bigmisses++;
875 #endif
876     entry = find_stampi(name, end);
877     if (entry != NULL) {
878 	if (DEBUG(DIR))
879 	    printf("got it (in mtime cache)\n");
880 	return q;
881     } else if (stat(q, &stb) == 0) {
882 	TIMESTAMP mtime;
883 
884 	ts_set_from_stat(stb, mtime);
885 	if (DEBUG(DIR))
886 	    printf("Caching %s for %s\n", Targ_FmtTime(mtime),
887 		    q);
888 	record_stamp(q, mtime);
889 	return q;
890     } else {
891 	if (DEBUG(DIR))
892 	    printf("failed. Returning NULL\n");
893 	free(q);
894 	return NULL;
895     }
896 }
897 
898 /* Read a directory, either from the disk, or from the cache.  */
899 static Path *
900 DirReaddiri(name, end)
901     const char		*name;
902     const char		*end;
903 {
904     Path		*p;	/* pointer to new Path structure */
905     DIR 		*d;	/* for reading directory */
906     struct dirent	*dp;	/* entry in directory */
907     unsigned int	slot;
908 
909     slot = ohash_qlookupi(&openDirectories, name, &end);
910     p = ohash_find(&openDirectories, slot);
911 
912     if (p != NULL)
913 	return p;
914 
915     p = ohash_create_entry(&dir_info, name, &end);
916 #ifdef DEBUG_DIRECTORY_CACHE
917     p->hits = 0;
918 #endif
919     p->refCount = 0;
920     ohash_init(&p->files, 4, &file_info);
921 
922     if (DEBUG(DIR)) {
923 	printf("Caching %s...", p->name);
924 	fflush(stdout);
925     }
926 
927     if ((d = opendir(p->name)) == NULL)
928 	return NULL;
929     /* Skip the first two entries -- these will *always* be . and ..  */
930     (void)readdir(d);
931     (void)readdir(d);
932 
933     while ((dp = readdir(d)) != NULL) {
934 #if defined(sun) && defined(d_ino) /* d_ino is a sunos4 #define for d_fileno */
935 	/* The sun directory library doesn't check for a 0 inode
936 	 * (0-inode slots just take up space), so we have to do
937 	 * it ourselves.  */
938 	if (dp->d_fileno == 0)
939 	    continue;
940 #endif /* sun && d_ino */
941 	add_file(p, dp->d_name);
942     }
943     (void)closedir(d);
944     if (DEBUG(DIR))
945 	printf("done\n");
946 
947     ohash_insert(&openDirectories, slot, p);
948     return p;
949 }
950 
951 /*-
952  *-----------------------------------------------------------------------
953  * Dir_AddDiri --
954  *	Add the given name to the end of the given path. The order of
955  *	the arguments is backwards so ParseDoDependency can do a
956  *	Lst_ForEach of its list of paths...
957  *
958  * Side Effects:
959  *	A structure is added to the list and the directory is
960  *	read and hashed.
961  *-----------------------------------------------------------------------
962  */
963 
964 void
965 Dir_AddDiri(path, name, end)
966     Lst 	path;	/* the path to which the directory should be added */
967     const char	*name;	/* the name of the directory to add */
968     const char	*end;
969 {
970     Path	*p;	/* pointer to new Path structure */
971 
972     p = DirReaddiri(name, end);
973     if (p == NULL)
974 	return;
975     if (p->refCount == 0)
976 	Lst_AtEnd(path, p);
977     else if (!Lst_AddNew(path, p))
978 	return;
979     p->refCount++;
980 }
981 
982 /*-
983  *-----------------------------------------------------------------------
984  * Dir_CopyDir --
985  *	Callback function for duplicating a search path via Lst_Duplicate.
986  *	Ups the reference count for the directory.
987  *
988  * Results:
989  *	Returns the Path it was given.
990  *
991  * Side Effects:
992  *	The refCount of the path is incremented.
993  *-----------------------------------------------------------------------
994  */
995 void *
996 Dir_CopyDir(p)
997     void *p;
998 {
999     ((Path *)p)->refCount++;
1000     return p;
1001 }
1002 
1003 /*-
1004  *-----------------------------------------------------------------------
1005  * Dir_MakeFlags --
1006  *	Make a string by taking all the directories in the given search
1007  *	path and preceding them by the given flag. Used by the suffix
1008  *	module to create variables for compilers based on suffix search
1009  *	paths.
1010  *
1011  * Results:
1012  *	The string mentioned above. Note that there is no space between
1013  *	the given flag and each directory. The empty string is returned if
1014  *	Things don't go well.
1015  *-----------------------------------------------------------------------
1016  */
1017 char *
1018 Dir_MakeFlags(flag, path)
1019     const char	  *flag;  /* flag which should precede each directory */
1020     Lst 	  path;   /* list of directories */
1021 {
1022     LstNode	  ln;	  /* the node of the current directory */
1023     BUFFER	  buf;
1024 
1025     Buf_Init(&buf, 0);
1026 
1027     for (ln = Lst_First(path); ln != NULL; ln = Lst_Adv(ln)) {
1028 	    Buf_AddString(&buf, flag);
1029 	    Buf_AddString(&buf, ((Path *)Lst_Datum(ln))->name);
1030 	    Buf_AddSpace(&buf);
1031     }
1032 
1033     return Buf_Retrieve(&buf);
1034 }
1035 
1036 /*-
1037  *-----------------------------------------------------------------------
1038  * Dir_Destroy --
1039  *	Nuke a directory descriptor, if possible. Callback procedure
1040  *	for the suffixes module when destroying a search path.
1041  *
1042  * Side Effects:
1043  *	If no other path references this directory (refCount == 0),
1044  *	the Path and all its data are freed.
1045  *-----------------------------------------------------------------------
1046  */
1047 void
1048 Dir_Destroy(pp)
1049     void	*pp;		/* The directory descriptor to nuke */
1050 {
1051     Path	*p = (Path *)pp;
1052 
1053     if (--p->refCount == 0) {
1054 	ohash_remove(&openDirectories, ohash_qlookup(&openDirectories, p->name));
1055 	free_hash(&p->files);
1056 	free(p);
1057     }
1058 }
1059 
1060 /*-
1061  *-----------------------------------------------------------------------
1062  * Dir_Concat --
1063  *	Concatenate two paths, adding the second to the end of the first.
1064  *	Makes sure to avoid duplicates.
1065  *
1066  * Side Effects:
1067  *	Reference counts for added dirs are upped.
1068  *-----------------------------------------------------------------------
1069  */
1070 void
1071 Dir_Concat(path1, path2)
1072     Lst 	path1;		/* Dest */
1073     Lst 	path2;		/* Source */
1074 {
1075     LstNode	ln;
1076     Path	*p;
1077 
1078     for (ln = Lst_First(path2); ln != NULL; ln = Lst_Adv(ln)) {
1079 	p = (Path *)Lst_Datum(ln);
1080 	if (Lst_AddNew(path1, p))
1081 	    p->refCount++;
1082     }
1083 }
1084 
1085 #ifdef DEBUG_DIRECTORY_CACHE
1086 void
1087 Dir_PrintDirectories()
1088 {
1089     Path		*p;
1090     unsigned int	i;
1091 
1092     printf("#*** Directory Cache:\n");
1093     printf("# Stats: %d hits %d misses %d near misses %d losers (%d%%)\n",
1094 	      hits, misses, nearmisses, bigmisses,
1095 	      (hits+bigmisses+nearmisses ?
1096 	       hits * 100 / (hits + bigmisses + nearmisses) : 0));
1097     printf("# %-20s referenced\thits\n", "directory");
1098     for (p = ohash_first(&openDirectories, &i); p != NULL;
1099 	p = ohash_next(&openDirectories, &i))
1100 	    printf("# %-20s %10d\t%4d\n", p->name, p->refCount, p->hits);
1101 }
1102 #endif
1103 
1104 static void
1105 DirPrintDir(p)
1106     void	*p;
1107 {
1108     printf("%s ", ((Path *)p)->name);
1109 }
1110 
1111 void
1112 Dir_PrintPath(path)
1113     Lst path;
1114 {
1115     Lst_Every(path, DirPrintDir);
1116 }
1117 
1118 TIMESTAMP
1119 Dir_MTime(gn)
1120     GNode	  *gn;	      /* the file whose modification time is
1121 			       * desired */
1122 {
1123     char	  *fullName;  /* the full pathname of name */
1124     struct stat   stb;	      /* buffer for finding the mod time */
1125     struct file_stamp
1126 		  *entry;
1127     unsigned int  slot;
1128     TIMESTAMP	  mtime;
1129 
1130     if (gn->type & OP_ARCHV)
1131 	return Arch_MTime(gn);
1132 
1133     if (gn->path == NULL) {
1134 	fullName = Dir_FindFile(gn->name, dirSearchPath);
1135 	if (fullName == NULL)
1136 	    fullName = estrdup(gn->name);
1137     } else
1138 	fullName = gn->path;
1139 
1140     slot = ohash_qlookup(&mtimes, fullName);
1141     entry = ohash_find(&mtimes, slot);
1142     if (entry != NULL) {
1143 	/* Only do this once -- the second time folks are checking to
1144 	 * see if the file was actually updated, so we need to actually go
1145 	 * to the file system.	*/
1146 	if (DEBUG(DIR))
1147 	    printf("Using cached time %s for %s\n",
1148 		    Targ_FmtTime(entry->mtime), fullName);
1149 	mtime = entry->mtime;
1150 	free(entry);
1151 	ohash_remove(&mtimes, slot);
1152     } else if (stat(fullName, &stb) == 0)
1153 	ts_set_from_stat(stb, mtime);
1154     else {
1155 	if (gn->type & OP_MEMBER) {
1156 	    if (fullName != gn->path)
1157 		free(fullName);
1158 	    return Arch_MemMTime(gn);
1159 	} else
1160 	    ts_set_out_of_date(mtime);
1161     }
1162     if (fullName && gn->path == NULL)
1163 	gn->path = fullName;
1164 
1165     gn->mtime = mtime;
1166     return gn->mtime;
1167 }
1168 
1169