xref: /openbsd-src/usr.bin/make/direxpand.c (revision c9fc29cfc69a20b8ffa3404f847e4bf84cf4130a)
1*c9fc29cfSespie /*	$OpenBSD: direxpand.c,v 1.9 2023/09/04 11:35:11 espie Exp $ */
2267e030aSespie /*
3267e030aSespie  * Copyright (c) 1999,2007 Marc Espie.
4267e030aSespie  *
5267e030aSespie  * Extensive code changes for the OpenBSD project.
6267e030aSespie  *
7267e030aSespie  * Redistribution and use in source and binary forms, with or without
8267e030aSespie  * modification, are permitted provided that the following conditions
9267e030aSespie  * are met:
10267e030aSespie  * 1. Redistributions of source code must retain the above copyright
11267e030aSespie  *    notice, this list of conditions and the following disclaimer.
12267e030aSespie  * 2. Redistributions in binary form must reproduce the above copyright
13267e030aSespie  *    notice, this list of conditions and the following disclaimer in the
14267e030aSespie  *    documentation and/or other materials provided with the distribution.
15267e030aSespie  *
16267e030aSespie  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
17267e030aSespie  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18267e030aSespie  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19267e030aSespie  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
20267e030aSespie  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21267e030aSespie  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22267e030aSespie  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23267e030aSespie  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24267e030aSespie  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25267e030aSespie  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26267e030aSespie  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27267e030aSespie  */
28267e030aSespie /*
29267e030aSespie  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
30267e030aSespie  * Copyright (c) 1988, 1989 by Adam de Boor
31267e030aSespie  * Copyright (c) 1989 by Berkeley Softworks
32267e030aSespie  * All rights reserved.
33267e030aSespie  *
34267e030aSespie  * This code is derived from software contributed to Berkeley by
35267e030aSespie  * Adam de Boor.
36267e030aSespie  *
37267e030aSespie  * Redistribution and use in source and binary forms, with or without
38267e030aSespie  * modification, are permitted provided that the following conditions
39267e030aSespie  * are met:
40267e030aSespie  * 1. Redistributions of source code must retain the above copyright
41267e030aSespie  *    notice, this list of conditions and the following disclaimer.
42267e030aSespie  * 2. Redistributions in binary form must reproduce the above copyright
43267e030aSespie  *    notice, this list of conditions and the following disclaimer in the
44267e030aSespie  *    documentation and/or other materials provided with the distribution.
45267e030aSespie  * 3. Neither the name of the University nor the names of its contributors
46267e030aSespie  *    may be used to endorse or promote products derived from this software
47267e030aSespie  *    without specific prior written permission.
48267e030aSespie  *
49267e030aSespie  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50267e030aSespie  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51267e030aSespie  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52267e030aSespie  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53267e030aSespie  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54267e030aSespie  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55267e030aSespie  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56267e030aSespie  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57267e030aSespie  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58267e030aSespie  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59267e030aSespie  * SUCH DAMAGE.
60267e030aSespie  */
61267e030aSespie 
62267e030aSespie #include <stdio.h>
63267e030aSespie #include <stdlib.h>
64267e030aSespie #include <string.h>
65267e030aSespie #include "defines.h"
66267e030aSespie #include "lst.h"
67267e030aSespie #include "dir.h"
68267e030aSespie #include "direxpand.h"
69267e030aSespie #include "error.h"
70267e030aSespie #include "memory.h"
71267e030aSespie #include "str.h"
72267e030aSespie 
73267e030aSespie /* Handles simple wildcard expansion on a path. */
74267e030aSespie static void PathMatchFilesi(const char *, const char *, Lst, Lst);
75267e030aSespie /* Handles wildcards expansion except for curly braces. */
76267e030aSespie static void DirExpandWildi(const char *, const char *, Lst, Lst);
77267e030aSespie #define DirExpandWild(s, l1, l2) DirExpandWildi(s, strchr(s, '\0'), l1, l2)
78267e030aSespie /* Handles wildcard expansion including curly braces. */
79267e030aSespie static void DirExpandCurlyi(const char *, const char *, Lst, Lst);
80267e030aSespie 
81267e030aSespie /* Debugging: show each word in an expansion list. */
82267e030aSespie static void DirPrintWord(void *);
83267e030aSespie 
84267e030aSespie /*-
85267e030aSespie  *-----------------------------------------------------------------------
86267e030aSespie  * PathMatchFilesi --
87267e030aSespie  *	Traverse directories in the path, calling Dir_MatchFiles for each.
88267e030aSespie  *	NOTE: This doesn't handle patterns in directories.
89267e030aSespie  *-----------------------------------------------------------------------
90267e030aSespie  */
91267e030aSespie static void
PathMatchFilesi(const char * word,const char * eword,Lst path,Lst expansions)92267e030aSespie PathMatchFilesi(const char *word, const char *eword, Lst path, Lst expansions)
93267e030aSespie {
94267e030aSespie 	LstNode	ln;		/* Current node */
95267e030aSespie 
96267e030aSespie 	for (ln = Lst_First(path); ln != NULL; ln = Lst_Adv(ln))
97568f6c05Sespie 		Dir_MatchFilesi(word, eword, Lst_Datum(ln), expansions);
98267e030aSespie }
99267e030aSespie 
100267e030aSespie /*-
101267e030aSespie  *-----------------------------------------------------------------------
102267e030aSespie  * DirExpandWildi:
103267e030aSespie  *	Expand all wild cards in a fully qualified name, except for
104267e030aSespie  *	curly braces.
105267e030aSespie  * Side-effect:
106267e030aSespie  *	Will hash any directory in which a file is found, and add it to
107267e030aSespie  *	the path, on the assumption that future lookups will find files
108267e030aSespie  *	there as well.
109267e030aSespie  *-----------------------------------------------------------------------
110267e030aSespie  */
111267e030aSespie static void
DirExpandWildi(const char * word,const char * eword,Lst path,Lst expansions)112267e030aSespie DirExpandWildi(const char *word, const char *eword, Lst path, Lst expansions)
113267e030aSespie {
114267e030aSespie 	const char *cp;
115267e030aSespie 	const char *slash; /* keep track of first slash before wildcard */
116267e030aSespie 
117267e030aSespie 	slash = memchr(word, '/', eword - word);
118267e030aSespie 	if (slash == NULL) {
119267e030aSespie 		/* First the files in dot.  */
120267e030aSespie 		Dir_MatchFilesi(word, eword, dot, expansions);
121267e030aSespie 
122267e030aSespie 		/* Then the files in every other directory on the path.  */
123267e030aSespie 		PathMatchFilesi(word, eword, path, expansions);
124267e030aSespie 		return;
125267e030aSespie 	}
126267e030aSespie 	/* The thing has a directory component -- find the first wildcard
127267e030aSespie 	 * in the string.  */
128267e030aSespie 	slash = word;
129267e030aSespie 	for (cp = word; cp != eword; cp++) {
130267e030aSespie 		if (*cp == '/')
131267e030aSespie 			slash = cp;
132267e030aSespie 		if (*cp == '?' || *cp == '[' || *cp == '*') {
133267e030aSespie 
134267e030aSespie 			if (slash != word) {
135267e030aSespie 				char	*dirpath;
136267e030aSespie 
137267e030aSespie 				/* If the glob isn't in the first component,
138267e030aSespie 				 * try and find all the components up to
139267e030aSespie 				 * the one with a wildcard.  */
140267e030aSespie 				dirpath = Dir_FindFilei(word, slash+1, path);
141267e030aSespie 				/* dirpath is null if we can't find the
142267e030aSespie 				 * leading component
143267e030aSespie 				 * XXX: Dir_FindFile won't find internal
144267e030aSespie 				 * components.  i.e. if the path contains
145267e030aSespie 				 * ../Etc/Object and we're looking for Etc,
146267e030aSespie 				 * it won't be found. */
147267e030aSespie 				if (dirpath != NULL) {
148267e030aSespie 					char *dp;
149267e030aSespie 					LIST temp;
150267e030aSespie 
151267e030aSespie 					dp = strchr(dirpath, '\0');
152267e030aSespie 					while (dp > dirpath && dp[-1] == '/')
153267e030aSespie 						dp--;
154267e030aSespie 
155267e030aSespie 					Lst_Init(&temp);
156267e030aSespie 					Dir_AddDiri(&temp, dirpath, dp);
157267e030aSespie 					PathMatchFilesi(slash+1, eword, &temp,
158267e030aSespie 					    expansions);
159267e030aSespie 					Lst_Destroy(&temp, NOFREE);
160267e030aSespie 				}
161267e030aSespie 			} else
162267e030aSespie 				/* Start the search from the local directory. */
163267e030aSespie 				PathMatchFilesi(word, eword, path, expansions);
164267e030aSespie 			return;
165267e030aSespie 		}
166267e030aSespie 	}
167267e030aSespie 	/* Return the file -- this should never happen.  */
168267e030aSespie 	PathMatchFilesi(word, eword, path, expansions);
169267e030aSespie }
170267e030aSespie 
171267e030aSespie /*-
172267e030aSespie  *-----------------------------------------------------------------------
173267e030aSespie  * DirExpandCurly --
174267e030aSespie  *	Expand curly braces like the C shell, and other wildcards as per
175267e030aSespie  *	Str_Match.
176267e030aSespie  *	XXX: if curly expansion yields a result with
177267e030aSespie  *	no wildcards, the result is placed on the list WITHOUT CHECKING
178267e030aSespie  *	FOR ITS EXISTENCE.
179267e030aSespie  *-----------------------------------------------------------------------
180267e030aSespie  */
181267e030aSespie static void
DirExpandCurlyi(const char * word,const char * eword,Lst path,Lst expansions)182267e030aSespie DirExpandCurlyi(const char *word, const char *eword, Lst path, Lst expansions)
183267e030aSespie {
184267e030aSespie 	const char *cp2;/* Pointer for checking for wildcards in
185267e030aSespie 			 * expansion before calling Dir_Expand */
186267e030aSespie 	LIST curled; 	/* Queue of words to expand */
187267e030aSespie 	char *toexpand;	/* Current word to expand */
188267e030aSespie 	bool dowild; 	/* Wildcard left after curlies ? */
189267e030aSespie 
190267e030aSespie 	/* Determine once and for all if there is something else going on */
191267e030aSespie 	dowild = false;
192267e030aSespie 	for (cp2 = word; cp2 != eword; cp2++)
193267e030aSespie 		if (*cp2 == '*' || *cp2 == '?' || *cp2 == '[') {
194267e030aSespie 			dowild = true;
195267e030aSespie 			break;
196267e030aSespie 		}
197267e030aSespie 
198267e030aSespie 	/* Prime queue with copy of initial word */
199267e030aSespie 	Lst_Init(&curled);
200267e030aSespie 	Lst_EnQueue(&curled, Str_dupi(word, eword));
201568f6c05Sespie 	while ((toexpand = Lst_DeQueue(&curled)) != NULL) {
202267e030aSespie 		const char *brace;
203267e030aSespie 		const char *start;
204267e030aSespie 				/* Start of current chunk of brace clause */
205267e030aSespie 		const char *end;/* Character after the closing brace */
206267e030aSespie 		int bracelevel; /* Keep track of nested braces. If we hit
207267e030aSespie 				 * the right brace with bracelevel == 0,
208267e030aSespie 				 * this is the end of the clause. */
209267e030aSespie 		size_t endLen;  /* The length of the ending non-curlied
210267e030aSespie 				 * part of the current expansion */
211267e030aSespie 
212267e030aSespie 		/* End case: no curly left to expand */
213267e030aSespie 		brace = strchr(toexpand, '{');
214267e030aSespie 		if (brace == NULL) {
215267e030aSespie 			if (dowild) {
216267e030aSespie 				DirExpandWild(toexpand, path, expansions);
217267e030aSespie 				free(toexpand);
218267e030aSespie 			} else
219267e030aSespie 				Lst_AtEnd(expansions, toexpand);
220267e030aSespie 			continue;
221267e030aSespie 		}
222267e030aSespie 
223267e030aSespie 		start = brace+1;
224267e030aSespie 
225267e030aSespie 		/* Find the end of the brace clause first, being wary of
226267e030aSespie 		 * nested brace clauses.  */
227267e030aSespie 		for (end = start, bracelevel = 0;; end++) {
228267e030aSespie 			if (*end == '{')
229267e030aSespie 				bracelevel++;
230267e030aSespie 			else if (*end == '\0') {
231267e030aSespie 				Error("Unterminated {} clause \"%s\"", start);
232267e030aSespie 				return;
233267e030aSespie 			} else if (*end == '}' && bracelevel-- == 0)
234267e030aSespie 				break;
235267e030aSespie 		}
236267e030aSespie 		end++;
237267e030aSespie 		endLen = strlen(end);
238267e030aSespie 
239267e030aSespie 		for (;;) {
240267e030aSespie 			char *file;	/* To hold current expansion */
241267e030aSespie 			const char *cp;	/* Current position in brace clause */
242267e030aSespie 
243267e030aSespie 			/* Find the end of the current expansion */
244267e030aSespie 			for (bracelevel = 0, cp = start;
245267e030aSespie 			    bracelevel != 0 || (*cp != '}' && *cp != ',');
246267e030aSespie 			    cp++) {
247267e030aSespie 				if (*cp == '{')
248267e030aSespie 					bracelevel++;
249267e030aSespie 				else if (*cp == '}')
250267e030aSespie 					bracelevel--;
251267e030aSespie 			}
252267e030aSespie 
253267e030aSespie 			/* Build the current combination and enqueue it.  */
254267e030aSespie 			file = emalloc((brace - toexpand) + (cp - start) +
255267e030aSespie 			    endLen + 1);
256267e030aSespie 			if (brace != toexpand)
257267e030aSespie 				memcpy(file, toexpand, brace-toexpand);
258267e030aSespie 			if (cp != start)
259267e030aSespie 				memcpy(file+(brace-toexpand), start, cp-start);
260267e030aSespie 			memcpy(file+(brace-toexpand)+(cp-start), end,
261267e030aSespie 			    endLen + 1);
262267e030aSespie 			Lst_EnQueue(&curled, file);
263267e030aSespie 			if (*cp == '}')
264267e030aSespie 				break;
265267e030aSespie 			start = cp+1;
266267e030aSespie 		}
267267e030aSespie 		free(toexpand);
268267e030aSespie 	}
269267e030aSespie }
270267e030aSespie 
271267e030aSespie /* Side effects:
272267e030aSespie  * 	Dir_Expandi will hash directories that were not yet visited */
273267e030aSespie void
Dir_Expandi(const char * word,const char * eword,Lst path,Lst expansions)274267e030aSespie Dir_Expandi(const char *word, const char *eword, Lst path, Lst expansions)
275267e030aSespie {
276267e030aSespie 	const char	*cp;
277267e030aSespie 
278267e030aSespie 	if (DEBUG(DIR)) {
279267e030aSespie 		char *s = Str_dupi(word, eword);
280267e030aSespie 		printf("expanding \"%s\"...", s);
281267e030aSespie 		free(s);
282267e030aSespie 	}
283267e030aSespie 
284267e030aSespie 	cp = memchr(word, '{', eword - word);
285267e030aSespie 	if (cp)
286267e030aSespie 		DirExpandCurlyi(word, eword, path, expansions);
287267e030aSespie 	else
288267e030aSespie 		DirExpandWildi(word, eword, path, expansions);
289267e030aSespie 
290267e030aSespie 	if (DEBUG(DIR)) {
291267e030aSespie 		Lst_Every(expansions, DirPrintWord);
292267e030aSespie 		fputc('\n', stdout);
293267e030aSespie 	}
294267e030aSespie }
295267e030aSespie 
296267e030aSespie static void
DirPrintWord(void * word)297267e030aSespie DirPrintWord(void *word)
298267e030aSespie {
2996eef43d0Sespie 	const char *s = word;
3007f5855dcSespie 	printf("%s ", s);
301267e030aSespie }
302267e030aSespie 
303267e030aSespie 
304267e030aSespie /* XXX: This code is not 100% correct ([^]] fails) */
305267e030aSespie bool
Dir_HasWildcardsi(const char * name,const char * ename)306267e030aSespie Dir_HasWildcardsi(const char *name, const char *ename)
307267e030aSespie {
308267e030aSespie 	const char *cp;
309267e030aSespie 	bool wild = false;
310267e030aSespie 	unsigned long brace = 0, bracket = 0;
311267e030aSespie 
312267e030aSespie 	for (cp = name; cp != ename; cp++) {
313267e030aSespie 		switch (*cp) {
314267e030aSespie 		case '{':
315267e030aSespie 			brace++;
316267e030aSespie 			wild = true;
317267e030aSespie 			break;
318267e030aSespie 		case '}':
319267e030aSespie 			if (brace == 0)
320267e030aSespie 				return false;
321267e030aSespie 			brace--;
322267e030aSespie 			break;
323267e030aSespie 		case '[':
324267e030aSespie 			bracket++;
325267e030aSespie 			wild = true;
326267e030aSespie 			break;
327267e030aSespie 		case ']':
328267e030aSespie 			if (bracket == 0)
329267e030aSespie 				return false;
330267e030aSespie 			bracket--;
331267e030aSespie 			break;
332267e030aSespie 		case '?':
333267e030aSespie 		case '*':
334267e030aSespie 			wild = true;
335267e030aSespie 			break;
336267e030aSespie 		default:
337267e030aSespie 			break;
338267e030aSespie 		}
339267e030aSespie 	}
340267e030aSespie 	return wild && bracket == 0 && brace == 0;
341267e030aSespie }
342267e030aSespie 
343267e030aSespie 
344