xref: /openbsd-src/usr.bin/make/str.c (revision db3296cf5c1dd9058ceecc3a29fe4aaa0bd26000)
1 /*	$OpenPackages$ */
2 /*	$OpenBSD: str.c,v 1.20 2003/06/03 02:56:12 millert Exp $	*/
3 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
4 
5 /*-
6  * Copyright (c) 1988, 1989, 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * Copyright (c) 1989 by Berkeley Softworks
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Adam de Boor.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <ctype.h>
40 #include <string.h>
41 #include "config.h"
42 #include "defines.h"
43 #include "str.h"
44 #include "memory.h"
45 #include "buf.h"
46 
/*-
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a newly
 *	allocated, NUL-terminated string.  If sep is non-zero, it is
 *	inserted as a single character between the two pieces.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(s1, e1, s2, e2, sep)
    const char *s1, *e1, *s2, *e2;
    int sep;
{
    size_t len1, len2;
    char *result, *t;

    /* get the length of both strings */
    len1 = e1 - s1;
    len2 = e2 - s2;

    /* room for both pieces, the optional separator and the EOS */
    result = emalloc(len1 + len2 + (sep ? 1 : 0) + 1);

    /*
     * Copy exactly [s1, e1).  The first string is an interval, so the
     * byte at e1 must not be read: the separator is stored afterwards
     * instead of being overwritten on top of an extra copied byte.
     */
    t = result;
    memcpy(t, s1, len1);
    t += len1;

    /* add separator character */
    if (sep)
	*t++ = sep;

    /* copy second string and terminate */
    memcpy(t, s2, len2);
    t[len2] = '\0';
    return result;
}
76 
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslash escapes into account.
 *	Leading tabs/spaces are ignored.  Parsing stops at the end of the
 *	string, or right after a word that is ended by an unquoted newline.
 *
 * returns --
 *	Pointer to a NULL-terminated array of pointers to the words; the
 *	first word of the string is stored at index 0.  The number of words
 *	is stored in *store_argc.  The words live in a processed copy of
 *	the string whose address is stored in *buffer.  Both the array and
 *	*buffer are allocated here and are not released by this function.
 */
char **
brk_string(str, store_argc, buffer)
    const char *str;
    int *store_argc;
    char **buffer;
{
    int argc;
    char ch;
    char inquote;
    const char *p;
    char *start, *t;
    int argmax = 50;
    char **argv = emalloc((argmax + 1) * sizeof(char *));

    /* skip leading space chars. */
    for (; *str == ' ' || *str == '\t'; ++str)
	continue;

    /*
     * allocate room for a copy of the string: every iteration of the
     * loop below consumes at least one input character and stores at
     * most one output character, so strlen(str) + 1 bytes are enough.
     */
    *buffer = emalloc(strlen(str) + 1);

    /*
     * copy the string; at the same time, parse backslashes,
     * quotes and build the argument list.
     * start is non-NULL while a word is being collected.
     */
    argc = 0;
    inquote = '\0';
    for (p = str, start = t = *buffer;; ++p) {
	switch (ch = *p) {
	case '"':
	case '\'':
	    if (inquote) {
		if (inquote == ch)
		    inquote = '\0';	/* closing quote */
		else
		    break;		/* other quote kind: literal */
	    } else {
		inquote = ch;
		/* Don't miss "" or '' */
		if (start == NULL && p[1] == inquote) {
		    /*
		     * The quote is stored below as a placeholder; the
		     * word starts just after it, and will be empty.
		     */
		    start = t + 1;
		    break;
		}
	    }
	    continue;
	case ' ':
	case '\t':
	case '\n':
	    if (inquote)
		break;
	    if (!start)
		continue;
	    /* FALLTHROUGH */
	case '\0':
	    /*
	     * end of a token -- make sure there's enough argv
	     * space and save off a pointer.
	     */
	    if (!start)
		goto done;

	    *t++ = '\0';
	    if (argc == argmax) {
		argmax *= 2;		/* ramp up fast */
		argv = erealloc(argv, (argmax + 1) * sizeof(char *));
	    }
	    argv[argc++] = start;
	    start = NULL;
	    if (ch == '\n' || ch == '\0')
		goto done;
	    continue;
	case '\\':
	    switch (ch = *++p) {
	    case '\0':
	    case '\n':
		/* hmmm; fix it up as best we can */
		ch = '\\';
		--p;
		break;
	    case 'b':
		ch = '\b';
		break;
	    case 'f':
		ch = '\f';
		break;
	    case 'n':
		ch = '\n';
		break;
	    case 'r':
		ch = '\r';
		break;
	    case 't':
		ch = '\t';
		break;
	    }
	    break;
	}
	/* ordinary character: start a word if needed and store it */
	if (!start)
	    start = t;
	*t++ = ch;
    }
done:
    argv[argc] = NULL;
    *store_argc = argc;
    return argv;
}
196 
197 
/*-
 * iterate_words --
 *	Find the next word in a string, starting the scan at *end.
 *	Leading white space (as defined by isspace()) is skipped.  A word
 *	extends up to the next space or tab that is neither preceded by a
 *	backslash nor enclosed in single or double quotes, or up to the
 *	end of the string.
 *
 * returns --
 *	Pointer to the start of the word, with *end updated to point just
 *	past its last character; NULL if only white space is left, in
 *	which case *end is left unchanged.
 */
const char *
iterate_words(end)
    const char	**end;
{
    const char	*start, *p;
    char	state = 0;	/* current quote character, 0 if none */

    start = *end;

    /* isspace() is only defined for unsigned char values and EOF. */
    while (isspace((unsigned char)*start))
	start++;
    if (*start == '\0')
	return NULL;

    for (p = start;; p++)
	switch(*p) {
	    case '\\':
		/* skip the escaped character, but never the EOS */
		if (p[1] != '\0')
		    p++;
		break;
	    case '\'':
	    case '"':
		if (state == *p)
		    state = 0;
		else if (state == 0)
		    state = *p;
		break;
	    case ' ':
	    case '\t':
		if (state != 0)
		    break;
		/* FALLTHROUGH */
	    case '\0':
		*end = p;
		return start;
	    default:
		break;
	    }
}
236 
/*-
 * Str_Matchi --
 *	Check whether the interval [string, estring) matches the shell-like
 *	pattern held in [pattern, end).  Recognized pattern elements:
 *	    *		any substring, possibly empty
 *	    ?		any single character
 *	    [...]	any one of the listed characters or ranges (a-z)
 *	    [!...]	any character not listed ([^...] is equivalent)
 *	    \c		the character c itself
 *
 *	A positive character class that is not closed before the end of
 *	the pattern is accepted; an unclosed negated class never matches.
 *
 * returns --
 *	true if the whole string matches the whole pattern, false otherwise.
 */
bool
Str_Matchi(string, estring, pattern, end)
    const char *string; 		/* String */
    const char *estring;		/* End of string */
    const char *pattern;		/* Pattern */
    const char *end;			/* End of Pattern */
{
    while (pattern != end) {
	/* Check for a "*" as the next pattern character.  It matches
	 * any substring.  We handle this by calling ourselves
	 * recursively for each postfix of string, until either we
	 * match or we reach the end of the string.  */
	if (*pattern == '*') {
	    pattern++;
	    /* Skip over contiguous  sequences of `?*', so that recursive
	     * calls only occur on `real' characters.  */
	    while (pattern != end && (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
		    if (string == estring)
			return false;
		    else
			string++;
		}
		pattern++;
	    }
	    if (pattern == end)
		return true;
	    for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, end))
		    return true;
	    return false;
	} else if (string == estring)
	    return false;
	/* Check for a "[" as the next pattern character.  It is
	 * followed by a list of characters that are acceptable, or
	 * by a range (two characters separated by "-").  */
	else if (*pattern == '[') {
	    pattern++;
	    if (pattern == end)
		return false;
	    if (*pattern == '!' || *pattern == '^') {
		pattern++;
		if (pattern == end)
		    return false;
		/* Negative match */
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    return false;
		    }
		    if (*pattern == *string)
			return false;
		    /* Only look for a range inside the pattern proper. */
		    if (pattern + 1 != end && pattern[1] == '-') {
			if (pattern + 2 == end)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    return false;
			if (pattern[2] <= *string && *string < *pattern)
			    return false;
			pattern += 3;
		    } else
			pattern++;
		    if (pattern == end)
			return false;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (*pattern == ']')
			break;
		}
	    } else {
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    return false;
		    }
		    if (*pattern == *string)
			break;
		    /* Only look for a range inside the pattern proper. */
		    if (pattern + 1 != end && pattern[1] == '-') {
			if (pattern + 2 == end)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    break;
			if (pattern[2] <= *string && *string < *pattern)
			    break;
			pattern += 3;
		    } else
			pattern++;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (pattern == end || *pattern == ']')
			return false;
		}
		/* Found matching character, skip over rest of class.  */
		while (pattern != end && *pattern != ']') {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    break;
		    }
		    pattern++;
		}
		/* A non-terminated character class is ok: the pattern is
		 * exhausted, so the matched character must be the last
		 * one of the string.  Returning here keeps pattern from
		 * being stepped past end by the increment below.  */
		if (pattern == end)
		    return string + 1 == estring;
	    }
	}
	/* '?' matches any single character, so shunt test.  */
	else if (*pattern != '?') {
	    /* If the next pattern character is '\', just strip off the
	     * '\' so we do exact matching on the character that follows.  */
	    if (*pattern == '\\') {
		if (++pattern == end)
		    return false;
	    }
	    /* There's no special character.  Just make sure that
	     * the next characters of each string match.  */
	    if (*pattern != *string)
		return false;
	}
	pattern++;
	string++;
    }
    /* Pattern exhausted: it is a match only if the string is too. */
    return string == estring;
}
361 
362 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern, in which a single
 *	'%' stands for an arbitrary run of characters.  Without a '%',
 *	the pattern has to match the end of the word.
 *
 * Results:
 *	NULL if the word does not match.  Otherwise, the position in
 *	word where the part standing for '%' begins, with its length
 *	stored in *len.  An empty pattern yields the whole word.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(word, pattern, len)
    const char	*word;		/* Word to examine */
    const char	*pattern;	/* Pattern to examine against */
    size_t	*len;		/* Number of characters to substitute */
{
    const char *percent;
    const char *stem;
    const char *tail;

    /* An empty pattern stands for the whole word.  */
    if (pattern[0] == '\0') {
	*len = strlen(word);
	return word;
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	/* Walk over the literal prefix in front of the '%'.  */
	while (pattern != percent && *word != '\0' && *word == *pattern) {
	    word++;
	    pattern++;
	}
	/* Stopped early: the prefix differs.  */
	if (pattern != percent)
	    return NULL;

	/* Step over the '%'; nothing after it means the rest matches.  */
	pattern++;
	if (*pattern == '\0') {
	    *len = strlen(word);
	    return word;
	}
    }

    /* Try every suffix of the word, the empty one included, as tail.  */
    stem = word;
    for (tail = word;; tail++) {
	if (strcmp(pattern, tail) == 0) {
	    *len = tail - stem;
	    return stem;
	}
	if (*tail == '\0')
	    return NULL;
    }
}
417 
418 
419 /*-
420  *-----------------------------------------------------------------------
421  * Str_SYSVSubst --
422  *	Substitute '%' on the pattern with len characters from src.
423  *	If the pattern does not contain a '%' prepend len characters
424  *	from src.
425  *
426  * Side Effects:
427  *	Places result on buf
428  *-----------------------------------------------------------------------
429  */
430 void
431 Str_SYSVSubst(buf, pat, src, len)
432     Buffer buf;
433     const char *pat;
434     const char *src;
435     size_t   len;
436 {
437     const char *m;
438 
439     if ((m = strchr(pat, '%')) != NULL) {
440 	/* Copy the prefix.  */
441 	Buf_Addi(buf, pat, m);
442 	/* Skip the %.	*/
443 	pat = m + 1;
444     }
445 
446     /* Copy the pattern.  */
447     Buf_AddChars(buf, len, src);
448 
449     /* Append the rest.  */
450     Buf_AddString(buf, pat);
451 }
452 
/*-
 * Str_dupi --
 *	Copy the interval [begin, end) into a newly allocated,
 *	NUL-terminated string.
 *
 * returns --
 *	The copy, obtained from emalloc().
 */
char *
Str_dupi(begin, end)
    const char *begin;
    const char *end;
{
    size_t n = end - begin;
    char *copy = emalloc(n + 1);

    memcpy(copy, begin, n);
    copy[n] = '\0';
    return copy;
}
465 
/*-
 * escape_dupi --
 *	Copy the interval [begin, end) into a newly allocated,
 *	NUL-terminated string, dropping each backslash that is followed
 *	by a character found in set.  Any other backslash, including one
 *	that ends the interval, is kept as is.
 *
 * returns --
 *	The copy, obtained from emalloc().
 */
char *
escape_dupi(begin, end, set)
    const char *begin;
    const char *end;
    const char *set;
{
    const char *src;
    char *copy, *dst;

    /* The result is never longer than the input.  */
    copy = emalloc(end - begin + 1);
    dst = copy;
    for (src = begin; src != end;) {
	if (*src == '\\') {
	    src++;
	    if (src == end) {
		/* Backslash with nothing after it.  */
		*dst++ = '\\';
		break;
	    }
	    /* Keep the backslash unless it escapes a member of set.  */
	    if (strchr(set, *src) == NULL)
		*dst++ = '\\';
	}
	*dst++ = *src++;
    }
    *dst = '\0';
    return copy;
}
490 
/*-
 * Str_rchri --
 *	Look for the last occurrence of character c in the interval
 *	[s, e).
 *
 * returns --
 *	Pointer to that occurrence, or NULL if c does not occur in the
 *	interval (in particular if the interval is empty).
 */
char *
Str_rchri(s, e, c)
    const char *s;
    const char *e;
    int c;
{
    const char *p;

    /* Scan backwards from the end of the interval.  */
    for (p = e; p != s;) {
	--p;
	if (*p == c)
	    return (char *)p;
    }
    return NULL;
}
504