xref: /openbsd-src/usr.bin/make/str.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenPackages$ */
2 /*	$OpenBSD: str.c,v 1.19 2001/05/23 12:34:49 espie Exp $	*/
3 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
4 
5 /*-
6  * Copyright (c) 1988, 1989, 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * Copyright (c) 1989 by Berkeley Softworks
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Adam de Boor.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  */
42 
43 #include <ctype.h>
44 #include <string.h>
45 #include "config.h"
46 #include "defines.h"
47 #include "str.h"
48 #include "memory.h"
49 #include "buf.h"
50 
/*-
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a newly
 *	allocated, NUL-terminated string.  If sep is non-zero, it is
 *	inserted as a single character between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(s1, e1, s2, e2, sep)
    const char *s1, *e1, *s2, *e2;
    int sep;
{
    size_t len1, len2;
    char *result, *t;

    /* get the length of both strings */
    len1 = e1 - s1;
    len2 = e2 - s2;

    /* room for both parts, the optional separator and the EOS */
    result = emalloc(len1 + len2 + (sep ? 1 : 0) + 1);

    /* Copy the first string into place.  Exactly len1 bytes are read:
     * e1 marks the end of the interval and is not part of it.  */
    memcpy(result, s1, len1);
    t = result + len1;

    /* add separator character */
    if (sep)
	*t++ = sep;

    /* copy second string plus EOS into place */
    memcpy(t, s2, len2);
    t[len2] = '\0';
    return result;
}
80 
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks into account.  Leading tabs/spaces
 *	are ignored.
 *
 *	Single and double quotes group characters into one word and are
 *	removed; a quote character of the other kind inside a quoted
 *	section is kept.  The escapes \b, \f, \n, \r and \t are translated,
 *	a backslash in front of any other character yields that character,
 *	and a backslash directly before a newline or the end of the string
 *	is kept as is.
 *
 *	Parsing stops at the end of the string, or at an unquoted newline
 *	that terminates a word.
 *
 * returns --
 *	Pointer to a NULL-terminated array of pointers to the words,
 *	allocated with emalloc()/erealloc().  The number of words is stored
 *	in *store_argc.  The words point into a separate copy of the string,
 *	allocated with emalloc(), whose address is stored in *buffer.
 */
char **
brk_string(str, store_argc, buffer)
    const char *str;
    int *store_argc;
    char **buffer;
{
    int argc;
    char ch;
    char inquote;
    const char *p;
    char *start, *t;
    int argmax = 50;
    char **argv = emalloc((argmax + 1) * sizeof(char *));

    /* skip leading space chars. */
    for (; *str == ' ' || *str == '\t'; ++str)
	continue;

    /* Allocate room for a copy of the string.  Every input character
     * produces at most one output character, so this is always enough.  */
    *buffer = emalloc(strlen(str) + 1);

    /*
     * copy the string; at the same time, parse backslashes,
     * quotes and build the argument list.
     *
     * start points to the beginning of the word being built, or is NULL
     * between words.  NOTE: it is non-NULL on entry to the loop, so an
     * empty input yields a single empty word.
     */
    argc = 0;
    inquote = '\0';
    for (p = str, start = t = *buffer;; ++p) {
	switch (ch = *p) {
	case '"':
	case '\'':
	    if (inquote) {
		if (inquote == ch)
		    inquote = '\0';
		else
		    break;	/* other kind of quote: plain character */
	    } else {
		inquote = ch;
		/* Don't miss "" or '' */
		if (start == NULL && p[1] == inquote) {
		    /* The quote itself is stored below as a filler
		     * byte; the (empty) word starts right after it.  */
		    start = t + 1;
		    break;
		}
	    }
	    continue;
	case ' ':
	case '\t':
	case '\n':
	    if (inquote)
		break;
	    if (!start)
		continue;
	    /* FALLTHROUGH */
	case '\0':
	    /*
	     * end of a token -- make sure there's enough argv
	     * space and save off a pointer.
	     */
	    if (!start)
		goto done;

	    *t++ = '\0';
	    if (argc == argmax) {
		argmax *= 2;		/* ramp up fast */
		argv = erealloc(argv, (argmax + 1) * sizeof(char *));
	    }
	    argv[argc++] = start;
	    start = NULL;
	    if (ch == '\n' || ch == '\0')
		goto done;
	    continue;
	case '\\':
	    switch (ch = *++p) {
	    case '\0':
	    case '\n':
		/* hmmm; fix it up as best we can */
		ch = '\\';
		--p;
		break;
	    case 'b':
		ch = '\b';
		break;
	    case 'f':
		ch = '\f';
		break;
	    case 'n':
		ch = '\n';
		break;
	    case 'r':
		ch = '\r';
		break;
	    case 't':
		ch = '\t';
		break;
	    }
	    break;
	}
	if (!start)
	    start = t;
	*t++ = ch;
    }
done:
    argv[argc] = NULL;
    *store_argc = argc;
    return argv;
}
200 
201 
/*-
 * iterate_words --
 *	Find the next word of a string, for stepping through a list of
 *	words without copying it.
 *
 *	On entry, *end points to where scanning starts (the beginning of
 *	the string for the first call, the value left by the previous call
 *	afterwards).  Leading white space is skipped.  A word extends up
 *	to the next space or tab that is not inside single or double
 *	quotes, or up to the end of the string; a backslash protects the
 *	character that follows it.  Quotes and backslashes are left in
 *	place.
 *
 * returns --
 *	The start of the word, with *end set to the position just after
 *	it, or NULL if only white space is left (*end is then unchanged).
 */
const char *
iterate_words(end)
    const char	**end;
{
    const char	*start, *p;
    char	state = 0;	/* active quote character, or 0 */

    start = *end;

    /* The cast keeps isspace() defined for characters with the high
     * bit set, which are negative where plain char is signed.  */
    while (isspace((unsigned char)*start))
	start++;
    if (*start == '\0')
	return NULL;

    for (p = start;; p++) {
	switch(*p) {
	case '\\':
	    /* skip the escaped character, unless it is the EOS */
	    if (p[1] != '\0')
		p++;
	    break;
	case '\'':
	case '"':
	    if (state == *p)
		state = 0;
	    else if (state == 0)
		state = *p;
	    break;
	case ' ':
	case '\t':
	    if (state != 0)
		break;
	    /* FALLTHROUGH */
	case '\0':
	    *end = p;
	    return start;
	default:
	    break;
	}
    }
}
240 
/*-
 * Str_Matchi --
 *	Match the string [string, estring) against the shell-style
 *	pattern [pattern, end).
 *
 *	Pattern syntax:
 *	    *		any sequence of characters, including none
 *	    ?		any single character
 *	    [...]	any one of the listed characters; x-y stands for
 *			a range (in either order), and a ']' that comes
 *			first in the list is taken literally
 *	    [!...]	any one character that is not listed ([^...] is
 *			accepted as well)
 *	    \c		the character c, literally
 *
 *	Neither estring nor end is dereferenced: both intervals may be
 *	parts of larger strings, or lack a terminating EOS.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(string, estring, pattern, end)
    const char *string; 		/* String */
    const char *estring;		/* End of string */
    const char *pattern;		/* Pattern */
    const char *end;			/* End of Pattern */
{
    while (pattern != end) {
	/* Check for a "*" as the next pattern character.  It matches
	 * any substring.  We handle this by calling ourselves
	 * recursively for each postfix of string, until either we
	 * match or we reach the end of the string.  */
	if (*pattern == '*') {
	    pattern++;
	    /* Skip over contiguous  sequences of `?*', so that recursive
	     * calls only occur on `real' characters.  */
	    while (pattern != end && (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
		    if (string == estring)
			return false;
		    else
			string++;
		}
		pattern++;
	    }
	    if (pattern == end)
		return true;
	    for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, end))
		    return true;
	    return false;
	} else if (string == estring)
	    return false;
	/* Check for a "[" as the next pattern character.  It is
	 * followed by a list of characters that are acceptable, or
	 * by a range (two characters separated by "-").  */
	else if (*pattern == '[') {
	    pattern++;
	    if (pattern == end)
		return false;
	    if (*pattern == '!' || *pattern == '^') {
		pattern++;
		if (pattern == end)
		    return false;
		/* Negative match */
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    return false;
		    }
		    if (*pattern == *string)
			return false;
		    /* Only look ahead for a range while still inside
		     * the pattern.  */
		    if (pattern + 1 != end && pattern[1] == '-') {
			if (pattern + 2 == end)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    return false;
			if (pattern[2] <= *string && *string < *pattern)
			    return false;
			pattern += 3;
		    } else
			pattern++;
		    if (pattern == end)
			return false;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (*pattern == ']')
			break;
		}
	    } else {
		for (;;) {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    return false;
		    }
		    if (*pattern == *string)
			break;
		    /* Only look ahead for a range while still inside
		     * the pattern.  */
		    if (pattern + 1 != end && pattern[1] == '-') {
			if (pattern + 2 == end)
			    return false;
			if (*pattern < *string && *string <= pattern[2])
			    break;
			if (pattern[2] <= *string && *string < *pattern)
			    break;
			pattern += 3;
		    } else
			pattern++;
		    /* The test for ']' is done at the end so that ']'
		     * can be used at the start of the range without '\' */
		    if (pattern == end || *pattern == ']')
			return false;
		}
		/* Found matching character, skip over rest of class.
		 * pattern still points to the character that matched
		 * (or that starts the matching range): step over it
		 * first, since it may be a literal ']' or an escaped
		 * backslash, which must not be interpreted again.  */
		pattern++;
		while (pattern != end && *pattern != ']') {
		    if (*pattern == '\\') {
			if (++pattern == end)
			    break;
		    }
		    pattern++;
		}
		/* A non-terminated character class is ok: the pattern
		 * is exhausted, so consume the matched character and
		 * let the final test decide.  */
		if (pattern == end) {
		    string++;
		    break;
		}
	    }
	}
	/* '?' matches any single character, so shunt test.  */
	else if (*pattern != '?') {
	    /* If the next pattern character is '\', just strip off the
	     * '\' so we do exact matching on the character that follows.  */
	    if (*pattern == '\\') {
		if (++pattern == end)
		    return false;
	    }
	    /* There's no special character.  Just make sure that
	     * the next characters of each string match.  */
	    if (*pattern != *string)
		return false;
	}
	pattern++;
	string++;
    }
    /* Pattern exhausted: succeed only if the string is exhausted too.  */
    return string == estring;
}
365 
366 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern, in which the
 *	first '%' stands for an arbitrary (possibly empty) run of
 *	characters.  The text before the '%' must be a prefix of the
 *	word, and the text after it a suffix.  A pattern without '%' only
 *	has to match the end of the word, and an empty pattern matches
 *	the whole word.
 *
 * Results:
 *	Returns the start of the part of the word matched by the
 *	wildcard, or NULL if the word does not match.  On a match, the
 *	length of that part is stored in *len; otherwise *len is left
 *	alone.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(word, pattern, len)
    const char	*word;		/* Word to examine */
    const char	*pattern;	/* Pattern to examine against */
    size_t	*len;		/* Number of characters to substitute */
{
    const char *percent;
    const char *stem = word;		/* where the wildcard part starts */
    const char *suffix = pattern;	/* what must follow the wildcard */
    size_t stemlen, suffixlen;

    /* Null pattern is the whole string.  */
    if (*pattern == '\0') {
	*len = strlen(word);
	return word;
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	size_t prefixlen = percent - pattern;

	/* The word has to start with everything before the '%'.  */
	if (strncmp(word, pattern, prefixlen) != 0)
	    return NULL;
	stem = word + prefixlen;
	suffix = percent + 1;

	/* Nothing after the '%': the rest of the word is the match.  */
	if (*suffix == '\0') {
	    *len = strlen(stem);
	    return stem;
	}
    }

    /* The remaining pattern must be the tail of the remaining word;
     * there is only one position where that can be true.  */
    stemlen = strlen(stem);
    suffixlen = strlen(suffix);
    if (suffixlen > stemlen)
	return NULL;
    if (strcmp(stem + (stemlen - suffixlen), suffix) != 0)
	return NULL;

    *len = stemlen - suffixlen;
    return stem;
}
421 
422 
423 /*-
424  *-----------------------------------------------------------------------
425  * Str_SYSVSubst --
426  *	Substitute '%' on the pattern with len characters from src.
427  *	If the pattern does not contain a '%' prepend len characters
428  *	from src.
429  *
430  * Side Effects:
431  *	Places result on buf
432  *-----------------------------------------------------------------------
433  */
434 void
435 Str_SYSVSubst(buf, pat, src, len)
436     Buffer buf;
437     const char *pat;
438     const char *src;
439     size_t   len;
440 {
441     const char *m;
442 
443     if ((m = strchr(pat, '%')) != NULL) {
444 	/* Copy the prefix.  */
445 	Buf_Addi(buf, pat, m);
446 	/* Skip the %.	*/
447 	pat = m + 1;
448     }
449 
450     /* Copy the pattern.  */
451     Buf_AddChars(buf, len, src);
452 
453     /* Append the rest.  */
454     Buf_AddString(buf, pat);
455 }
456 
/*-
 * Str_dupi --
 *	Duplicate the interval [begin, end) as a NUL-terminated string.
 *
 * returns --
 *	The copy, obtained from emalloc().
 */
char *
Str_dupi(begin, end)
    const char *begin;
    const char *end;
{
    char *copy;

    copy = emalloc(end - begin + 1);
    memcpy(copy, begin, end - begin);
    copy[end - begin] = '\0';
    return copy;
}
469 
/*-
 * escape_dupi --
 *	Duplicate the interval [begin, end) as a NUL-terminated string,
 *	removing each backslash that comes in front of a character
 *	found in set.  Other backslashes are copied unchanged, including
 *	one that ends the interval.
 *
 * returns --
 *	The copy, obtained from emalloc().
 */
char *
escape_dupi(begin, end, set)
    const char *begin;
    const char *end;
    const char *set;
{
    char *copy, *out;

    /* The copy never gets longer than the original.  */
    out = copy = emalloc(end - begin + 1);
    for (; begin != end; begin++) {
	if (*begin == '\\') {
	    if (++begin == end) {
		/* lone backslash at the very end */
		*out++ = '\\';
		break;
	    }
	    /* keep the backslash unless it escapes a set member */
	    if (strchr(set, *begin) == NULL)
		*out++ = '\\';
	}
	*out++ = *begin;
    }
    *out = '\0';
    return copy;
}
494 
/*-
 * Str_rchri --
 *	Look for the last occurrence of the character c in the
 *	interval [s, e).
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c does not occur.
 */
char *
Str_rchri(s, e, c)
    const char *s;
    const char *e;
    int c;
{
    /* walk backwards from the end of the interval */
    while (e != s) {
	e--;
	if (*e == c)
	    return (char *)e;
    }
    return NULL;
}
508