xref: /openbsd-src/usr.bin/make/str.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: str.c,v 1.32 2019/05/21 17:21:02 espie Exp $	*/
2 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1988, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 1989 by Berkeley Softworks
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Adam de Boor.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include <ctype.h>
39 #include <string.h>
40 #include "config.h"
41 #include "defines.h"
42 #include "str.h"
43 #include "memory.h"
44 #include "buf.h"
45 
/*
 * Helpers for Str_Matchi, both defined later in this file:
 * range_match handles a '[...]' character class and star_match
 * handles a '*' wildcard.
 */
static bool range_match(char, const char **, const char *);
static bool star_match(const char *, const char *, const char *, const char *);
49 
/*-
 * Str_concati --
 *	Build a freshly allocated, NUL-terminated string made of the
 *	interval [s1, e1), an optional separator and the interval [s2, e2).
 *
 *	sep is the separator character to insert between both parts; when
 *	it is 0 the two intervals are simply joined.
 *
 * returns --
 *	The new string, obtained from emalloc.
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval: reading one extra
	 * byte to make room for the separator would touch memory at e1,
	 * which is outside of the interval handed to us.
	 */
	memcpy(p, s1, len1);
	p += len1;

	if (sep)
		*p++ = sep;

	/* second interval, then the terminator */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
78 
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks into account.  Leading tabs/spaces
 *	are ignored.
 *
 *	Single and double quotes group characters into one word and are
 *	themselves removed; inside a quoted section the other kind of
 *	quote is kept literally.  A backslash escapes the next character,
 *	with \b, \f, \n, \r and \t translated to the matching control
 *	character; this happens inside quotes as well.  An unquoted newline
 *	that ends a word also ends the whole scan.
 *
 *	Note that the first word is considered started as soon as the scan
 *	begins, so a string that is empty after skipping leading blanks
 *	still yields one (empty) word.
 *
 * returns --
 *	Pointer to the array of pointers to the words, terminated by a
 *	NULL entry.  Fills up *store_argc with the number of words.
 *	The words themselves live in a single buffer, returned as
 *	*buffer, to be freed later.  The array of pointers is a separate
 *	allocation obtained from ereallocarray.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;		/* current quote character, or '\0' */
	const char *p;		/* read position in str */
	char *start, *t;	/* start of current word / write position */
	size_t len;
	int argmax = 50;	/* start at 50 */
	size_t curlen = 0;
	char **argv = ereallocarray(NULL, argmax + 1, sizeof(char *));

	/* skip leading space chars. */
	for (; *str == ' ' || *str == '\t'; ++str)
		continue;

	/*
	 * allocate room for a copy of the string; curlen starts at 0 and
	 * len is at least 1, so the allocation always happens.  The copy
	 * never grows past the input: each input character produces at
	 * most one output character.
	 */
	if ((len = strlen(str) + 1) > curlen)
		*buffer = emalloc(curlen = len);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 *
	 * Within the switch, `continue' moves on to the next input
	 * character without storing anything, whereas `break' falls to
	 * the bottom of the loop, which stores ch into the buffer.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';	/* closing quote */
				else
					break;	/* other quote kind: literal */
			} else {
				inquote = ch;
				/* Don't miss "" or '' */
				if (start == NULL && p[1] == inquote) {
					/*
					 * The quote itself gets stored at t
					 * as filler, and the (empty) word
					 * begins right after it.
					 */
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;		/* quoted blank: literal */
			if (!start)
				continue;	/* blanks between words */
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 * An unterminated quote is silently closed by the
			 * end of the string.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;	/* ramp up fast */
				argv = ereallocarray(argv,
				    (argmax + 1), sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/*
				 * hmmm; fix it up as best we can: keep the
				 * backslash and back up, so that the
				 * terminator is seen by the next iteration.
				 */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			    break;
		}
		/* store the character, opening a new word if needed */
		if (!start)
			start = t;
		*t++ = ch;
	}
    done:
	    argv[argc] = NULL;
	    *store_argc = argc;
	    return argv;
}
198 
199 
/*-
 * iterate_words --
 *	Find the next word in a string, starting at *end.
 *
 *	Leading ISSPACE characters are skipped.  The word then runs up to
 *	the next space or tab that is not within single or double quotes,
 *	or up to the end of the string.  A backslash protects the character
 *	that follows it, unless that character is the terminating NUL.
 *
 * returns --
 *	Pointer to the first character of the word, with *end updated to
 *	point just past the word, or NULL if nothing but blanks is left
 *	(*end is not modified in that case).
 */
const char *
iterate_words(const char **end)
{
	const char *word = *end;
	const char *cursor;
	char quote = '\0';	/* quote character we are within, if any */

	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	for (cursor = word;; cursor++) {
		char c = *cursor;

		/* end of string, or an unquoted blank: the word stops here */
		if (c == '\0' || ((c == ' ' || c == '\t') && quote == '\0')) {
			*end = cursor;
			return word;
		}
		if (c == '\\') {
			/* skip over the escaped character */
			if (cursor[1] != '\0')
				cursor++;
		} else if (c == '\'' || c == '"') {
			if (quote == c)
				quote = '\0';
			else if (quote == '\0')
				quote = c;
		}
	}
}
237 
/*
 * star_match is used by Str_Matchi before its definition; the prototype
 * is repeated here so that this group of functions stands on its own.
 */
static bool star_match(const char *, const char *, const char *, const char *);

/*-
 * range_match --
 *	Match the character c against a character class.  On entry *ppat
 *	points right after the opening '['; the pattern ends at epattern
 *	and nothing at or beyond epattern is ever read.
 *
 *	A class is a list of characters and of ranges "x-y" (bounds may be
 *	given in either order).  A leading '!' or '^' negates the class,
 *	and '\' quotes the next character.  ']' closes the class, except
 *	as very first character where it stands for itself.
 *
 * returns --
 *	true if c is acceptable.  For a well-formed class, *ppat is then
 *	left on the closing ']'; for an unterminated class it is left at
 *	epattern.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	/* A '[' that closes the pattern just stands for itself. */
	if (*ppat == epattern)
		return c == '[';

	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		/* invariant: *ppat != epattern at this point */
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* Only consider a '-' that actually belongs to the pattern. */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			/* equality with the first bound was tested above */
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character, skip over rest of class.
	 * A non-terminated character class is ok: we just stop at the
	 * end of the pattern.  */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}

/*-
 * Str_Matchi --
 *	Check whether the interval [string, estring) matches the pattern
 *	held in [pattern, epattern).
 *
 *	Pattern syntax: '*' matches any sequence of characters, '?' any
 *	single character, '[...]' one character out of a class (see
 *	range_match), and '\' quotes the following character.  A pattern
 *	ending with a lone '\' matches nothing.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* '*' is handled recursively and decides the outcome. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs one character from the string. */
		if (string == estring)
			return false;
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/*
			 * An unterminated class (or a final lone '[')
			 * leaves pattern at epattern: there is no ']' to
			 * step over, and moving further would run past
			 * the end of the pattern.
			 */
			if (pattern == epattern) {
				string++;
				break;
			}
		} else if (*pattern != '?') {
			/* '?' matches any single character, so only other
			 * characters need an actual comparison.
			 * If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows.  */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	/* The pattern is used up: so must be the string. */
	return string == estring;
}

/*-
 * star_match --
 *	Handle a '*' in a pattern; on entry pattern points to the '*'.
 *
 *	'*' matches any substring.  We handle this by calling Str_Matchi
 *	recursively for each postfix of string, until either we match or
 *	we reach the end of the string.
 *
 * returns --
 *	true if [string, estring) matches the pattern from the '*' on.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	pattern++;
	/* Skip over contiguous sequences of `?*', so that
	 * recursive calls only occur on `real' characters.
	 * Each '?' still uses up one character of the string.  */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	/* A trailing '*' swallows whatever is left. */
	if (pattern == epattern)
		return true;
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}
358 
359 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match (% is wild).
 *
 *	The part of the pattern before the first '%' must be a prefix of
 *	word, and the part after it must be the tail of word; the '%'
 *	stands for whatever lies in between.  A pattern without '%' only
 *	has to match the tail of word.  An empty pattern matches the
 *	whole word.
 *
 * Results:
 *	Returns the beginning position of a match or null. The number
 *	of characters matched is returned in len; len is left untouched
 *	when there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *p = pattern;
	const char *w = word;
	const char *m;
	size_t wlen, plen;

	if (*p == '\0') {
		/* Null pattern is the whole string.  */
		*len = strlen(w);
		return w;
	}

	if ((m = strchr(p, '%')) != NULL) {
		size_t prefix = m - p;

		/* Check that the prefix matches.  A word shorter than the
		 * prefix fails on its terminating NUL.  */
		if (strncmp(w, p, prefix) != 0)
			return NULL;	/* No match.  */
		w += prefix;

		/* Skip the %.  */
		p = m + 1;
		if (*p == '\0') {
			/* No more pattern, return the rest of the string. */
			*len = strlen(w);
			return w;
		}
	}

	/*
	 * Find a matching tail.  Since the tail has to reach the end of
	 * the word, there is only one position where it can start, so one
	 * comparison is enough.
	 */
	wlen = strlen(w);
	plen = strlen(p);
	if (plen > wlen || memcmp(w + wlen - plen, p, plen) != 0)
		return NULL;

	*len = wlen - plen;
	return w;
}
410 
411 
412 /*-
413  *-----------------------------------------------------------------------
414  * Str_SYSVSubst --
415  *	Substitute '%' in the pattern with len characters from src.
416  *	If the pattern does not contain a '%' prepend len characters
417  *	from src.
418  *
419  * Side Effects:
420  *	Adds result to buf
421  *-----------------------------------------------------------------------
422  */
423 void
424 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
425 {
426 	const char *m;
427 
428 	if ((m = strchr(pat, '%')) != NULL) {
429 		/* Copy the prefix.  */
430 		Buf_Addi(buf, pat, m);
431 		/* Skip the %.	*/
432 		pat = m + 1;
433 	}
434 
435 	/* Copy the pattern.  */
436 	Buf_AddChars(buf, len, src);
437 
438 	/* Append the rest.  */
439 	Buf_AddString(buf, pat);
440 }
441 
/*-
 * Str_dupi --
 *	Duplicate the interval [begin, end) as a NUL-terminated string.
 *
 * returns --
 *	The copy, obtained from emalloc.
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
452 
/*-
 * escape_dupi --
 *	Duplicate the interval [begin, end) as a NUL-terminated string,
 *	dropping the backslash in front of any character that belongs to
 *	set.  Backslashes in front of other characters are kept, and so
 *	is a backslash that ends the interval.
 *
 * returns --
 *	The copy, obtained from emalloc.  It is never longer than the
 *	original interval.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	const char *in;
	char *result, *out;

	result = emalloc(end - begin + 1);
	out = result;
	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* nothing left to escape */
				*out++ = '\\';
				break;
			}
			/* not one of ours: the backslash stays */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return result;
}
474 
/*-
 * Str_rchri --
 *	Look for the last occurrence of character c within the interval
 *	[begin, end).
 *
 * returns --
 *	Pointer to that occurrence, or NULL if c does not occur (which is
 *	always the case for an empty interval).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *scan = end;

	/* walk backwards from the end of the interval */
	while (scan != begin) {
		scan--;
		if (*scan == c)
			return (char *)scan;
	}
	return NULL;
}
485