1 /* $OpenBSD: str.c,v 1.33 2023/09/04 11:35:11 espie Exp $ */
2 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */
3
4 /*-
5 * Copyright (c) 1988, 1989, 1990, 1993
6 * The Regents of the University of California. All rights reserved.
7 * Copyright (c) 1989 by Berkeley Softworks
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * Adam de Boor.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #include <ctype.h>
39 #include <string.h>
40 #include "defines.h"
41 #include "str.h"
42 #include "memory.h"
43 #include "buf.h"
44
45 /* helpers for Str_Matchi */
46 static bool range_match(char, const char **, const char *);
47 static bool star_match(const char *, const char *, const char *, const char *);
48
/*-
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a freshly
 *	allocated, NUL-terminated string.  If sep is non-zero, that
 *	character is inserted between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc.
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/*
	 * Copy exactly the bytes of the first interval: e1 itself is
	 * outside the interval and must not be read.
	 */
	memcpy(p, s1, len1);
	p += len1;

	/* add separator character */
	if (sep)
		*p++ = sep;

	/* copy second string, then add the EOS */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
77
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs or
 *	spaces) taking quotation marks and backslash escapes into account.
 *	Leading tabs/spaces are ignored.  An unquoted newline that ends a
 *	word also ends the parse.
 *
 * returns --
 *	Pointer to the NULL-terminated array of pointers to the words.
 *	Fills up store_argc with its size.
 *	The words themselves live in a single buffer, returned as
 *	*buffer, to be freed later.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int count = 0;		/* words stored so far */
	int capacity = 50;	/* start at 50 */
	char quote = '\0';	/* quote character we are inside of, if any */
	char ch;
	const char *in;		/* read position in str */
	char *word, *out;	/* start of current word, write position */
	char **argv = ereallocarray(NULL, capacity + 1, sizeof(char *));

	/* skip leading space chars. */
	while (*str == ' ' || *str == '\t')
		str++;

	/* the words never need more room than the input itself */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 */
	word = out = *buffer;
	for (in = str;; in++) {
		ch = *in;

		if (ch == '"' || ch == '\'') {
			if (quote == '\0') {
				quote = ch;
				/* Don't miss "" or '' */
				if (word == NULL && in[1] == quote)
					word = out + 1;
				else
					continue;
			} else if (quote == ch) {
				quote = '\0';
				continue;
			}
			/* anything else is copied as an ordinary character */
		} else if (ch == '\0' || (quote == '\0' &&
		    (ch == ' ' || ch == '\t' || ch == '\n'))) {
			if (word == NULL) {
				/* between words: stop at EOS, else skip */
				if (ch == '\0')
					break;
				continue;
			}
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			*out++ = '\0';
			if (count == capacity) {
				capacity *= 2;	/* ramp up fast */
				argv = ereallocarray(argv, capacity + 1,
				    sizeof(char *));
			}
			argv[count++] = word;
			word = NULL;
			if (ch == '\n' || ch == '\0')
				break;
			continue;
		} else if (ch == '\\') {
			ch = *++in;
			switch (ch) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--in;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
		}

		/* ordinary character: open a word if needed and store it */
		if (word == NULL)
			word = out;
		*out++ = ch;
	}

	argv[count] = NULL;
	*store_argc = count;
	return argv;
}
197
198
/*-
 * iterate_words --
 *	Find the next word starting at *end.  Leading whitespace is
 *	skipped; the word then runs up to the first space or tab that is
 *	not inside single or double quotes, or up to the end of string.
 *	A backslash protects the character that follows it.
 *
 * returns --
 *	The start of the word, with *end moved to the character that
 *	terminates it, or NULL (leaving *end untouched) if nothing but
 *	whitespace remains.
 */
const char *
iterate_words(const char **end)
{
	const char *word, *cursor;
	char quote = 0;		/* quote character we are inside of, if any */

	word = *end;
	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	for (cursor = word;; cursor++) {
		const char ch = *cursor;

		if (ch == '\\') {
			/* step over the escaped character, but not the EOS */
			if (cursor[1] != '\0')
				cursor++;
		} else if (ch == '\'' || ch == '"') {
			if (quote == ch)
				quote = 0;
			else if (quote == 0)
				quote = ch;
		} else if (ch == '\0' ||
		    ((ch == ' ' || ch == '\t') && quote == 0)) {
			*end = cursor;
			return word;
		}
	}
}
236
/*
 * star_match --
 *	Handle a '*' in the pattern; pattern points at that '*'.
 *	'*' matches any substring, so the remainder of the pattern is
 *	tried against every suffix of the string in turn.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	const char *s;

	/*
	 * Step over the '*' and any run of `?*' behind it, so that
	 * recursive calls only occur on `real' characters.  Each '?'
	 * uses up one character of the string.
	 */
	for (pattern++; pattern != epattern; pattern++) {
		if (*pattern == '?') {
			if (string == estring)
				return false;
			string++;
		} else if (*pattern != '*')
			break;
	}

	/* nothing left after the star: whatever remains matches */
	if (pattern == epattern)
		return true;

	for (s = string; s != estring; s++) {
		if (Str_Matchi(s, estring, pattern, epattern))
			return true;
	}
	return false;
}
265
/*
 * range_match --
 *	Match c against a character class.  On entry *ppat points just
 *	past the opening '['.  A class is a list of characters and of
 *	ranges (two characters separated by '-'); a leading '!' or '^'
 *	negates it, and '\' quotes the next character.
 *
 *	On a successful non-negated match, *ppat is left on the closing
 *	']', or at epattern if the class is not terminated.
 *	Nothing at or beyond epattern is ever read.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	/* a '[' that ends the pattern only matches itself */
	if (*ppat == epattern)
		return c == '[';

	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/*
		 * Only look for the '-' of a range if it still belongs to
		 * the pattern; the byte at epattern is not ours to read.
		 */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			/* ranges are accepted in either order */
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character, skip over rest
	 * of class. A non-terminated character class
	 * is ok. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			(*ppat)++;
			if (*ppat == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}
315
/*-
 * Str_Matchi --
 *	Check whether the interval [string, estring) matches the pattern
 *	held in [pattern, epattern).  In the pattern:
 *	    *		matches any substring,
 *	    ?		matches any single character,
 *	    [...]	matches one character from a class (see range_match),
 *	    \x		matches the character x literally.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character. */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs a character to match against. */
		if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character. It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-"). */
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/* A non-terminated class uses up the whole
			 * pattern: there is no ']' left to step over, and
			 * moving past epattern would make the loop test
			 * miss the end of the pattern. */
			if (pattern == epattern) {
				string++;
				break;
			}
		}
		/* '?' matches any single character, so shunt test. */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows. */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character. Just make sure that
			 * the next characters of each string match. */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	/* pattern is used up: the string must be, too */
	return string == estring;
}
357
358
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against pattern for a match (% is wild).
 *	The part of the pattern before the first '%' must start the
 *	word and the part after it must end the word.  A pattern
 *	without '%' only has to match the end of the word.
 *
 * Results:
 *	Returns the beginning position of a match or null. The number
 *	of characters matched is returned in len; len is left alone
 *	when there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	const char *stem = word;	/* where the wild part begins */
	const char *suffix = pattern;	/* what must follow the wild part */
	size_t stemlen, suffixlen;

	if (*pattern == '\0') {
		/* Null pattern is the whole string. */
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		size_t prefixlen = percent - pattern;

		/* Check that the prefix matches. */
		if (strncmp(word, pattern, prefixlen) != 0)
			return NULL;	/* No match. */

		stem = word + prefixlen;
		suffix = percent + 1;
		if (*suffix == '\0') {
			/* No more pattern, return the rest of the string. */
			*len = strlen(stem);
			return stem;
		}
	}

	/* The suffix has to be the tail of what is left of the word. */
	stemlen = strlen(stem);
	suffixlen = strlen(suffix);
	if (suffixlen > stemlen ||
	    strcmp(stem + (stemlen - suffixlen), suffix) != 0)
		return NULL;

	*len = stemlen - suffixlen;
	return stem;
}
409
410
411 /*-
412 *-----------------------------------------------------------------------
413 * Str_SYSVSubst --
414 * Substitute '%' in the pattern with len characters from src.
415 * If the pattern does not contain a '%' prepend len characters
416 * from src.
417 *
418 * Side Effects:
419 * Adds result to buf
420 *-----------------------------------------------------------------------
421 */
422 void
Str_SYSVSubst(Buffer buf,const char * pat,const char * src,size_t len)423 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
424 {
425 const char *m;
426
427 if ((m = strchr(pat, '%')) != NULL) {
428 /* Copy the prefix. */
429 Buf_Addi(buf, pat, m);
430 /* Skip the %. */
431 pat = m + 1;
432 }
433
434 /* Copy the pattern. */
435 Buf_AddChars(buf, len, src);
436
437 /* Append the rest. */
438 Buf_AddString(buf, pat);
439 }
440
/*
 * Str_dupi --
 *	Return a NUL-terminated copy of the interval [begin, end),
 *	obtained from emalloc.
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t n = end - begin;
	char *copy = emalloc(n + 1);

	memcpy(copy, begin, n);
	copy[n] = '\0';
	return copy;
}
451
/*
 * escape_dupi --
 *	Return a NUL-terminated copy of the interval [begin, end),
 *	obtained from emalloc, in which a backslash is dropped whenever
 *	the character after it belongs to set.  Other backslashes,
 *	including one that ends the interval, are kept.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* the copy never grows, so the input size is enough */
	char *copy = emalloc(end - begin + 1);
	char *out = copy;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* trailing backslash: keep it */
				*out++ = '\\';
				break;
			}
			/* keep the backslash unless it quotes a set member */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return copy;
}
473
/*
 * Str_rchri --
 *	Find the last occurrence of c in the interval [begin, end).
 *
 * returns --
 *	A pointer to that character, or NULL if there is none.
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *p = end;

	/* walk backwards from the end of the interval */
	while (p != begin) {
		p--;
		if (*p == c)
			return (char *)p;
	}
	return NULL;
}
484