1 /* $OpenBSD: str.c,v 1.32 2019/05/21 17:21:02 espie Exp $ */ 2 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1988, 1989, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 1989 by Berkeley Softworks 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Adam de Boor. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #include <ctype.h> 39 #include <string.h> 40 #include "config.h" 41 #include "defines.h" 42 #include "str.h" 43 #include "memory.h" 44 #include "buf.h" 45 46 /* helpers for Str_Matchi */ 47 static bool range_match(char, const char **, const char *); 48 static bool star_match(const char *, const char *, const char *, const char *); 49 50 char * 51 Str_concati(const char *s1, const char *e1, const char *s2, const char *e2, 52 int sep) 53 { 54 size_t len1, len2; 55 char *result; 56 57 /* get the length of both strings */ 58 len1 = e1 - s1; 59 len2 = e2 - s2; 60 61 /* space for separator */ 62 if (sep) 63 len1++; 64 result = emalloc(len1 + len2 + 1); 65 66 /* copy first string into place */ 67 memcpy(result, s1, len1); 68 69 /* add separator character */ 70 if (sep) 71 result[len1-1] = sep; 72 73 /* copy second string plus EOS into place */ 74 memcpy(result + len1, s2, len2); 75 result[len1+len2] = '\0'; 76 return result; 77 } 78 79 /*- 80 * brk_string -- 81 * Fracture a string into an array of words (as delineated by tabs or 82 * spaces) taking quotation marks into account. Leading tabs/spaces 83 * are ignored. 84 * 85 * returns -- 86 * Pointer to the array of pointers to the words. Fills up 87 * store_args with its size. 88 * The returned parameters are allocated in a single buffer, 89 * return as *buffer, to be freed later. 90 */ 91 char ** 92 brk_string(const char *str, int *store_argc, char **buffer) 93 { 94 int argc; 95 char ch; 96 char inquote; 97 const char *p; 98 char *start, *t; 99 size_t len; 100 int argmax = 50; /* start at 50 */ 101 size_t curlen = 0; 102 char **argv = ereallocarray(NULL, argmax + 1, sizeof(char *)); 103 104 /* skip leading space chars. */ 105 for (; *str == ' ' || *str == '\t'; ++str) 106 continue; 107 108 /* allocate room for a copy of the string */ 109 if ((len = strlen(str) + 1) > curlen) 110 *buffer = emalloc(curlen = len); 111 112 /* 113 * copy the string; at the same time, parse backslashes, 114 * quotes and build the argument list. 115 */ 116 argc = 0; 117 inquote = '\0'; 118 for (p = str, start = t = *buffer;; ++p) { 119 switch (ch = *p) { 120 case '"': 121 case '\'': 122 if (inquote) { 123 if (inquote == ch) 124 inquote = '\0'; 125 else 126 break; 127 } else { 128 inquote = ch; 129 /* Don't miss "" or '' */ 130 if (start == NULL && p[1] == inquote) { 131 start = t + 1; 132 break; 133 } 134 } 135 continue; 136 case ' ': 137 case '\t': 138 case '\n': 139 if (inquote) 140 break; 141 if (!start) 142 continue; 143 /* FALLTHROUGH */ 144 case '\0': 145 /* 146 * end of a token -- make sure there's enough argv 147 * space and save off a pointer. 148 */ 149 if (!start) 150 goto done; 151 152 *t++ = '\0'; 153 if (argc == argmax) { 154 argmax *= 2; /* ramp up fast */ 155 argv = ereallocarray(argv, 156 (argmax + 1), sizeof(char *)); 157 } 158 argv[argc++] = start; 159 start = NULL; 160 if (ch == '\n' || ch == '\0') 161 goto done; 162 continue; 163 case '\\': 164 switch (ch = *++p) { 165 case '\0': 166 case '\n': 167 /* hmmm; fix it up as best we can */ 168 ch = '\\'; 169 --p; 170 break; 171 case 'b': 172 ch = '\b'; 173 break; 174 case 'f': 175 ch = '\f'; 176 break; 177 case 'n': 178 ch = '\n'; 179 break; 180 case 'r': 181 ch = '\r'; 182 break; 183 case 't': 184 ch = '\t'; 185 break; 186 } 187 break; 188 } 189 if (!start) 190 start = t; 191 *t++ = ch; 192 } 193 done: 194 argv[argc] = NULL; 195 *store_argc = argc; 196 return argv; 197 } 198 199 200 const char * 201 iterate_words(const char **end) 202 { 203 const char *start, *p; 204 char state = 0; 205 start = *end; 206 207 while (ISSPACE(*start)) 208 start++; 209 if (*start == '\0') 210 return NULL; 211 212 for (p = start;; p++) 213 switch(*p) { 214 case '\\': 215 if (p[1] != '\0') 216 p++; 217 break; 218 case '\'': 219 case '"': 220 if (state == *p) 221 state = 0; 222 else if (state == 0) 223 state = *p; 224 break; 225 case ' ': 226 case '\t': 227 if (state != 0) 228 break; 229 /* FALLTHROUGH */ 230 case '\0': 231 *end = p; 232 return start; 233 default: 234 break; 235 } 236 } 237 238 static bool 239 star_match(const char *string, const char *estring, 240 const char *pattern, const char *epattern) 241 { 242 /* '*' matches any substring. We handle this by calling ourselves 243 * recursively for each postfix of string, until either we match or 244 * we reach the end of the string. */ 245 pattern++; 246 /* Skip over contiguous sequences of `?*', so that 247 * recursive calls only occur on `real' characters. */ 248 while (pattern != epattern && 249 (*pattern == '?' || *pattern == '*')) { 250 if (*pattern == '?') { 251 if (string == estring) 252 return false; 253 else 254 string++; 255 } 256 pattern++; 257 } 258 if (pattern == epattern) 259 return true; 260 for (; string != estring; string++) 261 if (Str_Matchi(string, estring, pattern, 262 epattern)) 263 return true; 264 return false; 265 } 266 267 static bool 268 range_match(char c, const char **ppat, const char *epattern) 269 { 270 if (*ppat == epattern) { 271 if (c == '[') 272 return true; 273 else 274 return false; 275 } 276 if (**ppat == '!' || **ppat == '^') { 277 (*ppat)++; 278 return !range_match(c, ppat, epattern); 279 } 280 for (;;) { 281 if (**ppat == '\\') { 282 if (++(*ppat) == epattern) 283 return false; 284 } 285 if (**ppat == c) 286 break; 287 if ((*ppat)[1] == '-') { 288 if (*ppat + 2 == epattern) 289 return false; 290 if (**ppat < c && c <= (*ppat)[2]) 291 break; 292 if ((*ppat)[2] <= c && c < **ppat) 293 break; 294 *ppat += 3; 295 } else 296 (*ppat)++; 297 /* The test for ']' is done at the end 298 * so that ']' can be used at the 299 * start of the range without '\' */ 300 if (*ppat == epattern || **ppat == ']') 301 return false; 302 } 303 /* Found matching character, skip over rest 304 * of class. */ 305 while (**ppat != ']') { 306 if (**ppat == '\\') 307 (*ppat)++; 308 /* A non-terminated character class 309 * is ok. */ 310 if (*ppat == epattern) 311 break; 312 (*ppat)++; 313 } 314 return true; 315 } 316 317 bool 318 Str_Matchi(const char *string, const char *estring, 319 const char *pattern, const char *epattern) 320 { 321 while (pattern != epattern) { 322 /* Check for a "*" as the next pattern character. */ 323 if (*pattern == '*') 324 return star_match(string, estring, pattern, epattern); 325 else if (string == estring) 326 return false; 327 /* Check for a "[" as the next pattern character. It is 328 * followed by a list of characters that are acceptable, or 329 * by a range (two characters separated by "-"). */ 330 else if (*pattern == '[') { 331 pattern++; 332 if (!range_match(*string, &pattern, epattern)) 333 return false; 334 335 } 336 /* '?' matches any single character, so shunt test. */ 337 else if (*pattern != '?') { 338 /* If the next pattern character is '\', just strip 339 * off the '\' so we do exact matching on the 340 * character that follows. */ 341 if (*pattern == '\\') { 342 if (++pattern == epattern) 343 return false; 344 } 345 /* There's no special character. Just make sure that 346 * the next characters of each string match. */ 347 if (*pattern != *string) 348 return false; 349 } 350 pattern++; 351 string++; 352 } 353 if (string == estring) 354 return true; 355 else 356 return false; 357 } 358 359 360 /*- 361 *----------------------------------------------------------------------- 362 * Str_SYSVMatch -- 363 * Check word against pattern for a match (% is wild), 364 * 365 * Results: 366 * Returns the beginning position of a match or null. The number 367 * of characters matched is returned in len. 368 *----------------------------------------------------------------------- 369 */ 370 const char * 371 Str_SYSVMatch(const char *word, const char *pattern, size_t *len) 372 { 373 const char *p = pattern; 374 const char *w = word; 375 const char *m; 376 377 if (*p == '\0') { 378 /* Null pattern is the whole string. */ 379 *len = strlen(w); 380 return w; 381 } 382 383 if ((m = strchr(p, '%')) != NULL) { 384 /* Check that the prefix matches. */ 385 for (; p != m && *w && *w == *p; w++, p++) 386 continue; 387 388 if (p != m) 389 return NULL; /* No match. */ 390 391 if (*++p == '\0') { 392 /* No more pattern, return the rest of the string. */ 393 *len = strlen(w); 394 return w; 395 } 396 } 397 398 m = w; 399 400 /* Find a matching tail. */ 401 do { 402 if (strcmp(p, w) == 0) { 403 *len = w - m; 404 return m; 405 } 406 } while (*w++ != '\0'); 407 408 return NULL; 409 } 410 411 412 /*- 413 *----------------------------------------------------------------------- 414 * Str_SYSVSubst -- 415 * Substitute '%' in the pattern with len characters from src. 416 * If the pattern does not contain a '%' prepend len characters 417 * from src. 418 * 419 * Side Effects: 420 * Adds result to buf 421 *----------------------------------------------------------------------- 422 */ 423 void 424 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len) 425 { 426 const char *m; 427 428 if ((m = strchr(pat, '%')) != NULL) { 429 /* Copy the prefix. */ 430 Buf_Addi(buf, pat, m); 431 /* Skip the %. */ 432 pat = m + 1; 433 } 434 435 /* Copy the pattern. */ 436 Buf_AddChars(buf, len, src); 437 438 /* Append the rest. */ 439 Buf_AddString(buf, pat); 440 } 441 442 char * 443 Str_dupi(const char *begin, const char *end) 444 { 445 char *s; 446 447 s = emalloc(end - begin + 1); 448 memcpy(s, begin, end - begin); 449 s[end-begin] = '\0'; 450 return s; 451 } 452 453 char * 454 escape_dupi(const char *begin, const char *end, const char *set) 455 { 456 char *s, *t; 457 458 t = s = emalloc(end - begin + 1); 459 while (begin != end) { 460 if (*begin == '\\') { 461 begin++; 462 if (begin == end) { 463 *t++ = '\\'; 464 break; 465 } 466 if (strchr(set, *begin) == NULL) 467 *t++ = '\\'; 468 } 469 *t++ = *begin++; 470 } 471 *t++ = '\0'; 472 return s; 473 } 474 475 char * 476 Str_rchri(const char *begin, const char *end, int c) 477 { 478 if (begin != end) 479 do { 480 if (*--end == c) 481 return (char *)end; 482 } while (end != begin); 483 return NULL; 484 } 485