1 /* $OpenPackages$ */ 2 /* $OpenBSD: str.c,v 1.20 2003/06/03 02:56:12 millert Exp $ */ 3 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */ 4 5 /*- 6 * Copyright (c) 1988, 1989, 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * Copyright (c) 1989 by Berkeley Softworks 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * Adam de Boor. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <ctype.h> 40 #include <string.h> 41 #include "config.h" 42 #include "defines.h" 43 #include "str.h" 44 #include "memory.h" 45 #include "buf.h" 46 47 char * 48 Str_concati(s1, e1, s2, e2, sep) 49 const char *s1, *e1, *s2, *e2; 50 int sep; 51 { 52 size_t len1, len2; 53 char *result; 54 55 /* get the length of both strings */ 56 len1 = e1 - s1; 57 len2 = e2 - s2; 58 59 /* space for separator */ 60 if (sep) 61 len1++; 62 result = emalloc(len1 + len2 + 1); 63 64 /* copy first string into place */ 65 memcpy(result, s1, len1); 66 67 /* add separator character */ 68 if (sep) 69 result[len1-1] = sep; 70 71 /* copy second string plus EOS into place */ 72 memcpy(result + len1, s2, len2); 73 result[len1+len2] = '\0'; 74 return result; 75 } 76 77 /*- 78 * brk_string -- 79 * Fracture a string into an array of words (as delineated by tabs or 80 * spaces) taking quotation marks into account. Leading tabs/spaces 81 * are ignored. 82 * 83 * returns -- 84 * Pointer to the array of pointers to the words. To make life easier, 85 * the first word is always the value of the .MAKE variable. 86 */ 87 char ** 88 brk_string(str, store_argc, buffer) 89 const char *str; 90 int *store_argc; 91 char **buffer; 92 { 93 int argc; 94 char ch; 95 char inquote; 96 const char *p; 97 char *start, *t; 98 size_t len; 99 int argmax = 50; 100 size_t curlen = 0; 101 char **argv = emalloc((argmax + 1) * sizeof(char *)); 102 103 /* skip leading space chars. */ 104 for (; *str == ' ' || *str == '\t'; ++str) 105 continue; 106 107 /* allocate room for a copy of the string */ 108 if ((len = strlen(str) + 1) > curlen) 109 *buffer = emalloc(curlen = len); 110 111 /* 112 * copy the string; at the same time, parse backslashes, 113 * quotes and build the argument list. 114 */ 115 argc = 0; 116 inquote = '\0'; 117 for (p = str, start = t = *buffer;; ++p) { 118 switch (ch = *p) { 119 case '"': 120 case '\'': 121 if (inquote) { 122 if (inquote == ch) 123 inquote = '\0'; 124 else 125 break; 126 } else { 127 inquote = ch; 128 /* Don't miss "" or '' */ 129 if (start == NULL && p[1] == inquote) { 130 start = t + 1; 131 break; 132 } 133 } 134 continue; 135 case ' ': 136 case '\t': 137 case '\n': 138 if (inquote) 139 break; 140 if (!start) 141 continue; 142 /* FALLTHROUGH */ 143 case '\0': 144 /* 145 * end of a token -- make sure there's enough argv 146 * space and save off a pointer. 147 */ 148 if (!start) 149 goto done; 150 151 *t++ = '\0'; 152 if (argc == argmax) { 153 argmax *= 2; /* ramp up fast */ 154 argv = erealloc(argv, (argmax + 1) * sizeof(char *)); 155 } 156 argv[argc++] = start; 157 start = NULL; 158 if (ch == '\n' || ch == '\0') 159 goto done; 160 continue; 161 case '\\': 162 switch (ch = *++p) { 163 case '\0': 164 case '\n': 165 /* hmmm; fix it up as best we can */ 166 ch = '\\'; 167 --p; 168 break; 169 case 'b': 170 ch = '\b'; 171 break; 172 case 'f': 173 ch = '\f'; 174 break; 175 case 'n': 176 ch = '\n'; 177 break; 178 case 'r': 179 ch = '\r'; 180 break; 181 case 't': 182 ch = '\t'; 183 break; 184 } 185 break; 186 } 187 if (!start) 188 start = t; 189 *t++ = ch; 190 } 191 done: 192 argv[argc] = NULL; 193 *store_argc = argc; 194 return argv; 195 } 196 197 198 const char * 199 iterate_words(end) 200 const char **end; 201 { 202 const char *start, *p; 203 char state = 0; 204 start = *end; 205 206 while (isspace(*start)) 207 start++; 208 if (*start == '\0') 209 return NULL; 210 211 for (p = start;; p++) 212 switch(*p) { 213 case '\\': 214 if (p[1] != '\0') 215 p++; 216 break; 217 case '\'': 218 case '"': 219 if (state == *p) 220 state = 0; 221 else if (state == 0) 222 state = *p; 223 break; 224 case ' ': 225 case '\t': 226 if (state != 0) 227 break; 228 /* FALLTHROUGH */ 229 case '\0': 230 *end = p; 231 return start; 232 default: 233 break; 234 } 235 } 236 237 bool 238 Str_Matchi(string, estring, pattern, end) 239 const char *string; /* String */ 240 const char *estring; /* End of string */ 241 const char *pattern; /* Pattern */ 242 const char *end; /* End of Pattern */ 243 { 244 while (pattern != end) { 245 /* Check for a "*" as the next pattern character. It matches 246 * any substring. We handle this by calling ourselves 247 * recursively for each postfix of string, until either we 248 * match or we reach the end of the string. */ 249 if (*pattern == '*') { 250 pattern++; 251 /* Skip over contiguous sequences of `?*', so that recursive 252 * calls only occur on `real' characters. */ 253 while (pattern != end && (*pattern == '?' || *pattern == '*')) { 254 if (*pattern == '?') { 255 if (string == estring) 256 return false; 257 else 258 string++; 259 } 260 pattern++; 261 } 262 if (pattern == end) 263 return true; 264 for (; string != estring; string++) 265 if (Str_Matchi(string, estring, pattern, end)) 266 return true; 267 return false; 268 } else if (string == estring) 269 return false; 270 /* Check for a "[" as the next pattern character. It is 271 * followed by a list of characters that are acceptable, or 272 * by a range (two characters separated by "-"). */ 273 else if (*pattern == '[') { 274 pattern++; 275 if (pattern == end) 276 return false; 277 if (*pattern == '!' || *pattern == '^') { 278 pattern++; 279 if (pattern == end) 280 return false; 281 /* Negative match */ 282 for (;;) { 283 if (*pattern == '\\') { 284 if (++pattern == end) 285 return false; 286 } 287 if (*pattern == *string) 288 return false; 289 if (pattern[1] == '-') { 290 if (pattern + 2 == end) 291 return false; 292 if (*pattern < *string && *string <= pattern[2]) 293 return false; 294 if (pattern[2] <= *string && *string < *pattern) 295 return false; 296 pattern += 3; 297 } else 298 pattern++; 299 if (pattern == end) 300 return false; 301 /* The test for ']' is done at the end so that ']' 302 * can be used at the start of the range without '\' */ 303 if (*pattern == ']') 304 break; 305 } 306 } else { 307 for (;;) { 308 if (*pattern == '\\') { 309 if (++pattern == end) 310 return false; 311 } 312 if (*pattern == *string) 313 break; 314 if (pattern[1] == '-') { 315 if (pattern + 2 == end) 316 return false; 317 if (*pattern < *string && *string <= pattern[2]) 318 break; 319 if (pattern[2] <= *string && *string < *pattern) 320 break; 321 pattern += 3; 322 } else 323 pattern++; 324 /* The test for ']' is done at the end so that ']' 325 * can be used at the start of the range without '\' */ 326 if (pattern == end || *pattern == ']') 327 return false; 328 } 329 /* Found matching character, skip over rest of class. */ 330 while (*pattern != ']') { 331 if (*pattern == '\\') 332 pattern++; 333 /* A non-terminated character class is ok. */ 334 if (pattern == end) 335 break; 336 pattern++; 337 } 338 } 339 } 340 /* '?' matches any single character, so shunt test. */ 341 else if (*pattern != '?') { 342 /* If the next pattern character is '\', just strip off the 343 * '\' so we do exact matching on the character that follows. */ 344 if (*pattern == '\\') { 345 if (++pattern == end) 346 return false; 347 } 348 /* There's no special character. Just make sure that 349 * the next characters of each string match. */ 350 if (*pattern != *string) 351 return false; 352 } 353 pattern++; 354 string++; 355 } 356 if (string == estring) 357 return true; 358 else 359 return false; 360 } 361 362 363 /*- 364 *----------------------------------------------------------------------- 365 * Str_SYSVMatch -- 366 * Check word against pattern for a match (% is wild), 367 * 368 * Results: 369 * Returns the beginning position of a match or null. The number 370 * of characters matched is returned in len. 371 *----------------------------------------------------------------------- 372 */ 373 const char * 374 Str_SYSVMatch(word, pattern, len) 375 const char *word; /* Word to examine */ 376 const char *pattern; /* Pattern to examine against */ 377 size_t *len; /* Number of characters to substitute */ 378 { 379 const char *p = pattern; 380 const char *w = word; 381 const char *m; 382 383 if (*p == '\0') { 384 /* Null pattern is the whole string. */ 385 *len = strlen(w); 386 return w; 387 } 388 389 if ((m = strchr(p, '%')) != NULL) { 390 /* Check that the prefix matches. */ 391 for (; p != m && *w && *w == *p; w++, p++) 392 continue; 393 394 if (p != m) 395 return NULL; /* No match. */ 396 397 if (*++p == '\0') { 398 /* No more pattern, return the rest of the string. */ 399 *len = strlen(w); 400 return w; 401 } 402 } 403 404 m = w; 405 406 /* Find a matching tail. */ 407 do { 408 if (strcmp(p, w) == 0) { 409 *len = w - m; 410 return m; 411 } 412 } while (*w++ != '\0'); 413 414 415 return NULL; 416 } 417 418 419 /*- 420 *----------------------------------------------------------------------- 421 * Str_SYSVSubst -- 422 * Substitute '%' on the pattern with len characters from src. 423 * If the pattern does not contain a '%' prepend len characters 424 * from src. 425 * 426 * Side Effects: 427 * Places result on buf 428 *----------------------------------------------------------------------- 429 */ 430 void 431 Str_SYSVSubst(buf, pat, src, len) 432 Buffer buf; 433 const char *pat; 434 const char *src; 435 size_t len; 436 { 437 const char *m; 438 439 if ((m = strchr(pat, '%')) != NULL) { 440 /* Copy the prefix. */ 441 Buf_Addi(buf, pat, m); 442 /* Skip the %. */ 443 pat = m + 1; 444 } 445 446 /* Copy the pattern. */ 447 Buf_AddChars(buf, len, src); 448 449 /* Append the rest. */ 450 Buf_AddString(buf, pat); 451 } 452 453 char * 454 Str_dupi(begin, end) 455 const char *begin; 456 const char *end; 457 { 458 char *s; 459 460 s = emalloc(end - begin + 1); 461 memcpy(s, begin, end - begin); 462 s[end-begin] = '\0'; 463 return s; 464 } 465 466 char * 467 escape_dupi(begin, end, set) 468 const char *begin; 469 const char *end; 470 const char *set; 471 { 472 char *s, *t; 473 474 t = s = emalloc(end - begin + 1); 475 while (begin != end) { 476 if (*begin == '\\') { 477 begin++; 478 if (begin == end) { 479 *t++ = '\\'; 480 break; 481 } 482 if (strchr(set, *begin) == NULL) 483 *t++ = '\\'; 484 } 485 *t++ = *begin++; 486 } 487 *t++ = '\0'; 488 return s; 489 } 490 491 char * 492 Str_rchri(s, e, c) 493 const char *s; 494 const char *e; 495 int c; 496 { 497 if (s != e) 498 do { 499 if (*--e == c) 500 return (char *)e; 501 } while (e != s); 502 return NULL; 503 } 504