1 /* $OpenBSD: util.c,v 1.17 2003/07/20 22:16:52 millert Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 32 #include <ctype.h> 33 #include <err.h> 34 #include <errno.h> 35 #include <fts.h> 36 #include <regex.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <zlib.h> 42 43 #include "grep.h" 44 45 /* 46 * Process a file line by line... 47 */ 48 49 static int linesqueued; 50 static int procline(str_t *l, int); 51 static int grep_search(fastgrep_t *, unsigned char *, int, regmatch_t *pmatch); 52 static int grep_cmp(const unsigned char *, const unsigned char *, size_t); 53 static void grep_revstr(unsigned char *, int); 54 55 int 56 grep_tree(char **argv) 57 { 58 FTS *fts; 59 FTSENT *p; 60 int c, fts_flags; 61 62 c = fts_flags = 0; 63 64 if (Hflag) 65 fts_flags = FTS_COMFOLLOW; 66 if (Pflag) 67 fts_flags = FTS_PHYSICAL; 68 if (Sflag) 69 fts_flags = FTS_LOGICAL; 70 71 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 72 73 if (!(fts = fts_open(argv, fts_flags, NULL))) 74 err(2, NULL); 75 while ((p = fts_read(fts)) != NULL) { 76 switch (p->fts_info) { 77 case FTS_DNR: 78 break; 79 case FTS_ERR: 80 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 81 break; 82 case FTS_DP: 83 break; 84 default: 85 c += procfile(p->fts_path); 86 break; 87 } 88 } 89 90 return c; 91 } 92 93 int 94 procfile(char *fn) 95 { 96 str_t ln; 97 file_t *f; 98 int c, t, z, nottext; 99 100 if (fn == NULL) { 101 fn = "(standard input)"; 102 f = grep_fdopen(STDIN_FILENO, "r"); 103 } else { 104 f = grep_open(fn, "r"); 105 } 106 if (f == NULL) { 107 if (!sflag) 108 warn("%s", fn); 109 return 0; 110 } 111 112 nottext = grep_bin_file(f); 113 if (nottext && binbehave == BIN_FILE_SKIP) { 114 grep_close(f); 115 return 0; 116 } 117 118 ln.file = fn; 119 ln.line_no = 0; 120 linesqueued = 0; 121 ln.off = -1; 122 123 if (Bflag > 0) 124 initqueue(); 125 for (c = 0; !(lflag && c);) { 126 ln.off += ln.len + 1; 127 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) 128 break; 129 if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 130 --ln.len; 131 ln.line_no++; 132 133 z = tail; 134 135 if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) { 136 enqueue(&ln); 137 linesqueued++; 138 } 139 c += t; 140 } 141 if (Bflag > 0) 142 clearqueue(); 143 grep_close(f); 144 145 if (cflag) { 146 if (!hflag) 147 printf("%s:", ln.file); 148 printf("%u\n", c); 149 } 150 if (lflag && c != 0) 151 printf("%s\n", fn); 152 if (Lflag && c == 0) 153 printf("%s\n", fn); 154 if (c && !cflag && !lflag && !Lflag && 155 binbehave == BIN_FILE_BIN && nottext && !qflag) 156 printf("Binary file %s matches\n", fn); 157 158 return c; 159 } 160 161 162 /* 163 * Process an individual line in a file. Return non-zero if it matches. 164 */ 165 166 #define isword(x) (isalnum(x) || (x) == '_') 167 168 static int 169 procline(str_t *l, int nottext) 170 { 171 regmatch_t pmatch; 172 int c, i, r; 173 174 if (matchall) { 175 c = !vflag; 176 goto print; 177 } 178 179 for (c = i = 0; i < patterns; i++) { 180 pmatch.rm_so = 0; 181 pmatch.rm_eo = l->len; 182 if (fg_pattern[i].pattern) 183 r = grep_search(&fg_pattern[i], (unsigned char *)l->dat, 184 l->len, &pmatch); 185 else 186 r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags); 187 if (r == 0) { 188 if (wflag) { 189 if ((pmatch.rm_so != 0 && 190 isword(l->dat[pmatch.rm_so - 1])) || 191 (pmatch.rm_eo != l->len && 192 isword(l->dat[pmatch.rm_eo]))) 193 r = REG_NOMATCH; 194 } 195 if (xflag) { 196 if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len) 197 r = REG_NOMATCH; 198 } 199 } 200 if (r == 0) { 201 c++; 202 break; 203 } 204 } 205 if (vflag) 206 c = !c; 207 208 print: 209 if (c && binbehave == BIN_FILE_BIN && nottext) 210 return c; /* Binary file */ 211 212 if ((tail > 0 || c) && !cflag && !qflag) { 213 if (c) { 214 if (first > 0 && tail == 0 && (Bflag < linesqueued) && 215 (Aflag || Bflag)) 216 printf("--\n"); 217 first = 1; 218 tail = Aflag; 219 if (Bflag > 0) 220 printqueue(); 221 linesqueued = 0; 222 printline(l, ':'); 223 } else { 224 printline(l, '-'); 225 tail--; 226 } 227 } 228 return c; 229 } 230 231 /* 232 * Returns: -1 on failure, 0 on success 233 */ 234 int 235 fastcomp(fastgrep_t *fg, const char *pattern) 236 { 237 int i; 238 int bol = 0; 239 int eol = 0; 240 int origPatternLen; 241 int shiftPatternLen; 242 int hasDot = 0; 243 int firstHalfDot = -1; 244 int firstLastHalfDot = -1; 245 int lastHalfDot = 0; 246 247 if (Fflag) { 248 fg->pattern = NULL; 249 return (-1); 250 } 251 252 /* Initialize. */ 253 origPatternLen = fg->patternLen = strlen(pattern); 254 fg->bol = 0; 255 fg->eol = 0; 256 fg->reversedSearch = 0; 257 258 /* Remove end-of-line character ('$'). */ 259 if (pattern[fg->patternLen - 1] == '$') { 260 eol++; 261 fg->eol = 1; 262 fg->patternLen--; 263 boleol = 1; 264 } 265 266 /* Remove beginning-of-line character ('^'). */ 267 if (pattern[0] == '^') { 268 bol++; 269 fg->bol = 1; 270 fg->patternLen--; 271 boleol = 1; 272 } 273 274 /* 275 * Copy pattern minus '^' and '$' characters at the beginning and 276 * ending of the string respectively. 277 */ 278 fg->pattern = grep_strdup(pattern + bol); 279 280 /* Look for ways to cheat...er...avoid the full regex engine. */ 281 for (i = 0; i < fg->patternLen; i++) 282 { 283 /* Can still cheat? */ 284 if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) || 285 (fg->pattern[i] == '_') || (fg->pattern[i] == ',') || 286 (fg->pattern[i] == '^') || (fg->pattern[i] == '$') || 287 (fg->pattern[i] == '=') || (fg->pattern[i] == '-') || 288 (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) { 289 /* As long as it is good, upper case it for later. */ 290 if (iflag) 291 fg->pattern[i] = toupper(fg->pattern[i]); 292 } else if (fg->pattern[i] == '.') { 293 hasDot = i; 294 if (i < fg->patternLen / 2) { 295 if (firstHalfDot < -1) 296 /* Closest dot to the beginning */ 297 firstHalfDot = i; 298 } else { 299 /* Closest dot to the end of the pattern. */ 300 lastHalfDot = i; 301 if (firstLastHalfDot < 0) 302 firstLastHalfDot = i; 303 } 304 } else { 305 /* Free memory and let others know this is empty. */ 306 free(fg->pattern); 307 fg->pattern = NULL; 308 return (-1); 309 } 310 } 311 312 /* 313 * Determine if a reverse search would be faster based on the placement 314 * of the dots. 315 */ 316 if ((!(lflag || cflag)) && ((!(bol || eol)) && 317 ((lastHalfDot) && ((firstHalfDot < 0) || 318 ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) { 319 fg->reversedSearch = 1; 320 hasDot = fg->patternLen - (firstHalfDot < 0 ? 321 firstLastHalfDot : firstHalfDot) - 1; 322 grep_revstr(fg->pattern, fg->patternLen); 323 } 324 325 /* 326 * Normal Quick Search would require a shift based on the position the 327 * next character after the comparison is within the pattern. With 328 * wildcards, the position of the last dot effects the maximum shift 329 * distance. 330 * The closer to the end the wild card is the slower the search. A 331 * reverse version of this algorithm would be useful for wildcards near 332 * the end of the string. 333 * 334 * Examples: 335 * Pattern Max shift 336 * ------- --------- 337 * this 5 338 * .his 4 339 * t.is 3 340 * th.s 2 341 * thi. 1 342 */ 343 344 /* Adjust the shift based on location of the last dot ('.'). */ 345 shiftPatternLen = fg->patternLen - hasDot; 346 347 /* Preprocess pattern. */ 348 for (i = 0; i <= UCHAR_MAX; i++) 349 fg->qsBc[i] = shiftPatternLen; 350 for (i = hasDot + 1; i < fg->patternLen; i++) { 351 fg->qsBc[fg->pattern[i]] = fg->patternLen - i; 352 /* 353 * If case is ignored, make the jump apply to both upper and 354 * lower cased characters. As the pattern is stored in upper 355 * case, apply the same to the lower case equivalents. 356 */ 357 if (iflag) 358 fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; 359 } 360 361 /* 362 * Put pattern back to normal after pre-processing to allow for easy 363 * comparisons later. 364 */ 365 if (fg->reversedSearch) 366 grep_revstr(fg->pattern, fg->patternLen); 367 368 return (0); 369 } 370 371 static int 372 grep_search(fastgrep_t *fg, unsigned char *data, int dataLen, regmatch_t *pmatch) 373 { 374 int j; 375 int rtrnVal = REG_NOMATCH; 376 377 pmatch->rm_so = -1; 378 pmatch->rm_eo = -1; 379 380 /* No point in going farther if we do not have enough data. */ 381 if (dataLen < fg->patternLen) 382 return (rtrnVal); 383 384 /* Only try once at the beginning or ending of the line. */ 385 if (fg->bol || fg->eol) { 386 /* Simple text comparison. */ 387 /* Verify data is >= pattern length before searching on it. */ 388 if (dataLen >= fg->patternLen) { 389 /* Determine where in data to start search at. */ 390 if (fg->eol) 391 j = dataLen - fg->patternLen; 392 else 393 j = 0; 394 if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen))) 395 if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) { 396 rtrnVal = 0; 397 pmatch->rm_so = j; 398 pmatch->rm_eo = j + fg->patternLen; 399 } 400 } 401 } else if (fg->reversedSearch) { 402 /* Quick Search algorithm. */ 403 j = dataLen; 404 do { 405 if (grep_cmp(fg->pattern, data + j - fg->patternLen, 406 fg->patternLen) == -1) { 407 rtrnVal = 0; 408 pmatch->rm_so = j - fg->patternLen; 409 pmatch->rm_eo = j; 410 break; 411 } 412 /* Shift if within bounds, otherwise, we are done. */ 413 if (j == fg->patternLen) 414 break; 415 j -= fg->qsBc[data[j - fg->patternLen - 1]]; 416 } while (j >= fg->patternLen); 417 } else { 418 /* Quick Search algorithm. */ 419 j = 0; 420 do { 421 if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) { 422 rtrnVal = 0; 423 pmatch->rm_so = j; 424 pmatch->rm_eo = j + fg->patternLen; 425 break; 426 } 427 428 /* Shift if within bounds, otherwise, we are done. */ 429 if (j + fg->patternLen == dataLen) 430 break; 431 else 432 j += fg->qsBc[data[j + fg->patternLen]]; 433 } while (j <= (dataLen - fg->patternLen)); 434 } 435 436 return (rtrnVal); 437 } 438 439 440 void * 441 grep_malloc(size_t size) 442 { 443 void *ptr; 444 445 if ((ptr = malloc(size)) == NULL) 446 err(2, "malloc"); 447 return ptr; 448 } 449 450 void * 451 grep_realloc(void *ptr, size_t size) 452 { 453 if ((ptr = realloc(ptr, size)) == NULL) 454 err(2, "realloc"); 455 return ptr; 456 } 457 458 unsigned char * 459 grep_strdup(const char *str) 460 { 461 unsigned char *ptr; 462 463 if ((ptr = (unsigned char *)strdup(str)) == NULL) 464 err(2, "strdup"); 465 return ptr; 466 } 467 468 /* 469 * Returns: i >= 0 on failure (position that it failed) 470 * -1 on success 471 */ 472 int 473 grep_cmp(const unsigned char *pattern, const unsigned char *data, size_t len) 474 { 475 int i; 476 477 for (i = 0; i < len; i++) { 478 if (((pattern[i] == data[i]) || (pattern[i] == '.')) || 479 (iflag && pattern[i] == toupper(data[i]))) 480 continue; 481 return (i); 482 } 483 484 return (-1); 485 } 486 487 static void 488 grep_revstr(unsigned char *str, int len) 489 { 490 int i; 491 char c; 492 493 for (i = 0; i < len / 2; i++) { 494 c = str[i]; 495 str[i] = str[len - i - 1]; 496 str[len - i - 1] = c; 497 } 498 } 499 500 void 501 printline(str_t *line, int sep) 502 { 503 int n; 504 505 n = 0; 506 if (!hflag) { 507 fputs(line->file, stdout); 508 ++n; 509 } 510 if (nflag) { 511 if (n) 512 putchar(sep); 513 printf("%d", line->line_no); 514 ++n; 515 } 516 if (bflag) { 517 if (n) 518 putchar(sep); 519 printf("%lu", (unsigned long)line->off); 520 } 521 if (n) 522 putchar(sep); 523 fwrite(line->dat, line->len, 1, stdout); 524 putchar('\n'); 525 } 526