1*19f8bc08Sotto /* $OpenBSD: util.c,v 1.55 2016/04/04 05:49:47 otto Exp $ */ 2fd6e2b5bSderaadt 3fe07e37bSderaadt /*- 4fe07e37bSderaadt * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5fe07e37bSderaadt * All rights reserved. 6fe07e37bSderaadt * 7fe07e37bSderaadt * Redistribution and use in source and binary forms, with or without 8fe07e37bSderaadt * modification, are permitted provided that the following conditions 9fe07e37bSderaadt * are met: 10fe07e37bSderaadt * 1. Redistributions of source code must retain the above copyright 11fe07e37bSderaadt * notice, this list of conditions and the following disclaimer. 12fe07e37bSderaadt * 2. Redistributions in binary form must reproduce the above copyright 13fe07e37bSderaadt * notice, this list of conditions and the following disclaimer in the 14fe07e37bSderaadt * documentation and/or other materials provided with the distribution. 15fe07e37bSderaadt * 16fe07e37bSderaadt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17fe07e37bSderaadt * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18fe07e37bSderaadt * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19fe07e37bSderaadt * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20fe07e37bSderaadt * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21fe07e37bSderaadt * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22fe07e37bSderaadt * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23fe07e37bSderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24fe07e37bSderaadt * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25fe07e37bSderaadt * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26fe07e37bSderaadt * SUCH DAMAGE. 27fe07e37bSderaadt */ 28fe07e37bSderaadt 29fe07e37bSderaadt #include <sys/types.h> 30fe07e37bSderaadt #include <sys/stat.h> 31fe07e37bSderaadt 32fe07e37bSderaadt #include <ctype.h> 33fe07e37bSderaadt #include <err.h> 34fe07e37bSderaadt #include <errno.h> 35fe07e37bSderaadt #include <fts.h> 36fe07e37bSderaadt #include <regex.h> 3757c37fdaSmillert #include <stdbool.h> 38fe07e37bSderaadt #include <stdio.h> 39fe07e37bSderaadt #include <stdlib.h> 40fe07e37bSderaadt #include <string.h> 41fe07e37bSderaadt #include <unistd.h> 42fe07e37bSderaadt #include <zlib.h> 43fe07e37bSderaadt 44fe07e37bSderaadt #include "grep.h" 45fe07e37bSderaadt 46fe07e37bSderaadt /* 47fe07e37bSderaadt * Process a file line by line... 48fe07e37bSderaadt */ 49fe07e37bSderaadt 50fe07e37bSderaadt static int linesqueued; 51f057df86Stedu static int procline(str_t *l, int); 5214774268Stedu static int grep_search(fastgrep_t *, char *, size_t, regmatch_t *pmatch); 53f56cb323Stedu #ifndef SMALL 5457c37fdaSmillert static bool grep_cmp(const char *, const char *, size_t); 5570c36c4eStedu static void grep_revstr(unsigned char *, int); 56f56cb323Stedu #endif 57fe07e37bSderaadt 58fe07e37bSderaadt int 59fe07e37bSderaadt grep_tree(char **argv) 60fe07e37bSderaadt { 61fe07e37bSderaadt FTS *fts; 62fe07e37bSderaadt FTSENT *p; 63fe07e37bSderaadt int c, fts_flags; 64fe07e37bSderaadt 6540155c30Stedu c = 0; 66fe07e37bSderaadt 6740155c30Stedu fts_flags = FTS_PHYSICAL | FTS_NOSTAT | FTS_NOCHDIR; 68fe07e37bSderaadt 690536143aSmillert if (!(fts = fts_open(argv, fts_flags, NULL))) 70f91b5226Smillert err(2, NULL); 71fe07e37bSderaadt while ((p = fts_read(fts)) != NULL) { 72fe07e37bSderaadt switch (p->fts_info) { 73fe07e37bSderaadt case FTS_DNR: 74fe07e37bSderaadt break; 75fe07e37bSderaadt case FTS_ERR: 7620351096Smillert file_err = 1; 7720351096Smillert if(!sflag) 785ad04d35Sguenther warnc(p->fts_errno, "%s", p->fts_path); 79fe07e37bSderaadt break; 80fe07e37bSderaadt case FTS_DP: 81fe07e37bSderaadt break; 82fe07e37bSderaadt default: 83fe07e37bSderaadt c += procfile(p->fts_path); 84fe07e37bSderaadt break; 85fe07e37bSderaadt } 86fe07e37bSderaadt } 874760cb14Sotto if (errno) 884760cb14Sotto err(2, "fts_read"); 89f12e5d26Suebayasi fts_close(fts); 90fe07e37bSderaadt return c; 91fe07e37bSderaadt } 92fe07e37bSderaadt 93fe07e37bSderaadt int 94fe07e37bSderaadt procfile(char *fn) 95fe07e37bSderaadt { 96fe07e37bSderaadt str_t ln; 97fe07e37bSderaadt file_t *f; 98f057df86Stedu int c, t, z, nottext; 99fe07e37bSderaadt 100fe07e37bSderaadt if (fn == NULL) { 101fe07e37bSderaadt fn = "(standard input)"; 102fe07e37bSderaadt f = grep_fdopen(STDIN_FILENO, "r"); 103fe07e37bSderaadt } else { 104fe07e37bSderaadt f = grep_open(fn, "r"); 105fe07e37bSderaadt } 106fe07e37bSderaadt if (f == NULL) { 10720351096Smillert file_err = 1; 108fe07e37bSderaadt if (!sflag) 109fe07e37bSderaadt warn("%s", fn); 110fe07e37bSderaadt return 0; 111fe07e37bSderaadt } 112f057df86Stedu 113f057df86Stedu nottext = grep_bin_file(f); 114f057df86Stedu if (nottext && binbehave == BIN_FILE_SKIP) { 115fe07e37bSderaadt grep_close(f); 116fe07e37bSderaadt return 0; 117fe07e37bSderaadt } 118fe07e37bSderaadt 119fe07e37bSderaadt ln.file = fn; 120fe07e37bSderaadt ln.line_no = 0; 1211d5054b0Sespie ln.len = 0; 122fe07e37bSderaadt linesqueued = 0; 1232eb140a3Sjaredy tail = 0; 124fe07e37bSderaadt ln.off = -1; 125fe07e37bSderaadt 126fe07e37bSderaadt if (Bflag > 0) 127fe07e37bSderaadt initqueue(); 12899210d50Sotto for (c = 0; c == 0 || !(lflag || qflag); ) { 129fe07e37bSderaadt ln.off += ln.len + 1; 130fe07e37bSderaadt if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) 131fe07e37bSderaadt break; 132fe07e37bSderaadt if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 133fe07e37bSderaadt --ln.len; 134fe07e37bSderaadt ln.line_no++; 135fe07e37bSderaadt 136fe07e37bSderaadt z = tail; 137fe07e37bSderaadt 138f057df86Stedu if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) { 139fe07e37bSderaadt enqueue(&ln); 140fe07e37bSderaadt linesqueued++; 141fe07e37bSderaadt } 142fe07e37bSderaadt c += t; 143fe07e37bSderaadt } 144fe07e37bSderaadt if (Bflag > 0) 145fe07e37bSderaadt clearqueue(); 146fe07e37bSderaadt grep_close(f); 147fe07e37bSderaadt 148fe07e37bSderaadt if (cflag) { 149fe07e37bSderaadt if (!hflag) 150fe07e37bSderaadt printf("%s:", ln.file); 151fe07e37bSderaadt printf("%u\n", c); 152fe07e37bSderaadt } 153fe07e37bSderaadt if (lflag && c != 0) 154fe07e37bSderaadt printf("%s\n", fn); 155fe07e37bSderaadt if (Lflag && c == 0) 156fe07e37bSderaadt printf("%s\n", fn); 157f057df86Stedu if (c && !cflag && !lflag && !Lflag && 1580c0c855bStedu binbehave == BIN_FILE_BIN && nottext && !qflag) 159f057df86Stedu printf("Binary file %s matches\n", fn); 160f057df86Stedu 161fe07e37bSderaadt return c; 162fe07e37bSderaadt } 163fe07e37bSderaadt 164fe07e37bSderaadt 165fe07e37bSderaadt /* 166fe07e37bSderaadt * Process an individual line in a file. Return non-zero if it matches. 167fe07e37bSderaadt */ 168fe07e37bSderaadt 1696cd4fad2Sderaadt #define isword(x) (isalnum((unsigned char)x) || (x) == '_') 170fe07e37bSderaadt 171fe07e37bSderaadt static int 172f057df86Stedu procline(str_t *l, int nottext) 173fe07e37bSderaadt { 174fe07e37bSderaadt regmatch_t pmatch; 175c85fc9dcSdhartmei int c, i, r; 176f7ac433eSaschrijver regoff_t offset; 177f7ac433eSaschrijver 178f7ac433eSaschrijver /* size_t will be converted to regoff_t. ssize_t is guaranteed to fit 179f7ac433eSaschrijver * into regoff_t */ 180f7ac433eSaschrijver if (l->len > SSIZE_MAX) { 181f7ac433eSaschrijver errx(2, "Line is too big to process"); 182f7ac433eSaschrijver } 183fe07e37bSderaadt 18414774268Stedu c = 0; 18514774268Stedu i = 0; 186fe07e37bSderaadt if (matchall) { 1871b60208bStedu c = 1; 188fe07e37bSderaadt goto print; 189fe07e37bSderaadt } 190fe07e37bSderaadt 19114774268Stedu for (i = 0; i < patterns; i++) { 19214774268Stedu offset = 0; 19314774268Stedu redo: 1940c9d1b5fSmillert if (fg_pattern[i].pattern) { 19514774268Stedu r = grep_search(&fg_pattern[i], l->dat + offset, 19614774268Stedu l->len - offset, &pmatch); 19714774268Stedu pmatch.rm_so += offset; 19814774268Stedu pmatch.rm_eo += offset; 1990c9d1b5fSmillert } else { 20014774268Stedu pmatch.rm_so = offset; 201cbb0f937Sotto pmatch.rm_eo = l->len; 202674ee822Smillert r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags); 203fe07e37bSderaadt } 2040c9d1b5fSmillert if (r == 0 && xflag) { 205fe07e37bSderaadt if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len) 206fe07e37bSderaadt r = REG_NOMATCH; 207fe07e37bSderaadt } 208c85fc9dcSdhartmei if (r == 0) { 20914774268Stedu c = 1; 21031a151d0Smillert if (oflag && pmatch.rm_so != pmatch.rm_eo) 21114774268Stedu goto print; 212fe07e37bSderaadt break; 213fe07e37bSderaadt } 214fe07e37bSderaadt } 21514774268Stedu if (oflag) 21614774268Stedu return c; 21714774268Stedu print: 218c85fc9dcSdhartmei if (vflag) 219c85fc9dcSdhartmei c = !c; 220fe07e37bSderaadt 221f057df86Stedu if (c && binbehave == BIN_FILE_BIN && nottext) 222f057df86Stedu return c; /* Binary file */ 223f057df86Stedu 224fe07e37bSderaadt if ((tail > 0 || c) && !cflag && !qflag) { 225fe07e37bSderaadt if (c) { 226621edca7Sderaadt if (first > 0 && tail == 0 && (Bflag < linesqueued) && 227621edca7Sderaadt (Aflag || Bflag)) 228fe07e37bSderaadt printf("--\n"); 229fe07e37bSderaadt first = 1; 230fe07e37bSderaadt tail = Aflag; 231fe07e37bSderaadt if (Bflag > 0) 232fe07e37bSderaadt printqueue(); 233fe07e37bSderaadt linesqueued = 0; 23414774268Stedu printline(l, ':', oflag ? &pmatch : NULL); 235fe07e37bSderaadt } else { 23614774268Stedu printline(l, '-', oflag ? &pmatch : NULL); 237fe07e37bSderaadt tail--; 238fe07e37bSderaadt } 239fe07e37bSderaadt } 24014774268Stedu if (oflag && !matchall) { 24114774268Stedu offset = pmatch.rm_eo; 24214774268Stedu goto redo; 24314774268Stedu } 244fe07e37bSderaadt return c; 245fe07e37bSderaadt } 246fe07e37bSderaadt 247f56cb323Stedu #ifndef SMALL 2485c151985Sotto void 2496cd4fad2Sderaadt fgrepcomp(fastgrep_t *fg, const unsigned char *pattern) 250acf867c4Smillert { 251acf867c4Smillert int i; 252acf867c4Smillert 253acf867c4Smillert /* Initialize. */ 254acf867c4Smillert fg->patternLen = strlen(pattern); 255acf867c4Smillert fg->bol = 0; 256acf867c4Smillert fg->eol = 0; 257acf867c4Smillert fg->wmatch = wflag; 258acf867c4Smillert fg->reversedSearch = 0; 259acf867c4Smillert 260acf867c4Smillert /* 261acf867c4Smillert * Make a copy and upper case it for later if in -i mode, 262acf867c4Smillert * else just copy the pointer. 263acf867c4Smillert */ 264acf867c4Smillert if (iflag) { 265acf867c4Smillert fg->pattern = grep_malloc(fg->patternLen + 1); 266acf867c4Smillert for (i = 0; i < fg->patternLen; i++) 267acf867c4Smillert fg->pattern[i] = toupper(pattern[i]); 268acf867c4Smillert fg->pattern[fg->patternLen] = '\0'; 269acf867c4Smillert } else 270aeda29f3Sderaadt fg->pattern = (unsigned char *)pattern; /* really const */ 271acf867c4Smillert 272acf867c4Smillert /* Preprocess pattern. */ 273acf867c4Smillert for (i = 0; i <= UCHAR_MAX; i++) 274acf867c4Smillert fg->qsBc[i] = fg->patternLen; 275acf867c4Smillert for (i = 1; i < fg->patternLen; i++) { 276acf867c4Smillert fg->qsBc[fg->pattern[i]] = fg->patternLen - i; 277acf867c4Smillert /* 278acf867c4Smillert * If case is ignored, make the jump apply to both upper and 279acf867c4Smillert * lower cased characters. As the pattern is stored in upper 280acf867c4Smillert * case, apply the same to the lower case equivalents. 281acf867c4Smillert */ 282acf867c4Smillert if (iflag) 283acf867c4Smillert fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; 284acf867c4Smillert } 285acf867c4Smillert } 286f56cb323Stedu #endif 287acf867c4Smillert 288acf867c4Smillert /* 289acf867c4Smillert * Returns: -1 on failure, 0 on success 290acf867c4Smillert */ 291acf867c4Smillert int 29270c36c4eStedu fastcomp(fastgrep_t *fg, const char *pattern) 29370c36c4eStedu { 294f56cb323Stedu #ifdef SMALL 295f56cb323Stedu return -1; 296f56cb323Stedu #else 29770c36c4eStedu int i; 29870c36c4eStedu int bol = 0; 29970c36c4eStedu int eol = 0; 30070c36c4eStedu int shiftPatternLen; 30170c36c4eStedu int hasDot = 0; 30270c36c4eStedu int firstHalfDot = -1; 30370c36c4eStedu int firstLastHalfDot = -1; 30470c36c4eStedu int lastHalfDot = 0; 30570c36c4eStedu 30670c36c4eStedu /* Initialize. */ 307aeda29f3Sderaadt fg->patternLen = strlen(pattern); 30870c36c4eStedu fg->bol = 0; 30970c36c4eStedu fg->eol = 0; 3100c9d1b5fSmillert fg->wmatch = 0; 31170c36c4eStedu fg->reversedSearch = 0; 31270c36c4eStedu 31370c36c4eStedu /* Remove end-of-line character ('$'). */ 31451098fefSeric if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') { 31570c36c4eStedu eol++; 31670c36c4eStedu fg->eol = 1; 31770c36c4eStedu fg->patternLen--; 31870c36c4eStedu } 31970c36c4eStedu 32070c36c4eStedu /* Remove beginning-of-line character ('^'). */ 32170c36c4eStedu if (pattern[0] == '^') { 32270c36c4eStedu bol++; 32370c36c4eStedu fg->bol = 1; 32470c36c4eStedu fg->patternLen--; 32570c36c4eStedu } 32670c36c4eStedu 3270c9d1b5fSmillert /* Remove enclosing [[:<:]] and [[:>:]] (word match). */ 32830c8a262Sotto if (wflag) { 32930c8a262Sotto /* basic re's use \( \), extended re's ( ) */ 33030c8a262Sotto int extra = Eflag ? 1 : 2; 33130c8a262Sotto fg->patternLen -= 14 + 2 * extra; 33230c8a262Sotto fg->wmatch = 7 + extra; 33330c8a262Sotto } else if (fg->patternLen >= 14 && 3340c9d1b5fSmillert strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 && 335394a0db6Smillert strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) { 3360c9d1b5fSmillert fg->patternLen -= 14; 3370c9d1b5fSmillert fg->wmatch = 7; 3380c9d1b5fSmillert } 3390c9d1b5fSmillert 34070c36c4eStedu /* 3410c9d1b5fSmillert * Copy pattern minus '^' and '$' characters as well as word 3420c9d1b5fSmillert * match character classes at the beginning and ending of the 3430c9d1b5fSmillert * string respectively. 34470c36c4eStedu */ 3450c9d1b5fSmillert fg->pattern = grep_malloc(fg->patternLen + 1); 3460c9d1b5fSmillert memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen); 3470c9d1b5fSmillert fg->pattern[fg->patternLen] = '\0'; 34870c36c4eStedu 34970c36c4eStedu /* Look for ways to cheat...er...avoid the full regex engine. */ 35070c36c4eStedu for (i = 0; i < fg->patternLen; i++) 35170c36c4eStedu { 352bcaf0f1eStedu switch (fg->pattern[i]) { 353bcaf0f1eStedu case '.': 35470c36c4eStedu hasDot = i; 35570c36c4eStedu if (i < fg->patternLen / 2) { 35603c8a690Sotto if (firstHalfDot < 0) 35770c36c4eStedu /* Closest dot to the beginning */ 35870c36c4eStedu firstHalfDot = i; 35970c36c4eStedu } else { 36070c36c4eStedu /* Closest dot to the end of the pattern. */ 36170c36c4eStedu lastHalfDot = i; 36270c36c4eStedu if (firstLastHalfDot < 0) 36370c36c4eStedu firstLastHalfDot = i; 36470c36c4eStedu } 365bcaf0f1eStedu break; 366bcaf0f1eStedu case '(': case ')': 367bcaf0f1eStedu case '{': case '}': 368bcaf0f1eStedu /* Special in BRE if preceded by '\\' */ 369bcaf0f1eStedu case '?': 370bcaf0f1eStedu case '+': 371bcaf0f1eStedu case '|': 372bcaf0f1eStedu /* Not special in BRE. */ 373bcaf0f1eStedu if (!Eflag) 374bcaf0f1eStedu goto nonspecial; 375bcaf0f1eStedu case '\\': 376bcaf0f1eStedu case '*': 377bcaf0f1eStedu case '[': case ']': 37870c36c4eStedu /* Free memory and let others know this is empty. */ 37970c36c4eStedu free(fg->pattern); 38070c36c4eStedu fg->pattern = NULL; 38170c36c4eStedu return (-1); 382bcaf0f1eStedu default: 383bcaf0f1eStedu nonspecial: 384bcaf0f1eStedu if (iflag) 385bcaf0f1eStedu fg->pattern[i] = toupper(fg->pattern[i]); 386bcaf0f1eStedu break; 38770c36c4eStedu } 38870c36c4eStedu } 38970c36c4eStedu 39070c36c4eStedu /* 39170c36c4eStedu * Determine if a reverse search would be faster based on the placement 39270c36c4eStedu * of the dots. 39370c36c4eStedu */ 394*19f8bc08Sotto if ((!(lflag || cflag || oflag)) && ((!(bol || eol)) && 39570c36c4eStedu ((lastHalfDot) && ((firstHalfDot < 0) || 39670c36c4eStedu ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) { 39770c36c4eStedu fg->reversedSearch = 1; 39870c36c4eStedu hasDot = fg->patternLen - (firstHalfDot < 0 ? 39970c36c4eStedu firstLastHalfDot : firstHalfDot) - 1; 40070c36c4eStedu grep_revstr(fg->pattern, fg->patternLen); 40170c36c4eStedu } 40270c36c4eStedu 40370c36c4eStedu /* 40470c36c4eStedu * Normal Quick Search would require a shift based on the position the 40570c36c4eStedu * next character after the comparison is within the pattern. With 40670c36c4eStedu * wildcards, the position of the last dot effects the maximum shift 40770c36c4eStedu * distance. 40870c36c4eStedu * The closer to the end the wild card is the slower the search. A 40970c36c4eStedu * reverse version of this algorithm would be useful for wildcards near 41070c36c4eStedu * the end of the string. 41170c36c4eStedu * 41270c36c4eStedu * Examples: 41370c36c4eStedu * Pattern Max shift 41470c36c4eStedu * ------- --------- 41570c36c4eStedu * this 5 41670c36c4eStedu * .his 4 41770c36c4eStedu * t.is 3 41870c36c4eStedu * th.s 2 41970c36c4eStedu * thi. 1 42070c36c4eStedu */ 42170c36c4eStedu 42270c36c4eStedu /* Adjust the shift based on location of the last dot ('.'). */ 42370c36c4eStedu shiftPatternLen = fg->patternLen - hasDot; 42470c36c4eStedu 42570c36c4eStedu /* Preprocess pattern. */ 42670c36c4eStedu for (i = 0; i <= UCHAR_MAX; i++) 42770c36c4eStedu fg->qsBc[i] = shiftPatternLen; 42870c36c4eStedu for (i = hasDot + 1; i < fg->patternLen; i++) { 42970c36c4eStedu fg->qsBc[fg->pattern[i]] = fg->patternLen - i; 43070c36c4eStedu /* 43170c36c4eStedu * If case is ignored, make the jump apply to both upper and 43270c36c4eStedu * lower cased characters. As the pattern is stored in upper 43370c36c4eStedu * case, apply the same to the lower case equivalents. 43470c36c4eStedu */ 43570c36c4eStedu if (iflag) 43670c36c4eStedu fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i; 43770c36c4eStedu } 43870c36c4eStedu 43970c36c4eStedu /* 44070c36c4eStedu * Put pattern back to normal after pre-processing to allow for easy 44170c36c4eStedu * comparisons later. 44270c36c4eStedu */ 44370c36c4eStedu if (fg->reversedSearch) 44470c36c4eStedu grep_revstr(fg->pattern, fg->patternLen); 44570c36c4eStedu 44670c36c4eStedu return (0); 447f56cb323Stedu #endif 44870c36c4eStedu } 44970c36c4eStedu 450809cad80Sotto /* 451809cad80Sotto * Word boundaries using regular expressions are defined as the point 452809cad80Sotto * of transition from a non-word char to a word char, or vice versa. 453809cad80Sotto * This means that grep -w +a and grep -w a+ never match anything, 454809cad80Sotto * because they lack a starting or ending transition, but grep -w a+b 455809cad80Sotto * does match a line containing a+b. 456809cad80Sotto */ 4570c9d1b5fSmillert #define wmatch(d, l, s, e) \ 458809cad80Sotto ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \ 459809cad80Sotto e > s && isword(d[s]) && isword(d[e-1])) 4600c9d1b5fSmillert 461674ee822Smillert static int 46214774268Stedu grep_search(fastgrep_t *fg, char *data, size_t dataLen, regmatch_t *pmatch) 46370c36c4eStedu { 464f56cb323Stedu #ifdef SMALL 465f56cb323Stedu return 0; 466f56cb323Stedu #else 467f7ac433eSaschrijver regoff_t j; 46870c36c4eStedu int rtrnVal = REG_NOMATCH; 46970c36c4eStedu 470674ee822Smillert pmatch->rm_so = -1; 471674ee822Smillert pmatch->rm_eo = -1; 472674ee822Smillert 47370c36c4eStedu /* No point in going farther if we do not have enough data. */ 47470c36c4eStedu if (dataLen < fg->patternLen) 47570c36c4eStedu return (rtrnVal); 47670c36c4eStedu 47770c36c4eStedu /* Only try once at the beginning or ending of the line. */ 47870c36c4eStedu if (fg->bol || fg->eol) { 47970c36c4eStedu /* Simple text comparison. */ 48070c36c4eStedu /* Verify data is >= pattern length before searching on it. */ 48170c36c4eStedu if (dataLen >= fg->patternLen) { 48270c36c4eStedu /* Determine where in data to start search at. */ 48370c36c4eStedu if (fg->eol) 48470c36c4eStedu j = dataLen - fg->patternLen; 48570c36c4eStedu else 48670c36c4eStedu j = 0; 48770c36c4eStedu if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen))) 4880c9d1b5fSmillert if (grep_cmp(fg->pattern, data + j, 48957c37fdaSmillert fg->patternLen)) { 490674ee822Smillert pmatch->rm_so = j; 491674ee822Smillert pmatch->rm_eo = j + fg->patternLen; 4920c9d1b5fSmillert if (!fg->wmatch || wmatch(data, dataLen, 4930c9d1b5fSmillert pmatch->rm_so, pmatch->rm_eo)) 4940c9d1b5fSmillert rtrnVal = 0; 495674ee822Smillert } 49670c36c4eStedu } 49770c36c4eStedu } else if (fg->reversedSearch) { 49870c36c4eStedu /* Quick Search algorithm. */ 49988410190Smillert j = dataLen; 50088410190Smillert do { 50170c36c4eStedu if (grep_cmp(fg->pattern, data + j - fg->patternLen, 50257c37fdaSmillert fg->patternLen)) { 503674ee822Smillert pmatch->rm_so = j - fg->patternLen; 504674ee822Smillert pmatch->rm_eo = j; 5050c9d1b5fSmillert if (!fg->wmatch || wmatch(data, dataLen, 5060c9d1b5fSmillert pmatch->rm_so, pmatch->rm_eo)) { 5070c9d1b5fSmillert rtrnVal = 0; 50870c36c4eStedu break; 50970c36c4eStedu } 5100c9d1b5fSmillert } 51188410190Smillert /* Shift if within bounds, otherwise, we are done. */ 51288410190Smillert if (j == fg->patternLen) 51388410190Smillert break; 51414774268Stedu j -= fg->qsBc[(unsigned char)data[j - fg->patternLen - 1]]; 51588410190Smillert } while (j >= fg->patternLen); 51670c36c4eStedu } else { 51770c36c4eStedu /* Quick Search algorithm. */ 51870c36c4eStedu j = 0; 51970c36c4eStedu do { 52057c37fdaSmillert if (grep_cmp(fg->pattern, data + j, fg->patternLen)) { 521674ee822Smillert pmatch->rm_so = j; 522674ee822Smillert pmatch->rm_eo = j + fg->patternLen; 5233d12d7eeSjaredy if (fg->patternLen == 0 || !fg->wmatch || 5243d12d7eeSjaredy wmatch(data, dataLen, pmatch->rm_so, 5253d12d7eeSjaredy pmatch->rm_eo)) { 5260c9d1b5fSmillert rtrnVal = 0; 52770c36c4eStedu break; 52870c36c4eStedu } 5290c9d1b5fSmillert } 53070c36c4eStedu 53170c36c4eStedu /* Shift if within bounds, otherwise, we are done. */ 53270c36c4eStedu if (j + fg->patternLen == dataLen) 53370c36c4eStedu break; 53470c36c4eStedu else 53514774268Stedu j += fg->qsBc[(unsigned char)data[j + fg->patternLen]]; 53670c36c4eStedu } while (j <= (dataLen - fg->patternLen)); 53770c36c4eStedu } 53870c36c4eStedu 53970c36c4eStedu return (rtrnVal); 540f56cb323Stedu #endif 54170c36c4eStedu } 54270c36c4eStedu 54370c36c4eStedu 544fe07e37bSderaadt void * 545fe07e37bSderaadt grep_malloc(size_t size) 546fe07e37bSderaadt { 547fe07e37bSderaadt void *ptr; 548fe07e37bSderaadt 549fe07e37bSderaadt if ((ptr = malloc(size)) == NULL) 550f91b5226Smillert err(2, "malloc"); 551fe07e37bSderaadt return ptr; 552fe07e37bSderaadt } 553fe07e37bSderaadt 554fe07e37bSderaadt void * 5551ed98fdfSderaadt grep_calloc(size_t nmemb, size_t size) 5561ed98fdfSderaadt { 5571ed98fdfSderaadt void *ptr; 5581ed98fdfSderaadt 5591ed98fdfSderaadt if ((ptr = calloc(nmemb, size)) == NULL) 5601ed98fdfSderaadt err(2, "calloc"); 5611ed98fdfSderaadt return ptr; 5621ed98fdfSderaadt } 5631ed98fdfSderaadt 5641ed98fdfSderaadt void * 565fe07e37bSderaadt grep_realloc(void *ptr, size_t size) 566fe07e37bSderaadt { 567fe07e37bSderaadt if ((ptr = realloc(ptr, size)) == NULL) 568f91b5226Smillert err(2, "realloc"); 569fe07e37bSderaadt return ptr; 570fe07e37bSderaadt } 571fe07e37bSderaadt 57262ab4a5fSderaadt void * 57362ab4a5fSderaadt grep_reallocarray(void *ptr, size_t nmemb, size_t size) 57462ab4a5fSderaadt { 57562ab4a5fSderaadt if ((ptr = reallocarray(ptr, nmemb, size)) == NULL) 57662ab4a5fSderaadt err(2, "reallocarray"); 57762ab4a5fSderaadt return ptr; 57862ab4a5fSderaadt } 57962ab4a5fSderaadt 580f56cb323Stedu #ifndef SMALL 58170c36c4eStedu /* 58257c37fdaSmillert * Returns: true on success, false on failure 58370c36c4eStedu */ 58457c37fdaSmillert static bool 58514774268Stedu grep_cmp(const char *pattern, const char *data, size_t len) 58670c36c4eStedu { 58757c37fdaSmillert size_t i; 58870c36c4eStedu 58970c36c4eStedu for (i = 0; i < len; i++) { 590acf867c4Smillert if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.')) 5910340d4f1Smmcc || (iflag && pattern[i] == toupper((unsigned char)data[i]))) 59270c36c4eStedu continue; 59357c37fdaSmillert return false; 59470c36c4eStedu } 59570c36c4eStedu 59657c37fdaSmillert return true; 59770c36c4eStedu } 59870c36c4eStedu 59970c36c4eStedu static void 60070c36c4eStedu grep_revstr(unsigned char *str, int len) 60170c36c4eStedu { 60270c36c4eStedu int i; 60370c36c4eStedu char c; 60470c36c4eStedu 60570c36c4eStedu for (i = 0; i < len / 2; i++) { 60670c36c4eStedu c = str[i]; 60770c36c4eStedu str[i] = str[len - i - 1]; 60870c36c4eStedu str[len - i - 1] = c; 60970c36c4eStedu } 61070c36c4eStedu } 611f56cb323Stedu #endif 61270c36c4eStedu 613fe07e37bSderaadt void 61414774268Stedu printline(str_t *line, int sep, regmatch_t *pmatch) 615fe07e37bSderaadt { 616fe07e37bSderaadt int n; 617fe07e37bSderaadt 618fe07e37bSderaadt n = 0; 619fe07e37bSderaadt if (!hflag) { 620fe07e37bSderaadt fputs(line->file, stdout); 621fe07e37bSderaadt ++n; 622fe07e37bSderaadt } 623fe07e37bSderaadt if (nflag) { 624fe07e37bSderaadt if (n) 625fe07e37bSderaadt putchar(sep); 6262f35e644Smmcc printf("%lld", line->line_no); 627fe07e37bSderaadt ++n; 628fe07e37bSderaadt } 629fe07e37bSderaadt if (bflag) { 630fe07e37bSderaadt if (n) 631fe07e37bSderaadt putchar(sep); 632b02c2e6bSotto printf("%lld", (long long)line->off); 633b02c2e6bSotto ++n; 634fe07e37bSderaadt } 635fe07e37bSderaadt if (n) 636fe07e37bSderaadt putchar(sep); 63714774268Stedu if (pmatch) 63814774268Stedu fwrite(line->dat + pmatch->rm_so, 63914774268Stedu pmatch->rm_eo - pmatch->rm_so, 1, stdout); 64014774268Stedu else 641fe07e37bSderaadt fwrite(line->dat, line->len, 1, stdout); 642fe07e37bSderaadt putchar('\n'); 643fe07e37bSderaadt } 644