xref: /netbsd-src/external/gpl2/grep/dist/src/search.c (revision fb69a85ab0bac94047f5be60bf0f33641f617669)
1 /*	$NetBSD: search.c,v 1.2 2016/01/10 22:16:40 christos Exp $	*/
2 
3 /* search.c - searching subroutines using dfa, kwset and regex for grep.
4    Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19    02111-1307, USA.  */
20 
21 /* Written August 1992 by Mike Haertel. */
22 
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26 #include <sys/types.h>
27 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC && defined HAVE_WCTYPE
28 /* We can handle multibyte string.  */
29 # define MBS_SUPPORT
30 # include <wchar.h>
31 # include <wctype.h>
32 #endif
33 
34 #include "system.h"
35 #include "grep.h"
36 #include "regex.h"
37 #include "dfa.h"
38 #include "kwset.h"
39 #include "error.h"
40 #include "xalloc.h"
41 #ifdef HAVE_LIBPCRE
42 # include <pcre.h>
43 #endif
44 
45 #define NCHAR (UCHAR_MAX + 1)
46 
47 /* For -w, we also consider _ to be word constituent.  */
48 #define WCHAR(C) (ISALNUM(C) || (C) == '_')
49 
50 /* DFA compiled regexp. */
51 static struct dfa dfa;
52 
53 /* The Regex compiled patterns.  */
54 static struct patterns
55 {
56   /* Regex compiled regexp. */
57   struct re_pattern_buffer regexbuf;
58   struct re_registers regs; /* This is here on account of a BRAIN-DEAD
59 			       Q@#%!# library interface in regex.c.  */
60 } patterns0;
61 
62 struct patterns *patterns;
63 size_t pcount;
64 
65 /* KWset compiled pattern.  For Ecompile and Gcompile, we compile
66    a list of strings, at least one of which is known to occur in
67    any string matching the regexp. */
68 static kwset_t kwset;
69 
70 /* Number of compiled fixed strings known to exactly match the regexp.
71    If kwsexec returns < kwset_exact_matches, then we don't need to
72    call the regexp matcher at all. */
73 static int kwset_exact_matches;
74 
75 #if defined(MBS_SUPPORT)
76 static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
77 #endif
78 static void kwsinit PARAMS ((void));
79 static void kwsmusts PARAMS ((void));
80 static void Gcompile PARAMS ((char const *, size_t));
81 static void Ecompile PARAMS ((char const *, size_t));
82 static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
83 static void Fcompile PARAMS ((char const *, size_t));
84 static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
85 static void Pcompile PARAMS ((char const *, size_t ));
86 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
87 
88 void
dfaerror(char const * mesg)89 dfaerror (char const *mesg)
90 {
91   error (2, 0, mesg);
92 }
93 
94 static void
kwsinit(void)95 kwsinit (void)
96 {
97   static char trans[NCHAR];
98   int i;
99 
100   if (match_icase)
101     for (i = 0; i < NCHAR; ++i)
102       trans[i] = TOLOWER (i);
103 
104   if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
105     error (2, 0, _("memory exhausted"));
106 }
107 
108 /* If the DFA turns out to have some set of fixed strings one of
109    which must occur in the match, then we build a kwset matcher
110    to find those strings, and thus quickly filter out impossible
111    matches. */
112 static void
kwsmusts(void)113 kwsmusts (void)
114 {
115   struct dfamust const *dm;
116   char const *err;
117 
118   if (dfa.musts)
119     {
120       kwsinit ();
121       /* First, we compile in the substrings known to be exact
122 	 matches.  The kwset matcher will return the index
123 	 of the matching string that it chooses. */
124       for (dm = dfa.musts; dm; dm = dm->next)
125 	{
126 	  if (!dm->exact)
127 	    continue;
128 	  ++kwset_exact_matches;
129 	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
130 	    error (2, 0, err);
131 	}
132       /* Now, we compile the substrings that will require
133 	 the use of the regexp matcher.  */
134       for (dm = dfa.musts; dm; dm = dm->next)
135 	{
136 	  if (dm->exact)
137 	    continue;
138 	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
139 	    error (2, 0, err);
140 	}
141       if ((err = kwsprep (kwset)) != 0)
142 	error (2, 0, err);
143     }
144 }
145 
146 #ifdef MBS_SUPPORT
147 /* This function allocate the array which correspond to "buf".
148    Then this check multibyte string and mark on the positions which
149    are not singlebyte character nor the first byte of a multibyte
150    character.  Caller must free the array.  */
151 static char*
check_multibyte_string(char const * buf,size_t size)152 check_multibyte_string(char const *buf, size_t size)
153 {
154   char *mb_properties = malloc(size);
155   mbstate_t cur_state;
156   size_t i;
157   memset(&cur_state, 0, sizeof(mbstate_t));
158   memset(mb_properties, 0, sizeof(char)*size);
159   for (i = 0; i < size ;)
160     {
161       size_t mbclen;
162       mbclen = mbrlen(buf + i, size - i, &cur_state);
163 
164       if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
165 	{
166 	  /* An invalid sequence, or a truncated multibyte character.
167 	     We treat it as a singlebyte character.  */
168 	  mbclen = 1;
169 	}
170       mb_properties[i] = mbclen;
171       i += mbclen;
172     }
173 
174   return mb_properties;
175 }
176 #endif
177 
178 static void
Gcompile(char const * pattern,size_t size)179 Gcompile (char const *pattern, size_t size)
180 {
181   const char *err;
182   char const *sep;
183   size_t total = size;
184   char const *motif = pattern;
185 
186   re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
187   dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
188 
189   /* For GNU regex compiler we have to pass the patterns separately to detect
190      errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
191      GNU regex should have raise a syntax error.  The same for backref, where
192      the backref should have been local to each pattern.  */
193   do
194     {
195       size_t len;
196       sep = memchr (motif, '\n', total);
197       if (sep)
198 	{
199 	  len = sep - motif;
200 	  sep++;
201 	  total -= (len + 1);
202 	}
203       else
204 	{
205 	  len = total;
206 	  total = 0;
207 	}
208 
209       patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
210       if (patterns == NULL)
211 	error (2, errno, _("memory exhausted"));
212 
213       patterns[pcount] = patterns0;
214 
215       if ((err = re_compile_pattern (motif, len,
216 				    &(patterns[pcount].regexbuf))) != 0)
217 	error (2, 0, err);
218       pcount++;
219 
220       motif = sep;
221     } while (sep && total != 0);
222 
223   /* In the match_words and match_lines cases, we use a different pattern
224      for the DFA matcher that will quickly throw out cases that won't work.
225      Then if DFA succeeds we do some hairy stuff using the regex matcher
226      to decide whether the match should really count. */
227   if (match_words || match_lines)
228     {
229       /* In the whole-word case, we use the pattern:
230 	 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
231 	 In the whole-line case, we use the pattern:
232 	 ^\(userpattern\)$.  */
233 
234       static char const line_beg[] = "^\\(";
235       static char const line_end[] = "\\)$";
236       static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
237       static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
238       char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
239       size_t i;
240       strcpy (n, match_lines ? line_beg : word_beg);
241       i = strlen (n);
242       memcpy (n + i, pattern, size);
243       i += size;
244       strcpy (n + i, match_lines ? line_end : word_end);
245       i += strlen (n + i);
246       pattern = n;
247       size = i;
248     }
249 
250   dfacomp (pattern, size, &dfa, 1);
251   kwsmusts ();
252 }
253 
254 static void
Ecompile(char const * pattern,size_t size)255 Ecompile (char const *pattern, size_t size)
256 {
257   const char *err;
258   const char *sep;
259   size_t total = size;
260   char const *motif = pattern;
261 
262   if (strcmp (matcher, "awk") == 0)
263     {
264       re_set_syntax (RE_SYNTAX_AWK);
265       dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
266     }
267   else
268     {
269       re_set_syntax (RE_SYNTAX_POSIX_EGREP);
270       dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
271     }
272 
273   /* For GNU regex compiler we have to pass the patterns separately to detect
274      errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
275      GNU regex should have raise a syntax error.  The same for backref, where
276      the backref should have been local to each pattern.  */
277   do
278     {
279       size_t len;
280       sep = memchr (motif, '\n', total);
281       if (sep)
282 	{
283 	  len = sep - motif;
284 	  sep++;
285 	  total -= (len + 1);
286 	}
287       else
288 	{
289 	  len = total;
290 	  total = 0;
291 	}
292 
293       patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
294       if (patterns == NULL)
295 	error (2, errno, _("memory exhausted"));
296       patterns[pcount] = patterns0;
297 
298       if ((err = re_compile_pattern (motif, len,
299 				    &(patterns[pcount].regexbuf))) != 0)
300 	error (2, 0, err);
301       pcount++;
302 
303       motif = sep;
304     } while (sep && total != 0);
305 
306   /* In the match_words and match_lines cases, we use a different pattern
307      for the DFA matcher that will quickly throw out cases that won't work.
308      Then if DFA succeeds we do some hairy stuff using the regex matcher
309      to decide whether the match should really count. */
310   if (match_words || match_lines)
311     {
312       /* In the whole-word case, we use the pattern:
313 	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
314 	 In the whole-line case, we use the pattern:
315 	 ^(userpattern)$.  */
316 
317       static char const line_beg[] = "^(";
318       static char const line_end[] = ")$";
319       static char const word_beg[] = "(^|[^[:alnum:]_])(";
320       static char const word_end[] = ")([^[:alnum:]_]|$)";
321       char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
322       size_t i;
323       strcpy (n, match_lines ? line_beg : word_beg);
324       i = strlen(n);
325       memcpy (n + i, pattern, size);
326       i += size;
327       strcpy (n + i, match_lines ? line_end : word_end);
328       i += strlen (n + i);
329       pattern = n;
330       size = i;
331     }
332 
333   dfacomp (pattern, size, &dfa, 1);
334   kwsmusts ();
335 }
336 
337 static size_t
EGexecute(char const * buf,size_t size,size_t * match_size,int exact)338 EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
339 {
340   register char const *buflim, *beg, *end;
341   char eol = eolbyte;
342   int backref;
343   ptrdiff_t start, len;
344   struct kwsmatch kwsm;
345   size_t i;
346 #ifdef MBS_SUPPORT
347   char *mb_properties = NULL;
348 #endif /* MBS_SUPPORT */
349 
350 #ifdef MBS_SUPPORT
351   if (MB_CUR_MAX > 1 && kwset)
352     mb_properties = check_multibyte_string(buf, size);
353 #endif /* MBS_SUPPORT */
354 
355   buflim = buf + size;
356 
357   for (beg = end = buf; end < buflim; beg = end)
358     {
359       if (!exact)
360 	{
361 	  if (kwset)
362 	    {
363 	      /* Find a possible match using the KWset matcher. */
364 	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
365 	      if (offset == (size_t) -1)
366 	        goto failure;
367 	      beg += offset;
368 	      /* Narrow down to the line containing the candidate, and
369 		 run it through DFA. */
370 	      end = memchr(beg, eol, buflim - beg);
371 	      end++;
372 #ifdef MBS_SUPPORT
373 	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
374 		continue;
375 #endif
376 	      while (beg > buf && beg[-1] != eol)
377 		--beg;
378 	      if (kwsm.index < kwset_exact_matches)
379 		goto success_in_beg_and_end;
380 	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
381 		continue;
382 	    }
383 	  else
384 	    {
385 	      /* No good fixed strings; start with DFA. */
386 	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
387 	      if (offset == (size_t) -1)
388 		break;
389 	      /* Narrow down to the line we've found. */
390 	      beg += offset;
391 	      end = memchr (beg, eol, buflim - beg);
392 	      end++;
393 	      while (beg > buf && beg[-1] != eol)
394 		--beg;
395 	    }
396 	  /* Successful, no backreferences encountered! */
397 	  if (!backref)
398 	    goto success_in_beg_and_end;
399 	}
400       else
401 	end = beg + size;
402 
403       /* If we've made it to this point, this means DFA has seen
404 	 a probable match, and we need to run it through Regex. */
405       for (i = 0; i < pcount; i++)
406 	{
407 	  patterns[i].regexbuf.not_eol = 0;
408 	  if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
409 				       end - beg - 1, 0,
410 				       end - beg - 1, &(patterns[i].regs))))
411 	    {
412 	      len = patterns[i].regs.end[0] - start;
413 	      if (exact && !match_words)
414 	        goto success_in_start_and_len;
415 	      if ((!match_lines && !match_words)
416 		  || (match_lines && len == end - beg - 1))
417 		goto success_in_beg_and_end;
418 	      /* If -w, check if the match aligns with word boundaries.
419 		 We do this iteratively because:
420 		 (a) the line may contain more than one occurence of the
421 		 pattern, and
422 		 (b) Several alternatives in the pattern might be valid at a
423 		 given point, and we may need to consider a shorter one to
424 		 find a word boundary.  */
425 	      if (match_words)
426 		while (start >= 0)
427 		  {
428 		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
429 			&& (len == end - beg - 1
430 			    || !WCHAR ((unsigned char) beg[start + len])))
431 		      goto success_in_start_and_len;
432 		    if (len > 0)
433 		      {
434 			/* Try a shorter length anchored at the same place. */
435 			--len;
436 			patterns[i].regexbuf.not_eol = 1;
437 			len = re_match (&(patterns[i].regexbuf), beg,
438 					start + len, start,
439 					&(patterns[i].regs));
440 		      }
441 		    if (len <= 0)
442 		      {
443 			/* Try looking further on. */
444 			if (start == end - beg - 1)
445 			  break;
446 			++start;
447 			patterns[i].regexbuf.not_eol = 0;
448 			start = re_search (&(patterns[i].regexbuf), beg,
449 					   end - beg - 1,
450 					   start, end - beg - 1 - start,
451 					   &(patterns[i].regs));
452 			len = patterns[i].regs.end[0] - start;
453 		      }
454 		  }
455 	    }
456 	} /* for Regex patterns.  */
457     } /* for (beg = end ..) */
458 
459  failure:
460 #ifdef MBS_SUPPORT
461   if (MB_CUR_MAX > 1 && mb_properties)
462     free (mb_properties);
463 #endif /* MBS_SUPPORT */
464   return (size_t) -1;
465 
466  success_in_beg_and_end:
467   len = end - beg;
468   start = beg - buf;
469   /* FALLTHROUGH */
470 
471  success_in_start_and_len:
472 #ifdef MBS_SUPPORT
473   if (MB_CUR_MAX > 1 && mb_properties)
474     free (mb_properties);
475 #endif /* MBS_SUPPORT */
476   *match_size = len;
477   return start;
478 }
479 
480 static void
Fcompile(char const * pattern,size_t size)481 Fcompile (char const *pattern, size_t size)
482 {
483   char const *beg, *lim, *err;
484 
485   kwsinit ();
486   beg = pattern;
487   do
488     {
489       for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
490 	;
491       if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
492 	error (2, 0, err);
493       if (lim < pattern + size)
494 	++lim;
495       beg = lim;
496     }
497   while (beg < pattern + size);
498 
499   if ((err = kwsprep (kwset)) != 0)
500     error (2, 0, err);
501 }
502 
503 static size_t
Fexecute(char const * buf,size_t size,size_t * match_size,int exact)504 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
505 {
506   register char const *beg, *try, *end;
507   register size_t len;
508   char eol = eolbyte;
509   struct kwsmatch kwsmatch;
510 #ifdef MBS_SUPPORT
511   char *mb_properties;
512   if (MB_CUR_MAX > 1)
513     mb_properties = check_multibyte_string (buf, size);
514 #endif /* MBS_SUPPORT */
515 
516   for (beg = buf; beg <= buf + size; ++beg)
517     {
518       size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
519       if (offset == (size_t) -1)
520 	goto failure;
521 #ifdef MBS_SUPPORT
522       if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
523 	continue; /* It is a part of multibyte character.  */
524 #endif /* MBS_SUPPORT */
525       beg += offset;
526       len = kwsmatch.size[0];
527       if (exact && !match_words)
528 	goto success_in_beg_and_len;
529       if (match_lines)
530 	{
531 	  if (beg > buf && beg[-1] != eol)
532 	    continue;
533 	  if (beg + len < buf + size && beg[len] != eol)
534 	    continue;
535 	  goto success;
536 	}
537       else if (match_words)
538 	{
539 	  while (offset >= 0)
540 	    {
541 	      if ((offset == 0 || !WCHAR ((unsigned char) beg[-1]))
542 	          && (len == end - beg - 1 || !WCHAR ((unsigned char) beg[len])))
543 	        {
544 	          if (!exact)
545 	            /* Returns the whole line now we know there's a word match. */
546 	            goto success;
547 	          else
548 	            /* Returns just this word match. */
549 	            goto success_in_beg_and_len;
550 	        }
551 	      if (len > 0)
552 	        {
553 	          /* Try a shorter length anchored at the same place. */
554 	          --len;
555 	          offset = kwsexec (kwset, beg, len, &kwsmatch);
556 	          if (offset == -1) {
557 	            break; /* Try a different anchor. */
558 	          }
559 	          beg += offset;
560 	          len = kwsmatch.size[0];
561 	        }
562 	    }
563 	}
564       else
565 	goto success;
566     }
567 
568  failure:
569 #ifdef MBS_SUPPORT
570   if (MB_CUR_MAX > 1)
571     free (mb_properties);
572 #endif /* MBS_SUPPORT */
573   return -1;
574 
575  success:
576   end = memchr (beg + len, eol, (buf + size) - (beg + len));
577   end++;
578   while (buf < beg && beg[-1] != eol)
579     --beg;
580   len = end - beg;
581   /* FALLTHROUGH */
582 
583  success_in_beg_and_len:
584   *match_size = len;
585 #ifdef MBS_SUPPORT
586   if (MB_CUR_MAX > 1)
587     free (mb_properties);
588 #endif /* MBS_SUPPORT */
589   return beg - buf;
590 }
591 
592 #if HAVE_LIBPCRE
593 /* Compiled internal form of a Perl regular expression.  */
594 static pcre *cre;
595 
596 /* Additional information about the pattern.  */
597 static pcre_extra *extra;
598 #endif
599 
600 static void
Pcompile(char const * pattern,size_t size)601 Pcompile (char const *pattern, size_t size)
602 {
603 #if !HAVE_LIBPCRE
604   error (2, 0, _("The -P option is not supported"));
605 #else
606   int e;
607   char const *ep;
608   char *re = xmalloc (4 * size + 7);
609   int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
610   char const *patlim = pattern + size;
611   char *n = re;
612   char const *p;
613   char const *pnul;
614 
615   /* FIXME: Remove this restriction.  */
616   if (eolbyte != '\n')
617     error (2, 0, _("The -P and -z options cannot be combined"));
618 
619   *n = '\0';
620   if (match_lines)
621     strcpy (n, "^(");
622   if (match_words)
623     strcpy (n, "\\b(");
624   n += strlen (n);
625 
626   /* The PCRE interface doesn't allow NUL bytes in the pattern, so
627      replace each NUL byte in the pattern with the four characters
628      "\000", removing a preceding backslash if there are an odd
629      number of backslashes before the NUL.
630 
631      FIXME: This method does not work with some multibyte character
632      encodings, notably Shift-JIS, where a multibyte character can end
633      in a backslash byte.  */
634   for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
635     {
636       memcpy (n, p, pnul - p);
637       n += pnul - p;
638       for (p = pnul; pattern < p && p[-1] == '\\'; p--)
639 	continue;
640       n -= (pnul - p) & 1;
641       strcpy (n, "\\000");
642       n += 4;
643     }
644 
645   memcpy (n, p, patlim - p);
646   n += patlim - p;
647   *n = '\0';
648   if (match_words)
649     strcpy (n, ")\\b");
650   if (match_lines)
651     strcpy (n, ")$");
652 
653   cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
654   if (!cre)
655     error (2, 0, ep);
656 
657   extra = pcre_study (cre, 0, &ep);
658   if (ep)
659     error (2, 0, ep);
660 
661   free (re);
662 #endif
663 }
664 
665 static size_t
Pexecute(char const * buf,size_t size,size_t * match_size,int exact)666 Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
667 {
668 #if !HAVE_LIBPCRE
669   abort ();
670   return -1;
671 #else
672   /* This array must have at least two elements; everything after that
673      is just for performance improvement in pcre_exec.  */
674   int sub[300];
675 
676   int e = pcre_exec (cre, extra, buf, size, 0, 0,
677 		     sub, sizeof sub / sizeof *sub);
678 
679   if (e <= 0)
680     {
681       switch (e)
682 	{
683 	case PCRE_ERROR_NOMATCH:
684 	  return -1;
685 
686 	case PCRE_ERROR_NOMEMORY:
687 	  error (2, 0, _("Memory exhausted"));
688 
689 	default:
690 	  abort ();
691 	}
692     }
693   else
694     {
695       /* Narrow down to the line we've found.  */
696       char const *beg = buf + sub[0];
697       char const *end = buf + sub[1];
698       char const *buflim = buf + size;
699       char eol = eolbyte;
700       if (!exact)
701 	{
702 	  end = memchr (end, eol, buflim - end);
703 	  end++;
704 	  while (buf < beg && beg[-1] != eol)
705 	    --beg;
706 	}
707 
708       *match_size = end - beg;
709       return beg - buf;
710     }
711 #endif
712 }
713 
714 struct matcher const matchers[] = {
715   { "default", Gcompile, EGexecute },
716   { "grep", Gcompile, EGexecute },
717   { "egrep", Ecompile, EGexecute },
718   { "awk", Ecompile, EGexecute },
719   { "fgrep", Fcompile, Fexecute },
720   { "perl", Pcompile, Pexecute },
721   { "", 0, 0 },
722 };
723