/*	$NetBSD: pattern.c,v 1.3 2013/09/04 19:44:21 tron Exp $	*/

/*
 * Copyright (C) 1984-2012  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */

/*
 * Routines to do pattern matching.
 */

16f7cf2976SLionel Sambuc #include "less.h"
17f7cf2976SLionel Sambuc #include "pattern.h"
18f7cf2976SLionel Sambuc
19f7cf2976SLionel Sambuc extern int caseless;
20f7cf2976SLionel Sambuc
21f7cf2976SLionel Sambuc /*
22f7cf2976SLionel Sambuc * Compile a search pattern, for future use by match_pattern.
23f7cf2976SLionel Sambuc */
24f7cf2976SLionel Sambuc static int
compile_pattern2(pattern,search_type,comp_pattern)25f7cf2976SLionel Sambuc compile_pattern2(pattern, search_type, comp_pattern)
26f7cf2976SLionel Sambuc char *pattern;
27f7cf2976SLionel Sambuc int search_type;
28f7cf2976SLionel Sambuc void **comp_pattern;
29f7cf2976SLionel Sambuc {
30*84d9c625SLionel Sambuc if (search_type & SRCH_NO_REGEX)
31*84d9c625SLionel Sambuc return (0);
32f7cf2976SLionel Sambuc {
33*84d9c625SLionel Sambuc #if HAVE_GNU_REGEX
34*84d9c625SLionel Sambuc struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
35*84d9c625SLionel Sambuc ecalloc(1, sizeof(struct re_pattern_buffer));
36*84d9c625SLionel Sambuc struct re_pattern_buffer **pcomp =
37*84d9c625SLionel Sambuc (struct re_pattern_buffer **) comp_pattern;
38*84d9c625SLionel Sambuc re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
39*84d9c625SLionel Sambuc if (re_compile_pattern(pattern, strlen(pattern), comp))
40*84d9c625SLionel Sambuc {
41*84d9c625SLionel Sambuc free(comp);
42*84d9c625SLionel Sambuc error("Invalid pattern", NULL_PARG);
43*84d9c625SLionel Sambuc return (-1);
44*84d9c625SLionel Sambuc }
45*84d9c625SLionel Sambuc if (*pcomp != NULL)
46*84d9c625SLionel Sambuc regfree(*pcomp);
47*84d9c625SLionel Sambuc *pcomp = comp;
48*84d9c625SLionel Sambuc #endif
49f7cf2976SLionel Sambuc #if HAVE_POSIX_REGCOMP
50f7cf2976SLionel Sambuc regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51f7cf2976SLionel Sambuc regex_t **pcomp = (regex_t **) comp_pattern;
52f7cf2976SLionel Sambuc if (regcomp(comp, pattern, REGCOMP_FLAG))
53f7cf2976SLionel Sambuc {
54f7cf2976SLionel Sambuc free(comp);
55f7cf2976SLionel Sambuc error("Invalid pattern", NULL_PARG);
56f7cf2976SLionel Sambuc return (-1);
57f7cf2976SLionel Sambuc }
58f7cf2976SLionel Sambuc if (*pcomp != NULL)
59f7cf2976SLionel Sambuc regfree(*pcomp);
60f7cf2976SLionel Sambuc *pcomp = comp;
61f7cf2976SLionel Sambuc #endif
62f7cf2976SLionel Sambuc #if HAVE_PCRE
63f7cf2976SLionel Sambuc pcre *comp;
64f7cf2976SLionel Sambuc pcre **pcomp = (pcre **) comp_pattern;
65*84d9c625SLionel Sambuc constant char *errstring;
66f7cf2976SLionel Sambuc int erroffset;
67f7cf2976SLionel Sambuc PARG parg;
68f7cf2976SLionel Sambuc comp = pcre_compile(pattern, 0,
69f7cf2976SLionel Sambuc &errstring, &erroffset, NULL);
70f7cf2976SLionel Sambuc if (comp == NULL)
71f7cf2976SLionel Sambuc {
72f7cf2976SLionel Sambuc parg.p_string = (char *) errstring;
73f7cf2976SLionel Sambuc error("%s", &parg);
74f7cf2976SLionel Sambuc return (-1);
75f7cf2976SLionel Sambuc }
76f7cf2976SLionel Sambuc *pcomp = comp;
77f7cf2976SLionel Sambuc #endif
78f7cf2976SLionel Sambuc #if HAVE_RE_COMP
79f7cf2976SLionel Sambuc PARG parg;
80f7cf2976SLionel Sambuc int *pcomp = (int *) comp_pattern;
81f7cf2976SLionel Sambuc if ((parg.p_string = re_comp(pattern)) != NULL)
82f7cf2976SLionel Sambuc {
83f7cf2976SLionel Sambuc error("%s", &parg);
84f7cf2976SLionel Sambuc return (-1);
85f7cf2976SLionel Sambuc }
86f7cf2976SLionel Sambuc *pcomp = 1;
87f7cf2976SLionel Sambuc #endif
88f7cf2976SLionel Sambuc #if HAVE_REGCMP
89f7cf2976SLionel Sambuc char *comp;
90f7cf2976SLionel Sambuc char **pcomp = (char **) comp_pattern;
91f7cf2976SLionel Sambuc if ((comp = regcmp(pattern, 0)) == NULL)
92f7cf2976SLionel Sambuc {
93f7cf2976SLionel Sambuc error("Invalid pattern", NULL_PARG);
94f7cf2976SLionel Sambuc return (-1);
95f7cf2976SLionel Sambuc }
96f7cf2976SLionel Sambuc if (pcomp != NULL)
97f7cf2976SLionel Sambuc free(*pcomp);
98f7cf2976SLionel Sambuc *pcomp = comp;
99f7cf2976SLionel Sambuc #endif
100f7cf2976SLionel Sambuc #if HAVE_V8_REGCOMP
101f7cf2976SLionel Sambuc struct regexp *comp;
102f7cf2976SLionel Sambuc struct regexp **pcomp = (struct regexp **) comp_pattern;
103f7cf2976SLionel Sambuc if ((comp = regcomp(pattern)) == NULL)
104f7cf2976SLionel Sambuc {
105f7cf2976SLionel Sambuc /*
106f7cf2976SLionel Sambuc * regcomp has already printed an error message
107f7cf2976SLionel Sambuc * via regerror().
108f7cf2976SLionel Sambuc */
109f7cf2976SLionel Sambuc return (-1);
110f7cf2976SLionel Sambuc }
111f7cf2976SLionel Sambuc if (*pcomp != NULL)
112f7cf2976SLionel Sambuc free(*pcomp);
113f7cf2976SLionel Sambuc *pcomp = comp;
114f7cf2976SLionel Sambuc #endif
115f7cf2976SLionel Sambuc }
116f7cf2976SLionel Sambuc return (0);
117f7cf2976SLionel Sambuc }
118f7cf2976SLionel Sambuc
119f7cf2976SLionel Sambuc /*
120f7cf2976SLionel Sambuc * Like compile_pattern2, but convert the pattern to lowercase if necessary.
121f7cf2976SLionel Sambuc */
122f7cf2976SLionel Sambuc public int
compile_pattern(pattern,search_type,comp_pattern)123f7cf2976SLionel Sambuc compile_pattern(pattern, search_type, comp_pattern)
124f7cf2976SLionel Sambuc char *pattern;
125f7cf2976SLionel Sambuc int search_type;
126f7cf2976SLionel Sambuc void **comp_pattern;
127f7cf2976SLionel Sambuc {
128f7cf2976SLionel Sambuc char *cvt_pattern;
129f7cf2976SLionel Sambuc int result;
130f7cf2976SLionel Sambuc
131f7cf2976SLionel Sambuc if (caseless != OPT_ONPLUS)
132f7cf2976SLionel Sambuc cvt_pattern = pattern;
133f7cf2976SLionel Sambuc else
134f7cf2976SLionel Sambuc {
135f7cf2976SLionel Sambuc cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
136f7cf2976SLionel Sambuc cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
137f7cf2976SLionel Sambuc }
138f7cf2976SLionel Sambuc result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
139f7cf2976SLionel Sambuc if (cvt_pattern != pattern)
140f7cf2976SLionel Sambuc free(cvt_pattern);
141f7cf2976SLionel Sambuc return (result);
142f7cf2976SLionel Sambuc }
143f7cf2976SLionel Sambuc
144f7cf2976SLionel Sambuc /*
145f7cf2976SLionel Sambuc * Forget that we have a compiled pattern.
146f7cf2976SLionel Sambuc */
147f7cf2976SLionel Sambuc public void
uncompile_pattern(pattern)148f7cf2976SLionel Sambuc uncompile_pattern(pattern)
149f7cf2976SLionel Sambuc void **pattern;
150f7cf2976SLionel Sambuc {
151*84d9c625SLionel Sambuc #if HAVE_GNU_REGEX
152*84d9c625SLionel Sambuc struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
153*84d9c625SLionel Sambuc if (*pcomp != NULL)
154*84d9c625SLionel Sambuc regfree(*pcomp);
155*84d9c625SLionel Sambuc *pcomp = NULL;
156*84d9c625SLionel Sambuc #endif
157f7cf2976SLionel Sambuc #if HAVE_POSIX_REGCOMP
158f7cf2976SLionel Sambuc regex_t **pcomp = (regex_t **) pattern;
159f7cf2976SLionel Sambuc if (*pcomp != NULL)
160f7cf2976SLionel Sambuc regfree(*pcomp);
161f7cf2976SLionel Sambuc *pcomp = NULL;
162f7cf2976SLionel Sambuc #endif
163f7cf2976SLionel Sambuc #if HAVE_PCRE
164f7cf2976SLionel Sambuc pcre **pcomp = (pcre **) pattern;
165f7cf2976SLionel Sambuc if (*pcomp != NULL)
166f7cf2976SLionel Sambuc pcre_free(*pcomp);
167f7cf2976SLionel Sambuc *pcomp = NULL;
168f7cf2976SLionel Sambuc #endif
169f7cf2976SLionel Sambuc #if HAVE_RE_COMP
170f7cf2976SLionel Sambuc int *pcomp = (int *) pattern;
171f7cf2976SLionel Sambuc *pcomp = 0;
172f7cf2976SLionel Sambuc #endif
173f7cf2976SLionel Sambuc #if HAVE_REGCMP
174f7cf2976SLionel Sambuc char **pcomp = (char **) pattern;
175f7cf2976SLionel Sambuc if (*pcomp != NULL)
176f7cf2976SLionel Sambuc free(*pcomp);
177f7cf2976SLionel Sambuc *pcomp = NULL;
178f7cf2976SLionel Sambuc #endif
179f7cf2976SLionel Sambuc #if HAVE_V8_REGCOMP
180f7cf2976SLionel Sambuc struct regexp **pcomp = (struct regexp **) pattern;
181f7cf2976SLionel Sambuc if (*pcomp != NULL)
182f7cf2976SLionel Sambuc free(*pcomp);
183f7cf2976SLionel Sambuc *pcomp = NULL;
184f7cf2976SLionel Sambuc #endif
185f7cf2976SLionel Sambuc }
186f7cf2976SLionel Sambuc
/*
 * Is a compiled pattern null?
 *
 * Returns 1 if the slot value is the "no pattern" value, 0 otherwise.
 * Every supported regex backend uses the same test (the re_comp flag
 * value 0 compares equal to NULL as well), so a single unconditional
 * return is used.  This also gives builds with no regex library at all
 * a defined return value.
 */
public int
is_null_pattern(pattern)
	void *pattern;
{
	return (pattern == NULL);
}

/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * Looks for the first occurrence of the pattern_len bytes at pattern
 * within the buf_len bytes at buf.  Neither string needs to be
 * NUL-terminated; only bytes inside the given lengths are examined.
 *
 * On a match, returns 1 and stores the start of the match in *pfound and
 * the position just past it in *pend (each only if the pointer is
 * non-NULL).  Returns 0 if there is no match.  An empty buffer never
 * matches, not even an empty pattern.
 */
static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	register char *pp, *lp;
	register char *pattern_end = pattern + pattern_len;
	register char *buf_end = buf + buf_len;

	for ( ;  buf < buf_end;  buf++)
	{
		pp = pattern;
		lp = buf;
		/*
		 * Test the bounds before dereferencing, so that we never
		 * read pattern[pattern_len] or buf[buf_len].
		 */
		while (pp < pattern_end && lp < buf_end && *pp == *lp)
		{
			pp++;
			lp++;
		}
		/* The whole pattern was consumed: match starts at buf. */
		if (pp == pattern_end)
		{
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}

247f7cf2976SLionel Sambuc /*
248f7cf2976SLionel Sambuc * Perform a pattern match with the previously compiled pattern.
249f7cf2976SLionel Sambuc * Set sp and ep to the start and end of the matched string.
250f7cf2976SLionel Sambuc */
251f7cf2976SLionel Sambuc public int
match_pattern(pattern,tpattern,line,line_len,sp,ep,notbol,search_type)252f7cf2976SLionel Sambuc match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
253f7cf2976SLionel Sambuc void *pattern;
254f7cf2976SLionel Sambuc char *tpattern;
255f7cf2976SLionel Sambuc char *line;
256f7cf2976SLionel Sambuc int line_len;
257f7cf2976SLionel Sambuc char **sp;
258f7cf2976SLionel Sambuc char **ep;
259f7cf2976SLionel Sambuc int notbol;
260f7cf2976SLionel Sambuc int search_type;
261f7cf2976SLionel Sambuc {
262f7cf2976SLionel Sambuc int matched;
263*84d9c625SLionel Sambuc #if HAVE_GNU_REGEX
264*84d9c625SLionel Sambuc struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
265*84d9c625SLionel Sambuc #endif
266f7cf2976SLionel Sambuc #if HAVE_POSIX_REGCOMP
267f7cf2976SLionel Sambuc regex_t *spattern = (regex_t *) pattern;
268f7cf2976SLionel Sambuc #endif
269f7cf2976SLionel Sambuc #if HAVE_PCRE
270f7cf2976SLionel Sambuc pcre *spattern = (pcre *) pattern;
271f7cf2976SLionel Sambuc #endif
272f7cf2976SLionel Sambuc #if HAVE_RE_COMP
273f7cf2976SLionel Sambuc int spattern = (int) pattern;
274f7cf2976SLionel Sambuc #endif
275f7cf2976SLionel Sambuc #if HAVE_REGCMP
276f7cf2976SLionel Sambuc char *spattern = (char *) pattern;
277f7cf2976SLionel Sambuc #endif
278f7cf2976SLionel Sambuc #if HAVE_V8_REGCOMP
279f7cf2976SLionel Sambuc struct regexp *spattern = (struct regexp *) pattern;
280f7cf2976SLionel Sambuc #endif
281f7cf2976SLionel Sambuc
282*84d9c625SLionel Sambuc #if NO_REGEX
283*84d9c625SLionel Sambuc search_type |= SRCH_NO_REGEX;
284*84d9c625SLionel Sambuc #endif
285f7cf2976SLionel Sambuc if (search_type & SRCH_NO_REGEX)
286f7cf2976SLionel Sambuc matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
287f7cf2976SLionel Sambuc else
288f7cf2976SLionel Sambuc {
289*84d9c625SLionel Sambuc #if HAVE_GNU_REGEX
290*84d9c625SLionel Sambuc {
291*84d9c625SLionel Sambuc struct re_registers search_regs;
292*84d9c625SLionel Sambuc regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
293*84d9c625SLionel Sambuc regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
294*84d9c625SLionel Sambuc spattern->not_bol = notbol;
295*84d9c625SLionel Sambuc re_set_registers(spattern, &search_regs, 1, starts, ends);
296*84d9c625SLionel Sambuc matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
297*84d9c625SLionel Sambuc if (matched)
298*84d9c625SLionel Sambuc {
299*84d9c625SLionel Sambuc *sp = line + search_regs.start[0];
300*84d9c625SLionel Sambuc *ep = line + search_regs.end[0];
301*84d9c625SLionel Sambuc }
302*84d9c625SLionel Sambuc free(starts);
303*84d9c625SLionel Sambuc free(ends);
304*84d9c625SLionel Sambuc }
305*84d9c625SLionel Sambuc #endif
306f7cf2976SLionel Sambuc #if HAVE_POSIX_REGCOMP
307f7cf2976SLionel Sambuc {
308f7cf2976SLionel Sambuc regmatch_t rm;
309f7cf2976SLionel Sambuc int flags = (notbol) ? REG_NOTBOL : 0;
310f7cf2976SLionel Sambuc matched = !regexec(spattern, line, 1, &rm, flags);
311f7cf2976SLionel Sambuc if (matched)
312f7cf2976SLionel Sambuc {
313f7cf2976SLionel Sambuc #ifndef __WATCOMC__
314f7cf2976SLionel Sambuc *sp = line + rm.rm_so;
315f7cf2976SLionel Sambuc *ep = line + rm.rm_eo;
316f7cf2976SLionel Sambuc #else
317f7cf2976SLionel Sambuc *sp = rm.rm_sp;
318f7cf2976SLionel Sambuc *ep = rm.rm_ep;
319f7cf2976SLionel Sambuc #endif
320f7cf2976SLionel Sambuc }
321f7cf2976SLionel Sambuc }
322f7cf2976SLionel Sambuc #endif
323f7cf2976SLionel Sambuc #if HAVE_PCRE
324f7cf2976SLionel Sambuc {
325f7cf2976SLionel Sambuc int flags = (notbol) ? PCRE_NOTBOL : 0;
326f7cf2976SLionel Sambuc int ovector[3];
327f7cf2976SLionel Sambuc matched = pcre_exec(spattern, NULL, line, line_len,
328f7cf2976SLionel Sambuc 0, flags, ovector, 3) >= 0;
329f7cf2976SLionel Sambuc if (matched)
330f7cf2976SLionel Sambuc {
331f7cf2976SLionel Sambuc *sp = line + ovector[0];
332f7cf2976SLionel Sambuc *ep = line + ovector[1];
333f7cf2976SLionel Sambuc }
334f7cf2976SLionel Sambuc }
335f7cf2976SLionel Sambuc #endif
336f7cf2976SLionel Sambuc #if HAVE_RE_COMP
337f7cf2976SLionel Sambuc matched = (re_exec(line) == 1);
338f7cf2976SLionel Sambuc /*
339f7cf2976SLionel Sambuc * re_exec doesn't seem to provide a way to get the matched string.
340f7cf2976SLionel Sambuc */
341f7cf2976SLionel Sambuc *sp = *ep = NULL;
342f7cf2976SLionel Sambuc #endif
343f7cf2976SLionel Sambuc #if HAVE_REGCMP
344f7cf2976SLionel Sambuc *ep = regex(spattern, line);
345f7cf2976SLionel Sambuc matched = (*ep != NULL);
346f7cf2976SLionel Sambuc if (matched)
347f7cf2976SLionel Sambuc *sp = __loc1;
348f7cf2976SLionel Sambuc #endif
349f7cf2976SLionel Sambuc #if HAVE_V8_REGCOMP
350f7cf2976SLionel Sambuc #if HAVE_REGEXEC2
351f7cf2976SLionel Sambuc matched = regexec2(spattern, line, notbol);
352f7cf2976SLionel Sambuc #else
353f7cf2976SLionel Sambuc matched = regexec(spattern, line);
354f7cf2976SLionel Sambuc #endif
355f7cf2976SLionel Sambuc if (matched)
356f7cf2976SLionel Sambuc {
357f7cf2976SLionel Sambuc *sp = spattern->startp[0];
358f7cf2976SLionel Sambuc *ep = spattern->endp[0];
359f7cf2976SLionel Sambuc }
360f7cf2976SLionel Sambuc #endif
361f7cf2976SLionel Sambuc }
362f7cf2976SLionel Sambuc matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
363f7cf2976SLionel Sambuc ((search_type & SRCH_NO_MATCH) && !matched);
364f7cf2976SLionel Sambuc return (matched);
365f7cf2976SLionel Sambuc }
366f7cf2976SLionel Sambuc
367