xref: /openbsd-src/usr.bin/less/pattern.c (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 #include "pattern.h"
16 
17 extern int caseless;
18 extern int less_is_more;
19 
20 /*
21  * Compile a search pattern, for future use by match_pattern.
22  */
23 	static int
24 compile_pattern2(pattern, search_type, comp_pattern)
25 	char *pattern;
26 	int search_type;
27 	void **comp_pattern;
28 {
29 	if (search_type & SRCH_NO_REGEX)
30 		return (0);
31   {
32 #if HAVE_GNU_REGEX
33 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34 		ecalloc(1, sizeof(struct re_pattern_buffer));
35 	struct re_pattern_buffer **pcomp =
36 		(struct re_pattern_buffer **) comp_pattern;
37 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
38 	if (re_compile_pattern(pattern, strlen(pattern), comp))
39 	{
40 		free(comp);
41 		error("Invalid pattern", NULL_PARG);
42 		return (-1);
43 	}
44 	if (*pcomp != NULL)
45 		regfree(*pcomp);
46 	*pcomp = comp;
47 #endif
48 #if HAVE_POSIX_REGCOMP
49 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50 	regex_t **pcomp = (regex_t **) comp_pattern;
51 	if (regcomp(comp, pattern, less_is_more ? 0 : REGCOMP_FLAG))
52 	{
53 		free(comp);
54 		error("Invalid pattern", NULL_PARG);
55 		return (-1);
56 	}
57 	if (*pcomp != NULL)
58 		regfree(*pcomp);
59 	*pcomp = comp;
60 #endif
61 #if HAVE_PCRE
62 	pcre *comp;
63 	pcre **pcomp = (pcre **) comp_pattern;
64 	constant char *errstring;
65 	int erroffset;
66 	PARG parg;
67 	comp = pcre_compile(pattern, 0,
68 			&errstring, &erroffset, NULL);
69 	if (comp == NULL)
70 	{
71 		parg.p_string = (char *) errstring;
72 		error("%s", &parg);
73 		return (-1);
74 	}
75 	*pcomp = comp;
76 #endif
77 #if HAVE_RE_COMP
78 	PARG parg;
79 	int *pcomp = (int *) comp_pattern;
80 	if ((parg.p_string = re_comp(pattern)) != NULL)
81 	{
82 		error("%s", &parg);
83 		return (-1);
84 	}
85 	*pcomp = 1;
86 #endif
87 #if HAVE_REGCMP
88 	char *comp;
89 	char **pcomp = (char **) comp_pattern;
90 	if ((comp = regcmp(pattern, 0)) == NULL)
91 	{
92 		error("Invalid pattern", NULL_PARG);
93 		return (-1);
94 	}
95 	if (pcomp != NULL)
96 		free(*pcomp);
97 	*pcomp = comp;
98 #endif
99 #if HAVE_V8_REGCOMP
100 	struct regexp *comp;
101 	struct regexp **pcomp = (struct regexp **) comp_pattern;
102 	if ((comp = regcomp(pattern)) == NULL)
103 	{
104 		/*
105 		 * regcomp has already printed an error message
106 		 * via regerror().
107 		 */
108 		return (-1);
109 	}
110 	if (*pcomp != NULL)
111 		free(*pcomp);
112 	*pcomp = comp;
113 #endif
114   }
115 	return (0);
116 }
117 
118 /*
119  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
120  */
121 	public int
122 compile_pattern(pattern, search_type, comp_pattern)
123 	char *pattern;
124 	int search_type;
125 	void **comp_pattern;
126 {
127 	char *cvt_pattern;
128 	int result;
129 
130 	if (caseless != OPT_ONPLUS && (caseless != OPT_ON || !less_is_more))
131 		cvt_pattern = pattern;
132 	else
133 	{
134 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
135 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
136 	}
137 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
138 	if (cvt_pattern != pattern)
139 		free(cvt_pattern);
140 	return (result);
141 }
142 
143 /*
144  * Forget that we have a compiled pattern.
145  */
146 	public void
147 uncompile_pattern(pattern)
148 	void **pattern;
149 {
150 #if HAVE_GNU_REGEX
151 	struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
152 	if (*pcomp != NULL)
153 		regfree(*pcomp);
154 	*pcomp = NULL;
155 #endif
156 #if HAVE_POSIX_REGCOMP
157 	regex_t **pcomp = (regex_t **) pattern;
158 	if (*pcomp != NULL)
159 		regfree(*pcomp);
160 	*pcomp = NULL;
161 #endif
162 #if HAVE_PCRE
163 	pcre **pcomp = (pcre **) pattern;
164 	if (*pcomp != NULL)
165 		pcre_free(*pcomp);
166 	*pcomp = NULL;
167 #endif
168 #if HAVE_RE_COMP
169 	int *pcomp = (int *) pattern;
170 	*pcomp = 0;
171 #endif
172 #if HAVE_REGCMP
173 	char **pcomp = (char **) pattern;
174 	if (*pcomp != NULL)
175 		free(*pcomp);
176 	*pcomp = NULL;
177 #endif
178 #if HAVE_V8_REGCOMP
179 	struct regexp **pcomp = (struct regexp **) pattern;
180 	if (*pcomp != NULL)
181 		free(*pcomp);
182 	*pcomp = NULL;
183 #endif
184 }
185 
186 /*
187  * Is a compiled pattern null?
188  */
189 	public int
190 is_null_pattern(pattern)
191 	void *pattern;
192 {
193 #if HAVE_GNU_REGEX
194 	return (pattern == NULL);
195 #endif
196 #if HAVE_POSIX_REGCOMP
197 	return (pattern == NULL);
198 #endif
199 #if HAVE_PCRE
200 	return (pattern == NULL);
201 #endif
202 #if HAVE_RE_COMP
203 	return (pattern == 0);
204 #endif
205 #if HAVE_REGCMP
206 	return (pattern == NULL);
207 #endif
208 #if HAVE_V8_REGCOMP
209 	return (pattern == NULL);
210 #endif
211 }
212 
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * Searches the buf_len bytes at buf for the first exact occurrence of
 * the pattern_len bytes at pattern.
 *
 * On a match, returns 1 and stores the start of the match in *pfound
 * and the position just past the match in *pend (each only if the
 * corresponding pointer is non-NULL).  Returns 0 if there is no match.
 * An empty pattern matches at the start of any non-empty buffer and
 * never matches an empty buffer.
 */
	static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	char *pattern_end = pattern + pattern_len;
	char *buf_end = buf + buf_len;
	char *pp, *lp;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Test both bounds before dereferencing, so that neither
		 * pattern[pattern_len] nor buf[buf_len] is ever read.
		 */
		pp = pattern;
		lp = buf;
		while (pp < pattern_end && lp < buf_end && *pp == *lp)
		{
			pp++;
			lp++;
		}
		if (pp == pattern_end)
		{
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
245 
246 /*
247  * Perform a pattern match with the previously compiled pattern.
248  * Set sp and ep to the start and end of the matched string.
249  */
250 	public int
251 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
252 	void *pattern;
253 	char *tpattern;
254 	char *line;
255 	int line_len;
256 	char **sp;
257 	char **ep;
258 	int notbol;
259 	int search_type;
260 {
261 	int matched;
262 #if HAVE_GNU_REGEX
263 	struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
264 #endif
265 #if HAVE_POSIX_REGCOMP
266 	regex_t *spattern = (regex_t *) pattern;
267 #endif
268 #if HAVE_PCRE
269 	pcre *spattern = (pcre *) pattern;
270 #endif
271 #if HAVE_RE_COMP
272 	int spattern = (int) pattern;
273 #endif
274 #if HAVE_REGCMP
275 	char *spattern = (char *) pattern;
276 #endif
277 #if HAVE_V8_REGCOMP
278 	struct regexp *spattern = (struct regexp *) pattern;
279 #endif
280 
281 #if NO_REGEX
282 	search_type |= SRCH_NO_REGEX;
283 #endif
284 	if (search_type & SRCH_NO_REGEX)
285 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
286 	else
287 	{
288 #if HAVE_GNU_REGEX
289 	{
290 		struct re_registers search_regs;
291 		regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
292 		regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
293 		spattern->not_bol = notbol;
294 		re_set_registers(spattern, &search_regs, 1, starts, ends);
295 		matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
296 		if (matched)
297 		{
298 			*sp = line + search_regs.start[0];
299 			*ep = line + search_regs.end[0];
300 		}
301 		free(starts);
302 		free(ends);
303 	}
304 #endif
305 #if HAVE_POSIX_REGCOMP
306 	{
307 		regmatch_t rm;
308 		int flags = (notbol) ? REG_NOTBOL : 0;
309 #ifdef REG_STARTEND
310 		flags |= REG_STARTEND;
311 		rm.rm_so = 0;
312 		rm.rm_eo = line_len;
313 #endif
314 		matched = !regexec(spattern, line, 1, &rm, flags);
315 		if (matched)
316 		{
317 #ifndef __WATCOMC__
318 			*sp = line + rm.rm_so;
319 			*ep = line + rm.rm_eo;
320 #else
321 			*sp = rm.rm_sp;
322 			*ep = rm.rm_ep;
323 #endif
324 		}
325 	}
326 #endif
327 #if HAVE_PCRE
328 	{
329 		int flags = (notbol) ? PCRE_NOTBOL : 0;
330 		int ovector[3];
331 		matched = pcre_exec(spattern, NULL, line, line_len,
332 			0, flags, ovector, 3) >= 0;
333 		if (matched)
334 		{
335 			*sp = line + ovector[0];
336 			*ep = line + ovector[1];
337 		}
338 	}
339 #endif
340 #if HAVE_RE_COMP
341 	matched = (re_exec(line) == 1);
342 	/*
343 	 * re_exec doesn't seem to provide a way to get the matched string.
344 	 */
345 	*sp = *ep = NULL;
346 #endif
347 #if HAVE_REGCMP
348 	*ep = regex(spattern, line);
349 	matched = (*ep != NULL);
350 	if (matched)
351 		*sp = __loc1;
352 #endif
353 #if HAVE_V8_REGCOMP
354 #if HAVE_REGEXEC2
355 	matched = regexec2(spattern, line, notbol);
356 #else
357 	matched = regexec(spattern, line);
358 #endif
359 	if (matched)
360 	{
361 		*sp = spattern->startp[0];
362 		*ep = spattern->endp[0];
363 	}
364 #endif
365 	}
366 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
367 			((search_type & SRCH_NO_MATCH) && !matched);
368 	return (matched);
369 }
370 
371