xref: /netbsd-src/external/bsd/less/dist/pattern.c (revision 838f5788460f0f133b15d706e644d692a9d4d6ec)
1 /*	$NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $	*/
2 
3 /*
4  * Copyright (C) 1984-2023  Mark Nudelman
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to do pattern matching.
14  */
15 
16 #include "less.h"
17 
18 extern int caseless;	/* compared with OPT_ONPLUS in this file to decide whether pattern text is lowercased */
19 extern int is_caseless;	/* nonzero: regexes are compiled with the library's case-insensitive flag */
20 extern int utf_mode;	/* nonzero: PCRE patterns are compiled with PCRE_UTF8 | PCRE_NO_UTF8_CHECK */
21 
22 /*
23  * Compile a search pattern, for future use by match_pattern.
24  */
compile_pattern2(char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)25 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
26 {
27 	if (search_type & SRCH_NO_REGEX)
28 		return (0);
29   {
30 #if HAVE_GNU_REGEX
31 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
32 		ecalloc(1, sizeof(struct re_pattern_buffer));
33 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
34 	if (re_compile_pattern(pattern, strlen(pattern), comp))
35 	{
36 		free(comp);
37 		if (show_error)
38 			error("Invalid pattern", NULL_PARG);
39 		return (-1);
40 	}
41 	if (*comp_pattern != NULL)
42 	{
43 		regfree(*comp_pattern);
44 		free(*comp_pattern);
45 	}
46 	*comp_pattern = comp;
47 #endif
48 #if HAVE_POSIX_REGCOMP
49 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50 	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
51 	{
52 		free(comp);
53 		if (show_error)
54 			error("Invalid pattern", NULL_PARG);
55 		return (-1);
56 	}
57 	if (*comp_pattern != NULL)
58 	{
59 		regfree(*comp_pattern);
60 		free(*comp_pattern);
61 	}
62 	*comp_pattern = comp;
63 #endif
64 #if HAVE_PCRE
65 	constant char *errstring;
66 	int erroffset;
67 	PARG parg;
68 	pcre *comp = pcre_compile(pattern,
69 			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
70 			(is_caseless ? PCRE_CASELESS : 0),
71 			&errstring, &erroffset, NULL);
72 	if (comp == NULL)
73 	{
74 		parg.p_string = (char *) errstring;
75 		if (show_error)
76 			error("%s", &parg);
77 		return (-1);
78 	}
79 	*comp_pattern = comp;
80 #endif
81 #if HAVE_PCRE2
82 	int errcode;
83 	PCRE2_SIZE erroffset;
84 	PARG parg;
85 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
86 			(is_caseless ? PCRE2_CASELESS : 0),
87 			&errcode, &erroffset, NULL);
88 	if (comp == NULL)
89 	{
90 		if (show_error)
91 		{
92 			char msg[160];
93 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94 			parg.p_string = msg;
95 			error("%s", &parg);
96 		}
97 		return (-1);
98 	}
99 	*comp_pattern = comp;
100 #endif
101 #if HAVE_RE_COMP
102 	PARG parg;
103 	if ((parg.p_string = re_comp(pattern)) != NULL)
104 	{
105 		if (show_error)
106 			error("%s", &parg);
107 		return (-1);
108 	}
109 	*comp_pattern = 1;
110 #endif
111 #if HAVE_REGCMP
112 	char *comp;
113 	if ((comp = regcmp(pattern, 0)) == NULL)
114 	{
115 		if (show_error)
116 			error("Invalid pattern", NULL_PARG);
117 		return (-1);
118 	}
119 	if (comp_pattern != NULL)
120 		free(*comp_pattern);
121 	*comp_pattern = comp;
122 #endif
123 #if HAVE_V8_REGCOMP
124 	struct regexp *comp;
125 	reg_show_error = show_error;
126 	comp = regcomp(pattern);
127 	reg_show_error = 1;
128 	if (comp == NULL)
129 	{
130 		/*
131 		 * regcomp has already printed an error message
132 		 * via regerror().
133 		 */
134 		return (-1);
135 	}
136 	if (*comp_pattern != NULL)
137 		free(*comp_pattern);
138 	*comp_pattern = comp;
139 #endif
140   }
141 	return (0);
142 }
143 
144 /*
145  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146  */
compile_pattern(char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)147 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
148 {
149 	char *cvt_pattern;
150 	int result;
151 
152 	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
153 		cvt_pattern = pattern;
154 	else
155 	{
156 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
157 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
158 	}
159 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
160 	if (cvt_pattern != pattern)
161 		free(cvt_pattern);
162 	return (result);
163 }
164 
165 /*
166  * Forget that we have a compiled pattern.
167  */
uncompile_pattern(PATTERN_TYPE * pattern)168 public void uncompile_pattern(PATTERN_TYPE *pattern)
169 {
170 #if HAVE_GNU_REGEX
171 	if (*pattern != NULL)
172 	{
173 		regfree(*pattern);
174 		free(*pattern);
175 	}
176 	*pattern = NULL;
177 #endif
178 #if HAVE_POSIX_REGCOMP
179 	if (*pattern != NULL)
180 	{
181 		regfree(*pattern);
182 		free(*pattern);
183 	}
184 	*pattern = NULL;
185 #endif
186 #if HAVE_PCRE
187 	if (*pattern != NULL)
188 		pcre_free(*pattern);
189 	*pattern = NULL;
190 #endif
191 #if HAVE_PCRE2
192 	if (*pattern != NULL)
193 		pcre2_code_free(*pattern);
194 	*pattern = NULL;
195 #endif
196 #if HAVE_RE_COMP
197 	*pattern = 0;
198 #endif
199 #if HAVE_REGCMP
200 	if (*pattern != NULL)
201 		free(*pattern);
202 	*pattern = NULL;
203 #endif
204 #if HAVE_V8_REGCOMP
205 	if (*pattern != NULL)
206 		free(*pattern);
207 	*pattern = NULL;
208 #endif
209 }
210 
#if 0
/*
 * Report whether a pattern compiles: returns 1 if compile_pattern2
 * accepts it (without printing errors), 0 otherwise.  The trial
 * compilation is thrown away.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE trial;

	SET_NULL_PATTERN(trial);
	if (compile_pattern2(pattern, 0, &trial, 0) != 0)
		return (0);
	uncompile_pattern(&trial);
	return (1);
}
#endif
228 
229 /*
230  * Is a compiled pattern null?
231  */
is_null_pattern(PATTERN_TYPE pattern)232 public int is_null_pattern(PATTERN_TYPE pattern)
233 {
234 #if HAVE_GNU_REGEX
235 	return (pattern == NULL);
236 #endif
237 #if HAVE_POSIX_REGCOMP
238 	return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE
241 	return (pattern == NULL);
242 #endif
243 #if HAVE_PCRE2
244 	return (pattern == NULL);
245 #endif
246 #if HAVE_RE_COMP
247 	return (pattern == 0);
248 #endif
249 #if HAVE_REGCMP
250 	return (pattern == NULL);
251 #endif
252 #if HAVE_V8_REGCOMP
253 	return (pattern == NULL);
254 #endif
255 #if NO_REGEX
256 	return (pattern == NULL);
257 #endif
258 }
259 /*
260  * Simple pattern matching function.
261  * It supports no metacharacters like *, etc.
262  */
match(char * pattern,int pattern_len,char * buf,int buf_len,char *** sp,char *** ep,int nsubs)263 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
264 {
265 	char *pp, *lp;
266 	char *pattern_end = pattern + pattern_len;
267 	char *buf_end = buf + buf_len;
268 
269 	for ( ;  buf < buf_end;  buf++)
270 	{
271 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
272 		{
273 			char cp = *pp;
274 			char cl = *lp;
275 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
276 				cp = ASCII_TO_LOWER(cp);
277 			if (cp != cl)
278 				break;
279 			if (pp == pattern_end || lp == buf_end)
280 				break;
281 		}
282 		if (pp == pattern_end)
283 		{
284 			*(*sp)++ = buf;
285 			*(*ep)++ = lp;
286 			return (1);
287 		}
288 	}
289 	**sp = **ep = NULL;
290 	return (0);
291 }
292 
293 /*
294  * Perform a pattern match with the previously compiled pattern.
295  * Set sp[0] and ep[0] to the start and end of the matched string.
296  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
297  * Subpatterns are defined by parentheses in the regex language.
298  */
match_pattern1(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)299 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
300 {
301 	int matched;
302 
303 #if NO_REGEX
304 	search_type |= SRCH_NO_REGEX;
305 #endif
306 	if (search_type & SRCH_NO_REGEX)
307 		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308 	else
309 	{
310 #if HAVE_GNU_REGEX
311 	{
312 		struct re_registers search_regs;
313 		pattern->not_bol = notbol;
314 		pattern->regs_allocated = REGS_UNALLOCATED;
315 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316 		if (matched)
317 		{
318 			*sp++ = line + search_regs.start[0];
319 			*ep++ = line + search_regs.end[0];
320 		}
321 	}
322 #endif
323 #if HAVE_POSIX_REGCOMP
324 	{
325 		#define RM_COUNT (NUM_SEARCH_COLORS+2)
326 		regmatch_t rm[RM_COUNT];
327 		int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329 		flags |= REG_STARTEND;
330 		rm[0].rm_so = 0;
331 		rm[0].rm_eo = line_len;
332 #endif
333 		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334 		if (matched)
335 		{
336 			int i;
337 			int ecount;
338 			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
339 				if (rm[ecount-1].rm_so >= 0)
340 					break;
341 			if (ecount >= nsp)
342 				ecount = nsp-1;
343 			for (i = 0;  i < ecount;  i++)
344 			{
345 				if (rm[i].rm_so < 0)
346 				{
347 					*sp++ = *ep++ = line;
348 				} else
349 				{
350 #ifndef __WATCOMC__
351 					*sp++ = line + rm[i].rm_so;
352 					*ep++ = line + rm[i].rm_eo;
353 #else
354 					*sp++ = rm[i].rm_sp;
355 					*ep++ = rm[i].rm_ep;
356 #endif
357 				}
358 			}
359 		}
360 	}
361 #endif
362 #if HAVE_PCRE
363 	{
364 		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365 		int ovector[OVECTOR_COUNT];
366 		int flags = (notbol) ? PCRE_NOTBOL : 0;
367 		int i;
368 		int ecount;
369 		int mcount = pcre_exec(pattern, NULL, line, line_len,
370 			0, flags, ovector, OVECTOR_COUNT);
371 		matched = (mcount > 0);
372 		ecount = nsp-1;
373 		if (ecount > mcount) ecount = mcount;
374 		for (i = 0;  i < ecount*2; )
375 		{
376 			if (ovector[i] < 0 || ovector[i+1] < 0)
377 			{
378 				*sp++ = *ep++ = line;
379 				i += 2;
380 			} else
381 			{
382 				*sp++ = line + ovector[i++];
383 				*ep++ = line + ovector[i++];
384 			}
385 		}
386 	}
387 #endif
388 #if HAVE_PCRE2
389 	{
390 		int flags = (notbol) ? PCRE2_NOTBOL : 0;
391 		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392 		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393 			0, flags, md, NULL);
394 		matched = (mcount > 0);
395 		if (matched)
396 		{
397 			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398 			int i;
399 			int ecount = nsp-1;
400 			if (ecount > mcount) ecount = mcount;
401 			for (i = 0;  i < ecount*2; )
402 			{
403 				if (ovector[i] < 0 || ovector[i+1] < 0)
404 				{
405 					*sp++ = *ep++ = line;
406 					i += 2;
407 				} else
408 				{
409 					*sp++ = line + ovector[i++];
410 					*ep++ = line + ovector[i++];
411 				}
412 			}
413 		}
414 		pcre2_match_data_free(md);
415 	}
416 #endif
417 #if HAVE_RE_COMP
418 	matched = (re_exec(line) == 1);
419 	/*
420 	 * re_exec doesn't seem to provide a way to get the matched string.
421 	 */
422 #endif
423 #if HAVE_REGCMP
424 	matched = ((*ep++ = regex(pattern, line)) != NULL);
425 	if (matched)
426 		*sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430 	matched = regexec2(pattern, line, notbol);
431 #else
432 	matched = regexec(pattern, line);
433 #endif
434 	if (matched)
435 	{
436 		*sp++ = pattern->startp[0];
437 		*ep++ = pattern->endp[0];
438 	}
439 #endif
440 	}
441 	*sp = *ep = NULL;
442 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443 			((search_type & SRCH_NO_MATCH) && !matched);
444 	return (matched);
445 }
446 
match_pattern(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)447 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
448 {
449 	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450 	int i;
451 	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
452 	{
453 		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454 			matched = 0;
455 	}
456 	return matched;
457 }
458 
459 /*
460  * Return the name of the pattern matching library.
461  */
pattern_lib_name(void)462 public char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465 	return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468 	return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471 	return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474 	return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477 	return ("BSD");
478 #else
479 #if HAVE_REGCMP
480 	return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483 	return ("Spencer V8");
484 #else
485 	return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494