1 /* $NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $ */
2
3 /*
4 * Copyright (C) 1984-2023 Mark Nudelman
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Less License, as specified in the README file.
8 *
9 * For more information, see the README file.
10 */
11
12 /*
13 * Routines to do pattern matching.
14 */
15
16 #include "less.h"
17
18 extern int caseless;
19 extern int is_caseless;
20 extern int utf_mode;
21
22 /*
23 * Compile a search pattern, for future use by match_pattern.
24 */
compile_pattern2(char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)25 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
26 {
27 if (search_type & SRCH_NO_REGEX)
28 return (0);
29 {
30 #if HAVE_GNU_REGEX
31 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
32 ecalloc(1, sizeof(struct re_pattern_buffer));
33 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
34 if (re_compile_pattern(pattern, strlen(pattern), comp))
35 {
36 free(comp);
37 if (show_error)
38 error("Invalid pattern", NULL_PARG);
39 return (-1);
40 }
41 if (*comp_pattern != NULL)
42 {
43 regfree(*comp_pattern);
44 free(*comp_pattern);
45 }
46 *comp_pattern = comp;
47 #endif
48 #if HAVE_POSIX_REGCOMP
49 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
50 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
51 {
52 free(comp);
53 if (show_error)
54 error("Invalid pattern", NULL_PARG);
55 return (-1);
56 }
57 if (*comp_pattern != NULL)
58 {
59 regfree(*comp_pattern);
60 free(*comp_pattern);
61 }
62 *comp_pattern = comp;
63 #endif
64 #if HAVE_PCRE
65 constant char *errstring;
66 int erroffset;
67 PARG parg;
68 pcre *comp = pcre_compile(pattern,
69 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
70 (is_caseless ? PCRE_CASELESS : 0),
71 &errstring, &erroffset, NULL);
72 if (comp == NULL)
73 {
74 parg.p_string = (char *) errstring;
75 if (show_error)
76 error("%s", &parg);
77 return (-1);
78 }
79 *comp_pattern = comp;
80 #endif
81 #if HAVE_PCRE2
82 int errcode;
83 PCRE2_SIZE erroffset;
84 PARG parg;
85 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
86 (is_caseless ? PCRE2_CASELESS : 0),
87 &errcode, &erroffset, NULL);
88 if (comp == NULL)
89 {
90 if (show_error)
91 {
92 char msg[160];
93 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94 parg.p_string = msg;
95 error("%s", &parg);
96 }
97 return (-1);
98 }
99 *comp_pattern = comp;
100 #endif
101 #if HAVE_RE_COMP
102 PARG parg;
103 if ((parg.p_string = re_comp(pattern)) != NULL)
104 {
105 if (show_error)
106 error("%s", &parg);
107 return (-1);
108 }
109 *comp_pattern = 1;
110 #endif
111 #if HAVE_REGCMP
112 char *comp;
113 if ((comp = regcmp(pattern, 0)) == NULL)
114 {
115 if (show_error)
116 error("Invalid pattern", NULL_PARG);
117 return (-1);
118 }
119 if (comp_pattern != NULL)
120 free(*comp_pattern);
121 *comp_pattern = comp;
122 #endif
123 #if HAVE_V8_REGCOMP
124 struct regexp *comp;
125 reg_show_error = show_error;
126 comp = regcomp(pattern);
127 reg_show_error = 1;
128 if (comp == NULL)
129 {
130 /*
131 * regcomp has already printed an error message
132 * via regerror().
133 */
134 return (-1);
135 }
136 if (*comp_pattern != NULL)
137 free(*comp_pattern);
138 *comp_pattern = comp;
139 #endif
140 }
141 return (0);
142 }
143
144 /*
145 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146 */
compile_pattern(char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)147 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
148 {
149 char *cvt_pattern;
150 int result;
151
152 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
153 cvt_pattern = pattern;
154 else
155 {
156 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
157 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
158 }
159 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
160 if (cvt_pattern != pattern)
161 free(cvt_pattern);
162 return (result);
163 }
164
165 /*
166 * Forget that we have a compiled pattern.
167 */
uncompile_pattern(PATTERN_TYPE * pattern)168 public void uncompile_pattern(PATTERN_TYPE *pattern)
169 {
170 #if HAVE_GNU_REGEX
171 if (*pattern != NULL)
172 {
173 regfree(*pattern);
174 free(*pattern);
175 }
176 *pattern = NULL;
177 #endif
178 #if HAVE_POSIX_REGCOMP
179 if (*pattern != NULL)
180 {
181 regfree(*pattern);
182 free(*pattern);
183 }
184 *pattern = NULL;
185 #endif
186 #if HAVE_PCRE
187 if (*pattern != NULL)
188 pcre_free(*pattern);
189 *pattern = NULL;
190 #endif
191 #if HAVE_PCRE2
192 if (*pattern != NULL)
193 pcre2_code_free(*pattern);
194 *pattern = NULL;
195 #endif
196 #if HAVE_RE_COMP
197 *pattern = 0;
198 #endif
199 #if HAVE_REGCMP
200 if (*pattern != NULL)
201 free(*pattern);
202 *pattern = NULL;
203 #endif
204 #if HAVE_V8_REGCOMP
205 if (*pattern != NULL)
206 free(*pattern);
207 *pattern = NULL;
208 #endif
209 }
210
#if 0
/*
 * Check whether a pattern compiles.
 * The pattern is compiled into a scratch slot without reporting errors,
 * and the result is discarded again.
 * Returns 1 if compilation succeeded, 0 otherwise.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE scratch;

	SET_NULL_PATTERN(scratch);
	if (compile_pattern2(pattern, 0, &scratch, 0) != 0)
		return (0);
	uncompile_pattern(&scratch);
	return (1);
}
#endif
228
229 /*
230 * Is a compiled pattern null?
231 */
is_null_pattern(PATTERN_TYPE pattern)232 public int is_null_pattern(PATTERN_TYPE pattern)
233 {
234 #if HAVE_GNU_REGEX
235 return (pattern == NULL);
236 #endif
237 #if HAVE_POSIX_REGCOMP
238 return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE
241 return (pattern == NULL);
242 #endif
243 #if HAVE_PCRE2
244 return (pattern == NULL);
245 #endif
246 #if HAVE_RE_COMP
247 return (pattern == 0);
248 #endif
249 #if HAVE_REGCMP
250 return (pattern == NULL);
251 #endif
252 #if HAVE_V8_REGCOMP
253 return (pattern == NULL);
254 #endif
255 #if NO_REGEX
256 return (pattern == NULL);
257 #endif
258 }
259 /*
260 * Simple pattern matching function.
261 * It supports no metacharacters like *, etc.
262 */
match(char * pattern,int pattern_len,char * buf,int buf_len,char *** sp,char *** ep,int nsubs)263 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
264 {
265 char *pp, *lp;
266 char *pattern_end = pattern + pattern_len;
267 char *buf_end = buf + buf_len;
268
269 for ( ; buf < buf_end; buf++)
270 {
271 for (pp = pattern, lp = buf; ; pp++, lp++)
272 {
273 char cp = *pp;
274 char cl = *lp;
275 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
276 cp = ASCII_TO_LOWER(cp);
277 if (cp != cl)
278 break;
279 if (pp == pattern_end || lp == buf_end)
280 break;
281 }
282 if (pp == pattern_end)
283 {
284 *(*sp)++ = buf;
285 *(*ep)++ = lp;
286 return (1);
287 }
288 }
289 **sp = **ep = NULL;
290 return (0);
291 }
292
293 /*
294 * Perform a pattern match with the previously compiled pattern.
295 * Set sp[0] and ep[0] to the start and end of the matched string.
296 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
297 * Subpatterns are defined by parentheses in the regex language.
298 */
match_pattern1(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)299 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
300 {
301 int matched;
302
303 #if NO_REGEX
304 search_type |= SRCH_NO_REGEX;
305 #endif
306 if (search_type & SRCH_NO_REGEX)
307 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308 else
309 {
310 #if HAVE_GNU_REGEX
311 {
312 struct re_registers search_regs;
313 pattern->not_bol = notbol;
314 pattern->regs_allocated = REGS_UNALLOCATED;
315 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316 if (matched)
317 {
318 *sp++ = line + search_regs.start[0];
319 *ep++ = line + search_regs.end[0];
320 }
321 }
322 #endif
323 #if HAVE_POSIX_REGCOMP
324 {
325 #define RM_COUNT (NUM_SEARCH_COLORS+2)
326 regmatch_t rm[RM_COUNT];
327 int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329 flags |= REG_STARTEND;
330 rm[0].rm_so = 0;
331 rm[0].rm_eo = line_len;
332 #endif
333 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334 if (matched)
335 {
336 int i;
337 int ecount;
338 for (ecount = RM_COUNT; ecount > 0; ecount--)
339 if (rm[ecount-1].rm_so >= 0)
340 break;
341 if (ecount >= nsp)
342 ecount = nsp-1;
343 for (i = 0; i < ecount; i++)
344 {
345 if (rm[i].rm_so < 0)
346 {
347 *sp++ = *ep++ = line;
348 } else
349 {
350 #ifndef __WATCOMC__
351 *sp++ = line + rm[i].rm_so;
352 *ep++ = line + rm[i].rm_eo;
353 #else
354 *sp++ = rm[i].rm_sp;
355 *ep++ = rm[i].rm_ep;
356 #endif
357 }
358 }
359 }
360 }
361 #endif
362 #if HAVE_PCRE
363 {
364 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365 int ovector[OVECTOR_COUNT];
366 int flags = (notbol) ? PCRE_NOTBOL : 0;
367 int i;
368 int ecount;
369 int mcount = pcre_exec(pattern, NULL, line, line_len,
370 0, flags, ovector, OVECTOR_COUNT);
371 matched = (mcount > 0);
372 ecount = nsp-1;
373 if (ecount > mcount) ecount = mcount;
374 for (i = 0; i < ecount*2; )
375 {
376 if (ovector[i] < 0 || ovector[i+1] < 0)
377 {
378 *sp++ = *ep++ = line;
379 i += 2;
380 } else
381 {
382 *sp++ = line + ovector[i++];
383 *ep++ = line + ovector[i++];
384 }
385 }
386 }
387 #endif
388 #if HAVE_PCRE2
389 {
390 int flags = (notbol) ? PCRE2_NOTBOL : 0;
391 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393 0, flags, md, NULL);
394 matched = (mcount > 0);
395 if (matched)
396 {
397 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398 int i;
399 int ecount = nsp-1;
400 if (ecount > mcount) ecount = mcount;
401 for (i = 0; i < ecount*2; )
402 {
403 if (ovector[i] < 0 || ovector[i+1] < 0)
404 {
405 *sp++ = *ep++ = line;
406 i += 2;
407 } else
408 {
409 *sp++ = line + ovector[i++];
410 *ep++ = line + ovector[i++];
411 }
412 }
413 }
414 pcre2_match_data_free(md);
415 }
416 #endif
417 #if HAVE_RE_COMP
418 matched = (re_exec(line) == 1);
419 /*
420 * re_exec doesn't seem to provide a way to get the matched string.
421 */
422 #endif
423 #if HAVE_REGCMP
424 matched = ((*ep++ = regex(pattern, line)) != NULL);
425 if (matched)
426 *sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430 matched = regexec2(pattern, line, notbol);
431 #else
432 matched = regexec(pattern, line);
433 #endif
434 if (matched)
435 {
436 *sp++ = pattern->startp[0];
437 *ep++ = pattern->endp[0];
438 }
439 #endif
440 }
441 *sp = *ep = NULL;
442 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443 ((search_type & SRCH_NO_MATCH) && !matched);
444 return (matched);
445 }
446
match_pattern(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)447 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
448 {
449 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450 int i;
451 for (i = 1; i <= NUM_SEARCH_COLORS; i++)
452 {
453 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454 matched = 0;
455 }
456 return matched;
457 }
458
459 /*
460 * Return the name of the pattern matching library.
461 */
pattern_lib_name(void)462 public char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465 return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468 return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471 return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474 return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477 return ("BSD");
478 #else
479 #if HAVE_REGCMP
480 return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483 return ("Spencer V8");
484 #else
485 return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494