1 /*
2 * Copyright (C) 1984-2024 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10 /*
11 * Routines to do pattern matching.
12 */
13
14 #include "less.h"
15
/* External state consulted by the pattern routines below. */
extern int utf_mode;
extern int is_caseless;
extern int caseless;
19
20 /*
21 * Compile a search pattern, for future use by match_pattern.
22 */
compile_pattern2(constant char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)23 static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24 {
25 if (search_type & SRCH_NO_REGEX)
26 return (0);
27 {
28 #if HAVE_GNU_REGEX
29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 ecalloc(1, sizeof(struct re_pattern_buffer));
31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32 if (re_compile_pattern(pattern, strlen(pattern), comp))
33 {
34 free(comp);
35 if (show_error)
36 error("Invalid pattern", NULL_PARG);
37 return (-1);
38 }
39 if (*comp_pattern != NULL)
40 {
41 regfree(*comp_pattern);
42 free(*comp_pattern);
43 }
44 *comp_pattern = comp;
45 #endif
46 #if HAVE_POSIX_REGCOMP
47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49 {
50 free(comp);
51 if (show_error)
52 error("Invalid pattern", NULL_PARG);
53 return (-1);
54 }
55 if (*comp_pattern != NULL)
56 {
57 regfree(*comp_pattern);
58 free(*comp_pattern);
59 }
60 *comp_pattern = comp;
61 #endif
62 #if HAVE_PCRE
63 constant char *errstring;
64 int erroffset;
65 PARG parg;
66 pcre *comp = pcre_compile(pattern,
67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68 (is_caseless ? PCRE_CASELESS : 0),
69 &errstring, &erroffset, NULL);
70 if (comp == NULL)
71 {
72 parg.p_string = (char *) errstring;
73 if (show_error)
74 error("%s", &parg);
75 return (-1);
76 }
77 *comp_pattern = comp;
78 #endif
79 #if HAVE_PCRE2
80 int errcode;
81 PCRE2_SIZE erroffset;
82 PARG parg;
83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84 (is_caseless ? PCRE2_CASELESS : 0),
85 &errcode, &erroffset, NULL);
86 if (comp == NULL)
87 {
88 if (show_error)
89 {
90 char msg[160];
91 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92 parg.p_string = msg;
93 error("%s", &parg);
94 }
95 return (-1);
96 }
97 *comp_pattern = comp;
98 #endif
99 #if HAVE_RE_COMP
100 PARG parg;
101 if ((parg.p_string = re_comp(pattern)) != NULL)
102 {
103 if (show_error)
104 error("%s", &parg);
105 return (-1);
106 }
107 *comp_pattern = 1;
108 #endif
109 #if HAVE_REGCMP
110 char *comp;
111 if ((comp = regcmp(pattern, 0)) == NULL)
112 {
113 if (show_error)
114 error("Invalid pattern", NULL_PARG);
115 return (-1);
116 }
117 if (comp_pattern != NULL)
118 free(*comp_pattern);
119 *comp_pattern = comp;
120 #endif
121 #if HAVE_V8_REGCOMP
122 struct regexp *comp;
123 reg_show_error = show_error;
124 comp = regcomp(pattern);
125 reg_show_error = 1;
126 if (comp == NULL)
127 {
128 /*
129 * regcomp has already printed an error message
130 * via regerror().
131 */
132 return (-1);
133 }
134 if (*comp_pattern != NULL)
135 free(*comp_pattern);
136 *comp_pattern = comp;
137 #endif
138 }
139 return (0);
140 }
141
142 /*
143 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144 */
compile_pattern(constant char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)145 public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146 {
147 int result;
148
149 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
150 {
151 result = compile_pattern2(pattern, search_type, comp_pattern, show_error);
152 } else
153 {
154 char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155 cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC);
156 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
157 free(cvt_pattern);
158 }
159 return (result);
160 }
161
162 /*
163 * Forget that we have a compiled pattern.
164 */
uncompile_pattern(PATTERN_TYPE * pattern)165 public void uncompile_pattern(PATTERN_TYPE *pattern)
166 {
167 #if HAVE_GNU_REGEX
168 if (*pattern != NULL)
169 {
170 regfree(*pattern);
171 free(*pattern);
172 }
173 *pattern = NULL;
174 #endif
175 #if HAVE_POSIX_REGCOMP
176 if (*pattern != NULL)
177 {
178 regfree(*pattern);
179 free(*pattern);
180 }
181 *pattern = NULL;
182 #endif
183 #if HAVE_PCRE
184 if (*pattern != NULL)
185 pcre_free(*pattern);
186 *pattern = NULL;
187 #endif
188 #if HAVE_PCRE2
189 if (*pattern != NULL)
190 pcre2_code_free(*pattern);
191 *pattern = NULL;
192 #endif
193 #if HAVE_RE_COMP
194 *pattern = 0;
195 #endif
196 #if HAVE_REGCMP
197 if (*pattern != NULL)
198 free(*pattern);
199 *pattern = NULL;
200 #endif
201 #if HAVE_V8_REGCOMP
202 if (*pattern != NULL)
203 free(*pattern);
204 *pattern = NULL;
205 #endif
206 }
207
#if 0
/*
 * Can a pattern be successfully compiled?
 * Returns 1 if it compiles, 0 if not.  The trial compilation is
 * discarded again before returning.
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE scratch;

	SET_NULL_PATTERN(scratch);
	if (compile_pattern2(pattern, 0, &scratch, 0) != 0)
		return (0);
	uncompile_pattern(&scratch);
	return (1);
}
#endif
225
226 /*
227 * Is a compiled pattern null?
228 */
is_null_pattern(PATTERN_TYPE pattern)229 public lbool is_null_pattern(PATTERN_TYPE pattern)
230 {
231 #if HAVE_GNU_REGEX
232 return (pattern == NULL);
233 #endif
234 #if HAVE_POSIX_REGCOMP
235 return (pattern == NULL);
236 #endif
237 #if HAVE_PCRE
238 return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE2
241 return (pattern == NULL);
242 #endif
243 #if HAVE_RE_COMP
244 return (pattern == 0);
245 #endif
246 #if HAVE_REGCMP
247 return (pattern == NULL);
248 #endif
249 #if HAVE_V8_REGCOMP
250 return (pattern == NULL);
251 #endif
252 #if NO_REGEX
253 return (pattern == NULL);
254 #endif
255 }
256 /*
257 * Simple pattern matching function.
258 * It supports no metacharacters like *, etc.
259 */
match(constant char * pattern,size_t pattern_len,constant char * buf,int buf_len,constant char *** sp,constant char *** ep,int nsubs)260 static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs)
261 {
262 constant char *pp;
263 constant char *lp;
264 constant char *pattern_end = pattern + pattern_len;
265 constant char *buf_end = buf + buf_len;
266
267 (void) nsubs;
268 for ( ; buf < buf_end; buf++)
269 {
270 for (pp = pattern, lp = buf; ; pp++, lp++)
271 {
272 char cp = *pp;
273 char cl = *lp;
274 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
275 cp = ASCII_TO_LOWER(cp);
276 if (cp != cl)
277 break;
278 if (pp == pattern_end || lp == buf_end)
279 break;
280 }
281 if (pp == pattern_end)
282 {
283 *(*sp)++ = buf;
284 *(*ep)++ = lp;
285 return (1);
286 }
287 }
288 **sp = **ep = NULL;
289 return (0);
290 }
291
292 /*
293 * Perform a pattern match with the previously compiled pattern.
294 * Set sp[0] and ep[0] to the start and end of the matched string.
295 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
296 * Subpatterns are defined by parentheses in the regex language.
297 */
match_pattern1(PATTERN_TYPE pattern,constant char * tpattern,constant char * line,size_t aline_len,constant char ** sp,constant char ** ep,int nsp,int notbol,int search_type)298 static int match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
299 {
300 int matched;
301 int line_len = (int) aline_len; /*{{type-issue}}*/
302
303 #if NO_REGEX
304 search_type |= SRCH_NO_REGEX;
305 #endif
306 if (search_type & SRCH_NO_REGEX)
307 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308 else
309 {
310 #if HAVE_GNU_REGEX
311 {
312 struct re_registers search_regs;
313 pattern->not_bol = notbol;
314 pattern->regs_allocated = REGS_UNALLOCATED;
315 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316 if (matched)
317 {
318 *sp++ = line + search_regs.start[0];
319 *ep++ = line + search_regs.end[0];
320 }
321 }
322 #endif
323 #if HAVE_POSIX_REGCOMP
324 {
325 #define RM_COUNT (NUM_SEARCH_COLORS+2)
326 regmatch_t rm[RM_COUNT];
327 int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329 flags |= REG_STARTEND;
330 rm[0].rm_so = 0;
331 rm[0].rm_eo = line_len;
332 #endif
333 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334 if (matched)
335 {
336 int i;
337 int ecount;
338 for (ecount = RM_COUNT; ecount > 0; ecount--)
339 if (rm[ecount-1].rm_so >= 0)
340 break;
341 if (ecount >= nsp)
342 ecount = nsp-1;
343 for (i = 0; i < ecount; i++)
344 {
345 if (rm[i].rm_so < 0)
346 {
347 *sp++ = *ep++ = line;
348 } else
349 {
350 #ifndef __WATCOMC__
351 *sp++ = line + rm[i].rm_so;
352 *ep++ = line + rm[i].rm_eo;
353 #else
354 *sp++ = rm[i].rm_sp;
355 *ep++ = rm[i].rm_ep;
356 #endif
357 }
358 }
359 }
360 }
361 #endif
362 #if HAVE_PCRE
363 {
364 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365 int ovector[OVECTOR_COUNT];
366 int flags = (notbol) ? PCRE_NOTBOL : 0;
367 int i;
368 int ecount;
369 int mcount = pcre_exec(pattern, NULL, line, line_len,
370 0, flags, ovector, OVECTOR_COUNT);
371 matched = (mcount > 0);
372 ecount = nsp-1;
373 if (ecount > mcount) ecount = mcount;
374 for (i = 0; i < ecount*2; )
375 {
376 if (ovector[i] < 0 || ovector[i+1] < 0)
377 {
378 *sp++ = *ep++ = line;
379 i += 2;
380 } else
381 {
382 *sp++ = line + ovector[i++];
383 *ep++ = line + ovector[i++];
384 }
385 }
386 }
387 #endif
388 #if HAVE_PCRE2
389 {
390 int flags = (notbol) ? PCRE2_NOTBOL : 0;
391 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393 0, flags, md, NULL);
394 matched = (mcount > 0);
395 if (matched)
396 {
397 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398 int i;
399 int ecount = nsp-1;
400 if (ecount > mcount) ecount = mcount;
401 for (i = 0; i < ecount*2; )
402 {
403 if (ovector[i] < 0 || ovector[i+1] < 0)
404 {
405 *sp++ = *ep++ = line;
406 i += 2;
407 } else
408 {
409 *sp++ = line + ovector[i++];
410 *ep++ = line + ovector[i++];
411 }
412 }
413 }
414 pcre2_match_data_free(md);
415 }
416 #endif
417 #if HAVE_RE_COMP
418 matched = (re_exec(line) == 1);
419 /*
420 * re_exec doesn't seem to provide a way to get the matched string.
421 */
422 #endif
423 #if HAVE_REGCMP
424 matched = ((*ep++ = regex(pattern, line)) != NULL);
425 if (matched)
426 *sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430 matched = regexec2(pattern, line, notbol);
431 #else
432 matched = regexec(pattern, line);
433 #endif
434 if (matched)
435 {
436 *sp++ = pattern->startp[0];
437 *ep++ = pattern->endp[0];
438 }
439 #endif
440 }
441 *sp = *ep = NULL;
442 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443 ((search_type & SRCH_NO_MATCH) && !matched);
444 return (matched);
445 }
446
match_pattern(PATTERN_TYPE pattern,constant char * tpattern,constant char * line,size_t line_len,constant char ** sp,constant char ** ep,int nsp,int notbol,int search_type)447 public int match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
448 {
449 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450 int i;
451 for (i = 1; i <= NUM_SEARCH_COLORS; i++)
452 {
453 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454 matched = 0;
455 }
456 return matched;
457 }
458
459 /*
460 * Return the name of the pattern matching library.
461 */
pattern_lib_name(void)462 public constant char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465 return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468 return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471 return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474 return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477 return ("BSD");
478 #else
479 #if HAVE_REGCMP
480 return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483 return ("Spencer V8");
484 #else
485 return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494