1 /* $NetBSD: dict_pcre.c,v 1.5 2023/12/23 20:30:46 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* dict_pcre 3
6 /* SUMMARY
7 /* dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /* #include <dict_pcre.h>
10 /*
11 /* DICT *dict_pcre_open(name, dummy, dict_flags)
12 /* const char *name;
13 /* int dummy;
14 /* int dict_flags;
15 /* DESCRIPTION
16 /* dict_pcre_open() opens the named file and compiles the contained
17 /* regular expressions. The result object can be used to match strings
18 /* against the table.
19 /* SEE ALSO
20 /* dict(3) generic dictionary manager
21 /* pcre_table(5) PCRE table configuration
22 /* AUTHOR(S)
23 /* Andrew McNamara
24 /* andrewm@connect.com.au
25 /* connect.com.au Pty. Ltd.
26 /* Level 3, 213 Miller St
27 /* North Sydney, NSW, Australia
28 /*
29 /* Wietse Venema
30 /* IBM T.J. Watson Research
31 /* P.O. Box 704
32 /* Yorktown Heights, NY 10598, USA
33 /*
34 /* Wietse Venema
35 /* Google, Inc.
36 /* 111 8th Avenue
37 /* New York, NY 10011, USA
38 /*--*/
39
40 #include "sys_defs.h"
41
42 #ifdef HAS_PCRE
43
44 /* System library. */
45
46 #include <sys/stat.h>
47 #include <stdio.h> /* sprintf() prototype */
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <string.h>
51 #include <ctype.h>
52
53 #ifdef STRCASECMP_IN_STRINGS_H
54 #include <strings.h>
55 #endif
56
57 #if HAS_PCRE == 1
58 #include <pcre.h>
59 #elif HAS_PCRE == 2
60 #define PCRE2_CODE_UNIT_WIDTH 8
61 #include <pcre2.h>
62 #else
63 #error "define HAS_PCRE=2 or HAS_PCRE=1"
64 #endif
65
66 /* Utility library. */
67
68 #include "mymalloc.h"
69 #include "msg.h"
70 #include "safe.h"
71 #include "vstream.h"
72 #include "vstring.h"
73 #include "stringops.h"
74 #include "readlline.h"
75 #include "dict.h"
76 #include "dict_pcre.h"
77 #include "mac_parse.h"
78 #include "warn_stat.h"
79 #include "mvect.h"
80
/*
 * Backwards compatibility. The rest of this file uses one set of
 * DICT_PCRE_* names; they expand to the legacy PCRE API when HAS_PCRE is 1
 * and to the PCRE2 API otherwise.
 */
#if HAS_PCRE == 1

/* Legacy PCRE: release pcre_study() output, with or without JIT support. */
#ifdef PCRE_STUDY_JIT_COMPILE
#define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x)
#else
#define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x))
#endif

/*
 * Legacy PCRE: compiled pattern type and its destructor. dict_pcre_open()
 * points pcre_malloc at mymalloc, hence myfree here.
 */
#define DICT_PCRE_CODE              pcre
#define DICT_PCRE_CODE_FREE(x)      myfree((void *) (x))

/* Legacy PCRE: old-style study "hints" play the role of match_data. */
#define DICT_PCRE_MATCH_HINT_TYPE   pcre_extra *
#define DICT_PCRE_MATCH_HINT_NAME   hints
#define DICT_PCRE_MATCH_HINT(x)     ((x)->DICT_PCRE_MATCH_HINT_NAME)
#define DICT_PCRE_MATCH_HINT_FREE(x) do { \
        if (DICT_PCRE_MATCH_HINT(x)) \
            DICT_PCRE_FREE_STUDY(DICT_PCRE_MATCH_HINT(x)); \
    } while (0)

/* Legacy PCRE: pattern compile options. */
#define DICT_PCRE_CASELESS          PCRE_CASELESS
#define DICT_PCRE_MULTILINE         PCRE_MULTILINE
#define DICT_PCRE_DOTALL            PCRE_DOTALL
#define DICT_PCRE_EXTENDED          PCRE_EXTENDED
#define DICT_PCRE_ANCHORED          PCRE_ANCHORED
#define DICT_PCRE_DOLLAR_ENDONLY    PCRE_DOLLAR_ENDONLY
#define DICT_PCRE_UNGREEDY          PCRE_UNGREEDY
#define DICT_PCRE_EXTRA             PCRE_EXTRA

/* Legacy PCRE: type that receives the capture count, when available. */
#ifdef PCRE_INFO_CAPTURECOUNT
#define DICT_PCRE_CAPTURECOUNT_T    int
#endif

#else                                   /* HAS_PCRE */

/* PCRE2: compiled pattern type and its destructor. */
#define DICT_PCRE_CODE              pcre2_code
#define DICT_PCRE_CODE_FREE(x)      pcre2_code_free(x)

/* PCRE2: new-style match_data replaces the old-style hints. */
#define DICT_PCRE_MATCH_HINT_TYPE   pcre2_match_data *
#define DICT_PCRE_MATCH_HINT_NAME   match_data
#define DICT_PCRE_MATCH_HINT(x)     ((x)->DICT_PCRE_MATCH_HINT_NAME)
#define DICT_PCRE_MATCH_HINT_FREE(x) \
        pcre2_match_data_free(DICT_PCRE_MATCH_HINT(x))

/* PCRE2: pattern compile options. The legacy "extra" option maps to 0. */
#define DICT_PCRE_CASELESS          PCRE2_CASELESS
#define DICT_PCRE_MULTILINE         PCRE2_MULTILINE
#define DICT_PCRE_DOTALL            PCRE2_DOTALL
#define DICT_PCRE_EXTENDED          PCRE2_EXTENDED
#define DICT_PCRE_ANCHORED          PCRE2_ANCHORED
#define DICT_PCRE_DOLLAR_ENDONLY    PCRE2_DOLLAR_ENDONLY
#define DICT_PCRE_UNGREEDY          PCRE2_UNGREEDY
#define DICT_PCRE_EXTRA             0

/* PCRE2: type that receives the capture count. */
#define DICT_PCRE_CAPTURECOUNT_T    uint32_t

#endif                                  /* HAS_PCRE */
147
/*
 * Rule opcodes. Support for IF/ENDIF is based on an idea by Bert Driehuis.
 */
#define DICT_PCRE_OP_MATCH      1       /* Match this regexp */
#define DICT_PCRE_OP_IF         2       /* Increase if/endif nesting on match */
#define DICT_PCRE_OP_ENDIF      3       /* Decrease if/endif nesting on match */

/*
 * Legacy PCRE only: upper bound on the number of captured strings, i.e.
 * essentially the maximal number of (..) groups. It sizes the offsets
 * array that is handed to pcre_exec().
 */
#if HAS_PCRE == 1
#define PCRE_MAX_CAPTURE        99
#endif
161
/*
 * A regular expression before compilation, as extracted from one table
 * line by dict_pcre_get_pattern().
 */
typedef struct {
    char   *regexp;                     /* pattern text, points into line */
    int     options;                    /* DICT_PCRE_* compile options */
    int     match;                      /* 1: positive, 0: negated match */
} DICT_PCRE_REGEXP;
170
171 typedef struct {
172 DICT_PCRE_CODE *pattern; /* the compiled pattern */
173 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
174 } DICT_PCRE_ENGINE;
175
/*
 * Generic compiled rule. The match and IF rule types embed this structure
 * as their first member; an ENDIF rule consists of just this structure.
 */
typedef struct DICT_PCRE_RULE {
    int     op;                         /* one of DICT_PCRE_OP_MATCH/IF/ENDIF */
    int     lineno;                     /* line number in the table file */
    struct DICT_PCRE_RULE *next;        /* successor in the rule list */
} DICT_PCRE_RULE;
184
185 typedef struct {
186 DICT_PCRE_RULE rule; /* generic part */
187 DICT_PCRE_CODE *pattern; /* compiled pattern */
188 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
189 char *replacement; /* replacement string */
190 int match; /* positive or negative match */
191 size_t max_sub; /* largest $number in replacement */
192 } DICT_PCRE_MATCH_RULE;
193
194 typedef struct {
195 DICT_PCRE_RULE rule; /* generic members */
196 DICT_PCRE_CODE *pattern; /* compiled pattern */
197 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
198 int match; /* positive or negative match */
199 struct DICT_PCRE_RULE *endif_rule; /* matching endif rule */
200 } DICT_PCRE_IF_RULE;
201
202 /*
203 * PCRE map.
204 */
205 typedef struct {
206 DICT dict; /* generic members */
207 DICT_PCRE_RULE *head;
208 VSTRING *expansion_buf; /* lookup result */
209 } DICT_PCRE;
210
#if HAS_PCRE == 1

/*
 * Zero until dict_pcre_open() has installed the memory allocator hooks of
 * the legacy PCRE library; this needs to be done only once.
 */
static int dict_pcre_init = 0;

#endif
215
216 /*
217 * Context for $number expansion callback.
218 */
219 typedef struct {
220 DICT_PCRE *dict_pcre; /* the dictionary handle */
221 #if HAS_PCRE == 1
222 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */
223 #endif
224 const char *lookup_string; /* string against which we match */
225 #if HAS_PCRE == 1
226 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */
227 #else /* HAS_PCRE */
228 PCRE2_SIZE *ovector; /* matched string offsets */
229 #endif /* HAS_PCRE */
230 int matches; /* Count of cuts */
231 } DICT_PCRE_EXPAND_CONTEXT;
232
/*
 * State for the parse-time $number pre-scan callback, dict_pcre_prescan().
 */
typedef struct {
    const char *mapname;                /* table name, for warnings */
    int     lineno;                     /* table line, for warnings */
    size_t  max_sub;                    /* largest $number seen so far */
    char   *literal;                    /* replacement text with $$ -> $,
                                         * kept only if there is no $number */
} DICT_PCRE_PRESCAN_CONTEXT;
242
/*
 * Compatibility: supply MAC_PARSE_OK when mac_parse.h does not define it.
 */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK            0
#endif

/*
 * Readable names for the "nothing special" arguments of the match calls:
 * start matching at offset zero, with no match-time options.
 */
#define NULL_STARTOFFSET        (0)
#define NULL_EXEC_OPTIONS       (0)
255
256 /* dict_pcre_expand - replace $number with matched text */
257
dict_pcre_expand(int type,VSTRING * buf,void * ptr)258 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
259 {
260 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
261 DICT_PCRE *dict_pcre = ctxt->dict_pcre;
262 int n;
263
264 #if HAS_PCRE == 1
265 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
266 const char *pp;
267 int ret;
268
269 #else
270 PCRE2_SPTR start;
271 PCRE2_SIZE length;
272
273 #endif
274
275 /*
276 * Replace $0-${99} with strings cut from matched text.
277 */
278 if (type == MAC_PARSE_VARNAME) {
279 n = atoi(vstring_str(buf));
280 #if HAS_PCRE == 1
281 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
282 ctxt->matches, n, &pp);
283 if (ret < 0) {
284 if (ret == PCRE_ERROR_NOSUBSTRING)
285 return (MAC_PARSE_UNDEF);
286 else
287 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
288 dict_pcre->dict.name, match_rule->rule.lineno, ret);
289 }
290 if (*pp == 0) {
291 myfree((void *) pp);
292 return (MAC_PARSE_UNDEF);
293 }
294 vstring_strcat(dict_pcre->expansion_buf, pp);
295 myfree((void *) pp);
296 return (MAC_PARSE_OK);
297 #else
298 start = (unsigned char *) ctxt->lookup_string + ctxt->ovector[2 * n];
299 length = ctxt->ovector[2 * n + 1] - ctxt->ovector[2 * n];
300 if (length == 0)
301 return (MAC_PARSE_UNDEF);
302 vstring_strncat(dict_pcre->expansion_buf, (char *) start, length);
303 return (MAC_PARSE_OK);
304 #endif
305 }
306
307 /*
308 * Straight text - duplicate with no substitution.
309 */
310 else {
311 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
312 return (MAC_PARSE_OK);
313 }
314 }
315
#if HAS_PCRE == 2

/* Space reserved for the text of one PCRE2 error message. */
#define DICT_PCRE_GET_ERROR_BUF_LEN     256

/* dict_pcre_get_error - convert PCRE2 error number to text */

/*
 * Stores the PCRE2 message for errval in buf, or a generic "unexpected
 * pcre2 error code" text when the library does not produce one. Returns
 * a pointer to the string inside buf, so the result is valid only as long
 * as buf is not changed or destroyed.
 */
static char *dict_pcre_get_error(VSTRING *buf, int errval)
{
    ssize_t text_len;

    VSTRING_SPACE(buf, DICT_PCRE_GET_ERROR_BUF_LEN);
    text_len = pcre2_get_error_message(errval,
                                       (unsigned char *) vstring_str(buf),
                                       DICT_PCRE_GET_ERROR_BUF_LEN);
    if (text_len > 0)
        vstring_set_payload_size(buf, text_len);
    else
        vstring_sprintf(buf, "unexpected pcre2 error code %d", errval);
    return (vstring_str(buf));
}

#endif                                  /* HAS_PCRE == 2 */
337
338 /* dict_pcre_exec_error - report matching error */
339
dict_pcre_exec_error(const char * mapname,int lineno,int errval)340 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
341 {
342 #if HAS_PCRE == 1
343 switch (errval) {
344 case 0:
345 msg_warn("pcre map %s, line %d: too many (...)",
346 mapname, lineno);
347 return;
348 case PCRE_ERROR_NULL:
349 case PCRE_ERROR_BADOPTION:
350 msg_warn("pcre map %s, line %d: bad args to re_exec",
351 mapname, lineno);
352 return;
353 case PCRE_ERROR_BADMAGIC:
354 case PCRE_ERROR_UNKNOWN_NODE:
355 msg_warn("pcre map %s, line %d: corrupt compiled regexp",
356 mapname, lineno);
357 return;
358 #ifdef PCRE_ERROR_NOMEMORY
359 case PCRE_ERROR_NOMEMORY:
360 msg_warn("pcre map %s, line %d: out of memory",
361 mapname, lineno);
362 return;
363 #endif
364 #ifdef PCRE_ERROR_MATCHLIMIT
365 case PCRE_ERROR_MATCHLIMIT:
366 msg_warn("pcre map %s, line %d: backtracking limit exceeded",
367 mapname, lineno);
368 return;
369 #endif
370 #ifdef PCRE_ERROR_BADUTF8
371 case PCRE_ERROR_BADUTF8:
372 msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
373 mapname, lineno);
374 return;
375 #endif
376 #ifdef PCRE_ERROR_BADUTF8_OFFSET
377 case PCRE_ERROR_BADUTF8_OFFSET:
378 msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
379 mapname, lineno);
380 return;
381 #endif
382 default:
383 msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d",
384 mapname, lineno, errval);
385 return;
386 }
387 #else /* HAS_PCRE */
388 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);
389
390 msg_warn("pcre map %s, line %d: %s", mapname, lineno,
391 dict_pcre_get_error(buf, errval));
392 vstring_free(buf);
393 #endif /* HAS_PCRE */
394 }
395
/*
 * DICT_PCRE_EXEC - match one compiled pattern against the lookup string.
 * 
 * The raw library result is saved in (ctxt).matches. The expression value
 * is (match) when the pattern matched, !(match) when it did not match, and
 * 0 after any other result has been reported with dict_pcre_exec_error().
 * 
 * This is a macro to reduce function call overhead in the time-critical
 * lookup loop.
 */
#if HAS_PCRE == 1
#define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \
    ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \
                                NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
                                (ctxt).offsets, PCRE_MAX_CAPTURE * 3), \
     (ctxt).matches > 0 ? (match) : \
     (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \
     (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
#else
#define DICT_PCRE_EXEC(ctxt, map, line, pattern, match_data, match, str, len) \
    ((ctxt).matches = pcre2_match((pattern), (unsigned char *) (str), (len), \
                                  NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
                                  (match_data), (pcre2_match_context *) 0), \
     (ctxt).matches > 0 ? (match) : \
     (ctxt).matches == PCRE2_ERROR_NOMATCH ? !(match) : \
     (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
#endif
416
417 /* dict_pcre_lookup - match string and perform optional substitution */
418
dict_pcre_lookup(DICT * dict,const char * lookup_string)419 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
420 {
421 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
422 DICT_PCRE_RULE *rule;
423 DICT_PCRE_IF_RULE *if_rule;
424 DICT_PCRE_MATCH_RULE *match_rule;
425 int lookup_len = strlen(lookup_string);
426 DICT_PCRE_EXPAND_CONTEXT ctxt;
427
428 dict->error = 0;
429
430 if (msg_verbose)
431 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
432
433 /*
434 * Optionally fold the key.
435 */
436 if (dict->flags & DICT_FLAG_FOLD_MUL) {
437 if (dict->fold_buf == 0)
438 dict->fold_buf = vstring_alloc(10);
439 vstring_strcpy(dict->fold_buf, lookup_string);
440 lookup_string = lowercase(vstring_str(dict->fold_buf));
441 }
442 for (rule = dict_pcre->head; rule; rule = rule->next) {
443
444 switch (rule->op) {
445
446 /*
447 * Search for a matching expression.
448 */
449 case DICT_PCRE_OP_MATCH:
450 match_rule = (DICT_PCRE_MATCH_RULE *) rule;
451 if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
452 match_rule->pattern,
453 DICT_PCRE_MATCH_HINT(match_rule),
454 match_rule->match, lookup_string, lookup_len))
455 continue;
456
457 /*
458 * Skip $number substitutions when the replacement text contains
459 * no $number strings, as learned during the compile time
460 * pre-scan. The pre-scan already replaced $$ by $.
461 */
462 if (match_rule->max_sub == 0)
463 return match_rule->replacement;
464
465 /*
466 * We've got a match. Perform substitution on replacement string.
467 */
468 if (dict_pcre->expansion_buf == 0)
469 dict_pcre->expansion_buf = vstring_alloc(10);
470 VSTRING_RESET(dict_pcre->expansion_buf);
471 ctxt.dict_pcre = dict_pcre;
472 #if HAS_PCRE == 1
473 ctxt.match_rule = match_rule;
474 #else
475 ctxt.ovector = pcre2_get_ovector_pointer(match_rule->match_data);
476 #endif
477 ctxt.lookup_string = lookup_string;
478
479 if (mac_parse(match_rule->replacement, dict_pcre_expand,
480 (void *) &ctxt) & MAC_PARSE_ERROR)
481 msg_fatal("pcre map %s, line %d: bad replacement syntax",
482 dict->name, rule->lineno);
483
484 VSTRING_TERMINATE(dict_pcre->expansion_buf);
485 return (vstring_str(dict_pcre->expansion_buf));
486
487 /*
488 * Conditional. XXX We provide space for matched substring info
489 * because PCRE uses part of it as workspace for backtracking.
490 * PCRE will allocate memory if it runs out of backtracking
491 * storage.
492 */
493 case DICT_PCRE_OP_IF:
494 if_rule = (DICT_PCRE_IF_RULE *) rule;
495 if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
496 if_rule->pattern,
497 DICT_PCRE_MATCH_HINT(if_rule),
498 if_rule->match, lookup_string, lookup_len))
499 continue;
500 /* An IF without matching ENDIF has no "endif" rule. */
501 if ((rule = if_rule->endif_rule) == 0)
502 return (0);
503 /* FALLTHROUGH */
504
505 /*
506 * ENDIF after IF.
507 */
508 case DICT_PCRE_OP_ENDIF:
509 continue;
510
511 default:
512 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
513 }
514 }
515 return (0);
516 }
517
518 /* dict_pcre_close - close pcre dictionary */
519
dict_pcre_close(DICT * dict)520 static void dict_pcre_close(DICT *dict)
521 {
522 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
523 DICT_PCRE_RULE *rule;
524 DICT_PCRE_RULE *next;
525 DICT_PCRE_MATCH_RULE *match_rule;
526 DICT_PCRE_IF_RULE *if_rule;
527
528 for (rule = dict_pcre->head; rule; rule = next) {
529 next = rule->next;
530 switch (rule->op) {
531 case DICT_PCRE_OP_MATCH:
532 match_rule = (DICT_PCRE_MATCH_RULE *) rule;
533 if (match_rule->pattern)
534 DICT_PCRE_CODE_FREE(match_rule->pattern);
535 DICT_PCRE_MATCH_HINT_FREE(match_rule);
536 if (match_rule->replacement)
537 myfree((void *) match_rule->replacement);
538 break;
539 case DICT_PCRE_OP_IF:
540 if_rule = (DICT_PCRE_IF_RULE *) rule;
541 if (if_rule->pattern)
542 DICT_PCRE_CODE_FREE(if_rule->pattern);
543 DICT_PCRE_MATCH_HINT_FREE(if_rule);
544 break;
545 case DICT_PCRE_OP_ENDIF:
546 break;
547 default:
548 msg_panic("dict_pcre_close: unknown operation %d", rule->op);
549 }
550 myfree((void *) rule);
551 }
552 if (dict_pcre->expansion_buf)
553 vstring_free(dict_pcre->expansion_buf);
554 if (dict->fold_buf)
555 vstring_free(dict->fold_buf);
556 dict_free(dict);
557 }
558
559 /* dict_pcre_get_pattern - extract pattern from rule */
560
dict_pcre_get_pattern(const char * mapname,int lineno,char ** bufp,DICT_PCRE_REGEXP * pattern)561 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
562 DICT_PCRE_REGEXP *pattern)
563 {
564 char *p = *bufp;
565 char re_delimiter;
566
567 /*
568 * Process negation operators.
569 */
570 pattern->match = 1;
571 for (;;) {
572 if (*p == '!')
573 pattern->match = !pattern->match;
574 else if (!ISSPACE(*p))
575 break;
576 p++;
577 }
578 if (*p == 0) {
579 msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
580 mapname, lineno);
581 return (0);
582 }
583 re_delimiter = *p++;
584 pattern->regexp = p;
585
586 /*
587 * Search for second delimiter, handling backslash escape.
588 */
589 while (*p) {
590 if (*p == '\\') {
591 ++p;
592 if (*p == 0)
593 break;
594 } else if (*p == re_delimiter)
595 break;
596 ++p;
597 }
598
599 if (!*p) {
600 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
601 "ignoring this rule", mapname, lineno, re_delimiter);
602 return (0);
603 }
604 *p++ = 0; /* Null term the regexp */
605
606 /*
607 * Parse any regexp options.
608 */
609 pattern->options = DICT_PCRE_CASELESS | DICT_PCRE_DOTALL;
610 while (*p && !ISSPACE(*p)) {
611 switch (*p) {
612 case 'i':
613 pattern->options ^= DICT_PCRE_CASELESS;
614 break;
615 case 'm':
616 pattern->options ^= DICT_PCRE_MULTILINE;
617 break;
618 case 's':
619 pattern->options ^= DICT_PCRE_DOTALL;
620 break;
621 case 'x':
622 pattern->options ^= DICT_PCRE_EXTENDED;
623 break;
624 case 'A':
625 pattern->options ^= DICT_PCRE_ANCHORED;
626 break;
627 case 'E':
628 pattern->options ^= DICT_PCRE_DOLLAR_ENDONLY;
629 break;
630 case 'U':
631 pattern->options ^= DICT_PCRE_UNGREEDY;
632 break;
633 case 'X':
634 #if DICT_PCRE_EXTRA != 0
635 pattern->options ^= DICT_PCRE_EXTRA;
636 #else
637 msg_warn("pcre map %s, line %d: ignoring obsolete regexp "
638 "option \"%c\"", mapname, lineno, *p);
639 #endif
640 break;
641 default:
642 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
643 "skipping this rule", mapname, lineno, *p);
644 return (0);
645 }
646 ++p;
647 }
648 *bufp = p;
649 return (1);
650 }
651
652 /* dict_pcre_prescan - sanity check $number instances in replacement text */
653
dict_pcre_prescan(int type,VSTRING * buf,void * context)654 static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
655 {
656 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
657 size_t n;
658
659 /*
660 * Keep a copy of literal text (with $$ already replaced by $) if and
661 * only if the replacement text contains no $number expression. This way
662 * we can avoid having to scan the replacement text at lookup time.
663 */
664 if (type == MAC_PARSE_VARNAME) {
665 if (ctxt->literal) {
666 myfree(ctxt->literal);
667 ctxt->literal = 0;
668 }
669 if (!alldig(vstring_str(buf))) {
670 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
671 ctxt->mapname, ctxt->lineno, vstring_str(buf));
672 return (MAC_PARSE_ERROR);
673 }
674 n = atoi(vstring_str(buf));
675 if (n < 1) {
676 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
677 ctxt->mapname, ctxt->lineno, vstring_str(buf));
678 return (MAC_PARSE_ERROR);
679 }
680 if (n > ctxt->max_sub)
681 ctxt->max_sub = n;
682 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
683 if (ctxt->literal)
684 msg_panic("pcre map %s, line %d: multiple literals but no $number",
685 ctxt->mapname, ctxt->lineno);
686 ctxt->literal = mystrdup(vstring_str(buf));
687 }
688 return (MAC_PARSE_OK);
689 }
690
691 /* dict_pcre_compile - compile pattern */
692
dict_pcre_compile(const char * mapname,int lineno,DICT_PCRE_REGEXP * pattern,DICT_PCRE_ENGINE * engine)693 static int dict_pcre_compile(const char *mapname, int lineno,
694 DICT_PCRE_REGEXP *pattern,
695 DICT_PCRE_ENGINE *engine)
696 {
697 #if HAS_PCRE == 1
698 const char *error;
699 int errptr;
700
701 engine->pattern = pcre_compile(pattern->regexp, pattern->options,
702 &error, &errptr, NULL);
703 if (engine->pattern == 0) {
704 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
705 mapname, lineno, errptr, error);
706 return (0);
707 }
708 engine->hints = pcre_study(engine->pattern, 0, &error);
709 if (error != 0) {
710 msg_warn("pcre map %s, line %d: error while studying regex: %s",
711 mapname, lineno, error);
712 DICT_PCRE_CODE_FREE(engine->pattern);
713 return (0);
714 }
715 #else
716 int error;
717 size_t errptr;
718
719 engine->pattern = pcre2_compile((unsigned char *) pattern->regexp,
720 PCRE2_ZERO_TERMINATED,
721 pattern->options, &error, &errptr, NULL);
722 if (engine->pattern == 0) {
723 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);
724
725 msg_warn("pcre map %s, line %d: error in regex at offset %lu: %s",
726 mapname, lineno, (unsigned long) errptr,
727 dict_pcre_get_error(buf, error));
728 vstring_free(buf);
729 return (0);
730 }
731 engine->match_data = pcre2_match_data_create_from_pattern(
732 engine->pattern, (void *) 0);
733 #endif
734 return (1);
735 }
736
737 /* dict_pcre_rule_alloc - fill in a generic rule structure */
738
dict_pcre_rule_alloc(int op,int lineno,size_t size)739 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size)
740 {
741 DICT_PCRE_RULE *rule;
742
743 rule = (DICT_PCRE_RULE *) mymalloc(size);
744 rule->op = op;
745 rule->lineno = lineno;
746 rule->next = 0;
747
748 return (rule);
749 }
750
751 /* dict_pcre_parse_rule - parse and compile one rule */
752
dict_pcre_parse_rule(DICT * dict,const char * mapname,int lineno,char * line,int nesting)753 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname,
754 int lineno, char *line,
755 int nesting)
756 {
757 char *p;
758
759 #ifdef DICT_PCRE_CAPTURECOUNT_T
760 DICT_PCRE_CAPTURECOUNT_T actual_sub;
761
762 #endif
763 #if 0
764 uint32_t namecount;
765
766 #endif
767
768 p = line;
769
770 /*
771 * An ordinary match rule takes one pattern and replacement text.
772 */
773 if (!ISALNUM(*p)) {
774 DICT_PCRE_REGEXP regexp;
775 DICT_PCRE_ENGINE engine;
776 DICT_PCRE_PRESCAN_CONTEXT prescan_context;
777 DICT_PCRE_MATCH_RULE *match_rule;
778
779 /*
780 * Get the pattern string and options.
781 */
782 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0)
783 return (0);
784
785 /*
786 * Get the replacement text.
787 */
788 while (*p && ISSPACE(*p))
789 ++p;
790 if (!*p)
791 msg_warn("pcre map %s, line %d: no replacement text: "
792 "using empty string", mapname, lineno);
793
794 /*
795 * Sanity check the $number instances in the replacement text.
796 */
797 prescan_context.mapname = mapname;
798 prescan_context.lineno = lineno;
799 prescan_context.max_sub = 0;
800 prescan_context.literal = 0;
801
802 /*
803 * The optimizer will eliminate code duplication and/or dead code.
804 */
805 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
806 if (prescan_context.literal) \
807 myfree(prescan_context.literal); \
808 return (rval); \
809 } while (0)
810
811 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
812 VSTRING *base64_buf;
813 char *err;
814
815 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
816 err = dict_file_get_error(dict);
817 msg_warn("pcre map %s, line %d: %s: skipping this rule",
818 mapname, lineno, err);
819 myfree(err);
820 CREATE_MATCHOP_ERROR_RETURN(0);
821 }
822 p = vstring_str(base64_buf);
823 }
824 if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
825 & MAC_PARSE_ERROR) {
826 msg_warn("pcre map %s, line %d: bad replacement syntax: "
827 "skipping this rule", mapname, lineno);
828 CREATE_MATCHOP_ERROR_RETURN(0);
829 }
830
831 /*
832 * Substring replacement not possible with negative regexps.
833 */
834 if (prescan_context.max_sub > 0 && regexp.match == 0) {
835 msg_warn("pcre map %s, line %d: $number found in negative match "
836 "replacement text: skipping this rule", mapname, lineno);
837 CREATE_MATCHOP_ERROR_RETURN(0);
838 }
839 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
840 msg_warn("pcre map %s, line %d: "
841 "regular expression substitution is not allowed: "
842 "skipping this rule", mapname, lineno);
843 CREATE_MATCHOP_ERROR_RETURN(0);
844 }
845
846 /*
847 * Compile the pattern.
848 */
849 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0)
850 CREATE_MATCHOP_ERROR_RETURN(0);
851 #ifdef DICT_PCRE_CAPTURECOUNT_T
852 #if HAS_PCRE == 1
853 if (pcre_fullinfo(engine.pattern, engine.hints,
854 PCRE_INFO_CAPTURECOUNT,
855 (void *) &actual_sub) != 0)
856 msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
857 mapname, lineno);
858 #else /* HAS_PCRE */
859 #if 0
860 if (pcre2_pattern_info(
861 engine.pattern, PCRE2_INFO_NAMECOUNT, &namecount) != 0)
862 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
863 mapname, lineno);
864 if (namecount > 0) {
865 msg_warn("pcre map %s, line %d: named substrings are not supported",
866 mapname, lineno);
867 if (engine.pattern)
868 DICT_PCRE_CODE_FREE(engine.pattern);
869 DICT_PCRE_MATCH_HINT_FREE(&engine);
870 CREATE_MATCHOP_ERROR_RETURN(0);
871 }
872 #endif
873 if (pcre2_pattern_info(engine.pattern, PCRE2_INFO_CAPTURECOUNT,
874 (void *) &actual_sub) != 0)
875 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
876 mapname, lineno);
877 #endif /* HAS_PCRE */
878 if (prescan_context.max_sub > actual_sub) {
879 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
880 "skipping this rule", mapname, lineno,
881 (int) prescan_context.max_sub);
882 if (engine.pattern)
883 DICT_PCRE_CODE_FREE(engine.pattern);
884 DICT_PCRE_MATCH_HINT_FREE(&engine);
885 CREATE_MATCHOP_ERROR_RETURN(0);
886 }
887 #endif /* DICT_PCRE_CAPTURECOUNT_T */
888
889 /*
890 * Save the result.
891 */
892 match_rule = (DICT_PCRE_MATCH_RULE *)
893 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno,
894 sizeof(DICT_PCRE_MATCH_RULE));
895 match_rule->match = regexp.match;
896 match_rule->max_sub = prescan_context.max_sub;
897 if (prescan_context.literal)
898 match_rule->replacement = prescan_context.literal;
899 else
900 match_rule->replacement = mystrdup(p);
901 match_rule->pattern = engine.pattern;
902 DICT_PCRE_MATCH_HINT(match_rule) = DICT_PCRE_MATCH_HINT(&engine);
903 return ((DICT_PCRE_RULE *) match_rule);
904 }
905
906 /*
907 * The IF operator takes one pattern but no replacement text.
908 */
909 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
910 DICT_PCRE_REGEXP regexp;
911 DICT_PCRE_ENGINE engine;
912 DICT_PCRE_IF_RULE *if_rule;
913
914 p += 2;
915
916 /*
917 * Get the pattern.
918 */
919 while (*p && ISSPACE(*p))
920 p++;
921 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp))
922 return (0);
923
924 /*
925 * Warn about out-of-place text.
926 */
927 while (*p && ISSPACE(*p))
928 ++p;
929 if (*p) {
930 msg_warn("pcre map %s, line %d: ignoring extra text after "
931 "IF statement: \"%s\"", mapname, lineno, p);
932 msg_warn("pcre map %s, line %d: do not prepend whitespace"
933 " to statements between IF and ENDIF", mapname, lineno);
934 }
935
936 /*
937 * Compile the pattern.
938 */
939 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0)
940 return (0);
941
942 /*
943 * Save the result.
944 */
945 if_rule = (DICT_PCRE_IF_RULE *)
946 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno,
947 sizeof(DICT_PCRE_IF_RULE));
948 if_rule->match = regexp.match;
949 if_rule->pattern = engine.pattern;
950 DICT_PCRE_MATCH_HINT(if_rule) = DICT_PCRE_MATCH_HINT(&engine);
951 if_rule->endif_rule = 0;
952 return ((DICT_PCRE_RULE *) if_rule);
953 }
954
955 /*
956 * The ENDIF operator takes no patterns and no replacement text.
957 */
958 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
959 DICT_PCRE_RULE *rule;
960
961 p += 5;
962
963 /*
964 * Warn about out-of-place ENDIFs.
965 */
966 if (nesting == 0) {
967 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
968 mapname, lineno);
969 return (0);
970 }
971
972 /*
973 * Warn about out-of-place text.
974 */
975 while (*p && ISSPACE(*p))
976 ++p;
977 if (*p)
978 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
979 mapname, lineno);
980
981 /*
982 * Save the result.
983 */
984 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno,
985 sizeof(DICT_PCRE_RULE));
986 return (rule);
987 }
988
989 /*
990 * Unrecognized input.
991 */
992 else {
993 msg_warn("pcre map %s, line %d: ignoring unrecognized request",
994 mapname, lineno);
995 return (0);
996 }
997 }
998
999 /* dict_pcre_open - load and compile a file containing regular expressions */
1000
dict_pcre_open(const char * mapname,int open_flags,int dict_flags)1001 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
1002 {
1003 const char myname[] = "dict_pcre_open";
1004 DICT_PCRE *dict_pcre;
1005 VSTREAM *map_fp = 0;
1006 struct stat st;
1007 VSTRING *why = 0;
1008 VSTRING *line_buffer = 0;
1009 DICT_PCRE_RULE *last_rule = 0;
1010 DICT_PCRE_RULE *rule;
1011 int last_line = 0;
1012 int lineno;
1013 int nesting = 0;
1014 char *p;
1015 DICT_PCRE_RULE **rule_stack = 0;
1016 MVECT mvect;
1017
1018 /*
1019 * Let the optimizer worry about eliminating redundant code.
1020 */
1021 #define DICT_PCRE_OPEN_RETURN(d) do { \
1022 DICT *__d = (d); \
1023 if (map_fp != 0) \
1024 vstream_fclose(map_fp); \
1025 if (line_buffer != 0) \
1026 vstring_free(line_buffer); \
1027 if (why != 0) \
1028 vstring_free(why); \
1029 return (__d); \
1030 } while (0)
1031
1032 /*
1033 * Sanity checks.
1034 */
1035 if (open_flags != O_RDONLY)
1036 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1037 open_flags, dict_flags,
1038 "%s:%s map requires O_RDONLY access mode",
1039 DICT_TYPE_PCRE, mapname));
1040
1041 /*
1042 * Open the configuration file.
1043 */
1044 if ((map_fp = dict_stream_open(DICT_TYPE_PCRE, mapname, O_RDONLY,
1045 dict_flags, &st, &why)) == 0)
1046 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1047 open_flags, dict_flags,
1048 "%s", vstring_str(why)));
1049 line_buffer = vstring_alloc(100);
1050
1051 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
1052 sizeof(*dict_pcre));
1053 dict_pcre->dict.lookup = dict_pcre_lookup;
1054 dict_pcre->dict.close = dict_pcre_close;
1055 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
1056 if (dict_flags & DICT_FLAG_FOLD_MUL)
1057 dict_pcre->dict.fold_buf = vstring_alloc(10);
1058 dict_pcre->head = 0;
1059 dict_pcre->expansion_buf = 0;
1060
1061 #if HAS_PCRE == 1
1062 if (dict_pcre_init == 0) {
1063 pcre_malloc = (void *(*) (size_t)) mymalloc;
1064 pcre_free = (void (*) (void *)) myfree;
1065 dict_pcre_init = 1;
1066 }
1067 #endif
1068 dict_pcre->dict.owner.uid = st.st_uid;
1069 dict_pcre->dict.owner.status = (st.st_uid != 0);
1070
1071 /*
1072 * Parse the pcre table.
1073 */
1074 while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
1075 p = vstring_str(line_buffer);
1076 trimblanks(p, 0)[0] = 0; /* Trim space at end */
1077 if (*p == 0)
1078 continue;
1079 rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno,
1080 p, nesting);
1081 if (rule == 0)
1082 continue;
1083 if (rule->op == DICT_PCRE_OP_IF) {
1084 if (rule_stack == 0)
1085 rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect,
1086 sizeof(*rule_stack), nesting + 1,
1087 (MVECT_FN) 0, (MVECT_FN) 0);
1088 else
1089 rule_stack =
1090 (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1);
1091 rule_stack[nesting] = rule;
1092 nesting++;
1093 } else if (rule->op == DICT_PCRE_OP_ENDIF) {
1094 DICT_PCRE_IF_RULE *if_rule;
1095
1096 if (nesting-- <= 0)
1097 /* Already handled in dict_pcre_parse_rule(). */
1098 msg_panic("%s: ENDIF without IF", myname);
1099 if (rule_stack[nesting]->op != DICT_PCRE_OP_IF)
1100 msg_panic("%s: unexpected rule stack element type %d",
1101 myname, rule_stack[nesting]->op);
1102 if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting];
1103 if_rule->endif_rule = rule;
1104 }
1105 if (last_rule == 0)
1106 dict_pcre->head = rule;
1107 else
1108 last_rule->next = rule;
1109 last_rule = rule;
1110 }
1111
1112 while (nesting-- > 0)
1113 msg_warn("pcre map %s, line %d: IF has no matching ENDIF",
1114 mapname, rule_stack[nesting]->lineno);
1115
1116 if (rule_stack)
1117 (void) mvect_free(&mvect);
1118
1119 dict_file_purge_buffers(&dict_pcre->dict);
1120 DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
1121 }
1122
1123 #endif /* HAS_PCRE */
1124