xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: dict_pcre.c,v 1.4 2022/10/08 16:12:50 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*
33 /*	Wietse Venema
34 /*	Google, Inc.
35 /*	111 8th Avenue
36 /*	New York, NY 10011, USA
37 /*--*/
38 
39 #include "sys_defs.h"
40 
41 #ifdef HAS_PCRE
42 
43 /* System library. */
44 
#include <sys/stat.h>
#include <stdio.h>			/* sprintf() prototype */
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
51 
52 #ifdef STRCASECMP_IN_STRINGS_H
53 #include <strings.h>
54 #endif
55 
56 #if HAS_PCRE == 1
57 #include <pcre.h>
58 #elif HAS_PCRE == 2
59 #define PCRE2_CODE_UNIT_WIDTH	8
60 #include <pcre2.h>
61 #else
62 #error "define HAS_PCRE=2 or HAS_PCRE=1"
63 #endif
64 
65 /* Utility library. */
66 
67 #include "mymalloc.h"
68 #include "msg.h"
69 #include "safe.h"
70 #include "vstream.h"
71 #include "vstring.h"
72 #include "stringops.h"
73 #include "readlline.h"
74 #include "dict.h"
75 #include "dict_pcre.h"
76 #include "mac_parse.h"
77 #include "warn_stat.h"
78 #include "mvect.h"
79 
80  /*
81   * Backwards compatibility.
82   */
83 #if HAS_PCRE == 1
 /* PCRE Legacy JIT support. */
85 #ifdef PCRE_STUDY_JIT_COMPILE
86 #define DICT_PCRE_FREE_STUDY(x)	pcre_free_study(x)
87 #else
88 #define DICT_PCRE_FREE_STUDY(x)	pcre_free((char *) (x))
89 #endif
90 
91  /* PCRE Compiled pattern. */
92 #define DICT_PCRE_CODE		pcre
93 #define DICT_PCRE_CODE_FREE(x)	myfree((void *) (x))
94 
95  /* Old-style hints versus new-style match_data. */
96 #define DICT_PCRE_MATCH_HINT_TYPE pcre_extra *
97 #define DICT_PCRE_MATCH_HINT_NAME hints
98 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME)
99 #define DICT_PCRE_MATCH_HINT_FREE(x) do { \
100 	if (DICT_PCRE_MATCH_HINT(x)) \
101 	    DICT_PCRE_FREE_STUDY(DICT_PCRE_MATCH_HINT(x)); \
102     } while (0)
103 
104  /* PCRE Pattern options. */
105 #define DICT_PCRE_CASELESS	PCRE_CASELESS
106 #define DICT_PCRE_MULTILINE	PCRE_MULTILINE
107 #define DICT_PCRE_DOTALL	PCRE_DOTALL
108 #define DICT_PCRE_EXTENDED	PCRE_EXTENDED
109 #define DICT_PCRE_ANCHORED	PCRE_ANCHORED
110 #define DICT_PCRE_DOLLAR_ENDONLY PCRE_DOLLAR_ENDONLY
111 #define DICT_PCRE_UNGREEDY	PCRE_UNGREEDY
112 #define DICT_PCRE_EXTRA		PCRE_EXTRA
113 
114  /* PCRE Number of captures in pattern. */
115 #ifdef PCRE_INFO_CAPTURECOUNT
116 #define DICT_PCRE_CAPTURECOUNT_T int
117 #endif
118 
119 #else					/* HAS_PCRE */
120 
121  /* PCRE2 Compiled pattern. */
122 #define DICT_PCRE_CODE		pcre2_code
123 #define DICT_PCRE_CODE_FREE(x)	pcre2_code_free(x)
124 
125  /* PCRE2 Old-style hints versus new-style match_data. */
126 #define DICT_PCRE_MATCH_HINT_TYPE pcre2_match_data *
127 #define DICT_PCRE_MATCH_HINT_NAME match_data
128 #define DICT_PCRE_MATCH_HINT(x)	((x)->DICT_PCRE_MATCH_HINT_NAME)
129 #define DICT_PCRE_MATCH_HINT_FREE(x) \
130 	pcre2_match_data_free(DICT_PCRE_MATCH_HINT(x))
131 
132  /* PCRE2 Pattern options. */
133 #define DICT_PCRE_CASELESS	PCRE2_CASELESS
134 #define DICT_PCRE_MULTILINE	PCRE2_MULTILINE
135 #define DICT_PCRE_DOTALL	PCRE2_DOTALL
136 #define DICT_PCRE_EXTENDED	PCRE2_EXTENDED
137 #define DICT_PCRE_ANCHORED	PCRE2_ANCHORED
138 #define DICT_PCRE_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY
139 #define DICT_PCRE_UNGREEDY	PCRE2_UNGREEDY
140 #define DICT_PCRE_EXTRA		0
141 
142  /* PCRE2 Number of captures in pattern. */
143 #define	DICT_PCRE_CAPTURECOUNT_T uint32_t
144 
145 #endif					/* HAS_PCRE */
146 
147  /*
148   * Support for IF/ENDIF based on an idea by Bert Driehuis.
149   */
150 #define DICT_PCRE_OP_MATCH    1		/* Match this regexp */
151 #define DICT_PCRE_OP_IF       2		/* Increase if/endif nesting on match */
152 #define DICT_PCRE_OP_ENDIF    3		/* Decrease if/endif nesting on match */
153 
154  /*
155   * Max strings captured by regexp - essentially the max number of (..)
156   */
157 #if HAS_PCRE == 1
158 #define PCRE_MAX_CAPTURE	99
159 #endif
160 
161  /*
162   * Regular expression before and after compilation.
163   */
164 typedef struct {
165     char   *regexp;			/* regular expression */
166     int     options;			/* options */
167     int     match;			/* positive or negative match */
168 } DICT_PCRE_REGEXP;
169 
170 typedef struct {
171     DICT_PCRE_CODE *pattern;		/* the compiled pattern */
172     DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
173 } DICT_PCRE_ENGINE;
174 
175  /*
176   * Compiled generic rule, and subclasses that derive from it.
177   */
178 typedef struct DICT_PCRE_RULE {
179     int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
180     int     lineno;			/* source file line number */
181     struct DICT_PCRE_RULE *next;	/* next rule in dict */
182 } DICT_PCRE_RULE;
183 
184 typedef struct {
185     DICT_PCRE_RULE rule;		/* generic part */
186     DICT_PCRE_CODE *pattern;		/* compiled pattern */
187     DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
188     char   *replacement;		/* replacement string */
189     int     match;			/* positive or negative match */
190     size_t  max_sub;			/* largest $number in replacement */
191 } DICT_PCRE_MATCH_RULE;
192 
193 typedef struct {
194     DICT_PCRE_RULE rule;		/* generic members */
195     DICT_PCRE_CODE *pattern;		/* compiled pattern */
196     DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
197     int     match;			/* positive or negative match */
198     struct DICT_PCRE_RULE *endif_rule;	/* matching endif rule */
199 } DICT_PCRE_IF_RULE;
200 
201  /*
202   * PCRE map.
203   */
204 typedef struct {
205     DICT    dict;			/* generic members */
206     DICT_PCRE_RULE *head;
207     VSTRING *expansion_buf;		/* lookup result */
208 } DICT_PCRE;
209 
210 #if HAS_PCRE == 1
211 static int dict_pcre_init = 0;		/* flag need to init pcre library */
212 
213 #endif
214 
215 /*
216  * Context for $number expansion callback.
217  */
218 typedef struct {
219     DICT_PCRE *dict_pcre;		/* the dictionary handle */
220 #if HAS_PCRE == 1
221     DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
222 #endif
223     const char *lookup_string;		/* string against which we match */
224 #if HAS_PCRE == 1
225     int     offsets[PCRE_MAX_CAPTURE * 3];	/* Cut substrings */
226 #else					/* HAS_PCRE */
227     PCRE2_SIZE *ovector;		/* matched string offsets */
228 #endif					/* HAS_PCRE */
229     int     matches;			/* Count of cuts */
230 } DICT_PCRE_EXPAND_CONTEXT;
231 
232  /*
233   * Context for $number pre-scan callback.
234   */
235 typedef struct {
236     const char *mapname;		/* name of regexp map */
237     int     lineno;			/* where in file */
238     size_t  max_sub;			/* Largest $n seen */
239     char   *literal;			/* constant result, $$ -> $ */
240 } DICT_PCRE_PRESCAN_CONTEXT;
241 
242  /*
243   * Compatibility.
244   */
245 #ifndef MAC_PARSE_OK
246 #define MAC_PARSE_OK 0
247 #endif
248 
249  /*
250   * Macros to make dense code more accessible.
251   */
252 #define NULL_STARTOFFSET	(0)
253 #define NULL_EXEC_OPTIONS 	(0)
254 
255 /* dict_pcre_expand - replace $number with matched text */
256 
257 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
258 {
259     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
260     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
261     int     n;
262 
263 #if HAS_PCRE == 1
264     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
265     const char *pp;
266     int     ret;
267 
268 #else
269     PCRE2_SPTR start;
270     PCRE2_SIZE length;
271 
272 #endif
273 
274     /*
275      * Replace $0-${99} with strings cut from matched text.
276      */
277     if (type == MAC_PARSE_VARNAME) {
278 	n = atoi(vstring_str(buf));
279 #if HAS_PCRE == 1
280 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
281 				 ctxt->matches, n, &pp);
282 	if (ret < 0) {
283 	    if (ret == PCRE_ERROR_NOSUBSTRING)
284 		return (MAC_PARSE_UNDEF);
285 	    else
286 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
287 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
288 	}
289 	if (*pp == 0) {
290 	    myfree((void *) pp);
291 	    return (MAC_PARSE_UNDEF);
292 	}
293 	vstring_strcat(dict_pcre->expansion_buf, pp);
294 	myfree((void *) pp);
295 	return (MAC_PARSE_OK);
296 #else
297 	start = (unsigned char *) ctxt->lookup_string + ctxt->ovector[2 * n];
298 	length = ctxt->ovector[2 * n + 1] - ctxt->ovector[2 * n];
299 	if (length == 0)
300 	    return (MAC_PARSE_UNDEF);
301 	vstring_strncat(dict_pcre->expansion_buf, (char *) start, length);
302 	return (MAC_PARSE_OK);
303 #endif
304     }
305 
306     /*
307      * Straight text - duplicate with no substitution.
308      */
309     else {
310 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
311 	return (MAC_PARSE_OK);
312     }
313 }
314 
315 #if HAS_PCRE == 2
316 
317 #define DICT_PCRE_GET_ERROR_BUF_LEN	256
318 
319 /* dict_pcre_get_error - convert PCRE2 error number or text */
320 
321 static char *dict_pcre_get_error(VSTRING *buf, int errval)
322 {
323     ssize_t len;
324 
325     VSTRING_SPACE(buf, DICT_PCRE_GET_ERROR_BUF_LEN);
326     if ((len = pcre2_get_error_message(errval,
327 				       (unsigned char *) vstring_str(buf),
328 				       DICT_PCRE_GET_ERROR_BUF_LEN)) > 0) {
329 	vstring_set_payload_size(buf, len);
330     } else
331 	vstring_sprintf(buf, "unexpected pcre2 error code %d", errval);
332     return (vstring_str(buf));
333 }
334 
335 #endif					/* HAS_PCRE == 2 */
336 
337 /* dict_pcre_exec_error - report matching error */
338 
339 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
340 {
341 #if HAS_PCRE == 1
342     switch (errval) {
343 	case 0:
344 	msg_warn("pcre map %s, line %d: too many (...)",
345 		 mapname, lineno);
346 	return;
347     case PCRE_ERROR_NULL:
348     case PCRE_ERROR_BADOPTION:
349 	msg_warn("pcre map %s, line %d: bad args to re_exec",
350 		 mapname, lineno);
351 	return;
352     case PCRE_ERROR_BADMAGIC:
353     case PCRE_ERROR_UNKNOWN_NODE:
354 	msg_warn("pcre map %s, line %d: corrupt compiled regexp",
355 		 mapname, lineno);
356 	return;
357 #ifdef PCRE_ERROR_NOMEMORY
358     case PCRE_ERROR_NOMEMORY:
359 	msg_warn("pcre map %s, line %d: out of memory",
360 		 mapname, lineno);
361 	return;
362 #endif
363 #ifdef PCRE_ERROR_MATCHLIMIT
364     case PCRE_ERROR_MATCHLIMIT:
365 	msg_warn("pcre map %s, line %d: backtracking limit exceeded",
366 		 mapname, lineno);
367 	return;
368 #endif
369 #ifdef PCRE_ERROR_BADUTF8
370     case PCRE_ERROR_BADUTF8:
371 	msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
372 		 mapname, lineno);
373 	return;
374 #endif
375 #ifdef PCRE_ERROR_BADUTF8_OFFSET
376     case PCRE_ERROR_BADUTF8_OFFSET:
377 	msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
378 		 mapname, lineno);
379 	return;
380 #endif
381     default:
382 	msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d",
383 		 mapname, lineno, errval);
384 	return;
385     }
386 #else					/* HAS_PCRE */
387     VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);
388 
389     msg_warn("pcre map %s, line %d: %s", mapname, lineno,
390 	     dict_pcre_get_error(buf, errval));
391     vstring_free(buf);
392 #endif						/* HAS_PCRE */
393 }
394 
395  /*
396   * Inlined to reduce function call overhead in the time-critical loop.
397   */
398 #if HAS_PCRE == 1
399 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \
400     ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \
401 				NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
402 				(ctxt).offsets, PCRE_MAX_CAPTURE * 3), \
403      (ctxt).matches > 0 ? (match) : \
404      (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \
405      (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
406 #else
407 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, match_data, match, str, len) \
408     ((ctxt).matches = pcre2_match((pattern), (unsigned char *) (str), (len), \
409 				NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
410 				(match_data), (pcre2_match_context *) 0), \
411      (ctxt).matches > 0 ? (match) : \
412      (ctxt).matches == PCRE2_ERROR_NOMATCH ? !(match) : \
413      (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
414 #endif
415 
416 /* dict_pcre_lookup - match string and perform optional substitution */
417 
418 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
419 {
420     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
421     DICT_PCRE_RULE *rule;
422     DICT_PCRE_IF_RULE *if_rule;
423     DICT_PCRE_MATCH_RULE *match_rule;
424     int     lookup_len = strlen(lookup_string);
425     DICT_PCRE_EXPAND_CONTEXT ctxt;
426 
427     dict->error = 0;
428 
429     if (msg_verbose)
430 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
431 
432     /*
433      * Optionally fold the key.
434      */
435     if (dict->flags & DICT_FLAG_FOLD_MUL) {
436 	if (dict->fold_buf == 0)
437 	    dict->fold_buf = vstring_alloc(10);
438 	vstring_strcpy(dict->fold_buf, lookup_string);
439 	lookup_string = lowercase(vstring_str(dict->fold_buf));
440     }
441     for (rule = dict_pcre->head; rule; rule = rule->next) {
442 
443 	switch (rule->op) {
444 
445 	    /*
446 	     * Search for a matching expression.
447 	     */
448 	case DICT_PCRE_OP_MATCH:
449 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
450 	    if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
451 				match_rule->pattern,
452 				DICT_PCRE_MATCH_HINT(match_rule),
453 			      match_rule->match, lookup_string, lookup_len))
454 		continue;
455 
456 	    /*
457 	     * Skip $number substitutions when the replacement text contains
458 	     * no $number strings, as learned during the compile time
459 	     * pre-scan. The pre-scan already replaced $$ by $.
460 	     */
461 	    if (match_rule->max_sub == 0)
462 		return match_rule->replacement;
463 
464 	    /*
465 	     * We've got a match. Perform substitution on replacement string.
466 	     */
467 	    if (dict_pcre->expansion_buf == 0)
468 		dict_pcre->expansion_buf = vstring_alloc(10);
469 	    VSTRING_RESET(dict_pcre->expansion_buf);
470 	    ctxt.dict_pcre = dict_pcre;
471 #if HAS_PCRE == 1
472 	    ctxt.match_rule = match_rule;
473 #else
474 	    ctxt.ovector = pcre2_get_ovector_pointer(match_rule->match_data);
475 #endif
476 	    ctxt.lookup_string = lookup_string;
477 
478 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
479 			  (void *) &ctxt) & MAC_PARSE_ERROR)
480 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
481 			  dict->name, rule->lineno);
482 
483 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
484 	    return (vstring_str(dict_pcre->expansion_buf));
485 
486 	    /*
487 	     * Conditional. XXX We provide space for matched substring info
488 	     * because PCRE uses part of it as workspace for backtracking.
489 	     * PCRE will allocate memory if it runs out of backtracking
490 	     * storage.
491 	     */
492 	case DICT_PCRE_OP_IF:
493 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
494 	    if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
495 			       if_rule->pattern,
496 			       DICT_PCRE_MATCH_HINT(if_rule),
497 			       if_rule->match, lookup_string, lookup_len))
498 		continue;
499 	    /* An IF without matching ENDIF has no "endif" rule. */
500 	    if ((rule = if_rule->endif_rule) == 0)
501 		return (0);
502 	    /* FALLTHROUGH */
503 
504 	    /*
505 	     * ENDIF after IF.
506 	     */
507 	case DICT_PCRE_OP_ENDIF:
508 	    continue;
509 
510 	default:
511 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
512 	}
513     }
514     return (0);
515 }
516 
517 /* dict_pcre_close - close pcre dictionary */
518 
519 static void dict_pcre_close(DICT *dict)
520 {
521     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
522     DICT_PCRE_RULE *rule;
523     DICT_PCRE_RULE *next;
524     DICT_PCRE_MATCH_RULE *match_rule;
525     DICT_PCRE_IF_RULE *if_rule;
526 
527     for (rule = dict_pcre->head; rule; rule = next) {
528 	next = rule->next;
529 	switch (rule->op) {
530 	case DICT_PCRE_OP_MATCH:
531 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
532 	    if (match_rule->pattern)
533 		DICT_PCRE_CODE_FREE(match_rule->pattern);
534 	    DICT_PCRE_MATCH_HINT_FREE(match_rule);
535 	    if (match_rule->replacement)
536 		myfree((void *) match_rule->replacement);
537 	    break;
538 	case DICT_PCRE_OP_IF:
539 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
540 	    if (if_rule->pattern)
541 		DICT_PCRE_CODE_FREE(if_rule->pattern);
542 	    DICT_PCRE_MATCH_HINT_FREE(if_rule);
543 	    break;
544 	case DICT_PCRE_OP_ENDIF:
545 	    break;
546 	default:
547 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
548 	}
549 	myfree((void *) rule);
550     }
551     if (dict_pcre->expansion_buf)
552 	vstring_free(dict_pcre->expansion_buf);
553     if (dict->fold_buf)
554 	vstring_free(dict->fold_buf);
555     dict_free(dict);
556 }
557 
558 /* dict_pcre_get_pattern - extract pattern from rule */
559 
560 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
561 				         DICT_PCRE_REGEXP *pattern)
562 {
563     char   *p = *bufp;
564     char    re_delimiter;
565 
566     /*
567      * Process negation operators.
568      */
569     pattern->match = 1;
570     for (;;) {
571 	if (*p == '!')
572 	    pattern->match = !pattern->match;
573 	else if (!ISSPACE(*p))
574 	    break;
575 	p++;
576     }
577     if (*p == 0) {
578 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
579 		 mapname, lineno);
580 	return (0);
581     }
582     re_delimiter = *p++;
583     pattern->regexp = p;
584 
585     /*
586      * Search for second delimiter, handling backslash escape.
587      */
588     while (*p) {
589 	if (*p == '\\') {
590 	    ++p;
591 	    if (*p == 0)
592 		break;
593 	} else if (*p == re_delimiter)
594 	    break;
595 	++p;
596     }
597 
598     if (!*p) {
599 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
600 		 "ignoring this rule", mapname, lineno, re_delimiter);
601 	return (0);
602     }
603     *p++ = 0;					/* Null term the regexp */
604 
605     /*
606      * Parse any regexp options.
607      */
608     pattern->options = DICT_PCRE_CASELESS | DICT_PCRE_DOTALL;
609     while (*p && !ISSPACE(*p)) {
610 	switch (*p) {
611 	case 'i':
612 	    pattern->options ^= DICT_PCRE_CASELESS;
613 	    break;
614 	case 'm':
615 	    pattern->options ^= DICT_PCRE_MULTILINE;
616 	    break;
617 	case 's':
618 	    pattern->options ^= DICT_PCRE_DOTALL;
619 	    break;
620 	case 'x':
621 	    pattern->options ^= DICT_PCRE_EXTENDED;
622 	    break;
623 	case 'A':
624 	    pattern->options ^= DICT_PCRE_ANCHORED;
625 	    break;
626 	case 'E':
627 	    pattern->options ^= DICT_PCRE_DOLLAR_ENDONLY;
628 	    break;
629 	case 'U':
630 	    pattern->options ^= DICT_PCRE_UNGREEDY;
631 	    break;
632 	case 'X':
633 #if DICT_PCRE_EXTRA != 0
634 	    pattern->options ^= DICT_PCRE_EXTRA;
635 #else
636 	    msg_warn("pcre map %s, line %d: ignoring obsolete regexp "
637 		     "option \"%c\"", mapname, lineno, *p);
638 #endif
639 	    break;
640 	default:
641 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
642 		     "skipping this rule", mapname, lineno, *p);
643 	    return (0);
644 	}
645 	++p;
646     }
647     *bufp = p;
648     return (1);
649 }
650 
651 /* dict_pcre_prescan - sanity check $number instances in replacement text */
652 
653 static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
654 {
655     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
656     size_t  n;
657 
658     /*
659      * Keep a copy of literal text (with $$ already replaced by $) if and
660      * only if the replacement text contains no $number expression. This way
661      * we can avoid having to scan the replacement text at lookup time.
662      */
663     if (type == MAC_PARSE_VARNAME) {
664 	if (ctxt->literal) {
665 	    myfree(ctxt->literal);
666 	    ctxt->literal = 0;
667 	}
668 	if (!alldig(vstring_str(buf))) {
669 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
670 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
671 	    return (MAC_PARSE_ERROR);
672 	}
673 	n = atoi(vstring_str(buf));
674 	if (n < 1) {
675 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
676 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
677 	    return (MAC_PARSE_ERROR);
678 	}
679 	if (n > ctxt->max_sub)
680 	    ctxt->max_sub = n;
681     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
682 	if (ctxt->literal)
683 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
684 		      ctxt->mapname, ctxt->lineno);
685 	ctxt->literal = mystrdup(vstring_str(buf));
686     }
687     return (MAC_PARSE_OK);
688 }
689 
690 /* dict_pcre_compile - compile pattern */
691 
692 static int dict_pcre_compile(const char *mapname, int lineno,
693 			             DICT_PCRE_REGEXP *pattern,
694 			             DICT_PCRE_ENGINE *engine)
695 {
696 #if HAS_PCRE == 1
697     const char *error;
698     int     errptr;
699 
700     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
701 				   &error, &errptr, NULL);
702     if (engine->pattern == 0) {
703 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
704 		 mapname, lineno, errptr, error);
705 	return (0);
706     }
707     engine->hints = pcre_study(engine->pattern, 0, &error);
708     if (error != 0) {
709 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
710 		 mapname, lineno, error);
711 	DICT_PCRE_CODE_FREE(engine->pattern);
712 	return (0);
713     }
714 #else
715     int     error;
716     size_t  errptr;
717 
718     engine->pattern = pcre2_compile((unsigned char *) pattern->regexp,
719 				    PCRE2_ZERO_TERMINATED,
720 				    pattern->options, &error, &errptr, NULL);
721     if (engine->pattern == 0) {
722 	VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);
723 
724 	msg_warn("pcre map %s, line %d: error in regex at offset %lu: %s",
725 		 mapname, lineno, (unsigned long) errptr,
726 		 dict_pcre_get_error(buf, error));
727 	vstring_free(buf);
728 	return (0);
729     }
730     engine->match_data = pcre2_match_data_create_from_pattern(
731 					       engine->pattern, (void *) 0);
732 #endif
733     return (1);
734 }
735 
736 /* dict_pcre_rule_alloc - fill in a generic rule structure */
737 
738 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size)
739 {
740     DICT_PCRE_RULE *rule;
741 
742     rule = (DICT_PCRE_RULE *) mymalloc(size);
743     rule->op = op;
744     rule->lineno = lineno;
745     rule->next = 0;
746 
747     return (rule);
748 }
749 
750 /* dict_pcre_parse_rule - parse and compile one rule */
751 
752 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname,
753 					            int lineno, char *line,
754 					            int nesting)
755 {
756     char   *p;
757 
758 #ifdef DICT_PCRE_CAPTURECOUNT_T
759     DICT_PCRE_CAPTURECOUNT_T actual_sub;
760 
761 #endif
762 #if 0
763     uint32_t namecount;
764 
765 #endif
766 
767     p = line;
768 
769     /*
770      * An ordinary match rule takes one pattern and replacement text.
771      */
772     if (!ISALNUM(*p)) {
773 	DICT_PCRE_REGEXP regexp;
774 	DICT_PCRE_ENGINE engine;
775 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
776 	DICT_PCRE_MATCH_RULE *match_rule;
777 
778 	/*
779 	 * Get the pattern string and options.
780 	 */
781 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
782 	    return (0);
783 
784 	/*
785 	 * Get the replacement text.
786 	 */
787 	while (*p && ISSPACE(*p))
788 	    ++p;
789 	if (!*p)
790 	    msg_warn("pcre map %s, line %d: no replacement text: "
791 		     "using empty string", mapname, lineno);
792 
793 	/*
794 	 * Sanity check the $number instances in the replacement text.
795 	 */
796 	prescan_context.mapname = mapname;
797 	prescan_context.lineno = lineno;
798 	prescan_context.max_sub = 0;
799 	prescan_context.literal = 0;
800 
801 	/*
802 	 * The optimizer will eliminate code duplication and/or dead code.
803 	 */
804 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
805 	if (prescan_context.literal) \
806 	    myfree(prescan_context.literal); \
807 	return (rval); \
808     } while (0)
809 
810 	if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
811 	    VSTRING *base64_buf;
812 	    char   *err;
813 
814 	    if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
815 		err = dict_file_get_error(dict);
816 		msg_warn("pcre map %s, line %d: %s: skipping this rule",
817 			 mapname, lineno, err);
818 		myfree(err);
819 		CREATE_MATCHOP_ERROR_RETURN(0);
820 	    }
821 	    p = vstring_str(base64_buf);
822 	}
823 	if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
824 	    & MAC_PARSE_ERROR) {
825 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
826 		     "skipping this rule", mapname, lineno);
827 	    CREATE_MATCHOP_ERROR_RETURN(0);
828 	}
829 
830 	/*
831 	 * Substring replacement not possible with negative regexps.
832 	 */
833 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
834 	    msg_warn("pcre map %s, line %d: $number found in negative match "
835 		   "replacement text: skipping this rule", mapname, lineno);
836 	    CREATE_MATCHOP_ERROR_RETURN(0);
837 	}
838 	if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
839 	    msg_warn("pcre map %s, line %d: "
840 		     "regular expression substitution is not allowed: "
841 		     "skipping this rule", mapname, lineno);
842 	    CREATE_MATCHOP_ERROR_RETURN(0);
843 	}
844 
845 	/*
846 	 * Compile the pattern.
847 	 */
848 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
849 	    CREATE_MATCHOP_ERROR_RETURN(0);
850 #ifdef DICT_PCRE_CAPTURECOUNT_T
851 #if HAS_PCRE == 1
852 	if (pcre_fullinfo(engine.pattern, engine.hints,
853 			  PCRE_INFO_CAPTURECOUNT,
854 			  (void *) &actual_sub) != 0)
855 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
856 		      mapname, lineno);
857 #else						/* HAS_PCRE */
858 #if 0
859 	if (pcre2_pattern_info(
860 		     engine.pattern, PCRE2_INFO_NAMECOUNT, &namecount) != 0)
861 	    msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
862 		      mapname, lineno);
863 	if (namecount > 0) {
864 	    msg_warn("pcre map %s, line %d: named substrings are not supported",
865 		     mapname, lineno);
866 	    if (engine.pattern)
867 		DICT_PCRE_CODE_FREE(engine.pattern);
868 	    DICT_PCRE_MATCH_HINT_FREE(&engine);
869 	    CREATE_MATCHOP_ERROR_RETURN(0);
870 	}
871 #endif
872 	if (pcre2_pattern_info(engine.pattern, PCRE2_INFO_CAPTURECOUNT,
873 			       (void *) &actual_sub) != 0)
874 	    msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
875 		      mapname, lineno);
876 #endif						/* HAS_PCRE */
877 	if (prescan_context.max_sub > actual_sub) {
878 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
879 		     "skipping this rule", mapname, lineno,
880 		     (int) prescan_context.max_sub);
881 	    if (engine.pattern)
882 		DICT_PCRE_CODE_FREE(engine.pattern);
883 	    DICT_PCRE_MATCH_HINT_FREE(&engine);
884 	    CREATE_MATCHOP_ERROR_RETURN(0);
885 	}
886 #endif						/* DICT_PCRE_CAPTURECOUNT_T */
887 
888 	/*
889 	 * Save the result.
890 	 */
891 	match_rule = (DICT_PCRE_MATCH_RULE *)
892 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno,
893 				 sizeof(DICT_PCRE_MATCH_RULE));
894 	match_rule->match = regexp.match;
895 	match_rule->max_sub = prescan_context.max_sub;
896 	if (prescan_context.literal)
897 	    match_rule->replacement = prescan_context.literal;
898 	else
899 	    match_rule->replacement = mystrdup(p);
900 	match_rule->pattern = engine.pattern;
901 	DICT_PCRE_MATCH_HINT(match_rule) = DICT_PCRE_MATCH_HINT(&engine);
902 	return ((DICT_PCRE_RULE *) match_rule);
903     }
904 
905     /*
906      * The IF operator takes one pattern but no replacement text.
907      */
908     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
909 	DICT_PCRE_REGEXP regexp;
910 	DICT_PCRE_ENGINE engine;
911 	DICT_PCRE_IF_RULE *if_rule;
912 
913 	p += 2;
914 
915 	/*
916 	 * Get the pattern.
917 	 */
918 	while (*p && ISSPACE(*p))
919 	    p++;
920 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
921 	    return (0);
922 
923 	/*
924 	 * Warn about out-of-place text.
925 	 */
926 	while (*p && ISSPACE(*p))
927 	    ++p;
928 	if (*p) {
929 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
930 		     "IF statement: \"%s\"", mapname, lineno, p);
931 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
932 		     " to statements between IF and ENDIF", mapname, lineno);
933 	}
934 
935 	/*
936 	 * Compile the pattern.
937 	 */
938 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
939 	    return (0);
940 
941 	/*
942 	 * Save the result.
943 	 */
944 	if_rule = (DICT_PCRE_IF_RULE *)
945 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno,
946 				 sizeof(DICT_PCRE_IF_RULE));
947 	if_rule->match = regexp.match;
948 	if_rule->pattern = engine.pattern;
949 	DICT_PCRE_MATCH_HINT(if_rule) = DICT_PCRE_MATCH_HINT(&engine);
950 	if_rule->endif_rule = 0;
951 	return ((DICT_PCRE_RULE *) if_rule);
952     }
953 
954     /*
955      * The ENDIF operator takes no patterns and no replacement text.
956      */
957     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
958 	DICT_PCRE_RULE *rule;
959 
960 	p += 5;
961 
962 	/*
963 	 * Warn about out-of-place ENDIFs.
964 	 */
965 	if (nesting == 0) {
966 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
967 		     mapname, lineno);
968 	    return (0);
969 	}
970 
971 	/*
972 	 * Warn about out-of-place text.
973 	 */
974 	while (*p && ISSPACE(*p))
975 	    ++p;
976 	if (*p)
977 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
978 		     mapname, lineno);
979 
980 	/*
981 	 * Save the result.
982 	 */
983 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno,
984 				    sizeof(DICT_PCRE_RULE));
985 	return (rule);
986     }
987 
988     /*
989      * Unrecognized input.
990      */
991     else {
992 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
993 		 mapname, lineno);
994 	return (0);
995     }
996 }
997 
998 /* dict_pcre_open - load and compile a file containing regular expressions */
999 
1000 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
1001 {
1002     const char myname[] = "dict_pcre_open";
1003     DICT_PCRE *dict_pcre;
1004     VSTREAM *map_fp = 0;
1005     struct stat st;
1006     VSTRING *why = 0;
1007     VSTRING *line_buffer = 0;
1008     DICT_PCRE_RULE *last_rule = 0;
1009     DICT_PCRE_RULE *rule;
1010     int     last_line = 0;
1011     int     lineno;
1012     int     nesting = 0;
1013     char   *p;
1014     DICT_PCRE_RULE **rule_stack = 0;
1015     MVECT   mvect;
1016 
1017     /*
1018      * Let the optimizer worry about eliminating redundant code.
1019      */
1020 #define DICT_PCRE_OPEN_RETURN(d) do { \
1021 	DICT *__d = (d); \
1022 	if (map_fp != 0) \
1023 	    vstream_fclose(map_fp); \
1024 	if (line_buffer != 0) \
1025 	    vstring_free(line_buffer); \
1026 	if (why != 0) \
1027 	   vstring_free(why); \
1028 	return (__d); \
1029     } while (0)
1030 
1031     /*
1032      * Sanity checks.
1033      */
1034     if (open_flags != O_RDONLY)
1035 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1036 					     open_flags, dict_flags,
1037 				  "%s:%s map requires O_RDONLY access mode",
1038 					     DICT_TYPE_PCRE, mapname));
1039 
1040     /*
1041      * Open the configuration file.
1042      */
1043     if ((map_fp = dict_stream_open(DICT_TYPE_PCRE, mapname, O_RDONLY,
1044 				   dict_flags, &st, &why)) == 0)
1045 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1046 					     open_flags, dict_flags,
1047 					     "%s", vstring_str(why)));
1048     line_buffer = vstring_alloc(100);
1049 
1050     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
1051 					 sizeof(*dict_pcre));
1052     dict_pcre->dict.lookup = dict_pcre_lookup;
1053     dict_pcre->dict.close = dict_pcre_close;
1054     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
1055     if (dict_flags & DICT_FLAG_FOLD_MUL)
1056 	dict_pcre->dict.fold_buf = vstring_alloc(10);
1057     dict_pcre->head = 0;
1058     dict_pcre->expansion_buf = 0;
1059 
1060 #if HAS_PCRE == 1
1061     if (dict_pcre_init == 0) {
1062 	pcre_malloc = (void *(*) (size_t)) mymalloc;
1063 	pcre_free = (void (*) (void *)) myfree;
1064 	dict_pcre_init = 1;
1065     }
1066 #endif
1067     dict_pcre->dict.owner.uid = st.st_uid;
1068     dict_pcre->dict.owner.status = (st.st_uid != 0);
1069 
1070     /*
1071      * Parse the pcre table.
1072      */
1073     while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
1074 	p = vstring_str(line_buffer);
1075 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
1076 	if (*p == 0)
1077 	    continue;
1078 	rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno,
1079 				    p, nesting);
1080 	if (rule == 0)
1081 	    continue;
1082 	if (rule->op == DICT_PCRE_OP_IF) {
1083 	    if (rule_stack == 0)
1084 		rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect,
1085 					   sizeof(*rule_stack), nesting + 1,
1086 						(MVECT_FN) 0, (MVECT_FN) 0);
1087 	    else
1088 		rule_stack =
1089 		    (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1);
1090 	    rule_stack[nesting] = rule;
1091 	    nesting++;
1092 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
1093 	    DICT_PCRE_IF_RULE *if_rule;
1094 
1095 	    if (nesting-- <= 0)
1096 		/* Already handled in dict_pcre_parse_rule(). */
1097 		msg_panic("%s: ENDIF without IF", myname);
1098 	    if (rule_stack[nesting]->op != DICT_PCRE_OP_IF)
1099 		msg_panic("%s: unexpected rule stack element type %d",
1100 			  myname, rule_stack[nesting]->op);
1101 	    if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting];
1102 	    if_rule->endif_rule = rule;
1103 	}
1104 	if (last_rule == 0)
1105 	    dict_pcre->head = rule;
1106 	else
1107 	    last_rule->next = rule;
1108 	last_rule = rule;
1109     }
1110 
1111     while (nesting-- > 0)
1112 	msg_warn("pcre map %s, line %d: IF has no matching ENDIF",
1113 		 mapname, rule_stack[nesting]->lineno);
1114 
1115     if (rule_stack)
1116 	(void) mvect_free(&mvect);
1117 
1118     dict_file_purge_buffers(&dict_pcre->dict);
1119     DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
1120 }
1121 
1122 #endif					/* HAS_PCRE */
1123