xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: dict_pcre.c,v 1.1.1.2 2013/01/02 18:59:12 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*--*/
33 
34 #include "sys_defs.h"
35 
36 #ifdef HAS_PCRE
37 
38 /* System library. */
39 
40 #include <sys/stat.h>
41 #include <stdio.h>			/* sprintf() prototype */
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <ctype.h>
46 
47 #ifdef STRCASECMP_IN_STRINGS_H
48 #include <strings.h>
49 #endif
50 
51 /* Utility library. */
52 
53 #include "mymalloc.h"
54 #include "msg.h"
55 #include "safe.h"
56 #include "vstream.h"
57 #include "vstring.h"
58 #include "stringops.h"
59 #include "readlline.h"
60 #include "dict.h"
61 #include "dict_pcre.h"
62 #include "mac_parse.h"
63 #include "pcre.h"
64 #include "warn_stat.h"
65 
66  /*
67   * Support for IF/ENDIF based on an idea by Bert Driehuis.
68   */
enum {
    DICT_PCRE_OP_MATCH = 1,		/* Match this regexp */
    DICT_PCRE_OP_IF = 2,		/* Increase if/endif nesting on match */
    DICT_PCRE_OP_ENDIF = 3		/* Decrease if/endif nesting on match */
};
72 
 /*
  * Max strings captured by regexp - essentially the max number of (..).
  * The offsets vector handed to pcre_exec() is dimensioned as three ints
  * per capture, i.e. PCRE_MAX_CAPTURE * 3 elements.
  */
#define PCRE_MAX_CAPTURE	99
77 
78  /*
79   * Regular expression before and after compilation.
80   */
/* Uncompiled pattern, as filled in by dict_pcre_get_pattern(). */
typedef struct {
    char   *regexp;			/* pattern text, points into the caller's line buffer */
    int     options;			/* PCRE_XXX option bits for pcre_compile() */
    int     match;			/* 1 = positive match, 0 = negated with '!' */
} DICT_PCRE_REGEXP;
86 
/* Compiled pattern, as filled in by dict_pcre_compile(). */
typedef struct {
    pcre   *pattern;			/* the compiled pattern, from pcre_compile() */
    pcre_extra *hints;			/* pcre_study() result, to speed pattern execution */
} DICT_PCRE_ENGINE;
91 
92  /*
93   * Compiled generic rule, and subclasses that derive from it.
94   */
/*
 * Generic rule header. The match and IF rule structures start with this
 * header, and the code casts between DICT_PCRE_RULE * and the specific
 * rule type after inspecting "op".
 */
typedef struct DICT_PCRE_RULE {
    int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
    int     nesting;			/* IF/ENDIF nesting level in effect when the rule was parsed */
    int     lineno;			/* source file line number, used in diagnostics */
    struct DICT_PCRE_RULE *next;	/* next rule in dict, 0 at end of list */
} DICT_PCRE_RULE;
101 
/* Rule of the form "pattern replacement" (op is DICT_PCRE_OP_MATCH). */
typedef struct {
    DICT_PCRE_RULE rule;		/* generic part */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    char   *replacement;		/* replacement string: pre-scanned literal, or raw text with $number */
    int     match;			/* positive or negative match */
    size_t  max_sub;			/* largest $number in replacement, 0 = return replacement as is */
} DICT_PCRE_MATCH_RULE;
110 
/* Rule of the form "IF pattern" (op is DICT_PCRE_OP_IF). */
typedef struct {
    DICT_PCRE_RULE rule;		/* generic members */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    int     match;			/* positive or negative match */
} DICT_PCRE_IF_RULE;
117 
118  /*
119   * PCRE map.
120   */
/* Dictionary handle; the generic DICT part comes first so DICT * can be cast. */
typedef struct {
    DICT    dict;			/* generic members */
    DICT_PCRE_RULE *head;		/* first rule, in table file order; 0 if none */
    VSTRING *expansion_buf;		/* lookup result, allocated on first $number expansion */
} DICT_PCRE;
126 
/*
 * Zero until dict_pcre_open() has pointed pcre_malloc/pcre_free at
 * mymalloc()/myfree(); set to 1 afterwards so that this is done only once.
 */
static int dict_pcre_init = 0;		/* flag need to init pcre library */
128 
/*
 * Context for $number expansion callback. dict_pcre_lookup() fills this in
 * and passes it through mac_parse() to dict_pcre_expand().
 */
typedef struct {
    DICT_PCRE *dict_pcre;		/* the dictionary handle */
    DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
    const char *lookup_string;		/* string against which we match */
    int     offsets[PCRE_MAX_CAPTURE * 3];	/* Cut substrings: offsets vector written by pcre_exec() */
    int     matches;			/* Count of cuts: the pcre_exec() result */
} DICT_PCRE_EXPAND_CONTEXT;
139 
 /*
  * Context for $number pre-scan callback. dict_pcre_parse_rule() fills this
  * in and passes it through mac_parse() to dict_pcre_prescan().
  */
typedef struct {
    const char *mapname;		/* name of regexp map */
    int     lineno;			/* where in file */
    size_t  max_sub;			/* Largest $n seen, 0 if none so far */
    char   *literal;			/* constant result, $$ -> $; 0 if none, owned copy otherwise */
} DICT_PCRE_PRESCAN_CONTEXT;
149 
 /*
  * Compatibility: supply MAC_PARSE_OK when the included mac_parse.h does
  * not define it.
  */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif
156 
 /*
  * Macros to make dense code more accessible: named "nothing special"
  * values for pcre_exec() style arguments.
  */
#define NULL_STARTOFFSET	(0)
#define NULL_EXEC_OPTIONS 	(0)
#define NULL_OVECTOR		((int *) 0)
#define NULL_OVECTOR_LENGTH	(0)
164 
165 /* dict_pcre_expand - replace $number with matched text */
166 
167 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
168 {
169     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
170     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
171     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
172     const char *pp;
173     int     n;
174     int     ret;
175 
176     /*
177      * Replace $0-${99} with strings cut from matched text.
178      */
179     if (type == MAC_PARSE_VARNAME) {
180 	n = atoi(vstring_str(buf));
181 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
182 				 ctxt->matches, n, &pp);
183 	if (ret < 0) {
184 	    if (ret == PCRE_ERROR_NOSUBSTRING)
185 		return (MAC_PARSE_UNDEF);
186 	    else
187 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
188 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
189 	}
190 	if (*pp == 0) {
191 	    myfree((char *) pp);
192 	    return (MAC_PARSE_UNDEF);
193 	}
194 	vstring_strcat(dict_pcre->expansion_buf, pp);
195 	myfree((char *) pp);
196 	return (MAC_PARSE_OK);
197     }
198 
199     /*
200      * Straight text - duplicate with no substitution.
201      */
202     else {
203 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
204 	return (MAC_PARSE_OK);
205     }
206 }
207 
208 /* dict_pcre_exec_error - report matching error */
209 
210 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
211 {
212     switch (errval) {
213 	case 0:
214 	msg_warn("pcre map %s, line %d: too many (...)",
215 		 mapname, lineno);
216 	return;
217     case PCRE_ERROR_NULL:
218     case PCRE_ERROR_BADOPTION:
219 	msg_fatal("pcre map %s, line %d: bad args to re_exec",
220 		  mapname, lineno);
221     case PCRE_ERROR_BADMAGIC:
222     case PCRE_ERROR_UNKNOWN_NODE:
223 	msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
224 		  mapname, lineno);
225 #ifdef PCRE_ERROR_NOMEMORY
226     case PCRE_ERROR_NOMEMORY:
227 	msg_fatal("pcre map %s, line %d: out of memory",
228 		  mapname, lineno);
229 #endif
230 #ifdef PCRE_ERROR_MATCHLIMIT
231     case PCRE_ERROR_MATCHLIMIT:
232 	msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit",
233 		  mapname, lineno);
234 #endif
235 #ifdef PCRE_ERROR_BADUTF8
236     case PCRE_ERROR_BADUTF8:
237 	msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string",
238 		  mapname, lineno);
239 #endif
240 #ifdef PCRE_ERROR_BADUTF8_OFFSET
241     case PCRE_ERROR_BADUTF8_OFFSET:
242 	msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string",
243 		  mapname, lineno);
244 #endif
245     default:
246 	msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
247 		  mapname, lineno, errval);
248     }
249 }
250 
251 /* dict_pcre_lookup - match string and perform optional substitution */
252 
253 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
254 {
255     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
256     DICT_PCRE_RULE *rule;
257     DICT_PCRE_IF_RULE *if_rule;
258     DICT_PCRE_MATCH_RULE *match_rule;
259     int     lookup_len = strlen(lookup_string);
260     DICT_PCRE_EXPAND_CONTEXT ctxt;
261     int     nesting = 0;
262 
263     dict->error = 0;
264 
265     if (msg_verbose)
266 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
267 
268     /*
269      * Optionally fold the key.
270      */
271     if (dict->flags & DICT_FLAG_FOLD_MUL) {
272 	if (dict->fold_buf == 0)
273 	    dict->fold_buf = vstring_alloc(10);
274 	vstring_strcpy(dict->fold_buf, lookup_string);
275 	lookup_string = lowercase(vstring_str(dict->fold_buf));
276     }
277     for (rule = dict_pcre->head; rule; rule = rule->next) {
278 
279 	/*
280 	 * Skip rules inside failed IF/ENDIF.
281 	 */
282 	if (nesting < rule->nesting)
283 	    continue;
284 
285 	switch (rule->op) {
286 
287 	    /*
288 	     * Search for a matching expression.
289 	     */
290 	case DICT_PCRE_OP_MATCH:
291 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
292 	    ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
293 				     lookup_string, lookup_len,
294 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
295 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
296 
297 	    if (ctxt.matches > 0) {
298 		if (!match_rule->match)
299 		    continue;			/* Negative rule matched */
300 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
301 		if (match_rule->match)
302 		    continue;			/* Positive rule did not
303 						 * match */
304 	    } else {
305 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
306 		continue;			/* pcre_exec failed */
307 	    }
308 
309 	    /*
310 	     * Skip $number substitutions when the replacement text contains
311 	     * no $number strings, as learned during the compile time
312 	     * pre-scan. The pre-scan already replaced $$ by $.
313 	     */
314 	    if (match_rule->max_sub == 0)
315 		return match_rule->replacement;
316 
317 	    /*
318 	     * We've got a match. Perform substitution on replacement string.
319 	     */
320 	    if (dict_pcre->expansion_buf == 0)
321 		dict_pcre->expansion_buf = vstring_alloc(10);
322 	    VSTRING_RESET(dict_pcre->expansion_buf);
323 	    ctxt.dict_pcre = dict_pcre;
324 	    ctxt.match_rule = match_rule;
325 	    ctxt.lookup_string = lookup_string;
326 
327 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
328 			  (char *) &ctxt) & MAC_PARSE_ERROR)
329 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
330 			  dict->name, rule->lineno);
331 
332 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
333 	    return (vstring_str(dict_pcre->expansion_buf));
334 
335 	    /*
336 	     * Conditional. XXX We provide space for matched substring info
337 	     * because PCRE uses part of it as workspace for backtracking.
338 	     * PCRE will allocate memory if it runs out of backtracking
339 	     * storage.
340 	     */
341 	case DICT_PCRE_OP_IF:
342 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
343 	    ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
344 				     lookup_string, lookup_len,
345 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
346 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
347 
348 	    if (ctxt.matches > 0) {
349 		if (!if_rule->match)
350 		    continue;			/* Negative rule matched */
351 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
352 		if (if_rule->match)
353 		    continue;			/* Positive rule did not
354 						 * match */
355 	    } else {
356 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
357 		continue;			/* pcre_exec failed */
358 	    }
359 	    nesting++;
360 	    continue;
361 
362 	    /*
363 	     * ENDIF after successful IF.
364 	     */
365 	case DICT_PCRE_OP_ENDIF:
366 	    nesting--;
367 	    continue;
368 
369 	default:
370 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
371 	}
372     }
373     return (0);
374 }
375 
376 /* dict_pcre_close - close pcre dictionary */
377 
378 static void dict_pcre_close(DICT *dict)
379 {
380     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
381     DICT_PCRE_RULE *rule;
382     DICT_PCRE_RULE *next;
383     DICT_PCRE_MATCH_RULE *match_rule;
384     DICT_PCRE_IF_RULE *if_rule;
385 
386     for (rule = dict_pcre->head; rule; rule = next) {
387 	next = rule->next;
388 	switch (rule->op) {
389 	case DICT_PCRE_OP_MATCH:
390 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
391 	    if (match_rule->pattern)
392 		myfree((char *) match_rule->pattern);
393 	    if (match_rule->hints)
394 		myfree((char *) match_rule->hints);
395 	    if (match_rule->replacement)
396 		myfree((char *) match_rule->replacement);
397 	    break;
398 	case DICT_PCRE_OP_IF:
399 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
400 	    if (if_rule->pattern)
401 		myfree((char *) if_rule->pattern);
402 	    if (if_rule->hints)
403 		myfree((char *) if_rule->hints);
404 	    break;
405 	case DICT_PCRE_OP_ENDIF:
406 	    break;
407 	default:
408 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
409 	}
410 	myfree((char *) rule);
411     }
412     if (dict_pcre->expansion_buf)
413 	vstring_free(dict_pcre->expansion_buf);
414     if (dict->fold_buf)
415 	vstring_free(dict->fold_buf);
416     dict_free(dict);
417 }
418 
419 /* dict_pcre_get_pattern - extract pattern from rule */
420 
421 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
422 				         DICT_PCRE_REGEXP *pattern)
423 {
424     char   *p = *bufp;
425     char    re_delimiter;
426 
427     /*
428      * Process negation operators.
429      */
430     pattern->match = 1;
431     while (*p == '!') {
432 	pattern->match = !pattern->match;
433 	p++;
434     }
435 
436     /*
437      * Grr...aceful handling of whitespace after '!'.
438      */
439     while (*p && ISSPACE(*p))
440 	p++;
441     if (*p == 0) {
442 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
443 		 mapname, lineno);
444 	return (0);
445     }
446     re_delimiter = *p++;
447     pattern->regexp = p;
448 
449     /*
450      * Search for second delimiter, handling backslash escape.
451      */
452     while (*p) {
453 	if (*p == '\\') {
454 	    ++p;
455 	    if (*p == 0)
456 		break;
457 	} else if (*p == re_delimiter)
458 	    break;
459 	++p;
460     }
461 
462     if (!*p) {
463 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
464 		 "ignoring this rule", mapname, lineno, re_delimiter);
465 	return (0);
466     }
467     *p++ = 0;					/* Null term the regexp */
468 
469     /*
470      * Parse any regexp options.
471      */
472     pattern->options = PCRE_CASELESS | PCRE_DOTALL;
473     while (*p && !ISSPACE(*p)) {
474 	switch (*p) {
475 	case 'i':
476 	    pattern->options ^= PCRE_CASELESS;
477 	    break;
478 	case 'm':
479 	    pattern->options ^= PCRE_MULTILINE;
480 	    break;
481 	case 's':
482 	    pattern->options ^= PCRE_DOTALL;
483 	    break;
484 	case 'x':
485 	    pattern->options ^= PCRE_EXTENDED;
486 	    break;
487 	case 'A':
488 	    pattern->options ^= PCRE_ANCHORED;
489 	    break;
490 	case 'E':
491 	    pattern->options ^= PCRE_DOLLAR_ENDONLY;
492 	    break;
493 	case 'U':
494 	    pattern->options ^= PCRE_UNGREEDY;
495 	    break;
496 	case 'X':
497 	    pattern->options ^= PCRE_EXTRA;
498 	    break;
499 	default:
500 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
501 		     "skipping this rule", mapname, lineno, *p);
502 	    return (0);
503 	}
504 	++p;
505     }
506     *bufp = p;
507     return (1);
508 }
509 
510 /* dict_pcre_prescan - sanity check $number instances in replacement text */
511 
512 static int dict_pcre_prescan(int type, VSTRING *buf, char *context)
513 {
514     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
515     size_t  n;
516 
517     /*
518      * Keep a copy of literal text (with $$ already replaced by $) if and
519      * only if the replacement text contains no $number expression. This way
520      * we can avoid having to scan the replacement text at lookup time.
521      */
522     if (type == MAC_PARSE_VARNAME) {
523 	if (ctxt->literal) {
524 	    myfree(ctxt->literal);
525 	    ctxt->literal = 0;
526 	}
527 	if (!alldig(vstring_str(buf))) {
528 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
529 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
530 	    return (MAC_PARSE_ERROR);
531 	}
532 	n = atoi(vstring_str(buf));
533 	if (n < 1) {
534 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
535 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
536 	    return (MAC_PARSE_ERROR);
537 	}
538 	if (n > ctxt->max_sub)
539 	    ctxt->max_sub = n;
540     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
541 	if (ctxt->literal)
542 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
543 		      ctxt->mapname, ctxt->lineno);
544 	ctxt->literal = mystrdup(vstring_str(buf));
545     }
546     return (MAC_PARSE_OK);
547 }
548 
549 /* dict_pcre_compile - compile pattern */
550 
551 static int dict_pcre_compile(const char *mapname, int lineno,
552 			             DICT_PCRE_REGEXP *pattern,
553 			             DICT_PCRE_ENGINE *engine)
554 {
555     const char *error;
556     int     errptr;
557 
558     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
559 				   &error, &errptr, NULL);
560     if (engine->pattern == 0) {
561 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
562 		 mapname, lineno, errptr, error);
563 	return (0);
564     }
565     engine->hints = pcre_study(engine->pattern, 0, &error);
566     if (error != 0) {
567 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
568 		 mapname, lineno, error);
569 	myfree((char *) engine->pattern);
570 	return (0);
571     }
572     return (1);
573 }
574 
575 /* dict_pcre_rule_alloc - fill in a generic rule structure */
576 
577 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
578 					            int lineno,
579 					            size_t size)
580 {
581     DICT_PCRE_RULE *rule;
582 
583     rule = (DICT_PCRE_RULE *) mymalloc(size);
584     rule->op = op;
585     rule->nesting = nesting;
586     rule->lineno = lineno;
587     rule->next = 0;
588 
589     return (rule);
590 }
591 
592 /* dict_pcre_parse_rule - parse and compile one rule */
593 
594 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
595 					            char *line, int nesting,
596 					            int dict_flags)
597 {
598     char   *p;
599     int     actual_sub;
600 
601     p = line;
602 
603     /*
604      * An ordinary match rule takes one pattern and replacement text.
605      */
606     if (!ISALNUM(*p)) {
607 	DICT_PCRE_REGEXP regexp;
608 	DICT_PCRE_ENGINE engine;
609 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
610 	DICT_PCRE_MATCH_RULE *match_rule;
611 
612 	/*
613 	 * Get the pattern string and options.
614 	 */
615 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
616 	    return (0);
617 
618 	/*
619 	 * Get the replacement text.
620 	 */
621 	while (*p && ISSPACE(*p))
622 	    ++p;
623 	if (!*p)
624 	    msg_warn("%s, line %d: no replacement text: using empty string",
625 		     mapname, lineno);
626 
627 	/*
628 	 * Sanity check the $number instances in the replacement text.
629 	 */
630 	prescan_context.mapname = mapname;
631 	prescan_context.lineno = lineno;
632 	prescan_context.max_sub = 0;
633 	prescan_context.literal = 0;
634 
635 	/*
636 	 * The optimizer will eliminate code duplication and/or dead code.
637 	 */
638 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
639 	if (prescan_context.literal) \
640 	    myfree(prescan_context.literal); \
641 	return (rval); \
642     } while (0)
643 
644 	if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context)
645 	    & MAC_PARSE_ERROR) {
646 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
647 		     "skipping this rule", mapname, lineno);
648 	    CREATE_MATCHOP_ERROR_RETURN(0);
649 	}
650 
651 	/*
652 	 * Substring replacement not possible with negative regexps.
653 	 */
654 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
655 	    msg_warn("pcre map %s, line %d: $number found in negative match "
656 		   "replacement text: skipping this rule", mapname, lineno);
657 	    CREATE_MATCHOP_ERROR_RETURN(0);
658 	}
659 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
660 	    msg_warn("pcre map %s, line %d: "
661 		     "regular expression substitution is not allowed: "
662 		     "skipping this rule", mapname, lineno);
663 	    CREATE_MATCHOP_ERROR_RETURN(0);
664 	}
665 
666 	/*
667 	 * Compile the pattern.
668 	 */
669 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
670 	    CREATE_MATCHOP_ERROR_RETURN(0);
671 #ifdef PCRE_INFO_CAPTURECOUNT
672 	if (pcre_fullinfo(engine.pattern, engine.hints,
673 			  PCRE_INFO_CAPTURECOUNT,
674 			  (void *) &actual_sub) != 0)
675 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
676 		      mapname, lineno);
677 	if (prescan_context.max_sub > actual_sub) {
678 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
679 		     "skipping this rule", mapname, lineno,
680 		     (int) prescan_context.max_sub);
681 	    if (engine.pattern)
682 		myfree((char *) engine.pattern);
683 	    if (engine.hints)
684 		myfree((char *) engine.hints);
685 	    CREATE_MATCHOP_ERROR_RETURN(0);
686 	}
687 #endif
688 
689 	/*
690 	 * Save the result.
691 	 */
692 	match_rule = (DICT_PCRE_MATCH_RULE *)
693 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
694 				 sizeof(DICT_PCRE_MATCH_RULE));
695 	match_rule->match = regexp.match;
696 	match_rule->max_sub = prescan_context.max_sub;
697 	if (prescan_context.literal)
698 	    match_rule->replacement = prescan_context.literal;
699 	else
700 	    match_rule->replacement = mystrdup(p);
701 	match_rule->pattern = engine.pattern;
702 	match_rule->hints = engine.hints;
703 	return ((DICT_PCRE_RULE *) match_rule);
704     }
705 
706     /*
707      * The IF operator takes one pattern but no replacement text.
708      */
709     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
710 	DICT_PCRE_REGEXP regexp;
711 	DICT_PCRE_ENGINE engine;
712 	DICT_PCRE_IF_RULE *if_rule;
713 
714 	p += 2;
715 
716 	/*
717 	 * Get the pattern.
718 	 */
719 	while (*p && ISSPACE(*p))
720 	    p++;
721 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
722 	    return (0);
723 
724 	/*
725 	 * Warn about out-of-place text.
726 	 */
727 	while (*p && ISSPACE(*p))
728 	    ++p;
729 	if (*p) {
730 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
731 		     "IF statement: \"%s\"", mapname, lineno, p);
732 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
733 		     " to statements between IF and ENDIF", mapname, lineno);
734 	}
735 
736 	/*
737 	 * Compile the pattern.
738 	 */
739 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
740 	    return (0);
741 
742 	/*
743 	 * Save the result.
744 	 */
745 	if_rule = (DICT_PCRE_IF_RULE *)
746 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
747 				 sizeof(DICT_PCRE_IF_RULE));
748 	if_rule->match = regexp.match;
749 	if_rule->pattern = engine.pattern;
750 	if_rule->hints = engine.hints;
751 	return ((DICT_PCRE_RULE *) if_rule);
752     }
753 
754     /*
755      * The ENDIF operator takes no patterns and no replacement text.
756      */
757     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
758 	DICT_PCRE_RULE *rule;
759 
760 	p += 5;
761 
762 	/*
763 	 * Warn about out-of-place ENDIFs.
764 	 */
765 	if (nesting == 0) {
766 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
767 		     mapname, lineno);
768 	    return (0);
769 	}
770 
771 	/*
772 	 * Warn about out-of-place text.
773 	 */
774 	while (*p && ISSPACE(*p))
775 	    ++p;
776 	if (*p)
777 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
778 		     mapname, lineno);
779 
780 	/*
781 	 * Save the result.
782 	 */
783 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
784 				    sizeof(DICT_PCRE_RULE));
785 	return (rule);
786     }
787 
788     /*
789      * Unrecognized input.
790      */
791     else {
792 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
793 		 mapname, lineno);
794 	return (0);
795     }
796 }
797 
798 /* dict_pcre_open - load and compile a file containing regular expressions */
799 
800 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
801 {
802     DICT_PCRE *dict_pcre;
803     VSTREAM *map_fp;
804     struct stat st;
805     VSTRING *line_buffer;
806     DICT_PCRE_RULE *last_rule = 0;
807     DICT_PCRE_RULE *rule;
808     int     lineno = 0;
809     int     nesting = 0;
810     char   *p;
811 
812     /*
813      * Sanity checks.
814      */
815     if (open_flags != O_RDONLY)
816 	return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags,
817 			       "%s:%s map requires O_RDONLY access mode",
818 			       DICT_TYPE_PCRE, mapname));
819 
820     /*
821      * Open the configuration file.
822      */
823     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
824 	return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags,
825 			       "open %s: %m", mapname));
826     if (fstat(vstream_fileno(map_fp), &st) < 0)
827 	msg_fatal("fstat %s: %m", mapname);
828 
829     line_buffer = vstring_alloc(100);
830 
831     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
832 					 sizeof(*dict_pcre));
833     dict_pcre->dict.lookup = dict_pcre_lookup;
834     dict_pcre->dict.close = dict_pcre_close;
835     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
836     if (dict_flags & DICT_FLAG_FOLD_MUL)
837 	dict_pcre->dict.fold_buf = vstring_alloc(10);
838     dict_pcre->head = 0;
839     dict_pcre->expansion_buf = 0;
840 
841     if (dict_pcre_init == 0) {
842 	pcre_malloc = (void *(*) (size_t)) mymalloc;
843 	pcre_free = (void (*) (void *)) myfree;
844 	dict_pcre_init = 1;
845     }
846     dict_pcre->dict.owner.uid = st.st_uid;
847     dict_pcre->dict.owner.status = (st.st_uid != 0);
848 
849     /*
850      * Parse the pcre table.
851      */
852     while (readlline(line_buffer, map_fp, &lineno)) {
853 	p = vstring_str(line_buffer);
854 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
855 	if (*p == 0)
856 	    continue;
857 	rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
858 	if (rule == 0)
859 	    continue;
860 	if (rule->op == DICT_PCRE_OP_IF) {
861 	    nesting++;
862 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
863 	    nesting--;
864 	}
865 	if (last_rule == 0)
866 	    dict_pcre->head = rule;
867 	else
868 	    last_rule->next = rule;
869 	last_rule = rule;
870     }
871 
872     if (nesting)
873 	msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
874 		 mapname, lineno);
875 
876     vstring_free(line_buffer);
877     vstream_fclose(map_fp);
878 
879     return (DICT_DEBUG (&dict_pcre->dict));
880 }
881 
882 #endif					/* HAS_PCRE */
883