xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision daf6c4152fcddc27c445489775ed1f66ab4ea9a9)
1 /*	$NetBSD: dict_pcre.c,v 1.1.1.1 2009/06/23 10:08:59 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*--*/
33 
34 #include "sys_defs.h"
35 
36 #ifdef HAS_PCRE
37 
38 /* System library. */
39 
40 #include <stdio.h>			/* sprintf() prototype */
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <ctype.h>
45 
46 #ifdef STRCASECMP_IN_STRINGS_H
47 #include <strings.h>
48 #endif
49 
50 /* Utility library. */
51 
52 #include "mymalloc.h"
53 #include "msg.h"
54 #include "safe.h"
55 #include "vstream.h"
56 #include "vstring.h"
57 #include "stringops.h"
58 #include "readlline.h"
59 #include "dict.h"
60 #include "dict_pcre.h"
61 #include "mac_parse.h"
62 #include "pcre.h"
63 
64  /*
65   * Support for IF/ENDIF based on an idea by Bert Driehuis.
66   */
#define DICT_PCRE_OP_MATCH    1		/* Match rule: pattern plus replacement text */
#define DICT_PCRE_OP_IF       2		/* IF rule: opens a conditional block */
#define DICT_PCRE_OP_ENDIF    3		/* ENDIF rule: closes a conditional block */
70 
71  /*
72   * Max strings captured by regexp - essentially the max number of (..)
73   */
#define PCRE_MAX_CAPTURE	99	/* offsets[] reserves 3 ints for each */
75 
76  /*
77   * Regular expression before and after compilation.
78   */
typedef struct {
    char   *regexp;			/* pattern text; points into the
					 * caller's line buffer, not a copy */
    int     options;			/* PCRE_XXX compile-time option bits */
    int     match;			/* 1 = positive match, 0 = negated
					 * (flipped by each leading '!') */
} DICT_PCRE_REGEXP;
84 
typedef struct {
    pcre   *pattern;			/* the compiled pattern, as returned
					 * by pcre_compile() */
    pcre_extra *hints;			/* pcre_study() result: hints to
					 * speed pattern execution */
} DICT_PCRE_ENGINE;
89 
90  /*
91   * Compiled generic rule, and subclasses that derive from it.
92   */
typedef struct DICT_PCRE_RULE {
    int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF; tells
					 * which subclass this really is */
    int     nesting;			/* level of IF/ENDIF nesting at the
					 * time the rule was parsed */
    int     lineno;			/* source file line number, used in
					 * diagnostics */
    struct DICT_PCRE_RULE *next;	/* next rule in dict, 0 at the end */
} DICT_PCRE_RULE;
99 
typedef struct {
    DICT_PCRE_RULE rule;		/* generic part; first member so that
					 * the rule can be cast either way */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    char   *replacement;		/* replacement string, released by
					 * dict_pcre_close() */
    int     match;			/* positive or negative match */
    size_t  max_sub;			/* largest $number in replacement; 0
					 * means the text is used verbatim */
} DICT_PCRE_MATCH_RULE;
108 
typedef struct {
    DICT_PCRE_RULE rule;		/* generic members; first member so
					 * that the rule can be cast either
					 * way */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    int     match;			/* positive or negative match */
} DICT_PCRE_IF_RULE;
115 
116  /*
117   * PCRE map.
118   */
typedef struct {
    DICT    dict;			/* generic members */
    DICT_PCRE_RULE *head;		/* first rule in file order, or 0 */
    VSTRING *expansion_buf;		/* lookup result; created on first
					 * use by dict_pcre_lookup() */
} DICT_PCRE;
124 
static int dict_pcre_init = 0;		/* 0 until dict_pcre_open() has pointed
					 * pcre_malloc/pcre_free at
					 * mymalloc/myfree */
126 
127 /*
128  * Context for $number expansion callback.
129  */
typedef struct {
    DICT_PCRE *dict_pcre;		/* the dictionary handle */
    DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
    const char *lookup_string;		/* string against which we match */
    int     offsets[PCRE_MAX_CAPTURE * 3];	/* Cut substrings, filled in
						 * by pcre_exec() */
    int     matches;			/* Count of cuts: the pcre_exec()
					 * result value */
} DICT_PCRE_EXPAND_CONTEXT;
137 
138  /*
139   * Context for $number pre-scan callback.
140   */
typedef struct {
    const char *mapname;		/* name of regexp map */
    int     lineno;			/* where in file */
    size_t  max_sub;			/* Largest $n seen, 0 if none */
    char   *literal;			/* constant result, $$ -> $; a
					 * mystrdup() copy, or 0 when the
					 * replacement is not constant */
} DICT_PCRE_PRESCAN_CONTEXT;
147 
148  /*
149   * Compatibility.
150   */
/* Fallback for when none of the included headers defines MAC_PARSE_OK. */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif
154 
155  /*
156   * Macros to make dense code more accessible.
157   */
#define NULL_STARTOFFSET	(0)	/* pcre_exec() start offset argument */
#define NULL_EXEC_OPTIONS 	(0)	/* pcre_exec() options argument */
#define NULL_OVECTOR		((int *) 0)	/* "no offsets vector" */
#define NULL_OVECTOR_LENGTH	(0)	/* length that goes with NULL_OVECTOR */
162 
163 /* dict_pcre_expand - replace $number with matched text */
164 
165 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
166 {
167     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
168     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
169     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
170     const char *pp;
171     int     n;
172     int     ret;
173 
174     /*
175      * Replace $0-${99} with strings cut from matched text.
176      */
177     if (type == MAC_PARSE_VARNAME) {
178 	n = atoi(vstring_str(buf));
179 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
180 				 ctxt->matches, n, &pp);
181 	if (ret < 0) {
182 	    if (ret == PCRE_ERROR_NOSUBSTRING)
183 		return (MAC_PARSE_UNDEF);
184 	    else
185 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
186 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
187 	}
188 	if (*pp == 0) {
189 	    myfree((char *) pp);
190 	    return (MAC_PARSE_UNDEF);
191 	}
192 	vstring_strcat(dict_pcre->expansion_buf, pp);
193 	myfree((char *) pp);
194 	return (MAC_PARSE_OK);
195     }
196 
197     /*
198      * Straight text - duplicate with no substitution.
199      */
200     else {
201 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
202 	return (MAC_PARSE_OK);
203     }
204 }
205 
206 /* dict_pcre_exec_error - report matching error */
207 
208 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
209 {
210     switch (errval) {
211 	case 0:
212 	msg_warn("pcre map %s, line %d: too many (...)",
213 		 mapname, lineno);
214 	return;
215     case PCRE_ERROR_NULL:
216     case PCRE_ERROR_BADOPTION:
217 	msg_fatal("pcre map %s, line %d: bad args to re_exec",
218 		  mapname, lineno);
219     case PCRE_ERROR_BADMAGIC:
220     case PCRE_ERROR_UNKNOWN_NODE:
221 	msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
222 		  mapname, lineno);
223 #ifdef PCRE_ERROR_NOMEMORY
224     case PCRE_ERROR_NOMEMORY:
225 	msg_fatal("pcre map %s, line %d: out of memory",
226 		  mapname, lineno);
227 #endif
228 #ifdef PCRE_ERROR_MATCHLIMIT
229     case PCRE_ERROR_MATCHLIMIT:
230 	msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit",
231 		  mapname, lineno);
232 #endif
233 #ifdef PCRE_ERROR_BADUTF8
234     case PCRE_ERROR_BADUTF8:
235 	msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string",
236 		  mapname, lineno);
237 #endif
238 #ifdef PCRE_ERROR_BADUTF8_OFFSET
239     case PCRE_ERROR_BADUTF8_OFFSET:
240 	msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string",
241 		  mapname, lineno);
242 #endif
243     default:
244 	msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
245 		  mapname, lineno, errval);
246     }
247 }
248 
249 /* dict_pcre_lookup - match string and perform optional substitution */
250 
251 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
252 {
253     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
254     DICT_PCRE_RULE *rule;
255     DICT_PCRE_IF_RULE *if_rule;
256     DICT_PCRE_MATCH_RULE *match_rule;
257     int     lookup_len = strlen(lookup_string);
258     DICT_PCRE_EXPAND_CONTEXT ctxt;
259     int     nesting = 0;
260 
261     dict_errno = 0;
262 
263     if (msg_verbose)
264 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
265 
266     /*
267      * Optionally fold the key.
268      */
269     if (dict->flags & DICT_FLAG_FOLD_MUL) {
270 	if (dict->fold_buf == 0)
271 	    dict->fold_buf = vstring_alloc(10);
272 	vstring_strcpy(dict->fold_buf, lookup_string);
273 	lookup_string = lowercase(vstring_str(dict->fold_buf));
274     }
275     for (rule = dict_pcre->head; rule; rule = rule->next) {
276 
277 	/*
278 	 * Skip rules inside failed IF/ENDIF.
279 	 */
280 	if (nesting < rule->nesting)
281 	    continue;
282 
283 	switch (rule->op) {
284 
285 	    /*
286 	     * Search for a matching expression.
287 	     */
288 	case DICT_PCRE_OP_MATCH:
289 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
290 	    ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
291 				     lookup_string, lookup_len,
292 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
293 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
294 
295 	    if (ctxt.matches > 0) {
296 		if (!match_rule->match)
297 		    continue;			/* Negative rule matched */
298 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
299 		if (match_rule->match)
300 		    continue;			/* Positive rule did not
301 						 * match */
302 	    } else {
303 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
304 		continue;			/* pcre_exec failed */
305 	    }
306 
307 	    /*
308 	     * Skip $number substitutions when the replacement text contains
309 	     * no $number strings, as learned during the compile time
310 	     * pre-scan. The pre-scan already replaced $$ by $.
311 	     */
312 	    if (match_rule->max_sub == 0)
313 		return match_rule->replacement;
314 
315 	    /*
316 	     * We've got a match. Perform substitution on replacement string.
317 	     */
318 	    if (dict_pcre->expansion_buf == 0)
319 		dict_pcre->expansion_buf = vstring_alloc(10);
320 	    VSTRING_RESET(dict_pcre->expansion_buf);
321 	    ctxt.dict_pcre = dict_pcre;
322 	    ctxt.match_rule = match_rule;
323 	    ctxt.lookup_string = lookup_string;
324 
325 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
326 			  (char *) &ctxt) & MAC_PARSE_ERROR)
327 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
328 			  dict->name, rule->lineno);
329 
330 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
331 	    return (vstring_str(dict_pcre->expansion_buf));
332 
333 	    /*
334 	     * Conditional. XXX We provide space for matched substring info
335 	     * because PCRE uses part of it as workspace for backtracking.
336 	     * PCRE will allocate memory if it runs out of backtracking
337 	     * storage.
338 	     */
339 	case DICT_PCRE_OP_IF:
340 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
341 	    ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
342 				     lookup_string, lookup_len,
343 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
344 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
345 
346 	    if (ctxt.matches > 0) {
347 		if (!if_rule->match)
348 		    continue;			/* Negative rule matched */
349 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
350 		if (if_rule->match)
351 		    continue;			/* Positive rule did not
352 						 * match */
353 	    } else {
354 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
355 		continue;			/* pcre_exec failed */
356 	    }
357 	    nesting++;
358 	    continue;
359 
360 	    /*
361 	     * ENDIF after successful IF.
362 	     */
363 	case DICT_PCRE_OP_ENDIF:
364 	    nesting--;
365 	    continue;
366 
367 	default:
368 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
369 	}
370     }
371     return (0);
372 }
373 
374 /* dict_pcre_close - close pcre dictionary */
375 
376 static void dict_pcre_close(DICT *dict)
377 {
378     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
379     DICT_PCRE_RULE *rule;
380     DICT_PCRE_RULE *next;
381     DICT_PCRE_MATCH_RULE *match_rule;
382     DICT_PCRE_IF_RULE *if_rule;
383 
384     for (rule = dict_pcre->head; rule; rule = next) {
385 	next = rule->next;
386 	switch (rule->op) {
387 	case DICT_PCRE_OP_MATCH:
388 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
389 	    if (match_rule->pattern)
390 		myfree((char *) match_rule->pattern);
391 	    if (match_rule->hints)
392 		myfree((char *) match_rule->hints);
393 	    if (match_rule->replacement)
394 		myfree((char *) match_rule->replacement);
395 	    break;
396 	case DICT_PCRE_OP_IF:
397 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
398 	    if (if_rule->pattern)
399 		myfree((char *) if_rule->pattern);
400 	    if (if_rule->hints)
401 		myfree((char *) if_rule->hints);
402 	    break;
403 	case DICT_PCRE_OP_ENDIF:
404 	    break;
405 	default:
406 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
407 	}
408 	myfree((char *) rule);
409     }
410     if (dict_pcre->expansion_buf)
411 	vstring_free(dict_pcre->expansion_buf);
412     if (dict->fold_buf)
413 	vstring_free(dict->fold_buf);
414     dict_free(dict);
415 }
416 
417 /* dict_pcre_get_pattern - extract pattern from rule */
418 
419 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
420 				         DICT_PCRE_REGEXP *pattern)
421 {
422     char   *p = *bufp;
423     char    re_delimiter;
424 
425     /*
426      * Process negation operators.
427      */
428     pattern->match = 1;
429     while (*p == '!') {
430 	pattern->match = !pattern->match;
431 	p++;
432     }
433 
434     /*
435      * Grr...aceful handling of whitespace after '!'.
436      */
437     while (*p && ISSPACE(*p))
438 	p++;
439     if (*p == 0) {
440 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
441 		 mapname, lineno);
442 	return (0);
443     }
444     re_delimiter = *p++;
445     pattern->regexp = p;
446 
447     /*
448      * Search for second delimiter, handling backslash escape.
449      */
450     while (*p) {
451 	if (*p == '\\') {
452 	    ++p;
453 	    if (*p == 0)
454 		break;
455 	} else if (*p == re_delimiter)
456 	    break;
457 	++p;
458     }
459 
460     if (!*p) {
461 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
462 		 "ignoring this rule", mapname, lineno, re_delimiter);
463 	return (0);
464     }
465     *p++ = 0;					/* Null term the regexp */
466 
467     /*
468      * Parse any regexp options.
469      */
470     pattern->options = PCRE_CASELESS | PCRE_DOTALL;
471     while (*p && !ISSPACE(*p)) {
472 	switch (*p) {
473 	case 'i':
474 	    pattern->options ^= PCRE_CASELESS;
475 	    break;
476 	case 'm':
477 	    pattern->options ^= PCRE_MULTILINE;
478 	    break;
479 	case 's':
480 	    pattern->options ^= PCRE_DOTALL;
481 	    break;
482 	case 'x':
483 	    pattern->options ^= PCRE_EXTENDED;
484 	    break;
485 	case 'A':
486 	    pattern->options ^= PCRE_ANCHORED;
487 	    break;
488 	case 'E':
489 	    pattern->options ^= PCRE_DOLLAR_ENDONLY;
490 	    break;
491 	case 'U':
492 	    pattern->options ^= PCRE_UNGREEDY;
493 	    break;
494 	case 'X':
495 	    pattern->options ^= PCRE_EXTRA;
496 	    break;
497 	default:
498 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
499 		     "skipping this rule", mapname, lineno, *p);
500 	    return (0);
501 	}
502 	++p;
503     }
504     *bufp = p;
505     return (1);
506 }
507 
508 /* dict_pcre_prescan - sanity check $number instances in replacement text */
509 
510 static int dict_pcre_prescan(int type, VSTRING *buf, char *context)
511 {
512     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
513     size_t  n;
514 
515     /*
516      * Keep a copy of literal text (with $$ already replaced by $) if and
517      * only if the replacement text contains no $number expression. This way
518      * we can avoid having to scan the replacement text at lookup time.
519      */
520     if (type == MAC_PARSE_VARNAME) {
521 	if (ctxt->literal) {
522 	    myfree(ctxt->literal);
523 	    ctxt->literal = 0;
524 	}
525 	if (!alldig(vstring_str(buf))) {
526 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
527 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
528 	    return (MAC_PARSE_ERROR);
529 	}
530 	n = atoi(vstring_str(buf));
531 	if (n < 1) {
532 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
533 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
534 	    return (MAC_PARSE_ERROR);
535 	}
536 	if (n > ctxt->max_sub)
537 	    ctxt->max_sub = n;
538     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
539 	if (ctxt->literal)
540 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
541 		      ctxt->mapname, ctxt->lineno);
542 	ctxt->literal = mystrdup(vstring_str(buf));
543     }
544     return (MAC_PARSE_OK);
545 }
546 
547 /* dict_pcre_compile - compile pattern */
548 
549 static int dict_pcre_compile(const char *mapname, int lineno,
550 			             DICT_PCRE_REGEXP *pattern,
551 			             DICT_PCRE_ENGINE *engine)
552 {
553     const char *error;
554     int     errptr;
555 
556     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
557 				   &error, &errptr, NULL);
558     if (engine->pattern == 0) {
559 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
560 		 mapname, lineno, errptr, error);
561 	return (0);
562     }
563     engine->hints = pcre_study(engine->pattern, 0, &error);
564     if (error != 0) {
565 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
566 		 mapname, lineno, error);
567 	myfree((char *) engine->pattern);
568 	return (0);
569     }
570     return (1);
571 }
572 
573 /* dict_pcre_rule_alloc - fill in a generic rule structure */
574 
575 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
576 					            int lineno,
577 					            size_t size)
578 {
579     DICT_PCRE_RULE *rule;
580 
581     rule = (DICT_PCRE_RULE *) mymalloc(size);
582     rule->op = op;
583     rule->nesting = nesting;
584     rule->lineno = lineno;
585     rule->next = 0;
586 
587     return (rule);
588 }
589 
590 /* dict_pcre_parse_rule - parse and compile one rule */
591 
592 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
593 					            char *line, int nesting,
594 					            int dict_flags)
595 {
596     char   *p;
597     int     actual_sub;
598 
599     p = line;
600 
601     /*
602      * An ordinary match rule takes one pattern and replacement text.
603      */
604     if (!ISALNUM(*p)) {
605 	DICT_PCRE_REGEXP regexp;
606 	DICT_PCRE_ENGINE engine;
607 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
608 	DICT_PCRE_MATCH_RULE *match_rule;
609 
610 	/*
611 	 * Get the pattern string and options.
612 	 */
613 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
614 	    return (0);
615 
616 	/*
617 	 * Get the replacement text.
618 	 */
619 	while (*p && ISSPACE(*p))
620 	    ++p;
621 	if (!*p)
622 	    msg_warn("%s, line %d: no replacement text: using empty string",
623 		     mapname, lineno);
624 
625 	/*
626 	 * Sanity check the $number instances in the replacement text.
627 	 */
628 	prescan_context.mapname = mapname;
629 	prescan_context.lineno = lineno;
630 	prescan_context.max_sub = 0;
631 	prescan_context.literal = 0;
632 
633 	/*
634 	 * The optimizer will eliminate code duplication and/or dead code.
635 	 */
636 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
637 	if (prescan_context.literal) \
638 	    myfree(prescan_context.literal); \
639 	return (rval); \
640     } while (0)
641 
642 	if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context)
643 	    & MAC_PARSE_ERROR) {
644 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
645 		     "skipping this rule", mapname, lineno);
646 	    CREATE_MATCHOP_ERROR_RETURN(0);
647 	}
648 
649 	/*
650 	 * Substring replacement not possible with negative regexps.
651 	 */
652 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
653 	    msg_warn("pcre map %s, line %d: $number found in negative match "
654 		   "replacement text: skipping this rule", mapname, lineno);
655 	    CREATE_MATCHOP_ERROR_RETURN(0);
656 	}
657 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
658 	    msg_warn("pcre map %s, line %d: "
659 		     "regular expression substitution is not allowed: "
660 		     "skipping this rule", mapname, lineno);
661 	    CREATE_MATCHOP_ERROR_RETURN(0);
662 	}
663 
664 	/*
665 	 * Compile the pattern.
666 	 */
667 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
668 	    CREATE_MATCHOP_ERROR_RETURN(0);
669 #ifdef PCRE_INFO_CAPTURECOUNT
670 	if (pcre_fullinfo(engine.pattern, engine.hints,
671 			  PCRE_INFO_CAPTURECOUNT,
672 			  (void *) &actual_sub) != 0)
673 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
674 		      mapname, lineno);
675 	if (prescan_context.max_sub > actual_sub) {
676 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
677 		     "skipping this rule", mapname, lineno,
678 		     (int) prescan_context.max_sub);
679 	    if (engine.pattern)
680 		myfree((char *) engine.pattern);
681 	    if (engine.hints)
682 		myfree((char *) engine.hints);
683 	    CREATE_MATCHOP_ERROR_RETURN(0);
684 	}
685 #endif
686 
687 	/*
688 	 * Save the result.
689 	 */
690 	match_rule = (DICT_PCRE_MATCH_RULE *)
691 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
692 				 sizeof(DICT_PCRE_MATCH_RULE));
693 	match_rule->match = regexp.match;
694 	match_rule->max_sub = prescan_context.max_sub;
695 	if (prescan_context.literal)
696 	    match_rule->replacement = prescan_context.literal;
697 	else
698 	    match_rule->replacement = mystrdup(p);
699 	match_rule->pattern = engine.pattern;
700 	match_rule->hints = engine.hints;
701 	return ((DICT_PCRE_RULE *) match_rule);
702     }
703 
704     /*
705      * The IF operator takes one pattern but no replacement text.
706      */
707     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
708 	DICT_PCRE_REGEXP regexp;
709 	DICT_PCRE_ENGINE engine;
710 	DICT_PCRE_IF_RULE *if_rule;
711 
712 	p += 2;
713 
714 	/*
715 	 * Get the pattern.
716 	 */
717 	while (*p && ISSPACE(*p))
718 	    p++;
719 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
720 	    return (0);
721 
722 	/*
723 	 * Warn about out-of-place text.
724 	 */
725 	while (*p && ISSPACE(*p))
726 	    ++p;
727 	if (*p) {
728 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
729 		     "IF statement: \"%s\"", mapname, lineno, p);
730 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
731 		     " to statements between IF and ENDIF", mapname, lineno);
732 	}
733 
734 	/*
735 	 * Compile the pattern.
736 	 */
737 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
738 	    return (0);
739 
740 	/*
741 	 * Save the result.
742 	 */
743 	if_rule = (DICT_PCRE_IF_RULE *)
744 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
745 				 sizeof(DICT_PCRE_IF_RULE));
746 	if_rule->match = regexp.match;
747 	if_rule->pattern = engine.pattern;
748 	if_rule->hints = engine.hints;
749 	return ((DICT_PCRE_RULE *) if_rule);
750     }
751 
752     /*
753      * The ENDIF operator takes no patterns and no replacement text.
754      */
755     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
756 	DICT_PCRE_RULE *rule;
757 
758 	p += 5;
759 
760 	/*
761 	 * Warn about out-of-place ENDIFs.
762 	 */
763 	if (nesting == 0) {
764 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
765 		     mapname, lineno);
766 	    return (0);
767 	}
768 
769 	/*
770 	 * Warn about out-of-place text.
771 	 */
772 	while (*p && ISSPACE(*p))
773 	    ++p;
774 	if (*p)
775 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
776 		     mapname, lineno);
777 
778 	/*
779 	 * Save the result.
780 	 */
781 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
782 				    sizeof(DICT_PCRE_RULE));
783 	return (rule);
784     }
785 
786     /*
787      * Unrecognized input.
788      */
789     else {
790 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
791 		 mapname, lineno);
792 	return (0);
793     }
794 }
795 
796 /* dict_pcre_open - load and compile a file containing regular expressions */
797 
798 DICT   *dict_pcre_open(const char *mapname, int unused_flags, int dict_flags)
799 {
800     DICT_PCRE *dict_pcre;
801     VSTREAM *map_fp;
802     VSTRING *line_buffer;
803     DICT_PCRE_RULE *last_rule = 0;
804     DICT_PCRE_RULE *rule;
805     int     lineno = 0;
806     int     nesting = 0;
807     char   *p;
808 
809     line_buffer = vstring_alloc(100);
810 
811     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
812 					 sizeof(*dict_pcre));
813     dict_pcre->dict.lookup = dict_pcre_lookup;
814     dict_pcre->dict.close = dict_pcre_close;
815     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
816     if (dict_flags & DICT_FLAG_FOLD_MUL)
817 	dict_pcre->dict.fold_buf = vstring_alloc(10);
818     dict_pcre->head = 0;
819     dict_pcre->expansion_buf = 0;
820 
821     if (dict_pcre_init == 0) {
822 	pcre_malloc = (void *(*) (size_t)) mymalloc;
823 	pcre_free = (void (*) (void *)) myfree;
824 	dict_pcre_init = 1;
825     }
826 
827     /*
828      * Parse the pcre table.
829      */
830     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
831 	msg_fatal("open %s: %m", mapname);
832 
833     while (readlline(line_buffer, map_fp, &lineno)) {
834 	p = vstring_str(line_buffer);
835 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
836 	if (*p == 0)
837 	    continue;
838 	rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
839 	if (rule == 0)
840 	    continue;
841 	if (rule->op == DICT_PCRE_OP_IF) {
842 	    nesting++;
843 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
844 	    nesting--;
845 	}
846 	if (last_rule == 0)
847 	    dict_pcre->head = rule;
848 	else
849 	    last_rule->next = rule;
850 	last_rule = rule;
851     }
852 
853     if (nesting)
854 	msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
855 		 mapname, lineno);
856 
857     vstring_free(line_buffer);
858     vstream_fclose(map_fp);
859 
860     return (DICT_DEBUG (&dict_pcre->dict));
861 }
862 
863 #endif					/* HAS_PCRE */
864