xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: dict_pcre.c,v 1.3 2020/03/18 19:05:21 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*
33 /*	Wietse Venema
34 /*	Google, Inc.
35 /*	111 8th Avenue
36 /*	New York, NY 10011, USA
37 /*--*/
38 
39 #include "sys_defs.h"
40 
41 #ifdef HAS_PCRE
42 
43 /* System library. */
44 
#include <sys/stat.h>
#include <stdio.h>			/* sprintf() prototype */
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
51 
52 #ifdef STRCASECMP_IN_STRINGS_H
53 #include <strings.h>
54 #endif
55 
56 /* Utility library. */
57 
58 #include "mymalloc.h"
59 #include "msg.h"
60 #include "safe.h"
61 #include "vstream.h"
62 #include "vstring.h"
63 #include "stringops.h"
64 #include "readlline.h"
65 #include "dict.h"
66 #include "dict_pcre.h"
67 #include "mac_parse.h"
68 #include "pcre.h"
69 #include "warn_stat.h"
70 #include "mvect.h"
71 
 /*
  * Backwards compatibility. DICT_PCRE_FREE_STUDY() releases the "hints"
  * result from pcre_study(). When the PCRE headers define
  * PCRE_STUDY_JIT_COMPILE the result is released with pcre_free_study(),
  * otherwise with pcre_free().
  */
#ifdef PCRE_STUDY_JIT_COMPILE
#define DICT_PCRE_FREE_STUDY(x)	pcre_free_study(x)
#else
#define DICT_PCRE_FREE_STUDY(x)	pcre_free((char *) (x))
#endif
80 
 /*
  * Support for IF/ENDIF based on an idea by Bert Driehuis. These operation
  * codes are stored in the "op" member of each compiled rule and select how
  * the lookup and close routines treat that rule.
  */
#define DICT_PCRE_OP_MATCH    1		/* Match this regexp */
#define DICT_PCRE_OP_IF       2		/* Increase if/endif nesting on match */
#define DICT_PCRE_OP_ENDIF    3		/* Decrease if/endif nesting on match */
87 
 /*
  * Max strings captured by regexp - essentially the max number of (..).
  * The offsets vector that is handed to pcre_exec() holds
  * PCRE_MAX_CAPTURE * 3 integers.
  */
#define PCRE_MAX_CAPTURE	99
92 
 /*
  * Regular expression before and after compilation. DICT_PCRE_REGEXP is
  * filled in by dict_pcre_get_pattern() with the pattern text, the PCRE
  * compile options, and the match sense (0 after an odd number of '!'
  * negation operators). DICT_PCRE_ENGINE is filled in by
  * dict_pcre_compile() with the pcre_compile() and pcre_study() results.
  */
typedef struct {
    char   *regexp;			/* regular expression */
    int     options;			/* options */
    int     match;			/* positive or negative match */
} DICT_PCRE_REGEXP;

typedef struct {
    pcre   *pattern;			/* the compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
} DICT_PCRE_ENGINE;
106 
 /*
  * Compiled generic rule, and subclasses that derive from it. Each subclass
  * starts with a DICT_PCRE_RULE member, so that code in this file can cast a
  * DICT_PCRE_RULE pointer to the subclass type that is selected by the "op"
  * member. ENDIF rules use the generic structure without extension.
  */
typedef struct DICT_PCRE_RULE {
    int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
    int     lineno;			/* source file line number */
    struct DICT_PCRE_RULE *next;	/* next rule in dict */
} DICT_PCRE_RULE;

 /*
  * Rule for DICT_PCRE_OP_MATCH: pattern plus replacement text.
  */
typedef struct {
    DICT_PCRE_RULE rule;		/* generic part */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    char   *replacement;		/* replacement string */
    int     match;			/* positive or negative match */
    size_t  max_sub;			/* largest $number in replacement */
} DICT_PCRE_MATCH_RULE;

 /*
  * Rule for DICT_PCRE_OP_IF: pattern plus a link to the matching ENDIF rule.
  * The link remains null when the IF has no matching ENDIF.
  */
typedef struct {
    DICT_PCRE_RULE rule;		/* generic members */
    pcre   *pattern;			/* compiled pattern */
    pcre_extra *hints;			/* hints to speed pattern execution */
    int     match;			/* positive or negative match */
    struct DICT_PCRE_RULE *endif_rule;	/* matching endif rule */
} DICT_PCRE_IF_RULE;
132 
 /*
  * PCRE map. The generic DICT member comes first, so that a DICT pointer
  * can be cast to DICT_PCRE in the lookup and close routines.
  */
typedef struct {
    DICT    dict;			/* generic members */
    DICT_PCRE_RULE *head;		/* first rule, in source file order */
    VSTRING *expansion_buf;		/* lookup result */
} DICT_PCRE;

 /*
  * Non-zero after dict_pcre_open() has pointed the pcre_malloc and pcre_free
  * hooks at mymalloc() and myfree().
  */
static int dict_pcre_init = 0;		/* flag need to init pcre library */
143 
/*
 * Context for $number expansion callback. dict_pcre_lookup() passes this to
 * mac_parse(), which hands it to dict_pcre_expand(). The offsets and matches
 * members are filled in by the DICT_PCRE_EXEC() macro.
 */
typedef struct {
    DICT_PCRE *dict_pcre;		/* the dictionary handle */
    DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
    const char *lookup_string;		/* string against which we match */
    int     offsets[PCRE_MAX_CAPTURE * 3];	/* Cut substrings */
    int     matches;			/* Count of cuts */
} DICT_PCRE_EXPAND_CONTEXT;
154 
 /*
  * Context for $number pre-scan callback. dict_pcre_parse_rule() passes this
  * to mac_parse(), which hands it to dict_pcre_prescan(). The mapname and
  * lineno members are used for warning messages only.
  */
typedef struct {
    const char *mapname;		/* name of regexp map */
    int     lineno;			/* where in file */
    size_t  max_sub;			/* Largest $n seen */
    char   *literal;			/* constant result, $$ -> $ */
} DICT_PCRE_PRESCAN_CONTEXT;
164 
 /*
  * Compatibility: supply MAC_PARSE_OK when the included headers do not
  * define it.
  */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif
171 
 /*
  * Macros to make dense code more accessible. These give names to "nothing
  * special" values for pcre_exec() arguments.
  */
#define NULL_STARTOFFSET	(0)
#define NULL_EXEC_OPTIONS 	(0)
#define NULL_OVECTOR		((int *) 0)
#define NULL_OVECTOR_LENGTH	(0)
179 
180 /* dict_pcre_expand - replace $number with matched text */
181 
182 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
183 {
184     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
185     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
186     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
187     const char *pp;
188     int     n;
189     int     ret;
190 
191     /*
192      * Replace $0-${99} with strings cut from matched text.
193      */
194     if (type == MAC_PARSE_VARNAME) {
195 	n = atoi(vstring_str(buf));
196 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
197 				 ctxt->matches, n, &pp);
198 	if (ret < 0) {
199 	    if (ret == PCRE_ERROR_NOSUBSTRING)
200 		return (MAC_PARSE_UNDEF);
201 	    else
202 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
203 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
204 	}
205 	if (*pp == 0) {
206 	    myfree((void *) pp);
207 	    return (MAC_PARSE_UNDEF);
208 	}
209 	vstring_strcat(dict_pcre->expansion_buf, pp);
210 	myfree((void *) pp);
211 	return (MAC_PARSE_OK);
212     }
213 
214     /*
215      * Straight text - duplicate with no substitution.
216      */
217     else {
218 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
219 	return (MAC_PARSE_OK);
220     }
221 }
222 
223 /* dict_pcre_exec_error - report matching error */
224 
225 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
226 {
227     switch (errval) {
228 	case 0:
229 	msg_warn("pcre map %s, line %d: too many (...)",
230 		 mapname, lineno);
231 	return;
232     case PCRE_ERROR_NULL:
233     case PCRE_ERROR_BADOPTION:
234 	msg_warn("pcre map %s, line %d: bad args to re_exec",
235 		 mapname, lineno);
236 	return;
237     case PCRE_ERROR_BADMAGIC:
238     case PCRE_ERROR_UNKNOWN_NODE:
239 	msg_warn("pcre map %s, line %d: corrupt compiled regexp",
240 		 mapname, lineno);
241 	return;
242 #ifdef PCRE_ERROR_NOMEMORY
243     case PCRE_ERROR_NOMEMORY:
244 	msg_warn("pcre map %s, line %d: out of memory",
245 		 mapname, lineno);
246 	return;
247 #endif
248 #ifdef PCRE_ERROR_MATCHLIMIT
249     case PCRE_ERROR_MATCHLIMIT:
250 	msg_warn("pcre map %s, line %d: backtracking limit exceeded",
251 		 mapname, lineno);
252 	return;
253 #endif
254 #ifdef PCRE_ERROR_BADUTF8
255     case PCRE_ERROR_BADUTF8:
256 	msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
257 		 mapname, lineno);
258 	return;
259 #endif
260 #ifdef PCRE_ERROR_BADUTF8_OFFSET
261     case PCRE_ERROR_BADUTF8_OFFSET:
262 	msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
263 		 mapname, lineno);
264 	return;
265 #endif
266     default:
267 	msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d",
268 		 mapname, lineno, errval);
269 	return;
270     }
271 }
272 
 /*
  * Inlined to reduce function call overhead in the time-critical loop.
  * 
  * DICT_PCRE_EXEC() runs pcre_exec() on (str, len) and stores the result in
  * (ctxt).matches, with substring offsets in (ctxt).offsets. The macro
  * evaluates to (match) when the result is positive, to !(match) when the
  * result is PCRE_ERROR_NOMATCH, and to 0 after logging a warning via
  * dict_pcre_exec_error() for any other result. The ctxt and match
  * arguments are evaluated more than once.
  */
#define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \
    ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \
				NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
				(ctxt).offsets, PCRE_MAX_CAPTURE * 3), \
     (ctxt).matches > 0 ? (match) : \
     (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \
     (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
283 
284 /* dict_pcre_lookup - match string and perform optional substitution */
285 
286 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
287 {
288     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
289     DICT_PCRE_RULE *rule;
290     DICT_PCRE_IF_RULE *if_rule;
291     DICT_PCRE_MATCH_RULE *match_rule;
292     int     lookup_len = strlen(lookup_string);
293     DICT_PCRE_EXPAND_CONTEXT ctxt;
294 
295     dict->error = 0;
296 
297     if (msg_verbose)
298 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
299 
300     /*
301      * Optionally fold the key.
302      */
303     if (dict->flags & DICT_FLAG_FOLD_MUL) {
304 	if (dict->fold_buf == 0)
305 	    dict->fold_buf = vstring_alloc(10);
306 	vstring_strcpy(dict->fold_buf, lookup_string);
307 	lookup_string = lowercase(vstring_str(dict->fold_buf));
308     }
309     for (rule = dict_pcre->head; rule; rule = rule->next) {
310 
311 	switch (rule->op) {
312 
313 	    /*
314 	     * Search for a matching expression.
315 	     */
316 	case DICT_PCRE_OP_MATCH:
317 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
318 	    if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
319 				match_rule->pattern, match_rule->hints,
320 			      match_rule->match, lookup_string, lookup_len))
321 		continue;
322 
323 	    /*
324 	     * Skip $number substitutions when the replacement text contains
325 	     * no $number strings, as learned during the compile time
326 	     * pre-scan. The pre-scan already replaced $$ by $.
327 	     */
328 	    if (match_rule->max_sub == 0)
329 		return match_rule->replacement;
330 
331 	    /*
332 	     * We've got a match. Perform substitution on replacement string.
333 	     */
334 	    if (dict_pcre->expansion_buf == 0)
335 		dict_pcre->expansion_buf = vstring_alloc(10);
336 	    VSTRING_RESET(dict_pcre->expansion_buf);
337 	    ctxt.dict_pcre = dict_pcre;
338 	    ctxt.match_rule = match_rule;
339 	    ctxt.lookup_string = lookup_string;
340 
341 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
342 			  (void *) &ctxt) & MAC_PARSE_ERROR)
343 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
344 			  dict->name, rule->lineno);
345 
346 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
347 	    return (vstring_str(dict_pcre->expansion_buf));
348 
349 	    /*
350 	     * Conditional. XXX We provide space for matched substring info
351 	     * because PCRE uses part of it as workspace for backtracking.
352 	     * PCRE will allocate memory if it runs out of backtracking
353 	     * storage.
354 	     */
355 	case DICT_PCRE_OP_IF:
356 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
357 	    if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
358 			       if_rule->pattern, if_rule->hints,
359 			       if_rule->match, lookup_string, lookup_len))
360 		continue;
361 	    /* An IF without matching ENDIF has no "endif" rule. */
362 	    if ((rule = if_rule->endif_rule) == 0)
363 		return (0);
364 	    /* FALLTHROUGH */
365 
366 	    /*
367 	     * ENDIF after IF.
368 	     */
369 	case DICT_PCRE_OP_ENDIF:
370 	    continue;
371 
372 	default:
373 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
374 	}
375     }
376     return (0);
377 }
378 
379 /* dict_pcre_close - close pcre dictionary */
380 
381 static void dict_pcre_close(DICT *dict)
382 {
383     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
384     DICT_PCRE_RULE *rule;
385     DICT_PCRE_RULE *next;
386     DICT_PCRE_MATCH_RULE *match_rule;
387     DICT_PCRE_IF_RULE *if_rule;
388 
389     for (rule = dict_pcre->head; rule; rule = next) {
390 	next = rule->next;
391 	switch (rule->op) {
392 	case DICT_PCRE_OP_MATCH:
393 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
394 	    if (match_rule->pattern)
395 		myfree((void *) match_rule->pattern);
396 	    if (match_rule->hints)
397 		DICT_PCRE_FREE_STUDY(match_rule->hints);
398 	    if (match_rule->replacement)
399 		myfree((void *) match_rule->replacement);
400 	    break;
401 	case DICT_PCRE_OP_IF:
402 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
403 	    if (if_rule->pattern)
404 		myfree((void *) if_rule->pattern);
405 	    if (if_rule->hints)
406 		DICT_PCRE_FREE_STUDY(if_rule->hints);
407 	    break;
408 	case DICT_PCRE_OP_ENDIF:
409 	    break;
410 	default:
411 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
412 	}
413 	myfree((void *) rule);
414     }
415     if (dict_pcre->expansion_buf)
416 	vstring_free(dict_pcre->expansion_buf);
417     if (dict->fold_buf)
418 	vstring_free(dict->fold_buf);
419     dict_free(dict);
420 }
421 
422 /* dict_pcre_get_pattern - extract pattern from rule */
423 
424 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
425 				         DICT_PCRE_REGEXP *pattern)
426 {
427     char   *p = *bufp;
428     char    re_delimiter;
429 
430     /*
431      * Process negation operators.
432      */
433     pattern->match = 1;
434     for (;;) {
435 	if (*p == '!')
436 	    pattern->match = !pattern->match;
437 	else if (!ISSPACE(*p))
438 	    break;
439 	p++;
440     }
441     if (*p == 0) {
442 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
443 		 mapname, lineno);
444 	return (0);
445     }
446     re_delimiter = *p++;
447     pattern->regexp = p;
448 
449     /*
450      * Search for second delimiter, handling backslash escape.
451      */
452     while (*p) {
453 	if (*p == '\\') {
454 	    ++p;
455 	    if (*p == 0)
456 		break;
457 	} else if (*p == re_delimiter)
458 	    break;
459 	++p;
460     }
461 
462     if (!*p) {
463 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
464 		 "ignoring this rule", mapname, lineno, re_delimiter);
465 	return (0);
466     }
467     *p++ = 0;					/* Null term the regexp */
468 
469     /*
470      * Parse any regexp options.
471      */
472     pattern->options = PCRE_CASELESS | PCRE_DOTALL;
473     while (*p && !ISSPACE(*p)) {
474 	switch (*p) {
475 	case 'i':
476 	    pattern->options ^= PCRE_CASELESS;
477 	    break;
478 	case 'm':
479 	    pattern->options ^= PCRE_MULTILINE;
480 	    break;
481 	case 's':
482 	    pattern->options ^= PCRE_DOTALL;
483 	    break;
484 	case 'x':
485 	    pattern->options ^= PCRE_EXTENDED;
486 	    break;
487 	case 'A':
488 	    pattern->options ^= PCRE_ANCHORED;
489 	    break;
490 	case 'E':
491 	    pattern->options ^= PCRE_DOLLAR_ENDONLY;
492 	    break;
493 	case 'U':
494 	    pattern->options ^= PCRE_UNGREEDY;
495 	    break;
496 	case 'X':
497 	    pattern->options ^= PCRE_EXTRA;
498 	    break;
499 	default:
500 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
501 		     "skipping this rule", mapname, lineno, *p);
502 	    return (0);
503 	}
504 	++p;
505     }
506     *bufp = p;
507     return (1);
508 }
509 
510 /* dict_pcre_prescan - sanity check $number instances in replacement text */
511 
512 static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
513 {
514     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
515     size_t  n;
516 
517     /*
518      * Keep a copy of literal text (with $$ already replaced by $) if and
519      * only if the replacement text contains no $number expression. This way
520      * we can avoid having to scan the replacement text at lookup time.
521      */
522     if (type == MAC_PARSE_VARNAME) {
523 	if (ctxt->literal) {
524 	    myfree(ctxt->literal);
525 	    ctxt->literal = 0;
526 	}
527 	if (!alldig(vstring_str(buf))) {
528 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
529 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
530 	    return (MAC_PARSE_ERROR);
531 	}
532 	n = atoi(vstring_str(buf));
533 	if (n < 1) {
534 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
535 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
536 	    return (MAC_PARSE_ERROR);
537 	}
538 	if (n > ctxt->max_sub)
539 	    ctxt->max_sub = n;
540     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
541 	if (ctxt->literal)
542 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
543 		      ctxt->mapname, ctxt->lineno);
544 	ctxt->literal = mystrdup(vstring_str(buf));
545     }
546     return (MAC_PARSE_OK);
547 }
548 
549 /* dict_pcre_compile - compile pattern */
550 
551 static int dict_pcre_compile(const char *mapname, int lineno,
552 			             DICT_PCRE_REGEXP *pattern,
553 			             DICT_PCRE_ENGINE *engine)
554 {
555     const char *error;
556     int     errptr;
557 
558     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
559 				   &error, &errptr, NULL);
560     if (engine->pattern == 0) {
561 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
562 		 mapname, lineno, errptr, error);
563 	return (0);
564     }
565     engine->hints = pcre_study(engine->pattern, 0, &error);
566     if (error != 0) {
567 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
568 		 mapname, lineno, error);
569 	myfree((void *) engine->pattern);
570 	return (0);
571     }
572     return (1);
573 }
574 
575 /* dict_pcre_rule_alloc - fill in a generic rule structure */
576 
577 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size)
578 {
579     DICT_PCRE_RULE *rule;
580 
581     rule = (DICT_PCRE_RULE *) mymalloc(size);
582     rule->op = op;
583     rule->lineno = lineno;
584     rule->next = 0;
585 
586     return (rule);
587 }
588 
589 /* dict_pcre_parse_rule - parse and compile one rule */
590 
591 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname,
592 					            int lineno, char *line,
593 					            int nesting)
594 {
595     char   *p;
596     int     actual_sub;
597 
598     p = line;
599 
600     /*
601      * An ordinary match rule takes one pattern and replacement text.
602      */
603     if (!ISALNUM(*p)) {
604 	DICT_PCRE_REGEXP regexp;
605 	DICT_PCRE_ENGINE engine;
606 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
607 	DICT_PCRE_MATCH_RULE *match_rule;
608 
609 	/*
610 	 * Get the pattern string and options.
611 	 */
612 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
613 	    return (0);
614 
615 	/*
616 	 * Get the replacement text.
617 	 */
618 	while (*p && ISSPACE(*p))
619 	    ++p;
620 	if (!*p)
621 	    msg_warn("pcre map %s, line %d: no replacement text: "
622 		     "using empty string", mapname, lineno);
623 
624 	/*
625 	 * Sanity check the $number instances in the replacement text.
626 	 */
627 	prescan_context.mapname = mapname;
628 	prescan_context.lineno = lineno;
629 	prescan_context.max_sub = 0;
630 	prescan_context.literal = 0;
631 
632 	/*
633 	 * The optimizer will eliminate code duplication and/or dead code.
634 	 */
635 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
636 	if (prescan_context.literal) \
637 	    myfree(prescan_context.literal); \
638 	return (rval); \
639     } while (0)
640 
641 	if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
642 	    VSTRING *base64_buf;
643 	    char   *err;
644 
645 	    if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
646 		err = dict_file_get_error(dict);
647 		msg_warn("pcre map %s, line %d: %s: skipping this rule",
648 			 mapname, lineno, err);
649 		myfree(err);
650 		CREATE_MATCHOP_ERROR_RETURN(0);
651 	    }
652 	    p = vstring_str(base64_buf);
653 	}
654 	if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
655 	    & MAC_PARSE_ERROR) {
656 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
657 		     "skipping this rule", mapname, lineno);
658 	    CREATE_MATCHOP_ERROR_RETURN(0);
659 	}
660 
661 	/*
662 	 * Substring replacement not possible with negative regexps.
663 	 */
664 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
665 	    msg_warn("pcre map %s, line %d: $number found in negative match "
666 		   "replacement text: skipping this rule", mapname, lineno);
667 	    CREATE_MATCHOP_ERROR_RETURN(0);
668 	}
669 	if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
670 	    msg_warn("pcre map %s, line %d: "
671 		     "regular expression substitution is not allowed: "
672 		     "skipping this rule", mapname, lineno);
673 	    CREATE_MATCHOP_ERROR_RETURN(0);
674 	}
675 
676 	/*
677 	 * Compile the pattern.
678 	 */
679 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
680 	    CREATE_MATCHOP_ERROR_RETURN(0);
681 #ifdef PCRE_INFO_CAPTURECOUNT
682 	if (pcre_fullinfo(engine.pattern, engine.hints,
683 			  PCRE_INFO_CAPTURECOUNT,
684 			  (void *) &actual_sub) != 0)
685 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
686 		      mapname, lineno);
687 	if (prescan_context.max_sub > actual_sub) {
688 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
689 		     "skipping this rule", mapname, lineno,
690 		     (int) prescan_context.max_sub);
691 	    if (engine.pattern)
692 		myfree((void *) engine.pattern);
693 	    if (engine.hints)
694 		DICT_PCRE_FREE_STUDY(engine.hints);
695 	    CREATE_MATCHOP_ERROR_RETURN(0);
696 	}
697 #endif
698 
699 	/*
700 	 * Save the result.
701 	 */
702 	match_rule = (DICT_PCRE_MATCH_RULE *)
703 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno,
704 				 sizeof(DICT_PCRE_MATCH_RULE));
705 	match_rule->match = regexp.match;
706 	match_rule->max_sub = prescan_context.max_sub;
707 	if (prescan_context.literal)
708 	    match_rule->replacement = prescan_context.literal;
709 	else
710 	    match_rule->replacement = mystrdup(p);
711 	match_rule->pattern = engine.pattern;
712 	match_rule->hints = engine.hints;
713 	return ((DICT_PCRE_RULE *) match_rule);
714     }
715 
716     /*
717      * The IF operator takes one pattern but no replacement text.
718      */
719     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
720 	DICT_PCRE_REGEXP regexp;
721 	DICT_PCRE_ENGINE engine;
722 	DICT_PCRE_IF_RULE *if_rule;
723 
724 	p += 2;
725 
726 	/*
727 	 * Get the pattern.
728 	 */
729 	while (*p && ISSPACE(*p))
730 	    p++;
731 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
732 	    return (0);
733 
734 	/*
735 	 * Warn about out-of-place text.
736 	 */
737 	while (*p && ISSPACE(*p))
738 	    ++p;
739 	if (*p) {
740 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
741 		     "IF statement: \"%s\"", mapname, lineno, p);
742 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
743 		     " to statements between IF and ENDIF", mapname, lineno);
744 	}
745 
746 	/*
747 	 * Compile the pattern.
748 	 */
749 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
750 	    return (0);
751 
752 	/*
753 	 * Save the result.
754 	 */
755 	if_rule = (DICT_PCRE_IF_RULE *)
756 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno,
757 				 sizeof(DICT_PCRE_IF_RULE));
758 	if_rule->match = regexp.match;
759 	if_rule->pattern = engine.pattern;
760 	if_rule->hints = engine.hints;
761 	if_rule->endif_rule = 0;
762 	return ((DICT_PCRE_RULE *) if_rule);
763     }
764 
765     /*
766      * The ENDIF operator takes no patterns and no replacement text.
767      */
768     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
769 	DICT_PCRE_RULE *rule;
770 
771 	p += 5;
772 
773 	/*
774 	 * Warn about out-of-place ENDIFs.
775 	 */
776 	if (nesting == 0) {
777 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
778 		     mapname, lineno);
779 	    return (0);
780 	}
781 
782 	/*
783 	 * Warn about out-of-place text.
784 	 */
785 	while (*p && ISSPACE(*p))
786 	    ++p;
787 	if (*p)
788 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
789 		     mapname, lineno);
790 
791 	/*
792 	 * Save the result.
793 	 */
794 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno,
795 				    sizeof(DICT_PCRE_RULE));
796 	return (rule);
797     }
798 
799     /*
800      * Unrecognized input.
801      */
802     else {
803 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
804 		 mapname, lineno);
805 	return (0);
806     }
807 }
808 
809 /* dict_pcre_open - load and compile a file containing regular expressions */
810 
811 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
812 {
813     const char myname[] = "dict_pcre_open";
814     DICT_PCRE *dict_pcre;
815     VSTREAM *map_fp = 0;
816     struct stat st;
817     VSTRING *line_buffer = 0;
818     DICT_PCRE_RULE *last_rule = 0;
819     DICT_PCRE_RULE *rule;
820     int     last_line = 0;
821     int     lineno;
822     int     nesting = 0;
823     char   *p;
824     DICT_PCRE_RULE **rule_stack = 0;
825     MVECT   mvect;
826 
827     /*
828      * Let the optimizer worry about eliminating redundant code.
829      */
830 #define DICT_PCRE_OPEN_RETURN(d) do { \
831 	DICT *__d = (d); \
832 	if (map_fp != 0) \
833 	    vstream_fclose(map_fp); \
834 	if (line_buffer != 0) \
835 	    vstring_free(line_buffer); \
836 	return (__d); \
837     } while (0)
838 
839     /*
840      * Sanity checks.
841      */
842     if (open_flags != O_RDONLY)
843 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
844 					     open_flags, dict_flags,
845 				  "%s:%s map requires O_RDONLY access mode",
846 					     DICT_TYPE_PCRE, mapname));
847 
848     /*
849      * Open the configuration file.
850      */
851     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
852 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
853 					     open_flags, dict_flags,
854 					     "open %s: %m", mapname));
855     if (fstat(vstream_fileno(map_fp), &st) < 0)
856 	msg_fatal("fstat %s: %m", mapname);
857 
858     line_buffer = vstring_alloc(100);
859 
860     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
861 					 sizeof(*dict_pcre));
862     dict_pcre->dict.lookup = dict_pcre_lookup;
863     dict_pcre->dict.close = dict_pcre_close;
864     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
865     if (dict_flags & DICT_FLAG_FOLD_MUL)
866 	dict_pcre->dict.fold_buf = vstring_alloc(10);
867     dict_pcre->head = 0;
868     dict_pcre->expansion_buf = 0;
869 
870     if (dict_pcre_init == 0) {
871 	pcre_malloc = (void *(*) (size_t)) mymalloc;
872 	pcre_free = (void (*) (void *)) myfree;
873 	dict_pcre_init = 1;
874     }
875     dict_pcre->dict.owner.uid = st.st_uid;
876     dict_pcre->dict.owner.status = (st.st_uid != 0);
877 
878     /*
879      * Parse the pcre table.
880      */
881     while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
882 	p = vstring_str(line_buffer);
883 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
884 	if (*p == 0)
885 	    continue;
886 	rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno,
887 				    p, nesting);
888 	if (rule == 0)
889 	    continue;
890 	if (rule->op == DICT_PCRE_OP_IF) {
891 	    if (rule_stack == 0)
892 		rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect,
893 					   sizeof(*rule_stack), nesting + 1,
894 						(MVECT_FN) 0, (MVECT_FN) 0);
895 	    else
896 		rule_stack =
897 		    (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1);
898 	    rule_stack[nesting] = rule;
899 	    nesting++;
900 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
901 	    DICT_PCRE_IF_RULE *if_rule;
902 
903 	    if (nesting-- <= 0)
904 		/* Already handled in dict_pcre_parse_rule(). */
905 		msg_panic("%s: ENDIF without IF", myname);
906 	    if (rule_stack[nesting]->op != DICT_PCRE_OP_IF)
907 		msg_panic("%s: unexpected rule stack element type %d",
908 			  myname, rule_stack[nesting]->op);
909 	    if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting];
910 	    if_rule->endif_rule = rule;
911 	}
912 	if (last_rule == 0)
913 	    dict_pcre->head = rule;
914 	else
915 	    last_rule->next = rule;
916 	last_rule = rule;
917     }
918 
919     while (nesting-- > 0)
920 	msg_warn("pcre map %s, line %d: IF has no matching ENDIF",
921 		 mapname, rule_stack[nesting]->lineno);
922 
923     if (rule_stack)
924 	(void) mvect_free(&mvect);
925 
926     dict_file_purge_buffers(&dict_pcre->dict);
927     DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
928 }
929 
930 #endif					/* HAS_PCRE */
931