xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: dict_pcre.c,v 1.2 2017/02/14 01:16:49 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*--*/
33 
34 #include "sys_defs.h"
35 
36 #ifdef HAS_PCRE
37 
38 /* System library. */
39 
40 #include <sys/stat.h>
41 #include <stdio.h>			/* sprintf() prototype */
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <ctype.h>
46 
47 #ifdef STRCASECMP_IN_STRINGS_H
48 #include <strings.h>
49 #endif
50 
51 /* Utility library. */
52 
53 #include "mymalloc.h"
54 #include "msg.h"
55 #include "safe.h"
56 #include "vstream.h"
57 #include "vstring.h"
58 #include "stringops.h"
59 #include "readlline.h"
60 #include "dict.h"
61 #include "dict_pcre.h"
62 #include "mac_parse.h"
63 #include "pcre.h"
64 #include "warn_stat.h"
65 
66  /*
67   * Backwards compatibility.
68   */
69 #ifdef PCRE_STUDY_JIT_COMPILE
70 #define DICT_PCRE_FREE_STUDY(x)	pcre_free_study(x)
71 #else
72 #define DICT_PCRE_FREE_STUDY(x)	pcre_free((char *) (x))
73 #endif
74 
75  /*
76   * Support for IF/ENDIF based on an idea by Bert Driehuis.
77   */
78 #define DICT_PCRE_OP_MATCH    1		/* Match this regexp */
79 #define DICT_PCRE_OP_IF       2		/* Increase if/endif nesting on match */
80 #define DICT_PCRE_OP_ENDIF    3		/* Decrease if/endif nesting on match */
81 
82  /*
83   * Max strings captured by regexp - essentially the max number of (..)
84   */
85 #define PCRE_MAX_CAPTURE	99
86 
87  /*
88   * Regular expression before and after compilation.
89   */
90 typedef struct {
91     char   *regexp;			/* regular expression */
92     int     options;			/* options */
93     int     match;			/* positive or negative match */
94 } DICT_PCRE_REGEXP;
95 
96 typedef struct {
97     pcre   *pattern;			/* the compiled pattern */
98     pcre_extra *hints;			/* hints to speed pattern execution */
99 } DICT_PCRE_ENGINE;
100 
101  /*
102   * Compiled generic rule, and subclasses that derive from it.
103   */
104 typedef struct DICT_PCRE_RULE {
105     int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
106     int     nesting;			/* level of IF/ENDIF nesting */
107     int     lineno;			/* source file line number */
108     struct DICT_PCRE_RULE *next;	/* next rule in dict */
109 } DICT_PCRE_RULE;
110 
111 typedef struct {
112     DICT_PCRE_RULE rule;		/* generic part */
113     pcre   *pattern;			/* compiled pattern */
114     pcre_extra *hints;			/* hints to speed pattern execution */
115     char   *replacement;		/* replacement string */
116     int     match;			/* positive or negative match */
117     size_t  max_sub;			/* largest $number in replacement */
118 } DICT_PCRE_MATCH_RULE;
119 
120 typedef struct {
121     DICT_PCRE_RULE rule;		/* generic members */
122     pcre   *pattern;			/* compiled pattern */
123     pcre_extra *hints;			/* hints to speed pattern execution */
124     int     match;			/* positive or negative match */
125 } DICT_PCRE_IF_RULE;
126 
127  /*
128   * PCRE map.
129   */
130 typedef struct {
131     DICT    dict;			/* generic members */
132     DICT_PCRE_RULE *head;
133     VSTRING *expansion_buf;		/* lookup result */
134 } DICT_PCRE;
135 
/* Becomes non-zero once the PCRE memory allocation hooks are installed. */
static int dict_pcre_init = 0;
137 
138 /*
139  * Context for $number expansion callback.
140  */
141 typedef struct {
142     DICT_PCRE *dict_pcre;		/* the dictionary handle */
143     DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
144     const char *lookup_string;		/* string against which we match */
145     int     offsets[PCRE_MAX_CAPTURE * 3];	/* Cut substrings */
146     int     matches;			/* Count of cuts */
147 } DICT_PCRE_EXPAND_CONTEXT;
148 
149  /*
150   * Context for $number pre-scan callback.
151   */
152 typedef struct {
153     const char *mapname;		/* name of regexp map */
154     int     lineno;			/* where in file */
155     size_t  max_sub;			/* Largest $n seen */
156     char   *literal;			/* constant result, $$ -> $ */
157 } DICT_PCRE_PRESCAN_CONTEXT;
158 
159  /*
160   * Compatibility.
161   */
162 #ifndef MAC_PARSE_OK
163 #define MAC_PARSE_OK 0
164 #endif
165 
166  /*
167   * Macros to make dense code more accessible.
168   */
169 #define NULL_STARTOFFSET	(0)
170 #define NULL_EXEC_OPTIONS 	(0)
171 #define NULL_OVECTOR		((int *) 0)
172 #define NULL_OVECTOR_LENGTH	(0)
173 
174 /* dict_pcre_expand - replace $number with matched text */
175 
176 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
177 {
178     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
179     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
180     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
181     const char *pp;
182     int     n;
183     int     ret;
184 
185     /*
186      * Replace $0-${99} with strings cut from matched text.
187      */
188     if (type == MAC_PARSE_VARNAME) {
189 	n = atoi(vstring_str(buf));
190 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
191 				 ctxt->matches, n, &pp);
192 	if (ret < 0) {
193 	    if (ret == PCRE_ERROR_NOSUBSTRING)
194 		return (MAC_PARSE_UNDEF);
195 	    else
196 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
197 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
198 	}
199 	if (*pp == 0) {
200 	    myfree((void *) pp);
201 	    return (MAC_PARSE_UNDEF);
202 	}
203 	vstring_strcat(dict_pcre->expansion_buf, pp);
204 	myfree((void *) pp);
205 	return (MAC_PARSE_OK);
206     }
207 
208     /*
209      * Straight text - duplicate with no substitution.
210      */
211     else {
212 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
213 	return (MAC_PARSE_OK);
214     }
215 }
216 
217 /* dict_pcre_exec_error - report matching error */
218 
219 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
220 {
221     switch (errval) {
222 	case 0:
223 	msg_warn("pcre map %s, line %d: too many (...)",
224 		 mapname, lineno);
225 	return;
226     case PCRE_ERROR_NULL:
227     case PCRE_ERROR_BADOPTION:
228 	msg_warn("pcre map %s, line %d: bad args to re_exec",
229 		 mapname, lineno);
230 	return;
231     case PCRE_ERROR_BADMAGIC:
232     case PCRE_ERROR_UNKNOWN_NODE:
233 	msg_warn("pcre map %s, line %d: corrupt compiled regexp",
234 		 mapname, lineno);
235 	return;
236 #ifdef PCRE_ERROR_NOMEMORY
237     case PCRE_ERROR_NOMEMORY:
238 	msg_warn("pcre map %s, line %d: out of memory",
239 		 mapname, lineno);
240 	return;
241 #endif
242 #ifdef PCRE_ERROR_MATCHLIMIT
243     case PCRE_ERROR_MATCHLIMIT:
244 	msg_warn("pcre map %s, line %d: backtracking limit exceeded",
245 		 mapname, lineno);
246 	return;
247 #endif
248 #ifdef PCRE_ERROR_BADUTF8
249     case PCRE_ERROR_BADUTF8:
250 	msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
251 		 mapname, lineno);
252 	return;
253 #endif
254 #ifdef PCRE_ERROR_BADUTF8_OFFSET
255     case PCRE_ERROR_BADUTF8_OFFSET:
256 	msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
257 		 mapname, lineno);
258 	return;
259 #endif
260     default:
261 	msg_warn("pcre map %s, line %d: unknown re_exec error: %d",
262 		 mapname, lineno, errval);
263 	return;
264     }
265 }
266 
267 /* dict_pcre_lookup - match string and perform optional substitution */
268 
269 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
270 {
271     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
272     DICT_PCRE_RULE *rule;
273     DICT_PCRE_IF_RULE *if_rule;
274     DICT_PCRE_MATCH_RULE *match_rule;
275     int     lookup_len = strlen(lookup_string);
276     DICT_PCRE_EXPAND_CONTEXT ctxt;
277     int     nesting = 0;
278 
279     dict->error = 0;
280 
281     if (msg_verbose)
282 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
283 
284     /*
285      * Optionally fold the key.
286      */
287     if (dict->flags & DICT_FLAG_FOLD_MUL) {
288 	if (dict->fold_buf == 0)
289 	    dict->fold_buf = vstring_alloc(10);
290 	vstring_strcpy(dict->fold_buf, lookup_string);
291 	lookup_string = lowercase(vstring_str(dict->fold_buf));
292     }
293     for (rule = dict_pcre->head; rule; rule = rule->next) {
294 
295 	/*
296 	 * Skip rules inside failed IF/ENDIF.
297 	 */
298 	if (nesting < rule->nesting)
299 	    continue;
300 
301 	switch (rule->op) {
302 
303 	    /*
304 	     * Search for a matching expression.
305 	     */
306 	case DICT_PCRE_OP_MATCH:
307 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
308 	    ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
309 				     lookup_string, lookup_len,
310 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
311 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
312 
313 	    if (ctxt.matches > 0) {
314 		if (!match_rule->match)
315 		    continue;			/* Negative rule matched */
316 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
317 		if (match_rule->match)
318 		    continue;			/* Positive rule did not
319 						 * match */
320 	    } else {
321 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
322 		continue;			/* pcre_exec failed */
323 	    }
324 
325 	    /*
326 	     * Skip $number substitutions when the replacement text contains
327 	     * no $number strings, as learned during the compile time
328 	     * pre-scan. The pre-scan already replaced $$ by $.
329 	     */
330 	    if (match_rule->max_sub == 0)
331 		return match_rule->replacement;
332 
333 	    /*
334 	     * We've got a match. Perform substitution on replacement string.
335 	     */
336 	    if (dict_pcre->expansion_buf == 0)
337 		dict_pcre->expansion_buf = vstring_alloc(10);
338 	    VSTRING_RESET(dict_pcre->expansion_buf);
339 	    ctxt.dict_pcre = dict_pcre;
340 	    ctxt.match_rule = match_rule;
341 	    ctxt.lookup_string = lookup_string;
342 
343 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
344 			  (void *) &ctxt) & MAC_PARSE_ERROR)
345 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
346 			  dict->name, rule->lineno);
347 
348 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
349 	    return (vstring_str(dict_pcre->expansion_buf));
350 
351 	    /*
352 	     * Conditional. XXX We provide space for matched substring info
353 	     * because PCRE uses part of it as workspace for backtracking.
354 	     * PCRE will allocate memory if it runs out of backtracking
355 	     * storage.
356 	     */
357 	case DICT_PCRE_OP_IF:
358 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
359 	    ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
360 				     lookup_string, lookup_len,
361 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
362 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
363 
364 	    if (ctxt.matches > 0) {
365 		if (!if_rule->match)
366 		    continue;			/* Negative rule matched */
367 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
368 		if (if_rule->match)
369 		    continue;			/* Positive rule did not
370 						 * match */
371 	    } else {
372 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
373 		continue;			/* pcre_exec failed */
374 	    }
375 	    nesting++;
376 	    continue;
377 
378 	    /*
379 	     * ENDIF after successful IF.
380 	     */
381 	case DICT_PCRE_OP_ENDIF:
382 	    nesting--;
383 	    continue;
384 
385 	default:
386 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
387 	}
388     }
389     return (0);
390 }
391 
392 /* dict_pcre_close - close pcre dictionary */
393 
394 static void dict_pcre_close(DICT *dict)
395 {
396     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
397     DICT_PCRE_RULE *rule;
398     DICT_PCRE_RULE *next;
399     DICT_PCRE_MATCH_RULE *match_rule;
400     DICT_PCRE_IF_RULE *if_rule;
401 
402     for (rule = dict_pcre->head; rule; rule = next) {
403 	next = rule->next;
404 	switch (rule->op) {
405 	case DICT_PCRE_OP_MATCH:
406 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
407 	    if (match_rule->pattern)
408 		myfree((void *) match_rule->pattern);
409 	    if (match_rule->hints)
410 		DICT_PCRE_FREE_STUDY(match_rule->hints);
411 	    if (match_rule->replacement)
412 		myfree((void *) match_rule->replacement);
413 	    break;
414 	case DICT_PCRE_OP_IF:
415 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
416 	    if (if_rule->pattern)
417 		myfree((void *) if_rule->pattern);
418 	    if (if_rule->hints)
419 		DICT_PCRE_FREE_STUDY(if_rule->hints);
420 	    break;
421 	case DICT_PCRE_OP_ENDIF:
422 	    break;
423 	default:
424 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
425 	}
426 	myfree((void *) rule);
427     }
428     if (dict_pcre->expansion_buf)
429 	vstring_free(dict_pcre->expansion_buf);
430     if (dict->fold_buf)
431 	vstring_free(dict->fold_buf);
432     dict_free(dict);
433 }
434 
435 /* dict_pcre_get_pattern - extract pattern from rule */
436 
437 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
438 				         DICT_PCRE_REGEXP *pattern)
439 {
440     char   *p = *bufp;
441     char    re_delimiter;
442 
443     /*
444      * Process negation operators.
445      */
446     pattern->match = 1;
447     while (*p == '!') {
448 	pattern->match = !pattern->match;
449 	p++;
450     }
451 
452     /*
453      * Grr...aceful handling of whitespace after '!'.
454      */
455     while (*p && ISSPACE(*p))
456 	p++;
457     if (*p == 0) {
458 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
459 		 mapname, lineno);
460 	return (0);
461     }
462     re_delimiter = *p++;
463     pattern->regexp = p;
464 
465     /*
466      * Search for second delimiter, handling backslash escape.
467      */
468     while (*p) {
469 	if (*p == '\\') {
470 	    ++p;
471 	    if (*p == 0)
472 		break;
473 	} else if (*p == re_delimiter)
474 	    break;
475 	++p;
476     }
477 
478     if (!*p) {
479 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
480 		 "ignoring this rule", mapname, lineno, re_delimiter);
481 	return (0);
482     }
483     *p++ = 0;					/* Null term the regexp */
484 
485     /*
486      * Parse any regexp options.
487      */
488     pattern->options = PCRE_CASELESS | PCRE_DOTALL;
489     while (*p && !ISSPACE(*p)) {
490 	switch (*p) {
491 	case 'i':
492 	    pattern->options ^= PCRE_CASELESS;
493 	    break;
494 	case 'm':
495 	    pattern->options ^= PCRE_MULTILINE;
496 	    break;
497 	case 's':
498 	    pattern->options ^= PCRE_DOTALL;
499 	    break;
500 	case 'x':
501 	    pattern->options ^= PCRE_EXTENDED;
502 	    break;
503 	case 'A':
504 	    pattern->options ^= PCRE_ANCHORED;
505 	    break;
506 	case 'E':
507 	    pattern->options ^= PCRE_DOLLAR_ENDONLY;
508 	    break;
509 	case 'U':
510 	    pattern->options ^= PCRE_UNGREEDY;
511 	    break;
512 	case 'X':
513 	    pattern->options ^= PCRE_EXTRA;
514 	    break;
515 	default:
516 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
517 		     "skipping this rule", mapname, lineno, *p);
518 	    return (0);
519 	}
520 	++p;
521     }
522     *bufp = p;
523     return (1);
524 }
525 
526 /* dict_pcre_prescan - sanity check $number instances in replacement text */
527 
528 static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
529 {
530     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
531     size_t  n;
532 
533     /*
534      * Keep a copy of literal text (with $$ already replaced by $) if and
535      * only if the replacement text contains no $number expression. This way
536      * we can avoid having to scan the replacement text at lookup time.
537      */
538     if (type == MAC_PARSE_VARNAME) {
539 	if (ctxt->literal) {
540 	    myfree(ctxt->literal);
541 	    ctxt->literal = 0;
542 	}
543 	if (!alldig(vstring_str(buf))) {
544 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
545 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
546 	    return (MAC_PARSE_ERROR);
547 	}
548 	n = atoi(vstring_str(buf));
549 	if (n < 1) {
550 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
551 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
552 	    return (MAC_PARSE_ERROR);
553 	}
554 	if (n > ctxt->max_sub)
555 	    ctxt->max_sub = n;
556     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
557 	if (ctxt->literal)
558 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
559 		      ctxt->mapname, ctxt->lineno);
560 	ctxt->literal = mystrdup(vstring_str(buf));
561     }
562     return (MAC_PARSE_OK);
563 }
564 
565 /* dict_pcre_compile - compile pattern */
566 
567 static int dict_pcre_compile(const char *mapname, int lineno,
568 			             DICT_PCRE_REGEXP *pattern,
569 			             DICT_PCRE_ENGINE *engine)
570 {
571     const char *error;
572     int     errptr;
573 
574     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
575 				   &error, &errptr, NULL);
576     if (engine->pattern == 0) {
577 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
578 		 mapname, lineno, errptr, error);
579 	return (0);
580     }
581     engine->hints = pcre_study(engine->pattern, 0, &error);
582     if (error != 0) {
583 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
584 		 mapname, lineno, error);
585 	myfree((void *) engine->pattern);
586 	return (0);
587     }
588     return (1);
589 }
590 
591 /* dict_pcre_rule_alloc - fill in a generic rule structure */
592 
593 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
594 					            int lineno,
595 					            size_t size)
596 {
597     DICT_PCRE_RULE *rule;
598 
599     rule = (DICT_PCRE_RULE *) mymalloc(size);
600     rule->op = op;
601     rule->nesting = nesting;
602     rule->lineno = lineno;
603     rule->next = 0;
604 
605     return (rule);
606 }
607 
608 /* dict_pcre_parse_rule - parse and compile one rule */
609 
610 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
611 					            char *line, int nesting,
612 					            int dict_flags)
613 {
614     char   *p;
615     int     actual_sub;
616 
617     p = line;
618 
619     /*
620      * An ordinary match rule takes one pattern and replacement text.
621      */
622     if (!ISALNUM(*p)) {
623 	DICT_PCRE_REGEXP regexp;
624 	DICT_PCRE_ENGINE engine;
625 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
626 	DICT_PCRE_MATCH_RULE *match_rule;
627 
628 	/*
629 	 * Get the pattern string and options.
630 	 */
631 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
632 	    return (0);
633 
634 	/*
635 	 * Get the replacement text.
636 	 */
637 	while (*p && ISSPACE(*p))
638 	    ++p;
639 	if (!*p)
640 	    msg_warn("%s, line %d: no replacement text: using empty string",
641 		     mapname, lineno);
642 
643 	/*
644 	 * Sanity check the $number instances in the replacement text.
645 	 */
646 	prescan_context.mapname = mapname;
647 	prescan_context.lineno = lineno;
648 	prescan_context.max_sub = 0;
649 	prescan_context.literal = 0;
650 
651 	/*
652 	 * The optimizer will eliminate code duplication and/or dead code.
653 	 */
654 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
655 	if (prescan_context.literal) \
656 	    myfree(prescan_context.literal); \
657 	return (rval); \
658     } while (0)
659 
660 	if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
661 	    & MAC_PARSE_ERROR) {
662 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
663 		     "skipping this rule", mapname, lineno);
664 	    CREATE_MATCHOP_ERROR_RETURN(0);
665 	}
666 
667 	/*
668 	 * Substring replacement not possible with negative regexps.
669 	 */
670 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
671 	    msg_warn("pcre map %s, line %d: $number found in negative match "
672 		   "replacement text: skipping this rule", mapname, lineno);
673 	    CREATE_MATCHOP_ERROR_RETURN(0);
674 	}
675 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
676 	    msg_warn("pcre map %s, line %d: "
677 		     "regular expression substitution is not allowed: "
678 		     "skipping this rule", mapname, lineno);
679 	    CREATE_MATCHOP_ERROR_RETURN(0);
680 	}
681 
682 	/*
683 	 * Compile the pattern.
684 	 */
685 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
686 	    CREATE_MATCHOP_ERROR_RETURN(0);
687 #ifdef PCRE_INFO_CAPTURECOUNT
688 	if (pcre_fullinfo(engine.pattern, engine.hints,
689 			  PCRE_INFO_CAPTURECOUNT,
690 			  (void *) &actual_sub) != 0)
691 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
692 		      mapname, lineno);
693 	if (prescan_context.max_sub > actual_sub) {
694 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
695 		     "skipping this rule", mapname, lineno,
696 		     (int) prescan_context.max_sub);
697 	    if (engine.pattern)
698 		myfree((void *) engine.pattern);
699 	    if (engine.hints)
700 		DICT_PCRE_FREE_STUDY(engine.hints);
701 	    CREATE_MATCHOP_ERROR_RETURN(0);
702 	}
703 #endif
704 
705 	/*
706 	 * Save the result.
707 	 */
708 	match_rule = (DICT_PCRE_MATCH_RULE *)
709 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
710 				 sizeof(DICT_PCRE_MATCH_RULE));
711 	match_rule->match = regexp.match;
712 	match_rule->max_sub = prescan_context.max_sub;
713 	if (prescan_context.literal)
714 	    match_rule->replacement = prescan_context.literal;
715 	else
716 	    match_rule->replacement = mystrdup(p);
717 	match_rule->pattern = engine.pattern;
718 	match_rule->hints = engine.hints;
719 	return ((DICT_PCRE_RULE *) match_rule);
720     }
721 
722     /*
723      * The IF operator takes one pattern but no replacement text.
724      */
725     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
726 	DICT_PCRE_REGEXP regexp;
727 	DICT_PCRE_ENGINE engine;
728 	DICT_PCRE_IF_RULE *if_rule;
729 
730 	p += 2;
731 
732 	/*
733 	 * Get the pattern.
734 	 */
735 	while (*p && ISSPACE(*p))
736 	    p++;
737 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
738 	    return (0);
739 
740 	/*
741 	 * Warn about out-of-place text.
742 	 */
743 	while (*p && ISSPACE(*p))
744 	    ++p;
745 	if (*p) {
746 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
747 		     "IF statement: \"%s\"", mapname, lineno, p);
748 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
749 		     " to statements between IF and ENDIF", mapname, lineno);
750 	}
751 
752 	/*
753 	 * Compile the pattern.
754 	 */
755 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
756 	    return (0);
757 
758 	/*
759 	 * Save the result.
760 	 */
761 	if_rule = (DICT_PCRE_IF_RULE *)
762 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
763 				 sizeof(DICT_PCRE_IF_RULE));
764 	if_rule->match = regexp.match;
765 	if_rule->pattern = engine.pattern;
766 	if_rule->hints = engine.hints;
767 	return ((DICT_PCRE_RULE *) if_rule);
768     }
769 
770     /*
771      * The ENDIF operator takes no patterns and no replacement text.
772      */
773     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
774 	DICT_PCRE_RULE *rule;
775 
776 	p += 5;
777 
778 	/*
779 	 * Warn about out-of-place ENDIFs.
780 	 */
781 	if (nesting == 0) {
782 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
783 		     mapname, lineno);
784 	    return (0);
785 	}
786 
787 	/*
788 	 * Warn about out-of-place text.
789 	 */
790 	while (*p && ISSPACE(*p))
791 	    ++p;
792 	if (*p)
793 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
794 		     mapname, lineno);
795 
796 	/*
797 	 * Save the result.
798 	 */
799 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
800 				    sizeof(DICT_PCRE_RULE));
801 	return (rule);
802     }
803 
804     /*
805      * Unrecognized input.
806      */
807     else {
808 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
809 		 mapname, lineno);
810 	return (0);
811     }
812 }
813 
814 /* dict_pcre_open - load and compile a file containing regular expressions */
815 
816 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
817 {
818     DICT_PCRE *dict_pcre;
819     VSTREAM *map_fp = 0;
820     struct stat st;
821     VSTRING *line_buffer = 0;
822     DICT_PCRE_RULE *last_rule = 0;
823     DICT_PCRE_RULE *rule;
824     int     last_line = 0;
825     int     lineno;
826     int     nesting = 0;
827     char   *p;
828 
829     /*
830      * Let the optimizer worry about eliminating redundant code.
831      */
832 #define DICT_PCRE_OPEN_RETURN(d) do { \
833 	DICT *__d = (d); \
834 	if (map_fp != 0) \
835 	    vstream_fclose(map_fp); \
836 	if (line_buffer != 0) \
837 	    vstring_free(line_buffer); \
838 	return (__d); \
839     } while (0)
840 
841     /*
842      * Sanity checks.
843      */
844     if (open_flags != O_RDONLY)
845 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
846 					     open_flags, dict_flags,
847 				  "%s:%s map requires O_RDONLY access mode",
848 					     DICT_TYPE_PCRE, mapname));
849 
850     /*
851      * Open the configuration file.
852      */
853     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
854 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
855 					     open_flags, dict_flags,
856 					     "open %s: %m", mapname));
857     if (fstat(vstream_fileno(map_fp), &st) < 0)
858 	msg_fatal("fstat %s: %m", mapname);
859 
860     line_buffer = vstring_alloc(100);
861 
862     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
863 					 sizeof(*dict_pcre));
864     dict_pcre->dict.lookup = dict_pcre_lookup;
865     dict_pcre->dict.close = dict_pcre_close;
866     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
867     if (dict_flags & DICT_FLAG_FOLD_MUL)
868 	dict_pcre->dict.fold_buf = vstring_alloc(10);
869     dict_pcre->head = 0;
870     dict_pcre->expansion_buf = 0;
871 
872     if (dict_pcre_init == 0) {
873 	pcre_malloc = (void *(*) (size_t)) mymalloc;
874 	pcre_free = (void (*) (void *)) myfree;
875 	dict_pcre_init = 1;
876     }
877     dict_pcre->dict.owner.uid = st.st_uid;
878     dict_pcre->dict.owner.status = (st.st_uid != 0);
879 
880     /*
881      * Parse the pcre table.
882      */
883     while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
884 	p = vstring_str(line_buffer);
885 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
886 	if (*p == 0)
887 	    continue;
888 	rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
889 	if (rule == 0)
890 	    continue;
891 	if (rule->op == DICT_PCRE_OP_IF) {
892 	    nesting++;
893 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
894 	    nesting--;
895 	}
896 	if (last_rule == 0)
897 	    dict_pcre->head = rule;
898 	else
899 	    last_rule->next = rule;
900 	last_rule = rule;
901     }
902 
903     if (nesting)
904 	msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
905 		 mapname, lineno);
906 
907     DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
908 }
909 
910 #endif					/* HAS_PCRE */
911