xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: dict_pcre.c,v 1.1.1.3 2014/01/18 17:04:24 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /*	Andrew McNamara
23 /*	andrewm@connect.com.au
24 /*	connect.com.au Pty. Ltd.
25 /*	Level 3, 213 Miller St
26 /*	North Sydney, NSW, Australia
27 /*
28 /*	Wietse Venema
29 /*	IBM T.J. Watson Research
30 /*	P.O. Box 704
31 /*	Yorktown Heights, NY 10598, USA
32 /*--*/
33 
34 #include "sys_defs.h"
35 
36 #ifdef HAS_PCRE
37 
38 /* System library. */
39 
40 #include <sys/stat.h>
41 #include <stdio.h>			/* sprintf() prototype */
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <ctype.h>
46 
47 #ifdef STRCASECMP_IN_STRINGS_H
48 #include <strings.h>
49 #endif
50 
51 /* Utility library. */
52 
53 #include "mymalloc.h"
54 #include "msg.h"
55 #include "safe.h"
56 #include "vstream.h"
57 #include "vstring.h"
58 #include "stringops.h"
59 #include "readlline.h"
60 #include "dict.h"
61 #include "dict_pcre.h"
62 #include "mac_parse.h"
63 #include "pcre.h"
64 #include "warn_stat.h"
65 
66  /*
67   * Backwards compatibility.
68   */
69 #ifdef PCRE_STUDY_JIT_COMPILE		/* assumes pcre_free_study() exists whenever this is defined */
70 #define DICT_PCRE_FREE_STUDY(x)	pcre_free_study(x)	/* release pcre_study() result */
71 #else
72 #define DICT_PCRE_FREE_STUDY(x)	pcre_free((char *) (x))	/* fall back to the generic PCRE free hook */
73 #endif
74 
75  /*
76   * Support for IF/ENDIF based on an idea by Bert Driehuis.
77   */
78 #define DICT_PCRE_OP_MATCH    1		/* Match this regexp, return the replacement */
79 #define DICT_PCRE_OP_IF       2		/* Increase if/endif nesting on match */
80 #define DICT_PCRE_OP_ENDIF    3		/* Decrease if/endif nesting when reached */
81 
82  /*
83   * Max strings captured by regexp - essentially the max number of (..)
84   */
85 #define PCRE_MAX_CAPTURE	99	/* offsets vector holds 3 ints per entry */
86 
87  /*
88   * Regular expression before and after compilation.
89   */
90 typedef struct {
91     char   *regexp;			/* pattern text, points into the parsed line */
92     int     options;			/* PCRE_xxx compile option bits */
93     int     match;			/* 1 = positive match, 0 = negated with '!' */
94 } DICT_PCRE_REGEXP;
95 
96 typedef struct {
97     pcre   *pattern;			/* the compiled pattern, from pcre_compile() */
98     pcre_extra *hints;			/* pcre_study() result to speed pattern execution */
99 } DICT_PCRE_ENGINE;
100 
101  /*
102   * Compiled generic rule, and subclasses that derive from it.
103   */
104 typedef struct DICT_PCRE_RULE {
105     int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF, selects the subclass */
106     int     nesting;			/* IF/ENDIF depth in effect when the rule was parsed */
107     int     lineno;			/* source file line number, used in diagnostics */
108     struct DICT_PCRE_RULE *next;	/* next rule in dict, 0 at end of list */
109 } DICT_PCRE_RULE;
110 
111 typedef struct {
112     DICT_PCRE_RULE rule;		/* generic part, must be first (rules are cast) */
113     pcre   *pattern;			/* compiled pattern */
114     pcre_extra *hints;			/* hints to speed pattern execution */
115     char   *replacement;		/* replacement string, freed with the rule */
116     int     match;			/* positive (1) or negative (0) match */
117     size_t  max_sub;			/* largest $number in replacement, 0 if none */
118 } DICT_PCRE_MATCH_RULE;
119 
120 typedef struct {
121     DICT_PCRE_RULE rule;		/* generic members, must be first (rules are cast) */
122     pcre   *pattern;			/* compiled pattern */
123     pcre_extra *hints;			/* hints to speed pattern execution */
124     int     match;			/* positive (1) or negative (0) match */
125 } DICT_PCRE_IF_RULE;
126 
127  /*
128   * PCRE map.
129   */
130 typedef struct {
131     DICT    dict;			/* generic members */
132     DICT_PCRE_RULE *head;		/* first rule, rules are linked in file order */
133     VSTRING *expansion_buf;		/* lookup result, allocated on first substitution */
134 } DICT_PCRE;
135 
136 static int dict_pcre_init = 0;		/* nonzero once pcre_malloc/pcre_free are redirected */
137 
138 /*
139  * Context for $number expansion callback.
140  */
141 typedef struct {
142     DICT_PCRE *dict_pcre;		/* the dictionary handle */
143     DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched */
144     const char *lookup_string;		/* string against which we match */
145     int     offsets[PCRE_MAX_CAPTURE * 3];	/* pcre_exec() output vector */
146     int     matches;			/* pcre_exec() result: count of cuts, or error */
147 } DICT_PCRE_EXPAND_CONTEXT;
148 
149  /*
150   * Context for $number pre-scan callback.
151   */
152 typedef struct {
153     const char *mapname;		/* name of regexp map, for diagnostics */
154     int     lineno;			/* where in file, for diagnostics */
155     size_t  max_sub;			/* Largest $n seen, 0 if none yet */
156     char   *literal;			/* constant result, $$ -> $; 0 once a $number is seen */
157 } DICT_PCRE_PRESCAN_CONTEXT;
158 
159  /*
160   * Compatibility.
161   */
162 #ifndef MAC_PARSE_OK
163 #define MAC_PARSE_OK 0			/* callback status: no error, no undefined name */
164 #endif
165 
166  /*
167   * Macros to make dense code more accessible.
168   */
169 #define NULL_STARTOFFSET	(0)		/* pcre_exec(): start at beginning of subject */
170 #define NULL_EXEC_OPTIONS 	(0)		/* pcre_exec(): no run-time options */
171 #define NULL_OVECTOR		((int *) 0)	/* no output vector */
172 #define NULL_OVECTOR_LENGTH	(0)		/* length of the null output vector */
173 
174 /* dict_pcre_expand - replace $number with matched text */
175 
176 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
177 {
178     DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
179     DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
180     DICT_PCRE *dict_pcre = ctxt->dict_pcre;
181     const char *pp;
182     int     n;
183     int     ret;
184 
185     /*
186      * Replace $0-${99} with strings cut from matched text.
187      */
188     if (type == MAC_PARSE_VARNAME) {
189 	n = atoi(vstring_str(buf));
190 	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
191 				 ctxt->matches, n, &pp);
192 	if (ret < 0) {
193 	    if (ret == PCRE_ERROR_NOSUBSTRING)
194 		return (MAC_PARSE_UNDEF);
195 	    else
196 		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
197 			dict_pcre->dict.name, match_rule->rule.lineno, ret);
198 	}
199 	if (*pp == 0) {
200 	    myfree((char *) pp);
201 	    return (MAC_PARSE_UNDEF);
202 	}
203 	vstring_strcat(dict_pcre->expansion_buf, pp);
204 	myfree((char *) pp);
205 	return (MAC_PARSE_OK);
206     }
207 
208     /*
209      * Straight text - duplicate with no substitution.
210      */
211     else {
212 	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
213 	return (MAC_PARSE_OK);
214     }
215 }
216 
217 /* dict_pcre_exec_error - report matching error */
218 
219 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
220 {
221     switch (errval) {
222 	case 0:
223 	msg_warn("pcre map %s, line %d: too many (...)",
224 		 mapname, lineno);
225 	return;
226     case PCRE_ERROR_NULL:
227     case PCRE_ERROR_BADOPTION:
228 	msg_fatal("pcre map %s, line %d: bad args to re_exec",
229 		  mapname, lineno);
230     case PCRE_ERROR_BADMAGIC:
231     case PCRE_ERROR_UNKNOWN_NODE:
232 	msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
233 		  mapname, lineno);
234 #ifdef PCRE_ERROR_NOMEMORY
235     case PCRE_ERROR_NOMEMORY:
236 	msg_fatal("pcre map %s, line %d: out of memory",
237 		  mapname, lineno);
238 #endif
239 #ifdef PCRE_ERROR_MATCHLIMIT
240     case PCRE_ERROR_MATCHLIMIT:
241 	msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit",
242 		  mapname, lineno);
243 #endif
244 #ifdef PCRE_ERROR_BADUTF8
245     case PCRE_ERROR_BADUTF8:
246 	msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string",
247 		  mapname, lineno);
248 #endif
249 #ifdef PCRE_ERROR_BADUTF8_OFFSET
250     case PCRE_ERROR_BADUTF8_OFFSET:
251 	msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string",
252 		  mapname, lineno);
253 #endif
254     default:
255 	msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
256 		  mapname, lineno, errval);
257     }
258 }
259 
260 /* dict_pcre_lookup - match string and perform optional substitution */
261 
262 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
263 {
264     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
265     DICT_PCRE_RULE *rule;
266     DICT_PCRE_IF_RULE *if_rule;
267     DICT_PCRE_MATCH_RULE *match_rule;
268     int     lookup_len = strlen(lookup_string);
269     DICT_PCRE_EXPAND_CONTEXT ctxt;
270     int     nesting = 0;
271 
272     dict->error = 0;
273 
274     if (msg_verbose)
275 	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
276 
277     /*
278      * Optionally fold the key.
279      */
280     if (dict->flags & DICT_FLAG_FOLD_MUL) {
281 	if (dict->fold_buf == 0)
282 	    dict->fold_buf = vstring_alloc(10);
283 	vstring_strcpy(dict->fold_buf, lookup_string);
284 	lookup_string = lowercase(vstring_str(dict->fold_buf));
285     }
286     for (rule = dict_pcre->head; rule; rule = rule->next) {
287 
288 	/*
289 	 * Skip rules inside failed IF/ENDIF.
290 	 */
291 	if (nesting < rule->nesting)
292 	    continue;
293 
294 	switch (rule->op) {
295 
296 	    /*
297 	     * Search for a matching expression.
298 	     */
299 	case DICT_PCRE_OP_MATCH:
300 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
301 	    ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
302 				     lookup_string, lookup_len,
303 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
304 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
305 
306 	    if (ctxt.matches > 0) {
307 		if (!match_rule->match)
308 		    continue;			/* Negative rule matched */
309 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
310 		if (match_rule->match)
311 		    continue;			/* Positive rule did not
312 						 * match */
313 	    } else {
314 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
315 		continue;			/* pcre_exec failed */
316 	    }
317 
318 	    /*
319 	     * Skip $number substitutions when the replacement text contains
320 	     * no $number strings, as learned during the compile time
321 	     * pre-scan. The pre-scan already replaced $$ by $.
322 	     */
323 	    if (match_rule->max_sub == 0)
324 		return match_rule->replacement;
325 
326 	    /*
327 	     * We've got a match. Perform substitution on replacement string.
328 	     */
329 	    if (dict_pcre->expansion_buf == 0)
330 		dict_pcre->expansion_buf = vstring_alloc(10);
331 	    VSTRING_RESET(dict_pcre->expansion_buf);
332 	    ctxt.dict_pcre = dict_pcre;
333 	    ctxt.match_rule = match_rule;
334 	    ctxt.lookup_string = lookup_string;
335 
336 	    if (mac_parse(match_rule->replacement, dict_pcre_expand,
337 			  (char *) &ctxt) & MAC_PARSE_ERROR)
338 		msg_fatal("pcre map %s, line %d: bad replacement syntax",
339 			  dict->name, rule->lineno);
340 
341 	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
342 	    return (vstring_str(dict_pcre->expansion_buf));
343 
344 	    /*
345 	     * Conditional. XXX We provide space for matched substring info
346 	     * because PCRE uses part of it as workspace for backtracking.
347 	     * PCRE will allocate memory if it runs out of backtracking
348 	     * storage.
349 	     */
350 	case DICT_PCRE_OP_IF:
351 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
352 	    ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
353 				     lookup_string, lookup_len,
354 				     NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
355 				     ctxt.offsets, PCRE_MAX_CAPTURE * 3);
356 
357 	    if (ctxt.matches > 0) {
358 		if (!if_rule->match)
359 		    continue;			/* Negative rule matched */
360 	    } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
361 		if (if_rule->match)
362 		    continue;			/* Positive rule did not
363 						 * match */
364 	    } else {
365 		dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
366 		continue;			/* pcre_exec failed */
367 	    }
368 	    nesting++;
369 	    continue;
370 
371 	    /*
372 	     * ENDIF after successful IF.
373 	     */
374 	case DICT_PCRE_OP_ENDIF:
375 	    nesting--;
376 	    continue;
377 
378 	default:
379 	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
380 	}
381     }
382     return (0);
383 }
384 
385 /* dict_pcre_close - close pcre dictionary */
386 
387 static void dict_pcre_close(DICT *dict)
388 {
389     DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
390     DICT_PCRE_RULE *rule;
391     DICT_PCRE_RULE *next;
392     DICT_PCRE_MATCH_RULE *match_rule;
393     DICT_PCRE_IF_RULE *if_rule;
394 
395     for (rule = dict_pcre->head; rule; rule = next) {
396 	next = rule->next;
397 	switch (rule->op) {
398 	case DICT_PCRE_OP_MATCH:
399 	    match_rule = (DICT_PCRE_MATCH_RULE *) rule;
400 	    if (match_rule->pattern)
401 		myfree((char *) match_rule->pattern);
402 	    if (match_rule->hints)
403 		DICT_PCRE_FREE_STUDY(match_rule->hints);
404 	    if (match_rule->replacement)
405 		myfree((char *) match_rule->replacement);
406 	    break;
407 	case DICT_PCRE_OP_IF:
408 	    if_rule = (DICT_PCRE_IF_RULE *) rule;
409 	    if (if_rule->pattern)
410 		myfree((char *) if_rule->pattern);
411 	    if (if_rule->hints)
412 		DICT_PCRE_FREE_STUDY(if_rule->hints);
413 	    break;
414 	case DICT_PCRE_OP_ENDIF:
415 	    break;
416 	default:
417 	    msg_panic("dict_pcre_close: unknown operation %d", rule->op);
418 	}
419 	myfree((char *) rule);
420     }
421     if (dict_pcre->expansion_buf)
422 	vstring_free(dict_pcre->expansion_buf);
423     if (dict->fold_buf)
424 	vstring_free(dict->fold_buf);
425     dict_free(dict);
426 }
427 
428 /* dict_pcre_get_pattern - extract pattern from rule */
429 
430 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
431 				         DICT_PCRE_REGEXP *pattern)
432 {
433     char   *p = *bufp;
434     char    re_delimiter;
435 
436     /*
437      * Process negation operators.
438      */
439     pattern->match = 1;
440     while (*p == '!') {
441 	pattern->match = !pattern->match;
442 	p++;
443     }
444 
445     /*
446      * Grr...aceful handling of whitespace after '!'.
447      */
448     while (*p && ISSPACE(*p))
449 	p++;
450     if (*p == 0) {
451 	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
452 		 mapname, lineno);
453 	return (0);
454     }
455     re_delimiter = *p++;
456     pattern->regexp = p;
457 
458     /*
459      * Search for second delimiter, handling backslash escape.
460      */
461     while (*p) {
462 	if (*p == '\\') {
463 	    ++p;
464 	    if (*p == 0)
465 		break;
466 	} else if (*p == re_delimiter)
467 	    break;
468 	++p;
469     }
470 
471     if (!*p) {
472 	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
473 		 "ignoring this rule", mapname, lineno, re_delimiter);
474 	return (0);
475     }
476     *p++ = 0;					/* Null term the regexp */
477 
478     /*
479      * Parse any regexp options.
480      */
481     pattern->options = PCRE_CASELESS | PCRE_DOTALL;
482     while (*p && !ISSPACE(*p)) {
483 	switch (*p) {
484 	case 'i':
485 	    pattern->options ^= PCRE_CASELESS;
486 	    break;
487 	case 'm':
488 	    pattern->options ^= PCRE_MULTILINE;
489 	    break;
490 	case 's':
491 	    pattern->options ^= PCRE_DOTALL;
492 	    break;
493 	case 'x':
494 	    pattern->options ^= PCRE_EXTENDED;
495 	    break;
496 	case 'A':
497 	    pattern->options ^= PCRE_ANCHORED;
498 	    break;
499 	case 'E':
500 	    pattern->options ^= PCRE_DOLLAR_ENDONLY;
501 	    break;
502 	case 'U':
503 	    pattern->options ^= PCRE_UNGREEDY;
504 	    break;
505 	case 'X':
506 	    pattern->options ^= PCRE_EXTRA;
507 	    break;
508 	default:
509 	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
510 		     "skipping this rule", mapname, lineno, *p);
511 	    return (0);
512 	}
513 	++p;
514     }
515     *bufp = p;
516     return (1);
517 }
518 
519 /* dict_pcre_prescan - sanity check $number instances in replacement text */
520 
521 static int dict_pcre_prescan(int type, VSTRING *buf, char *context)
522 {
523     DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
524     size_t  n;
525 
526     /*
527      * Keep a copy of literal text (with $$ already replaced by $) if and
528      * only if the replacement text contains no $number expression. This way
529      * we can avoid having to scan the replacement text at lookup time.
530      */
531     if (type == MAC_PARSE_VARNAME) {
532 	if (ctxt->literal) {
533 	    myfree(ctxt->literal);
534 	    ctxt->literal = 0;
535 	}
536 	if (!alldig(vstring_str(buf))) {
537 	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
538 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
539 	    return (MAC_PARSE_ERROR);
540 	}
541 	n = atoi(vstring_str(buf));
542 	if (n < 1) {
543 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
544 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
545 	    return (MAC_PARSE_ERROR);
546 	}
547 	if (n > ctxt->max_sub)
548 	    ctxt->max_sub = n;
549     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
550 	if (ctxt->literal)
551 	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
552 		      ctxt->mapname, ctxt->lineno);
553 	ctxt->literal = mystrdup(vstring_str(buf));
554     }
555     return (MAC_PARSE_OK);
556 }
557 
558 /* dict_pcre_compile - compile pattern */
559 
560 static int dict_pcre_compile(const char *mapname, int lineno,
561 			             DICT_PCRE_REGEXP *pattern,
562 			             DICT_PCRE_ENGINE *engine)
563 {
564     const char *error;
565     int     errptr;
566 
567     engine->pattern = pcre_compile(pattern->regexp, pattern->options,
568 				   &error, &errptr, NULL);
569     if (engine->pattern == 0) {
570 	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
571 		 mapname, lineno, errptr, error);
572 	return (0);
573     }
574     engine->hints = pcre_study(engine->pattern, 0, &error);
575     if (error != 0) {
576 	msg_warn("pcre map %s, line %d: error while studying regex: %s",
577 		 mapname, lineno, error);
578 	myfree((char *) engine->pattern);
579 	return (0);
580     }
581     return (1);
582 }
583 
584 /* dict_pcre_rule_alloc - fill in a generic rule structure */
585 
586 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
587 					            int lineno,
588 					            size_t size)
589 {
590     DICT_PCRE_RULE *rule;
591 
592     rule = (DICT_PCRE_RULE *) mymalloc(size);
593     rule->op = op;
594     rule->nesting = nesting;
595     rule->lineno = lineno;
596     rule->next = 0;
597 
598     return (rule);
599 }
600 
601 /* dict_pcre_parse_rule - parse and compile one rule */
602 
603 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
604 					            char *line, int nesting,
605 					            int dict_flags)
606 {
607     char   *p;
608     int     actual_sub;
609 
610     p = line;
611 
612     /*
613      * An ordinary match rule takes one pattern and replacement text.
614      */
615     if (!ISALNUM(*p)) {
616 	DICT_PCRE_REGEXP regexp;
617 	DICT_PCRE_ENGINE engine;
618 	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
619 	DICT_PCRE_MATCH_RULE *match_rule;
620 
621 	/*
622 	 * Get the pattern string and options.
623 	 */
624 	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
625 	    return (0);
626 
627 	/*
628 	 * Get the replacement text.
629 	 */
630 	while (*p && ISSPACE(*p))
631 	    ++p;
632 	if (!*p)
633 	    msg_warn("%s, line %d: no replacement text: using empty string",
634 		     mapname, lineno);
635 
636 	/*
637 	 * Sanity check the $number instances in the replacement text.
638 	 */
639 	prescan_context.mapname = mapname;
640 	prescan_context.lineno = lineno;
641 	prescan_context.max_sub = 0;
642 	prescan_context.literal = 0;
643 
644 	/*
645 	 * The optimizer will eliminate code duplication and/or dead code.
646 	 */
647 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
648 	if (prescan_context.literal) \
649 	    myfree(prescan_context.literal); \
650 	return (rval); \
651     } while (0)
652 
653 	if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context)
654 	    & MAC_PARSE_ERROR) {
655 	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
656 		     "skipping this rule", mapname, lineno);
657 	    CREATE_MATCHOP_ERROR_RETURN(0);
658 	}
659 
660 	/*
661 	 * Substring replacement not possible with negative regexps.
662 	 */
663 	if (prescan_context.max_sub > 0 && regexp.match == 0) {
664 	    msg_warn("pcre map %s, line %d: $number found in negative match "
665 		   "replacement text: skipping this rule", mapname, lineno);
666 	    CREATE_MATCHOP_ERROR_RETURN(0);
667 	}
668 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
669 	    msg_warn("pcre map %s, line %d: "
670 		     "regular expression substitution is not allowed: "
671 		     "skipping this rule", mapname, lineno);
672 	    CREATE_MATCHOP_ERROR_RETURN(0);
673 	}
674 
675 	/*
676 	 * Compile the pattern.
677 	 */
678 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
679 	    CREATE_MATCHOP_ERROR_RETURN(0);
680 #ifdef PCRE_INFO_CAPTURECOUNT
681 	if (pcre_fullinfo(engine.pattern, engine.hints,
682 			  PCRE_INFO_CAPTURECOUNT,
683 			  (void *) &actual_sub) != 0)
684 	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
685 		      mapname, lineno);
686 	if (prescan_context.max_sub > actual_sub) {
687 	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
688 		     "skipping this rule", mapname, lineno,
689 		     (int) prescan_context.max_sub);
690 	    if (engine.pattern)
691 		myfree((char *) engine.pattern);
692 	    if (engine.hints)
693 		DICT_PCRE_FREE_STUDY(engine.hints);
694 	    CREATE_MATCHOP_ERROR_RETURN(0);
695 	}
696 #endif
697 
698 	/*
699 	 * Save the result.
700 	 */
701 	match_rule = (DICT_PCRE_MATCH_RULE *)
702 	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
703 				 sizeof(DICT_PCRE_MATCH_RULE));
704 	match_rule->match = regexp.match;
705 	match_rule->max_sub = prescan_context.max_sub;
706 	if (prescan_context.literal)
707 	    match_rule->replacement = prescan_context.literal;
708 	else
709 	    match_rule->replacement = mystrdup(p);
710 	match_rule->pattern = engine.pattern;
711 	match_rule->hints = engine.hints;
712 	return ((DICT_PCRE_RULE *) match_rule);
713     }
714 
715     /*
716      * The IF operator takes one pattern but no replacement text.
717      */
718     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
719 	DICT_PCRE_REGEXP regexp;
720 	DICT_PCRE_ENGINE engine;
721 	DICT_PCRE_IF_RULE *if_rule;
722 
723 	p += 2;
724 
725 	/*
726 	 * Get the pattern.
727 	 */
728 	while (*p && ISSPACE(*p))
729 	    p++;
730 	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
731 	    return (0);
732 
733 	/*
734 	 * Warn about out-of-place text.
735 	 */
736 	while (*p && ISSPACE(*p))
737 	    ++p;
738 	if (*p) {
739 	    msg_warn("pcre map %s, line %d: ignoring extra text after "
740 		     "IF statement: \"%s\"", mapname, lineno, p);
741 	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
742 		     " to statements between IF and ENDIF", mapname, lineno);
743 	}
744 
745 	/*
746 	 * Compile the pattern.
747 	 */
748 	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
749 	    return (0);
750 
751 	/*
752 	 * Save the result.
753 	 */
754 	if_rule = (DICT_PCRE_IF_RULE *)
755 	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
756 				 sizeof(DICT_PCRE_IF_RULE));
757 	if_rule->match = regexp.match;
758 	if_rule->pattern = engine.pattern;
759 	if_rule->hints = engine.hints;
760 	return ((DICT_PCRE_RULE *) if_rule);
761     }
762 
763     /*
764      * The ENDIF operator takes no patterns and no replacement text.
765      */
766     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
767 	DICT_PCRE_RULE *rule;
768 
769 	p += 5;
770 
771 	/*
772 	 * Warn about out-of-place ENDIFs.
773 	 */
774 	if (nesting == 0) {
775 	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
776 		     mapname, lineno);
777 	    return (0);
778 	}
779 
780 	/*
781 	 * Warn about out-of-place text.
782 	 */
783 	while (*p && ISSPACE(*p))
784 	    ++p;
785 	if (*p)
786 	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
787 		     mapname, lineno);
788 
789 	/*
790 	 * Save the result.
791 	 */
792 	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
793 				    sizeof(DICT_PCRE_RULE));
794 	return (rule);
795     }
796 
797     /*
798      * Unrecognized input.
799      */
800     else {
801 	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
802 		 mapname, lineno);
803 	return (0);
804     }
805 }
806 
807 /* dict_pcre_open - load and compile a file containing regular expressions */
808 
809 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
810 {
811     DICT_PCRE *dict_pcre;
812     VSTREAM *map_fp;
813     struct stat st;
814     VSTRING *line_buffer;
815     DICT_PCRE_RULE *last_rule = 0;
816     DICT_PCRE_RULE *rule;
817     int     lineno = 0;
818     int     nesting = 0;
819     char   *p;
820 
821     /*
822      * Sanity checks.
823      */
824     if (open_flags != O_RDONLY)
825 	return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags,
826 			       "%s:%s map requires O_RDONLY access mode",
827 			       DICT_TYPE_PCRE, mapname));
828 
829     /*
830      * Open the configuration file.
831      */
832     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
833 	return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags,
834 			       "open %s: %m", mapname));
835     if (fstat(vstream_fileno(map_fp), &st) < 0)
836 	msg_fatal("fstat %s: %m", mapname);
837 
838     line_buffer = vstring_alloc(100);
839 
840     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
841 					 sizeof(*dict_pcre));
842     dict_pcre->dict.lookup = dict_pcre_lookup;
843     dict_pcre->dict.close = dict_pcre_close;
844     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
845     if (dict_flags & DICT_FLAG_FOLD_MUL)
846 	dict_pcre->dict.fold_buf = vstring_alloc(10);
847     dict_pcre->head = 0;
848     dict_pcre->expansion_buf = 0;
849 
850     if (dict_pcre_init == 0) {
851 	pcre_malloc = (void *(*) (size_t)) mymalloc;
852 	pcre_free = (void (*) (void *)) myfree;
853 	dict_pcre_init = 1;
854     }
855     dict_pcre->dict.owner.uid = st.st_uid;
856     dict_pcre->dict.owner.status = (st.st_uid != 0);
857 
858     /*
859      * Parse the pcre table.
860      */
861     while (readlline(line_buffer, map_fp, &lineno)) {
862 	p = vstring_str(line_buffer);
863 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
864 	if (*p == 0)
865 	    continue;
866 	rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
867 	if (rule == 0)
868 	    continue;
869 	if (rule->op == DICT_PCRE_OP_IF) {
870 	    nesting++;
871 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
872 	    nesting--;
873 	}
874 	if (last_rule == 0)
875 	    dict_pcre->head = rule;
876 	else
877 	    last_rule->next = rule;
878 	last_rule = rule;
879     }
880 
881     if (nesting)
882 	msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
883 		 mapname, lineno);
884 
885     vstring_free(line_buffer);
886     vstream_fclose(map_fp);
887 
888     return (DICT_DEBUG (&dict_pcre->dict));
889 }
890 
891 #endif					/* HAS_PCRE */
892