xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_regexp.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*	$NetBSD: dict_regexp.c,v 1.1.1.3 2014/07/06 19:27:58 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_regexp 3
6 /* SUMMARY
7 /*	dictionary manager interface to REGEXP regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_regexp.h>
10 /*
11 /*	DICT	*dict_regexp_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_regexp_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /*	regexp_table(5) format of Postfix regular expression tables
22 /* AUTHOR(S)
23 /*	LaMont Jones
24 /*	lamont@hp.com
25 /*
26 /*	Based on PCRE dictionary contributed by Andrew McNamara
27 /*	andrewm@connect.com.au
28 /*	connect.com.au Pty. Ltd.
29 /*	Level 3, 213 Miller St
30 /*	North Sydney, NSW, Australia
31 /*
32 /*	Heavily rewritten by Wietse Venema
33 /*	IBM T.J. Watson Research
34 /*	P.O. Box 704
35 /*	Yorktown Heights, NY 10598, USA
36 /*--*/
37 
38 /* System library. */
39 
40 #include "sys_defs.h"
41 
42 #ifdef HAS_POSIX_REGEXP
43 
44 #include <sys/stat.h>
45 #include <stdlib.h>
46 #include <unistd.h>
47 #include <string.h>
48 #include <ctype.h>
49 #include <regex.h>
50 #ifdef STRCASECMP_IN_STRINGS_H
51 #include <strings.h>
52 #endif
53 
54 /* Utility library. */
55 
56 #include "mymalloc.h"
57 #include "msg.h"
58 #include "safe.h"
59 #include "vstream.h"
60 #include "vstring.h"
61 #include "stringops.h"
62 #include "readlline.h"
63 #include "dict.h"
64 #include "dict_regexp.h"
65 #include "mac_parse.h"
66 #include "warn_stat.h"
67 
68  /*
69   * Support for IF/ENDIF based on an idea by Bert Driehuis.
70   */
71 #define DICT_REGEXP_OP_MATCH	1	/* Match this regexp */
72 #define DICT_REGEXP_OP_IF	2	/* Increase if/endif nesting on match */
73 #define DICT_REGEXP_OP_ENDIF	3	/* Decrease if/endif nesting on match */
74 
75  /*
76   * Regular expression before compiling.
77   */
78 typedef struct {
79     char   *regexp;			/* regular expression */
80     int     options;			/* regcomp() options */
81     int     match;			/* positive or negative match */
82 } DICT_REGEXP_PATTERN;
83 
84  /*
85   * Compiled generic rule, and subclasses that derive from it.
86   */
87 typedef struct DICT_REGEXP_RULE {
88     int     op;				/* DICT_REGEXP_OP_MATCH/IF/ENDIF */
89     int     nesting;			/* Level of search nesting */
90     int     lineno;			/* source file line number */
91     struct DICT_REGEXP_RULE *next;	/* next rule in dict */
92 } DICT_REGEXP_RULE;
93 
94 typedef struct {
95     DICT_REGEXP_RULE rule;		/* generic part */
96     regex_t *first_exp;			/* compiled primary pattern */
97     int     first_match;		/* positive or negative match */
98     regex_t *second_exp;		/* compiled secondary pattern */
99     int     second_match;		/* positive or negative match */
100     char   *replacement;		/* replacement text */
101     size_t  max_sub;			/* largest $number in replacement */
102 } DICT_REGEXP_MATCH_RULE;
103 
104 typedef struct {
105     DICT_REGEXP_RULE rule;		/* generic members */
106     regex_t *expr;			/* the condition */
107     int     match;			/* positive or negative match */
108 } DICT_REGEXP_IF_RULE;
109 
110  /*
111   * Regexp map.
112   */
113 typedef struct {
114     DICT    dict;			/* generic members */
115     regmatch_t *pmatch;			/* matched substring info */
116     DICT_REGEXP_RULE *head;		/* first rule */
117     VSTRING *expansion_buf;		/* lookup result */
118 } DICT_REGEXP;
119 
120  /*
121   * Macros to make dense code more readable.
122   */
123 #define NULL_SUBSTITUTIONS	(0)
124 #define NULL_MATCH_RESULT	((regmatch_t *) 0)
125 
126  /*
127   * Context for $number expansion callback.
128   */
129 typedef struct {
130     DICT_REGEXP *dict_regexp;		/* the dictionary handle */
131     DICT_REGEXP_MATCH_RULE *match_rule;	/* the rule we matched */
132     const char *lookup_string;		/* matched text */
133 } DICT_REGEXP_EXPAND_CONTEXT;
134 
135  /*
136   * Context for $number pre-scan callback.
137   */
138 typedef struct {
139     const char *mapname;		/* name of regexp map */
140     int     lineno;			/* where in file */
141     size_t  max_sub;			/* largest $number seen */
142     char   *literal;			/* constant result, $$ -> $ */
143 } DICT_REGEXP_PRESCAN_CONTEXT;
144 
145  /*
146   * Compatibility.
147   */
148 #ifndef MAC_PARSE_OK
149 #define MAC_PARSE_OK 0
150 #endif
151 
152 /* dict_regexp_expand - replace $number with substring from matched text */
153 
154 static int dict_regexp_expand(int type, VSTRING *buf, char *ptr)
155 {
156     DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
157     DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
158     DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
159     regmatch_t *pmatch;
160     size_t  n;
161 
162     /*
163      * Replace $number by the corresponding substring from the matched text.
164      * We pre-scanned the replacement text at compile time, so any out of
165      * range $number means that something impossible has happened.
166      */
167     if (type == MAC_PARSE_VARNAME) {
168 	n = atoi(vstring_str(buf));
169 	if (n < 1 || n > match_rule->max_sub)
170 	    msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
171 		      dict_regexp->dict.name, match_rule->rule.lineno,
172 		      vstring_str(buf));
173 	pmatch = dict_regexp->pmatch + n;
174 	if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
175 	    return (MAC_PARSE_UNDEF);		/* empty or not matched */
176 	vstring_strncat(dict_regexp->expansion_buf,
177 			ctxt->lookup_string + pmatch->rm_so,
178 			pmatch->rm_eo - pmatch->rm_so);
179 	return (MAC_PARSE_OK);
180     }
181 
182     /*
183      * Straight text - duplicate with no substitution.
184      */
185     else {
186 	vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
187 	return (MAC_PARSE_OK);
188     }
189 }
190 
/* dict_regexp_regerror - log a regcomp() or regexec() error as a warning */

static void dict_regexp_regerror(const char *mapname, int lineno, int error,
				         const regex_t *expr)
{
    char    text[256];

    /*
     * Have the regexp library translate the error code, then report it with
     * the map name and line number for context. The size returned by
     * regerror() is not used.
     */
    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
201 
202  /*
203   * Inlined to reduce function call overhead in the time-critical loop.
204   */
205 #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
206     ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
207      ((err) == REG_NOMATCH ? !(match) : \
208       (err) == 0 ? (match) : \
209       (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
210 
211 /* dict_regexp_lookup - match string and perform optional substitution */
212 
213 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
214 {
215     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
216     DICT_REGEXP_RULE *rule;
217     DICT_REGEXP_IF_RULE *if_rule;
218     DICT_REGEXP_MATCH_RULE *match_rule;
219     DICT_REGEXP_EXPAND_CONTEXT expand_context;
220     int     error;
221     int     nesting = 0;
222 
223     dict->error = 0;
224 
225     if (msg_verbose)
226 	msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
227 
228     /*
229      * Optionally fold the key.
230      */
231     if (dict->flags & DICT_FLAG_FOLD_MUL) {
232 	if (dict->fold_buf == 0)
233 	    dict->fold_buf = vstring_alloc(10);
234 	vstring_strcpy(dict->fold_buf, lookup_string);
235 	lookup_string = lowercase(vstring_str(dict->fold_buf));
236     }
237     for (rule = dict_regexp->head; rule; rule = rule->next) {
238 
239 	/*
240 	 * Skip rules inside failed IF/ENDIF.
241 	 */
242 	if (nesting < rule->nesting)
243 	    continue;
244 
245 	switch (rule->op) {
246 
247 	    /*
248 	     * Search for the first matching primary expression. Limit the
249 	     * overhead for substring substitution to the bare minimum.
250 	     */
251 	case DICT_REGEXP_OP_MATCH:
252 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
253 	    if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
254 				     match_rule->first_exp,
255 				     match_rule->first_match,
256 				     lookup_string,
257 				     match_rule->max_sub > 0 ?
258 				     match_rule->max_sub + 1 : 0,
259 				     dict_regexp->pmatch))
260 		continue;
261 	    if (match_rule->second_exp
262 		&& !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
263 					match_rule->second_exp,
264 					match_rule->second_match,
265 					lookup_string,
266 					NULL_SUBSTITUTIONS,
267 					NULL_MATCH_RESULT))
268 		continue;
269 
270 	    /*
271 	     * Skip $number substitutions when the replacement text contains
272 	     * no $number strings, as learned during the compile time
273 	     * pre-scan. The pre-scan already replaced $$ by $.
274 	     */
275 	    if (match_rule->max_sub == 0)
276 		return (match_rule->replacement);
277 
278 	    /*
279 	     * Perform $number substitutions on the replacement text. We
280 	     * pre-scanned the replacement text at compile time. Any macro
281 	     * expansion errors at this point mean something impossible has
282 	     * happened.
283 	     */
284 	    if (!dict_regexp->expansion_buf)
285 		dict_regexp->expansion_buf = vstring_alloc(10);
286 	    VSTRING_RESET(dict_regexp->expansion_buf);
287 	    expand_context.lookup_string = lookup_string;
288 	    expand_context.match_rule = match_rule;
289 	    expand_context.dict_regexp = dict_regexp;
290 
291 	    if (mac_parse(match_rule->replacement, dict_regexp_expand,
292 			  (char *) &expand_context) & MAC_PARSE_ERROR)
293 		msg_panic("regexp map %s, line %d: bad replacement syntax",
294 			  dict->name, rule->lineno);
295 	    VSTRING_TERMINATE(dict_regexp->expansion_buf);
296 	    return (vstring_str(dict_regexp->expansion_buf));
297 
298 	    /*
299 	     * Conditional.
300 	     */
301 	case DICT_REGEXP_OP_IF:
302 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
303 	    if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
304 			       if_rule->expr, if_rule->match, lookup_string,
305 				    NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
306 		nesting++;
307 	    continue;
308 
309 	    /*
310 	     * ENDIF after successful IF.
311 	     */
312 	case DICT_REGEXP_OP_ENDIF:
313 	    nesting--;
314 	    continue;
315 
316 	default:
317 	    msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
318 	}
319     }
320     return (0);
321 }
322 
323 /* dict_regexp_close - close regexp dictionary */
324 
325 static void dict_regexp_close(DICT *dict)
326 {
327     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
328     DICT_REGEXP_RULE *rule;
329     DICT_REGEXP_RULE *next;
330     DICT_REGEXP_MATCH_RULE *match_rule;
331     DICT_REGEXP_IF_RULE *if_rule;
332 
333     for (rule = dict_regexp->head; rule; rule = next) {
334 	next = rule->next;
335 	switch (rule->op) {
336 	case DICT_REGEXP_OP_MATCH:
337 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
338 	    if (match_rule->first_exp) {
339 		regfree(match_rule->first_exp);
340 		myfree((char *) match_rule->first_exp);
341 	    }
342 	    if (match_rule->second_exp) {
343 		regfree(match_rule->second_exp);
344 		myfree((char *) match_rule->second_exp);
345 	    }
346 	    if (match_rule->replacement)
347 		myfree((char *) match_rule->replacement);
348 	    break;
349 	case DICT_REGEXP_OP_IF:
350 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
351 	    if (if_rule->expr) {
352 		regfree(if_rule->expr);
353 		myfree((char *) if_rule->expr);
354 	    }
355 	    break;
356 	case DICT_REGEXP_OP_ENDIF:
357 	    break;
358 	default:
359 	    msg_panic("dict_regexp_close: unknown operation %d", rule->op);
360 	}
361 	myfree((char *) rule);
362     }
363     if (dict_regexp->pmatch)
364 	myfree((char *) dict_regexp->pmatch);
365     if (dict_regexp->expansion_buf)
366 	vstring_free(dict_regexp->expansion_buf);
367     if (dict->fold_buf)
368 	vstring_free(dict->fold_buf);
369     dict_free(dict);
370 }
371 
372 /* dict_regexp_get_pat - extract one pattern with options from rule */
373 
374 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
375 			               DICT_REGEXP_PATTERN *pat)
376 {
377     char   *p = *bufp;
378     char    re_delim;
379 
380     /*
381      * Process negation operators.
382      */
383     pat->match = 1;
384     while (*p == '!') {
385 	pat->match = !pat->match;
386 	p++;
387     }
388 
389     /*
390      * Grr...aceful handling of whitespace after '!'.
391      */
392     while (*p && ISSPACE(*p))
393 	p++;
394     if (*p == 0) {
395 	msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
396 		 mapname, lineno);
397 	return (0);
398     }
399 
400     /*
401      * Search for the closing delimiter, handling backslash escape.
402      */
403     re_delim = *p++;
404     pat->regexp = p;
405     while (*p) {
406 	if (*p == '\\') {
407 	    if (p[1])
408 		p++;
409 	    else
410 		break;
411 	} else if (*p == re_delim) {
412 	    break;
413 	}
414 	++p;
415     }
416     if (!*p) {
417 	msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
418 		 "skipping this rule", mapname, lineno, re_delim);
419 	return (0);
420     }
421     *p++ = 0;					/* null terminate */
422 
423     /*
424      * Search for options.
425      */
426     pat->options = REG_EXTENDED | REG_ICASE;
427     while (*p && !ISSPACE(*p) && *p != '!') {
428 	switch (*p) {
429 	case 'i':
430 	    pat->options ^= REG_ICASE;
431 	    break;
432 	case 'm':
433 	    pat->options ^= REG_NEWLINE;
434 	    break;
435 	case 'x':
436 	    pat->options ^= REG_EXTENDED;
437 	    break;
438 	default:
439 	    msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
440 		     "skipping this rule", mapname, lineno, *p);
441 	    return (0);
442 	}
443 	++p;
444     }
445     *bufp = p;
446     return (1);
447 }
448 
449 /* dict_regexp_get_pats - get the primary and second patterns and flags */
450 
451 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
452 				        DICT_REGEXP_PATTERN *first_pat,
453 				        DICT_REGEXP_PATTERN *second_pat)
454 {
455 
456     /*
457      * Get the primary and optional secondary patterns and their flags.
458      */
459     if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
460 	return (0);
461     if (**p == '!') {
462 #if 0
463 	static int bitrot_warned = 0;
464 
465 	if (bitrot_warned == 0) {
466 	    msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
467 		 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
468 		     mapname, lineno);
469 	    bitrot_warned = 1;
470 	}
471 #endif
472 	if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
473 	    return (0);
474     } else {
475 	second_pat->regexp = 0;
476     }
477     return (1);
478 }
479 
480 /* dict_regexp_prescan - find largest $number in replacement text */
481 
482 static int dict_regexp_prescan(int type, VSTRING *buf, char *context)
483 {
484     DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
485     size_t  n;
486 
487     /*
488      * Keep a copy of literal text (with $$ already replaced by $) if and
489      * only if the replacement text contains no $number expression. This way
490      * we can avoid having to scan the replacement text at lookup time.
491      */
492     if (type == MAC_PARSE_VARNAME) {
493 	if (ctxt->literal) {
494 	    myfree(ctxt->literal);
495 	    ctxt->literal = 0;
496 	}
497 	if (!alldig(vstring_str(buf))) {
498 	    msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
499 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
500 	    return (MAC_PARSE_ERROR);
501 	}
502 	n = atoi(vstring_str(buf));
503 	if (n < 1) {
504 	    msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
505 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
506 	    return (MAC_PARSE_ERROR);
507 	}
508 	if (n > ctxt->max_sub)
509 	    ctxt->max_sub = n;
510     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
511 	if (ctxt->literal)
512 	    msg_panic("regexp map %s, line %d: multiple literals but no $number",
513 		      ctxt->mapname, ctxt->lineno);
514 	ctxt->literal = mystrdup(vstring_str(buf));
515     }
516     return (MAC_PARSE_OK);
517 }
518 
519 /* dict_regexp_compile_pat - compile one pattern */
520 
521 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
522 					        DICT_REGEXP_PATTERN *pat)
523 {
524     int     error;
525     regex_t *expr;
526 
527     expr = (regex_t *) mymalloc(sizeof(*expr));
528     error = regcomp(expr, pat->regexp, pat->options);
529     if (error != 0) {
530 	dict_regexp_regerror(mapname, lineno, error, expr);
531 	myfree((char *) expr);
532 	return (0);
533     }
534     return (expr);
535 }
536 
537 /* dict_regexp_rule_alloc - fill in a generic rule structure */
538 
539 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting,
540 						        int lineno,
541 						        size_t size)
542 {
543     DICT_REGEXP_RULE *rule;
544 
545     rule = (DICT_REGEXP_RULE *) mymalloc(size);
546     rule->op = op;
547     rule->nesting = nesting;
548     rule->lineno = lineno;
549     rule->next = 0;
550 
551     return (rule);
552 }
553 
554 /* dict_regexp_parseline - parse one rule */
555 
556 static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
557 					            char *line, int nesting,
558 					               int dict_flags)
559 {
560     char   *p;
561 
562     p = line;
563 
564     /*
565      * An ordinary rule takes one or two patterns and replacement text.
566      */
567     if (!ISALNUM(*p)) {
568 	DICT_REGEXP_PATTERN first_pat;
569 	DICT_REGEXP_PATTERN second_pat;
570 	DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
571 	regex_t *first_exp = 0;
572 	regex_t *second_exp;
573 	DICT_REGEXP_MATCH_RULE *match_rule;
574 
575 	/*
576 	 * Get the primary and the optional secondary patterns.
577 	 */
578 	if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
579 	    return (0);
580 
581 	/*
582 	 * Get the replacement text.
583 	 */
584 	while (*p && ISSPACE(*p))
585 	    ++p;
586 	if (!*p) {
587 	    msg_warn("regexp map %s, line %d: using empty replacement string",
588 		     mapname, lineno);
589 	}
590 
591 	/*
592 	 * Find the highest-numbered $number in the replacement text. We can
593 	 * speed up pattern matching 1) by passing hints to the regexp
594 	 * compiler, setting the REG_NOSUB flag when the replacement text
595 	 * contains no $number string; 2) by passing hints to the regexp
596 	 * execution code, limiting the amount of text that is made available
597 	 * for substitution.
598 	 */
599 	prescan_context.mapname = mapname;
600 	prescan_context.lineno = lineno;
601 	prescan_context.max_sub = 0;
602 	prescan_context.literal = 0;
603 
604 	/*
605 	 * The optimizer will eliminate code duplication and/or dead code.
606 	 */
607 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
608 	if (first_exp) { \
609 	    regfree(first_exp); \
610 	    myfree((char *) first_exp); \
611 	} \
612 	if (prescan_context.literal) \
613 	    myfree(prescan_context.literal); \
614 	return (rval); \
615     } while (0)
616 
617 	if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context)
618 	    & MAC_PARSE_ERROR) {
619 	    msg_warn("regexp map %s, line %d: bad replacement syntax: "
620 		     "skipping this rule", mapname, lineno);
621 	    CREATE_MATCHOP_ERROR_RETURN(0);
622 	}
623 
624 	/*
625 	 * Compile the primary and the optional secondary pattern. Speed up
626 	 * execution when no matched text needs to be substituted into the
627 	 * result string, or when the highest numbered substring is less than
628 	 * the total number of () subpatterns.
629 	 */
630 	if (prescan_context.max_sub == 0)
631 	    first_pat.options |= REG_NOSUB;
632 	if (prescan_context.max_sub > 0 && first_pat.match == 0) {
633 	    msg_warn("regexp map %s, line %d: $number found in negative match "
634 		   "replacement text: skipping this rule", mapname, lineno);
635 	    CREATE_MATCHOP_ERROR_RETURN(0);
636 	}
637 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
638 	    msg_warn("regexp map %s, line %d: "
639 		     "regular expression substitution is not allowed: "
640 		     "skipping this rule", mapname, lineno);
641 	    CREATE_MATCHOP_ERROR_RETURN(0);
642 	}
643 	if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
644 						 &first_pat)) == 0)
645 	    CREATE_MATCHOP_ERROR_RETURN(0);
646 	if (prescan_context.max_sub > first_exp->re_nsub) {
647 	    msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
648 		     "skipping this rule", mapname, lineno,
649 		     (int) prescan_context.max_sub);
650 	    CREATE_MATCHOP_ERROR_RETURN(0);
651 	}
652 	if (second_pat.regexp != 0) {
653 	    second_pat.options |= REG_NOSUB;
654 	    if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
655 						      &second_pat)) == 0)
656 		CREATE_MATCHOP_ERROR_RETURN(0);
657 	} else {
658 	    second_exp = 0;
659 	}
660 	match_rule = (DICT_REGEXP_MATCH_RULE *)
661 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno,
662 				   sizeof(DICT_REGEXP_MATCH_RULE));
663 	match_rule->first_exp = first_exp;
664 	match_rule->first_match = first_pat.match;
665 	match_rule->max_sub = prescan_context.max_sub;
666 	match_rule->second_exp = second_exp;
667 	match_rule->second_match = second_pat.match;
668 	if (prescan_context.literal)
669 	    match_rule->replacement = prescan_context.literal;
670 	else
671 	    match_rule->replacement = mystrdup(p);
672 	return ((DICT_REGEXP_RULE *) match_rule);
673     }
674 
675     /*
676      * The IF operator takes one pattern but no replacement text.
677      */
678     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
679 	DICT_REGEXP_PATTERN pattern;
680 	regex_t *expr;
681 	DICT_REGEXP_IF_RULE *if_rule;
682 
683 	p += 2;
684 	while (*p && ISSPACE(*p))
685 	    p++;
686 	if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
687 	    return (0);
688 	while (*p && ISSPACE(*p))
689 	    ++p;
690 	if (*p) {
691 	    msg_warn("regexp map %s, line %d: ignoring extra text after"
692 		     " IF statement: \"%s\"", mapname, lineno, p);
693 	    msg_warn("regexp map %s, line %d: do not prepend whitespace"
694 		     " to statements between IF and ENDIF", mapname, lineno);
695 	}
696 	if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
697 	    return (0);
698 	if_rule = (DICT_REGEXP_IF_RULE *)
699 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno,
700 				   sizeof(DICT_REGEXP_IF_RULE));
701 	if_rule->expr = expr;
702 	if_rule->match = pattern.match;
703 	return ((DICT_REGEXP_RULE *) if_rule);
704     }
705 
706     /*
707      * The ENDIF operator takes no patterns and no replacement text.
708      */
709     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
710 	DICT_REGEXP_RULE *rule;
711 
712 	p += 5;
713 	if (nesting == 0) {
714 	    msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
715 		     mapname, lineno);
716 	    return (0);
717 	}
718 	while (*p && ISSPACE(*p))
719 	    ++p;
720 	if (*p)
721 	    msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
722 		     mapname, lineno);
723 	rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno,
724 				      sizeof(DICT_REGEXP_RULE));
725 	return (rule);
726     }
727 
728     /*
729      * Unrecognized input.
730      */
731     else {
732 	msg_warn("regexp map %s, line %d: ignoring unrecognized request",
733 		 mapname, lineno);
734 	return (0);
735     }
736 }
737 
738 /* dict_regexp_open - load and compile a file containing regular expressions */
739 
740 DICT   *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
741 {
742     DICT_REGEXP *dict_regexp;
743     VSTREAM *map_fp = 0;
744     struct stat st;
745     VSTRING *line_buffer = 0;
746     DICT_REGEXP_RULE *rule;
747     DICT_REGEXP_RULE *last_rule = 0;
748     int     lineno = 0;
749     size_t  max_sub = 0;
750     int     nesting = 0;
751     char   *p;
752 
753     /*
754      * Let the optimizer worry about eliminating redundant code.
755      */
756 #define DICT_REGEXP_OPEN_RETURN(d) { \
757 	DICT *__d = (d); \
758 	if (line_buffer != 0) \
759 	    vstring_free(line_buffer); \
760 	if (map_fp != 0) \
761 	    vstream_fclose(map_fp); \
762 	return (__d); \
763     } while (0)
764 
765     /*
766      * Sanity checks.
767      */
768     if (open_flags != O_RDONLY)
769 	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
770 					    mapname, open_flags, dict_flags,
771 				  "%s:%s map requires O_RDONLY access mode",
772 					       DICT_TYPE_REGEXP, mapname));
773 
774     /*
775      * Open the configuration file.
776      */
777     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
778 	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
779 					       open_flags, dict_flags,
780 					       "open %s: %m", mapname));
781     if (fstat(vstream_fileno(map_fp), &st) < 0)
782 	msg_fatal("fstat %s: %m", mapname);
783 
784     line_buffer = vstring_alloc(100);
785 
786     dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
787 					     sizeof(*dict_regexp));
788     dict_regexp->dict.lookup = dict_regexp_lookup;
789     dict_regexp->dict.close = dict_regexp_close;
790     dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
791     if (dict_flags & DICT_FLAG_FOLD_MUL)
792 	dict_regexp->dict.fold_buf = vstring_alloc(10);
793     dict_regexp->head = 0;
794     dict_regexp->pmatch = 0;
795     dict_regexp->expansion_buf = 0;
796     dict_regexp->dict.owner.uid = st.st_uid;
797     dict_regexp->dict.owner.status = (st.st_uid != 0);
798 
799     /*
800      * Parse the regexp table.
801      */
802     while (readlline(line_buffer, map_fp, &lineno)) {
803 	p = vstring_str(line_buffer);
804 	trimblanks(p, 0)[0] = 0;
805 	if (*p == 0)
806 	    continue;
807 	rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags);
808 	if (rule == 0)
809 	    continue;
810 	if (rule->op == DICT_REGEXP_OP_MATCH) {
811 	    if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
812 		max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
813 	} else if (rule->op == DICT_REGEXP_OP_IF) {
814 	    nesting++;
815 	} else if (rule->op == DICT_REGEXP_OP_ENDIF) {
816 	    nesting--;
817 	}
818 	if (last_rule == 0)
819 	    dict_regexp->head = rule;
820 	else
821 	    last_rule->next = rule;
822 	last_rule = rule;
823     }
824 
825     if (nesting)
826 	msg_warn("regexp map %s, line %d: more IFs than ENDIFs",
827 		 mapname, lineno);
828 
829     /*
830      * Allocate space for only as many matched substrings as used in the
831      * replacement text.
832      */
833     if (max_sub > 0)
834 	dict_regexp->pmatch =
835 	    (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
836 
837     DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict));
838 }
839 
840 #endif
841