xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_regexp.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: dict_regexp.c,v 1.1.1.1 2009/06/23 10:08:59 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_regexp 3
6 /* SUMMARY
7 /*	dictionary manager interface to REGEXP regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_regexp.h>
10 /*
11 /*	DICT	*dict_regexp_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_regexp_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /*	regexp_table(5) format of Postfix regular expression tables
22 /* AUTHOR(S)
23 /*	LaMont Jones
24 /*	lamont@hp.com
25 /*
26 /*	Based on PCRE dictionary contributed by Andrew McNamara
27 /*	andrewm@connect.com.au
28 /*	connect.com.au Pty. Ltd.
29 /*	Level 3, 213 Miller St
30 /*	North Sydney, NSW, Australia
31 /*
32 /*	Heavily rewritten by Wietse Venema
33 /*	IBM T.J. Watson Research
34 /*	P.O. Box 704
35 /*	Yorktown Heights, NY 10598, USA
36 /*--*/
37 
38 /* System library. */
39 
40 #include "sys_defs.h"
41 
42 #ifdef HAS_POSIX_REGEXP
43 
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#ifdef STRCASECMP_IN_STRINGS_H
#include <strings.h>
#endif
52 
53 /* Utility library. */
54 
55 #include "mymalloc.h"
56 #include "msg.h"
57 #include "safe.h"
58 #include "vstream.h"
59 #include "vstring.h"
60 #include "stringops.h"
61 #include "readlline.h"
62 #include "dict.h"
63 #include "dict_regexp.h"
64 #include "mac_parse.h"
65 
66  /*
67   * Support for IF/ENDIF based on an idea by Bert Driehuis.
68   */
#define DICT_REGEXP_OP_MATCH	(1)	/* rule with pattern(s) and result */
#define DICT_REGEXP_OP_IF	(2)	/* rule that opens an IF..ENDIF block */
#define DICT_REGEXP_OP_ENDIF	(3)	/* rule that closes an IF..ENDIF block */
72 
73  /*
74   * Regular expression before compiling.
75   */
typedef struct {
    /* Pattern text, not yet compiled. */
    char   *regexp;
    /* Flag bits that will be handed to regcomp(). */
    int     options;
    /* Non-zero for a positive match, zero for a negated ("!") match. */
    int     match;
} DICT_REGEXP_PATTERN;
81 
82  /*
83   * Compiled generic rule, and subclasses that derive from it.
84   */
typedef struct DICT_REGEXP_RULE {
    /* Rule type: DICT_REGEXP_OP_MATCH, DICT_REGEXP_OP_IF or ..._ENDIF. */
    int     op;
    /* IF/ENDIF nesting level that was in effect for this rule. */
    int     nesting;
    /* Line number in the map source file, used in diagnostics. */
    int     lineno;
    /* Next rule in the dictionary's singly-linked list, or null. */
    struct DICT_REGEXP_RULE *next;
} DICT_REGEXP_RULE;
91 
92 typedef struct {
93     DICT_REGEXP_RULE rule;		/* generic part */
94     regex_t *first_exp;			/* compiled primary pattern */
95     int     first_match;		/* positive or negative match */
96     regex_t *second_exp;		/* compiled secondary pattern */
97     int     second_match;		/* positive or negative match */
98     char   *replacement;		/* replacement text */
99     size_t  max_sub;			/* largest $number in replacement */
100 } DICT_REGEXP_MATCH_RULE;
101 
102 typedef struct {
103     DICT_REGEXP_RULE rule;		/* generic members */
104     regex_t *expr;			/* the condition */
105     int     match;			/* positive or negative match */
106 } DICT_REGEXP_IF_RULE;
107 
108  /*
109   * Regexp map.
110   */
111 typedef struct {
112     DICT    dict;			/* generic members */
113     regmatch_t *pmatch;			/* matched substring info */
114     DICT_REGEXP_RULE *head;		/* first rule */
115     VSTRING *expansion_buf;		/* lookup result */
116 } DICT_REGEXP;
117 
118  /*
119   * Macros to make dense code more readable.
120   */
#define NULL_MATCH_RESULT	((regmatch_t *) 0)	/* no substring array */
#define NULL_SUBSTITUTIONS	(0)			/* no substrings wanted */
123 
124  /*
125   * Context for $number expansion callback.
126   */
127 typedef struct {
128     DICT_REGEXP *dict_regexp;		/* the dictionary handle */
129     DICT_REGEXP_MATCH_RULE *match_rule;	/* the rule we matched */
130     const char *lookup_string;		/* matched text */
131 } DICT_REGEXP_EXPAND_CONTEXT;
132 
133  /*
134   * Context for $number pre-scan callback.
135   */
typedef struct {
    /* Map name, for diagnostics. */
    const char *mapname;
    /* Line number in the map file, for diagnostics. */
    int     lineno;
    /* Largest $number encountered so far in the replacement text. */
    size_t  max_sub;
    /* Copy of the literal result (with $$ already reduced to $), or null. */
    char   *literal;
} DICT_REGEXP_PRESCAN_CONTEXT;
142 
143  /*
144   * Compatibility.
145   */
#if !defined(MAC_PARSE_OK)
#define MAC_PARSE_OK	(0)		/* fallback when mac_parse.h has none */
#endif
149 
150 /* dict_regexp_expand - replace $number with substring from matched text */
151 
152 static int dict_regexp_expand(int type, VSTRING *buf, char *ptr)
153 {
154     DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
155     DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
156     DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
157     regmatch_t *pmatch;
158     size_t  n;
159 
160     /*
161      * Replace $number by the corresponding substring from the matched text.
162      * We pre-scanned the replacement text at compile time, so any out of
163      * range $number means that something impossible has happened.
164      */
165     if (type == MAC_PARSE_VARNAME) {
166 	n = atoi(vstring_str(buf));
167 	if (n < 1 || n > match_rule->max_sub)
168 	    msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
169 		      dict_regexp->dict.name, match_rule->rule.lineno,
170 		      vstring_str(buf));
171 	pmatch = dict_regexp->pmatch + n;
172 	if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
173 	    return (MAC_PARSE_UNDEF);		/* empty or not matched */
174 	vstring_strncat(dict_regexp->expansion_buf,
175 			ctxt->lookup_string + pmatch->rm_so,
176 			pmatch->rm_eo - pmatch->rm_so);
177 	return (MAC_PARSE_OK);
178     }
179 
180     /*
181      * Straight text - duplicate with no substitution.
182      */
183     else {
184 	vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
185 	return (MAC_PARSE_OK);
186     }
187 }
188 
/* dict_regexp_regerror - log a regcomp()/regexec() error as a warning */

static void dict_regexp_regerror(const char *mapname, int lineno, int error,
				         const regex_t *expr)
{
    char    text[256];			/* regerror() message text */

    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
199 
200  /*
201   * Inlined to reduce function call overhead in the time-critical loop.
202   */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    (((err) = regexec((expr), (str), (nsub), (pmatch), 0)) == 0 ? (match) : \
     (err) == REG_NOMATCH ? !(match) : \
     (dict_regexp_regerror((map), (line), (err), (expr)), 0))
208 
209 /* dict_regexp_lookup - match string and perform optional substitution */
210 
211 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
212 {
213     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
214     DICT_REGEXP_RULE *rule;
215     DICT_REGEXP_IF_RULE *if_rule;
216     DICT_REGEXP_MATCH_RULE *match_rule;
217     DICT_REGEXP_EXPAND_CONTEXT expand_context;
218     int     error;
219     int     nesting = 0;
220 
221     dict_errno = 0;
222 
223     if (msg_verbose)
224 	msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
225 
226     /*
227      * Optionally fold the key.
228      */
229     if (dict->flags & DICT_FLAG_FOLD_MUL) {
230 	if (dict->fold_buf == 0)
231 	    dict->fold_buf = vstring_alloc(10);
232 	vstring_strcpy(dict->fold_buf, lookup_string);
233 	lookup_string = lowercase(vstring_str(dict->fold_buf));
234     }
235     for (rule = dict_regexp->head; rule; rule = rule->next) {
236 
237 	/*
238 	 * Skip rules inside failed IF/ENDIF.
239 	 */
240 	if (nesting < rule->nesting)
241 	    continue;
242 
243 	switch (rule->op) {
244 
245 	    /*
246 	     * Search for the first matching primary expression. Limit the
247 	     * overhead for substring substitution to the bare minimum.
248 	     */
249 	case DICT_REGEXP_OP_MATCH:
250 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
251 	    if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
252 				     match_rule->first_exp,
253 				     match_rule->first_match,
254 				     lookup_string,
255 				     match_rule->max_sub > 0 ?
256 				     match_rule->max_sub + 1 : 0,
257 				     dict_regexp->pmatch))
258 		continue;
259 	    if (match_rule->second_exp
260 		&& !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
261 					match_rule->second_exp,
262 					match_rule->second_match,
263 					lookup_string,
264 					NULL_SUBSTITUTIONS,
265 					NULL_MATCH_RESULT))
266 		continue;
267 
268 	    /*
269 	     * Skip $number substitutions when the replacement text contains
270 	     * no $number strings, as learned during the compile time
271 	     * pre-scan. The pre-scan already replaced $$ by $.
272 	     */
273 	    if (match_rule->max_sub == 0)
274 		return (match_rule->replacement);
275 
276 	    /*
277 	     * Perform $number substitutions on the replacement text. We
278 	     * pre-scanned the replacement text at compile time. Any macro
279 	     * expansion errors at this point mean something impossible has
280 	     * happened.
281 	     */
282 	    if (!dict_regexp->expansion_buf)
283 		dict_regexp->expansion_buf = vstring_alloc(10);
284 	    VSTRING_RESET(dict_regexp->expansion_buf);
285 	    expand_context.lookup_string = lookup_string;
286 	    expand_context.match_rule = match_rule;
287 	    expand_context.dict_regexp = dict_regexp;
288 
289 	    if (mac_parse(match_rule->replacement, dict_regexp_expand,
290 			  (char *) &expand_context) & MAC_PARSE_ERROR)
291 		msg_panic("regexp map %s, line %d: bad replacement syntax",
292 			  dict->name, rule->lineno);
293 	    VSTRING_TERMINATE(dict_regexp->expansion_buf);
294 	    return (vstring_str(dict_regexp->expansion_buf));
295 
296 	    /*
297 	     * Conditional.
298 	     */
299 	case DICT_REGEXP_OP_IF:
300 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
301 	    if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
302 			       if_rule->expr, if_rule->match, lookup_string,
303 				    NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
304 		nesting++;
305 	    continue;
306 
307 	    /*
308 	     * ENDIF after successful IF.
309 	     */
310 	case DICT_REGEXP_OP_ENDIF:
311 	    nesting--;
312 	    continue;
313 
314 	default:
315 	    msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
316 	}
317     }
318     return (0);
319 }
320 
321 /* dict_regexp_close - close regexp dictionary */
322 
323 static void dict_regexp_close(DICT *dict)
324 {
325     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
326     DICT_REGEXP_RULE *rule;
327     DICT_REGEXP_RULE *next;
328     DICT_REGEXP_MATCH_RULE *match_rule;
329     DICT_REGEXP_IF_RULE *if_rule;
330 
331     for (rule = dict_regexp->head; rule; rule = next) {
332 	next = rule->next;
333 	switch (rule->op) {
334 	case DICT_REGEXP_OP_MATCH:
335 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
336 	    if (match_rule->first_exp) {
337 		regfree(match_rule->first_exp);
338 		myfree((char *) match_rule->first_exp);
339 	    }
340 	    if (match_rule->second_exp) {
341 		regfree(match_rule->second_exp);
342 		myfree((char *) match_rule->second_exp);
343 	    }
344 	    if (match_rule->replacement)
345 		myfree((char *) match_rule->replacement);
346 	    break;
347 	case DICT_REGEXP_OP_IF:
348 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
349 	    if (if_rule->expr) {
350 		regfree(if_rule->expr);
351 		myfree((char *) if_rule->expr);
352 	    }
353 	    break;
354 	case DICT_REGEXP_OP_ENDIF:
355 	    break;
356 	default:
357 	    msg_panic("dict_regexp_close: unknown operation %d", rule->op);
358 	}
359 	myfree((char *) rule);
360     }
361     if (dict_regexp->pmatch)
362 	myfree((char *) dict_regexp->pmatch);
363     if (dict_regexp->expansion_buf)
364 	vstring_free(dict_regexp->expansion_buf);
365     if (dict->fold_buf)
366 	vstring_free(dict->fold_buf);
367     dict_free(dict);
368 }
369 
370 /* dict_regexp_get_pat - extract one pattern with options from rule */
371 
372 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
373 			               DICT_REGEXP_PATTERN *pat)
374 {
375     char   *p = *bufp;
376     char    re_delim;
377 
378     /*
379      * Process negation operators.
380      */
381     pat->match = 1;
382     while (*p == '!') {
383 	pat->match = !pat->match;
384 	p++;
385     }
386 
387     /*
388      * Grr...aceful handling of whitespace after '!'.
389      */
390     while (*p && ISSPACE(*p))
391 	p++;
392     if (*p == 0) {
393 	msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
394 		 mapname, lineno);
395 	return (0);
396     }
397 
398     /*
399      * Search for the closing delimiter, handling backslash escape.
400      */
401     re_delim = *p++;
402     pat->regexp = p;
403     while (*p) {
404 	if (*p == '\\') {
405 	    if (p[1])
406 		p++;
407 	    else
408 		break;
409 	} else if (*p == re_delim) {
410 	    break;
411 	}
412 	++p;
413     }
414     if (!*p) {
415 	msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
416 		 "skipping this rule", mapname, lineno, re_delim);
417 	return (0);
418     }
419     *p++ = 0;					/* null terminate */
420 
421     /*
422      * Search for options.
423      */
424     pat->options = REG_EXTENDED | REG_ICASE;
425     while (*p && !ISSPACE(*p) && *p != '!') {
426 	switch (*p) {
427 	case 'i':
428 	    pat->options ^= REG_ICASE;
429 	    break;
430 	case 'm':
431 	    pat->options ^= REG_NEWLINE;
432 	    break;
433 	case 'x':
434 	    pat->options ^= REG_EXTENDED;
435 	    break;
436 	default:
437 	    msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
438 		     "skipping this rule", mapname, lineno, *p);
439 	    return (0);
440 	}
441 	++p;
442     }
443     *bufp = p;
444     return (1);
445 }
446 
447 /* dict_regexp_get_pats - get the primary and second patterns and flags */
448 
449 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
450 				        DICT_REGEXP_PATTERN *first_pat,
451 				        DICT_REGEXP_PATTERN *second_pat)
452 {
453 
454     /*
455      * Get the primary and optional secondary patterns and their flags.
456      */
457     if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
458 	return (0);
459     if (**p == '!') {
460 #if 0
461 	static int bitrot_warned = 0;
462 
463 	if (bitrot_warned == 0) {
464 	    msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
465 		 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
466 		     mapname, lineno);
467 	    bitrot_warned = 1;
468 	}
469 #endif
470 	if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
471 	    return (0);
472     } else {
473 	second_pat->regexp = 0;
474     }
475     return (1);
476 }
477 
478 /* dict_regexp_prescan - find largest $number in replacement text */
479 
480 static int dict_regexp_prescan(int type, VSTRING *buf, char *context)
481 {
482     DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
483     size_t  n;
484 
485     /*
486      * Keep a copy of literal text (with $$ already replaced by $) if and
487      * only if the replacement text contains no $number expression. This way
488      * we can avoid having to scan the replacement text at lookup time.
489      */
490     if (type == MAC_PARSE_VARNAME) {
491 	if (ctxt->literal) {
492 	    myfree(ctxt->literal);
493 	    ctxt->literal = 0;
494 	}
495 	if (!alldig(vstring_str(buf))) {
496 	    msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
497 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
498 	    return (MAC_PARSE_ERROR);
499 	}
500 	n = atoi(vstring_str(buf));
501 	if (n < 1) {
502 	    msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
503 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
504 	    return (MAC_PARSE_ERROR);
505 	}
506 	if (n > ctxt->max_sub)
507 	    ctxt->max_sub = n;
508     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
509 	if (ctxt->literal)
510 	    msg_panic("regexp map %s, line %d: multiple literals but no $number",
511 		      ctxt->mapname, ctxt->lineno);
512 	ctxt->literal = mystrdup(vstring_str(buf));
513     }
514     return (MAC_PARSE_OK);
515 }
516 
517 /* dict_regexp_compile_pat - compile one pattern */
518 
519 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
520 					        DICT_REGEXP_PATTERN *pat)
521 {
522     int     error;
523     regex_t *expr;
524 
525     expr = (regex_t *) mymalloc(sizeof(*expr));
526     error = regcomp(expr, pat->regexp, pat->options);
527     if (error != 0) {
528 	dict_regexp_regerror(mapname, lineno, error, expr);
529 	myfree((char *) expr);
530 	return (0);
531     }
532     return (expr);
533 }
534 
535 /* dict_regexp_rule_alloc - fill in a generic rule structure */
536 
537 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting,
538 						        int lineno,
539 						        size_t size)
540 {
541     DICT_REGEXP_RULE *rule;
542 
543     rule = (DICT_REGEXP_RULE *) mymalloc(size);
544     rule->op = op;
545     rule->nesting = nesting;
546     rule->lineno = lineno;
547     rule->next = 0;
548 
549     return (rule);
550 }
551 
552 /* dict_regexp_parseline - parse one rule */
553 
554 static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
555 					            char *line, int nesting,
556 					               int dict_flags)
557 {
558     char   *p;
559 
560     p = line;
561 
562     /*
563      * An ordinary rule takes one or two patterns and replacement text.
564      */
565     if (!ISALNUM(*p)) {
566 	DICT_REGEXP_PATTERN first_pat;
567 	DICT_REGEXP_PATTERN second_pat;
568 	DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
569 	regex_t *first_exp = 0;
570 	regex_t *second_exp;
571 	DICT_REGEXP_MATCH_RULE *match_rule;
572 
573 	/*
574 	 * Get the primary and the optional secondary patterns.
575 	 */
576 	if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
577 	    return (0);
578 
579 	/*
580 	 * Get the replacement text.
581 	 */
582 	while (*p && ISSPACE(*p))
583 	    ++p;
584 	if (!*p) {
585 	    msg_warn("regexp map %s, line %d: using empty replacement string",
586 		     mapname, lineno);
587 	}
588 
589 	/*
590 	 * Find the highest-numbered $number in the replacement text. We can
591 	 * speed up pattern matching 1) by passing hints to the regexp
592 	 * compiler, setting the REG_NOSUB flag when the replacement text
593 	 * contains no $number string; 2) by passing hints to the regexp
594 	 * execution code, limiting the amount of text that is made available
595 	 * for substitution.
596 	 */
597 	prescan_context.mapname = mapname;
598 	prescan_context.lineno = lineno;
599 	prescan_context.max_sub = 0;
600 	prescan_context.literal = 0;
601 
602 	/*
603 	 * The optimizer will eliminate code duplication and/or dead code.
604 	 */
605 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
606 	if (first_exp) { \
607 	    regfree(first_exp); \
608 	    myfree((char *) first_exp); \
609 	} \
610 	if (prescan_context.literal) \
611 	    myfree(prescan_context.literal); \
612 	return (rval); \
613     } while (0)
614 
615 	if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context)
616 	    & MAC_PARSE_ERROR) {
617 	    msg_warn("regexp map %s, line %d: bad replacement syntax: "
618 		     "skipping this rule", mapname, lineno);
619 	    CREATE_MATCHOP_ERROR_RETURN(0);
620 	}
621 
622 	/*
623 	 * Compile the primary and the optional secondary pattern. Speed up
624 	 * execution when no matched text needs to be substituted into the
625 	 * result string, or when the highest numbered substring is less than
626 	 * the total number of () subpatterns.
627 	 */
628 	if (prescan_context.max_sub == 0)
629 	    first_pat.options |= REG_NOSUB;
630 	if (prescan_context.max_sub > 0 && first_pat.match == 0) {
631 	    msg_warn("regexp map %s, line %d: $number found in negative match "
632 		   "replacement text: skipping this rule", mapname, lineno);
633 	    CREATE_MATCHOP_ERROR_RETURN(0);
634 	}
635 	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
636 	    msg_warn("regexp map %s, line %d: "
637 		     "regular expression substitution is not allowed: "
638 		     "skipping this rule", mapname, lineno);
639 	    CREATE_MATCHOP_ERROR_RETURN(0);
640 	}
641 	if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
642 						 &first_pat)) == 0)
643 	    CREATE_MATCHOP_ERROR_RETURN(0);
644 	if (prescan_context.max_sub > first_exp->re_nsub) {
645 	    msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
646 		     "skipping this rule", mapname, lineno,
647 		     (int) prescan_context.max_sub);
648 	    CREATE_MATCHOP_ERROR_RETURN(0);
649 	}
650 	if (second_pat.regexp != 0) {
651 	    second_pat.options |= REG_NOSUB;
652 	    if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
653 						      &second_pat)) == 0)
654 		CREATE_MATCHOP_ERROR_RETURN(0);
655 	} else {
656 	    second_exp = 0;
657 	}
658 	match_rule = (DICT_REGEXP_MATCH_RULE *)
659 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno,
660 				   sizeof(DICT_REGEXP_MATCH_RULE));
661 	match_rule->first_exp = first_exp;
662 	match_rule->first_match = first_pat.match;
663 	match_rule->max_sub = prescan_context.max_sub;
664 	match_rule->second_exp = second_exp;
665 	match_rule->second_match = second_pat.match;
666 	if (prescan_context.literal)
667 	    match_rule->replacement = prescan_context.literal;
668 	else
669 	    match_rule->replacement = mystrdup(p);
670 	return ((DICT_REGEXP_RULE *) match_rule);
671     }
672 
673     /*
674      * The IF operator takes one pattern but no replacement text.
675      */
676     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
677 	DICT_REGEXP_PATTERN pattern;
678 	regex_t *expr;
679 	DICT_REGEXP_IF_RULE *if_rule;
680 
681 	p += 2;
682 	while (*p && ISSPACE(*p))
683 	    p++;
684 	if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
685 	    return (0);
686 	while (*p && ISSPACE(*p))
687 	    ++p;
688 	if (*p) {
689 	    msg_warn("regexp map %s, line %d: ignoring extra text after"
690 		     " IF statement: \"%s\"", mapname, lineno, p);
691 	    msg_warn("regexp map %s, line %d: do not prepend whitespace"
692 		     " to statements between IF and ENDIF", mapname, lineno);
693 	}
694 	if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
695 	    return (0);
696 	if_rule = (DICT_REGEXP_IF_RULE *)
697 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno,
698 				   sizeof(DICT_REGEXP_IF_RULE));
699 	if_rule->expr = expr;
700 	if_rule->match = pattern.match;
701 	return ((DICT_REGEXP_RULE *) if_rule);
702     }
703 
704     /*
705      * The ENDIF operator takes no patterns and no replacement text.
706      */
707     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
708 	DICT_REGEXP_RULE *rule;
709 
710 	p += 5;
711 	if (nesting == 0) {
712 	    msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
713 		     mapname, lineno);
714 	    return (0);
715 	}
716 	while (*p && ISSPACE(*p))
717 	    ++p;
718 	if (*p)
719 	    msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
720 		     mapname, lineno);
721 	rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno,
722 				      sizeof(DICT_REGEXP_RULE));
723 	return (rule);
724     }
725 
726     /*
727      * Unrecognized input.
728      */
729     else {
730 	msg_warn("regexp map %s, line %d: ignoring unrecognized request",
731 		 mapname, lineno);
732 	return (0);
733     }
734 }
735 
736 /* dict_regexp_open - load and compile a file containing regular expressions */
737 
738 DICT   *dict_regexp_open(const char *mapname, int unused_flags, int dict_flags)
739 {
740     DICT_REGEXP *dict_regexp;
741     VSTREAM *map_fp;
742     VSTRING *line_buffer;
743     DICT_REGEXP_RULE *rule;
744     DICT_REGEXP_RULE *last_rule = 0;
745     int     lineno = 0;
746     size_t  max_sub = 0;
747     int     nesting = 0;
748     char   *p;
749 
750     line_buffer = vstring_alloc(100);
751 
752     dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
753 					     sizeof(*dict_regexp));
754     dict_regexp->dict.lookup = dict_regexp_lookup;
755     dict_regexp->dict.close = dict_regexp_close;
756     dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
757     if (dict_flags & DICT_FLAG_FOLD_MUL)
758 	dict_regexp->dict.fold_buf = vstring_alloc(10);
759     dict_regexp->head = 0;
760     dict_regexp->pmatch = 0;
761     dict_regexp->expansion_buf = 0;
762 
763     /*
764      * Parse the regexp table.
765      */
766     if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
767 	msg_fatal("open %s: %m", mapname);
768 
769     while (readlline(line_buffer, map_fp, &lineno)) {
770 	p = vstring_str(line_buffer);
771 	trimblanks(p, 0)[0] = 0;
772 	if (*p == 0)
773 	    continue;
774 	rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags);
775 	if (rule == 0)
776 	    continue;
777 	if (rule->op == DICT_REGEXP_OP_MATCH) {
778 	    if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
779 		max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
780 	} else if (rule->op == DICT_REGEXP_OP_IF) {
781 	    nesting++;
782 	} else if (rule->op == DICT_REGEXP_OP_ENDIF) {
783 	    nesting--;
784 	}
785 	if (last_rule == 0)
786 	    dict_regexp->head = rule;
787 	else
788 	    last_rule->next = rule;
789 	last_rule = rule;
790     }
791 
792     if (nesting)
793 	msg_warn("regexp map %s, line %d: more IFs than ENDIFs",
794 		 mapname, lineno);
795 
796     /*
797      * Allocate space for only as many matched substrings as used in the
798      * replacement text.
799      */
800     if (max_sub > 0)
801 	dict_regexp->pmatch =
802 	    (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
803 
804     /*
805      * Clean up.
806      */
807     vstring_free(line_buffer);
808     vstream_fclose(map_fp);
809 
810     return (DICT_DEBUG (&dict_regexp->dict));
811 }
812 
813 #endif
814