xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_regexp.c (revision c48c605c14fd8622b523d1d6a3f0c0bad133ea89)
1 /*	$NetBSD: dict_regexp.c,v 1.5 2023/12/23 20:30:46 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_regexp 3
6 /* SUMMARY
7 /*	dictionary manager interface to REGEXP regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_regexp.h>
10 /*
11 /*	DICT	*dict_regexp_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_regexp_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /*	regexp_table(5) regular expression table configuration
22 /* AUTHOR(S)
23 /*	LaMont Jones
24 /*	lamont@hp.com
25 /*
26 /*	Based on PCRE dictionary contributed by Andrew McNamara
27 /*	andrewm@connect.com.au
28 /*	connect.com.au Pty. Ltd.
29 /*	Level 3, 213 Miller St
30 /*	North Sydney, NSW, Australia
31 /*
32 /*	Heavily rewritten by Wietse Venema
33 /*	IBM T.J. Watson Research
34 /*	P.O. Box 704
35 /*	Yorktown Heights, NY 10598, USA
36 /*
37 /*	Wietse Venema
38 /*	Google, Inc.
39 /*	111 8th Avenue
40 /*	New York, NY 10011, USA
41 /*--*/
42 
43 /* System library. */
44 
45 #include "sys_defs.h"
46 
47 #ifdef HAS_POSIX_REGEXP
48 
49 #include <sys/stat.h>
50 #include <stdlib.h>
51 #include <unistd.h>
52 #include <string.h>
53 #include <ctype.h>
54 #include <regex.h>
55 #ifdef STRCASECMP_IN_STRINGS_H
56 #include <strings.h>
57 #endif
58 
59 /* Utility library. */
60 
61 #include "mymalloc.h"
62 #include "msg.h"
63 #include "safe.h"
64 #include "vstream.h"
65 #include "vstring.h"
66 #include "stringops.h"
67 #include "readlline.h"
68 #include "dict.h"
69 #include "dict_regexp.h"
70 #include "mac_parse.h"
71 #include "warn_stat.h"
72 #include "mvect.h"
73 
74  /*
75   * Support for IF/ENDIF based on an idea by Bert Driehuis.
76   */
/* Rule types, stored in DICT_REGEXP_RULE.op and dispatched on at lookup and close time. */
77 #define DICT_REGEXP_OP_MATCH	1	/* Pattern(s) plus replacement text */
78 #define DICT_REGEXP_OP_IF	2	/* Condition; when it fails, lookup resumes after the matching ENDIF */
79 #define DICT_REGEXP_OP_ENDIF	3	/* End of an IF block; skipped during lookup */
80 
81  /*
82   * Regular expression before compiling.
83   */
/* Filled in by dict_regexp_get_pat(); consumed by dict_regexp_compile_pat(). */
84 typedef struct {
85     char   *regexp;			/* pattern text; points into the parsed line, not a copy */
86     int     options;			/* regcomp() options, derived from the i/m/x flags */
87     int     match;			/* 1 = positive match, 0 = negated with '!' */
88 } DICT_REGEXP_PATTERN;
89 
90  /*
91   * Compiled generic rule, and subclasses that derive from it.
92   */
/*
 * Generic rule header. It is the first member of the MATCH and IF rule
 * types, and the code casts a DICT_REGEXP_RULE pointer to the specific type
 * after inspecting op. An ENDIF rule consists of this header only.
 */
93 typedef struct DICT_REGEXP_RULE {
94     int     op;				/* DICT_REGEXP_OP_MATCH/IF/ENDIF */
95     int     lineno;			/* table line number, used in diagnostics */
96     struct DICT_REGEXP_RULE *next;	/* next rule in table order, 0 at the end */
97 } DICT_REGEXP_RULE;
98 
/* Rule of the form /pattern/flags replacement, with an optional !/pattern2/flags part. */
99 typedef struct {
100     DICT_REGEXP_RULE rule;		/* generic part, must stay first */
101     regex_t *first_exp;			/* compiled primary pattern, owned by the rule */
102     int     first_match;		/* 1 = must match, 0 = must not match */
103     regex_t *second_exp;		/* compiled secondary pattern, 0 if absent */
104     int     second_match;		/* polarity of second_exp; only meaningful when second_exp != 0 */
105     char   *replacement;		/* owned replacement text */
106     size_t  max_sub;			/* largest $number in replacement, 0 = none */
107 } DICT_REGEXP_MATCH_RULE;
108 
/* Rule of the form "IF /pattern/flags". */
109 typedef struct {
110     DICT_REGEXP_RULE rule;		/* generic members, must stay first */
111     regex_t *expr;			/* compiled condition, owned by the rule */
112     int     match;			/* 1 = must match, 0 = must not match */
113     struct DICT_REGEXP_RULE *endif_rule;/* matching ENDIF rule, 0 if the IF was never closed */
114 } DICT_REGEXP_IF_RULE;
115 
116  /*
117   * Regexp map.
118   */
/* Dictionary handle; the code casts between DICT * and DICT_REGEXP *. */
119 typedef struct {
120     DICT    dict;			/* generic members, must stay first */
121     regmatch_t *pmatch;			/* match offsets shared by all rules; 0 when no rule uses $number */
122     DICT_REGEXP_RULE *head;		/* first rule, 0 for an empty table */
123     VSTRING *expansion_buf;		/* lookup result, allocated on first $number expansion */
124 } DICT_REGEXP;
125 
126  /*
127   * Macros to make dense code more readable.
128   */
/* regexec() nmatch/pmatch arguments for matches whose substrings are not needed. */
129 #define NULL_SUBSTITUTIONS	(0)
130 #define NULL_MATCH_RESULT	((regmatch_t *) 0)
131 
132  /*
133   * Context for $number expansion callback.
134   */
/* Passed through mac_parse() to dict_regexp_expand() at lookup time. */
135 typedef struct {
136     DICT_REGEXP *dict_regexp;		/* the dictionary handle; supplies pmatch and expansion_buf */
137     DICT_REGEXP_MATCH_RULE *match_rule;	/* the rule we matched; supplies max_sub and lineno */
138     const char *lookup_string;		/* the text that the pmatch offsets refer to */
139 } DICT_REGEXP_EXPAND_CONTEXT;
140 
141  /*
142   * Context for $number pre-scan callback.
143   */
/* Passed through mac_parse() to dict_regexp_prescan() while a rule is compiled. */
144 typedef struct {
145     const char *mapname;		/* name of regexp map, for diagnostics */
146     int     lineno;			/* where in file, for diagnostics */
147     size_t  max_sub;			/* largest $number seen so far, 0 = none */
148     char   *literal;			/* owned copy of a $number-free replacement, else 0 */
149 } DICT_REGEXP_PRESCAN_CONTEXT;
150 
151  /*
152   * Compatibility.
153   */
/* Fallback success code; presumably for mac_parse.h versions that do not define it (not verifiable from this file). */
154 #ifndef MAC_PARSE_OK
155 #define MAC_PARSE_OK 0
156 #endif
157 
158 /* dict_regexp_expand - replace $number with substring from matched text */
159 
dict_regexp_expand(int type,VSTRING * buf,void * ptr)160 static int dict_regexp_expand(int type, VSTRING *buf, void *ptr)
161 {
162     DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
163     DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
164     DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
165     regmatch_t *pmatch;
166     size_t  n;
167 
168     /*
169      * Replace $number by the corresponding substring from the matched text.
170      * We pre-scanned the replacement text at compile time, so any out of
171      * range $number means that something impossible has happened.
172      */
173     if (type == MAC_PARSE_VARNAME) {
174 	n = atoi(vstring_str(buf));
175 	if (n < 1 || n > match_rule->max_sub)
176 	    msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
177 		      dict_regexp->dict.name, match_rule->rule.lineno,
178 		      vstring_str(buf));
179 	pmatch = dict_regexp->pmatch + n;
180 	if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
181 	    return (MAC_PARSE_UNDEF);		/* empty or not matched */
182 	vstring_strncat(dict_regexp->expansion_buf,
183 			ctxt->lookup_string + pmatch->rm_so,
184 			pmatch->rm_eo - pmatch->rm_so);
185 	return (MAC_PARSE_OK);
186     }
187 
188     /*
189      * Straight text - duplicate with no substitution.
190      */
191     else {
192 	vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
193 	return (MAC_PARSE_OK);
194     }
195 }
196 
/*
 * dict_regexp_regerror - log a regcomp()/regexec() failure as a warning
 *
 * mapname and lineno identify the rule; error is the status code returned
 * by the regex library for expr. The message text comes from regerror()
 * and is limited to the size of the local buffer.
 */
static void dict_regexp_regerror(const char *mapname, int lineno, int error,
				         const regex_t *expr)
{
    char    text[256];

    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
207 
208  /*
209   * Inlined to reduce function call overhead in the time-critical loop.
210   */
/*
 * DICT_REGEXP_REGEXEC - run regexec() and apply the rule's match polarity.
 *
 * Stores the regexec() status in (err). The expression is true when the
 * string matches and (match) is nonzero, or when the string does not match
 * and (match) is zero. Any other regexec() status is reported through
 * dict_regexp_regerror() and makes the expression false. (err) and (expr)
 * are evaluated more than once, so the arguments must be free of side
 * effects.
 */
211 #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
212     ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
213      ((err) == REG_NOMATCH ? !(match) : \
214       (err) == 0 ? (match) : \
215       (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
216 
217 /* dict_regexp_lookup - match string and perform optional substitution */
218 
dict_regexp_lookup(DICT * dict,const char * lookup_string)219 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
220 {
221     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
222     DICT_REGEXP_RULE *rule;
223     DICT_REGEXP_IF_RULE *if_rule;
224     DICT_REGEXP_MATCH_RULE *match_rule;
225     DICT_REGEXP_EXPAND_CONTEXT expand_context;
226     int     error;
227 
228     dict->error = 0;
229 
230     if (msg_verbose)
231 	msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
232 
233     /*
234      * Optionally fold the key.
235      */
236     if (dict->flags & DICT_FLAG_FOLD_MUL) {
237 	if (dict->fold_buf == 0)
238 	    dict->fold_buf = vstring_alloc(10);
239 	vstring_strcpy(dict->fold_buf, lookup_string);
240 	lookup_string = lowercase(vstring_str(dict->fold_buf));
241     }
242     for (rule = dict_regexp->head; rule; rule = rule->next) {
243 
244 	switch (rule->op) {
245 
246 	    /*
247 	     * Search for the first matching primary expression. Limit the
248 	     * overhead for substring substitution to the bare minimum.
249 	     */
250 	case DICT_REGEXP_OP_MATCH:
251 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
252 	    if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
253 				     match_rule->first_exp,
254 				     match_rule->first_match,
255 				     lookup_string,
256 				     match_rule->max_sub > 0 ?
257 				     match_rule->max_sub + 1 : 0,
258 				     dict_regexp->pmatch))
259 		continue;
260 	    if (match_rule->second_exp
261 		&& !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
262 					match_rule->second_exp,
263 					match_rule->second_match,
264 					lookup_string,
265 					NULL_SUBSTITUTIONS,
266 					NULL_MATCH_RESULT))
267 		continue;
268 
269 	    /*
270 	     * Skip $number substitutions when the replacement text contains
271 	     * no $number strings, as learned during the compile time
272 	     * pre-scan. The pre-scan already replaced $$ by $.
273 	     */
274 	    if (match_rule->max_sub == 0)
275 		return (match_rule->replacement);
276 
277 	    /*
278 	     * Perform $number substitutions on the replacement text. We
279 	     * pre-scanned the replacement text at compile time. Any macro
280 	     * expansion errors at this point mean something impossible has
281 	     * happened.
282 	     */
283 	    if (!dict_regexp->expansion_buf)
284 		dict_regexp->expansion_buf = vstring_alloc(10);
285 	    VSTRING_RESET(dict_regexp->expansion_buf);
286 	    expand_context.lookup_string = lookup_string;
287 	    expand_context.match_rule = match_rule;
288 	    expand_context.dict_regexp = dict_regexp;
289 
290 	    if (mac_parse(match_rule->replacement, dict_regexp_expand,
291 			  (void *) &expand_context) & MAC_PARSE_ERROR)
292 		msg_panic("regexp map %s, line %d: bad replacement syntax",
293 			  dict->name, rule->lineno);
294 	    VSTRING_TERMINATE(dict_regexp->expansion_buf);
295 	    return (vstring_str(dict_regexp->expansion_buf));
296 
297 	    /*
298 	     * Conditional.
299 	     */
300 	case DICT_REGEXP_OP_IF:
301 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
302 	    if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
303 			       if_rule->expr, if_rule->match, lookup_string,
304 				    NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
305 		continue;
306 	    /* An IF without matching ENDIF has no "endif" rule. */
307 	    if ((rule = if_rule->endif_rule) == 0)
308 		return (0);
309 	    /* FALLTHROUGH */
310 
311 	    /*
312 	     * ENDIF after IF.
313 	     */
314 	case DICT_REGEXP_OP_ENDIF:
315 	    continue;
316 
317 	default:
318 	    msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
319 	}
320     }
321     return (0);
322 }
323 
324 /* dict_regexp_close - close regexp dictionary */
325 
dict_regexp_close(DICT * dict)326 static void dict_regexp_close(DICT *dict)
327 {
328     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
329     DICT_REGEXP_RULE *rule;
330     DICT_REGEXP_RULE *next;
331     DICT_REGEXP_MATCH_RULE *match_rule;
332     DICT_REGEXP_IF_RULE *if_rule;
333 
334     for (rule = dict_regexp->head; rule; rule = next) {
335 	next = rule->next;
336 	switch (rule->op) {
337 	case DICT_REGEXP_OP_MATCH:
338 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
339 	    if (match_rule->first_exp) {
340 		regfree(match_rule->first_exp);
341 		myfree((void *) match_rule->first_exp);
342 	    }
343 	    if (match_rule->second_exp) {
344 		regfree(match_rule->second_exp);
345 		myfree((void *) match_rule->second_exp);
346 	    }
347 	    if (match_rule->replacement)
348 		myfree((void *) match_rule->replacement);
349 	    break;
350 	case DICT_REGEXP_OP_IF:
351 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
352 	    if (if_rule->expr) {
353 		regfree(if_rule->expr);
354 		myfree((void *) if_rule->expr);
355 	    }
356 	    break;
357 	case DICT_REGEXP_OP_ENDIF:
358 	    break;
359 	default:
360 	    msg_panic("dict_regexp_close: unknown operation %d", rule->op);
361 	}
362 	myfree((void *) rule);
363     }
364     if (dict_regexp->pmatch)
365 	myfree((void *) dict_regexp->pmatch);
366     if (dict_regexp->expansion_buf)
367 	vstring_free(dict_regexp->expansion_buf);
368     if (dict->fold_buf)
369 	vstring_free(dict->fold_buf);
370     dict_free(dict);
371 }
372 
373 /* dict_regexp_get_pat - extract one pattern with options from rule */
374 
dict_regexp_get_pat(const char * mapname,int lineno,char ** bufp,DICT_REGEXP_PATTERN * pat)375 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
376 			               DICT_REGEXP_PATTERN *pat)
377 {
378     char   *p = *bufp;
379     char    re_delim;
380 
381     /*
382      * Process negation operators.
383      */
384     pat->match = 1;
385     for (;;) {
386 	if (*p == '!')
387 	    pat->match = !pat->match;
388 	else if (!ISSPACE(*p))
389 	    break;
390 	p++;
391     }
392     if (*p == 0) {
393 	msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
394 		 mapname, lineno);
395 	return (0);
396     }
397 
398     /*
399      * Search for the closing delimiter, handling backslash escape.
400      */
401     re_delim = *p++;
402     pat->regexp = p;
403     while (*p) {
404 	if (*p == '\\') {
405 	    if (p[1])
406 		p++;
407 	    else
408 		break;
409 	} else if (*p == re_delim) {
410 	    break;
411 	}
412 	++p;
413     }
414     if (!*p) {
415 	msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
416 		 "skipping this rule", mapname, lineno, re_delim);
417 	return (0);
418     }
419     *p++ = 0;					/* null terminate */
420 
421     /*
422      * Search for options.
423      */
424     pat->options = REG_EXTENDED | REG_ICASE;
425     while (*p && !ISSPACE(*p) && *p != '!') {
426 	switch (*p) {
427 	case 'i':
428 	    pat->options ^= REG_ICASE;
429 	    break;
430 	case 'm':
431 	    pat->options ^= REG_NEWLINE;
432 	    break;
433 	case 'x':
434 	    pat->options ^= REG_EXTENDED;
435 	    break;
436 	default:
437 	    msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
438 		     "skipping this rule", mapname, lineno, *p);
439 	    return (0);
440 	}
441 	++p;
442     }
443     *bufp = p;
444     return (1);
445 }
446 
447 /* dict_regexp_get_pats - get the primary and second patterns and flags */
448 
dict_regexp_get_pats(const char * mapname,int lineno,char ** p,DICT_REGEXP_PATTERN * first_pat,DICT_REGEXP_PATTERN * second_pat)449 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
450 				        DICT_REGEXP_PATTERN *first_pat,
451 				        DICT_REGEXP_PATTERN *second_pat)
452 {
453 
454     /*
455      * Get the primary and optional secondary patterns and their flags.
456      */
457     if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
458 	return (0);
459     if (**p == '!') {
460 #if 0
461 	static int bitrot_warned = 0;
462 
463 	if (bitrot_warned == 0) {
464 	    msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
465 		 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
466 		     mapname, lineno);
467 	    bitrot_warned = 1;
468 	}
469 #endif
470 	if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
471 	    return (0);
472     } else {
473 	second_pat->regexp = 0;
474     }
475     return (1);
476 }
477 
478 /* dict_regexp_prescan - find largest $number in replacement text */
479 
dict_regexp_prescan(int type,VSTRING * buf,void * context)480 static int dict_regexp_prescan(int type, VSTRING *buf, void *context)
481 {
482     DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
483     size_t  n;
484 
485     /*
486      * Keep a copy of literal text (with $$ already replaced by $) if and
487      * only if the replacement text contains no $number expression. This way
488      * we can avoid having to scan the replacement text at lookup time.
489      */
490     if (type == MAC_PARSE_VARNAME) {
491 	if (ctxt->literal) {
492 	    myfree(ctxt->literal);
493 	    ctxt->literal = 0;
494 	}
495 	if (!alldig(vstring_str(buf))) {
496 	    msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
497 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
498 	    return (MAC_PARSE_ERROR);
499 	}
500 	n = atoi(vstring_str(buf));
501 	if (n < 1) {
502 	    msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
503 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
504 	    return (MAC_PARSE_ERROR);
505 	}
506 	if (n > ctxt->max_sub)
507 	    ctxt->max_sub = n;
508     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
509 	if (ctxt->literal)
510 	    msg_panic("regexp map %s, line %d: multiple literals but no $number",
511 		      ctxt->mapname, ctxt->lineno);
512 	ctxt->literal = mystrdup(vstring_str(buf));
513     }
514     return (MAC_PARSE_OK);
515 }
516 
517 /* dict_regexp_compile_pat - compile one pattern */
518 
dict_regexp_compile_pat(const char * mapname,int lineno,DICT_REGEXP_PATTERN * pat)519 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
520 					        DICT_REGEXP_PATTERN *pat)
521 {
522     int     error;
523     regex_t *expr;
524 
525     expr = (regex_t *) mymalloc(sizeof(*expr));
526     error = regcomp(expr, pat->regexp, pat->options);
527     if (error != 0) {
528 	dict_regexp_regerror(mapname, lineno, error, expr);
529 	myfree((void *) expr);
530 	return (0);
531     }
532     return (expr);
533 }
534 
535 /* dict_regexp_rule_alloc - fill in a generic rule structure */
536 
dict_regexp_rule_alloc(int op,int lineno,size_t size)537 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size)
538 {
539     DICT_REGEXP_RULE *rule;
540 
541     rule = (DICT_REGEXP_RULE *) mymalloc(size);
542     rule->op = op;
543     rule->lineno = lineno;
544     rule->next = 0;
545 
546     return (rule);
547 }
548 
549 /* dict_regexp_parseline - parse one rule */
550 
dict_regexp_parseline(DICT * dict,const char * mapname,int lineno,char * line,int nesting)551 static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname,
552 					             int lineno, char *line,
553 					               int nesting)
554 {
555     char   *p;
556 
557     p = line;
558 
559     /*
560      * An ordinary rule takes one or two patterns and replacement text.
561      */
562     if (!ISALNUM(*p)) {
563 	DICT_REGEXP_PATTERN first_pat;
564 	DICT_REGEXP_PATTERN second_pat;
565 	DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
566 	regex_t *first_exp = 0;
567 	regex_t *second_exp;
568 	DICT_REGEXP_MATCH_RULE *match_rule;
569 
570 	/*
571 	 * Get the primary and the optional secondary patterns.
572 	 */
573 	if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
574 	    return (0);
575 
576 	/*
577 	 * Get the replacement text.
578 	 */
579 	while (*p && ISSPACE(*p))
580 	    ++p;
581 	if (!*p) {
582 	    msg_warn("regexp map %s, line %d: no replacement text: "
583 		     "using empty string", mapname, lineno);
584 	}
585 
586 	/*
587 	 * Find the highest-numbered $number in the replacement text. We can
588 	 * speed up pattern matching 1) by passing hints to the regexp
589 	 * compiler, setting the REG_NOSUB flag when the replacement text
590 	 * contains no $number string; 2) by passing hints to the regexp
591 	 * execution code, limiting the amount of text that is made available
592 	 * for substitution.
593 	 */
594 	prescan_context.mapname = mapname;
595 	prescan_context.lineno = lineno;
596 	prescan_context.max_sub = 0;
597 	prescan_context.literal = 0;
598 
599 	/*
600 	 * The optimizer will eliminate code duplication and/or dead code.
601 	 */
602 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
603 	if (first_exp) { \
604 	    regfree(first_exp); \
605 	    myfree((void *) first_exp); \
606 	} \
607 	if (prescan_context.literal) \
608 	    myfree(prescan_context.literal); \
609 	return (rval); \
610     } while (0)
611 
612 	if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
613 	    VSTRING *base64_buf;
614 	    char   *err;
615 
616 	    if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
617 		err = dict_file_get_error(dict);
618 		msg_warn("regexp map %s, line %d: %s: skipping this rule",
619 			 mapname, lineno, err);
620 		myfree(err);
621 		CREATE_MATCHOP_ERROR_RETURN(0);
622 	    }
623 	    p = vstring_str(base64_buf);
624 	}
625 	if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context)
626 	    & MAC_PARSE_ERROR) {
627 	    msg_warn("regexp map %s, line %d: bad replacement syntax: "
628 		     "skipping this rule", mapname, lineno);
629 	    CREATE_MATCHOP_ERROR_RETURN(0);
630 	}
631 
632 	/*
633 	 * Compile the primary and the optional secondary pattern. Speed up
634 	 * execution when no matched text needs to be substituted into the
635 	 * result string, or when the highest numbered substring is less than
636 	 * the total number of () subpatterns.
637 	 */
638 	if (prescan_context.max_sub == 0)
639 	    first_pat.options |= REG_NOSUB;
640 	if (prescan_context.max_sub > 0 && first_pat.match == 0) {
641 	    msg_warn("regexp map %s, line %d: $number found in negative match "
642 		   "replacement text: skipping this rule", mapname, lineno);
643 	    CREATE_MATCHOP_ERROR_RETURN(0);
644 	}
645 	if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
646 	    msg_warn("regexp map %s, line %d: "
647 		     "regular expression substitution is not allowed: "
648 		     "skipping this rule", mapname, lineno);
649 	    CREATE_MATCHOP_ERROR_RETURN(0);
650 	}
651 	if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
652 						 &first_pat)) == 0)
653 	    CREATE_MATCHOP_ERROR_RETURN(0);
654 	if (prescan_context.max_sub > first_exp->re_nsub) {
655 	    msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
656 		     "skipping this rule", mapname, lineno,
657 		     (int) prescan_context.max_sub);
658 	    CREATE_MATCHOP_ERROR_RETURN(0);
659 	}
660 	if (second_pat.regexp != 0) {
661 	    second_pat.options |= REG_NOSUB;
662 	    if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
663 						      &second_pat)) == 0)
664 		CREATE_MATCHOP_ERROR_RETURN(0);
665 	} else {
666 	    second_exp = 0;
667 	}
668 	match_rule = (DICT_REGEXP_MATCH_RULE *)
669 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno,
670 				   sizeof(DICT_REGEXP_MATCH_RULE));
671 	match_rule->first_exp = first_exp;
672 	match_rule->first_match = first_pat.match;
673 	match_rule->max_sub = prescan_context.max_sub;
674 	match_rule->second_exp = second_exp;
675 	match_rule->second_match = second_pat.match;
676 	if (prescan_context.literal)
677 	    match_rule->replacement = prescan_context.literal;
678 	else
679 	    match_rule->replacement = mystrdup(p);
680 	return ((DICT_REGEXP_RULE *) match_rule);
681     }
682 
683     /*
684      * The IF operator takes one pattern but no replacement text.
685      */
686     else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
687 	DICT_REGEXP_PATTERN pattern;
688 	regex_t *expr;
689 	DICT_REGEXP_IF_RULE *if_rule;
690 
691 	p += 2;
692 	while (*p && ISSPACE(*p))
693 	    p++;
694 	if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
695 	    return (0);
696 	while (*p && ISSPACE(*p))
697 	    ++p;
698 	if (*p) {
699 	    msg_warn("regexp map %s, line %d: ignoring extra text after"
700 		     " IF statement: \"%s\"", mapname, lineno, p);
701 	    msg_warn("regexp map %s, line %d: do not prepend whitespace"
702 		     " to statements between IF and ENDIF", mapname, lineno);
703 	}
704 	if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
705 	    return (0);
706 	if_rule = (DICT_REGEXP_IF_RULE *)
707 	    dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno,
708 				   sizeof(DICT_REGEXP_IF_RULE));
709 	if_rule->expr = expr;
710 	if_rule->match = pattern.match;
711 	if_rule->endif_rule = 0;
712 	return ((DICT_REGEXP_RULE *) if_rule);
713     }
714 
715     /*
716      * The ENDIF operator takes no patterns and no replacement text.
717      */
718     else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
719 	DICT_REGEXP_RULE *rule;
720 
721 	p += 5;
722 	if (nesting == 0) {
723 	    msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
724 		     mapname, lineno);
725 	    return (0);
726 	}
727 	while (*p && ISSPACE(*p))
728 	    ++p;
729 	if (*p)
730 	    msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
731 		     mapname, lineno);
732 	rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno,
733 				      sizeof(DICT_REGEXP_RULE));
734 	return (rule);
735     }
736 
737     /*
738      * Unrecognized input.
739      */
740     else {
741 	msg_warn("regexp map %s, line %d: ignoring unrecognized request",
742 		 mapname, lineno);
743 	return (0);
744     }
745 }
746 
747 /* dict_regexp_open - load and compile a file containing regular expressions */
748 
dict_regexp_open(const char * mapname,int open_flags,int dict_flags)749 DICT   *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
750 {
751     const char myname[] = "dict_regexp_open";
752     DICT_REGEXP *dict_regexp;
753     VSTREAM *map_fp = 0;
754     struct stat st;
755     VSTRING *why = 0;
756     VSTRING *line_buffer = 0;
757     DICT_REGEXP_RULE *rule;
758     DICT_REGEXP_RULE *last_rule = 0;
759     int     lineno;
760     int     last_line = 0;
761     size_t  max_sub = 0;
762     int     nesting = 0;
763     char   *p;
764     DICT_REGEXP_RULE **rule_stack = 0;
765     MVECT   mvect;
766 
767     /*
768      * Let the optimizer worry about eliminating redundant code.
769      */
770 #define DICT_REGEXP_OPEN_RETURN(d) do { \
771 	DICT *__d = (d); \
772 	if (line_buffer != 0) \
773 	    vstring_free(line_buffer); \
774 	if (map_fp != 0) \
775 	    vstream_fclose(map_fp); \
776 	if (why != 0) \
777 	   vstring_free(why); \
778 	return (__d); \
779     } while (0)
780 
781     /*
782      * Sanity checks.
783      */
784     if (open_flags != O_RDONLY)
785 	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
786 					    mapname, open_flags, dict_flags,
787 				  "%s:%s map requires O_RDONLY access mode",
788 					       DICT_TYPE_REGEXP, mapname));
789 
790     /*
791      * Open the configuration file.
792      */
793     if ((map_fp = dict_stream_open(DICT_TYPE_REGEXP, mapname, O_RDONLY,
794 				   dict_flags, &st, &why)) == 0)
795 	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
796 					       open_flags, dict_flags,
797 					       "%s", vstring_str(why)));
798     line_buffer = vstring_alloc(100);
799 
800     dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
801 					     sizeof(*dict_regexp));
802     dict_regexp->dict.lookup = dict_regexp_lookup;
803     dict_regexp->dict.close = dict_regexp_close;
804     dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
805     if (dict_flags & DICT_FLAG_FOLD_MUL)
806 	dict_regexp->dict.fold_buf = vstring_alloc(10);
807     dict_regexp->head = 0;
808     dict_regexp->pmatch = 0;
809     dict_regexp->expansion_buf = 0;
810     dict_regexp->dict.owner.uid = st.st_uid;
811     dict_regexp->dict.owner.status = (st.st_uid != 0);
812 
813     /*
814      * Parse the regexp table.
815      */
816     while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
817 	p = vstring_str(line_buffer);
818 	trimblanks(p, 0)[0] = 0;
819 	if (*p == 0)
820 	    continue;
821 	rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno,
822 				     p, nesting);
823 	if (rule == 0)
824 	    continue;
825 	if (rule->op == DICT_REGEXP_OP_MATCH) {
826 	    if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
827 		max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
828 	} else if (rule->op == DICT_REGEXP_OP_IF) {
829 	    if (rule_stack == 0)
830 		rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect,
831 					   sizeof(*rule_stack), nesting + 1,
832 						(MVECT_FN) 0, (MVECT_FN) 0);
833 	    else
834 		rule_stack =
835 		    (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1);
836 	    rule_stack[nesting] = rule;
837 	    nesting++;
838 	} else if (rule->op == DICT_REGEXP_OP_ENDIF) {
839 	    DICT_REGEXP_IF_RULE *if_rule;
840 
841 	    if (nesting-- <= 0)
842 		/* Already handled in dict_regexp_parseline(). */
843 		msg_panic("%s: ENDIF without IF", myname);
844 	    if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF)
845 		msg_panic("%s: unexpected rule stack element type %d",
846 			  myname, rule_stack[nesting]->op);
847 	    if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting];
848 	    if_rule->endif_rule = rule;
849 	}
850 	if (last_rule == 0)
851 	    dict_regexp->head = rule;
852 	else
853 	    last_rule->next = rule;
854 	last_rule = rule;
855     }
856 
857     while (nesting-- > 0)
858 	msg_warn("regexp map %s, line %d: IF has no matching ENDIF",
859 		 mapname, rule_stack[nesting]->lineno);
860 
861     if (rule_stack)
862 	(void) mvect_free(&mvect);
863 
864     /*
865      * Allocate space for only as many matched substrings as used in the
866      * replacement text.
867      */
868     if (max_sub > 0)
869 	dict_regexp->pmatch =
870 	    (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
871 
872     dict_file_purge_buffers(&dict_regexp->dict);
873     DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict));
874 }
875 
876 #endif
877