1 /* $NetBSD: dict_regexp.c,v 1.5 2023/12/23 20:30:46 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* dict_regexp 3
6 /* SUMMARY
7 /* dictionary manager interface to REGEXP regular expression library
8 /* SYNOPSIS
9 /* #include <dict_regexp.h>
10 /*
/*	DICT	*dict_regexp_open(name, open_flags, dict_flags)
/*	const char *name;
/*	int	open_flags;
/*	int	dict_flags;
15 /* DESCRIPTION
16 /* dict_regexp_open() opens the named file and compiles the contained
17 /* regular expressions. The result object can be used to match strings
18 /* against the table.
19 /* SEE ALSO
20 /* dict(3) generic dictionary manager
21 /* regexp_table(5) regular expression table configuration
22 /* AUTHOR(S)
23 /* LaMont Jones
24 /* lamont@hp.com
25 /*
26 /* Based on PCRE dictionary contributed by Andrew McNamara
27 /* andrewm@connect.com.au
28 /* connect.com.au Pty. Ltd.
29 /* Level 3, 213 Miller St
30 /* North Sydney, NSW, Australia
31 /*
32 /* Heavily rewritten by Wietse Venema
33 /* IBM T.J. Watson Research
34 /* P.O. Box 704
35 /* Yorktown Heights, NY 10598, USA
36 /*
37 /* Wietse Venema
38 /* Google, Inc.
39 /* 111 8th Avenue
40 /* New York, NY 10011, USA
41 /*--*/
42
43 /* System library. */
44
45 #include "sys_defs.h"
46
47 #ifdef HAS_POSIX_REGEXP
48
49 #include <sys/stat.h>
50 #include <stdlib.h>
51 #include <unistd.h>
52 #include <string.h>
53 #include <ctype.h>
54 #include <regex.h>
55 #ifdef STRCASECMP_IN_STRINGS_H
56 #include <strings.h>
57 #endif
58
59 /* Utility library. */
60
61 #include "mymalloc.h"
62 #include "msg.h"
63 #include "safe.h"
64 #include "vstream.h"
65 #include "vstring.h"
66 #include "stringops.h"
67 #include "readlline.h"
68 #include "dict.h"
69 #include "dict_regexp.h"
70 #include "mac_parse.h"
71 #include "warn_stat.h"
72 #include "mvect.h"
73
74 /*
75 * Support for IF/ENDIF based on an idea by Bert Driehuis.
76 */
77 #define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */
78 #define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */
79 #define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */
80
81 /*
82 * Regular expression before compiling.
83 */
/*
 * One pattern as it appears in the table, before it is handed to regcomp().
 * Filled in by dict_regexp_get_pat().
 */
typedef struct DICT_REGEXP_PATTERN {
    char   *regexp;             /* pattern text, NUL-terminated in place */
    int     options;            /* flag bits for regcomp() */
    int     match;              /* 1 = positive match, 0 = negated with "!" */
} DICT_REGEXP_PATTERN;
89
90 /*
91 * Compiled generic rule, and subclasses that derive from it.
92 */
/*
 * Generic rule header. The MATCH and IF rule structures embed this as their
 * first member, so a pointer to either can be handled as a pointer to the
 * generic rule; ENDIF rules consist of the header only.
 */
typedef struct DICT_REGEXP_RULE DICT_REGEXP_RULE;

struct DICT_REGEXP_RULE {
    int     op;                 /* one of DICT_REGEXP_OP_MATCH/IF/ENDIF */
    int     lineno;             /* line number in the table file */
    DICT_REGEXP_RULE *next;     /* following rule in the map, or null */
};
98
99 typedef struct {
100 DICT_REGEXP_RULE rule; /* generic part */
101 regex_t *first_exp; /* compiled primary pattern */
102 int first_match; /* positive or negative match */
103 regex_t *second_exp; /* compiled secondary pattern */
104 int second_match; /* positive or negative match */
105 char *replacement; /* replacement text */
106 size_t max_sub; /* largest $number in replacement */
107 } DICT_REGEXP_MATCH_RULE;
108
109 typedef struct {
110 DICT_REGEXP_RULE rule; /* generic members */
111 regex_t *expr; /* the condition */
112 int match; /* positive or negative match */
113 struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */
114 } DICT_REGEXP_IF_RULE;
115
116 /*
117 * Regexp map.
118 */
119 typedef struct {
120 DICT dict; /* generic members */
121 regmatch_t *pmatch; /* matched substring info */
122 DICT_REGEXP_RULE *head; /* first rule */
123 VSTRING *expansion_buf; /* lookup result */
124 } DICT_REGEXP;
125
126 /*
127 * Macros to make dense code more readable.
128 */
129 #define NULL_SUBSTITUTIONS (0)
130 #define NULL_MATCH_RESULT ((regmatch_t *) 0)
131
132 /*
133 * Context for $number expansion callback.
134 */
135 typedef struct {
136 DICT_REGEXP *dict_regexp; /* the dictionary handle */
137 DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */
138 const char *lookup_string; /* matched text */
139 } DICT_REGEXP_EXPAND_CONTEXT;
140
141 /*
142 * Context for $number pre-scan callback.
143 */
/*
 * State passed through mac_parse() to dict_regexp_prescan() while the
 * replacement text is inspected at table load time.
 */
typedef struct DICT_REGEXP_PRESCAN_CONTEXT {
    const char *mapname;        /* map name, for diagnostics */
    int     lineno;             /* table line number, for diagnostics */
    size_t  max_sub;            /* highest $number encountered so far */
    char   *literal;            /* copy of constant result, or null */
} DICT_REGEXP_PRESCAN_CONTEXT;
150
151 /*
152 * Compatibility.
153 */
154 #ifndef MAC_PARSE_OK
155 #define MAC_PARSE_OK 0
156 #endif
157
158 /* dict_regexp_expand - replace $number with substring from matched text */
159
/*
 * mac_parse() callback used at lookup time. Appends one piece of the
 * replacement text to the map's expansion buffer: literal text is copied
 * as is, and a $number reference is replaced by the corresponding
 * substring of the lookup string.
 *
 * type  MAC_PARSE_VARNAME for a $number reference, anything else is text
 * buf   the reference name or the literal text
 * ptr   a DICT_REGEXP_EXPAND_CONTEXT
 *
 * Returns MAC_PARSE_OK, or MAC_PARSE_UNDEF when the referenced substring
 * is empty or did not take part in the match.
 */
static int dict_regexp_expand(int type, VSTRING *buf, void *ptr)
{
    DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
    DICT_REGEXP *table = ctxt->dict_regexp;
    DICT_REGEXP_MATCH_RULE *matched = ctxt->match_rule;
    const char *token = vstring_str(buf);
    regmatch_t *sub;
    size_t  index;

    /*
     * Plain text needs no substitution.
     */
    if (type != MAC_PARSE_VARNAME) {
        vstring_strcat(table->expansion_buf, token);
        return (MAC_PARSE_OK);
    }

    /*
     * The replacement text was validated when the table was loaded, so an
     * index outside 1..max_sub cannot happen here.
     */
    index = atoi(token);
    if (index < 1 || index > matched->max_sub)
        msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
                  table->dict.name, matched->rule.lineno, token);

    sub = &table->pmatch[index];
    if (sub->rm_so < 0 || sub->rm_so == sub->rm_eo)
        return (MAC_PARSE_UNDEF);       /* empty or not matched */

    vstring_strncat(table->expansion_buf,
                    ctxt->lookup_string + sub->rm_so,
                    sub->rm_eo - sub->rm_so);
    return (MAC_PARSE_OK);
}
196
197 /* dict_regexp_regerror - report regexp compile/execute error */
198
/*
 * Log a warning for a regcomp()/regexec() error code, using regerror() to
 * turn the code into text. The message is prefixed with the map name and
 * the table line number.
 */
static void dict_regexp_regerror(const char *mapname, int lineno, int error,
                                 const regex_t *expr)
{
    char    reason[256];

    (void) regerror(error, expr, reason, sizeof(reason));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, reason);
}
207
208 /*
209 * Inlined to reduce function call overhead in the time-critical loop.
210 */
/*
 * Run regexec() and turn the outcome into "does this rule apply": a hit
 * yields (match), REG_NOMATCH yields !(match), and any other status is
 * logged through dict_regexp_regerror() and counts as "does not apply".
 * The regexec() status is left in (err). Arguments other than (str),
 * (nsub) and (pmatch) may be evaluated more than once.
 */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    (((err) = regexec((expr), (str), (nsub), (pmatch), 0)) == 0 ? \
        (match) : \
     (err) == REG_NOMATCH ? \
        !(match) : \
        (dict_regexp_regerror((map), (line), (err), (expr)), 0))
216
217 /* dict_regexp_lookup - match string and perform optional substitution */
218
/*
 * Look up a string: walk the rule list in table order and return the
 * replacement text of the first MATCH rule that applies.
 *
 * Returns a pointer to the rule's stored replacement text (no $number
 * references), to the map's expansion buffer (with $number references
 * substituted), or null when no rule applies. dict->error is reset to 0.
 */
static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
{
    DICT_REGEXP *table = (DICT_REGEXP *) dict;
    DICT_REGEXP_RULE *cur;
    DICT_REGEXP_EXPAND_CONTEXT expand;
    int     status;

    dict->error = 0;

    if (msg_verbose)
        msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);

    /*
     * Match against a lower-cased copy of the key when case folding is on.
     */
    if (dict->flags & DICT_FLAG_FOLD_MUL) {
        if (dict->fold_buf == 0)
            dict->fold_buf = vstring_alloc(10);
        vstring_strcpy(dict->fold_buf, lookup_string);
        lookup_string = lowercase(vstring_str(dict->fold_buf));
    }

    for (cur = table->head; cur != 0; cur = cur->next) {

        if (cur->op == DICT_REGEXP_OP_ENDIF) {

            /*
             * Nothing to do when an ENDIF is reached by falling out of
             * its IF block.
             */
            continue;

        } else if (cur->op == DICT_REGEXP_OP_IF) {
            DICT_REGEXP_IF_RULE *cond = (DICT_REGEXP_IF_RULE *) cur;

            /*
             * Condition holds: carry on with the rule after the IF.
             * Otherwise jump to the paired ENDIF so that the loop resumes
             * with the rule after it. An IF without ENDIF covers the rest
             * of the table, so a failed condition ends the search.
             */
            if (!DICT_REGEXP_REGEXEC(status, dict->name, cur->lineno,
                                     cond->expr, cond->match, lookup_string,
                                     NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) {
                cur = cond->endif_rule;
                if (cur == 0)
                    return (0);
            }
            continue;

        } else if (cur->op == DICT_REGEXP_OP_MATCH) {
            DICT_REGEXP_MATCH_RULE *candidate = (DICT_REGEXP_MATCH_RULE *) cur;
            size_t  nmatch;

            /*
             * Ask regexec() for substring offsets only when the
             * replacement text actually refers to them.
             */
            nmatch = candidate->max_sub > 0 ? candidate->max_sub + 1 : 0;
            if (!DICT_REGEXP_REGEXEC(status, dict->name, cur->lineno,
                                     candidate->first_exp,
                                     candidate->first_match,
                                     lookup_string,
                                     nmatch,
                                     table->pmatch))
                continue;
            if (candidate->second_exp != 0
                && !DICT_REGEXP_REGEXEC(status, dict->name, cur->lineno,
                                        candidate->second_exp,
                                        candidate->second_match,
                                        lookup_string,
                                        NULL_SUBSTITUTIONS,
                                        NULL_MATCH_RESULT))
                continue;

            /*
             * No $number in the replacement: the stored text is the
             * answer, with $$ already reduced to $ at load time.
             */
            if (candidate->max_sub == 0)
                return (candidate->replacement);

            /*
             * Substitute $number references. The replacement syntax was
             * checked at load time, so a parse error here is a bug.
             */
            if (table->expansion_buf == 0)
                table->expansion_buf = vstring_alloc(10);
            VSTRING_RESET(table->expansion_buf);
            expand.dict_regexp = table;
            expand.match_rule = candidate;
            expand.lookup_string = lookup_string;

            if (mac_parse(candidate->replacement, dict_regexp_expand,
                          (void *) &expand) & MAC_PARSE_ERROR)
                msg_panic("regexp map %s, line %d: bad replacement syntax",
                          dict->name, cur->lineno);
            VSTRING_TERMINATE(table->expansion_buf);
            return (vstring_str(table->expansion_buf));

        } else {
            msg_panic("dict_regexp_lookup: impossible operation %d", cur->op);
        }
    }
    return (0);
}
323
324 /* dict_regexp_close - close regexp dictionary */
325
/*
 * Dispose of a regexp map: every rule with its compiled patterns and
 * replacement text, the substring offset array, the expansion and case
 * folding buffers, and finally the dictionary itself.
 */
static void dict_regexp_close(DICT *dict)
{
    DICT_REGEXP *table = (DICT_REGEXP *) dict;
    DICT_REGEXP_RULE *cur = table->head;

    while (cur != 0) {
        /* Remember the successor before the current rule is released. */
        DICT_REGEXP_RULE *following = cur->next;

        if (cur->op == DICT_REGEXP_OP_MATCH) {
            DICT_REGEXP_MATCH_RULE *m = (DICT_REGEXP_MATCH_RULE *) cur;

            if (m->first_exp) {
                regfree(m->first_exp);
                myfree((void *) m->first_exp);
            }
            if (m->second_exp) {
                regfree(m->second_exp);
                myfree((void *) m->second_exp);
            }
            if (m->replacement)
                myfree((void *) m->replacement);
        } else if (cur->op == DICT_REGEXP_OP_IF) {
            DICT_REGEXP_IF_RULE *c = (DICT_REGEXP_IF_RULE *) cur;

            if (c->expr) {
                regfree(c->expr);
                myfree((void *) c->expr);
            }
        } else if (cur->op != DICT_REGEXP_OP_ENDIF) {
            msg_panic("dict_regexp_close: unknown operation %d", cur->op);
        }
        myfree((void *) cur);
        cur = following;
    }

    if (table->pmatch)
        myfree((void *) table->pmatch);
    if (table->expansion_buf)
        vstring_free(table->expansion_buf);
    if (dict->fold_buf)
        vstring_free(dict->fold_buf);
    dict_free(dict);
}
372
373 /* dict_regexp_get_pat - extract one pattern with options from rule */
374
/*
 * Extract one "[!]/pattern/flags" element from a table line.
 *
 * mapname, lineno  used in diagnostics only
 * bufp             in: start of the element; out (success only): first
 *                  character after the flags
 * pat              receives the pattern text, the regcomp() options and
 *                  the match polarity
 *
 * The pattern is NUL-terminated in place, so pat->regexp points into the
 * caller's buffer. Options start out as REG_EXTENDED | REG_ICASE; each of
 * the flags i, m and x toggles REG_ICASE, REG_NEWLINE and REG_EXTENDED
 * respectively.
 *
 * Returns 1 on success. Returns 0 after logging a warning when there is no
 * pattern, no closing delimiter, or an unknown flag.
 */
static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
                               DICT_REGEXP_PATTERN *pat)
{
    char   *p = *bufp;
    char    re_delim;

    /*
     * Process negation operators. Each "!" inverts the match polarity;
     * whitespace between them is skipped.
     */
    pat->match = 1;
    for (;;) {
        if (*p == '!')
            pat->match = !pat->match;
        else if (!ISSPACE(*p))
            break;
        p++;
    }
    if (*p == 0) {
        msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
                 mapname, lineno);
        return (0);
    }

    /*
     * Search for the closing delimiter, handling backslash escape. A
     * backslash at the very end of the input escapes nothing: step onto
     * the terminator so that the missing delimiter is reported below,
     * instead of mistaking the backslash for the closing delimiter.
     */
    re_delim = *p++;
    pat->regexp = p;
    while (*p) {
        if (*p == '\\') {
            ++p;
            if (*p == 0)
                break;
        } else if (*p == re_delim) {
            break;
        }
        ++p;
    }
    if (!*p) {
        msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
                 "skipping this rule", mapname, lineno, re_delim);
        return (0);
    }
    *p++ = 0;                   /* null terminate */

    /*
     * Search for options. They end at whitespace, at "!" (which introduces
     * a secondary pattern), or at the end of the line.
     */
    pat->options = REG_EXTENDED | REG_ICASE;
    while (*p && !ISSPACE(*p) && *p != '!') {
        switch (*p) {
        case 'i':
            pat->options ^= REG_ICASE;
            break;
        case 'm':
            pat->options ^= REG_NEWLINE;
            break;
        case 'x':
            pat->options ^= REG_EXTENDED;
            break;
        default:
            msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
                     "skipping this rule", mapname, lineno, *p);
            return (0);
        }
        ++p;
    }
    *bufp = p;
    return (1);
}
446
447 /* dict_regexp_get_pats - get the primary and second patterns and flags */
448
/*
 * Extract the primary pattern and, when the flags are followed by "!", the
 * secondary pattern of a /pattern1/!/pattern2/ rule. Without a secondary
 * pattern, second_pat->regexp is set to null.
 *
 * Returns 1 on success, 0 when either pattern could not be extracted; in
 * that case dict_regexp_get_pat() has already logged the reason.
 */
static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
                                DICT_REGEXP_PATTERN *first_pat,
                                DICT_REGEXP_PATTERN *second_pat)
{
    if (!dict_regexp_get_pat(mapname, lineno, p, first_pat))
        return (0);

    /*
     * Primary pattern only.
     */
    if (**p != '!') {
        second_pat->regexp = 0;
        return (1);
    }

    /*
     * The "!" is consumed by dict_regexp_get_pat() as a negation operator
     * for the secondary pattern.
     */
    return (dict_regexp_get_pat(mapname, lineno, p, second_pat) != 0);
}
477
478 /* dict_regexp_prescan - find largest $number in replacement text */
479
/*
 * mac_parse() callback used at table load time. Validates every $number
 * reference in the replacement text and records the highest one in
 * ctxt->max_sub. As long as no $number has been seen, a copy of the
 * literal text (with $$ already replaced by $) is kept in ctxt->literal,
 * so that lookups can return it without parsing the replacement again.
 *
 * type     MAC_PARSE_VARNAME or MAC_PARSE_LITERAL
 * buf      the reference name or the literal text
 * context  a DICT_REGEXP_PRESCAN_CONTEXT
 *
 * Returns MAC_PARSE_OK, or MAC_PARSE_ERROR after logging a warning for a
 * non-numeric or out-of-range reference.
 */
static int dict_regexp_prescan(int type, VSTRING *buf, void *context)
{
    DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
    unsigned long parsed;
    size_t  n;

    if (type == MAC_PARSE_VARNAME) {

        /*
         * The result is not constant after all; drop the literal copy.
         */
        if (ctxt->literal) {
            myfree(ctxt->literal);
            ctxt->literal = 0;
        }
        if (!alldig(vstring_str(buf))) {
            msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
                     ctxt->mapname, ctxt->lineno, vstring_str(buf));
            return (MAC_PARSE_ERROR);
        }

        /*
         * strtoul() saturates on overflow, where atoi() has undefined
         * behavior and may silently wrap a huge index around to a small,
         * valid-looking one. Reject anything that does not fit in an int,
         * because the index is converted with atoi() again at lookup time.
         * (~0U >> 1 is the largest int value.)
         */
        parsed = strtoul(vstring_str(buf), (char **) 0, 10);
        if (parsed < 1 || parsed > (unsigned long) (~0U >> 1)) {
            msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
                     ctxt->mapname, ctxt->lineno, vstring_str(buf));
            return (MAC_PARSE_ERROR);
        }
        n = (size_t) parsed;
        if (n > ctxt->max_sub)
            ctxt->max_sub = n;
    } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
        if (ctxt->literal)
            msg_panic("regexp map %s, line %d: multiple literals but no $number",
                      ctxt->mapname, ctxt->lineno);
        ctxt->literal = mystrdup(vstring_str(buf));
    }
    return (MAC_PARSE_OK);
}
516
517 /* dict_regexp_compile_pat - compile one pattern */
518
/*
 * Compile one extracted pattern with regcomp().
 *
 * Returns a newly allocated regex_t that the caller must release with
 * regfree() and myfree(), or null after logging the regcomp() error.
 */
static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
                                        DICT_REGEXP_PATTERN *pat)
{
    regex_t *compiled = (regex_t *) mymalloc(sizeof(*compiled));
    int     status = regcomp(compiled, pat->regexp, pat->options);

    if (status == 0)
        return (compiled);

    dict_regexp_regerror(mapname, lineno, status, compiled);
    myfree((void *) compiled);
    return (0);
}
534
535 /* dict_regexp_rule_alloc - fill in a generic rule structure */
536
dict_regexp_rule_alloc(int op,int lineno,size_t size)537 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size)
538 {
539 DICT_REGEXP_RULE *rule;
540
541 rule = (DICT_REGEXP_RULE *) mymalloc(size);
542 rule->op = op;
543 rule->lineno = lineno;
544 rule->next = 0;
545
546 return (rule);
547 }
548
549 /* dict_regexp_parseline - parse one rule */
550
dict_regexp_parseline(DICT * dict,const char * mapname,int lineno,char * line,int nesting)551 static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname,
552 int lineno, char *line,
553 int nesting)
554 {
555 char *p;
556
557 p = line;
558
559 /*
560 * An ordinary rule takes one or two patterns and replacement text.
561 */
562 if (!ISALNUM(*p)) {
563 DICT_REGEXP_PATTERN first_pat;
564 DICT_REGEXP_PATTERN second_pat;
565 DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
566 regex_t *first_exp = 0;
567 regex_t *second_exp;
568 DICT_REGEXP_MATCH_RULE *match_rule;
569
570 /*
571 * Get the primary and the optional secondary patterns.
572 */
573 if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
574 return (0);
575
576 /*
577 * Get the replacement text.
578 */
579 while (*p && ISSPACE(*p))
580 ++p;
581 if (!*p) {
582 msg_warn("regexp map %s, line %d: no replacement text: "
583 "using empty string", mapname, lineno);
584 }
585
586 /*
587 * Find the highest-numbered $number in the replacement text. We can
588 * speed up pattern matching 1) by passing hints to the regexp
589 * compiler, setting the REG_NOSUB flag when the replacement text
590 * contains no $number string; 2) by passing hints to the regexp
591 * execution code, limiting the amount of text that is made available
592 * for substitution.
593 */
594 prescan_context.mapname = mapname;
595 prescan_context.lineno = lineno;
596 prescan_context.max_sub = 0;
597 prescan_context.literal = 0;
598
599 /*
600 * The optimizer will eliminate code duplication and/or dead code.
601 */
602 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
603 if (first_exp) { \
604 regfree(first_exp); \
605 myfree((void *) first_exp); \
606 } \
607 if (prescan_context.literal) \
608 myfree(prescan_context.literal); \
609 return (rval); \
610 } while (0)
611
612 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
613 VSTRING *base64_buf;
614 char *err;
615
616 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
617 err = dict_file_get_error(dict);
618 msg_warn("regexp map %s, line %d: %s: skipping this rule",
619 mapname, lineno, err);
620 myfree(err);
621 CREATE_MATCHOP_ERROR_RETURN(0);
622 }
623 p = vstring_str(base64_buf);
624 }
625 if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context)
626 & MAC_PARSE_ERROR) {
627 msg_warn("regexp map %s, line %d: bad replacement syntax: "
628 "skipping this rule", mapname, lineno);
629 CREATE_MATCHOP_ERROR_RETURN(0);
630 }
631
632 /*
633 * Compile the primary and the optional secondary pattern. Speed up
634 * execution when no matched text needs to be substituted into the
635 * result string, or when the highest numbered substring is less than
636 * the total number of () subpatterns.
637 */
638 if (prescan_context.max_sub == 0)
639 first_pat.options |= REG_NOSUB;
640 if (prescan_context.max_sub > 0 && first_pat.match == 0) {
641 msg_warn("regexp map %s, line %d: $number found in negative match "
642 "replacement text: skipping this rule", mapname, lineno);
643 CREATE_MATCHOP_ERROR_RETURN(0);
644 }
645 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
646 msg_warn("regexp map %s, line %d: "
647 "regular expression substitution is not allowed: "
648 "skipping this rule", mapname, lineno);
649 CREATE_MATCHOP_ERROR_RETURN(0);
650 }
651 if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
652 &first_pat)) == 0)
653 CREATE_MATCHOP_ERROR_RETURN(0);
654 if (prescan_context.max_sub > first_exp->re_nsub) {
655 msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
656 "skipping this rule", mapname, lineno,
657 (int) prescan_context.max_sub);
658 CREATE_MATCHOP_ERROR_RETURN(0);
659 }
660 if (second_pat.regexp != 0) {
661 second_pat.options |= REG_NOSUB;
662 if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
663 &second_pat)) == 0)
664 CREATE_MATCHOP_ERROR_RETURN(0);
665 } else {
666 second_exp = 0;
667 }
668 match_rule = (DICT_REGEXP_MATCH_RULE *)
669 dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno,
670 sizeof(DICT_REGEXP_MATCH_RULE));
671 match_rule->first_exp = first_exp;
672 match_rule->first_match = first_pat.match;
673 match_rule->max_sub = prescan_context.max_sub;
674 match_rule->second_exp = second_exp;
675 match_rule->second_match = second_pat.match;
676 if (prescan_context.literal)
677 match_rule->replacement = prescan_context.literal;
678 else
679 match_rule->replacement = mystrdup(p);
680 return ((DICT_REGEXP_RULE *) match_rule);
681 }
682
683 /*
684 * The IF operator takes one pattern but no replacement text.
685 */
686 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
687 DICT_REGEXP_PATTERN pattern;
688 regex_t *expr;
689 DICT_REGEXP_IF_RULE *if_rule;
690
691 p += 2;
692 while (*p && ISSPACE(*p))
693 p++;
694 if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
695 return (0);
696 while (*p && ISSPACE(*p))
697 ++p;
698 if (*p) {
699 msg_warn("regexp map %s, line %d: ignoring extra text after"
700 " IF statement: \"%s\"", mapname, lineno, p);
701 msg_warn("regexp map %s, line %d: do not prepend whitespace"
702 " to statements between IF and ENDIF", mapname, lineno);
703 }
704 if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
705 return (0);
706 if_rule = (DICT_REGEXP_IF_RULE *)
707 dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno,
708 sizeof(DICT_REGEXP_IF_RULE));
709 if_rule->expr = expr;
710 if_rule->match = pattern.match;
711 if_rule->endif_rule = 0;
712 return ((DICT_REGEXP_RULE *) if_rule);
713 }
714
715 /*
716 * The ENDIF operator takes no patterns and no replacement text.
717 */
718 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
719 DICT_REGEXP_RULE *rule;
720
721 p += 5;
722 if (nesting == 0) {
723 msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
724 mapname, lineno);
725 return (0);
726 }
727 while (*p && ISSPACE(*p))
728 ++p;
729 if (*p)
730 msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
731 mapname, lineno);
732 rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno,
733 sizeof(DICT_REGEXP_RULE));
734 return (rule);
735 }
736
737 /*
738 * Unrecognized input.
739 */
740 else {
741 msg_warn("regexp map %s, line %d: ignoring unrecognized request",
742 mapname, lineno);
743 return (0);
744 }
745 }
746
747 /* dict_regexp_open - load and compile a file containing regular expressions */
748
dict_regexp_open(const char * mapname,int open_flags,int dict_flags)749 DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
750 {
751 const char myname[] = "dict_regexp_open";
752 DICT_REGEXP *dict_regexp;
753 VSTREAM *map_fp = 0;
754 struct stat st;
755 VSTRING *why = 0;
756 VSTRING *line_buffer = 0;
757 DICT_REGEXP_RULE *rule;
758 DICT_REGEXP_RULE *last_rule = 0;
759 int lineno;
760 int last_line = 0;
761 size_t max_sub = 0;
762 int nesting = 0;
763 char *p;
764 DICT_REGEXP_RULE **rule_stack = 0;
765 MVECT mvect;
766
767 /*
768 * Let the optimizer worry about eliminating redundant code.
769 */
770 #define DICT_REGEXP_OPEN_RETURN(d) do { \
771 DICT *__d = (d); \
772 if (line_buffer != 0) \
773 vstring_free(line_buffer); \
774 if (map_fp != 0) \
775 vstream_fclose(map_fp); \
776 if (why != 0) \
777 vstring_free(why); \
778 return (__d); \
779 } while (0)
780
781 /*
782 * Sanity checks.
783 */
784 if (open_flags != O_RDONLY)
785 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
786 mapname, open_flags, dict_flags,
787 "%s:%s map requires O_RDONLY access mode",
788 DICT_TYPE_REGEXP, mapname));
789
790 /*
791 * Open the configuration file.
792 */
793 if ((map_fp = dict_stream_open(DICT_TYPE_REGEXP, mapname, O_RDONLY,
794 dict_flags, &st, &why)) == 0)
795 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
796 open_flags, dict_flags,
797 "%s", vstring_str(why)));
798 line_buffer = vstring_alloc(100);
799
800 dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
801 sizeof(*dict_regexp));
802 dict_regexp->dict.lookup = dict_regexp_lookup;
803 dict_regexp->dict.close = dict_regexp_close;
804 dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
805 if (dict_flags & DICT_FLAG_FOLD_MUL)
806 dict_regexp->dict.fold_buf = vstring_alloc(10);
807 dict_regexp->head = 0;
808 dict_regexp->pmatch = 0;
809 dict_regexp->expansion_buf = 0;
810 dict_regexp->dict.owner.uid = st.st_uid;
811 dict_regexp->dict.owner.status = (st.st_uid != 0);
812
813 /*
814 * Parse the regexp table.
815 */
816 while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
817 p = vstring_str(line_buffer);
818 trimblanks(p, 0)[0] = 0;
819 if (*p == 0)
820 continue;
821 rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno,
822 p, nesting);
823 if (rule == 0)
824 continue;
825 if (rule->op == DICT_REGEXP_OP_MATCH) {
826 if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
827 max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
828 } else if (rule->op == DICT_REGEXP_OP_IF) {
829 if (rule_stack == 0)
830 rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect,
831 sizeof(*rule_stack), nesting + 1,
832 (MVECT_FN) 0, (MVECT_FN) 0);
833 else
834 rule_stack =
835 (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1);
836 rule_stack[nesting] = rule;
837 nesting++;
838 } else if (rule->op == DICT_REGEXP_OP_ENDIF) {
839 DICT_REGEXP_IF_RULE *if_rule;
840
841 if (nesting-- <= 0)
842 /* Already handled in dict_regexp_parseline(). */
843 msg_panic("%s: ENDIF without IF", myname);
844 if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF)
845 msg_panic("%s: unexpected rule stack element type %d",
846 myname, rule_stack[nesting]->op);
847 if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting];
848 if_rule->endif_rule = rule;
849 }
850 if (last_rule == 0)
851 dict_regexp->head = rule;
852 else
853 last_rule->next = rule;
854 last_rule = rule;
855 }
856
857 while (nesting-- > 0)
858 msg_warn("regexp map %s, line %d: IF has no matching ENDIF",
859 mapname, rule_stack[nesting]->lineno);
860
861 if (rule_stack)
862 (void) mvect_free(&mvect);
863
864 /*
865 * Allocate space for only as many matched substrings as used in the
866 * replacement text.
867 */
868 if (max_sub > 0)
869 dict_regexp->pmatch =
870 (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
871
872 dict_file_purge_buffers(&dict_regexp->dict);
873 DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict));
874 }
875
876 #endif
877