1 /* $NetBSD: dict_regexp.c,v 1.3 2020/03/18 19:05:21 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_regexp 3 6 /* SUMMARY 7 /* dictionary manager interface to REGEXP regular expression library 8 /* SYNOPSIS 9 /* #include <dict_regexp.h> 10 /* 11 /* DICT *dict_regexp_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_regexp_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* regexp_table(5) format of Postfix regular expression tables 22 /* AUTHOR(S) 23 /* LaMont Jones 24 /* lamont@hp.com 25 /* 26 /* Based on PCRE dictionary contributed by Andrew McNamara 27 /* andrewm@connect.com.au 28 /* connect.com.au Pty. Ltd. 29 /* Level 3, 213 Miller St 30 /* North Sydney, NSW, Australia 31 /* 32 /* Heavily rewritten by Wietse Venema 33 /* IBM T.J. Watson Research 34 /* P.O. Box 704 35 /* Yorktown Heights, NY 10598, USA 36 /* 37 /* Wietse Venema 38 /* Google, Inc. 39 /* 111 8th Avenue 40 /* New York, NY 10011, USA 41 /*--*/ 42 43 /* System library. */ 44 45 #include "sys_defs.h" 46 47 #ifdef HAS_POSIX_REGEXP 48 49 #include <sys/stat.h> 50 #include <stdlib.h> 51 #include <unistd.h> 52 #include <string.h> 53 #include <ctype.h> 54 #include <regex.h> 55 #ifdef STRCASECMP_IN_STRINGS_H 56 #include <strings.h> 57 #endif 58 59 /* Utility library. */ 60 61 #include "mymalloc.h" 62 #include "msg.h" 63 #include "safe.h" 64 #include "vstream.h" 65 #include "vstring.h" 66 #include "stringops.h" 67 #include "readlline.h" 68 #include "dict.h" 69 #include "dict_regexp.h" 70 #include "mac_parse.h" 71 #include "warn_stat.h" 72 #include "mvect.h" 73 74 /* 75 * Support for IF/ENDIF based on an idea by Bert Driehuis. 76 */ 77 #define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */ 78 #define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */ 79 #define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 80 81 /* 82 * Regular expression before compiling. 83 */ 84 typedef struct { 85 char *regexp; /* regular expression */ 86 int options; /* regcomp() options */ 87 int match; /* positive or negative match */ 88 } DICT_REGEXP_PATTERN; 89 90 /* 91 * Compiled generic rule, and subclasses that derive from it. 92 */ 93 typedef struct DICT_REGEXP_RULE { 94 int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */ 95 int lineno; /* source file line number */ 96 struct DICT_REGEXP_RULE *next; /* next rule in dict */ 97 } DICT_REGEXP_RULE; 98 99 typedef struct { 100 DICT_REGEXP_RULE rule; /* generic part */ 101 regex_t *first_exp; /* compiled primary pattern */ 102 int first_match; /* positive or negative match */ 103 regex_t *second_exp; /* compiled secondary pattern */ 104 int second_match; /* positive or negative match */ 105 char *replacement; /* replacement text */ 106 size_t max_sub; /* largest $number in replacement */ 107 } DICT_REGEXP_MATCH_RULE; 108 109 typedef struct { 110 DICT_REGEXP_RULE rule; /* generic members */ 111 regex_t *expr; /* the condition */ 112 int match; /* positive or negative match */ 113 struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */ 114 } DICT_REGEXP_IF_RULE; 115 116 /* 117 * Regexp map. 118 */ 119 typedef struct { 120 DICT dict; /* generic members */ 121 regmatch_t *pmatch; /* matched substring info */ 122 DICT_REGEXP_RULE *head; /* first rule */ 123 VSTRING *expansion_buf; /* lookup result */ 124 } DICT_REGEXP; 125 126 /* 127 * Macros to make dense code more readable. 128 */ 129 #define NULL_SUBSTITUTIONS (0) 130 #define NULL_MATCH_RESULT ((regmatch_t *) 0) 131 132 /* 133 * Context for $number expansion callback. 134 */ 135 typedef struct { 136 DICT_REGEXP *dict_regexp; /* the dictionary handle */ 137 DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */ 138 const char *lookup_string; /* matched text */ 139 } DICT_REGEXP_EXPAND_CONTEXT; 140 141 /* 142 * Context for $number pre-scan callback. 143 */ 144 typedef struct { 145 const char *mapname; /* name of regexp map */ 146 int lineno; /* where in file */ 147 size_t max_sub; /* largest $number seen */ 148 char *literal; /* constant result, $$ -> $ */ 149 } DICT_REGEXP_PRESCAN_CONTEXT; 150 151 /* 152 * Compatibility. 153 */ 154 #ifndef MAC_PARSE_OK 155 #define MAC_PARSE_OK 0 156 #endif 157 158 /* dict_regexp_expand - replace $number with substring from matched text */ 159 160 static int dict_regexp_expand(int type, VSTRING *buf, void *ptr) 161 { 162 DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr; 163 DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule; 164 DICT_REGEXP *dict_regexp = ctxt->dict_regexp; 165 regmatch_t *pmatch; 166 size_t n; 167 168 /* 169 * Replace $number by the corresponding substring from the matched text. 170 * We pre-scanned the replacement text at compile time, so any out of 171 * range $number means that something impossible has happened. 172 */ 173 if (type == MAC_PARSE_VARNAME) { 174 n = atoi(vstring_str(buf)); 175 if (n < 1 || n > match_rule->max_sub) 176 msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"", 177 dict_regexp->dict.name, match_rule->rule.lineno, 178 vstring_str(buf)); 179 pmatch = dict_regexp->pmatch + n; 180 if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo) 181 return (MAC_PARSE_UNDEF); /* empty or not matched */ 182 vstring_strncat(dict_regexp->expansion_buf, 183 ctxt->lookup_string + pmatch->rm_so, 184 pmatch->rm_eo - pmatch->rm_so); 185 return (MAC_PARSE_OK); 186 } 187 188 /* 189 * Straight text - duplicate with no substitution. 190 */ 191 else { 192 vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf)); 193 return (MAC_PARSE_OK); 194 } 195 } 196 197 /* dict_regexp_regerror - report regexp compile/execute error */ 198 199 static void dict_regexp_regerror(const char *mapname, int lineno, int error, 200 const regex_t *expr) 201 { 202 char errbuf[256]; 203 204 (void) regerror(error, expr, errbuf, sizeof(errbuf)); 205 msg_warn("regexp map %s, line %d: %s", mapname, lineno, errbuf); 206 } 207 208 /* 209 * Inlined to reduce function call overhead in the time-critical loop. 210 */ 211 #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \ 212 ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \ 213 ((err) == REG_NOMATCH ? !(match) : \ 214 (err) == 0 ? (match) : \ 215 (dict_regexp_regerror((map), (line), (err), (expr)), 0))) 216 217 /* dict_regexp_lookup - match string and perform optional substitution */ 218 219 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string) 220 { 221 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 222 DICT_REGEXP_RULE *rule; 223 DICT_REGEXP_IF_RULE *if_rule; 224 DICT_REGEXP_MATCH_RULE *match_rule; 225 DICT_REGEXP_EXPAND_CONTEXT expand_context; 226 int error; 227 228 dict->error = 0; 229 230 if (msg_verbose) 231 msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string); 232 233 /* 234 * Optionally fold the key. 235 */ 236 if (dict->flags & DICT_FLAG_FOLD_MUL) { 237 if (dict->fold_buf == 0) 238 dict->fold_buf = vstring_alloc(10); 239 vstring_strcpy(dict->fold_buf, lookup_string); 240 lookup_string = lowercase(vstring_str(dict->fold_buf)); 241 } 242 for (rule = dict_regexp->head; rule; rule = rule->next) { 243 244 switch (rule->op) { 245 246 /* 247 * Search for the first matching primary expression. Limit the 248 * overhead for substring substitution to the bare minimum. 249 */ 250 case DICT_REGEXP_OP_MATCH: 251 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 252 if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 253 match_rule->first_exp, 254 match_rule->first_match, 255 lookup_string, 256 match_rule->max_sub > 0 ? 257 match_rule->max_sub + 1 : 0, 258 dict_regexp->pmatch)) 259 continue; 260 if (match_rule->second_exp 261 && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 262 match_rule->second_exp, 263 match_rule->second_match, 264 lookup_string, 265 NULL_SUBSTITUTIONS, 266 NULL_MATCH_RESULT)) 267 continue; 268 269 /* 270 * Skip $number substitutions when the replacement text contains 271 * no $number strings, as learned during the compile time 272 * pre-scan. The pre-scan already replaced $$ by $. 273 */ 274 if (match_rule->max_sub == 0) 275 return (match_rule->replacement); 276 277 /* 278 * Perform $number substitutions on the replacement text. We 279 * pre-scanned the replacement text at compile time. Any macro 280 * expansion errors at this point mean something impossible has 281 * happened. 282 */ 283 if (!dict_regexp->expansion_buf) 284 dict_regexp->expansion_buf = vstring_alloc(10); 285 VSTRING_RESET(dict_regexp->expansion_buf); 286 expand_context.lookup_string = lookup_string; 287 expand_context.match_rule = match_rule; 288 expand_context.dict_regexp = dict_regexp; 289 290 if (mac_parse(match_rule->replacement, dict_regexp_expand, 291 (void *) &expand_context) & MAC_PARSE_ERROR) 292 msg_panic("regexp map %s, line %d: bad replacement syntax", 293 dict->name, rule->lineno); 294 VSTRING_TERMINATE(dict_regexp->expansion_buf); 295 return (vstring_str(dict_regexp->expansion_buf)); 296 297 /* 298 * Conditional. 299 */ 300 case DICT_REGEXP_OP_IF: 301 if_rule = (DICT_REGEXP_IF_RULE *) rule; 302 if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 303 if_rule->expr, if_rule->match, lookup_string, 304 NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) 305 continue; 306 /* An IF without matching ENDIF has no "endif" rule. */ 307 if ((rule = if_rule->endif_rule) == 0) 308 return (0); 309 /* FALLTHROUGH */ 310 311 /* 312 * ENDIF after IF. 313 */ 314 case DICT_REGEXP_OP_ENDIF: 315 continue; 316 317 default: 318 msg_panic("dict_regexp_lookup: impossible operation %d", rule->op); 319 } 320 } 321 return (0); 322 } 323 324 /* dict_regexp_close - close regexp dictionary */ 325 326 static void dict_regexp_close(DICT *dict) 327 { 328 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 329 DICT_REGEXP_RULE *rule; 330 DICT_REGEXP_RULE *next; 331 DICT_REGEXP_MATCH_RULE *match_rule; 332 DICT_REGEXP_IF_RULE *if_rule; 333 334 for (rule = dict_regexp->head; rule; rule = next) { 335 next = rule->next; 336 switch (rule->op) { 337 case DICT_REGEXP_OP_MATCH: 338 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 339 if (match_rule->first_exp) { 340 regfree(match_rule->first_exp); 341 myfree((void *) match_rule->first_exp); 342 } 343 if (match_rule->second_exp) { 344 regfree(match_rule->second_exp); 345 myfree((void *) match_rule->second_exp); 346 } 347 if (match_rule->replacement) 348 myfree((void *) match_rule->replacement); 349 break; 350 case DICT_REGEXP_OP_IF: 351 if_rule = (DICT_REGEXP_IF_RULE *) rule; 352 if (if_rule->expr) { 353 regfree(if_rule->expr); 354 myfree((void *) if_rule->expr); 355 } 356 break; 357 case DICT_REGEXP_OP_ENDIF: 358 break; 359 default: 360 msg_panic("dict_regexp_close: unknown operation %d", rule->op); 361 } 362 myfree((void *) rule); 363 } 364 if (dict_regexp->pmatch) 365 myfree((void *) dict_regexp->pmatch); 366 if (dict_regexp->expansion_buf) 367 vstring_free(dict_regexp->expansion_buf); 368 if (dict->fold_buf) 369 vstring_free(dict->fold_buf); 370 dict_free(dict); 371 } 372 373 /* dict_regexp_get_pat - extract one pattern with options from rule */ 374 375 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp, 376 DICT_REGEXP_PATTERN *pat) 377 { 378 char *p = *bufp; 379 char re_delim; 380 381 /* 382 * Process negation operators. 383 */ 384 pat->match = 1; 385 for (;;) { 386 if (*p == '!') 387 pat->match = !pat->match; 388 else if (!ISSPACE(*p)) 389 break; 390 p++; 391 } 392 if (*p == 0) { 393 msg_warn("regexp map %s, line %d: no regexp: skipping this rule", 394 mapname, lineno); 395 return (0); 396 } 397 398 /* 399 * Search for the closing delimiter, handling backslash escape. 400 */ 401 re_delim = *p++; 402 pat->regexp = p; 403 while (*p) { 404 if (*p == '\\') { 405 if (p[1]) 406 p++; 407 else 408 break; 409 } else if (*p == re_delim) { 410 break; 411 } 412 ++p; 413 } 414 if (!*p) { 415 msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": " 416 "skipping this rule", mapname, lineno, re_delim); 417 return (0); 418 } 419 *p++ = 0; /* null terminate */ 420 421 /* 422 * Search for options. 423 */ 424 pat->options = REG_EXTENDED | REG_ICASE; 425 while (*p && !ISSPACE(*p) && *p != '!') { 426 switch (*p) { 427 case 'i': 428 pat->options ^= REG_ICASE; 429 break; 430 case 'm': 431 pat->options ^= REG_NEWLINE; 432 break; 433 case 'x': 434 pat->options ^= REG_EXTENDED; 435 break; 436 default: 437 msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": " 438 "skipping this rule", mapname, lineno, *p); 439 return (0); 440 } 441 ++p; 442 } 443 *bufp = p; 444 return (1); 445 } 446 447 /* dict_regexp_get_pats - get the primary and second patterns and flags */ 448 449 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p, 450 DICT_REGEXP_PATTERN *first_pat, 451 DICT_REGEXP_PATTERN *second_pat) 452 { 453 454 /* 455 * Get the primary and optional secondary patterns and their flags. 456 */ 457 if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0) 458 return (0); 459 if (**p == '!') { 460 #if 0 461 static int bitrot_warned = 0; 462 463 if (bitrot_warned == 0) { 464 msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away," 465 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead", 466 mapname, lineno); 467 bitrot_warned = 1; 468 } 469 #endif 470 if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0) 471 return (0); 472 } else { 473 second_pat->regexp = 0; 474 } 475 return (1); 476 } 477 478 /* dict_regexp_prescan - find largest $number in replacement text */ 479 480 static int dict_regexp_prescan(int type, VSTRING *buf, void *context) 481 { 482 DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context; 483 size_t n; 484 485 /* 486 * Keep a copy of literal text (with $$ already replaced by $) if and 487 * only if the replacement text contains no $number expression. This way 488 * we can avoid having to scan the replacement text at lookup time. 489 */ 490 if (type == MAC_PARSE_VARNAME) { 491 if (ctxt->literal) { 492 myfree(ctxt->literal); 493 ctxt->literal = 0; 494 } 495 if (!alldig(vstring_str(buf))) { 496 msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"", 497 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 498 return (MAC_PARSE_ERROR); 499 } 500 n = atoi(vstring_str(buf)); 501 if (n < 1) { 502 msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"", 503 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 504 return (MAC_PARSE_ERROR); 505 } 506 if (n > ctxt->max_sub) 507 ctxt->max_sub = n; 508 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 509 if (ctxt->literal) 510 msg_panic("regexp map %s, line %d: multiple literals but no $number", 511 ctxt->mapname, ctxt->lineno); 512 ctxt->literal = mystrdup(vstring_str(buf)); 513 } 514 return (MAC_PARSE_OK); 515 } 516 517 /* dict_regexp_compile_pat - compile one pattern */ 518 519 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno, 520 DICT_REGEXP_PATTERN *pat) 521 { 522 int error; 523 regex_t *expr; 524 525 expr = (regex_t *) mymalloc(sizeof(*expr)); 526 error = regcomp(expr, pat->regexp, pat->options); 527 if (error != 0) { 528 dict_regexp_regerror(mapname, lineno, error, expr); 529 myfree((void *) expr); 530 return (0); 531 } 532 return (expr); 533 } 534 535 /* dict_regexp_rule_alloc - fill in a generic rule structure */ 536 537 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size) 538 { 539 DICT_REGEXP_RULE *rule; 540 541 rule = (DICT_REGEXP_RULE *) mymalloc(size); 542 rule->op = op; 543 rule->lineno = lineno; 544 rule->next = 0; 545 546 return (rule); 547 } 548 549 /* dict_regexp_parseline - parse one rule */ 550 551 static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname, 552 int lineno, char *line, 553 int nesting) 554 { 555 char *p; 556 557 p = line; 558 559 /* 560 * An ordinary rule takes one or two patterns and replacement text. 561 */ 562 if (!ISALNUM(*p)) { 563 DICT_REGEXP_PATTERN first_pat; 564 DICT_REGEXP_PATTERN second_pat; 565 DICT_REGEXP_PRESCAN_CONTEXT prescan_context; 566 regex_t *first_exp = 0; 567 regex_t *second_exp; 568 DICT_REGEXP_MATCH_RULE *match_rule; 569 570 /* 571 * Get the primary and the optional secondary patterns. 572 */ 573 if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat)) 574 return (0); 575 576 /* 577 * Get the replacement text. 578 */ 579 while (*p && ISSPACE(*p)) 580 ++p; 581 if (!*p) { 582 msg_warn("regexp map %s, line %d: no replacement text: " 583 "using empty string", mapname, lineno); 584 } 585 586 /* 587 * Find the highest-numbered $number in the replacement text. We can 588 * speed up pattern matching 1) by passing hints to the regexp 589 * compiler, setting the REG_NOSUB flag when the replacement text 590 * contains no $number string; 2) by passing hints to the regexp 591 * execution code, limiting the amount of text that is made available 592 * for substitution. 593 */ 594 prescan_context.mapname = mapname; 595 prescan_context.lineno = lineno; 596 prescan_context.max_sub = 0; 597 prescan_context.literal = 0; 598 599 /* 600 * The optimizer will eliminate code duplication and/or dead code. 601 */ 602 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 603 if (first_exp) { \ 604 regfree(first_exp); \ 605 myfree((void *) first_exp); \ 606 } \ 607 if (prescan_context.literal) \ 608 myfree(prescan_context.literal); \ 609 return (rval); \ 610 } while (0) 611 612 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { 613 VSTRING *base64_buf; 614 char *err; 615 616 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { 617 err = dict_file_get_error(dict); 618 msg_warn("regexp map %s, line %d: %s: skipping this rule", 619 mapname, lineno, err); 620 myfree(err); 621 CREATE_MATCHOP_ERROR_RETURN(0); 622 } 623 p = vstring_str(base64_buf); 624 } 625 if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context) 626 & MAC_PARSE_ERROR) { 627 msg_warn("regexp map %s, line %d: bad replacement syntax: " 628 "skipping this rule", mapname, lineno); 629 CREATE_MATCHOP_ERROR_RETURN(0); 630 } 631 632 /* 633 * Compile the primary and the optional secondary pattern. Speed up 634 * execution when no matched text needs to be substituted into the 635 * result string, or when the highest numbered substring is less than 636 * the total number of () subpatterns. 637 */ 638 if (prescan_context.max_sub == 0) 639 first_pat.options |= REG_NOSUB; 640 if (prescan_context.max_sub > 0 && first_pat.match == 0) { 641 msg_warn("regexp map %s, line %d: $number found in negative match " 642 "replacement text: skipping this rule", mapname, lineno); 643 CREATE_MATCHOP_ERROR_RETURN(0); 644 } 645 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { 646 msg_warn("regexp map %s, line %d: " 647 "regular expression substitution is not allowed: " 648 "skipping this rule", mapname, lineno); 649 CREATE_MATCHOP_ERROR_RETURN(0); 650 } 651 if ((first_exp = dict_regexp_compile_pat(mapname, lineno, 652 &first_pat)) == 0) 653 CREATE_MATCHOP_ERROR_RETURN(0); 654 if (prescan_context.max_sub > first_exp->re_nsub) { 655 msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": " 656 "skipping this rule", mapname, lineno, 657 (int) prescan_context.max_sub); 658 CREATE_MATCHOP_ERROR_RETURN(0); 659 } 660 if (second_pat.regexp != 0) { 661 second_pat.options |= REG_NOSUB; 662 if ((second_exp = dict_regexp_compile_pat(mapname, lineno, 663 &second_pat)) == 0) 664 CREATE_MATCHOP_ERROR_RETURN(0); 665 } else { 666 second_exp = 0; 667 } 668 match_rule = (DICT_REGEXP_MATCH_RULE *) 669 dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno, 670 sizeof(DICT_REGEXP_MATCH_RULE)); 671 match_rule->first_exp = first_exp; 672 match_rule->first_match = first_pat.match; 673 match_rule->max_sub = prescan_context.max_sub; 674 match_rule->second_exp = second_exp; 675 match_rule->second_match = second_pat.match; 676 if (prescan_context.literal) 677 match_rule->replacement = prescan_context.literal; 678 else 679 match_rule->replacement = mystrdup(p); 680 return ((DICT_REGEXP_RULE *) match_rule); 681 } 682 683 /* 684 * The IF operator takes one pattern but no replacement text. 685 */ 686 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 687 DICT_REGEXP_PATTERN pattern; 688 regex_t *expr; 689 DICT_REGEXP_IF_RULE *if_rule; 690 691 p += 2; 692 while (*p && ISSPACE(*p)) 693 p++; 694 if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern)) 695 return (0); 696 while (*p && ISSPACE(*p)) 697 ++p; 698 if (*p) { 699 msg_warn("regexp map %s, line %d: ignoring extra text after" 700 " IF statement: \"%s\"", mapname, lineno, p); 701 msg_warn("regexp map %s, line %d: do not prepend whitespace" 702 " to statements between IF and ENDIF", mapname, lineno); 703 } 704 if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0) 705 return (0); 706 if_rule = (DICT_REGEXP_IF_RULE *) 707 dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno, 708 sizeof(DICT_REGEXP_IF_RULE)); 709 if_rule->expr = expr; 710 if_rule->match = pattern.match; 711 if_rule->endif_rule = 0; 712 return ((DICT_REGEXP_RULE *) if_rule); 713 } 714 715 /* 716 * The ENDIF operator takes no patterns and no replacement text. 717 */ 718 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 719 DICT_REGEXP_RULE *rule; 720 721 p += 5; 722 if (nesting == 0) { 723 msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF", 724 mapname, lineno); 725 return (0); 726 } 727 while (*p && ISSPACE(*p)) 728 ++p; 729 if (*p) 730 msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF", 731 mapname, lineno); 732 rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno, 733 sizeof(DICT_REGEXP_RULE)); 734 return (rule); 735 } 736 737 /* 738 * Unrecognized input. 739 */ 740 else { 741 msg_warn("regexp map %s, line %d: ignoring unrecognized request", 742 mapname, lineno); 743 return (0); 744 } 745 } 746 747 /* dict_regexp_open - load and compile a file containing regular expressions */ 748 749 DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags) 750 { 751 const char myname[] = "dict_regexp_open"; 752 DICT_REGEXP *dict_regexp; 753 VSTREAM *map_fp = 0; 754 struct stat st; 755 VSTRING *line_buffer = 0; 756 DICT_REGEXP_RULE *rule; 757 DICT_REGEXP_RULE *last_rule = 0; 758 int lineno; 759 int last_line = 0; 760 size_t max_sub = 0; 761 int nesting = 0; 762 char *p; 763 DICT_REGEXP_RULE **rule_stack = 0; 764 MVECT mvect; 765 766 /* 767 * Let the optimizer worry about eliminating redundant code. 768 */ 769 #define DICT_REGEXP_OPEN_RETURN(d) do { \ 770 DICT *__d = (d); \ 771 if (line_buffer != 0) \ 772 vstring_free(line_buffer); \ 773 if (map_fp != 0) \ 774 vstream_fclose(map_fp); \ 775 return (__d); \ 776 } while (0) 777 778 /* 779 * Sanity checks. 780 */ 781 if (open_flags != O_RDONLY) 782 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, 783 mapname, open_flags, dict_flags, 784 "%s:%s map requires O_RDONLY access mode", 785 DICT_TYPE_REGEXP, mapname)); 786 787 /* 788 * Open the configuration file. 789 */ 790 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 791 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname, 792 open_flags, dict_flags, 793 "open %s: %m", mapname)); 794 if (fstat(vstream_fileno(map_fp), &st) < 0) 795 msg_fatal("fstat %s: %m", mapname); 796 797 line_buffer = vstring_alloc(100); 798 799 dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname, 800 sizeof(*dict_regexp)); 801 dict_regexp->dict.lookup = dict_regexp_lookup; 802 dict_regexp->dict.close = dict_regexp_close; 803 dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN; 804 if (dict_flags & DICT_FLAG_FOLD_MUL) 805 dict_regexp->dict.fold_buf = vstring_alloc(10); 806 dict_regexp->head = 0; 807 dict_regexp->pmatch = 0; 808 dict_regexp->expansion_buf = 0; 809 dict_regexp->dict.owner.uid = st.st_uid; 810 dict_regexp->dict.owner.status = (st.st_uid != 0); 811 812 /* 813 * Parse the regexp table. 814 */ 815 while (readllines(line_buffer, map_fp, &last_line, &lineno)) { 816 p = vstring_str(line_buffer); 817 trimblanks(p, 0)[0] = 0; 818 if (*p == 0) 819 continue; 820 rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno, 821 p, nesting); 822 if (rule == 0) 823 continue; 824 if (rule->op == DICT_REGEXP_OP_MATCH) { 825 if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub) 826 max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub; 827 } else if (rule->op == DICT_REGEXP_OP_IF) { 828 if (rule_stack == 0) 829 rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect, 830 sizeof(*rule_stack), nesting + 1, 831 (MVECT_FN) 0, (MVECT_FN) 0); 832 else 833 rule_stack = 834 (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1); 835 rule_stack[nesting] = rule; 836 nesting++; 837 } else if (rule->op == DICT_REGEXP_OP_ENDIF) { 838 DICT_REGEXP_IF_RULE *if_rule; 839 840 if (nesting-- <= 0) 841 /* Already handled in dict_regexp_parseline(). */ 842 msg_panic("%s: ENDIF without IF", myname); 843 if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF) 844 msg_panic("%s: unexpected rule stack element type %d", 845 myname, rule_stack[nesting]->op); 846 if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting]; 847 if_rule->endif_rule = rule; 848 } 849 if (last_rule == 0) 850 dict_regexp->head = rule; 851 else 852 last_rule->next = rule; 853 last_rule = rule; 854 } 855 856 while (nesting-- > 0) 857 msg_warn("regexp map %s, line %d: IF has no matching ENDIF", 858 mapname, rule_stack[nesting]->lineno); 859 860 if (rule_stack) 861 (void) mvect_free(&mvect); 862 863 /* 864 * Allocate space for only as many matched substrings as used in the 865 * replacement text. 866 */ 867 if (max_sub > 0) 868 dict_regexp->pmatch = 869 (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1)); 870 871 dict_file_purge_buffers(&dict_regexp->dict); 872 DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict)); 873 } 874 875 #endif 876