1 /* $NetBSD: dict_regexp.c,v 1.1.1.3 2014/07/06 19:27:58 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_regexp 3 6 /* SUMMARY 7 /* dictionary manager interface to REGEXP regular expression library 8 /* SYNOPSIS 9 /* #include <dict_regexp.h> 10 /* 11 /* DICT *dict_regexp_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_regexp_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* regexp_table(5) format of Postfix regular expression tables 22 /* AUTHOR(S) 23 /* LaMont Jones 24 /* lamont@hp.com 25 /* 26 /* Based on PCRE dictionary contributed by Andrew McNamara 27 /* andrewm@connect.com.au 28 /* connect.com.au Pty. Ltd. 29 /* Level 3, 213 Miller St 30 /* North Sydney, NSW, Australia 31 /* 32 /* Heavily rewritten by Wietse Venema 33 /* IBM T.J. Watson Research 34 /* P.O. Box 704 35 /* Yorktown Heights, NY 10598, USA 36 /*--*/ 37 38 /* System library. */ 39 40 #include "sys_defs.h" 41 42 #ifdef HAS_POSIX_REGEXP 43 44 #include <sys/stat.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <string.h> 48 #include <ctype.h> 49 #include <regex.h> 50 #ifdef STRCASECMP_IN_STRINGS_H 51 #include <strings.h> 52 #endif 53 54 /* Utility library. */ 55 56 #include "mymalloc.h" 57 #include "msg.h" 58 #include "safe.h" 59 #include "vstream.h" 60 #include "vstring.h" 61 #include "stringops.h" 62 #include "readlline.h" 63 #include "dict.h" 64 #include "dict_regexp.h" 65 #include "mac_parse.h" 66 #include "warn_stat.h" 67 68 /* 69 * Support for IF/ENDIF based on an idea by Bert Driehuis. 70 */ 71 #define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */ 72 #define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */ 73 #define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 74 75 /* 76 * Regular expression before compiling. 77 */ 78 typedef struct { 79 char *regexp; /* regular expression */ 80 int options; /* regcomp() options */ 81 int match; /* positive or negative match */ 82 } DICT_REGEXP_PATTERN; 83 84 /* 85 * Compiled generic rule, and subclasses that derive from it. 86 */ 87 typedef struct DICT_REGEXP_RULE { 88 int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */ 89 int nesting; /* Level of search nesting */ 90 int lineno; /* source file line number */ 91 struct DICT_REGEXP_RULE *next; /* next rule in dict */ 92 } DICT_REGEXP_RULE; 93 94 typedef struct { 95 DICT_REGEXP_RULE rule; /* generic part */ 96 regex_t *first_exp; /* compiled primary pattern */ 97 int first_match; /* positive or negative match */ 98 regex_t *second_exp; /* compiled secondary pattern */ 99 int second_match; /* positive or negative match */ 100 char *replacement; /* replacement text */ 101 size_t max_sub; /* largest $number in replacement */ 102 } DICT_REGEXP_MATCH_RULE; 103 104 typedef struct { 105 DICT_REGEXP_RULE rule; /* generic members */ 106 regex_t *expr; /* the condition */ 107 int match; /* positive or negative match */ 108 } DICT_REGEXP_IF_RULE; 109 110 /* 111 * Regexp map. 112 */ 113 typedef struct { 114 DICT dict; /* generic members */ 115 regmatch_t *pmatch; /* matched substring info */ 116 DICT_REGEXP_RULE *head; /* first rule */ 117 VSTRING *expansion_buf; /* lookup result */ 118 } DICT_REGEXP; 119 120 /* 121 * Macros to make dense code more readable. 122 */ 123 #define NULL_SUBSTITUTIONS (0) 124 #define NULL_MATCH_RESULT ((regmatch_t *) 0) 125 126 /* 127 * Context for $number expansion callback. 128 */ 129 typedef struct { 130 DICT_REGEXP *dict_regexp; /* the dictionary handle */ 131 DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */ 132 const char *lookup_string; /* matched text */ 133 } DICT_REGEXP_EXPAND_CONTEXT; 134 135 /* 136 * Context for $number pre-scan callback. 137 */ 138 typedef struct { 139 const char *mapname; /* name of regexp map */ 140 int lineno; /* where in file */ 141 size_t max_sub; /* largest $number seen */ 142 char *literal; /* constant result, $$ -> $ */ 143 } DICT_REGEXP_PRESCAN_CONTEXT; 144 145 /* 146 * Compatibility. 147 */ 148 #ifndef MAC_PARSE_OK 149 #define MAC_PARSE_OK 0 150 #endif 151 152 /* dict_regexp_expand - replace $number with substring from matched text */ 153 154 static int dict_regexp_expand(int type, VSTRING *buf, char *ptr) 155 { 156 DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr; 157 DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule; 158 DICT_REGEXP *dict_regexp = ctxt->dict_regexp; 159 regmatch_t *pmatch; 160 size_t n; 161 162 /* 163 * Replace $number by the corresponding substring from the matched text. 164 * We pre-scanned the replacement text at compile time, so any out of 165 * range $number means that something impossible has happened. 166 */ 167 if (type == MAC_PARSE_VARNAME) { 168 n = atoi(vstring_str(buf)); 169 if (n < 1 || n > match_rule->max_sub) 170 msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"", 171 dict_regexp->dict.name, match_rule->rule.lineno, 172 vstring_str(buf)); 173 pmatch = dict_regexp->pmatch + n; 174 if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo) 175 return (MAC_PARSE_UNDEF); /* empty or not matched */ 176 vstring_strncat(dict_regexp->expansion_buf, 177 ctxt->lookup_string + pmatch->rm_so, 178 pmatch->rm_eo - pmatch->rm_so); 179 return (MAC_PARSE_OK); 180 } 181 182 /* 183 * Straight text - duplicate with no substitution. 184 */ 185 else { 186 vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf)); 187 return (MAC_PARSE_OK); 188 } 189 } 190 191 /* dict_regexp_regerror - report regexp compile/execute error */ 192 193 static void dict_regexp_regerror(const char *mapname, int lineno, int error, 194 const regex_t *expr) 195 { 196 char errbuf[256]; 197 198 (void) regerror(error, expr, errbuf, sizeof(errbuf)); 199 msg_warn("regexp map %s, line %d: %s", mapname, lineno, errbuf); 200 } 201 202 /* 203 * Inlined to reduce function call overhead in the time-critical loop. 204 */ 205 #define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \ 206 ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \ 207 ((err) == REG_NOMATCH ? !(match) : \ 208 (err) == 0 ? (match) : \ 209 (dict_regexp_regerror((map), (line), (err), (expr)), 0))) 210 211 /* dict_regexp_lookup - match string and perform optional substitution */ 212 213 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string) 214 { 215 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 216 DICT_REGEXP_RULE *rule; 217 DICT_REGEXP_IF_RULE *if_rule; 218 DICT_REGEXP_MATCH_RULE *match_rule; 219 DICT_REGEXP_EXPAND_CONTEXT expand_context; 220 int error; 221 int nesting = 0; 222 223 dict->error = 0; 224 225 if (msg_verbose) 226 msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string); 227 228 /* 229 * Optionally fold the key. 230 */ 231 if (dict->flags & DICT_FLAG_FOLD_MUL) { 232 if (dict->fold_buf == 0) 233 dict->fold_buf = vstring_alloc(10); 234 vstring_strcpy(dict->fold_buf, lookup_string); 235 lookup_string = lowercase(vstring_str(dict->fold_buf)); 236 } 237 for (rule = dict_regexp->head; rule; rule = rule->next) { 238 239 /* 240 * Skip rules inside failed IF/ENDIF. 241 */ 242 if (nesting < rule->nesting) 243 continue; 244 245 switch (rule->op) { 246 247 /* 248 * Search for the first matching primary expression. Limit the 249 * overhead for substring substitution to the bare minimum. 250 */ 251 case DICT_REGEXP_OP_MATCH: 252 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 253 if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 254 match_rule->first_exp, 255 match_rule->first_match, 256 lookup_string, 257 match_rule->max_sub > 0 ? 258 match_rule->max_sub + 1 : 0, 259 dict_regexp->pmatch)) 260 continue; 261 if (match_rule->second_exp 262 && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 263 match_rule->second_exp, 264 match_rule->second_match, 265 lookup_string, 266 NULL_SUBSTITUTIONS, 267 NULL_MATCH_RESULT)) 268 continue; 269 270 /* 271 * Skip $number substitutions when the replacement text contains 272 * no $number strings, as learned during the compile time 273 * pre-scan. The pre-scan already replaced $$ by $. 274 */ 275 if (match_rule->max_sub == 0) 276 return (match_rule->replacement); 277 278 /* 279 * Perform $number substitutions on the replacement text. We 280 * pre-scanned the replacement text at compile time. Any macro 281 * expansion errors at this point mean something impossible has 282 * happened. 283 */ 284 if (!dict_regexp->expansion_buf) 285 dict_regexp->expansion_buf = vstring_alloc(10); 286 VSTRING_RESET(dict_regexp->expansion_buf); 287 expand_context.lookup_string = lookup_string; 288 expand_context.match_rule = match_rule; 289 expand_context.dict_regexp = dict_regexp; 290 291 if (mac_parse(match_rule->replacement, dict_regexp_expand, 292 (char *) &expand_context) & MAC_PARSE_ERROR) 293 msg_panic("regexp map %s, line %d: bad replacement syntax", 294 dict->name, rule->lineno); 295 VSTRING_TERMINATE(dict_regexp->expansion_buf); 296 return (vstring_str(dict_regexp->expansion_buf)); 297 298 /* 299 * Conditional. 300 */ 301 case DICT_REGEXP_OP_IF: 302 if_rule = (DICT_REGEXP_IF_RULE *) rule; 303 if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 304 if_rule->expr, if_rule->match, lookup_string, 305 NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) 306 nesting++; 307 continue; 308 309 /* 310 * ENDIF after successful IF. 311 */ 312 case DICT_REGEXP_OP_ENDIF: 313 nesting--; 314 continue; 315 316 default: 317 msg_panic("dict_regexp_lookup: impossible operation %d", rule->op); 318 } 319 } 320 return (0); 321 } 322 323 /* dict_regexp_close - close regexp dictionary */ 324 325 static void dict_regexp_close(DICT *dict) 326 { 327 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 328 DICT_REGEXP_RULE *rule; 329 DICT_REGEXP_RULE *next; 330 DICT_REGEXP_MATCH_RULE *match_rule; 331 DICT_REGEXP_IF_RULE *if_rule; 332 333 for (rule = dict_regexp->head; rule; rule = next) { 334 next = rule->next; 335 switch (rule->op) { 336 case DICT_REGEXP_OP_MATCH: 337 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 338 if (match_rule->first_exp) { 339 regfree(match_rule->first_exp); 340 myfree((char *) match_rule->first_exp); 341 } 342 if (match_rule->second_exp) { 343 regfree(match_rule->second_exp); 344 myfree((char *) match_rule->second_exp); 345 } 346 if (match_rule->replacement) 347 myfree((char *) match_rule->replacement); 348 break; 349 case DICT_REGEXP_OP_IF: 350 if_rule = (DICT_REGEXP_IF_RULE *) rule; 351 if (if_rule->expr) { 352 regfree(if_rule->expr); 353 myfree((char *) if_rule->expr); 354 } 355 break; 356 case DICT_REGEXP_OP_ENDIF: 357 break; 358 default: 359 msg_panic("dict_regexp_close: unknown operation %d", rule->op); 360 } 361 myfree((char *) rule); 362 } 363 if (dict_regexp->pmatch) 364 myfree((char *) dict_regexp->pmatch); 365 if (dict_regexp->expansion_buf) 366 vstring_free(dict_regexp->expansion_buf); 367 if (dict->fold_buf) 368 vstring_free(dict->fold_buf); 369 dict_free(dict); 370 } 371 372 /* dict_regexp_get_pat - extract one pattern with options from rule */ 373 374 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp, 375 DICT_REGEXP_PATTERN *pat) 376 { 377 char *p = *bufp; 378 char re_delim; 379 380 /* 381 * Process negation operators. 382 */ 383 pat->match = 1; 384 while (*p == '!') { 385 pat->match = !pat->match; 386 p++; 387 } 388 389 /* 390 * Grr...aceful handling of whitespace after '!'. 391 */ 392 while (*p && ISSPACE(*p)) 393 p++; 394 if (*p == 0) { 395 msg_warn("regexp map %s, line %d: no regexp: skipping this rule", 396 mapname, lineno); 397 return (0); 398 } 399 400 /* 401 * Search for the closing delimiter, handling backslash escape. 402 */ 403 re_delim = *p++; 404 pat->regexp = p; 405 while (*p) { 406 if (*p == '\\') { 407 if (p[1]) 408 p++; 409 else 410 break; 411 } else if (*p == re_delim) { 412 break; 413 } 414 ++p; 415 } 416 if (!*p) { 417 msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": " 418 "skipping this rule", mapname, lineno, re_delim); 419 return (0); 420 } 421 *p++ = 0; /* null terminate */ 422 423 /* 424 * Search for options. 425 */ 426 pat->options = REG_EXTENDED | REG_ICASE; 427 while (*p && !ISSPACE(*p) && *p != '!') { 428 switch (*p) { 429 case 'i': 430 pat->options ^= REG_ICASE; 431 break; 432 case 'm': 433 pat->options ^= REG_NEWLINE; 434 break; 435 case 'x': 436 pat->options ^= REG_EXTENDED; 437 break; 438 default: 439 msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": " 440 "skipping this rule", mapname, lineno, *p); 441 return (0); 442 } 443 ++p; 444 } 445 *bufp = p; 446 return (1); 447 } 448 449 /* dict_regexp_get_pats - get the primary and second patterns and flags */ 450 451 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p, 452 DICT_REGEXP_PATTERN *first_pat, 453 DICT_REGEXP_PATTERN *second_pat) 454 { 455 456 /* 457 * Get the primary and optional secondary patterns and their flags. 458 */ 459 if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0) 460 return (0); 461 if (**p == '!') { 462 #if 0 463 static int bitrot_warned = 0; 464 465 if (bitrot_warned == 0) { 466 msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away," 467 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead", 468 mapname, lineno); 469 bitrot_warned = 1; 470 } 471 #endif 472 if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0) 473 return (0); 474 } else { 475 second_pat->regexp = 0; 476 } 477 return (1); 478 } 479 480 /* dict_regexp_prescan - find largest $number in replacement text */ 481 482 static int dict_regexp_prescan(int type, VSTRING *buf, char *context) 483 { 484 DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context; 485 size_t n; 486 487 /* 488 * Keep a copy of literal text (with $$ already replaced by $) if and 489 * only if the replacement text contains no $number expression. This way 490 * we can avoid having to scan the replacement text at lookup time. 491 */ 492 if (type == MAC_PARSE_VARNAME) { 493 if (ctxt->literal) { 494 myfree(ctxt->literal); 495 ctxt->literal = 0; 496 } 497 if (!alldig(vstring_str(buf))) { 498 msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"", 499 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 500 return (MAC_PARSE_ERROR); 501 } 502 n = atoi(vstring_str(buf)); 503 if (n < 1) { 504 msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"", 505 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 506 return (MAC_PARSE_ERROR); 507 } 508 if (n > ctxt->max_sub) 509 ctxt->max_sub = n; 510 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 511 if (ctxt->literal) 512 msg_panic("regexp map %s, line %d: multiple literals but no $number", 513 ctxt->mapname, ctxt->lineno); 514 ctxt->literal = mystrdup(vstring_str(buf)); 515 } 516 return (MAC_PARSE_OK); 517 } 518 519 /* dict_regexp_compile_pat - compile one pattern */ 520 521 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno, 522 DICT_REGEXP_PATTERN *pat) 523 { 524 int error; 525 regex_t *expr; 526 527 expr = (regex_t *) mymalloc(sizeof(*expr)); 528 error = regcomp(expr, pat->regexp, pat->options); 529 if (error != 0) { 530 dict_regexp_regerror(mapname, lineno, error, expr); 531 myfree((char *) expr); 532 return (0); 533 } 534 return (expr); 535 } 536 537 /* dict_regexp_rule_alloc - fill in a generic rule structure */ 538 539 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting, 540 int lineno, 541 size_t size) 542 { 543 DICT_REGEXP_RULE *rule; 544 545 rule = (DICT_REGEXP_RULE *) mymalloc(size); 546 rule->op = op; 547 rule->nesting = nesting; 548 rule->lineno = lineno; 549 rule->next = 0; 550 551 return (rule); 552 } 553 554 /* dict_regexp_parseline - parse one rule */ 555 556 static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno, 557 char *line, int nesting, 558 int dict_flags) 559 { 560 char *p; 561 562 p = line; 563 564 /* 565 * An ordinary rule takes one or two patterns and replacement text. 566 */ 567 if (!ISALNUM(*p)) { 568 DICT_REGEXP_PATTERN first_pat; 569 DICT_REGEXP_PATTERN second_pat; 570 DICT_REGEXP_PRESCAN_CONTEXT prescan_context; 571 regex_t *first_exp = 0; 572 regex_t *second_exp; 573 DICT_REGEXP_MATCH_RULE *match_rule; 574 575 /* 576 * Get the primary and the optional secondary patterns. 577 */ 578 if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat)) 579 return (0); 580 581 /* 582 * Get the replacement text. 583 */ 584 while (*p && ISSPACE(*p)) 585 ++p; 586 if (!*p) { 587 msg_warn("regexp map %s, line %d: using empty replacement string", 588 mapname, lineno); 589 } 590 591 /* 592 * Find the highest-numbered $number in the replacement text. We can 593 * speed up pattern matching 1) by passing hints to the regexp 594 * compiler, setting the REG_NOSUB flag when the replacement text 595 * contains no $number string; 2) by passing hints to the regexp 596 * execution code, limiting the amount of text that is made available 597 * for substitution. 598 */ 599 prescan_context.mapname = mapname; 600 prescan_context.lineno = lineno; 601 prescan_context.max_sub = 0; 602 prescan_context.literal = 0; 603 604 /* 605 * The optimizer will eliminate code duplication and/or dead code. 606 */ 607 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 608 if (first_exp) { \ 609 regfree(first_exp); \ 610 myfree((char *) first_exp); \ 611 } \ 612 if (prescan_context.literal) \ 613 myfree(prescan_context.literal); \ 614 return (rval); \ 615 } while (0) 616 617 if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context) 618 & MAC_PARSE_ERROR) { 619 msg_warn("regexp map %s, line %d: bad replacement syntax: " 620 "skipping this rule", mapname, lineno); 621 CREATE_MATCHOP_ERROR_RETURN(0); 622 } 623 624 /* 625 * Compile the primary and the optional secondary pattern. Speed up 626 * execution when no matched text needs to be substituted into the 627 * result string, or when the highest numbered substring is less than 628 * the total number of () subpatterns. 629 */ 630 if (prescan_context.max_sub == 0) 631 first_pat.options |= REG_NOSUB; 632 if (prescan_context.max_sub > 0 && first_pat.match == 0) { 633 msg_warn("regexp map %s, line %d: $number found in negative match " 634 "replacement text: skipping this rule", mapname, lineno); 635 CREATE_MATCHOP_ERROR_RETURN(0); 636 } 637 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 638 msg_warn("regexp map %s, line %d: " 639 "regular expression substitution is not allowed: " 640 "skipping this rule", mapname, lineno); 641 CREATE_MATCHOP_ERROR_RETURN(0); 642 } 643 if ((first_exp = dict_regexp_compile_pat(mapname, lineno, 644 &first_pat)) == 0) 645 CREATE_MATCHOP_ERROR_RETURN(0); 646 if (prescan_context.max_sub > first_exp->re_nsub) { 647 msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": " 648 "skipping this rule", mapname, lineno, 649 (int) prescan_context.max_sub); 650 CREATE_MATCHOP_ERROR_RETURN(0); 651 } 652 if (second_pat.regexp != 0) { 653 second_pat.options |= REG_NOSUB; 654 if ((second_exp = dict_regexp_compile_pat(mapname, lineno, 655 &second_pat)) == 0) 656 CREATE_MATCHOP_ERROR_RETURN(0); 657 } else { 658 second_exp = 0; 659 } 660 match_rule = (DICT_REGEXP_MATCH_RULE *) 661 dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno, 662 sizeof(DICT_REGEXP_MATCH_RULE)); 663 match_rule->first_exp = first_exp; 664 match_rule->first_match = first_pat.match; 665 match_rule->max_sub = prescan_context.max_sub; 666 match_rule->second_exp = second_exp; 667 match_rule->second_match = second_pat.match; 668 if (prescan_context.literal) 669 match_rule->replacement = prescan_context.literal; 670 else 671 match_rule->replacement = mystrdup(p); 672 return ((DICT_REGEXP_RULE *) match_rule); 673 } 674 675 /* 676 * The IF operator takes one pattern but no replacement text. 677 */ 678 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 679 DICT_REGEXP_PATTERN pattern; 680 regex_t *expr; 681 DICT_REGEXP_IF_RULE *if_rule; 682 683 p += 2; 684 while (*p && ISSPACE(*p)) 685 p++; 686 if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern)) 687 return (0); 688 while (*p && ISSPACE(*p)) 689 ++p; 690 if (*p) { 691 msg_warn("regexp map %s, line %d: ignoring extra text after" 692 " IF statement: \"%s\"", mapname, lineno, p); 693 msg_warn("regexp map %s, line %d: do not prepend whitespace" 694 " to statements between IF and ENDIF", mapname, lineno); 695 } 696 if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0) 697 return (0); 698 if_rule = (DICT_REGEXP_IF_RULE *) 699 dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno, 700 sizeof(DICT_REGEXP_IF_RULE)); 701 if_rule->expr = expr; 702 if_rule->match = pattern.match; 703 return ((DICT_REGEXP_RULE *) if_rule); 704 } 705 706 /* 707 * The ENDIF operator takes no patterns and no replacement text. 708 */ 709 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 710 DICT_REGEXP_RULE *rule; 711 712 p += 5; 713 if (nesting == 0) { 714 msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF", 715 mapname, lineno); 716 return (0); 717 } 718 while (*p && ISSPACE(*p)) 719 ++p; 720 if (*p) 721 msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF", 722 mapname, lineno); 723 rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno, 724 sizeof(DICT_REGEXP_RULE)); 725 return (rule); 726 } 727 728 /* 729 * Unrecognized input. 730 */ 731 else { 732 msg_warn("regexp map %s, line %d: ignoring unrecognized request", 733 mapname, lineno); 734 return (0); 735 } 736 } 737 738 /* dict_regexp_open - load and compile a file containing regular expressions */ 739 740 DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags) 741 { 742 DICT_REGEXP *dict_regexp; 743 VSTREAM *map_fp = 0; 744 struct stat st; 745 VSTRING *line_buffer = 0; 746 DICT_REGEXP_RULE *rule; 747 DICT_REGEXP_RULE *last_rule = 0; 748 int lineno = 0; 749 size_t max_sub = 0; 750 int nesting = 0; 751 char *p; 752 753 /* 754 * Let the optimizer worry about eliminating redundant code. 755 */ 756 #define DICT_REGEXP_OPEN_RETURN(d) { \ 757 DICT *__d = (d); \ 758 if (line_buffer != 0) \ 759 vstring_free(line_buffer); \ 760 if (map_fp != 0) \ 761 vstream_fclose(map_fp); \ 762 return (__d); \ 763 } while (0) 764 765 /* 766 * Sanity checks. 767 */ 768 if (open_flags != O_RDONLY) 769 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, 770 mapname, open_flags, dict_flags, 771 "%s:%s map requires O_RDONLY access mode", 772 DICT_TYPE_REGEXP, mapname)); 773 774 /* 775 * Open the configuration file. 776 */ 777 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 778 DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname, 779 open_flags, dict_flags, 780 "open %s: %m", mapname)); 781 if (fstat(vstream_fileno(map_fp), &st) < 0) 782 msg_fatal("fstat %s: %m", mapname); 783 784 line_buffer = vstring_alloc(100); 785 786 dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname, 787 sizeof(*dict_regexp)); 788 dict_regexp->dict.lookup = dict_regexp_lookup; 789 dict_regexp->dict.close = dict_regexp_close; 790 dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN; 791 if (dict_flags & DICT_FLAG_FOLD_MUL) 792 dict_regexp->dict.fold_buf = vstring_alloc(10); 793 dict_regexp->head = 0; 794 dict_regexp->pmatch = 0; 795 dict_regexp->expansion_buf = 0; 796 dict_regexp->dict.owner.uid = st.st_uid; 797 dict_regexp->dict.owner.status = (st.st_uid != 0); 798 799 /* 800 * Parse the regexp table. 801 */ 802 while (readlline(line_buffer, map_fp, &lineno)) { 803 p = vstring_str(line_buffer); 804 trimblanks(p, 0)[0] = 0; 805 if (*p == 0) 806 continue; 807 rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags); 808 if (rule == 0) 809 continue; 810 if (rule->op == DICT_REGEXP_OP_MATCH) { 811 if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub) 812 max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub; 813 } else if (rule->op == DICT_REGEXP_OP_IF) { 814 nesting++; 815 } else if (rule->op == DICT_REGEXP_OP_ENDIF) { 816 nesting--; 817 } 818 if (last_rule == 0) 819 dict_regexp->head = rule; 820 else 821 last_rule->next = rule; 822 last_rule = rule; 823 } 824 825 if (nesting) 826 msg_warn("regexp map %s, line %d: more IFs than ENDIFs", 827 mapname, lineno); 828 829 /* 830 * Allocate space for only as many matched substrings as used in the 831 * replacement text. 832 */ 833 if (max_sub > 0) 834 dict_regexp->pmatch = 835 (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1)); 836 837 DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict)); 838 } 839 840 #endif 841