1 /* $NetBSD: dict_pcre.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /*--*/ 33 34 #include "sys_defs.h" 35 36 #ifdef HAS_PCRE 37 38 /* System library. */ 39 40 #include <sys/stat.h> 41 #include <stdio.h> /* sprintf() prototype */ 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #include <ctype.h> 46 47 #ifdef STRCASECMP_IN_STRINGS_H 48 #include <strings.h> 49 #endif 50 51 /* Utility library. */ 52 53 #include "mymalloc.h" 54 #include "msg.h" 55 #include "safe.h" 56 #include "vstream.h" 57 #include "vstring.h" 58 #include "stringops.h" 59 #include "readlline.h" 60 #include "dict.h" 61 #include "dict_pcre.h" 62 #include "mac_parse.h" 63 #include "pcre.h" 64 #include "warn_stat.h" 65 66 /* 67 * Backwards compatibility. 68 */ 69 #ifdef PCRE_STUDY_JIT_COMPILE 70 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x) 71 #else 72 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x)) 73 #endif 74 75 /* 76 * Support for IF/ENDIF based on an idea by Bert Driehuis. 77 */ 78 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 79 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 80 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 81 82 /* 83 * Max strings captured by regexp - essentially the max number of (..) 84 */ 85 #define PCRE_MAX_CAPTURE 99 86 87 /* 88 * Regular expression before and after compilation. 89 */ 90 typedef struct { 91 char *regexp; /* regular expression */ 92 int options; /* options */ 93 int match; /* positive or negative match */ 94 } DICT_PCRE_REGEXP; 95 96 typedef struct { 97 pcre *pattern; /* the compiled pattern */ 98 pcre_extra *hints; /* hints to speed pattern execution */ 99 } DICT_PCRE_ENGINE; 100 101 /* 102 * Compiled generic rule, and subclasses that derive from it. 103 */ 104 typedef struct DICT_PCRE_RULE { 105 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 106 int nesting; /* level of IF/ENDIF nesting */ 107 int lineno; /* source file line number */ 108 struct DICT_PCRE_RULE *next; /* next rule in dict */ 109 } DICT_PCRE_RULE; 110 111 typedef struct { 112 DICT_PCRE_RULE rule; /* generic part */ 113 pcre *pattern; /* compiled pattern */ 114 pcre_extra *hints; /* hints to speed pattern execution */ 115 char *replacement; /* replacement string */ 116 int match; /* positive or negative match */ 117 size_t max_sub; /* largest $number in replacement */ 118 } DICT_PCRE_MATCH_RULE; 119 120 typedef struct { 121 DICT_PCRE_RULE rule; /* generic members */ 122 pcre *pattern; /* compiled pattern */ 123 pcre_extra *hints; /* hints to speed pattern execution */ 124 int match; /* positive or negative match */ 125 } DICT_PCRE_IF_RULE; 126 127 /* 128 * PCRE map. 129 */ 130 typedef struct { 131 DICT dict; /* generic members */ 132 DICT_PCRE_RULE *head; 133 VSTRING *expansion_buf; /* lookup result */ 134 } DICT_PCRE; 135 136 static int dict_pcre_init = 0; /* flag need to init pcre library */ 137 138 /* 139 * Context for $number expansion callback. 140 */ 141 typedef struct { 142 DICT_PCRE *dict_pcre; /* the dictionary handle */ 143 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 144 const char *lookup_string; /* string against which we match */ 145 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 146 int matches; /* Count of cuts */ 147 } DICT_PCRE_EXPAND_CONTEXT; 148 149 /* 150 * Context for $number pre-scan callback. 151 */ 152 typedef struct { 153 const char *mapname; /* name of regexp map */ 154 int lineno; /* where in file */ 155 size_t max_sub; /* Largest $n seen */ 156 char *literal; /* constant result, $$ -> $ */ 157 } DICT_PCRE_PRESCAN_CONTEXT; 158 159 /* 160 * Compatibility. 161 */ 162 #ifndef MAC_PARSE_OK 163 #define MAC_PARSE_OK 0 164 #endif 165 166 /* 167 * Macros to make dense code more accessible. 168 */ 169 #define NULL_STARTOFFSET (0) 170 #define NULL_EXEC_OPTIONS (0) 171 #define NULL_OVECTOR ((int *) 0) 172 #define NULL_OVECTOR_LENGTH (0) 173 174 /* dict_pcre_expand - replace $number with matched text */ 175 176 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr) 177 { 178 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 179 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 180 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 181 const char *pp; 182 int n; 183 int ret; 184 185 /* 186 * Replace $0-${99} with strings cut from matched text. 187 */ 188 if (type == MAC_PARSE_VARNAME) { 189 n = atoi(vstring_str(buf)); 190 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 191 ctxt->matches, n, &pp); 192 if (ret < 0) { 193 if (ret == PCRE_ERROR_NOSUBSTRING) 194 return (MAC_PARSE_UNDEF); 195 else 196 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 197 dict_pcre->dict.name, match_rule->rule.lineno, ret); 198 } 199 if (*pp == 0) { 200 myfree((void *) pp); 201 return (MAC_PARSE_UNDEF); 202 } 203 vstring_strcat(dict_pcre->expansion_buf, pp); 204 myfree((void *) pp); 205 return (MAC_PARSE_OK); 206 } 207 208 /* 209 * Straight text - duplicate with no substitution. 210 */ 211 else { 212 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 213 return (MAC_PARSE_OK); 214 } 215 } 216 217 /* dict_pcre_exec_error - report matching error */ 218 219 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 220 { 221 switch (errval) { 222 case 0: 223 msg_warn("pcre map %s, line %d: too many (...)", 224 mapname, lineno); 225 return; 226 case PCRE_ERROR_NULL: 227 case PCRE_ERROR_BADOPTION: 228 msg_warn("pcre map %s, line %d: bad args to re_exec", 229 mapname, lineno); 230 return; 231 case PCRE_ERROR_BADMAGIC: 232 case PCRE_ERROR_UNKNOWN_NODE: 233 msg_warn("pcre map %s, line %d: corrupt compiled regexp", 234 mapname, lineno); 235 return; 236 #ifdef PCRE_ERROR_NOMEMORY 237 case PCRE_ERROR_NOMEMORY: 238 msg_warn("pcre map %s, line %d: out of memory", 239 mapname, lineno); 240 return; 241 #endif 242 #ifdef PCRE_ERROR_MATCHLIMIT 243 case PCRE_ERROR_MATCHLIMIT: 244 msg_warn("pcre map %s, line %d: backtracking limit exceeded", 245 mapname, lineno); 246 return; 247 #endif 248 #ifdef PCRE_ERROR_BADUTF8 249 case PCRE_ERROR_BADUTF8: 250 msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string", 251 mapname, lineno); 252 return; 253 #endif 254 #ifdef PCRE_ERROR_BADUTF8_OFFSET 255 case PCRE_ERROR_BADUTF8_OFFSET: 256 msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string", 257 mapname, lineno); 258 return; 259 #endif 260 default: 261 msg_warn("pcre map %s, line %d: unknown re_exec error: %d", 262 mapname, lineno, errval); 263 return; 264 } 265 } 266 267 /* dict_pcre_lookup - match string and perform optional substitution */ 268 269 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 270 { 271 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 272 DICT_PCRE_RULE *rule; 273 DICT_PCRE_IF_RULE *if_rule; 274 DICT_PCRE_MATCH_RULE *match_rule; 275 int lookup_len = strlen(lookup_string); 276 DICT_PCRE_EXPAND_CONTEXT ctxt; 277 int nesting = 0; 278 279 dict->error = 0; 280 281 if (msg_verbose) 282 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 283 284 /* 285 * Optionally fold the key. 286 */ 287 if (dict->flags & DICT_FLAG_FOLD_MUL) { 288 if (dict->fold_buf == 0) 289 dict->fold_buf = vstring_alloc(10); 290 vstring_strcpy(dict->fold_buf, lookup_string); 291 lookup_string = lowercase(vstring_str(dict->fold_buf)); 292 } 293 for (rule = dict_pcre->head; rule; rule = rule->next) { 294 295 /* 296 * Skip rules inside failed IF/ENDIF. 297 */ 298 if (nesting < rule->nesting) 299 continue; 300 301 switch (rule->op) { 302 303 /* 304 * Search for a matching expression. 305 */ 306 case DICT_PCRE_OP_MATCH: 307 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 308 ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints, 309 lookup_string, lookup_len, 310 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 311 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 312 313 if (ctxt.matches > 0) { 314 if (!match_rule->match) 315 continue; /* Negative rule matched */ 316 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 317 if (match_rule->match) 318 continue; /* Positive rule did not 319 * match */ 320 } else { 321 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 322 continue; /* pcre_exec failed */ 323 } 324 325 /* 326 * Skip $number substitutions when the replacement text contains 327 * no $number strings, as learned during the compile time 328 * pre-scan. The pre-scan already replaced $$ by $. 329 */ 330 if (match_rule->max_sub == 0) 331 return match_rule->replacement; 332 333 /* 334 * We've got a match. Perform substitution on replacement string. 335 */ 336 if (dict_pcre->expansion_buf == 0) 337 dict_pcre->expansion_buf = vstring_alloc(10); 338 VSTRING_RESET(dict_pcre->expansion_buf); 339 ctxt.dict_pcre = dict_pcre; 340 ctxt.match_rule = match_rule; 341 ctxt.lookup_string = lookup_string; 342 343 if (mac_parse(match_rule->replacement, dict_pcre_expand, 344 (void *) &ctxt) & MAC_PARSE_ERROR) 345 msg_fatal("pcre map %s, line %d: bad replacement syntax", 346 dict->name, rule->lineno); 347 348 VSTRING_TERMINATE(dict_pcre->expansion_buf); 349 return (vstring_str(dict_pcre->expansion_buf)); 350 351 /* 352 * Conditional. XXX We provide space for matched substring info 353 * because PCRE uses part of it as workspace for backtracking. 354 * PCRE will allocate memory if it runs out of backtracking 355 * storage. 356 */ 357 case DICT_PCRE_OP_IF: 358 if_rule = (DICT_PCRE_IF_RULE *) rule; 359 ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints, 360 lookup_string, lookup_len, 361 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 362 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 363 364 if (ctxt.matches > 0) { 365 if (!if_rule->match) 366 continue; /* Negative rule matched */ 367 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 368 if (if_rule->match) 369 continue; /* Positive rule did not 370 * match */ 371 } else { 372 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 373 continue; /* pcre_exec failed */ 374 } 375 nesting++; 376 continue; 377 378 /* 379 * ENDIF after successful IF. 380 */ 381 case DICT_PCRE_OP_ENDIF: 382 nesting--; 383 continue; 384 385 default: 386 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 387 } 388 } 389 return (0); 390 } 391 392 /* dict_pcre_close - close pcre dictionary */ 393 394 static void dict_pcre_close(DICT *dict) 395 { 396 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 397 DICT_PCRE_RULE *rule; 398 DICT_PCRE_RULE *next; 399 DICT_PCRE_MATCH_RULE *match_rule; 400 DICT_PCRE_IF_RULE *if_rule; 401 402 for (rule = dict_pcre->head; rule; rule = next) { 403 next = rule->next; 404 switch (rule->op) { 405 case DICT_PCRE_OP_MATCH: 406 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 407 if (match_rule->pattern) 408 myfree((void *) match_rule->pattern); 409 if (match_rule->hints) 410 DICT_PCRE_FREE_STUDY(match_rule->hints); 411 if (match_rule->replacement) 412 myfree((void *) match_rule->replacement); 413 break; 414 case DICT_PCRE_OP_IF: 415 if_rule = (DICT_PCRE_IF_RULE *) rule; 416 if (if_rule->pattern) 417 myfree((void *) if_rule->pattern); 418 if (if_rule->hints) 419 DICT_PCRE_FREE_STUDY(if_rule->hints); 420 break; 421 case DICT_PCRE_OP_ENDIF: 422 break; 423 default: 424 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 425 } 426 myfree((void *) rule); 427 } 428 if (dict_pcre->expansion_buf) 429 vstring_free(dict_pcre->expansion_buf); 430 if (dict->fold_buf) 431 vstring_free(dict->fold_buf); 432 dict_free(dict); 433 } 434 435 /* dict_pcre_get_pattern - extract pattern from rule */ 436 437 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 438 DICT_PCRE_REGEXP *pattern) 439 { 440 char *p = *bufp; 441 char re_delimiter; 442 443 /* 444 * Process negation operators. 445 */ 446 pattern->match = 1; 447 while (*p == '!') { 448 pattern->match = !pattern->match; 449 p++; 450 } 451 452 /* 453 * Grr...aceful handling of whitespace after '!'. 454 */ 455 while (*p && ISSPACE(*p)) 456 p++; 457 if (*p == 0) { 458 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 459 mapname, lineno); 460 return (0); 461 } 462 re_delimiter = *p++; 463 pattern->regexp = p; 464 465 /* 466 * Search for second delimiter, handling backslash escape. 467 */ 468 while (*p) { 469 if (*p == '\\') { 470 ++p; 471 if (*p == 0) 472 break; 473 } else if (*p == re_delimiter) 474 break; 475 ++p; 476 } 477 478 if (!*p) { 479 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 480 "ignoring this rule", mapname, lineno, re_delimiter); 481 return (0); 482 } 483 *p++ = 0; /* Null term the regexp */ 484 485 /* 486 * Parse any regexp options. 487 */ 488 pattern->options = PCRE_CASELESS | PCRE_DOTALL; 489 while (*p && !ISSPACE(*p)) { 490 switch (*p) { 491 case 'i': 492 pattern->options ^= PCRE_CASELESS; 493 break; 494 case 'm': 495 pattern->options ^= PCRE_MULTILINE; 496 break; 497 case 's': 498 pattern->options ^= PCRE_DOTALL; 499 break; 500 case 'x': 501 pattern->options ^= PCRE_EXTENDED; 502 break; 503 case 'A': 504 pattern->options ^= PCRE_ANCHORED; 505 break; 506 case 'E': 507 pattern->options ^= PCRE_DOLLAR_ENDONLY; 508 break; 509 case 'U': 510 pattern->options ^= PCRE_UNGREEDY; 511 break; 512 case 'X': 513 pattern->options ^= PCRE_EXTRA; 514 break; 515 default: 516 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 517 "skipping this rule", mapname, lineno, *p); 518 return (0); 519 } 520 ++p; 521 } 522 *bufp = p; 523 return (1); 524 } 525 526 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 527 528 static int dict_pcre_prescan(int type, VSTRING *buf, void *context) 529 { 530 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 531 size_t n; 532 533 /* 534 * Keep a copy of literal text (with $$ already replaced by $) if and 535 * only if the replacement text contains no $number expression. This way 536 * we can avoid having to scan the replacement text at lookup time. 537 */ 538 if (type == MAC_PARSE_VARNAME) { 539 if (ctxt->literal) { 540 myfree(ctxt->literal); 541 ctxt->literal = 0; 542 } 543 if (!alldig(vstring_str(buf))) { 544 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 545 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 546 return (MAC_PARSE_ERROR); 547 } 548 n = atoi(vstring_str(buf)); 549 if (n < 1) { 550 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 551 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 552 return (MAC_PARSE_ERROR); 553 } 554 if (n > ctxt->max_sub) 555 ctxt->max_sub = n; 556 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 557 if (ctxt->literal) 558 msg_panic("pcre map %s, line %d: multiple literals but no $number", 559 ctxt->mapname, ctxt->lineno); 560 ctxt->literal = mystrdup(vstring_str(buf)); 561 } 562 return (MAC_PARSE_OK); 563 } 564 565 /* dict_pcre_compile - compile pattern */ 566 567 static int dict_pcre_compile(const char *mapname, int lineno, 568 DICT_PCRE_REGEXP *pattern, 569 DICT_PCRE_ENGINE *engine) 570 { 571 const char *error; 572 int errptr; 573 574 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 575 &error, &errptr, NULL); 576 if (engine->pattern == 0) { 577 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 578 mapname, lineno, errptr, error); 579 return (0); 580 } 581 engine->hints = pcre_study(engine->pattern, 0, &error); 582 if (error != 0) { 583 msg_warn("pcre map %s, line %d: error while studying regex: %s", 584 mapname, lineno, error); 585 myfree((void *) engine->pattern); 586 return (0); 587 } 588 return (1); 589 } 590 591 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 592 593 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting, 594 int lineno, 595 size_t size) 596 { 597 DICT_PCRE_RULE *rule; 598 599 rule = (DICT_PCRE_RULE *) mymalloc(size); 600 rule->op = op; 601 rule->nesting = nesting; 602 rule->lineno = lineno; 603 rule->next = 0; 604 605 return (rule); 606 } 607 608 /* dict_pcre_parse_rule - parse and compile one rule */ 609 610 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno, 611 char *line, int nesting, 612 int dict_flags) 613 { 614 char *p; 615 int actual_sub; 616 617 p = line; 618 619 /* 620 * An ordinary match rule takes one pattern and replacement text. 621 */ 622 if (!ISALNUM(*p)) { 623 DICT_PCRE_REGEXP regexp; 624 DICT_PCRE_ENGINE engine; 625 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 626 DICT_PCRE_MATCH_RULE *match_rule; 627 628 /* 629 * Get the pattern string and options. 630 */ 631 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 632 return (0); 633 634 /* 635 * Get the replacement text. 636 */ 637 while (*p && ISSPACE(*p)) 638 ++p; 639 if (!*p) 640 msg_warn("%s, line %d: no replacement text: using empty string", 641 mapname, lineno); 642 643 /* 644 * Sanity check the $number instances in the replacement text. 645 */ 646 prescan_context.mapname = mapname; 647 prescan_context.lineno = lineno; 648 prescan_context.max_sub = 0; 649 prescan_context.literal = 0; 650 651 /* 652 * The optimizer will eliminate code duplication and/or dead code. 653 */ 654 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 655 if (prescan_context.literal) \ 656 myfree(prescan_context.literal); \ 657 return (rval); \ 658 } while (0) 659 660 if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context) 661 & MAC_PARSE_ERROR) { 662 msg_warn("pcre map %s, line %d: bad replacement syntax: " 663 "skipping this rule", mapname, lineno); 664 CREATE_MATCHOP_ERROR_RETURN(0); 665 } 666 667 /* 668 * Substring replacement not possible with negative regexps. 669 */ 670 if (prescan_context.max_sub > 0 && regexp.match == 0) { 671 msg_warn("pcre map %s, line %d: $number found in negative match " 672 "replacement text: skipping this rule", mapname, lineno); 673 CREATE_MATCHOP_ERROR_RETURN(0); 674 } 675 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 676 msg_warn("pcre map %s, line %d: " 677 "regular expression substitution is not allowed: " 678 "skipping this rule", mapname, lineno); 679 CREATE_MATCHOP_ERROR_RETURN(0); 680 } 681 682 /* 683 * Compile the pattern. 684 */ 685 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 686 CREATE_MATCHOP_ERROR_RETURN(0); 687 #ifdef PCRE_INFO_CAPTURECOUNT 688 if (pcre_fullinfo(engine.pattern, engine.hints, 689 PCRE_INFO_CAPTURECOUNT, 690 (void *) &actual_sub) != 0) 691 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 692 mapname, lineno); 693 if (prescan_context.max_sub > actual_sub) { 694 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 695 "skipping this rule", mapname, lineno, 696 (int) prescan_context.max_sub); 697 if (engine.pattern) 698 myfree((void *) engine.pattern); 699 if (engine.hints) 700 DICT_PCRE_FREE_STUDY(engine.hints); 701 CREATE_MATCHOP_ERROR_RETURN(0); 702 } 703 #endif 704 705 /* 706 * Save the result. 707 */ 708 match_rule = (DICT_PCRE_MATCH_RULE *) 709 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno, 710 sizeof(DICT_PCRE_MATCH_RULE)); 711 match_rule->match = regexp.match; 712 match_rule->max_sub = prescan_context.max_sub; 713 if (prescan_context.literal) 714 match_rule->replacement = prescan_context.literal; 715 else 716 match_rule->replacement = mystrdup(p); 717 match_rule->pattern = engine.pattern; 718 match_rule->hints = engine.hints; 719 return ((DICT_PCRE_RULE *) match_rule); 720 } 721 722 /* 723 * The IF operator takes one pattern but no replacement text. 724 */ 725 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 726 DICT_PCRE_REGEXP regexp; 727 DICT_PCRE_ENGINE engine; 728 DICT_PCRE_IF_RULE *if_rule; 729 730 p += 2; 731 732 /* 733 * Get the pattern. 734 */ 735 while (*p && ISSPACE(*p)) 736 p++; 737 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 738 return (0); 739 740 /* 741 * Warn about out-of-place text. 742 */ 743 while (*p && ISSPACE(*p)) 744 ++p; 745 if (*p) { 746 msg_warn("pcre map %s, line %d: ignoring extra text after " 747 "IF statement: \"%s\"", mapname, lineno, p); 748 msg_warn("pcre map %s, line %d: do not prepend whitespace" 749 " to statements between IF and ENDIF", mapname, lineno); 750 } 751 752 /* 753 * Compile the pattern. 754 */ 755 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 756 return (0); 757 758 /* 759 * Save the result. 760 */ 761 if_rule = (DICT_PCRE_IF_RULE *) 762 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno, 763 sizeof(DICT_PCRE_IF_RULE)); 764 if_rule->match = regexp.match; 765 if_rule->pattern = engine.pattern; 766 if_rule->hints = engine.hints; 767 return ((DICT_PCRE_RULE *) if_rule); 768 } 769 770 /* 771 * The ENDIF operator takes no patterns and no replacement text. 772 */ 773 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 774 DICT_PCRE_RULE *rule; 775 776 p += 5; 777 778 /* 779 * Warn about out-of-place ENDIFs. 780 */ 781 if (nesting == 0) { 782 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 783 mapname, lineno); 784 return (0); 785 } 786 787 /* 788 * Warn about out-of-place text. 789 */ 790 while (*p && ISSPACE(*p)) 791 ++p; 792 if (*p) 793 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 794 mapname, lineno); 795 796 /* 797 * Save the result. 798 */ 799 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno, 800 sizeof(DICT_PCRE_RULE)); 801 return (rule); 802 } 803 804 /* 805 * Unrecognized input. 806 */ 807 else { 808 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 809 mapname, lineno); 810 return (0); 811 } 812 } 813 814 /* dict_pcre_open - load and compile a file containing regular expressions */ 815 816 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 817 { 818 DICT_PCRE *dict_pcre; 819 VSTREAM *map_fp = 0; 820 struct stat st; 821 VSTRING *line_buffer = 0; 822 DICT_PCRE_RULE *last_rule = 0; 823 DICT_PCRE_RULE *rule; 824 int last_line = 0; 825 int lineno; 826 int nesting = 0; 827 char *p; 828 829 /* 830 * Let the optimizer worry about eliminating redundant code. 831 */ 832 #define DICT_PCRE_OPEN_RETURN(d) do { \ 833 DICT *__d = (d); \ 834 if (map_fp != 0) \ 835 vstream_fclose(map_fp); \ 836 if (line_buffer != 0) \ 837 vstring_free(line_buffer); \ 838 return (__d); \ 839 } while (0) 840 841 /* 842 * Sanity checks. 843 */ 844 if (open_flags != O_RDONLY) 845 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 846 open_flags, dict_flags, 847 "%s:%s map requires O_RDONLY access mode", 848 DICT_TYPE_PCRE, mapname)); 849 850 /* 851 * Open the configuration file. 852 */ 853 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 854 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 855 open_flags, dict_flags, 856 "open %s: %m", mapname)); 857 if (fstat(vstream_fileno(map_fp), &st) < 0) 858 msg_fatal("fstat %s: %m", mapname); 859 860 line_buffer = vstring_alloc(100); 861 862 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 863 sizeof(*dict_pcre)); 864 dict_pcre->dict.lookup = dict_pcre_lookup; 865 dict_pcre->dict.close = dict_pcre_close; 866 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 867 if (dict_flags & DICT_FLAG_FOLD_MUL) 868 dict_pcre->dict.fold_buf = vstring_alloc(10); 869 dict_pcre->head = 0; 870 dict_pcre->expansion_buf = 0; 871 872 if (dict_pcre_init == 0) { 873 pcre_malloc = (void *(*) (size_t)) mymalloc; 874 pcre_free = (void (*) (void *)) myfree; 875 dict_pcre_init = 1; 876 } 877 dict_pcre->dict.owner.uid = st.st_uid; 878 dict_pcre->dict.owner.status = (st.st_uid != 0); 879 880 /* 881 * Parse the pcre table. 882 */ 883 while (readllines(line_buffer, map_fp, &last_line, &lineno)) { 884 p = vstring_str(line_buffer); 885 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 886 if (*p == 0) 887 continue; 888 rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags); 889 if (rule == 0) 890 continue; 891 if (rule->op == DICT_PCRE_OP_IF) { 892 nesting++; 893 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 894 nesting--; 895 } 896 if (last_rule == 0) 897 dict_pcre->head = rule; 898 else 899 last_rule->next = rule; 900 last_rule = rule; 901 } 902 903 if (nesting) 904 msg_warn("pcre map %s, line %d: more IFs than ENDIFs", 905 mapname, lineno); 906 907 DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict)); 908 } 909 910 #endif /* HAS_PCRE */ 911