1 /* $NetBSD: dict_pcre.c,v 1.1.1.3 2014/01/18 17:04:24 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /*--*/ 33 34 #include "sys_defs.h" 35 36 #ifdef HAS_PCRE 37 38 /* System library. */ 39 40 #include <sys/stat.h> 41 #include <stdio.h> /* sprintf() prototype */ 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #include <ctype.h> 46 47 #ifdef STRCASECMP_IN_STRINGS_H 48 #include <strings.h> 49 #endif 50 51 /* Utility library. */ 52 53 #include "mymalloc.h" 54 #include "msg.h" 55 #include "safe.h" 56 #include "vstream.h" 57 #include "vstring.h" 58 #include "stringops.h" 59 #include "readlline.h" 60 #include "dict.h" 61 #include "dict_pcre.h" 62 #include "mac_parse.h" 63 #include "pcre.h" 64 #include "warn_stat.h" 65 66 /* 67 * Backwards compatibility. 68 */ 69 #ifdef PCRE_STUDY_JIT_COMPILE 70 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x) 71 #else 72 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x)) 73 #endif 74 75 /* 76 * Support for IF/ENDIF based on an idea by Bert Driehuis. 77 */ 78 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 79 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 80 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 81 82 /* 83 * Max strings captured by regexp - essentially the max number of (..) 84 */ 85 #define PCRE_MAX_CAPTURE 99 86 87 /* 88 * Regular expression before and after compilation. 89 */ 90 typedef struct { 91 char *regexp; /* regular expression */ 92 int options; /* options */ 93 int match; /* positive or negative match */ 94 } DICT_PCRE_REGEXP; 95 96 typedef struct { 97 pcre *pattern; /* the compiled pattern */ 98 pcre_extra *hints; /* hints to speed pattern execution */ 99 } DICT_PCRE_ENGINE; 100 101 /* 102 * Compiled generic rule, and subclasses that derive from it. 103 */ 104 typedef struct DICT_PCRE_RULE { 105 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 106 int nesting; /* level of IF/ENDIF nesting */ 107 int lineno; /* source file line number */ 108 struct DICT_PCRE_RULE *next; /* next rule in dict */ 109 } DICT_PCRE_RULE; 110 111 typedef struct { 112 DICT_PCRE_RULE rule; /* generic part */ 113 pcre *pattern; /* compiled pattern */ 114 pcre_extra *hints; /* hints to speed pattern execution */ 115 char *replacement; /* replacement string */ 116 int match; /* positive or negative match */ 117 size_t max_sub; /* largest $number in replacement */ 118 } DICT_PCRE_MATCH_RULE; 119 120 typedef struct { 121 DICT_PCRE_RULE rule; /* generic members */ 122 pcre *pattern; /* compiled pattern */ 123 pcre_extra *hints; /* hints to speed pattern execution */ 124 int match; /* positive or negative match */ 125 } DICT_PCRE_IF_RULE; 126 127 /* 128 * PCRE map. 129 */ 130 typedef struct { 131 DICT dict; /* generic members */ 132 DICT_PCRE_RULE *head; 133 VSTRING *expansion_buf; /* lookup result */ 134 } DICT_PCRE; 135 136 static int dict_pcre_init = 0; /* flag need to init pcre library */ 137 138 /* 139 * Context for $number expansion callback. 140 */ 141 typedef struct { 142 DICT_PCRE *dict_pcre; /* the dictionary handle */ 143 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 144 const char *lookup_string; /* string against which we match */ 145 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 146 int matches; /* Count of cuts */ 147 } DICT_PCRE_EXPAND_CONTEXT; 148 149 /* 150 * Context for $number pre-scan callback. 151 */ 152 typedef struct { 153 const char *mapname; /* name of regexp map */ 154 int lineno; /* where in file */ 155 size_t max_sub; /* Largest $n seen */ 156 char *literal; /* constant result, $$ -> $ */ 157 } DICT_PCRE_PRESCAN_CONTEXT; 158 159 /* 160 * Compatibility. 161 */ 162 #ifndef MAC_PARSE_OK 163 #define MAC_PARSE_OK 0 164 #endif 165 166 /* 167 * Macros to make dense code more accessible. 168 */ 169 #define NULL_STARTOFFSET (0) 170 #define NULL_EXEC_OPTIONS (0) 171 #define NULL_OVECTOR ((int *) 0) 172 #define NULL_OVECTOR_LENGTH (0) 173 174 /* dict_pcre_expand - replace $number with matched text */ 175 176 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr) 177 { 178 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 179 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 180 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 181 const char *pp; 182 int n; 183 int ret; 184 185 /* 186 * Replace $0-${99} with strings cut from matched text. 187 */ 188 if (type == MAC_PARSE_VARNAME) { 189 n = atoi(vstring_str(buf)); 190 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 191 ctxt->matches, n, &pp); 192 if (ret < 0) { 193 if (ret == PCRE_ERROR_NOSUBSTRING) 194 return (MAC_PARSE_UNDEF); 195 else 196 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 197 dict_pcre->dict.name, match_rule->rule.lineno, ret); 198 } 199 if (*pp == 0) { 200 myfree((char *) pp); 201 return (MAC_PARSE_UNDEF); 202 } 203 vstring_strcat(dict_pcre->expansion_buf, pp); 204 myfree((char *) pp); 205 return (MAC_PARSE_OK); 206 } 207 208 /* 209 * Straight text - duplicate with no substitution. 210 */ 211 else { 212 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 213 return (MAC_PARSE_OK); 214 } 215 } 216 217 /* dict_pcre_exec_error - report matching error */ 218 219 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 220 { 221 switch (errval) { 222 case 0: 223 msg_warn("pcre map %s, line %d: too many (...)", 224 mapname, lineno); 225 return; 226 case PCRE_ERROR_NULL: 227 case PCRE_ERROR_BADOPTION: 228 msg_fatal("pcre map %s, line %d: bad args to re_exec", 229 mapname, lineno); 230 case PCRE_ERROR_BADMAGIC: 231 case PCRE_ERROR_UNKNOWN_NODE: 232 msg_fatal("pcre map %s, line %d: corrupt compiled regexp", 233 mapname, lineno); 234 #ifdef PCRE_ERROR_NOMEMORY 235 case PCRE_ERROR_NOMEMORY: 236 msg_fatal("pcre map %s, line %d: out of memory", 237 mapname, lineno); 238 #endif 239 #ifdef PCRE_ERROR_MATCHLIMIT 240 case PCRE_ERROR_MATCHLIMIT: 241 msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit", 242 mapname, lineno); 243 #endif 244 #ifdef PCRE_ERROR_BADUTF8 245 case PCRE_ERROR_BADUTF8: 246 msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string", 247 mapname, lineno); 248 #endif 249 #ifdef PCRE_ERROR_BADUTF8_OFFSET 250 case PCRE_ERROR_BADUTF8_OFFSET: 251 msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string", 252 mapname, lineno); 253 #endif 254 default: 255 msg_fatal("pcre map %s, line %d: unknown re_exec error: %d", 256 mapname, lineno, errval); 257 } 258 } 259 260 /* dict_pcre_lookup - match string and perform optional substitution */ 261 262 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 263 { 264 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 265 DICT_PCRE_RULE *rule; 266 DICT_PCRE_IF_RULE *if_rule; 267 DICT_PCRE_MATCH_RULE *match_rule; 268 int lookup_len = strlen(lookup_string); 269 DICT_PCRE_EXPAND_CONTEXT ctxt; 270 int nesting = 0; 271 272 dict->error = 0; 273 274 if (msg_verbose) 275 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 276 277 /* 278 * Optionally fold the key. 279 */ 280 if (dict->flags & DICT_FLAG_FOLD_MUL) { 281 if (dict->fold_buf == 0) 282 dict->fold_buf = vstring_alloc(10); 283 vstring_strcpy(dict->fold_buf, lookup_string); 284 lookup_string = lowercase(vstring_str(dict->fold_buf)); 285 } 286 for (rule = dict_pcre->head; rule; rule = rule->next) { 287 288 /* 289 * Skip rules inside failed IF/ENDIF. 290 */ 291 if (nesting < rule->nesting) 292 continue; 293 294 switch (rule->op) { 295 296 /* 297 * Search for a matching expression. 298 */ 299 case DICT_PCRE_OP_MATCH: 300 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 301 ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints, 302 lookup_string, lookup_len, 303 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 304 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 305 306 if (ctxt.matches > 0) { 307 if (!match_rule->match) 308 continue; /* Negative rule matched */ 309 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 310 if (match_rule->match) 311 continue; /* Positive rule did not 312 * match */ 313 } else { 314 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 315 continue; /* pcre_exec failed */ 316 } 317 318 /* 319 * Skip $number substitutions when the replacement text contains 320 * no $number strings, as learned during the compile time 321 * pre-scan. The pre-scan already replaced $$ by $. 322 */ 323 if (match_rule->max_sub == 0) 324 return match_rule->replacement; 325 326 /* 327 * We've got a match. Perform substitution on replacement string. 328 */ 329 if (dict_pcre->expansion_buf == 0) 330 dict_pcre->expansion_buf = vstring_alloc(10); 331 VSTRING_RESET(dict_pcre->expansion_buf); 332 ctxt.dict_pcre = dict_pcre; 333 ctxt.match_rule = match_rule; 334 ctxt.lookup_string = lookup_string; 335 336 if (mac_parse(match_rule->replacement, dict_pcre_expand, 337 (char *) &ctxt) & MAC_PARSE_ERROR) 338 msg_fatal("pcre map %s, line %d: bad replacement syntax", 339 dict->name, rule->lineno); 340 341 VSTRING_TERMINATE(dict_pcre->expansion_buf); 342 return (vstring_str(dict_pcre->expansion_buf)); 343 344 /* 345 * Conditional. XXX We provide space for matched substring info 346 * because PCRE uses part of it as workspace for backtracking. 347 * PCRE will allocate memory if it runs out of backtracking 348 * storage. 349 */ 350 case DICT_PCRE_OP_IF: 351 if_rule = (DICT_PCRE_IF_RULE *) rule; 352 ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints, 353 lookup_string, lookup_len, 354 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 355 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 356 357 if (ctxt.matches > 0) { 358 if (!if_rule->match) 359 continue; /* Negative rule matched */ 360 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 361 if (if_rule->match) 362 continue; /* Positive rule did not 363 * match */ 364 } else { 365 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 366 continue; /* pcre_exec failed */ 367 } 368 nesting++; 369 continue; 370 371 /* 372 * ENDIF after successful IF. 373 */ 374 case DICT_PCRE_OP_ENDIF: 375 nesting--; 376 continue; 377 378 default: 379 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 380 } 381 } 382 return (0); 383 } 384 385 /* dict_pcre_close - close pcre dictionary */ 386 387 static void dict_pcre_close(DICT *dict) 388 { 389 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 390 DICT_PCRE_RULE *rule; 391 DICT_PCRE_RULE *next; 392 DICT_PCRE_MATCH_RULE *match_rule; 393 DICT_PCRE_IF_RULE *if_rule; 394 395 for (rule = dict_pcre->head; rule; rule = next) { 396 next = rule->next; 397 switch (rule->op) { 398 case DICT_PCRE_OP_MATCH: 399 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 400 if (match_rule->pattern) 401 myfree((char *) match_rule->pattern); 402 if (match_rule->hints) 403 DICT_PCRE_FREE_STUDY(match_rule->hints); 404 if (match_rule->replacement) 405 myfree((char *) match_rule->replacement); 406 break; 407 case DICT_PCRE_OP_IF: 408 if_rule = (DICT_PCRE_IF_RULE *) rule; 409 if (if_rule->pattern) 410 myfree((char *) if_rule->pattern); 411 if (if_rule->hints) 412 DICT_PCRE_FREE_STUDY(if_rule->hints); 413 break; 414 case DICT_PCRE_OP_ENDIF: 415 break; 416 default: 417 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 418 } 419 myfree((char *) rule); 420 } 421 if (dict_pcre->expansion_buf) 422 vstring_free(dict_pcre->expansion_buf); 423 if (dict->fold_buf) 424 vstring_free(dict->fold_buf); 425 dict_free(dict); 426 } 427 428 /* dict_pcre_get_pattern - extract pattern from rule */ 429 430 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 431 DICT_PCRE_REGEXP *pattern) 432 { 433 char *p = *bufp; 434 char re_delimiter; 435 436 /* 437 * Process negation operators. 438 */ 439 pattern->match = 1; 440 while (*p == '!') { 441 pattern->match = !pattern->match; 442 p++; 443 } 444 445 /* 446 * Grr...aceful handling of whitespace after '!'. 447 */ 448 while (*p && ISSPACE(*p)) 449 p++; 450 if (*p == 0) { 451 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 452 mapname, lineno); 453 return (0); 454 } 455 re_delimiter = *p++; 456 pattern->regexp = p; 457 458 /* 459 * Search for second delimiter, handling backslash escape. 460 */ 461 while (*p) { 462 if (*p == '\\') { 463 ++p; 464 if (*p == 0) 465 break; 466 } else if (*p == re_delimiter) 467 break; 468 ++p; 469 } 470 471 if (!*p) { 472 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 473 "ignoring this rule", mapname, lineno, re_delimiter); 474 return (0); 475 } 476 *p++ = 0; /* Null term the regexp */ 477 478 /* 479 * Parse any regexp options. 480 */ 481 pattern->options = PCRE_CASELESS | PCRE_DOTALL; 482 while (*p && !ISSPACE(*p)) { 483 switch (*p) { 484 case 'i': 485 pattern->options ^= PCRE_CASELESS; 486 break; 487 case 'm': 488 pattern->options ^= PCRE_MULTILINE; 489 break; 490 case 's': 491 pattern->options ^= PCRE_DOTALL; 492 break; 493 case 'x': 494 pattern->options ^= PCRE_EXTENDED; 495 break; 496 case 'A': 497 pattern->options ^= PCRE_ANCHORED; 498 break; 499 case 'E': 500 pattern->options ^= PCRE_DOLLAR_ENDONLY; 501 break; 502 case 'U': 503 pattern->options ^= PCRE_UNGREEDY; 504 break; 505 case 'X': 506 pattern->options ^= PCRE_EXTRA; 507 break; 508 default: 509 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 510 "skipping this rule", mapname, lineno, *p); 511 return (0); 512 } 513 ++p; 514 } 515 *bufp = p; 516 return (1); 517 } 518 519 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 520 521 static int dict_pcre_prescan(int type, VSTRING *buf, char *context) 522 { 523 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 524 size_t n; 525 526 /* 527 * Keep a copy of literal text (with $$ already replaced by $) if and 528 * only if the replacement text contains no $number expression. This way 529 * we can avoid having to scan the replacement text at lookup time. 530 */ 531 if (type == MAC_PARSE_VARNAME) { 532 if (ctxt->literal) { 533 myfree(ctxt->literal); 534 ctxt->literal = 0; 535 } 536 if (!alldig(vstring_str(buf))) { 537 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 538 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 539 return (MAC_PARSE_ERROR); 540 } 541 n = atoi(vstring_str(buf)); 542 if (n < 1) { 543 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 544 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 545 return (MAC_PARSE_ERROR); 546 } 547 if (n > ctxt->max_sub) 548 ctxt->max_sub = n; 549 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 550 if (ctxt->literal) 551 msg_panic("pcre map %s, line %d: multiple literals but no $number", 552 ctxt->mapname, ctxt->lineno); 553 ctxt->literal = mystrdup(vstring_str(buf)); 554 } 555 return (MAC_PARSE_OK); 556 } 557 558 /* dict_pcre_compile - compile pattern */ 559 560 static int dict_pcre_compile(const char *mapname, int lineno, 561 DICT_PCRE_REGEXP *pattern, 562 DICT_PCRE_ENGINE *engine) 563 { 564 const char *error; 565 int errptr; 566 567 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 568 &error, &errptr, NULL); 569 if (engine->pattern == 0) { 570 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 571 mapname, lineno, errptr, error); 572 return (0); 573 } 574 engine->hints = pcre_study(engine->pattern, 0, &error); 575 if (error != 0) { 576 msg_warn("pcre map %s, line %d: error while studying regex: %s", 577 mapname, lineno, error); 578 myfree((char *) engine->pattern); 579 return (0); 580 } 581 return (1); 582 } 583 584 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 585 586 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting, 587 int lineno, 588 size_t size) 589 { 590 DICT_PCRE_RULE *rule; 591 592 rule = (DICT_PCRE_RULE *) mymalloc(size); 593 rule->op = op; 594 rule->nesting = nesting; 595 rule->lineno = lineno; 596 rule->next = 0; 597 598 return (rule); 599 } 600 601 /* dict_pcre_parse_rule - parse and compile one rule */ 602 603 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno, 604 char *line, int nesting, 605 int dict_flags) 606 { 607 char *p; 608 int actual_sub; 609 610 p = line; 611 612 /* 613 * An ordinary match rule takes one pattern and replacement text. 614 */ 615 if (!ISALNUM(*p)) { 616 DICT_PCRE_REGEXP regexp; 617 DICT_PCRE_ENGINE engine; 618 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 619 DICT_PCRE_MATCH_RULE *match_rule; 620 621 /* 622 * Get the pattern string and options. 623 */ 624 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 625 return (0); 626 627 /* 628 * Get the replacement text. 629 */ 630 while (*p && ISSPACE(*p)) 631 ++p; 632 if (!*p) 633 msg_warn("%s, line %d: no replacement text: using empty string", 634 mapname, lineno); 635 636 /* 637 * Sanity check the $number instances in the replacement text. 638 */ 639 prescan_context.mapname = mapname; 640 prescan_context.lineno = lineno; 641 prescan_context.max_sub = 0; 642 prescan_context.literal = 0; 643 644 /* 645 * The optimizer will eliminate code duplication and/or dead code. 646 */ 647 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 648 if (prescan_context.literal) \ 649 myfree(prescan_context.literal); \ 650 return (rval); \ 651 } while (0) 652 653 if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context) 654 & MAC_PARSE_ERROR) { 655 msg_warn("pcre map %s, line %d: bad replacement syntax: " 656 "skipping this rule", mapname, lineno); 657 CREATE_MATCHOP_ERROR_RETURN(0); 658 } 659 660 /* 661 * Substring replacement not possible with negative regexps. 662 */ 663 if (prescan_context.max_sub > 0 && regexp.match == 0) { 664 msg_warn("pcre map %s, line %d: $number found in negative match " 665 "replacement text: skipping this rule", mapname, lineno); 666 CREATE_MATCHOP_ERROR_RETURN(0); 667 } 668 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 669 msg_warn("pcre map %s, line %d: " 670 "regular expression substitution is not allowed: " 671 "skipping this rule", mapname, lineno); 672 CREATE_MATCHOP_ERROR_RETURN(0); 673 } 674 675 /* 676 * Compile the pattern. 677 */ 678 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 679 CREATE_MATCHOP_ERROR_RETURN(0); 680 #ifdef PCRE_INFO_CAPTURECOUNT 681 if (pcre_fullinfo(engine.pattern, engine.hints, 682 PCRE_INFO_CAPTURECOUNT, 683 (void *) &actual_sub) != 0) 684 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 685 mapname, lineno); 686 if (prescan_context.max_sub > actual_sub) { 687 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 688 "skipping this rule", mapname, lineno, 689 (int) prescan_context.max_sub); 690 if (engine.pattern) 691 myfree((char *) engine.pattern); 692 if (engine.hints) 693 DICT_PCRE_FREE_STUDY(engine.hints); 694 CREATE_MATCHOP_ERROR_RETURN(0); 695 } 696 #endif 697 698 /* 699 * Save the result. 700 */ 701 match_rule = (DICT_PCRE_MATCH_RULE *) 702 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno, 703 sizeof(DICT_PCRE_MATCH_RULE)); 704 match_rule->match = regexp.match; 705 match_rule->max_sub = prescan_context.max_sub; 706 if (prescan_context.literal) 707 match_rule->replacement = prescan_context.literal; 708 else 709 match_rule->replacement = mystrdup(p); 710 match_rule->pattern = engine.pattern; 711 match_rule->hints = engine.hints; 712 return ((DICT_PCRE_RULE *) match_rule); 713 } 714 715 /* 716 * The IF operator takes one pattern but no replacement text. 717 */ 718 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 719 DICT_PCRE_REGEXP regexp; 720 DICT_PCRE_ENGINE engine; 721 DICT_PCRE_IF_RULE *if_rule; 722 723 p += 2; 724 725 /* 726 * Get the pattern. 727 */ 728 while (*p && ISSPACE(*p)) 729 p++; 730 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 731 return (0); 732 733 /* 734 * Warn about out-of-place text. 735 */ 736 while (*p && ISSPACE(*p)) 737 ++p; 738 if (*p) { 739 msg_warn("pcre map %s, line %d: ignoring extra text after " 740 "IF statement: \"%s\"", mapname, lineno, p); 741 msg_warn("pcre map %s, line %d: do not prepend whitespace" 742 " to statements between IF and ENDIF", mapname, lineno); 743 } 744 745 /* 746 * Compile the pattern. 747 */ 748 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 749 return (0); 750 751 /* 752 * Save the result. 753 */ 754 if_rule = (DICT_PCRE_IF_RULE *) 755 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno, 756 sizeof(DICT_PCRE_IF_RULE)); 757 if_rule->match = regexp.match; 758 if_rule->pattern = engine.pattern; 759 if_rule->hints = engine.hints; 760 return ((DICT_PCRE_RULE *) if_rule); 761 } 762 763 /* 764 * The ENDIF operator takes no patterns and no replacement text. 765 */ 766 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 767 DICT_PCRE_RULE *rule; 768 769 p += 5; 770 771 /* 772 * Warn about out-of-place ENDIFs. 773 */ 774 if (nesting == 0) { 775 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 776 mapname, lineno); 777 return (0); 778 } 779 780 /* 781 * Warn about out-of-place text. 782 */ 783 while (*p && ISSPACE(*p)) 784 ++p; 785 if (*p) 786 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 787 mapname, lineno); 788 789 /* 790 * Save the result. 791 */ 792 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno, 793 sizeof(DICT_PCRE_RULE)); 794 return (rule); 795 } 796 797 /* 798 * Unrecognized input. 799 */ 800 else { 801 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 802 mapname, lineno); 803 return (0); 804 } 805 } 806 807 /* dict_pcre_open - load and compile a file containing regular expressions */ 808 809 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 810 { 811 DICT_PCRE *dict_pcre; 812 VSTREAM *map_fp; 813 struct stat st; 814 VSTRING *line_buffer; 815 DICT_PCRE_RULE *last_rule = 0; 816 DICT_PCRE_RULE *rule; 817 int lineno = 0; 818 int nesting = 0; 819 char *p; 820 821 /* 822 * Sanity checks. 823 */ 824 if (open_flags != O_RDONLY) 825 return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags, 826 "%s:%s map requires O_RDONLY access mode", 827 DICT_TYPE_PCRE, mapname)); 828 829 /* 830 * Open the configuration file. 831 */ 832 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 833 return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags, 834 "open %s: %m", mapname)); 835 if (fstat(vstream_fileno(map_fp), &st) < 0) 836 msg_fatal("fstat %s: %m", mapname); 837 838 line_buffer = vstring_alloc(100); 839 840 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 841 sizeof(*dict_pcre)); 842 dict_pcre->dict.lookup = dict_pcre_lookup; 843 dict_pcre->dict.close = dict_pcre_close; 844 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 845 if (dict_flags & DICT_FLAG_FOLD_MUL) 846 dict_pcre->dict.fold_buf = vstring_alloc(10); 847 dict_pcre->head = 0; 848 dict_pcre->expansion_buf = 0; 849 850 if (dict_pcre_init == 0) { 851 pcre_malloc = (void *(*) (size_t)) mymalloc; 852 pcre_free = (void (*) (void *)) myfree; 853 dict_pcre_init = 1; 854 } 855 dict_pcre->dict.owner.uid = st.st_uid; 856 dict_pcre->dict.owner.status = (st.st_uid != 0); 857 858 /* 859 * Parse the pcre table. 860 */ 861 while (readlline(line_buffer, map_fp, &lineno)) { 862 p = vstring_str(line_buffer); 863 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 864 if (*p == 0) 865 continue; 866 rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags); 867 if (rule == 0) 868 continue; 869 if (rule->op == DICT_PCRE_OP_IF) { 870 nesting++; 871 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 872 nesting--; 873 } 874 if (last_rule == 0) 875 dict_pcre->head = rule; 876 else 877 last_rule->next = rule; 878 last_rule = rule; 879 } 880 881 if (nesting) 882 msg_warn("pcre map %s, line %d: more IFs than ENDIFs", 883 mapname, lineno); 884 885 vstring_free(line_buffer); 886 vstream_fclose(map_fp); 887 888 return (DICT_DEBUG (&dict_pcre->dict)); 889 } 890 891 #endif /* HAS_PCRE */ 892