1 /* $NetBSD: dict_pcre.c,v 1.1.1.2 2013/01/02 18:59:12 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /*--*/ 33 34 #include "sys_defs.h" 35 36 #ifdef HAS_PCRE 37 38 /* System library. */ 39 40 #include <sys/stat.h> 41 #include <stdio.h> /* sprintf() prototype */ 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <string.h> 45 #include <ctype.h> 46 47 #ifdef STRCASECMP_IN_STRINGS_H 48 #include <strings.h> 49 #endif 50 51 /* Utility library. */ 52 53 #include "mymalloc.h" 54 #include "msg.h" 55 #include "safe.h" 56 #include "vstream.h" 57 #include "vstring.h" 58 #include "stringops.h" 59 #include "readlline.h" 60 #include "dict.h" 61 #include "dict_pcre.h" 62 #include "mac_parse.h" 63 #include "pcre.h" 64 #include "warn_stat.h" 65 66 /* 67 * Support for IF/ENDIF based on an idea by Bert Driehuis. 68 */ 69 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 70 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 71 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 72 73 /* 74 * Max strings captured by regexp - essentially the max number of (..) 75 */ 76 #define PCRE_MAX_CAPTURE 99 77 78 /* 79 * Regular expression before and after compilation. 80 */ 81 typedef struct { 82 char *regexp; /* regular expression */ 83 int options; /* options */ 84 int match; /* positive or negative match */ 85 } DICT_PCRE_REGEXP; 86 87 typedef struct { 88 pcre *pattern; /* the compiled pattern */ 89 pcre_extra *hints; /* hints to speed pattern execution */ 90 } DICT_PCRE_ENGINE; 91 92 /* 93 * Compiled generic rule, and subclasses that derive from it. 94 */ 95 typedef struct DICT_PCRE_RULE { 96 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 97 int nesting; /* level of IF/ENDIF nesting */ 98 int lineno; /* source file line number */ 99 struct DICT_PCRE_RULE *next; /* next rule in dict */ 100 } DICT_PCRE_RULE; 101 102 typedef struct { 103 DICT_PCRE_RULE rule; /* generic part */ 104 pcre *pattern; /* compiled pattern */ 105 pcre_extra *hints; /* hints to speed pattern execution */ 106 char *replacement; /* replacement string */ 107 int match; /* positive or negative match */ 108 size_t max_sub; /* largest $number in replacement */ 109 } DICT_PCRE_MATCH_RULE; 110 111 typedef struct { 112 DICT_PCRE_RULE rule; /* generic members */ 113 pcre *pattern; /* compiled pattern */ 114 pcre_extra *hints; /* hints to speed pattern execution */ 115 int match; /* positive or negative match */ 116 } DICT_PCRE_IF_RULE; 117 118 /* 119 * PCRE map. 120 */ 121 typedef struct { 122 DICT dict; /* generic members */ 123 DICT_PCRE_RULE *head; 124 VSTRING *expansion_buf; /* lookup result */ 125 } DICT_PCRE; 126 127 static int dict_pcre_init = 0; /* flag need to init pcre library */ 128 129 /* 130 * Context for $number expansion callback. 131 */ 132 typedef struct { 133 DICT_PCRE *dict_pcre; /* the dictionary handle */ 134 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 135 const char *lookup_string; /* string against which we match */ 136 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 137 int matches; /* Count of cuts */ 138 } DICT_PCRE_EXPAND_CONTEXT; 139 140 /* 141 * Context for $number pre-scan callback. 142 */ 143 typedef struct { 144 const char *mapname; /* name of regexp map */ 145 int lineno; /* where in file */ 146 size_t max_sub; /* Largest $n seen */ 147 char *literal; /* constant result, $$ -> $ */ 148 } DICT_PCRE_PRESCAN_CONTEXT; 149 150 /* 151 * Compatibility. 152 */ 153 #ifndef MAC_PARSE_OK 154 #define MAC_PARSE_OK 0 155 #endif 156 157 /* 158 * Macros to make dense code more accessible. 159 */ 160 #define NULL_STARTOFFSET (0) 161 #define NULL_EXEC_OPTIONS (0) 162 #define NULL_OVECTOR ((int *) 0) 163 #define NULL_OVECTOR_LENGTH (0) 164 165 /* dict_pcre_expand - replace $number with matched text */ 166 167 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr) 168 { 169 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 170 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 171 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 172 const char *pp; 173 int n; 174 int ret; 175 176 /* 177 * Replace $0-${99} with strings cut from matched text. 178 */ 179 if (type == MAC_PARSE_VARNAME) { 180 n = atoi(vstring_str(buf)); 181 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 182 ctxt->matches, n, &pp); 183 if (ret < 0) { 184 if (ret == PCRE_ERROR_NOSUBSTRING) 185 return (MAC_PARSE_UNDEF); 186 else 187 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 188 dict_pcre->dict.name, match_rule->rule.lineno, ret); 189 } 190 if (*pp == 0) { 191 myfree((char *) pp); 192 return (MAC_PARSE_UNDEF); 193 } 194 vstring_strcat(dict_pcre->expansion_buf, pp); 195 myfree((char *) pp); 196 return (MAC_PARSE_OK); 197 } 198 199 /* 200 * Straight text - duplicate with no substitution. 201 */ 202 else { 203 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 204 return (MAC_PARSE_OK); 205 } 206 } 207 208 /* dict_pcre_exec_error - report matching error */ 209 210 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 211 { 212 switch (errval) { 213 case 0: 214 msg_warn("pcre map %s, line %d: too many (...)", 215 mapname, lineno); 216 return; 217 case PCRE_ERROR_NULL: 218 case PCRE_ERROR_BADOPTION: 219 msg_fatal("pcre map %s, line %d: bad args to re_exec", 220 mapname, lineno); 221 case PCRE_ERROR_BADMAGIC: 222 case PCRE_ERROR_UNKNOWN_NODE: 223 msg_fatal("pcre map %s, line %d: corrupt compiled regexp", 224 mapname, lineno); 225 #ifdef PCRE_ERROR_NOMEMORY 226 case PCRE_ERROR_NOMEMORY: 227 msg_fatal("pcre map %s, line %d: out of memory", 228 mapname, lineno); 229 #endif 230 #ifdef PCRE_ERROR_MATCHLIMIT 231 case PCRE_ERROR_MATCHLIMIT: 232 msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit", 233 mapname, lineno); 234 #endif 235 #ifdef PCRE_ERROR_BADUTF8 236 case PCRE_ERROR_BADUTF8: 237 msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string", 238 mapname, lineno); 239 #endif 240 #ifdef PCRE_ERROR_BADUTF8_OFFSET 241 case PCRE_ERROR_BADUTF8_OFFSET: 242 msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string", 243 mapname, lineno); 244 #endif 245 default: 246 msg_fatal("pcre map %s, line %d: unknown re_exec error: %d", 247 mapname, lineno, errval); 248 } 249 } 250 251 /* dict_pcre_lookup - match string and perform optional substitution */ 252 253 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 254 { 255 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 256 DICT_PCRE_RULE *rule; 257 DICT_PCRE_IF_RULE *if_rule; 258 DICT_PCRE_MATCH_RULE *match_rule; 259 int lookup_len = strlen(lookup_string); 260 DICT_PCRE_EXPAND_CONTEXT ctxt; 261 int nesting = 0; 262 263 dict->error = 0; 264 265 if (msg_verbose) 266 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 267 268 /* 269 * Optionally fold the key. 270 */ 271 if (dict->flags & DICT_FLAG_FOLD_MUL) { 272 if (dict->fold_buf == 0) 273 dict->fold_buf = vstring_alloc(10); 274 vstring_strcpy(dict->fold_buf, lookup_string); 275 lookup_string = lowercase(vstring_str(dict->fold_buf)); 276 } 277 for (rule = dict_pcre->head; rule; rule = rule->next) { 278 279 /* 280 * Skip rules inside failed IF/ENDIF. 281 */ 282 if (nesting < rule->nesting) 283 continue; 284 285 switch (rule->op) { 286 287 /* 288 * Search for a matching expression. 289 */ 290 case DICT_PCRE_OP_MATCH: 291 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 292 ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints, 293 lookup_string, lookup_len, 294 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 295 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 296 297 if (ctxt.matches > 0) { 298 if (!match_rule->match) 299 continue; /* Negative rule matched */ 300 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 301 if (match_rule->match) 302 continue; /* Positive rule did not 303 * match */ 304 } else { 305 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 306 continue; /* pcre_exec failed */ 307 } 308 309 /* 310 * Skip $number substitutions when the replacement text contains 311 * no $number strings, as learned during the compile time 312 * pre-scan. The pre-scan already replaced $$ by $. 313 */ 314 if (match_rule->max_sub == 0) 315 return match_rule->replacement; 316 317 /* 318 * We've got a match. Perform substitution on replacement string. 319 */ 320 if (dict_pcre->expansion_buf == 0) 321 dict_pcre->expansion_buf = vstring_alloc(10); 322 VSTRING_RESET(dict_pcre->expansion_buf); 323 ctxt.dict_pcre = dict_pcre; 324 ctxt.match_rule = match_rule; 325 ctxt.lookup_string = lookup_string; 326 327 if (mac_parse(match_rule->replacement, dict_pcre_expand, 328 (char *) &ctxt) & MAC_PARSE_ERROR) 329 msg_fatal("pcre map %s, line %d: bad replacement syntax", 330 dict->name, rule->lineno); 331 332 VSTRING_TERMINATE(dict_pcre->expansion_buf); 333 return (vstring_str(dict_pcre->expansion_buf)); 334 335 /* 336 * Conditional. XXX We provide space for matched substring info 337 * because PCRE uses part of it as workspace for backtracking. 338 * PCRE will allocate memory if it runs out of backtracking 339 * storage. 340 */ 341 case DICT_PCRE_OP_IF: 342 if_rule = (DICT_PCRE_IF_RULE *) rule; 343 ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints, 344 lookup_string, lookup_len, 345 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 346 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 347 348 if (ctxt.matches > 0) { 349 if (!if_rule->match) 350 continue; /* Negative rule matched */ 351 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 352 if (if_rule->match) 353 continue; /* Positive rule did not 354 * match */ 355 } else { 356 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 357 continue; /* pcre_exec failed */ 358 } 359 nesting++; 360 continue; 361 362 /* 363 * ENDIF after successful IF. 364 */ 365 case DICT_PCRE_OP_ENDIF: 366 nesting--; 367 continue; 368 369 default: 370 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 371 } 372 } 373 return (0); 374 } 375 376 /* dict_pcre_close - close pcre dictionary */ 377 378 static void dict_pcre_close(DICT *dict) 379 { 380 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 381 DICT_PCRE_RULE *rule; 382 DICT_PCRE_RULE *next; 383 DICT_PCRE_MATCH_RULE *match_rule; 384 DICT_PCRE_IF_RULE *if_rule; 385 386 for (rule = dict_pcre->head; rule; rule = next) { 387 next = rule->next; 388 switch (rule->op) { 389 case DICT_PCRE_OP_MATCH: 390 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 391 if (match_rule->pattern) 392 myfree((char *) match_rule->pattern); 393 if (match_rule->hints) 394 myfree((char *) match_rule->hints); 395 if (match_rule->replacement) 396 myfree((char *) match_rule->replacement); 397 break; 398 case DICT_PCRE_OP_IF: 399 if_rule = (DICT_PCRE_IF_RULE *) rule; 400 if (if_rule->pattern) 401 myfree((char *) if_rule->pattern); 402 if (if_rule->hints) 403 myfree((char *) if_rule->hints); 404 break; 405 case DICT_PCRE_OP_ENDIF: 406 break; 407 default: 408 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 409 } 410 myfree((char *) rule); 411 } 412 if (dict_pcre->expansion_buf) 413 vstring_free(dict_pcre->expansion_buf); 414 if (dict->fold_buf) 415 vstring_free(dict->fold_buf); 416 dict_free(dict); 417 } 418 419 /* dict_pcre_get_pattern - extract pattern from rule */ 420 421 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 422 DICT_PCRE_REGEXP *pattern) 423 { 424 char *p = *bufp; 425 char re_delimiter; 426 427 /* 428 * Process negation operators. 429 */ 430 pattern->match = 1; 431 while (*p == '!') { 432 pattern->match = !pattern->match; 433 p++; 434 } 435 436 /* 437 * Grr...aceful handling of whitespace after '!'. 438 */ 439 while (*p && ISSPACE(*p)) 440 p++; 441 if (*p == 0) { 442 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 443 mapname, lineno); 444 return (0); 445 } 446 re_delimiter = *p++; 447 pattern->regexp = p; 448 449 /* 450 * Search for second delimiter, handling backslash escape. 451 */ 452 while (*p) { 453 if (*p == '\\') { 454 ++p; 455 if (*p == 0) 456 break; 457 } else if (*p == re_delimiter) 458 break; 459 ++p; 460 } 461 462 if (!*p) { 463 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 464 "ignoring this rule", mapname, lineno, re_delimiter); 465 return (0); 466 } 467 *p++ = 0; /* Null term the regexp */ 468 469 /* 470 * Parse any regexp options. 471 */ 472 pattern->options = PCRE_CASELESS | PCRE_DOTALL; 473 while (*p && !ISSPACE(*p)) { 474 switch (*p) { 475 case 'i': 476 pattern->options ^= PCRE_CASELESS; 477 break; 478 case 'm': 479 pattern->options ^= PCRE_MULTILINE; 480 break; 481 case 's': 482 pattern->options ^= PCRE_DOTALL; 483 break; 484 case 'x': 485 pattern->options ^= PCRE_EXTENDED; 486 break; 487 case 'A': 488 pattern->options ^= PCRE_ANCHORED; 489 break; 490 case 'E': 491 pattern->options ^= PCRE_DOLLAR_ENDONLY; 492 break; 493 case 'U': 494 pattern->options ^= PCRE_UNGREEDY; 495 break; 496 case 'X': 497 pattern->options ^= PCRE_EXTRA; 498 break; 499 default: 500 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 501 "skipping this rule", mapname, lineno, *p); 502 return (0); 503 } 504 ++p; 505 } 506 *bufp = p; 507 return (1); 508 } 509 510 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 511 512 static int dict_pcre_prescan(int type, VSTRING *buf, char *context) 513 { 514 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 515 size_t n; 516 517 /* 518 * Keep a copy of literal text (with $$ already replaced by $) if and 519 * only if the replacement text contains no $number expression. This way 520 * we can avoid having to scan the replacement text at lookup time. 521 */ 522 if (type == MAC_PARSE_VARNAME) { 523 if (ctxt->literal) { 524 myfree(ctxt->literal); 525 ctxt->literal = 0; 526 } 527 if (!alldig(vstring_str(buf))) { 528 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 529 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 530 return (MAC_PARSE_ERROR); 531 } 532 n = atoi(vstring_str(buf)); 533 if (n < 1) { 534 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 535 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 536 return (MAC_PARSE_ERROR); 537 } 538 if (n > ctxt->max_sub) 539 ctxt->max_sub = n; 540 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 541 if (ctxt->literal) 542 msg_panic("pcre map %s, line %d: multiple literals but no $number", 543 ctxt->mapname, ctxt->lineno); 544 ctxt->literal = mystrdup(vstring_str(buf)); 545 } 546 return (MAC_PARSE_OK); 547 } 548 549 /* dict_pcre_compile - compile pattern */ 550 551 static int dict_pcre_compile(const char *mapname, int lineno, 552 DICT_PCRE_REGEXP *pattern, 553 DICT_PCRE_ENGINE *engine) 554 { 555 const char *error; 556 int errptr; 557 558 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 559 &error, &errptr, NULL); 560 if (engine->pattern == 0) { 561 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 562 mapname, lineno, errptr, error); 563 return (0); 564 } 565 engine->hints = pcre_study(engine->pattern, 0, &error); 566 if (error != 0) { 567 msg_warn("pcre map %s, line %d: error while studying regex: %s", 568 mapname, lineno, error); 569 myfree((char *) engine->pattern); 570 return (0); 571 } 572 return (1); 573 } 574 575 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 576 577 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting, 578 int lineno, 579 size_t size) 580 { 581 DICT_PCRE_RULE *rule; 582 583 rule = (DICT_PCRE_RULE *) mymalloc(size); 584 rule->op = op; 585 rule->nesting = nesting; 586 rule->lineno = lineno; 587 rule->next = 0; 588 589 return (rule); 590 } 591 592 /* dict_pcre_parse_rule - parse and compile one rule */ 593 594 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno, 595 char *line, int nesting, 596 int dict_flags) 597 { 598 char *p; 599 int actual_sub; 600 601 p = line; 602 603 /* 604 * An ordinary match rule takes one pattern and replacement text. 605 */ 606 if (!ISALNUM(*p)) { 607 DICT_PCRE_REGEXP regexp; 608 DICT_PCRE_ENGINE engine; 609 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 610 DICT_PCRE_MATCH_RULE *match_rule; 611 612 /* 613 * Get the pattern string and options. 614 */ 615 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 616 return (0); 617 618 /* 619 * Get the replacement text. 620 */ 621 while (*p && ISSPACE(*p)) 622 ++p; 623 if (!*p) 624 msg_warn("%s, line %d: no replacement text: using empty string", 625 mapname, lineno); 626 627 /* 628 * Sanity check the $number instances in the replacement text. 629 */ 630 prescan_context.mapname = mapname; 631 prescan_context.lineno = lineno; 632 prescan_context.max_sub = 0; 633 prescan_context.literal = 0; 634 635 /* 636 * The optimizer will eliminate code duplication and/or dead code. 637 */ 638 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 639 if (prescan_context.literal) \ 640 myfree(prescan_context.literal); \ 641 return (rval); \ 642 } while (0) 643 644 if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context) 645 & MAC_PARSE_ERROR) { 646 msg_warn("pcre map %s, line %d: bad replacement syntax: " 647 "skipping this rule", mapname, lineno); 648 CREATE_MATCHOP_ERROR_RETURN(0); 649 } 650 651 /* 652 * Substring replacement not possible with negative regexps. 653 */ 654 if (prescan_context.max_sub > 0 && regexp.match == 0) { 655 msg_warn("pcre map %s, line %d: $number found in negative match " 656 "replacement text: skipping this rule", mapname, lineno); 657 CREATE_MATCHOP_ERROR_RETURN(0); 658 } 659 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 660 msg_warn("pcre map %s, line %d: " 661 "regular expression substitution is not allowed: " 662 "skipping this rule", mapname, lineno); 663 CREATE_MATCHOP_ERROR_RETURN(0); 664 } 665 666 /* 667 * Compile the pattern. 668 */ 669 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 670 CREATE_MATCHOP_ERROR_RETURN(0); 671 #ifdef PCRE_INFO_CAPTURECOUNT 672 if (pcre_fullinfo(engine.pattern, engine.hints, 673 PCRE_INFO_CAPTURECOUNT, 674 (void *) &actual_sub) != 0) 675 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 676 mapname, lineno); 677 if (prescan_context.max_sub > actual_sub) { 678 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 679 "skipping this rule", mapname, lineno, 680 (int) prescan_context.max_sub); 681 if (engine.pattern) 682 myfree((char *) engine.pattern); 683 if (engine.hints) 684 myfree((char *) engine.hints); 685 CREATE_MATCHOP_ERROR_RETURN(0); 686 } 687 #endif 688 689 /* 690 * Save the result. 691 */ 692 match_rule = (DICT_PCRE_MATCH_RULE *) 693 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno, 694 sizeof(DICT_PCRE_MATCH_RULE)); 695 match_rule->match = regexp.match; 696 match_rule->max_sub = prescan_context.max_sub; 697 if (prescan_context.literal) 698 match_rule->replacement = prescan_context.literal; 699 else 700 match_rule->replacement = mystrdup(p); 701 match_rule->pattern = engine.pattern; 702 match_rule->hints = engine.hints; 703 return ((DICT_PCRE_RULE *) match_rule); 704 } 705 706 /* 707 * The IF operator takes one pattern but no replacement text. 708 */ 709 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 710 DICT_PCRE_REGEXP regexp; 711 DICT_PCRE_ENGINE engine; 712 DICT_PCRE_IF_RULE *if_rule; 713 714 p += 2; 715 716 /* 717 * Get the pattern. 718 */ 719 while (*p && ISSPACE(*p)) 720 p++; 721 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 722 return (0); 723 724 /* 725 * Warn about out-of-place text. 726 */ 727 while (*p && ISSPACE(*p)) 728 ++p; 729 if (*p) { 730 msg_warn("pcre map %s, line %d: ignoring extra text after " 731 "IF statement: \"%s\"", mapname, lineno, p); 732 msg_warn("pcre map %s, line %d: do not prepend whitespace" 733 " to statements between IF and ENDIF", mapname, lineno); 734 } 735 736 /* 737 * Compile the pattern. 738 */ 739 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 740 return (0); 741 742 /* 743 * Save the result. 744 */ 745 if_rule = (DICT_PCRE_IF_RULE *) 746 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno, 747 sizeof(DICT_PCRE_IF_RULE)); 748 if_rule->match = regexp.match; 749 if_rule->pattern = engine.pattern; 750 if_rule->hints = engine.hints; 751 return ((DICT_PCRE_RULE *) if_rule); 752 } 753 754 /* 755 * The ENDIF operator takes no patterns and no replacement text. 756 */ 757 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 758 DICT_PCRE_RULE *rule; 759 760 p += 5; 761 762 /* 763 * Warn about out-of-place ENDIFs. 764 */ 765 if (nesting == 0) { 766 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 767 mapname, lineno); 768 return (0); 769 } 770 771 /* 772 * Warn about out-of-place text. 773 */ 774 while (*p && ISSPACE(*p)) 775 ++p; 776 if (*p) 777 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 778 mapname, lineno); 779 780 /* 781 * Save the result. 782 */ 783 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno, 784 sizeof(DICT_PCRE_RULE)); 785 return (rule); 786 } 787 788 /* 789 * Unrecognized input. 790 */ 791 else { 792 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 793 mapname, lineno); 794 return (0); 795 } 796 } 797 798 /* dict_pcre_open - load and compile a file containing regular expressions */ 799 800 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 801 { 802 DICT_PCRE *dict_pcre; 803 VSTREAM *map_fp; 804 struct stat st; 805 VSTRING *line_buffer; 806 DICT_PCRE_RULE *last_rule = 0; 807 DICT_PCRE_RULE *rule; 808 int lineno = 0; 809 int nesting = 0; 810 char *p; 811 812 /* 813 * Sanity checks. 814 */ 815 if (open_flags != O_RDONLY) 816 return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags, 817 "%s:%s map requires O_RDONLY access mode", 818 DICT_TYPE_PCRE, mapname)); 819 820 /* 821 * Open the configuration file. 822 */ 823 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 824 return (dict_surrogate(DICT_TYPE_PCRE, mapname, open_flags, dict_flags, 825 "open %s: %m", mapname)); 826 if (fstat(vstream_fileno(map_fp), &st) < 0) 827 msg_fatal("fstat %s: %m", mapname); 828 829 line_buffer = vstring_alloc(100); 830 831 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 832 sizeof(*dict_pcre)); 833 dict_pcre->dict.lookup = dict_pcre_lookup; 834 dict_pcre->dict.close = dict_pcre_close; 835 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 836 if (dict_flags & DICT_FLAG_FOLD_MUL) 837 dict_pcre->dict.fold_buf = vstring_alloc(10); 838 dict_pcre->head = 0; 839 dict_pcre->expansion_buf = 0; 840 841 if (dict_pcre_init == 0) { 842 pcre_malloc = (void *(*) (size_t)) mymalloc; 843 pcre_free = (void (*) (void *)) myfree; 844 dict_pcre_init = 1; 845 } 846 dict_pcre->dict.owner.uid = st.st_uid; 847 dict_pcre->dict.owner.status = (st.st_uid != 0); 848 849 /* 850 * Parse the pcre table. 851 */ 852 while (readlline(line_buffer, map_fp, &lineno)) { 853 p = vstring_str(line_buffer); 854 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 855 if (*p == 0) 856 continue; 857 rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags); 858 if (rule == 0) 859 continue; 860 if (rule->op == DICT_PCRE_OP_IF) { 861 nesting++; 862 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 863 nesting--; 864 } 865 if (last_rule == 0) 866 dict_pcre->head = rule; 867 else 868 last_rule->next = rule; 869 last_rule = rule; 870 } 871 872 if (nesting) 873 msg_warn("pcre map %s, line %d: more IFs than ENDIFs", 874 mapname, lineno); 875 876 vstring_free(line_buffer); 877 vstream_fclose(map_fp); 878 879 return (DICT_DEBUG (&dict_pcre->dict)); 880 } 881 882 #endif /* HAS_PCRE */ 883