1 /* $NetBSD: dict_pcre.c,v 1.1.1.1 2009/06/23 10:08:59 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /*--*/ 33 34 #include "sys_defs.h" 35 36 #ifdef HAS_PCRE 37 38 /* System library. */ 39 40 #include <stdio.h> /* sprintf() prototype */ 41 #include <stdlib.h> 42 #include <unistd.h> 43 #include <string.h> 44 #include <ctype.h> 45 46 #ifdef STRCASECMP_IN_STRINGS_H 47 #include <strings.h> 48 #endif 49 50 /* Utility library. */ 51 52 #include "mymalloc.h" 53 #include "msg.h" 54 #include "safe.h" 55 #include "vstream.h" 56 #include "vstring.h" 57 #include "stringops.h" 58 #include "readlline.h" 59 #include "dict.h" 60 #include "dict_pcre.h" 61 #include "mac_parse.h" 62 #include "pcre.h" 63 64 /* 65 * Support for IF/ENDIF based on an idea by Bert Driehuis. 66 */ 67 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 68 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 69 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 70 71 /* 72 * Max strings captured by regexp - essentially the max number of (..) 73 */ 74 #define PCRE_MAX_CAPTURE 99 75 76 /* 77 * Regular expression before and after compilation. 78 */ 79 typedef struct { 80 char *regexp; /* regular expression */ 81 int options; /* options */ 82 int match; /* positive or negative match */ 83 } DICT_PCRE_REGEXP; 84 85 typedef struct { 86 pcre *pattern; /* the compiled pattern */ 87 pcre_extra *hints; /* hints to speed pattern execution */ 88 } DICT_PCRE_ENGINE; 89 90 /* 91 * Compiled generic rule, and subclasses that derive from it. 92 */ 93 typedef struct DICT_PCRE_RULE { 94 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 95 int nesting; /* level of IF/ENDIF nesting */ 96 int lineno; /* source file line number */ 97 struct DICT_PCRE_RULE *next; /* next rule in dict */ 98 } DICT_PCRE_RULE; 99 100 typedef struct { 101 DICT_PCRE_RULE rule; /* generic part */ 102 pcre *pattern; /* compiled pattern */ 103 pcre_extra *hints; /* hints to speed pattern execution */ 104 char *replacement; /* replacement string */ 105 int match; /* positive or negative match */ 106 size_t max_sub; /* largest $number in replacement */ 107 } DICT_PCRE_MATCH_RULE; 108 109 typedef struct { 110 DICT_PCRE_RULE rule; /* generic members */ 111 pcre *pattern; /* compiled pattern */ 112 pcre_extra *hints; /* hints to speed pattern execution */ 113 int match; /* positive or negative match */ 114 } DICT_PCRE_IF_RULE; 115 116 /* 117 * PCRE map. 118 */ 119 typedef struct { 120 DICT dict; /* generic members */ 121 DICT_PCRE_RULE *head; 122 VSTRING *expansion_buf; /* lookup result */ 123 } DICT_PCRE; 124 125 static int dict_pcre_init = 0; /* flag need to init pcre library */ 126 127 /* 128 * Context for $number expansion callback. 129 */ 130 typedef struct { 131 DICT_PCRE *dict_pcre; /* the dictionary handle */ 132 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 133 const char *lookup_string; /* string against which we match */ 134 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 135 int matches; /* Count of cuts */ 136 } DICT_PCRE_EXPAND_CONTEXT; 137 138 /* 139 * Context for $number pre-scan callback. 140 */ 141 typedef struct { 142 const char *mapname; /* name of regexp map */ 143 int lineno; /* where in file */ 144 size_t max_sub; /* Largest $n seen */ 145 char *literal; /* constant result, $$ -> $ */ 146 } DICT_PCRE_PRESCAN_CONTEXT; 147 148 /* 149 * Compatibility. 150 */ 151 #ifndef MAC_PARSE_OK 152 #define MAC_PARSE_OK 0 153 #endif 154 155 /* 156 * Macros to make dense code more accessible. 157 */ 158 #define NULL_STARTOFFSET (0) 159 #define NULL_EXEC_OPTIONS (0) 160 #define NULL_OVECTOR ((int *) 0) 161 #define NULL_OVECTOR_LENGTH (0) 162 163 /* dict_pcre_expand - replace $number with matched text */ 164 165 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr) 166 { 167 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 168 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 169 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 170 const char *pp; 171 int n; 172 int ret; 173 174 /* 175 * Replace $0-${99} with strings cut from matched text. 176 */ 177 if (type == MAC_PARSE_VARNAME) { 178 n = atoi(vstring_str(buf)); 179 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 180 ctxt->matches, n, &pp); 181 if (ret < 0) { 182 if (ret == PCRE_ERROR_NOSUBSTRING) 183 return (MAC_PARSE_UNDEF); 184 else 185 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 186 dict_pcre->dict.name, match_rule->rule.lineno, ret); 187 } 188 if (*pp == 0) { 189 myfree((char *) pp); 190 return (MAC_PARSE_UNDEF); 191 } 192 vstring_strcat(dict_pcre->expansion_buf, pp); 193 myfree((char *) pp); 194 return (MAC_PARSE_OK); 195 } 196 197 /* 198 * Straight text - duplicate with no substitution. 199 */ 200 else { 201 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 202 return (MAC_PARSE_OK); 203 } 204 } 205 206 /* dict_pcre_exec_error - report matching error */ 207 208 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 209 { 210 switch (errval) { 211 case 0: 212 msg_warn("pcre map %s, line %d: too many (...)", 213 mapname, lineno); 214 return; 215 case PCRE_ERROR_NULL: 216 case PCRE_ERROR_BADOPTION: 217 msg_fatal("pcre map %s, line %d: bad args to re_exec", 218 mapname, lineno); 219 case PCRE_ERROR_BADMAGIC: 220 case PCRE_ERROR_UNKNOWN_NODE: 221 msg_fatal("pcre map %s, line %d: corrupt compiled regexp", 222 mapname, lineno); 223 #ifdef PCRE_ERROR_NOMEMORY 224 case PCRE_ERROR_NOMEMORY: 225 msg_fatal("pcre map %s, line %d: out of memory", 226 mapname, lineno); 227 #endif 228 #ifdef PCRE_ERROR_MATCHLIMIT 229 case PCRE_ERROR_MATCHLIMIT: 230 msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit", 231 mapname, lineno); 232 #endif 233 #ifdef PCRE_ERROR_BADUTF8 234 case PCRE_ERROR_BADUTF8: 235 msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string", 236 mapname, lineno); 237 #endif 238 #ifdef PCRE_ERROR_BADUTF8_OFFSET 239 case PCRE_ERROR_BADUTF8_OFFSET: 240 msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string", 241 mapname, lineno); 242 #endif 243 default: 244 msg_fatal("pcre map %s, line %d: unknown re_exec error: %d", 245 mapname, lineno, errval); 246 } 247 } 248 249 /* dict_pcre_lookup - match string and perform optional substitution */ 250 251 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 252 { 253 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 254 DICT_PCRE_RULE *rule; 255 DICT_PCRE_IF_RULE *if_rule; 256 DICT_PCRE_MATCH_RULE *match_rule; 257 int lookup_len = strlen(lookup_string); 258 DICT_PCRE_EXPAND_CONTEXT ctxt; 259 int nesting = 0; 260 261 dict_errno = 0; 262 263 if (msg_verbose) 264 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 265 266 /* 267 * Optionally fold the key. 268 */ 269 if (dict->flags & DICT_FLAG_FOLD_MUL) { 270 if (dict->fold_buf == 0) 271 dict->fold_buf = vstring_alloc(10); 272 vstring_strcpy(dict->fold_buf, lookup_string); 273 lookup_string = lowercase(vstring_str(dict->fold_buf)); 274 } 275 for (rule = dict_pcre->head; rule; rule = rule->next) { 276 277 /* 278 * Skip rules inside failed IF/ENDIF. 279 */ 280 if (nesting < rule->nesting) 281 continue; 282 283 switch (rule->op) { 284 285 /* 286 * Search for a matching expression. 287 */ 288 case DICT_PCRE_OP_MATCH: 289 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 290 ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints, 291 lookup_string, lookup_len, 292 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 293 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 294 295 if (ctxt.matches > 0) { 296 if (!match_rule->match) 297 continue; /* Negative rule matched */ 298 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 299 if (match_rule->match) 300 continue; /* Positive rule did not 301 * match */ 302 } else { 303 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 304 continue; /* pcre_exec failed */ 305 } 306 307 /* 308 * Skip $number substitutions when the replacement text contains 309 * no $number strings, as learned during the compile time 310 * pre-scan. The pre-scan already replaced $$ by $. 311 */ 312 if (match_rule->max_sub == 0) 313 return match_rule->replacement; 314 315 /* 316 * We've got a match. Perform substitution on replacement string. 317 */ 318 if (dict_pcre->expansion_buf == 0) 319 dict_pcre->expansion_buf = vstring_alloc(10); 320 VSTRING_RESET(dict_pcre->expansion_buf); 321 ctxt.dict_pcre = dict_pcre; 322 ctxt.match_rule = match_rule; 323 ctxt.lookup_string = lookup_string; 324 325 if (mac_parse(match_rule->replacement, dict_pcre_expand, 326 (char *) &ctxt) & MAC_PARSE_ERROR) 327 msg_fatal("pcre map %s, line %d: bad replacement syntax", 328 dict->name, rule->lineno); 329 330 VSTRING_TERMINATE(dict_pcre->expansion_buf); 331 return (vstring_str(dict_pcre->expansion_buf)); 332 333 /* 334 * Conditional. XXX We provide space for matched substring info 335 * because PCRE uses part of it as workspace for backtracking. 336 * PCRE will allocate memory if it runs out of backtracking 337 * storage. 338 */ 339 case DICT_PCRE_OP_IF: 340 if_rule = (DICT_PCRE_IF_RULE *) rule; 341 ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints, 342 lookup_string, lookup_len, 343 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, 344 ctxt.offsets, PCRE_MAX_CAPTURE * 3); 345 346 if (ctxt.matches > 0) { 347 if (!if_rule->match) 348 continue; /* Negative rule matched */ 349 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) { 350 if (if_rule->match) 351 continue; /* Positive rule did not 352 * match */ 353 } else { 354 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches); 355 continue; /* pcre_exec failed */ 356 } 357 nesting++; 358 continue; 359 360 /* 361 * ENDIF after successful IF. 362 */ 363 case DICT_PCRE_OP_ENDIF: 364 nesting--; 365 continue; 366 367 default: 368 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 369 } 370 } 371 return (0); 372 } 373 374 /* dict_pcre_close - close pcre dictionary */ 375 376 static void dict_pcre_close(DICT *dict) 377 { 378 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 379 DICT_PCRE_RULE *rule; 380 DICT_PCRE_RULE *next; 381 DICT_PCRE_MATCH_RULE *match_rule; 382 DICT_PCRE_IF_RULE *if_rule; 383 384 for (rule = dict_pcre->head; rule; rule = next) { 385 next = rule->next; 386 switch (rule->op) { 387 case DICT_PCRE_OP_MATCH: 388 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 389 if (match_rule->pattern) 390 myfree((char *) match_rule->pattern); 391 if (match_rule->hints) 392 myfree((char *) match_rule->hints); 393 if (match_rule->replacement) 394 myfree((char *) match_rule->replacement); 395 break; 396 case DICT_PCRE_OP_IF: 397 if_rule = (DICT_PCRE_IF_RULE *) rule; 398 if (if_rule->pattern) 399 myfree((char *) if_rule->pattern); 400 if (if_rule->hints) 401 myfree((char *) if_rule->hints); 402 break; 403 case DICT_PCRE_OP_ENDIF: 404 break; 405 default: 406 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 407 } 408 myfree((char *) rule); 409 } 410 if (dict_pcre->expansion_buf) 411 vstring_free(dict_pcre->expansion_buf); 412 if (dict->fold_buf) 413 vstring_free(dict->fold_buf); 414 dict_free(dict); 415 } 416 417 /* dict_pcre_get_pattern - extract pattern from rule */ 418 419 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 420 DICT_PCRE_REGEXP *pattern) 421 { 422 char *p = *bufp; 423 char re_delimiter; 424 425 /* 426 * Process negation operators. 427 */ 428 pattern->match = 1; 429 while (*p == '!') { 430 pattern->match = !pattern->match; 431 p++; 432 } 433 434 /* 435 * Grr...aceful handling of whitespace after '!'. 436 */ 437 while (*p && ISSPACE(*p)) 438 p++; 439 if (*p == 0) { 440 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 441 mapname, lineno); 442 return (0); 443 } 444 re_delimiter = *p++; 445 pattern->regexp = p; 446 447 /* 448 * Search for second delimiter, handling backslash escape. 449 */ 450 while (*p) { 451 if (*p == '\\') { 452 ++p; 453 if (*p == 0) 454 break; 455 } else if (*p == re_delimiter) 456 break; 457 ++p; 458 } 459 460 if (!*p) { 461 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 462 "ignoring this rule", mapname, lineno, re_delimiter); 463 return (0); 464 } 465 *p++ = 0; /* Null term the regexp */ 466 467 /* 468 * Parse any regexp options. 469 */ 470 pattern->options = PCRE_CASELESS | PCRE_DOTALL; 471 while (*p && !ISSPACE(*p)) { 472 switch (*p) { 473 case 'i': 474 pattern->options ^= PCRE_CASELESS; 475 break; 476 case 'm': 477 pattern->options ^= PCRE_MULTILINE; 478 break; 479 case 's': 480 pattern->options ^= PCRE_DOTALL; 481 break; 482 case 'x': 483 pattern->options ^= PCRE_EXTENDED; 484 break; 485 case 'A': 486 pattern->options ^= PCRE_ANCHORED; 487 break; 488 case 'E': 489 pattern->options ^= PCRE_DOLLAR_ENDONLY; 490 break; 491 case 'U': 492 pattern->options ^= PCRE_UNGREEDY; 493 break; 494 case 'X': 495 pattern->options ^= PCRE_EXTRA; 496 break; 497 default: 498 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 499 "skipping this rule", mapname, lineno, *p); 500 return (0); 501 } 502 ++p; 503 } 504 *bufp = p; 505 return (1); 506 } 507 508 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 509 510 static int dict_pcre_prescan(int type, VSTRING *buf, char *context) 511 { 512 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 513 size_t n; 514 515 /* 516 * Keep a copy of literal text (with $$ already replaced by $) if and 517 * only if the replacement text contains no $number expression. This way 518 * we can avoid having to scan the replacement text at lookup time. 519 */ 520 if (type == MAC_PARSE_VARNAME) { 521 if (ctxt->literal) { 522 myfree(ctxt->literal); 523 ctxt->literal = 0; 524 } 525 if (!alldig(vstring_str(buf))) { 526 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 527 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 528 return (MAC_PARSE_ERROR); 529 } 530 n = atoi(vstring_str(buf)); 531 if (n < 1) { 532 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 533 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 534 return (MAC_PARSE_ERROR); 535 } 536 if (n > ctxt->max_sub) 537 ctxt->max_sub = n; 538 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 539 if (ctxt->literal) 540 msg_panic("pcre map %s, line %d: multiple literals but no $number", 541 ctxt->mapname, ctxt->lineno); 542 ctxt->literal = mystrdup(vstring_str(buf)); 543 } 544 return (MAC_PARSE_OK); 545 } 546 547 /* dict_pcre_compile - compile pattern */ 548 549 static int dict_pcre_compile(const char *mapname, int lineno, 550 DICT_PCRE_REGEXP *pattern, 551 DICT_PCRE_ENGINE *engine) 552 { 553 const char *error; 554 int errptr; 555 556 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 557 &error, &errptr, NULL); 558 if (engine->pattern == 0) { 559 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 560 mapname, lineno, errptr, error); 561 return (0); 562 } 563 engine->hints = pcre_study(engine->pattern, 0, &error); 564 if (error != 0) { 565 msg_warn("pcre map %s, line %d: error while studying regex: %s", 566 mapname, lineno, error); 567 myfree((char *) engine->pattern); 568 return (0); 569 } 570 return (1); 571 } 572 573 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 574 575 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting, 576 int lineno, 577 size_t size) 578 { 579 DICT_PCRE_RULE *rule; 580 581 rule = (DICT_PCRE_RULE *) mymalloc(size); 582 rule->op = op; 583 rule->nesting = nesting; 584 rule->lineno = lineno; 585 rule->next = 0; 586 587 return (rule); 588 } 589 590 /* dict_pcre_parse_rule - parse and compile one rule */ 591 592 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno, 593 char *line, int nesting, 594 int dict_flags) 595 { 596 char *p; 597 int actual_sub; 598 599 p = line; 600 601 /* 602 * An ordinary match rule takes one pattern and replacement text. 603 */ 604 if (!ISALNUM(*p)) { 605 DICT_PCRE_REGEXP regexp; 606 DICT_PCRE_ENGINE engine; 607 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 608 DICT_PCRE_MATCH_RULE *match_rule; 609 610 /* 611 * Get the pattern string and options. 612 */ 613 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 614 return (0); 615 616 /* 617 * Get the replacement text. 618 */ 619 while (*p && ISSPACE(*p)) 620 ++p; 621 if (!*p) 622 msg_warn("%s, line %d: no replacement text: using empty string", 623 mapname, lineno); 624 625 /* 626 * Sanity check the $number instances in the replacement text. 627 */ 628 prescan_context.mapname = mapname; 629 prescan_context.lineno = lineno; 630 prescan_context.max_sub = 0; 631 prescan_context.literal = 0; 632 633 /* 634 * The optimizer will eliminate code duplication and/or dead code. 635 */ 636 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 637 if (prescan_context.literal) \ 638 myfree(prescan_context.literal); \ 639 return (rval); \ 640 } while (0) 641 642 if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context) 643 & MAC_PARSE_ERROR) { 644 msg_warn("pcre map %s, line %d: bad replacement syntax: " 645 "skipping this rule", mapname, lineno); 646 CREATE_MATCHOP_ERROR_RETURN(0); 647 } 648 649 /* 650 * Substring replacement not possible with negative regexps. 651 */ 652 if (prescan_context.max_sub > 0 && regexp.match == 0) { 653 msg_warn("pcre map %s, line %d: $number found in negative match " 654 "replacement text: skipping this rule", mapname, lineno); 655 CREATE_MATCHOP_ERROR_RETURN(0); 656 } 657 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 658 msg_warn("pcre map %s, line %d: " 659 "regular expression substitution is not allowed: " 660 "skipping this rule", mapname, lineno); 661 CREATE_MATCHOP_ERROR_RETURN(0); 662 } 663 664 /* 665 * Compile the pattern. 666 */ 667 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 668 CREATE_MATCHOP_ERROR_RETURN(0); 669 #ifdef PCRE_INFO_CAPTURECOUNT 670 if (pcre_fullinfo(engine.pattern, engine.hints, 671 PCRE_INFO_CAPTURECOUNT, 672 (void *) &actual_sub) != 0) 673 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 674 mapname, lineno); 675 if (prescan_context.max_sub > actual_sub) { 676 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 677 "skipping this rule", mapname, lineno, 678 (int) prescan_context.max_sub); 679 if (engine.pattern) 680 myfree((char *) engine.pattern); 681 if (engine.hints) 682 myfree((char *) engine.hints); 683 CREATE_MATCHOP_ERROR_RETURN(0); 684 } 685 #endif 686 687 /* 688 * Save the result. 689 */ 690 match_rule = (DICT_PCRE_MATCH_RULE *) 691 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno, 692 sizeof(DICT_PCRE_MATCH_RULE)); 693 match_rule->match = regexp.match; 694 match_rule->max_sub = prescan_context.max_sub; 695 if (prescan_context.literal) 696 match_rule->replacement = prescan_context.literal; 697 else 698 match_rule->replacement = mystrdup(p); 699 match_rule->pattern = engine.pattern; 700 match_rule->hints = engine.hints; 701 return ((DICT_PCRE_RULE *) match_rule); 702 } 703 704 /* 705 * The IF operator takes one pattern but no replacement text. 706 */ 707 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 708 DICT_PCRE_REGEXP regexp; 709 DICT_PCRE_ENGINE engine; 710 DICT_PCRE_IF_RULE *if_rule; 711 712 p += 2; 713 714 /* 715 * Get the pattern. 716 */ 717 while (*p && ISSPACE(*p)) 718 p++; 719 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 720 return (0); 721 722 /* 723 * Warn about out-of-place text. 724 */ 725 while (*p && ISSPACE(*p)) 726 ++p; 727 if (*p) { 728 msg_warn("pcre map %s, line %d: ignoring extra text after " 729 "IF statement: \"%s\"", mapname, lineno, p); 730 msg_warn("pcre map %s, line %d: do not prepend whitespace" 731 " to statements between IF and ENDIF", mapname, lineno); 732 } 733 734 /* 735 * Compile the pattern. 736 */ 737 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 738 return (0); 739 740 /* 741 * Save the result. 742 */ 743 if_rule = (DICT_PCRE_IF_RULE *) 744 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno, 745 sizeof(DICT_PCRE_IF_RULE)); 746 if_rule->match = regexp.match; 747 if_rule->pattern = engine.pattern; 748 if_rule->hints = engine.hints; 749 return ((DICT_PCRE_RULE *) if_rule); 750 } 751 752 /* 753 * The ENDIF operator takes no patterns and no replacement text. 754 */ 755 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 756 DICT_PCRE_RULE *rule; 757 758 p += 5; 759 760 /* 761 * Warn about out-of-place ENDIFs. 762 */ 763 if (nesting == 0) { 764 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 765 mapname, lineno); 766 return (0); 767 } 768 769 /* 770 * Warn about out-of-place text. 771 */ 772 while (*p && ISSPACE(*p)) 773 ++p; 774 if (*p) 775 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 776 mapname, lineno); 777 778 /* 779 * Save the result. 780 */ 781 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno, 782 sizeof(DICT_PCRE_RULE)); 783 return (rule); 784 } 785 786 /* 787 * Unrecognized input. 788 */ 789 else { 790 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 791 mapname, lineno); 792 return (0); 793 } 794 } 795 796 /* dict_pcre_open - load and compile a file containing regular expressions */ 797 798 DICT *dict_pcre_open(const char *mapname, int unused_flags, int dict_flags) 799 { 800 DICT_PCRE *dict_pcre; 801 VSTREAM *map_fp; 802 VSTRING *line_buffer; 803 DICT_PCRE_RULE *last_rule = 0; 804 DICT_PCRE_RULE *rule; 805 int lineno = 0; 806 int nesting = 0; 807 char *p; 808 809 line_buffer = vstring_alloc(100); 810 811 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 812 sizeof(*dict_pcre)); 813 dict_pcre->dict.lookup = dict_pcre_lookup; 814 dict_pcre->dict.close = dict_pcre_close; 815 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 816 if (dict_flags & DICT_FLAG_FOLD_MUL) 817 dict_pcre->dict.fold_buf = vstring_alloc(10); 818 dict_pcre->head = 0; 819 dict_pcre->expansion_buf = 0; 820 821 if (dict_pcre_init == 0) { 822 pcre_malloc = (void *(*) (size_t)) mymalloc; 823 pcre_free = (void (*) (void *)) myfree; 824 dict_pcre_init = 1; 825 } 826 827 /* 828 * Parse the pcre table. 829 */ 830 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 831 msg_fatal("open %s: %m", mapname); 832 833 while (readlline(line_buffer, map_fp, &lineno)) { 834 p = vstring_str(line_buffer); 835 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 836 if (*p == 0) 837 continue; 838 rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags); 839 if (rule == 0) 840 continue; 841 if (rule->op == DICT_PCRE_OP_IF) { 842 nesting++; 843 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 844 nesting--; 845 } 846 if (last_rule == 0) 847 dict_pcre->head = rule; 848 else 849 last_rule->next = rule; 850 last_rule = rule; 851 } 852 853 if (nesting) 854 msg_warn("pcre map %s, line %d: more IFs than ENDIFs", 855 mapname, lineno); 856 857 vstring_free(line_buffer); 858 vstream_fclose(map_fp); 859 860 return (DICT_DEBUG (&dict_pcre->dict)); 861 } 862 863 #endif /* HAS_PCRE */ 864