1 /* $NetBSD: dict_pcre.c,v 1.3 2020/03/18 19:05:21 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /* 33 /* Wietse Venema 34 /* Google, Inc. 35 /* 111 8th Avenue 36 /* New York, NY 10011, USA 37 /*--*/ 38 39 #include "sys_defs.h" 40 41 #ifdef HAS_PCRE 42 43 /* System library. */ 44 45 #include <sys/stat.h> 46 #include <stdio.h> /* sprintf() prototype */ 47 #include <stdlib.h> 48 #include <unistd.h> 49 #include <string.h> 50 #include <ctype.h> 51 52 #ifdef STRCASECMP_IN_STRINGS_H 53 #include <strings.h> 54 #endif 55 56 /* Utility library. */ 57 58 #include "mymalloc.h" 59 #include "msg.h" 60 #include "safe.h" 61 #include "vstream.h" 62 #include "vstring.h" 63 #include "stringops.h" 64 #include "readlline.h" 65 #include "dict.h" 66 #include "dict_pcre.h" 67 #include "mac_parse.h" 68 #include "pcre.h" 69 #include "warn_stat.h" 70 #include "mvect.h" 71 72 /* 73 * Backwards compatibility. 74 */ 75 #ifdef PCRE_STUDY_JIT_COMPILE 76 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x) 77 #else 78 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x)) 79 #endif 80 81 /* 82 * Support for IF/ENDIF based on an idea by Bert Driehuis. 83 */ 84 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 85 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 86 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 87 88 /* 89 * Max strings captured by regexp - essentially the max number of (..) 90 */ 91 #define PCRE_MAX_CAPTURE 99 92 93 /* 94 * Regular expression before and after compilation. 95 */ 96 typedef struct { 97 char *regexp; /* regular expression */ 98 int options; /* options */ 99 int match; /* positive or negative match */ 100 } DICT_PCRE_REGEXP; 101 102 typedef struct { 103 pcre *pattern; /* the compiled pattern */ 104 pcre_extra *hints; /* hints to speed pattern execution */ 105 } DICT_PCRE_ENGINE; 106 107 /* 108 * Compiled generic rule, and subclasses that derive from it. 109 */ 110 typedef struct DICT_PCRE_RULE { 111 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 112 int lineno; /* source file line number */ 113 struct DICT_PCRE_RULE *next; /* next rule in dict */ 114 } DICT_PCRE_RULE; 115 116 typedef struct { 117 DICT_PCRE_RULE rule; /* generic part */ 118 pcre *pattern; /* compiled pattern */ 119 pcre_extra *hints; /* hints to speed pattern execution */ 120 char *replacement; /* replacement string */ 121 int match; /* positive or negative match */ 122 size_t max_sub; /* largest $number in replacement */ 123 } DICT_PCRE_MATCH_RULE; 124 125 typedef struct { 126 DICT_PCRE_RULE rule; /* generic members */ 127 pcre *pattern; /* compiled pattern */ 128 pcre_extra *hints; /* hints to speed pattern execution */ 129 int match; /* positive or negative match */ 130 struct DICT_PCRE_RULE *endif_rule; /* matching endif rule */ 131 } DICT_PCRE_IF_RULE; 132 133 /* 134 * PCRE map. 135 */ 136 typedef struct { 137 DICT dict; /* generic members */ 138 DICT_PCRE_RULE *head; 139 VSTRING *expansion_buf; /* lookup result */ 140 } DICT_PCRE; 141 142 static int dict_pcre_init = 0; /* flag need to init pcre library */ 143 144 /* 145 * Context for $number expansion callback. 146 */ 147 typedef struct { 148 DICT_PCRE *dict_pcre; /* the dictionary handle */ 149 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 150 const char *lookup_string; /* string against which we match */ 151 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 152 int matches; /* Count of cuts */ 153 } DICT_PCRE_EXPAND_CONTEXT; 154 155 /* 156 * Context for $number pre-scan callback. 157 */ 158 typedef struct { 159 const char *mapname; /* name of regexp map */ 160 int lineno; /* where in file */ 161 size_t max_sub; /* Largest $n seen */ 162 char *literal; /* constant result, $$ -> $ */ 163 } DICT_PCRE_PRESCAN_CONTEXT; 164 165 /* 166 * Compatibility. 167 */ 168 #ifndef MAC_PARSE_OK 169 #define MAC_PARSE_OK 0 170 #endif 171 172 /* 173 * Macros to make dense code more accessible. 174 */ 175 #define NULL_STARTOFFSET (0) 176 #define NULL_EXEC_OPTIONS (0) 177 #define NULL_OVECTOR ((int *) 0) 178 #define NULL_OVECTOR_LENGTH (0) 179 180 /* dict_pcre_expand - replace $number with matched text */ 181 182 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr) 183 { 184 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 185 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 186 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 187 const char *pp; 188 int n; 189 int ret; 190 191 /* 192 * Replace $0-${99} with strings cut from matched text. 193 */ 194 if (type == MAC_PARSE_VARNAME) { 195 n = atoi(vstring_str(buf)); 196 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 197 ctxt->matches, n, &pp); 198 if (ret < 0) { 199 if (ret == PCRE_ERROR_NOSUBSTRING) 200 return (MAC_PARSE_UNDEF); 201 else 202 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 203 dict_pcre->dict.name, match_rule->rule.lineno, ret); 204 } 205 if (*pp == 0) { 206 myfree((void *) pp); 207 return (MAC_PARSE_UNDEF); 208 } 209 vstring_strcat(dict_pcre->expansion_buf, pp); 210 myfree((void *) pp); 211 return (MAC_PARSE_OK); 212 } 213 214 /* 215 * Straight text - duplicate with no substitution. 216 */ 217 else { 218 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 219 return (MAC_PARSE_OK); 220 } 221 } 222 223 /* dict_pcre_exec_error - report matching error */ 224 225 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 226 { 227 switch (errval) { 228 case 0: 229 msg_warn("pcre map %s, line %d: too many (...)", 230 mapname, lineno); 231 return; 232 case PCRE_ERROR_NULL: 233 case PCRE_ERROR_BADOPTION: 234 msg_warn("pcre map %s, line %d: bad args to re_exec", 235 mapname, lineno); 236 return; 237 case PCRE_ERROR_BADMAGIC: 238 case PCRE_ERROR_UNKNOWN_NODE: 239 msg_warn("pcre map %s, line %d: corrupt compiled regexp", 240 mapname, lineno); 241 return; 242 #ifdef PCRE_ERROR_NOMEMORY 243 case PCRE_ERROR_NOMEMORY: 244 msg_warn("pcre map %s, line %d: out of memory", 245 mapname, lineno); 246 return; 247 #endif 248 #ifdef PCRE_ERROR_MATCHLIMIT 249 case PCRE_ERROR_MATCHLIMIT: 250 msg_warn("pcre map %s, line %d: backtracking limit exceeded", 251 mapname, lineno); 252 return; 253 #endif 254 #ifdef PCRE_ERROR_BADUTF8 255 case PCRE_ERROR_BADUTF8: 256 msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string", 257 mapname, lineno); 258 return; 259 #endif 260 #ifdef PCRE_ERROR_BADUTF8_OFFSET 261 case PCRE_ERROR_BADUTF8_OFFSET: 262 msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string", 263 mapname, lineno); 264 return; 265 #endif 266 default: 267 msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d", 268 mapname, lineno, errval); 269 return; 270 } 271 } 272 273 /* 274 * Inlined to reduce function call overhead in the time-critical loop. 275 */ 276 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \ 277 ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \ 278 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \ 279 (ctxt).offsets, PCRE_MAX_CAPTURE * 3), \ 280 (ctxt).matches > 0 ? (match) : \ 281 (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \ 282 (dict_pcre_exec_error((map), (line), (ctxt).matches), 0)) 283 284 /* dict_pcre_lookup - match string and perform optional substitution */ 285 286 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 287 { 288 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 289 DICT_PCRE_RULE *rule; 290 DICT_PCRE_IF_RULE *if_rule; 291 DICT_PCRE_MATCH_RULE *match_rule; 292 int lookup_len = strlen(lookup_string); 293 DICT_PCRE_EXPAND_CONTEXT ctxt; 294 295 dict->error = 0; 296 297 if (msg_verbose) 298 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 299 300 /* 301 * Optionally fold the key. 302 */ 303 if (dict->flags & DICT_FLAG_FOLD_MUL) { 304 if (dict->fold_buf == 0) 305 dict->fold_buf = vstring_alloc(10); 306 vstring_strcpy(dict->fold_buf, lookup_string); 307 lookup_string = lowercase(vstring_str(dict->fold_buf)); 308 } 309 for (rule = dict_pcre->head; rule; rule = rule->next) { 310 311 switch (rule->op) { 312 313 /* 314 * Search for a matching expression. 315 */ 316 case DICT_PCRE_OP_MATCH: 317 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 318 if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 319 match_rule->pattern, match_rule->hints, 320 match_rule->match, lookup_string, lookup_len)) 321 continue; 322 323 /* 324 * Skip $number substitutions when the replacement text contains 325 * no $number strings, as learned during the compile time 326 * pre-scan. The pre-scan already replaced $$ by $. 327 */ 328 if (match_rule->max_sub == 0) 329 return match_rule->replacement; 330 331 /* 332 * We've got a match. Perform substitution on replacement string. 333 */ 334 if (dict_pcre->expansion_buf == 0) 335 dict_pcre->expansion_buf = vstring_alloc(10); 336 VSTRING_RESET(dict_pcre->expansion_buf); 337 ctxt.dict_pcre = dict_pcre; 338 ctxt.match_rule = match_rule; 339 ctxt.lookup_string = lookup_string; 340 341 if (mac_parse(match_rule->replacement, dict_pcre_expand, 342 (void *) &ctxt) & MAC_PARSE_ERROR) 343 msg_fatal("pcre map %s, line %d: bad replacement syntax", 344 dict->name, rule->lineno); 345 346 VSTRING_TERMINATE(dict_pcre->expansion_buf); 347 return (vstring_str(dict_pcre->expansion_buf)); 348 349 /* 350 * Conditional. XXX We provide space for matched substring info 351 * because PCRE uses part of it as workspace for backtracking. 352 * PCRE will allocate memory if it runs out of backtracking 353 * storage. 354 */ 355 case DICT_PCRE_OP_IF: 356 if_rule = (DICT_PCRE_IF_RULE *) rule; 357 if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 358 if_rule->pattern, if_rule->hints, 359 if_rule->match, lookup_string, lookup_len)) 360 continue; 361 /* An IF without matching ENDIF has no "endif" rule. */ 362 if ((rule = if_rule->endif_rule) == 0) 363 return (0); 364 /* FALLTHROUGH */ 365 366 /* 367 * ENDIF after IF. 368 */ 369 case DICT_PCRE_OP_ENDIF: 370 continue; 371 372 default: 373 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 374 } 375 } 376 return (0); 377 } 378 379 /* dict_pcre_close - close pcre dictionary */ 380 381 static void dict_pcre_close(DICT *dict) 382 { 383 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 384 DICT_PCRE_RULE *rule; 385 DICT_PCRE_RULE *next; 386 DICT_PCRE_MATCH_RULE *match_rule; 387 DICT_PCRE_IF_RULE *if_rule; 388 389 for (rule = dict_pcre->head; rule; rule = next) { 390 next = rule->next; 391 switch (rule->op) { 392 case DICT_PCRE_OP_MATCH: 393 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 394 if (match_rule->pattern) 395 myfree((void *) match_rule->pattern); 396 if (match_rule->hints) 397 DICT_PCRE_FREE_STUDY(match_rule->hints); 398 if (match_rule->replacement) 399 myfree((void *) match_rule->replacement); 400 break; 401 case DICT_PCRE_OP_IF: 402 if_rule = (DICT_PCRE_IF_RULE *) rule; 403 if (if_rule->pattern) 404 myfree((void *) if_rule->pattern); 405 if (if_rule->hints) 406 DICT_PCRE_FREE_STUDY(if_rule->hints); 407 break; 408 case DICT_PCRE_OP_ENDIF: 409 break; 410 default: 411 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 412 } 413 myfree((void *) rule); 414 } 415 if (dict_pcre->expansion_buf) 416 vstring_free(dict_pcre->expansion_buf); 417 if (dict->fold_buf) 418 vstring_free(dict->fold_buf); 419 dict_free(dict); 420 } 421 422 /* dict_pcre_get_pattern - extract pattern from rule */ 423 424 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 425 DICT_PCRE_REGEXP *pattern) 426 { 427 char *p = *bufp; 428 char re_delimiter; 429 430 /* 431 * Process negation operators. 432 */ 433 pattern->match = 1; 434 for (;;) { 435 if (*p == '!') 436 pattern->match = !pattern->match; 437 else if (!ISSPACE(*p)) 438 break; 439 p++; 440 } 441 if (*p == 0) { 442 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 443 mapname, lineno); 444 return (0); 445 } 446 re_delimiter = *p++; 447 pattern->regexp = p; 448 449 /* 450 * Search for second delimiter, handling backslash escape. 451 */ 452 while (*p) { 453 if (*p == '\\') { 454 ++p; 455 if (*p == 0) 456 break; 457 } else if (*p == re_delimiter) 458 break; 459 ++p; 460 } 461 462 if (!*p) { 463 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 464 "ignoring this rule", mapname, lineno, re_delimiter); 465 return (0); 466 } 467 *p++ = 0; /* Null term the regexp */ 468 469 /* 470 * Parse any regexp options. 471 */ 472 pattern->options = PCRE_CASELESS | PCRE_DOTALL; 473 while (*p && !ISSPACE(*p)) { 474 switch (*p) { 475 case 'i': 476 pattern->options ^= PCRE_CASELESS; 477 break; 478 case 'm': 479 pattern->options ^= PCRE_MULTILINE; 480 break; 481 case 's': 482 pattern->options ^= PCRE_DOTALL; 483 break; 484 case 'x': 485 pattern->options ^= PCRE_EXTENDED; 486 break; 487 case 'A': 488 pattern->options ^= PCRE_ANCHORED; 489 break; 490 case 'E': 491 pattern->options ^= PCRE_DOLLAR_ENDONLY; 492 break; 493 case 'U': 494 pattern->options ^= PCRE_UNGREEDY; 495 break; 496 case 'X': 497 pattern->options ^= PCRE_EXTRA; 498 break; 499 default: 500 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 501 "skipping this rule", mapname, lineno, *p); 502 return (0); 503 } 504 ++p; 505 } 506 *bufp = p; 507 return (1); 508 } 509 510 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 511 512 static int dict_pcre_prescan(int type, VSTRING *buf, void *context) 513 { 514 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 515 size_t n; 516 517 /* 518 * Keep a copy of literal text (with $$ already replaced by $) if and 519 * only if the replacement text contains no $number expression. This way 520 * we can avoid having to scan the replacement text at lookup time. 521 */ 522 if (type == MAC_PARSE_VARNAME) { 523 if (ctxt->literal) { 524 myfree(ctxt->literal); 525 ctxt->literal = 0; 526 } 527 if (!alldig(vstring_str(buf))) { 528 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 529 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 530 return (MAC_PARSE_ERROR); 531 } 532 n = atoi(vstring_str(buf)); 533 if (n < 1) { 534 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 535 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 536 return (MAC_PARSE_ERROR); 537 } 538 if (n > ctxt->max_sub) 539 ctxt->max_sub = n; 540 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 541 if (ctxt->literal) 542 msg_panic("pcre map %s, line %d: multiple literals but no $number", 543 ctxt->mapname, ctxt->lineno); 544 ctxt->literal = mystrdup(vstring_str(buf)); 545 } 546 return (MAC_PARSE_OK); 547 } 548 549 /* dict_pcre_compile - compile pattern */ 550 551 static int dict_pcre_compile(const char *mapname, int lineno, 552 DICT_PCRE_REGEXP *pattern, 553 DICT_PCRE_ENGINE *engine) 554 { 555 const char *error; 556 int errptr; 557 558 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 559 &error, &errptr, NULL); 560 if (engine->pattern == 0) { 561 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 562 mapname, lineno, errptr, error); 563 return (0); 564 } 565 engine->hints = pcre_study(engine->pattern, 0, &error); 566 if (error != 0) { 567 msg_warn("pcre map %s, line %d: error while studying regex: %s", 568 mapname, lineno, error); 569 myfree((void *) engine->pattern); 570 return (0); 571 } 572 return (1); 573 } 574 575 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 576 577 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size) 578 { 579 DICT_PCRE_RULE *rule; 580 581 rule = (DICT_PCRE_RULE *) mymalloc(size); 582 rule->op = op; 583 rule->lineno = lineno; 584 rule->next = 0; 585 586 return (rule); 587 } 588 589 /* dict_pcre_parse_rule - parse and compile one rule */ 590 591 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname, 592 int lineno, char *line, 593 int nesting) 594 { 595 char *p; 596 int actual_sub; 597 598 p = line; 599 600 /* 601 * An ordinary match rule takes one pattern and replacement text. 602 */ 603 if (!ISALNUM(*p)) { 604 DICT_PCRE_REGEXP regexp; 605 DICT_PCRE_ENGINE engine; 606 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 607 DICT_PCRE_MATCH_RULE *match_rule; 608 609 /* 610 * Get the pattern string and options. 611 */ 612 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 613 return (0); 614 615 /* 616 * Get the replacement text. 617 */ 618 while (*p && ISSPACE(*p)) 619 ++p; 620 if (!*p) 621 msg_warn("pcre map %s, line %d: no replacement text: " 622 "using empty string", mapname, lineno); 623 624 /* 625 * Sanity check the $number instances in the replacement text. 626 */ 627 prescan_context.mapname = mapname; 628 prescan_context.lineno = lineno; 629 prescan_context.max_sub = 0; 630 prescan_context.literal = 0; 631 632 /* 633 * The optimizer will eliminate code duplication and/or dead code. 634 */ 635 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 636 if (prescan_context.literal) \ 637 myfree(prescan_context.literal); \ 638 return (rval); \ 639 } while (0) 640 641 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { 642 VSTRING *base64_buf; 643 char *err; 644 645 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { 646 err = dict_file_get_error(dict); 647 msg_warn("pcre map %s, line %d: %s: skipping this rule", 648 mapname, lineno, err); 649 myfree(err); 650 CREATE_MATCHOP_ERROR_RETURN(0); 651 } 652 p = vstring_str(base64_buf); 653 } 654 if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context) 655 & MAC_PARSE_ERROR) { 656 msg_warn("pcre map %s, line %d: bad replacement syntax: " 657 "skipping this rule", mapname, lineno); 658 CREATE_MATCHOP_ERROR_RETURN(0); 659 } 660 661 /* 662 * Substring replacement not possible with negative regexps. 663 */ 664 if (prescan_context.max_sub > 0 && regexp.match == 0) { 665 msg_warn("pcre map %s, line %d: $number found in negative match " 666 "replacement text: skipping this rule", mapname, lineno); 667 CREATE_MATCHOP_ERROR_RETURN(0); 668 } 669 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { 670 msg_warn("pcre map %s, line %d: " 671 "regular expression substitution is not allowed: " 672 "skipping this rule", mapname, lineno); 673 CREATE_MATCHOP_ERROR_RETURN(0); 674 } 675 676 /* 677 * Compile the pattern. 678 */ 679 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 680 CREATE_MATCHOP_ERROR_RETURN(0); 681 #ifdef PCRE_INFO_CAPTURECOUNT 682 if (pcre_fullinfo(engine.pattern, engine.hints, 683 PCRE_INFO_CAPTURECOUNT, 684 (void *) &actual_sub) != 0) 685 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 686 mapname, lineno); 687 if (prescan_context.max_sub > actual_sub) { 688 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 689 "skipping this rule", mapname, lineno, 690 (int) prescan_context.max_sub); 691 if (engine.pattern) 692 myfree((void *) engine.pattern); 693 if (engine.hints) 694 DICT_PCRE_FREE_STUDY(engine.hints); 695 CREATE_MATCHOP_ERROR_RETURN(0); 696 } 697 #endif 698 699 /* 700 * Save the result. 701 */ 702 match_rule = (DICT_PCRE_MATCH_RULE *) 703 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno, 704 sizeof(DICT_PCRE_MATCH_RULE)); 705 match_rule->match = regexp.match; 706 match_rule->max_sub = prescan_context.max_sub; 707 if (prescan_context.literal) 708 match_rule->replacement = prescan_context.literal; 709 else 710 match_rule->replacement = mystrdup(p); 711 match_rule->pattern = engine.pattern; 712 match_rule->hints = engine.hints; 713 return ((DICT_PCRE_RULE *) match_rule); 714 } 715 716 /* 717 * The IF operator takes one pattern but no replacement text. 718 */ 719 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 720 DICT_PCRE_REGEXP regexp; 721 DICT_PCRE_ENGINE engine; 722 DICT_PCRE_IF_RULE *if_rule; 723 724 p += 2; 725 726 /* 727 * Get the pattern. 728 */ 729 while (*p && ISSPACE(*p)) 730 p++; 731 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 732 return (0); 733 734 /* 735 * Warn about out-of-place text. 736 */ 737 while (*p && ISSPACE(*p)) 738 ++p; 739 if (*p) { 740 msg_warn("pcre map %s, line %d: ignoring extra text after " 741 "IF statement: \"%s\"", mapname, lineno, p); 742 msg_warn("pcre map %s, line %d: do not prepend whitespace" 743 " to statements between IF and ENDIF", mapname, lineno); 744 } 745 746 /* 747 * Compile the pattern. 748 */ 749 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 750 return (0); 751 752 /* 753 * Save the result. 754 */ 755 if_rule = (DICT_PCRE_IF_RULE *) 756 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno, 757 sizeof(DICT_PCRE_IF_RULE)); 758 if_rule->match = regexp.match; 759 if_rule->pattern = engine.pattern; 760 if_rule->hints = engine.hints; 761 if_rule->endif_rule = 0; 762 return ((DICT_PCRE_RULE *) if_rule); 763 } 764 765 /* 766 * The ENDIF operator takes no patterns and no replacement text. 767 */ 768 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 769 DICT_PCRE_RULE *rule; 770 771 p += 5; 772 773 /* 774 * Warn about out-of-place ENDIFs. 775 */ 776 if (nesting == 0) { 777 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 778 mapname, lineno); 779 return (0); 780 } 781 782 /* 783 * Warn about out-of-place text. 784 */ 785 while (*p && ISSPACE(*p)) 786 ++p; 787 if (*p) 788 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 789 mapname, lineno); 790 791 /* 792 * Save the result. 793 */ 794 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno, 795 sizeof(DICT_PCRE_RULE)); 796 return (rule); 797 } 798 799 /* 800 * Unrecognized input. 801 */ 802 else { 803 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 804 mapname, lineno); 805 return (0); 806 } 807 } 808 809 /* dict_pcre_open - load and compile a file containing regular expressions */ 810 811 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 812 { 813 const char myname[] = "dict_pcre_open"; 814 DICT_PCRE *dict_pcre; 815 VSTREAM *map_fp = 0; 816 struct stat st; 817 VSTRING *line_buffer = 0; 818 DICT_PCRE_RULE *last_rule = 0; 819 DICT_PCRE_RULE *rule; 820 int last_line = 0; 821 int lineno; 822 int nesting = 0; 823 char *p; 824 DICT_PCRE_RULE **rule_stack = 0; 825 MVECT mvect; 826 827 /* 828 * Let the optimizer worry about eliminating redundant code. 829 */ 830 #define DICT_PCRE_OPEN_RETURN(d) do { \ 831 DICT *__d = (d); \ 832 if (map_fp != 0) \ 833 vstream_fclose(map_fp); \ 834 if (line_buffer != 0) \ 835 vstring_free(line_buffer); \ 836 return (__d); \ 837 } while (0) 838 839 /* 840 * Sanity checks. 841 */ 842 if (open_flags != O_RDONLY) 843 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 844 open_flags, dict_flags, 845 "%s:%s map requires O_RDONLY access mode", 846 DICT_TYPE_PCRE, mapname)); 847 848 /* 849 * Open the configuration file. 850 */ 851 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 852 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 853 open_flags, dict_flags, 854 "open %s: %m", mapname)); 855 if (fstat(vstream_fileno(map_fp), &st) < 0) 856 msg_fatal("fstat %s: %m", mapname); 857 858 line_buffer = vstring_alloc(100); 859 860 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 861 sizeof(*dict_pcre)); 862 dict_pcre->dict.lookup = dict_pcre_lookup; 863 dict_pcre->dict.close = dict_pcre_close; 864 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 865 if (dict_flags & DICT_FLAG_FOLD_MUL) 866 dict_pcre->dict.fold_buf = vstring_alloc(10); 867 dict_pcre->head = 0; 868 dict_pcre->expansion_buf = 0; 869 870 if (dict_pcre_init == 0) { 871 pcre_malloc = (void *(*) (size_t)) mymalloc; 872 pcre_free = (void (*) (void *)) myfree; 873 dict_pcre_init = 1; 874 } 875 dict_pcre->dict.owner.uid = st.st_uid; 876 dict_pcre->dict.owner.status = (st.st_uid != 0); 877 878 /* 879 * Parse the pcre table. 880 */ 881 while (readllines(line_buffer, map_fp, &last_line, &lineno)) { 882 p = vstring_str(line_buffer); 883 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 884 if (*p == 0) 885 continue; 886 rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno, 887 p, nesting); 888 if (rule == 0) 889 continue; 890 if (rule->op == DICT_PCRE_OP_IF) { 891 if (rule_stack == 0) 892 rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect, 893 sizeof(*rule_stack), nesting + 1, 894 (MVECT_FN) 0, (MVECT_FN) 0); 895 else 896 rule_stack = 897 (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1); 898 rule_stack[nesting] = rule; 899 nesting++; 900 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 901 DICT_PCRE_IF_RULE *if_rule; 902 903 if (nesting-- <= 0) 904 /* Already handled in dict_pcre_parse_rule(). */ 905 msg_panic("%s: ENDIF without IF", myname); 906 if (rule_stack[nesting]->op != DICT_PCRE_OP_IF) 907 msg_panic("%s: unexpected rule stack element type %d", 908 myname, rule_stack[nesting]->op); 909 if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting]; 910 if_rule->endif_rule = rule; 911 } 912 if (last_rule == 0) 913 dict_pcre->head = rule; 914 else 915 last_rule->next = rule; 916 last_rule = rule; 917 } 918 919 while (nesting-- > 0) 920 msg_warn("pcre map %s, line %d: IF has no matching ENDIF", 921 mapname, rule_stack[nesting]->lineno); 922 923 if (rule_stack) 924 (void) mvect_free(&mvect); 925 926 dict_file_purge_buffers(&dict_pcre->dict); 927 DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict)); 928 } 929 930 #endif /* HAS_PCRE */ 931