1 /* $NetBSD: dict_pcre.c,v 1.4 2022/10/08 16:12:50 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* AUTHOR(S) 22 /* Andrew McNamara 23 /* andrewm@connect.com.au 24 /* connect.com.au Pty. Ltd. 25 /* Level 3, 213 Miller St 26 /* North Sydney, NSW, Australia 27 /* 28 /* Wietse Venema 29 /* IBM T.J. Watson Research 30 /* P.O. Box 704 31 /* Yorktown Heights, NY 10598, USA 32 /* 33 /* Wietse Venema 34 /* Google, Inc. 35 /* 111 8th Avenue 36 /* New York, NY 10011, USA 37 /*--*/ 38 39 #include "sys_defs.h" 40 41 #ifdef HAS_PCRE 42 43 /* System library. */ 44 45 #include <sys/stat.h> 46 #include <stdio.h> /* sprintf() prototype */ 47 #include <stdlib.h> 48 #include <unistd.h> 49 #include <string.h> 50 #include <ctype.h> 51 52 #ifdef STRCASECMP_IN_STRINGS_H 53 #include <strings.h> 54 #endif 55 56 #if HAS_PCRE == 1 57 #include <pcre.h> 58 #elif HAS_PCRE == 2 59 #define PCRE2_CODE_UNIT_WIDTH 8 60 #include <pcre2.h> 61 #else 62 #error "define HAS_PCRE=2 or HAS_PCRE=1" 63 #endif 64 65 /* Utility library. */ 66 67 #include "mymalloc.h" 68 #include "msg.h" 69 #include "safe.h" 70 #include "vstream.h" 71 #include "vstring.h" 72 #include "stringops.h" 73 #include "readlline.h" 74 #include "dict.h" 75 #include "dict_pcre.h" 76 #include "mac_parse.h" 77 #include "warn_stat.h" 78 #include "mvect.h" 79 80 /* 81 * Backwards compatibility. 82 */ 83 #if HAS_PCRE == 1 84 /* PCRE Legacy JIT supprt. */ 85 #ifdef PCRE_STUDY_JIT_COMPILE 86 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x) 87 #else 88 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x)) 89 #endif 90 91 /* PCRE Compiled pattern. */ 92 #define DICT_PCRE_CODE pcre 93 #define DICT_PCRE_CODE_FREE(x) myfree((void *) (x)) 94 95 /* Old-style hints versus new-style match_data. */ 96 #define DICT_PCRE_MATCH_HINT_TYPE pcre_extra * 97 #define DICT_PCRE_MATCH_HINT_NAME hints 98 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME) 99 #define DICT_PCRE_MATCH_HINT_FREE(x) do { \ 100 if (DICT_PCRE_MATCH_HINT(x)) \ 101 DICT_PCRE_FREE_STUDY(DICT_PCRE_MATCH_HINT(x)); \ 102 } while (0) 103 104 /* PCRE Pattern options. */ 105 #define DICT_PCRE_CASELESS PCRE_CASELESS 106 #define DICT_PCRE_MULTILINE PCRE_MULTILINE 107 #define DICT_PCRE_DOTALL PCRE_DOTALL 108 #define DICT_PCRE_EXTENDED PCRE_EXTENDED 109 #define DICT_PCRE_ANCHORED PCRE_ANCHORED 110 #define DICT_PCRE_DOLLAR_ENDONLY PCRE_DOLLAR_ENDONLY 111 #define DICT_PCRE_UNGREEDY PCRE_UNGREEDY 112 #define DICT_PCRE_EXTRA PCRE_EXTRA 113 114 /* PCRE Number of captures in pattern. */ 115 #ifdef PCRE_INFO_CAPTURECOUNT 116 #define DICT_PCRE_CAPTURECOUNT_T int 117 #endif 118 119 #else /* HAS_PCRE */ 120 121 /* PCRE2 Compiled pattern. */ 122 #define DICT_PCRE_CODE pcre2_code 123 #define DICT_PCRE_CODE_FREE(x) pcre2_code_free(x) 124 125 /* PCRE2 Old-style hints versus new-style match_data. */ 126 #define DICT_PCRE_MATCH_HINT_TYPE pcre2_match_data * 127 #define DICT_PCRE_MATCH_HINT_NAME match_data 128 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME) 129 #define DICT_PCRE_MATCH_HINT_FREE(x) \ 130 pcre2_match_data_free(DICT_PCRE_MATCH_HINT(x)) 131 132 /* PCRE2 Pattern options. */ 133 #define DICT_PCRE_CASELESS PCRE2_CASELESS 134 #define DICT_PCRE_MULTILINE PCRE2_MULTILINE 135 #define DICT_PCRE_DOTALL PCRE2_DOTALL 136 #define DICT_PCRE_EXTENDED PCRE2_EXTENDED 137 #define DICT_PCRE_ANCHORED PCRE2_ANCHORED 138 #define DICT_PCRE_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY 139 #define DICT_PCRE_UNGREEDY PCRE2_UNGREEDY 140 #define DICT_PCRE_EXTRA 0 141 142 /* PCRE2 Number of captures in pattern. */ 143 #define DICT_PCRE_CAPTURECOUNT_T uint32_t 144 145 #endif /* HAS_PCRE */ 146 147 /* 148 * Support for IF/ENDIF based on an idea by Bert Driehuis. 149 */ 150 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 151 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 152 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 153 154 /* 155 * Max strings captured by regexp - essentially the max number of (..) 156 */ 157 #if HAS_PCRE == 1 158 #define PCRE_MAX_CAPTURE 99 159 #endif 160 161 /* 162 * Regular expression before and after compilation. 163 */ 164 typedef struct { 165 char *regexp; /* regular expression */ 166 int options; /* options */ 167 int match; /* positive or negative match */ 168 } DICT_PCRE_REGEXP; 169 170 typedef struct { 171 DICT_PCRE_CODE *pattern; /* the compiled pattern */ 172 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 173 } DICT_PCRE_ENGINE; 174 175 /* 176 * Compiled generic rule, and subclasses that derive from it. 177 */ 178 typedef struct DICT_PCRE_RULE { 179 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 180 int lineno; /* source file line number */ 181 struct DICT_PCRE_RULE *next; /* next rule in dict */ 182 } DICT_PCRE_RULE; 183 184 typedef struct { 185 DICT_PCRE_RULE rule; /* generic part */ 186 DICT_PCRE_CODE *pattern; /* compiled pattern */ 187 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 188 char *replacement; /* replacement string */ 189 int match; /* positive or negative match */ 190 size_t max_sub; /* largest $number in replacement */ 191 } DICT_PCRE_MATCH_RULE; 192 193 typedef struct { 194 DICT_PCRE_RULE rule; /* generic members */ 195 DICT_PCRE_CODE *pattern; /* compiled pattern */ 196 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 197 int match; /* positive or negative match */ 198 struct DICT_PCRE_RULE *endif_rule; /* matching endif rule */ 199 } DICT_PCRE_IF_RULE; 200 201 /* 202 * PCRE map. 203 */ 204 typedef struct { 205 DICT dict; /* generic members */ 206 DICT_PCRE_RULE *head; 207 VSTRING *expansion_buf; /* lookup result */ 208 } DICT_PCRE; 209 210 #if HAS_PCRE == 1 211 static int dict_pcre_init = 0; /* flag need to init pcre library */ 212 213 #endif 214 215 /* 216 * Context for $number expansion callback. 217 */ 218 typedef struct { 219 DICT_PCRE *dict_pcre; /* the dictionary handle */ 220 #if HAS_PCRE == 1 221 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 222 #endif 223 const char *lookup_string; /* string against which we match */ 224 #if HAS_PCRE == 1 225 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 226 #else /* HAS_PCRE */ 227 PCRE2_SIZE *ovector; /* matched string offsets */ 228 #endif /* HAS_PCRE */ 229 int matches; /* Count of cuts */ 230 } DICT_PCRE_EXPAND_CONTEXT; 231 232 /* 233 * Context for $number pre-scan callback. 234 */ 235 typedef struct { 236 const char *mapname; /* name of regexp map */ 237 int lineno; /* where in file */ 238 size_t max_sub; /* Largest $n seen */ 239 char *literal; /* constant result, $$ -> $ */ 240 } DICT_PCRE_PRESCAN_CONTEXT; 241 242 /* 243 * Compatibility. 244 */ 245 #ifndef MAC_PARSE_OK 246 #define MAC_PARSE_OK 0 247 #endif 248 249 /* 250 * Macros to make dense code more accessible. 251 */ 252 #define NULL_STARTOFFSET (0) 253 #define NULL_EXEC_OPTIONS (0) 254 255 /* dict_pcre_expand - replace $number with matched text */ 256 257 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr) 258 { 259 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 260 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 261 int n; 262 263 #if HAS_PCRE == 1 264 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 265 const char *pp; 266 int ret; 267 268 #else 269 PCRE2_SPTR start; 270 PCRE2_SIZE length; 271 272 #endif 273 274 /* 275 * Replace $0-${99} with strings cut from matched text. 276 */ 277 if (type == MAC_PARSE_VARNAME) { 278 n = atoi(vstring_str(buf)); 279 #if HAS_PCRE == 1 280 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 281 ctxt->matches, n, &pp); 282 if (ret < 0) { 283 if (ret == PCRE_ERROR_NOSUBSTRING) 284 return (MAC_PARSE_UNDEF); 285 else 286 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 287 dict_pcre->dict.name, match_rule->rule.lineno, ret); 288 } 289 if (*pp == 0) { 290 myfree((void *) pp); 291 return (MAC_PARSE_UNDEF); 292 } 293 vstring_strcat(dict_pcre->expansion_buf, pp); 294 myfree((void *) pp); 295 return (MAC_PARSE_OK); 296 #else 297 start = (unsigned char *) ctxt->lookup_string + ctxt->ovector[2 * n]; 298 length = ctxt->ovector[2 * n + 1] - ctxt->ovector[2 * n]; 299 if (length == 0) 300 return (MAC_PARSE_UNDEF); 301 vstring_strncat(dict_pcre->expansion_buf, (char *) start, length); 302 return (MAC_PARSE_OK); 303 #endif 304 } 305 306 /* 307 * Straight text - duplicate with no substitution. 308 */ 309 else { 310 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 311 return (MAC_PARSE_OK); 312 } 313 } 314 315 #if HAS_PCRE == 2 316 317 #define DICT_PCRE_GET_ERROR_BUF_LEN 256 318 319 /* dict_pcre_get_error - convert PCRE2 error number or text */ 320 321 static char *dict_pcre_get_error(VSTRING *buf, int errval) 322 { 323 ssize_t len; 324 325 VSTRING_SPACE(buf, DICT_PCRE_GET_ERROR_BUF_LEN); 326 if ((len = pcre2_get_error_message(errval, 327 (unsigned char *) vstring_str(buf), 328 DICT_PCRE_GET_ERROR_BUF_LEN)) > 0) { 329 vstring_set_payload_size(buf, len); 330 } else 331 vstring_sprintf(buf, "unexpected pcre2 error code %d", errval); 332 return (vstring_str(buf)); 333 } 334 335 #endif /* HAS_PCRE == 2 */ 336 337 /* dict_pcre_exec_error - report matching error */ 338 339 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 340 { 341 #if HAS_PCRE == 1 342 switch (errval) { 343 case 0: 344 msg_warn("pcre map %s, line %d: too many (...)", 345 mapname, lineno); 346 return; 347 case PCRE_ERROR_NULL: 348 case PCRE_ERROR_BADOPTION: 349 msg_warn("pcre map %s, line %d: bad args to re_exec", 350 mapname, lineno); 351 return; 352 case PCRE_ERROR_BADMAGIC: 353 case PCRE_ERROR_UNKNOWN_NODE: 354 msg_warn("pcre map %s, line %d: corrupt compiled regexp", 355 mapname, lineno); 356 return; 357 #ifdef PCRE_ERROR_NOMEMORY 358 case PCRE_ERROR_NOMEMORY: 359 msg_warn("pcre map %s, line %d: out of memory", 360 mapname, lineno); 361 return; 362 #endif 363 #ifdef PCRE_ERROR_MATCHLIMIT 364 case PCRE_ERROR_MATCHLIMIT: 365 msg_warn("pcre map %s, line %d: backtracking limit exceeded", 366 mapname, lineno); 367 return; 368 #endif 369 #ifdef PCRE_ERROR_BADUTF8 370 case PCRE_ERROR_BADUTF8: 371 msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string", 372 mapname, lineno); 373 return; 374 #endif 375 #ifdef PCRE_ERROR_BADUTF8_OFFSET 376 case PCRE_ERROR_BADUTF8_OFFSET: 377 msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string", 378 mapname, lineno); 379 return; 380 #endif 381 default: 382 msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d", 383 mapname, lineno, errval); 384 return; 385 } 386 #else /* HAS_PCRE */ 387 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN); 388 389 msg_warn("pcre map %s, line %d: %s", mapname, lineno, 390 dict_pcre_get_error(buf, errval)); 391 vstring_free(buf); 392 #endif /* HAS_PCRE */ 393 } 394 395 /* 396 * Inlined to reduce function call overhead in the time-critical loop. 397 */ 398 #if HAS_PCRE == 1 399 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \ 400 ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \ 401 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \ 402 (ctxt).offsets, PCRE_MAX_CAPTURE * 3), \ 403 (ctxt).matches > 0 ? (match) : \ 404 (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \ 405 (dict_pcre_exec_error((map), (line), (ctxt).matches), 0)) 406 #else 407 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, match_data, match, str, len) \ 408 ((ctxt).matches = pcre2_match((pattern), (unsigned char *) (str), (len), \ 409 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \ 410 (match_data), (pcre2_match_context *) 0), \ 411 (ctxt).matches > 0 ? (match) : \ 412 (ctxt).matches == PCRE2_ERROR_NOMATCH ? !(match) : \ 413 (dict_pcre_exec_error((map), (line), (ctxt).matches), 0)) 414 #endif 415 416 /* dict_pcre_lookup - match string and perform optional substitution */ 417 418 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 419 { 420 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 421 DICT_PCRE_RULE *rule; 422 DICT_PCRE_IF_RULE *if_rule; 423 DICT_PCRE_MATCH_RULE *match_rule; 424 int lookup_len = strlen(lookup_string); 425 DICT_PCRE_EXPAND_CONTEXT ctxt; 426 427 dict->error = 0; 428 429 if (msg_verbose) 430 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 431 432 /* 433 * Optionally fold the key. 434 */ 435 if (dict->flags & DICT_FLAG_FOLD_MUL) { 436 if (dict->fold_buf == 0) 437 dict->fold_buf = vstring_alloc(10); 438 vstring_strcpy(dict->fold_buf, lookup_string); 439 lookup_string = lowercase(vstring_str(dict->fold_buf)); 440 } 441 for (rule = dict_pcre->head; rule; rule = rule->next) { 442 443 switch (rule->op) { 444 445 /* 446 * Search for a matching expression. 447 */ 448 case DICT_PCRE_OP_MATCH: 449 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 450 if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 451 match_rule->pattern, 452 DICT_PCRE_MATCH_HINT(match_rule), 453 match_rule->match, lookup_string, lookup_len)) 454 continue; 455 456 /* 457 * Skip $number substitutions when the replacement text contains 458 * no $number strings, as learned during the compile time 459 * pre-scan. The pre-scan already replaced $$ by $. 460 */ 461 if (match_rule->max_sub == 0) 462 return match_rule->replacement; 463 464 /* 465 * We've got a match. Perform substitution on replacement string. 466 */ 467 if (dict_pcre->expansion_buf == 0) 468 dict_pcre->expansion_buf = vstring_alloc(10); 469 VSTRING_RESET(dict_pcre->expansion_buf); 470 ctxt.dict_pcre = dict_pcre; 471 #if HAS_PCRE == 1 472 ctxt.match_rule = match_rule; 473 #else 474 ctxt.ovector = pcre2_get_ovector_pointer(match_rule->match_data); 475 #endif 476 ctxt.lookup_string = lookup_string; 477 478 if (mac_parse(match_rule->replacement, dict_pcre_expand, 479 (void *) &ctxt) & MAC_PARSE_ERROR) 480 msg_fatal("pcre map %s, line %d: bad replacement syntax", 481 dict->name, rule->lineno); 482 483 VSTRING_TERMINATE(dict_pcre->expansion_buf); 484 return (vstring_str(dict_pcre->expansion_buf)); 485 486 /* 487 * Conditional. XXX We provide space for matched substring info 488 * because PCRE uses part of it as workspace for backtracking. 489 * PCRE will allocate memory if it runs out of backtracking 490 * storage. 491 */ 492 case DICT_PCRE_OP_IF: 493 if_rule = (DICT_PCRE_IF_RULE *) rule; 494 if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 495 if_rule->pattern, 496 DICT_PCRE_MATCH_HINT(if_rule), 497 if_rule->match, lookup_string, lookup_len)) 498 continue; 499 /* An IF without matching ENDIF has no "endif" rule. */ 500 if ((rule = if_rule->endif_rule) == 0) 501 return (0); 502 /* FALLTHROUGH */ 503 504 /* 505 * ENDIF after IF. 506 */ 507 case DICT_PCRE_OP_ENDIF: 508 continue; 509 510 default: 511 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 512 } 513 } 514 return (0); 515 } 516 517 /* dict_pcre_close - close pcre dictionary */ 518 519 static void dict_pcre_close(DICT *dict) 520 { 521 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 522 DICT_PCRE_RULE *rule; 523 DICT_PCRE_RULE *next; 524 DICT_PCRE_MATCH_RULE *match_rule; 525 DICT_PCRE_IF_RULE *if_rule; 526 527 for (rule = dict_pcre->head; rule; rule = next) { 528 next = rule->next; 529 switch (rule->op) { 530 case DICT_PCRE_OP_MATCH: 531 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 532 if (match_rule->pattern) 533 DICT_PCRE_CODE_FREE(match_rule->pattern); 534 DICT_PCRE_MATCH_HINT_FREE(match_rule); 535 if (match_rule->replacement) 536 myfree((void *) match_rule->replacement); 537 break; 538 case DICT_PCRE_OP_IF: 539 if_rule = (DICT_PCRE_IF_RULE *) rule; 540 if (if_rule->pattern) 541 DICT_PCRE_CODE_FREE(if_rule->pattern); 542 DICT_PCRE_MATCH_HINT_FREE(if_rule); 543 break; 544 case DICT_PCRE_OP_ENDIF: 545 break; 546 default: 547 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 548 } 549 myfree((void *) rule); 550 } 551 if (dict_pcre->expansion_buf) 552 vstring_free(dict_pcre->expansion_buf); 553 if (dict->fold_buf) 554 vstring_free(dict->fold_buf); 555 dict_free(dict); 556 } 557 558 /* dict_pcre_get_pattern - extract pattern from rule */ 559 560 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 561 DICT_PCRE_REGEXP *pattern) 562 { 563 char *p = *bufp; 564 char re_delimiter; 565 566 /* 567 * Process negation operators. 568 */ 569 pattern->match = 1; 570 for (;;) { 571 if (*p == '!') 572 pattern->match = !pattern->match; 573 else if (!ISSPACE(*p)) 574 break; 575 p++; 576 } 577 if (*p == 0) { 578 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 579 mapname, lineno); 580 return (0); 581 } 582 re_delimiter = *p++; 583 pattern->regexp = p; 584 585 /* 586 * Search for second delimiter, handling backslash escape. 587 */ 588 while (*p) { 589 if (*p == '\\') { 590 ++p; 591 if (*p == 0) 592 break; 593 } else if (*p == re_delimiter) 594 break; 595 ++p; 596 } 597 598 if (!*p) { 599 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 600 "ignoring this rule", mapname, lineno, re_delimiter); 601 return (0); 602 } 603 *p++ = 0; /* Null term the regexp */ 604 605 /* 606 * Parse any regexp options. 607 */ 608 pattern->options = DICT_PCRE_CASELESS | DICT_PCRE_DOTALL; 609 while (*p && !ISSPACE(*p)) { 610 switch (*p) { 611 case 'i': 612 pattern->options ^= DICT_PCRE_CASELESS; 613 break; 614 case 'm': 615 pattern->options ^= DICT_PCRE_MULTILINE; 616 break; 617 case 's': 618 pattern->options ^= DICT_PCRE_DOTALL; 619 break; 620 case 'x': 621 pattern->options ^= DICT_PCRE_EXTENDED; 622 break; 623 case 'A': 624 pattern->options ^= DICT_PCRE_ANCHORED; 625 break; 626 case 'E': 627 pattern->options ^= DICT_PCRE_DOLLAR_ENDONLY; 628 break; 629 case 'U': 630 pattern->options ^= DICT_PCRE_UNGREEDY; 631 break; 632 case 'X': 633 #if DICT_PCRE_EXTRA != 0 634 pattern->options ^= DICT_PCRE_EXTRA; 635 #else 636 msg_warn("pcre map %s, line %d: ignoring obsolete regexp " 637 "option \"%c\"", mapname, lineno, *p); 638 #endif 639 break; 640 default: 641 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 642 "skipping this rule", mapname, lineno, *p); 643 return (0); 644 } 645 ++p; 646 } 647 *bufp = p; 648 return (1); 649 } 650 651 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 652 653 static int dict_pcre_prescan(int type, VSTRING *buf, void *context) 654 { 655 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 656 size_t n; 657 658 /* 659 * Keep a copy of literal text (with $$ already replaced by $) if and 660 * only if the replacement text contains no $number expression. This way 661 * we can avoid having to scan the replacement text at lookup time. 662 */ 663 if (type == MAC_PARSE_VARNAME) { 664 if (ctxt->literal) { 665 myfree(ctxt->literal); 666 ctxt->literal = 0; 667 } 668 if (!alldig(vstring_str(buf))) { 669 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 670 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 671 return (MAC_PARSE_ERROR); 672 } 673 n = atoi(vstring_str(buf)); 674 if (n < 1) { 675 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 676 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 677 return (MAC_PARSE_ERROR); 678 } 679 if (n > ctxt->max_sub) 680 ctxt->max_sub = n; 681 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 682 if (ctxt->literal) 683 msg_panic("pcre map %s, line %d: multiple literals but no $number", 684 ctxt->mapname, ctxt->lineno); 685 ctxt->literal = mystrdup(vstring_str(buf)); 686 } 687 return (MAC_PARSE_OK); 688 } 689 690 /* dict_pcre_compile - compile pattern */ 691 692 static int dict_pcre_compile(const char *mapname, int lineno, 693 DICT_PCRE_REGEXP *pattern, 694 DICT_PCRE_ENGINE *engine) 695 { 696 #if HAS_PCRE == 1 697 const char *error; 698 int errptr; 699 700 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 701 &error, &errptr, NULL); 702 if (engine->pattern == 0) { 703 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 704 mapname, lineno, errptr, error); 705 return (0); 706 } 707 engine->hints = pcre_study(engine->pattern, 0, &error); 708 if (error != 0) { 709 msg_warn("pcre map %s, line %d: error while studying regex: %s", 710 mapname, lineno, error); 711 DICT_PCRE_CODE_FREE(engine->pattern); 712 return (0); 713 } 714 #else 715 int error; 716 size_t errptr; 717 718 engine->pattern = pcre2_compile((unsigned char *) pattern->regexp, 719 PCRE2_ZERO_TERMINATED, 720 pattern->options, &error, &errptr, NULL); 721 if (engine->pattern == 0) { 722 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN); 723 724 msg_warn("pcre map %s, line %d: error in regex at offset %lu: %s", 725 mapname, lineno, (unsigned long) errptr, 726 dict_pcre_get_error(buf, error)); 727 vstring_free(buf); 728 return (0); 729 } 730 engine->match_data = pcre2_match_data_create_from_pattern( 731 engine->pattern, (void *) 0); 732 #endif 733 return (1); 734 } 735 736 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 737 738 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size) 739 { 740 DICT_PCRE_RULE *rule; 741 742 rule = (DICT_PCRE_RULE *) mymalloc(size); 743 rule->op = op; 744 rule->lineno = lineno; 745 rule->next = 0; 746 747 return (rule); 748 } 749 750 /* dict_pcre_parse_rule - parse and compile one rule */ 751 752 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname, 753 int lineno, char *line, 754 int nesting) 755 { 756 char *p; 757 758 #ifdef DICT_PCRE_CAPTURECOUNT_T 759 DICT_PCRE_CAPTURECOUNT_T actual_sub; 760 761 #endif 762 #if 0 763 uint32_t namecount; 764 765 #endif 766 767 p = line; 768 769 /* 770 * An ordinary match rule takes one pattern and replacement text. 771 */ 772 if (!ISALNUM(*p)) { 773 DICT_PCRE_REGEXP regexp; 774 DICT_PCRE_ENGINE engine; 775 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 776 DICT_PCRE_MATCH_RULE *match_rule; 777 778 /* 779 * Get the pattern string and options. 780 */ 781 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 782 return (0); 783 784 /* 785 * Get the replacement text. 786 */ 787 while (*p && ISSPACE(*p)) 788 ++p; 789 if (!*p) 790 msg_warn("pcre map %s, line %d: no replacement text: " 791 "using empty string", mapname, lineno); 792 793 /* 794 * Sanity check the $number instances in the replacement text. 795 */ 796 prescan_context.mapname = mapname; 797 prescan_context.lineno = lineno; 798 prescan_context.max_sub = 0; 799 prescan_context.literal = 0; 800 801 /* 802 * The optimizer will eliminate code duplication and/or dead code. 803 */ 804 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 805 if (prescan_context.literal) \ 806 myfree(prescan_context.literal); \ 807 return (rval); \ 808 } while (0) 809 810 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { 811 VSTRING *base64_buf; 812 char *err; 813 814 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { 815 err = dict_file_get_error(dict); 816 msg_warn("pcre map %s, line %d: %s: skipping this rule", 817 mapname, lineno, err); 818 myfree(err); 819 CREATE_MATCHOP_ERROR_RETURN(0); 820 } 821 p = vstring_str(base64_buf); 822 } 823 if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context) 824 & MAC_PARSE_ERROR) { 825 msg_warn("pcre map %s, line %d: bad replacement syntax: " 826 "skipping this rule", mapname, lineno); 827 CREATE_MATCHOP_ERROR_RETURN(0); 828 } 829 830 /* 831 * Substring replacement not possible with negative regexps. 832 */ 833 if (prescan_context.max_sub > 0 && regexp.match == 0) { 834 msg_warn("pcre map %s, line %d: $number found in negative match " 835 "replacement text: skipping this rule", mapname, lineno); 836 CREATE_MATCHOP_ERROR_RETURN(0); 837 } 838 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { 839 msg_warn("pcre map %s, line %d: " 840 "regular expression substitution is not allowed: " 841 "skipping this rule", mapname, lineno); 842 CREATE_MATCHOP_ERROR_RETURN(0); 843 } 844 845 /* 846 * Compile the pattern. 847 */ 848 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 849 CREATE_MATCHOP_ERROR_RETURN(0); 850 #ifdef DICT_PCRE_CAPTURECOUNT_T 851 #if HAS_PCRE == 1 852 if (pcre_fullinfo(engine.pattern, engine.hints, 853 PCRE_INFO_CAPTURECOUNT, 854 (void *) &actual_sub) != 0) 855 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 856 mapname, lineno); 857 #else /* HAS_PCRE */ 858 #if 0 859 if (pcre2_pattern_info( 860 engine.pattern, PCRE2_INFO_NAMECOUNT, &namecount) != 0) 861 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed", 862 mapname, lineno); 863 if (namecount > 0) { 864 msg_warn("pcre map %s, line %d: named substrings are not supported", 865 mapname, lineno); 866 if (engine.pattern) 867 DICT_PCRE_CODE_FREE(engine.pattern); 868 DICT_PCRE_MATCH_HINT_FREE(&engine); 869 CREATE_MATCHOP_ERROR_RETURN(0); 870 } 871 #endif 872 if (pcre2_pattern_info(engine.pattern, PCRE2_INFO_CAPTURECOUNT, 873 (void *) &actual_sub) != 0) 874 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed", 875 mapname, lineno); 876 #endif /* HAS_PCRE */ 877 if (prescan_context.max_sub > actual_sub) { 878 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 879 "skipping this rule", mapname, lineno, 880 (int) prescan_context.max_sub); 881 if (engine.pattern) 882 DICT_PCRE_CODE_FREE(engine.pattern); 883 DICT_PCRE_MATCH_HINT_FREE(&engine); 884 CREATE_MATCHOP_ERROR_RETURN(0); 885 } 886 #endif /* DICT_PCRE_CAPTURECOUNT_T */ 887 888 /* 889 * Save the result. 890 */ 891 match_rule = (DICT_PCRE_MATCH_RULE *) 892 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno, 893 sizeof(DICT_PCRE_MATCH_RULE)); 894 match_rule->match = regexp.match; 895 match_rule->max_sub = prescan_context.max_sub; 896 if (prescan_context.literal) 897 match_rule->replacement = prescan_context.literal; 898 else 899 match_rule->replacement = mystrdup(p); 900 match_rule->pattern = engine.pattern; 901 DICT_PCRE_MATCH_HINT(match_rule) = DICT_PCRE_MATCH_HINT(&engine); 902 return ((DICT_PCRE_RULE *) match_rule); 903 } 904 905 /* 906 * The IF operator takes one pattern but no replacement text. 907 */ 908 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 909 DICT_PCRE_REGEXP regexp; 910 DICT_PCRE_ENGINE engine; 911 DICT_PCRE_IF_RULE *if_rule; 912 913 p += 2; 914 915 /* 916 * Get the pattern. 917 */ 918 while (*p && ISSPACE(*p)) 919 p++; 920 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 921 return (0); 922 923 /* 924 * Warn about out-of-place text. 925 */ 926 while (*p && ISSPACE(*p)) 927 ++p; 928 if (*p) { 929 msg_warn("pcre map %s, line %d: ignoring extra text after " 930 "IF statement: \"%s\"", mapname, lineno, p); 931 msg_warn("pcre map %s, line %d: do not prepend whitespace" 932 " to statements between IF and ENDIF", mapname, lineno); 933 } 934 935 /* 936 * Compile the pattern. 937 */ 938 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 939 return (0); 940 941 /* 942 * Save the result. 943 */ 944 if_rule = (DICT_PCRE_IF_RULE *) 945 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno, 946 sizeof(DICT_PCRE_IF_RULE)); 947 if_rule->match = regexp.match; 948 if_rule->pattern = engine.pattern; 949 DICT_PCRE_MATCH_HINT(if_rule) = DICT_PCRE_MATCH_HINT(&engine); 950 if_rule->endif_rule = 0; 951 return ((DICT_PCRE_RULE *) if_rule); 952 } 953 954 /* 955 * The ENDIF operator takes no patterns and no replacement text. 956 */ 957 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 958 DICT_PCRE_RULE *rule; 959 960 p += 5; 961 962 /* 963 * Warn about out-of-place ENDIFs. 964 */ 965 if (nesting == 0) { 966 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 967 mapname, lineno); 968 return (0); 969 } 970 971 /* 972 * Warn about out-of-place text. 973 */ 974 while (*p && ISSPACE(*p)) 975 ++p; 976 if (*p) 977 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 978 mapname, lineno); 979 980 /* 981 * Save the result. 982 */ 983 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno, 984 sizeof(DICT_PCRE_RULE)); 985 return (rule); 986 } 987 988 /* 989 * Unrecognized input. 990 */ 991 else { 992 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 993 mapname, lineno); 994 return (0); 995 } 996 } 997 998 /* dict_pcre_open - load and compile a file containing regular expressions */ 999 1000 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 1001 { 1002 const char myname[] = "dict_pcre_open"; 1003 DICT_PCRE *dict_pcre; 1004 VSTREAM *map_fp = 0; 1005 struct stat st; 1006 VSTRING *why = 0; 1007 VSTRING *line_buffer = 0; 1008 DICT_PCRE_RULE *last_rule = 0; 1009 DICT_PCRE_RULE *rule; 1010 int last_line = 0; 1011 int lineno; 1012 int nesting = 0; 1013 char *p; 1014 DICT_PCRE_RULE **rule_stack = 0; 1015 MVECT mvect; 1016 1017 /* 1018 * Let the optimizer worry about eliminating redundant code. 1019 */ 1020 #define DICT_PCRE_OPEN_RETURN(d) do { \ 1021 DICT *__d = (d); \ 1022 if (map_fp != 0) \ 1023 vstream_fclose(map_fp); \ 1024 if (line_buffer != 0) \ 1025 vstring_free(line_buffer); \ 1026 if (why != 0) \ 1027 vstring_free(why); \ 1028 return (__d); \ 1029 } while (0) 1030 1031 /* 1032 * Sanity checks. 1033 */ 1034 if (open_flags != O_RDONLY) 1035 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 1036 open_flags, dict_flags, 1037 "%s:%s map requires O_RDONLY access mode", 1038 DICT_TYPE_PCRE, mapname)); 1039 1040 /* 1041 * Open the configuration file. 1042 */ 1043 if ((map_fp = dict_stream_open(DICT_TYPE_PCRE, mapname, O_RDONLY, 1044 dict_flags, &st, &why)) == 0) 1045 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 1046 open_flags, dict_flags, 1047 "%s", vstring_str(why))); 1048 line_buffer = vstring_alloc(100); 1049 1050 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 1051 sizeof(*dict_pcre)); 1052 dict_pcre->dict.lookup = dict_pcre_lookup; 1053 dict_pcre->dict.close = dict_pcre_close; 1054 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 1055 if (dict_flags & DICT_FLAG_FOLD_MUL) 1056 dict_pcre->dict.fold_buf = vstring_alloc(10); 1057 dict_pcre->head = 0; 1058 dict_pcre->expansion_buf = 0; 1059 1060 #if HAS_PCRE == 1 1061 if (dict_pcre_init == 0) { 1062 pcre_malloc = (void *(*) (size_t)) mymalloc; 1063 pcre_free = (void (*) (void *)) myfree; 1064 dict_pcre_init = 1; 1065 } 1066 #endif 1067 dict_pcre->dict.owner.uid = st.st_uid; 1068 dict_pcre->dict.owner.status = (st.st_uid != 0); 1069 1070 /* 1071 * Parse the pcre table. 1072 */ 1073 while (readllines(line_buffer, map_fp, &last_line, &lineno)) { 1074 p = vstring_str(line_buffer); 1075 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 1076 if (*p == 0) 1077 continue; 1078 rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno, 1079 p, nesting); 1080 if (rule == 0) 1081 continue; 1082 if (rule->op == DICT_PCRE_OP_IF) { 1083 if (rule_stack == 0) 1084 rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect, 1085 sizeof(*rule_stack), nesting + 1, 1086 (MVECT_FN) 0, (MVECT_FN) 0); 1087 else 1088 rule_stack = 1089 (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1); 1090 rule_stack[nesting] = rule; 1091 nesting++; 1092 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 1093 DICT_PCRE_IF_RULE *if_rule; 1094 1095 if (nesting-- <= 0) 1096 /* Already handled in dict_pcre_parse_rule(). */ 1097 msg_panic("%s: ENDIF without IF", myname); 1098 if (rule_stack[nesting]->op != DICT_PCRE_OP_IF) 1099 msg_panic("%s: unexpected rule stack element type %d", 1100 myname, rule_stack[nesting]->op); 1101 if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting]; 1102 if_rule->endif_rule = rule; 1103 } 1104 if (last_rule == 0) 1105 dict_pcre->head = rule; 1106 else 1107 last_rule->next = rule; 1108 last_rule = rule; 1109 } 1110 1111 while (nesting-- > 0) 1112 msg_warn("pcre map %s, line %d: IF has no matching ENDIF", 1113 mapname, rule_stack[nesting]->lineno); 1114 1115 if (rule_stack) 1116 (void) mvect_free(&mvect); 1117 1118 dict_file_purge_buffers(&dict_pcre->dict); 1119 DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict)); 1120 } 1121 1122 #endif /* HAS_PCRE */ 1123