1 /* $NetBSD: dict_pcre.c,v 1.5 2023/12/23 20:30:46 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_pcre 3 6 /* SUMMARY 7 /* dictionary manager interface to PCRE regular expression library 8 /* SYNOPSIS 9 /* #include <dict_pcre.h> 10 /* 11 /* DICT *dict_pcre_open(name, dummy, dict_flags) 12 /* const char *name; 13 /* int dummy; 14 /* int dict_flags; 15 /* DESCRIPTION 16 /* dict_pcre_open() opens the named file and compiles the contained 17 /* regular expressions. The result object can be used to match strings 18 /* against the table. 19 /* SEE ALSO 20 /* dict(3) generic dictionary manager 21 /* pcre_table(5) PCRE table configuration 22 /* AUTHOR(S) 23 /* Andrew McNamara 24 /* andrewm@connect.com.au 25 /* connect.com.au Pty. Ltd. 26 /* Level 3, 213 Miller St 27 /* North Sydney, NSW, Australia 28 /* 29 /* Wietse Venema 30 /* IBM T.J. Watson Research 31 /* P.O. Box 704 32 /* Yorktown Heights, NY 10598, USA 33 /* 34 /* Wietse Venema 35 /* Google, Inc. 36 /* 111 8th Avenue 37 /* New York, NY 10011, USA 38 /*--*/ 39 40 #include "sys_defs.h" 41 42 #ifdef HAS_PCRE 43 44 /* System library. */ 45 46 #include <sys/stat.h> 47 #include <stdio.h> /* sprintf() prototype */ 48 #include <stdlib.h> 49 #include <unistd.h> 50 #include <string.h> 51 #include <ctype.h> 52 53 #ifdef STRCASECMP_IN_STRINGS_H 54 #include <strings.h> 55 #endif 56 57 #if HAS_PCRE == 1 58 #include <pcre.h> 59 #elif HAS_PCRE == 2 60 #define PCRE2_CODE_UNIT_WIDTH 8 61 #include <pcre2.h> 62 #else 63 #error "define HAS_PCRE=2 or HAS_PCRE=1" 64 #endif 65 66 /* Utility library. */ 67 68 #include "mymalloc.h" 69 #include "msg.h" 70 #include "safe.h" 71 #include "vstream.h" 72 #include "vstring.h" 73 #include "stringops.h" 74 #include "readlline.h" 75 #include "dict.h" 76 #include "dict_pcre.h" 77 #include "mac_parse.h" 78 #include "warn_stat.h" 79 #include "mvect.h" 80 81 /* 82 * Backwards compatibility. 83 */ 84 #if HAS_PCRE == 1 85 /* PCRE Legacy JIT supprt. */ 86 #ifdef PCRE_STUDY_JIT_COMPILE 87 #define DICT_PCRE_FREE_STUDY(x) pcre_free_study(x) 88 #else 89 #define DICT_PCRE_FREE_STUDY(x) pcre_free((char *) (x)) 90 #endif 91 92 /* PCRE Compiled pattern. */ 93 #define DICT_PCRE_CODE pcre 94 #define DICT_PCRE_CODE_FREE(x) myfree((void *) (x)) 95 96 /* Old-style hints versus new-style match_data. */ 97 #define DICT_PCRE_MATCH_HINT_TYPE pcre_extra * 98 #define DICT_PCRE_MATCH_HINT_NAME hints 99 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME) 100 #define DICT_PCRE_MATCH_HINT_FREE(x) do { \ 101 if (DICT_PCRE_MATCH_HINT(x)) \ 102 DICT_PCRE_FREE_STUDY(DICT_PCRE_MATCH_HINT(x)); \ 103 } while (0) 104 105 /* PCRE Pattern options. */ 106 #define DICT_PCRE_CASELESS PCRE_CASELESS 107 #define DICT_PCRE_MULTILINE PCRE_MULTILINE 108 #define DICT_PCRE_DOTALL PCRE_DOTALL 109 #define DICT_PCRE_EXTENDED PCRE_EXTENDED 110 #define DICT_PCRE_ANCHORED PCRE_ANCHORED 111 #define DICT_PCRE_DOLLAR_ENDONLY PCRE_DOLLAR_ENDONLY 112 #define DICT_PCRE_UNGREEDY PCRE_UNGREEDY 113 #define DICT_PCRE_EXTRA PCRE_EXTRA 114 115 /* PCRE Number of captures in pattern. */ 116 #ifdef PCRE_INFO_CAPTURECOUNT 117 #define DICT_PCRE_CAPTURECOUNT_T int 118 #endif 119 120 #else /* HAS_PCRE */ 121 122 /* PCRE2 Compiled pattern. */ 123 #define DICT_PCRE_CODE pcre2_code 124 #define DICT_PCRE_CODE_FREE(x) pcre2_code_free(x) 125 126 /* PCRE2 Old-style hints versus new-style match_data. */ 127 #define DICT_PCRE_MATCH_HINT_TYPE pcre2_match_data * 128 #define DICT_PCRE_MATCH_HINT_NAME match_data 129 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME) 130 #define DICT_PCRE_MATCH_HINT_FREE(x) \ 131 pcre2_match_data_free(DICT_PCRE_MATCH_HINT(x)) 132 133 /* PCRE2 Pattern options. */ 134 #define DICT_PCRE_CASELESS PCRE2_CASELESS 135 #define DICT_PCRE_MULTILINE PCRE2_MULTILINE 136 #define DICT_PCRE_DOTALL PCRE2_DOTALL 137 #define DICT_PCRE_EXTENDED PCRE2_EXTENDED 138 #define DICT_PCRE_ANCHORED PCRE2_ANCHORED 139 #define DICT_PCRE_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY 140 #define DICT_PCRE_UNGREEDY PCRE2_UNGREEDY 141 #define DICT_PCRE_EXTRA 0 142 143 /* PCRE2 Number of captures in pattern. */ 144 #define DICT_PCRE_CAPTURECOUNT_T uint32_t 145 146 #endif /* HAS_PCRE */ 147 148 /* 149 * Support for IF/ENDIF based on an idea by Bert Driehuis. 150 */ 151 #define DICT_PCRE_OP_MATCH 1 /* Match this regexp */ 152 #define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */ 153 #define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */ 154 155 /* 156 * Max strings captured by regexp - essentially the max number of (..) 157 */ 158 #if HAS_PCRE == 1 159 #define PCRE_MAX_CAPTURE 99 160 #endif 161 162 /* 163 * Regular expression before and after compilation. 164 */ 165 typedef struct { 166 char *regexp; /* regular expression */ 167 int options; /* options */ 168 int match; /* positive or negative match */ 169 } DICT_PCRE_REGEXP; 170 171 typedef struct { 172 DICT_PCRE_CODE *pattern; /* the compiled pattern */ 173 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 174 } DICT_PCRE_ENGINE; 175 176 /* 177 * Compiled generic rule, and subclasses that derive from it. 178 */ 179 typedef struct DICT_PCRE_RULE { 180 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */ 181 int lineno; /* source file line number */ 182 struct DICT_PCRE_RULE *next; /* next rule in dict */ 183 } DICT_PCRE_RULE; 184 185 typedef struct { 186 DICT_PCRE_RULE rule; /* generic part */ 187 DICT_PCRE_CODE *pattern; /* compiled pattern */ 188 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 189 char *replacement; /* replacement string */ 190 int match; /* positive or negative match */ 191 size_t max_sub; /* largest $number in replacement */ 192 } DICT_PCRE_MATCH_RULE; 193 194 typedef struct { 195 DICT_PCRE_RULE rule; /* generic members */ 196 DICT_PCRE_CODE *pattern; /* compiled pattern */ 197 DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME; 198 int match; /* positive or negative match */ 199 struct DICT_PCRE_RULE *endif_rule; /* matching endif rule */ 200 } DICT_PCRE_IF_RULE; 201 202 /* 203 * PCRE map. 204 */ 205 typedef struct { 206 DICT dict; /* generic members */ 207 DICT_PCRE_RULE *head; 208 VSTRING *expansion_buf; /* lookup result */ 209 } DICT_PCRE; 210 211 #if HAS_PCRE == 1 212 static int dict_pcre_init = 0; /* flag need to init pcre library */ 213 214 #endif 215 216 /* 217 * Context for $number expansion callback. 218 */ 219 typedef struct { 220 DICT_PCRE *dict_pcre; /* the dictionary handle */ 221 #if HAS_PCRE == 1 222 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */ 223 #endif 224 const char *lookup_string; /* string against which we match */ 225 #if HAS_PCRE == 1 226 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */ 227 #else /* HAS_PCRE */ 228 PCRE2_SIZE *ovector; /* matched string offsets */ 229 #endif /* HAS_PCRE */ 230 int matches; /* Count of cuts */ 231 } DICT_PCRE_EXPAND_CONTEXT; 232 233 /* 234 * Context for $number pre-scan callback. 235 */ 236 typedef struct { 237 const char *mapname; /* name of regexp map */ 238 int lineno; /* where in file */ 239 size_t max_sub; /* Largest $n seen */ 240 char *literal; /* constant result, $$ -> $ */ 241 } DICT_PCRE_PRESCAN_CONTEXT; 242 243 /* 244 * Compatibility. 245 */ 246 #ifndef MAC_PARSE_OK 247 #define MAC_PARSE_OK 0 248 #endif 249 250 /* 251 * Macros to make dense code more accessible. 252 */ 253 #define NULL_STARTOFFSET (0) 254 #define NULL_EXEC_OPTIONS (0) 255 256 /* dict_pcre_expand - replace $number with matched text */ 257 258 static int dict_pcre_expand(int type, VSTRING *buf, void *ptr) 259 { 260 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr; 261 DICT_PCRE *dict_pcre = ctxt->dict_pcre; 262 int n; 263 264 #if HAS_PCRE == 1 265 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule; 266 const char *pp; 267 int ret; 268 269 #else 270 PCRE2_SPTR start; 271 PCRE2_SIZE length; 272 273 #endif 274 275 /* 276 * Replace $0-${99} with strings cut from matched text. 277 */ 278 if (type == MAC_PARSE_VARNAME) { 279 n = atoi(vstring_str(buf)); 280 #if HAS_PCRE == 1 281 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets, 282 ctxt->matches, n, &pp); 283 if (ret < 0) { 284 if (ret == PCRE_ERROR_NOSUBSTRING) 285 return (MAC_PARSE_UNDEF); 286 else 287 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d", 288 dict_pcre->dict.name, match_rule->rule.lineno, ret); 289 } 290 if (*pp == 0) { 291 myfree((void *) pp); 292 return (MAC_PARSE_UNDEF); 293 } 294 vstring_strcat(dict_pcre->expansion_buf, pp); 295 myfree((void *) pp); 296 return (MAC_PARSE_OK); 297 #else 298 start = (unsigned char *) ctxt->lookup_string + ctxt->ovector[2 * n]; 299 length = ctxt->ovector[2 * n + 1] - ctxt->ovector[2 * n]; 300 if (length == 0) 301 return (MAC_PARSE_UNDEF); 302 vstring_strncat(dict_pcre->expansion_buf, (char *) start, length); 303 return (MAC_PARSE_OK); 304 #endif 305 } 306 307 /* 308 * Straight text - duplicate with no substitution. 309 */ 310 else { 311 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf)); 312 return (MAC_PARSE_OK); 313 } 314 } 315 316 #if HAS_PCRE == 2 317 318 #define DICT_PCRE_GET_ERROR_BUF_LEN 256 319 320 /* dict_pcre_get_error - convert PCRE2 error number or text */ 321 322 static char *dict_pcre_get_error(VSTRING *buf, int errval) 323 { 324 ssize_t len; 325 326 VSTRING_SPACE(buf, DICT_PCRE_GET_ERROR_BUF_LEN); 327 if ((len = pcre2_get_error_message(errval, 328 (unsigned char *) vstring_str(buf), 329 DICT_PCRE_GET_ERROR_BUF_LEN)) > 0) { 330 vstring_set_payload_size(buf, len); 331 } else 332 vstring_sprintf(buf, "unexpected pcre2 error code %d", errval); 333 return (vstring_str(buf)); 334 } 335 336 #endif /* HAS_PCRE == 2 */ 337 338 /* dict_pcre_exec_error - report matching error */ 339 340 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval) 341 { 342 #if HAS_PCRE == 1 343 switch (errval) { 344 case 0: 345 msg_warn("pcre map %s, line %d: too many (...)", 346 mapname, lineno); 347 return; 348 case PCRE_ERROR_NULL: 349 case PCRE_ERROR_BADOPTION: 350 msg_warn("pcre map %s, line %d: bad args to re_exec", 351 mapname, lineno); 352 return; 353 case PCRE_ERROR_BADMAGIC: 354 case PCRE_ERROR_UNKNOWN_NODE: 355 msg_warn("pcre map %s, line %d: corrupt compiled regexp", 356 mapname, lineno); 357 return; 358 #ifdef PCRE_ERROR_NOMEMORY 359 case PCRE_ERROR_NOMEMORY: 360 msg_warn("pcre map %s, line %d: out of memory", 361 mapname, lineno); 362 return; 363 #endif 364 #ifdef PCRE_ERROR_MATCHLIMIT 365 case PCRE_ERROR_MATCHLIMIT: 366 msg_warn("pcre map %s, line %d: backtracking limit exceeded", 367 mapname, lineno); 368 return; 369 #endif 370 #ifdef PCRE_ERROR_BADUTF8 371 case PCRE_ERROR_BADUTF8: 372 msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string", 373 mapname, lineno); 374 return; 375 #endif 376 #ifdef PCRE_ERROR_BADUTF8_OFFSET 377 case PCRE_ERROR_BADUTF8_OFFSET: 378 msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string", 379 mapname, lineno); 380 return; 381 #endif 382 default: 383 msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d", 384 mapname, lineno, errval); 385 return; 386 } 387 #else /* HAS_PCRE */ 388 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN); 389 390 msg_warn("pcre map %s, line %d: %s", mapname, lineno, 391 dict_pcre_get_error(buf, errval)); 392 vstring_free(buf); 393 #endif /* HAS_PCRE */ 394 } 395 396 /* 397 * Inlined to reduce function call overhead in the time-critical loop. 398 */ 399 #if HAS_PCRE == 1 400 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \ 401 ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \ 402 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \ 403 (ctxt).offsets, PCRE_MAX_CAPTURE * 3), \ 404 (ctxt).matches > 0 ? (match) : \ 405 (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \ 406 (dict_pcre_exec_error((map), (line), (ctxt).matches), 0)) 407 #else 408 #define DICT_PCRE_EXEC(ctxt, map, line, pattern, match_data, match, str, len) \ 409 ((ctxt).matches = pcre2_match((pattern), (unsigned char *) (str), (len), \ 410 NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \ 411 (match_data), (pcre2_match_context *) 0), \ 412 (ctxt).matches > 0 ? (match) : \ 413 (ctxt).matches == PCRE2_ERROR_NOMATCH ? !(match) : \ 414 (dict_pcre_exec_error((map), (line), (ctxt).matches), 0)) 415 #endif 416 417 /* dict_pcre_lookup - match string and perform optional substitution */ 418 419 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string) 420 { 421 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 422 DICT_PCRE_RULE *rule; 423 DICT_PCRE_IF_RULE *if_rule; 424 DICT_PCRE_MATCH_RULE *match_rule; 425 int lookup_len = strlen(lookup_string); 426 DICT_PCRE_EXPAND_CONTEXT ctxt; 427 428 dict->error = 0; 429 430 if (msg_verbose) 431 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string); 432 433 /* 434 * Optionally fold the key. 435 */ 436 if (dict->flags & DICT_FLAG_FOLD_MUL) { 437 if (dict->fold_buf == 0) 438 dict->fold_buf = vstring_alloc(10); 439 vstring_strcpy(dict->fold_buf, lookup_string); 440 lookup_string = lowercase(vstring_str(dict->fold_buf)); 441 } 442 for (rule = dict_pcre->head; rule; rule = rule->next) { 443 444 switch (rule->op) { 445 446 /* 447 * Search for a matching expression. 448 */ 449 case DICT_PCRE_OP_MATCH: 450 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 451 if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 452 match_rule->pattern, 453 DICT_PCRE_MATCH_HINT(match_rule), 454 match_rule->match, lookup_string, lookup_len)) 455 continue; 456 457 /* 458 * Skip $number substitutions when the replacement text contains 459 * no $number strings, as learned during the compile time 460 * pre-scan. The pre-scan already replaced $$ by $. 461 */ 462 if (match_rule->max_sub == 0) 463 return match_rule->replacement; 464 465 /* 466 * We've got a match. Perform substitution on replacement string. 467 */ 468 if (dict_pcre->expansion_buf == 0) 469 dict_pcre->expansion_buf = vstring_alloc(10); 470 VSTRING_RESET(dict_pcre->expansion_buf); 471 ctxt.dict_pcre = dict_pcre; 472 #if HAS_PCRE == 1 473 ctxt.match_rule = match_rule; 474 #else 475 ctxt.ovector = pcre2_get_ovector_pointer(match_rule->match_data); 476 #endif 477 ctxt.lookup_string = lookup_string; 478 479 if (mac_parse(match_rule->replacement, dict_pcre_expand, 480 (void *) &ctxt) & MAC_PARSE_ERROR) 481 msg_fatal("pcre map %s, line %d: bad replacement syntax", 482 dict->name, rule->lineno); 483 484 VSTRING_TERMINATE(dict_pcre->expansion_buf); 485 return (vstring_str(dict_pcre->expansion_buf)); 486 487 /* 488 * Conditional. XXX We provide space for matched substring info 489 * because PCRE uses part of it as workspace for backtracking. 490 * PCRE will allocate memory if it runs out of backtracking 491 * storage. 492 */ 493 case DICT_PCRE_OP_IF: 494 if_rule = (DICT_PCRE_IF_RULE *) rule; 495 if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno, 496 if_rule->pattern, 497 DICT_PCRE_MATCH_HINT(if_rule), 498 if_rule->match, lookup_string, lookup_len)) 499 continue; 500 /* An IF without matching ENDIF has no "endif" rule. */ 501 if ((rule = if_rule->endif_rule) == 0) 502 return (0); 503 /* FALLTHROUGH */ 504 505 /* 506 * ENDIF after IF. 507 */ 508 case DICT_PCRE_OP_ENDIF: 509 continue; 510 511 default: 512 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op); 513 } 514 } 515 return (0); 516 } 517 518 /* dict_pcre_close - close pcre dictionary */ 519 520 static void dict_pcre_close(DICT *dict) 521 { 522 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict; 523 DICT_PCRE_RULE *rule; 524 DICT_PCRE_RULE *next; 525 DICT_PCRE_MATCH_RULE *match_rule; 526 DICT_PCRE_IF_RULE *if_rule; 527 528 for (rule = dict_pcre->head; rule; rule = next) { 529 next = rule->next; 530 switch (rule->op) { 531 case DICT_PCRE_OP_MATCH: 532 match_rule = (DICT_PCRE_MATCH_RULE *) rule; 533 if (match_rule->pattern) 534 DICT_PCRE_CODE_FREE(match_rule->pattern); 535 DICT_PCRE_MATCH_HINT_FREE(match_rule); 536 if (match_rule->replacement) 537 myfree((void *) match_rule->replacement); 538 break; 539 case DICT_PCRE_OP_IF: 540 if_rule = (DICT_PCRE_IF_RULE *) rule; 541 if (if_rule->pattern) 542 DICT_PCRE_CODE_FREE(if_rule->pattern); 543 DICT_PCRE_MATCH_HINT_FREE(if_rule); 544 break; 545 case DICT_PCRE_OP_ENDIF: 546 break; 547 default: 548 msg_panic("dict_pcre_close: unknown operation %d", rule->op); 549 } 550 myfree((void *) rule); 551 } 552 if (dict_pcre->expansion_buf) 553 vstring_free(dict_pcre->expansion_buf); 554 if (dict->fold_buf) 555 vstring_free(dict->fold_buf); 556 dict_free(dict); 557 } 558 559 /* dict_pcre_get_pattern - extract pattern from rule */ 560 561 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp, 562 DICT_PCRE_REGEXP *pattern) 563 { 564 char *p = *bufp; 565 char re_delimiter; 566 567 /* 568 * Process negation operators. 569 */ 570 pattern->match = 1; 571 for (;;) { 572 if (*p == '!') 573 pattern->match = !pattern->match; 574 else if (!ISSPACE(*p)) 575 break; 576 p++; 577 } 578 if (*p == 0) { 579 msg_warn("pcre map %s, line %d: no regexp: skipping this rule", 580 mapname, lineno); 581 return (0); 582 } 583 re_delimiter = *p++; 584 pattern->regexp = p; 585 586 /* 587 * Search for second delimiter, handling backslash escape. 588 */ 589 while (*p) { 590 if (*p == '\\') { 591 ++p; 592 if (*p == 0) 593 break; 594 } else if (*p == re_delimiter) 595 break; 596 ++p; 597 } 598 599 if (!*p) { 600 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": " 601 "ignoring this rule", mapname, lineno, re_delimiter); 602 return (0); 603 } 604 *p++ = 0; /* Null term the regexp */ 605 606 /* 607 * Parse any regexp options. 608 */ 609 pattern->options = DICT_PCRE_CASELESS | DICT_PCRE_DOTALL; 610 while (*p && !ISSPACE(*p)) { 611 switch (*p) { 612 case 'i': 613 pattern->options ^= DICT_PCRE_CASELESS; 614 break; 615 case 'm': 616 pattern->options ^= DICT_PCRE_MULTILINE; 617 break; 618 case 's': 619 pattern->options ^= DICT_PCRE_DOTALL; 620 break; 621 case 'x': 622 pattern->options ^= DICT_PCRE_EXTENDED; 623 break; 624 case 'A': 625 pattern->options ^= DICT_PCRE_ANCHORED; 626 break; 627 case 'E': 628 pattern->options ^= DICT_PCRE_DOLLAR_ENDONLY; 629 break; 630 case 'U': 631 pattern->options ^= DICT_PCRE_UNGREEDY; 632 break; 633 case 'X': 634 #if DICT_PCRE_EXTRA != 0 635 pattern->options ^= DICT_PCRE_EXTRA; 636 #else 637 msg_warn("pcre map %s, line %d: ignoring obsolete regexp " 638 "option \"%c\"", mapname, lineno, *p); 639 #endif 640 break; 641 default: 642 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": " 643 "skipping this rule", mapname, lineno, *p); 644 return (0); 645 } 646 ++p; 647 } 648 *bufp = p; 649 return (1); 650 } 651 652 /* dict_pcre_prescan - sanity check $number instances in replacement text */ 653 654 static int dict_pcre_prescan(int type, VSTRING *buf, void *context) 655 { 656 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context; 657 size_t n; 658 659 /* 660 * Keep a copy of literal text (with $$ already replaced by $) if and 661 * only if the replacement text contains no $number expression. This way 662 * we can avoid having to scan the replacement text at lookup time. 663 */ 664 if (type == MAC_PARSE_VARNAME) { 665 if (ctxt->literal) { 666 myfree(ctxt->literal); 667 ctxt->literal = 0; 668 } 669 if (!alldig(vstring_str(buf))) { 670 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"", 671 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 672 return (MAC_PARSE_ERROR); 673 } 674 n = atoi(vstring_str(buf)); 675 if (n < 1) { 676 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"", 677 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 678 return (MAC_PARSE_ERROR); 679 } 680 if (n > ctxt->max_sub) 681 ctxt->max_sub = n; 682 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 683 if (ctxt->literal) 684 msg_panic("pcre map %s, line %d: multiple literals but no $number", 685 ctxt->mapname, ctxt->lineno); 686 ctxt->literal = mystrdup(vstring_str(buf)); 687 } 688 return (MAC_PARSE_OK); 689 } 690 691 /* dict_pcre_compile - compile pattern */ 692 693 static int dict_pcre_compile(const char *mapname, int lineno, 694 DICT_PCRE_REGEXP *pattern, 695 DICT_PCRE_ENGINE *engine) 696 { 697 #if HAS_PCRE == 1 698 const char *error; 699 int errptr; 700 701 engine->pattern = pcre_compile(pattern->regexp, pattern->options, 702 &error, &errptr, NULL); 703 if (engine->pattern == 0) { 704 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s", 705 mapname, lineno, errptr, error); 706 return (0); 707 } 708 engine->hints = pcre_study(engine->pattern, 0, &error); 709 if (error != 0) { 710 msg_warn("pcre map %s, line %d: error while studying regex: %s", 711 mapname, lineno, error); 712 DICT_PCRE_CODE_FREE(engine->pattern); 713 return (0); 714 } 715 #else 716 int error; 717 size_t errptr; 718 719 engine->pattern = pcre2_compile((unsigned char *) pattern->regexp, 720 PCRE2_ZERO_TERMINATED, 721 pattern->options, &error, &errptr, NULL); 722 if (engine->pattern == 0) { 723 VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN); 724 725 msg_warn("pcre map %s, line %d: error in regex at offset %lu: %s", 726 mapname, lineno, (unsigned long) errptr, 727 dict_pcre_get_error(buf, error)); 728 vstring_free(buf); 729 return (0); 730 } 731 engine->match_data = pcre2_match_data_create_from_pattern( 732 engine->pattern, (void *) 0); 733 #endif 734 return (1); 735 } 736 737 /* dict_pcre_rule_alloc - fill in a generic rule structure */ 738 739 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size) 740 { 741 DICT_PCRE_RULE *rule; 742 743 rule = (DICT_PCRE_RULE *) mymalloc(size); 744 rule->op = op; 745 rule->lineno = lineno; 746 rule->next = 0; 747 748 return (rule); 749 } 750 751 /* dict_pcre_parse_rule - parse and compile one rule */ 752 753 static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname, 754 int lineno, char *line, 755 int nesting) 756 { 757 char *p; 758 759 #ifdef DICT_PCRE_CAPTURECOUNT_T 760 DICT_PCRE_CAPTURECOUNT_T actual_sub; 761 762 #endif 763 #if 0 764 uint32_t namecount; 765 766 #endif 767 768 p = line; 769 770 /* 771 * An ordinary match rule takes one pattern and replacement text. 772 */ 773 if (!ISALNUM(*p)) { 774 DICT_PCRE_REGEXP regexp; 775 DICT_PCRE_ENGINE engine; 776 DICT_PCRE_PRESCAN_CONTEXT prescan_context; 777 DICT_PCRE_MATCH_RULE *match_rule; 778 779 /* 780 * Get the pattern string and options. 781 */ 782 if (dict_pcre_get_pattern(mapname, lineno, &p, ®exp) == 0) 783 return (0); 784 785 /* 786 * Get the replacement text. 787 */ 788 while (*p && ISSPACE(*p)) 789 ++p; 790 if (!*p) 791 msg_warn("pcre map %s, line %d: no replacement text: " 792 "using empty string", mapname, lineno); 793 794 /* 795 * Sanity check the $number instances in the replacement text. 796 */ 797 prescan_context.mapname = mapname; 798 prescan_context.lineno = lineno; 799 prescan_context.max_sub = 0; 800 prescan_context.literal = 0; 801 802 /* 803 * The optimizer will eliminate code duplication and/or dead code. 804 */ 805 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 806 if (prescan_context.literal) \ 807 myfree(prescan_context.literal); \ 808 return (rval); \ 809 } while (0) 810 811 if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { 812 VSTRING *base64_buf; 813 char *err; 814 815 if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { 816 err = dict_file_get_error(dict); 817 msg_warn("pcre map %s, line %d: %s: skipping this rule", 818 mapname, lineno, err); 819 myfree(err); 820 CREATE_MATCHOP_ERROR_RETURN(0); 821 } 822 p = vstring_str(base64_buf); 823 } 824 if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context) 825 & MAC_PARSE_ERROR) { 826 msg_warn("pcre map %s, line %d: bad replacement syntax: " 827 "skipping this rule", mapname, lineno); 828 CREATE_MATCHOP_ERROR_RETURN(0); 829 } 830 831 /* 832 * Substring replacement not possible with negative regexps. 833 */ 834 if (prescan_context.max_sub > 0 && regexp.match == 0) { 835 msg_warn("pcre map %s, line %d: $number found in negative match " 836 "replacement text: skipping this rule", mapname, lineno); 837 CREATE_MATCHOP_ERROR_RETURN(0); 838 } 839 if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { 840 msg_warn("pcre map %s, line %d: " 841 "regular expression substitution is not allowed: " 842 "skipping this rule", mapname, lineno); 843 CREATE_MATCHOP_ERROR_RETURN(0); 844 } 845 846 /* 847 * Compile the pattern. 848 */ 849 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 850 CREATE_MATCHOP_ERROR_RETURN(0); 851 #ifdef DICT_PCRE_CAPTURECOUNT_T 852 #if HAS_PCRE == 1 853 if (pcre_fullinfo(engine.pattern, engine.hints, 854 PCRE_INFO_CAPTURECOUNT, 855 (void *) &actual_sub) != 0) 856 msg_panic("pcre map %s, line %d: pcre_fullinfo failed", 857 mapname, lineno); 858 #else /* HAS_PCRE */ 859 #if 0 860 if (pcre2_pattern_info( 861 engine.pattern, PCRE2_INFO_NAMECOUNT, &namecount) != 0) 862 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed", 863 mapname, lineno); 864 if (namecount > 0) { 865 msg_warn("pcre map %s, line %d: named substrings are not supported", 866 mapname, lineno); 867 if (engine.pattern) 868 DICT_PCRE_CODE_FREE(engine.pattern); 869 DICT_PCRE_MATCH_HINT_FREE(&engine); 870 CREATE_MATCHOP_ERROR_RETURN(0); 871 } 872 #endif 873 if (pcre2_pattern_info(engine.pattern, PCRE2_INFO_CAPTURECOUNT, 874 (void *) &actual_sub) != 0) 875 msg_panic("pcre map %s, line %d: pcre2_pattern_info failed", 876 mapname, lineno); 877 #endif /* HAS_PCRE */ 878 if (prescan_context.max_sub > actual_sub) { 879 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": " 880 "skipping this rule", mapname, lineno, 881 (int) prescan_context.max_sub); 882 if (engine.pattern) 883 DICT_PCRE_CODE_FREE(engine.pattern); 884 DICT_PCRE_MATCH_HINT_FREE(&engine); 885 CREATE_MATCHOP_ERROR_RETURN(0); 886 } 887 #endif /* DICT_PCRE_CAPTURECOUNT_T */ 888 889 /* 890 * Save the result. 891 */ 892 match_rule = (DICT_PCRE_MATCH_RULE *) 893 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno, 894 sizeof(DICT_PCRE_MATCH_RULE)); 895 match_rule->match = regexp.match; 896 match_rule->max_sub = prescan_context.max_sub; 897 if (prescan_context.literal) 898 match_rule->replacement = prescan_context.literal; 899 else 900 match_rule->replacement = mystrdup(p); 901 match_rule->pattern = engine.pattern; 902 DICT_PCRE_MATCH_HINT(match_rule) = DICT_PCRE_MATCH_HINT(&engine); 903 return ((DICT_PCRE_RULE *) match_rule); 904 } 905 906 /* 907 * The IF operator takes one pattern but no replacement text. 908 */ 909 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 910 DICT_PCRE_REGEXP regexp; 911 DICT_PCRE_ENGINE engine; 912 DICT_PCRE_IF_RULE *if_rule; 913 914 p += 2; 915 916 /* 917 * Get the pattern. 918 */ 919 while (*p && ISSPACE(*p)) 920 p++; 921 if (!dict_pcre_get_pattern(mapname, lineno, &p, ®exp)) 922 return (0); 923 924 /* 925 * Warn about out-of-place text. 926 */ 927 while (*p && ISSPACE(*p)) 928 ++p; 929 if (*p) { 930 msg_warn("pcre map %s, line %d: ignoring extra text after " 931 "IF statement: \"%s\"", mapname, lineno, p); 932 msg_warn("pcre map %s, line %d: do not prepend whitespace" 933 " to statements between IF and ENDIF", mapname, lineno); 934 } 935 936 /* 937 * Compile the pattern. 938 */ 939 if (dict_pcre_compile(mapname, lineno, ®exp, &engine) == 0) 940 return (0); 941 942 /* 943 * Save the result. 944 */ 945 if_rule = (DICT_PCRE_IF_RULE *) 946 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno, 947 sizeof(DICT_PCRE_IF_RULE)); 948 if_rule->match = regexp.match; 949 if_rule->pattern = engine.pattern; 950 DICT_PCRE_MATCH_HINT(if_rule) = DICT_PCRE_MATCH_HINT(&engine); 951 if_rule->endif_rule = 0; 952 return ((DICT_PCRE_RULE *) if_rule); 953 } 954 955 /* 956 * The ENDIF operator takes no patterns and no replacement text. 957 */ 958 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 959 DICT_PCRE_RULE *rule; 960 961 p += 5; 962 963 /* 964 * Warn about out-of-place ENDIFs. 965 */ 966 if (nesting == 0) { 967 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF", 968 mapname, lineno); 969 return (0); 970 } 971 972 /* 973 * Warn about out-of-place text. 974 */ 975 while (*p && ISSPACE(*p)) 976 ++p; 977 if (*p) 978 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF", 979 mapname, lineno); 980 981 /* 982 * Save the result. 983 */ 984 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno, 985 sizeof(DICT_PCRE_RULE)); 986 return (rule); 987 } 988 989 /* 990 * Unrecognized input. 991 */ 992 else { 993 msg_warn("pcre map %s, line %d: ignoring unrecognized request", 994 mapname, lineno); 995 return (0); 996 } 997 } 998 999 /* dict_pcre_open - load and compile a file containing regular expressions */ 1000 1001 DICT *dict_pcre_open(const char *mapname, int open_flags, int dict_flags) 1002 { 1003 const char myname[] = "dict_pcre_open"; 1004 DICT_PCRE *dict_pcre; 1005 VSTREAM *map_fp = 0; 1006 struct stat st; 1007 VSTRING *why = 0; 1008 VSTRING *line_buffer = 0; 1009 DICT_PCRE_RULE *last_rule = 0; 1010 DICT_PCRE_RULE *rule; 1011 int last_line = 0; 1012 int lineno; 1013 int nesting = 0; 1014 char *p; 1015 DICT_PCRE_RULE **rule_stack = 0; 1016 MVECT mvect; 1017 1018 /* 1019 * Let the optimizer worry about eliminating redundant code. 1020 */ 1021 #define DICT_PCRE_OPEN_RETURN(d) do { \ 1022 DICT *__d = (d); \ 1023 if (map_fp != 0) \ 1024 vstream_fclose(map_fp); \ 1025 if (line_buffer != 0) \ 1026 vstring_free(line_buffer); \ 1027 if (why != 0) \ 1028 vstring_free(why); \ 1029 return (__d); \ 1030 } while (0) 1031 1032 /* 1033 * Sanity checks. 1034 */ 1035 if (open_flags != O_RDONLY) 1036 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 1037 open_flags, dict_flags, 1038 "%s:%s map requires O_RDONLY access mode", 1039 DICT_TYPE_PCRE, mapname)); 1040 1041 /* 1042 * Open the configuration file. 1043 */ 1044 if ((map_fp = dict_stream_open(DICT_TYPE_PCRE, mapname, O_RDONLY, 1045 dict_flags, &st, &why)) == 0) 1046 DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname, 1047 open_flags, dict_flags, 1048 "%s", vstring_str(why))); 1049 line_buffer = vstring_alloc(100); 1050 1051 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname, 1052 sizeof(*dict_pcre)); 1053 dict_pcre->dict.lookup = dict_pcre_lookup; 1054 dict_pcre->dict.close = dict_pcre_close; 1055 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN; 1056 if (dict_flags & DICT_FLAG_FOLD_MUL) 1057 dict_pcre->dict.fold_buf = vstring_alloc(10); 1058 dict_pcre->head = 0; 1059 dict_pcre->expansion_buf = 0; 1060 1061 #if HAS_PCRE == 1 1062 if (dict_pcre_init == 0) { 1063 pcre_malloc = (void *(*) (size_t)) mymalloc; 1064 pcre_free = (void (*) (void *)) myfree; 1065 dict_pcre_init = 1; 1066 } 1067 #endif 1068 dict_pcre->dict.owner.uid = st.st_uid; 1069 dict_pcre->dict.owner.status = (st.st_uid != 0); 1070 1071 /* 1072 * Parse the pcre table. 1073 */ 1074 while (readllines(line_buffer, map_fp, &last_line, &lineno)) { 1075 p = vstring_str(line_buffer); 1076 trimblanks(p, 0)[0] = 0; /* Trim space at end */ 1077 if (*p == 0) 1078 continue; 1079 rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno, 1080 p, nesting); 1081 if (rule == 0) 1082 continue; 1083 if (rule->op == DICT_PCRE_OP_IF) { 1084 if (rule_stack == 0) 1085 rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect, 1086 sizeof(*rule_stack), nesting + 1, 1087 (MVECT_FN) 0, (MVECT_FN) 0); 1088 else 1089 rule_stack = 1090 (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1); 1091 rule_stack[nesting] = rule; 1092 nesting++; 1093 } else if (rule->op == DICT_PCRE_OP_ENDIF) { 1094 DICT_PCRE_IF_RULE *if_rule; 1095 1096 if (nesting-- <= 0) 1097 /* Already handled in dict_pcre_parse_rule(). */ 1098 msg_panic("%s: ENDIF without IF", myname); 1099 if (rule_stack[nesting]->op != DICT_PCRE_OP_IF) 1100 msg_panic("%s: unexpected rule stack element type %d", 1101 myname, rule_stack[nesting]->op); 1102 if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting]; 1103 if_rule->endif_rule = rule; 1104 } 1105 if (last_rule == 0) 1106 dict_pcre->head = rule; 1107 else 1108 last_rule->next = rule; 1109 last_rule = rule; 1110 } 1111 1112 while (nesting-- > 0) 1113 msg_warn("pcre map %s, line %d: IF has no matching ENDIF", 1114 mapname, rule_stack[nesting]->lineno); 1115 1116 if (rule_stack) 1117 (void) mvect_free(&mvect); 1118 1119 dict_file_purge_buffers(&dict_pcre->dict); 1120 DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict)); 1121 } 1122 1123 #endif /* HAS_PCRE */ 1124