xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_pcre.c (revision c48c605c14fd8622b523d1d6a3f0c0bad133ea89)
1 /*	$NetBSD: dict_pcre.c,v 1.5 2023/12/23 20:30:46 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_pcre 3
6 /* SUMMARY
7 /*	dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /*	#include <dict_pcre.h>
10 /*
11 /*	DICT	*dict_pcre_open(name, dummy, dict_flags)
12 /*	const char *name;
13 /*	int	dummy;
14 /*	int	dict_flags;
15 /* DESCRIPTION
16 /*	dict_pcre_open() opens the named file and compiles the contained
17 /*	regular expressions. The result object can be used to match strings
18 /*	against the table.
19 /* SEE ALSO
20 /*	dict(3) generic dictionary manager
21 /*	pcre_table(5) PCRE table configuration
22 /* AUTHOR(S)
23 /*	Andrew McNamara
24 /*	andrewm@connect.com.au
25 /*	connect.com.au Pty. Ltd.
26 /*	Level 3, 213 Miller St
27 /*	North Sydney, NSW, Australia
28 /*
29 /*	Wietse Venema
30 /*	IBM T.J. Watson Research
31 /*	P.O. Box 704
32 /*	Yorktown Heights, NY 10598, USA
33 /*
34 /*	Wietse Venema
35 /*	Google, Inc.
36 /*	111 8th Avenue
37 /*	New York, NY 10011, USA
38 /*--*/
39 
40 #include "sys_defs.h"
41 
42 #ifdef HAS_PCRE
43 
44 /* System library. */
45 
#include <sys/stat.h>
#include <stdio.h>			/* sprintf() prototype */
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>			/* INT_MAX */
52 
53 #ifdef STRCASECMP_IN_STRINGS_H
54 #include <strings.h>
55 #endif
56 
57 #if HAS_PCRE == 1
58 #include <pcre.h>
59 #elif HAS_PCRE == 2
60 #define PCRE2_CODE_UNIT_WIDTH	8
61 #include <pcre2.h>
62 #else
63 #error "define HAS_PCRE=2 or HAS_PCRE=1"
64 #endif
65 
66 /* Utility library. */
67 
68 #include "mymalloc.h"
69 #include "msg.h"
70 #include "safe.h"
71 #include "vstream.h"
72 #include "vstring.h"
73 #include "stringops.h"
74 #include "readlline.h"
75 #include "dict.h"
76 #include "dict_pcre.h"
77 #include "mac_parse.h"
78 #include "warn_stat.h"
79 #include "mvect.h"
80 
81  /*
82   * Backwards compatibility.
83   */
84 #if HAS_PCRE == 1
 /* PCRE Legacy JIT support. */
86 #ifdef PCRE_STUDY_JIT_COMPILE
87 #define DICT_PCRE_FREE_STUDY(x)	pcre_free_study(x)
88 #else
89 #define DICT_PCRE_FREE_STUDY(x)	pcre_free((char *) (x))
90 #endif
91 
92  /* PCRE Compiled pattern. */
93 #define DICT_PCRE_CODE		pcre
94 #define DICT_PCRE_CODE_FREE(x)	myfree((void *) (x))
95 
96  /* Old-style hints versus new-style match_data. */
97 #define DICT_PCRE_MATCH_HINT_TYPE pcre_extra *
98 #define DICT_PCRE_MATCH_HINT_NAME hints
99 #define DICT_PCRE_MATCH_HINT(x) ((x)->DICT_PCRE_MATCH_HINT_NAME)
100 #define DICT_PCRE_MATCH_HINT_FREE(x) do { \
101 	if (DICT_PCRE_MATCH_HINT(x)) \
102 	    DICT_PCRE_FREE_STUDY(DICT_PCRE_MATCH_HINT(x)); \
103     } while (0)
104 
105  /* PCRE Pattern options. */
106 #define DICT_PCRE_CASELESS	PCRE_CASELESS
107 #define DICT_PCRE_MULTILINE	PCRE_MULTILINE
108 #define DICT_PCRE_DOTALL	PCRE_DOTALL
109 #define DICT_PCRE_EXTENDED	PCRE_EXTENDED
110 #define DICT_PCRE_ANCHORED	PCRE_ANCHORED
111 #define DICT_PCRE_DOLLAR_ENDONLY PCRE_DOLLAR_ENDONLY
112 #define DICT_PCRE_UNGREEDY	PCRE_UNGREEDY
113 #define DICT_PCRE_EXTRA		PCRE_EXTRA
114 
115  /* PCRE Number of captures in pattern. */
116 #ifdef PCRE_INFO_CAPTURECOUNT
117 #define DICT_PCRE_CAPTURECOUNT_T int
118 #endif
119 
120 #else					/* HAS_PCRE */
121 
122  /* PCRE2 Compiled pattern. */
123 #define DICT_PCRE_CODE		pcre2_code
124 #define DICT_PCRE_CODE_FREE(x)	pcre2_code_free(x)
125 
126  /* PCRE2 Old-style hints versus new-style match_data. */
127 #define DICT_PCRE_MATCH_HINT_TYPE pcre2_match_data *
128 #define DICT_PCRE_MATCH_HINT_NAME match_data
129 #define DICT_PCRE_MATCH_HINT(x)	((x)->DICT_PCRE_MATCH_HINT_NAME)
130 #define DICT_PCRE_MATCH_HINT_FREE(x) \
131 	pcre2_match_data_free(DICT_PCRE_MATCH_HINT(x))
132 
133  /* PCRE2 Pattern options. */
134 #define DICT_PCRE_CASELESS	PCRE2_CASELESS
135 #define DICT_PCRE_MULTILINE	PCRE2_MULTILINE
136 #define DICT_PCRE_DOTALL	PCRE2_DOTALL
137 #define DICT_PCRE_EXTENDED	PCRE2_EXTENDED
138 #define DICT_PCRE_ANCHORED	PCRE2_ANCHORED
139 #define DICT_PCRE_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY
140 #define DICT_PCRE_UNGREEDY	PCRE2_UNGREEDY
141 #define DICT_PCRE_EXTRA		0
142 
143  /* PCRE2 Number of captures in pattern. */
144 #define	DICT_PCRE_CAPTURECOUNT_T uint32_t
145 
146 #endif					/* HAS_PCRE */
147 
148  /*
149   * Support for IF/ENDIF based on an idea by Bert Driehuis.
150   */
151 #define DICT_PCRE_OP_MATCH    1		/* Match this regexp */
152 #define DICT_PCRE_OP_IF       2		/* Increase if/endif nesting on match */
153 #define DICT_PCRE_OP_ENDIF    3		/* Decrease if/endif nesting on match */
154 
155  /*
156   * Max strings captured by regexp - essentially the max number of (..)
157   */
158 #if HAS_PCRE == 1
159 #define PCRE_MAX_CAPTURE	99
160 #endif
161 
162  /*
163   * Regular expression before and after compilation.
164   */
typedef struct {
    char   *regexp;			/* pattern text (not a private copy) */
    int     options;			/* DICT_PCRE_xxx option bits */
    int     match;			/* 1 = positive, 0 = negated match */
} DICT_PCRE_REGEXP;
170 
typedef struct {
    DICT_PCRE_CODE *pattern;		/* the compiled pattern */
    /* Member "hints" (pcre_extra *) or "match_data" (pcre2_match_data *). */
    DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
} DICT_PCRE_ENGINE;
175 
176  /*
177   * Compiled generic rule, and subclasses that derive from it.
178   */
typedef struct DICT_PCRE_RULE {
    /* The rule subclasses embed this as first member and are cast to it. */
    int     op;				/* DICT_PCRE_OP_MATCH/IF/ENDIF */
    int     lineno;			/* source file line number */
    struct DICT_PCRE_RULE *next;	/* next rule in dict, or null */
} DICT_PCRE_RULE;
184 
typedef struct {
    DICT_PCRE_RULE rule;		/* generic part */
    DICT_PCRE_CODE *pattern;		/* compiled pattern */
    /* Member "hints" (pcre_extra *) or "match_data" (pcre2_match_data *). */
    DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
    char   *replacement;		/* replacement string, owned by rule */
    int     match;			/* positive or negative match */
    size_t  max_sub;			/* largest $number in replacement */
} DICT_PCRE_MATCH_RULE;
193 
typedef struct {
    DICT_PCRE_RULE rule;		/* generic members */
    DICT_PCRE_CODE *pattern;		/* compiled pattern */
    /* Member "hints" (pcre_extra *) or "match_data" (pcre2_match_data *). */
    DICT_PCRE_MATCH_HINT_TYPE DICT_PCRE_MATCH_HINT_NAME;
    int     match;			/* positive or negative match */
    struct DICT_PCRE_RULE *endif_rule;	/* matching endif rule, or null */
} DICT_PCRE_IF_RULE;
201 
202  /*
203   * PCRE map.
204   */
typedef struct {
    DICT    dict;			/* generic members */
    DICT_PCRE_RULE *head;		/* first rule in the list, or null */
    VSTRING *expansion_buf;		/* lookup result, made on first use */
} DICT_PCRE;
210 
211 #if HAS_PCRE == 1
212 static int dict_pcre_init = 0;		/* flag need to init pcre library */
213 
214 #endif
215 
216 /*
217  * Context for $number expansion callback.
218  */
typedef struct {
    DICT_PCRE *dict_pcre;		/* the dictionary handle */
#if HAS_PCRE == 1
    DICT_PCRE_MATCH_RULE *match_rule;	/* the rule we matched (for errors) */
#endif
    const char *lookup_string;		/* string against which we match */
#if HAS_PCRE == 1
    int     offsets[PCRE_MAX_CAPTURE * 3];	/* pcre_exec() output vector */
#else					/* HAS_PCRE */
    PCRE2_SIZE *ovector;		/* matched string offset pairs */
#endif					/* HAS_PCRE */
    int     matches;			/* pcre_exec()/pcre2_match() result */
} DICT_PCRE_EXPAND_CONTEXT;
232 
233  /*
234   * Context for $number pre-scan callback.
235   */
typedef struct {
    const char *mapname;		/* name of regexp map */
    int     lineno;			/* where in file */
    size_t  max_sub;			/* Largest $n seen, 0 if none */
    char   *literal;			/* constant result, $$ -> $, or null */
} DICT_PCRE_PRESCAN_CONTEXT;
242 
243  /*
244   * Compatibility.
245   */
246 #ifndef MAC_PARSE_OK
247 #define MAC_PARSE_OK 0
248 #endif
249 
250  /*
251   * Macros to make dense code more accessible.
252   */
253 #define NULL_STARTOFFSET	(0)
254 #define NULL_EXEC_OPTIONS 	(0)
255 
256 /* dict_pcre_expand - replace $number with matched text */
257 
static int dict_pcre_expand(int type, VSTRING *buf, void *ptr)
{
    DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
    DICT_PCRE *dict_pcre = ctxt->dict_pcre;
    int     n;

#if HAS_PCRE == 1
    DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
    const char *pp;
    int     ret;

#else
    PCRE2_SIZE start;
    PCRE2_SIZE end;

#endif

    /*
     * mac_parse() callback. The result is appended to the dictionary's
     * expansion buffer; "buf" holds either the name of a $name reference
     * (type MAC_PARSE_VARNAME) or a piece of literal text. Returns
     * MAC_PARSE_OK, or MAC_PARSE_UNDEF when the referenced substring does
     * not exist or is empty.
     */

    /*
     * Replace $number with the string cut from the matched text. The
     * number was sanity-checked when the rule was loaded.
     */
    if (type == MAC_PARSE_VARNAME) {
	n = atoi(vstring_str(buf));
#if HAS_PCRE == 1
	ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
				 ctxt->matches, n, &pp);
	if (ret < 0) {
	    if (ret == PCRE_ERROR_NOSUBSTRING)
		return (MAC_PARSE_UNDEF);
	    else
		msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
			dict_pcre->dict.name, match_rule->rule.lineno, ret);
	}
	if (*pp == 0) {
	    myfree((void *) pp);
	    return (MAC_PARSE_UNDEF);
	}
	vstring_strcat(dict_pcre->expansion_buf, pp);
	myfree((void *) pp);
	return (MAC_PARSE_OK);
#else

	/*
	 * Inspect the offsets before doing pointer arithmetic with them. A
	 * group that did not participate in the match has both offsets set
	 * to PCRE2_UNSET, and adding that to the lookup string would form an
	 * out-of-bounds pointer. An end offset before the start offset would
	 * wrap around the unsigned length; treat it like an empty substring.
	 */
	start = ctxt->ovector[2 * n];
	end = ctxt->ovector[2 * n + 1];
	if (start == PCRE2_UNSET || end == PCRE2_UNSET || end <= start)
	    return (MAC_PARSE_UNDEF);
	vstring_strncat(dict_pcre->expansion_buf,
			ctxt->lookup_string + start, end - start);
	return (MAC_PARSE_OK);
#endif
    }

    /*
     * Straight text - duplicate with no substitution.
     */
    else {
	vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
	return (MAC_PARSE_OK);
    }
}
315 
316 #if HAS_PCRE == 2
317 
318 #define DICT_PCRE_GET_ERROR_BUF_LEN	256
319 
320 /* dict_pcre_get_error - convert PCRE2 error number or text */
321 
static char *dict_pcre_get_error(VSTRING *buf, int errval)
{
    ssize_t msg_len;

    /*
     * Reserve room in the caller's buffer and have the library write the
     * message text for "errval" directly into it. When the library does not
     * produce a message, report the raw error number instead. The result
     * points into "buf".
     */
    VSTRING_SPACE(buf, DICT_PCRE_GET_ERROR_BUF_LEN);
    msg_len = pcre2_get_error_message(errval,
				      (unsigned char *) vstring_str(buf),
				      DICT_PCRE_GET_ERROR_BUF_LEN);
    if (msg_len <= 0)
	vstring_sprintf(buf, "unexpected pcre2 error code %d", errval);
    else
	vstring_set_payload_size(buf, msg_len);
    return (vstring_str(buf));
}
335 
336 #endif					/* HAS_PCRE == 2 */
337 
338 /* dict_pcre_exec_error - report matching error */
339 
dict_pcre_exec_error(const char * mapname,int lineno,int errval)340 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
341 {
342 #if HAS_PCRE == 1
343     switch (errval) {
344 	case 0:
345 	msg_warn("pcre map %s, line %d: too many (...)",
346 		 mapname, lineno);
347 	return;
348     case PCRE_ERROR_NULL:
349     case PCRE_ERROR_BADOPTION:
350 	msg_warn("pcre map %s, line %d: bad args to re_exec",
351 		 mapname, lineno);
352 	return;
353     case PCRE_ERROR_BADMAGIC:
354     case PCRE_ERROR_UNKNOWN_NODE:
355 	msg_warn("pcre map %s, line %d: corrupt compiled regexp",
356 		 mapname, lineno);
357 	return;
358 #ifdef PCRE_ERROR_NOMEMORY
359     case PCRE_ERROR_NOMEMORY:
360 	msg_warn("pcre map %s, line %d: out of memory",
361 		 mapname, lineno);
362 	return;
363 #endif
364 #ifdef PCRE_ERROR_MATCHLIMIT
365     case PCRE_ERROR_MATCHLIMIT:
366 	msg_warn("pcre map %s, line %d: backtracking limit exceeded",
367 		 mapname, lineno);
368 	return;
369 #endif
370 #ifdef PCRE_ERROR_BADUTF8
371     case PCRE_ERROR_BADUTF8:
372 	msg_warn("pcre map %s, line %d: bad UTF-8 sequence in search string",
373 		 mapname, lineno);
374 	return;
375 #endif
376 #ifdef PCRE_ERROR_BADUTF8_OFFSET
377     case PCRE_ERROR_BADUTF8_OFFSET:
378 	msg_warn("pcre map %s, line %d: bad UTF-8 start offset in search string",
379 		 mapname, lineno);
380 	return;
381 #endif
382     default:
383 	msg_warn("pcre map %s, line %d: unknown pcre_exec error: %d",
384 		 mapname, lineno, errval);
385 	return;
386     }
387 #else					/* HAS_PCRE */
388     VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);
389 
390     msg_warn("pcre map %s, line %d: %s", mapname, lineno,
391 	     dict_pcre_get_error(buf, errval));
392     vstring_free(buf);
393 #endif						/* HAS_PCRE */
394 }
395 
396  /*
397   * Inlined to reduce function call overhead in the time-critical loop.
398   */
#if HAS_PCRE == 1
 /*
  * Match (str, len) against (pattern) and store the pcre_exec() result in
  * (ctxt).matches; substring offsets go to (ctxt).offsets. The expression
  * evaluates to (match) when the pattern matched, to !(match) when it did
  * not match, and to 0 after logging a warning for any other result.
  */
#define DICT_PCRE_EXEC(ctxt, map, line, pattern, hints, match, str, len) \
    ((ctxt).matches = pcre_exec((pattern), (hints), (str), (len), \
				NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
				(ctxt).offsets, PCRE_MAX_CAPTURE * 3), \
     (ctxt).matches > 0 ? (match) : \
     (ctxt).matches == PCRE_ERROR_NOMATCH ? !(match) : \
     (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
#else
 /*
  * Match (str, len) against (pattern) and store the pcre2_match() result in
  * (ctxt).matches; substring offsets are kept in (match_data). The
  * expression evaluates to (match) when the pattern matched, to !(match)
  * when it did not match, and to 0 after logging a warning for any other
  * result.
  */
#define DICT_PCRE_EXEC(ctxt, map, line, pattern, match_data, match, str, len) \
    ((ctxt).matches = pcre2_match((pattern), (unsigned char *) (str), (len), \
				NULL_STARTOFFSET, NULL_EXEC_OPTIONS, \
				(match_data), (pcre2_match_context *) 0), \
     (ctxt).matches > 0 ? (match) : \
     (ctxt).matches == PCRE2_ERROR_NOMATCH ? !(match) : \
     (dict_pcre_exec_error((map), (line), (ctxt).matches), 0))
#endif
416 
417 /* dict_pcre_lookup - match string and perform optional substitution */
418 
static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
{
    DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
    DICT_PCRE_EXPAND_CONTEXT ctxt;
    DICT_PCRE_RULE *rule;
    DICT_PCRE_MATCH_RULE *mrule;
    DICT_PCRE_IF_RULE *cond;
    int     lookup_len = strlen(lookup_string);

    /*
     * Walk the rule list in file order and return the replacement text of
     * the first match rule that applies, or a null pointer when no rule
     * applies. The result is either the rule's own replacement string or
     * the dictionary's expansion buffer.
     */
    dict->error = 0;

    if (msg_verbose)
	msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);

    /*
     * Optionally match against a lower-cased copy of the key.
     */
    if ((dict->flags & DICT_FLAG_FOLD_MUL) != 0) {
	if (dict->fold_buf == 0)
	    dict->fold_buf = vstring_alloc(10);
	vstring_strcpy(dict->fold_buf, lookup_string);
	lookup_string = lowercase(vstring_str(dict->fold_buf));
    }
    for (rule = dict_pcre->head; rule != 0; rule = rule->next) {

	if (rule->op == DICT_PCRE_OP_MATCH) {

	    /*
	     * Pattern with replacement text. Try the next rule when this
	     * one does not apply.
	     */
	    mrule = (DICT_PCRE_MATCH_RULE *) rule;
	    if (!DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
				mrule->pattern,
				DICT_PCRE_MATCH_HINT(mrule),
				mrule->match, lookup_string, lookup_len))
		continue;

	    /*
	     * The compile-time pre-scan found no $number in the replacement
	     * text, and already replaced $$ by $: use the text as is.
	     */
	    if (mrule->max_sub == 0)
		return mrule->replacement;

	    /*
	     * Otherwise expand the replacement text into the (lazily
	     * created) expansion buffer.
	     */
	    if (dict_pcre->expansion_buf == 0)
		dict_pcre->expansion_buf = vstring_alloc(10);
	    VSTRING_RESET(dict_pcre->expansion_buf);
	    ctxt.lookup_string = lookup_string;
	    ctxt.dict_pcre = dict_pcre;
#if HAS_PCRE == 1
	    ctxt.match_rule = mrule;
#else
	    ctxt.ovector = pcre2_get_ovector_pointer(mrule->match_data);
#endif

	    if (mac_parse(mrule->replacement, dict_pcre_expand,
			  (void *) &ctxt) & MAC_PARSE_ERROR)
		msg_fatal("pcre map %s, line %d: bad replacement syntax",
			  dict->name, rule->lineno);

	    VSTRING_TERMINATE(dict_pcre->expansion_buf);
	    return (vstring_str(dict_pcre->expansion_buf));

	} else if (rule->op == DICT_PCRE_OP_IF) {

	    /*
	     * Conditional. When the condition holds, simply proceed with
	     * the rules inside the IF..ENDIF block. XXX The context is
	     * passed along because the matcher stores its result there.
	     */
	    cond = (DICT_PCRE_IF_RULE *) rule;
	    if (DICT_PCRE_EXEC(ctxt, dict->name, rule->lineno,
			       cond->pattern,
			       DICT_PCRE_MATCH_HINT(cond),
			       cond->match, lookup_string, lookup_len))
		continue;

	    /*
	     * Condition failed: resume after the matching ENDIF. An IF
	     * without matching ENDIF has no "endif" rule, and ends the
	     * search.
	     */
	    if ((rule = cond->endif_rule) == 0)
		return (0);

	} else if (rule->op == DICT_PCRE_OP_ENDIF) {

	    /*
	     * ENDIF after IF: nothing to do.
	     */
	    continue;

	} else {
	    msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
	}
    }
    return (0);
}
517 
518 /* dict_pcre_close - close pcre dictionary */
519 
static void dict_pcre_close(DICT *dict)
{
    DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
    DICT_PCRE_RULE *cur = dict_pcre->head;
    DICT_PCRE_RULE *after;
    DICT_PCRE_MATCH_RULE *mrule;
    DICT_PCRE_IF_RULE *cond;

    /*
     * Release every rule together with the resources that its subclass
     * owns. Remember the successor before the rule itself is destroyed.
     */
    while (cur != 0) {
	after = cur->next;
	if (cur->op == DICT_PCRE_OP_MATCH) {
	    mrule = (DICT_PCRE_MATCH_RULE *) cur;
	    if (mrule->pattern)
		DICT_PCRE_CODE_FREE(mrule->pattern);
	    DICT_PCRE_MATCH_HINT_FREE(mrule);
	    if (mrule->replacement)
		myfree((void *) mrule->replacement);
	} else if (cur->op == DICT_PCRE_OP_IF) {
	    cond = (DICT_PCRE_IF_RULE *) cur;
	    if (cond->pattern)
		DICT_PCRE_CODE_FREE(cond->pattern);
	    DICT_PCRE_MATCH_HINT_FREE(cond);
	} else if (cur->op != DICT_PCRE_OP_ENDIF) {
	    /* An ENDIF rule owns nothing; anything else is a bug. */
	    msg_panic("dict_pcre_close: unknown operation %d", cur->op);
	}
	myfree((void *) cur);
	cur = after;
    }

    /*
     * Release the buffers and the dictionary handle itself.
     */
    if (dict_pcre->expansion_buf)
	vstring_free(dict_pcre->expansion_buf);
    if (dict->fold_buf)
	vstring_free(dict->fold_buf);
    dict_free(dict);
}
558 
559 /* dict_pcre_get_pattern - extract pattern from rule */
560 
static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
				         DICT_PCRE_REGEXP *pattern)
{
    char   *cp = *bufp;
    char    delim;
    int     toggle;

    /*
     * Parse "[!]/regexp/options" at *bufp. On success, store the pattern
     * text, option bits and match polarity in *pattern, advance *bufp to
     * the text after the options, and return 1. Otherwise log a warning and
     * return 0. The input is modified in place: the closing delimiter is
     * overwritten with a null byte.
     */

    /*
     * Each "!" inverts the match polarity; whitespace is skipped.
     */
    pattern->match = 1;
    while (*cp == '!' || ISSPACE(*cp)) {
	if (*cp == '!')
	    pattern->match = !pattern->match;
	cp++;
    }
    if (*cp == 0) {
	msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
		 mapname, lineno);
	return (0);
    }
    delim = *cp++;
    pattern->regexp = cp;

    /*
     * Find the closing delimiter. A backslash protects the next character.
     */
    for ( /* void */ ; *cp != 0; ++cp) {
	if (*cp == '\\') {
	    if (*++cp == 0)
		break;
	} else if (*cp == delim) {
	    break;
	}
    }
    if (*cp == 0) {
	msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
		 "ignoring this rule", mapname, lineno, delim);
	return (0);
    }
    *cp++ = 0;					/* Null term the regexp */

    /*
     * Each option letter flips the corresponding bit with respect to the
     * default setting.
     */
    pattern->options = DICT_PCRE_CASELESS | DICT_PCRE_DOTALL;
    for ( /* void */ ; *cp != 0 && !ISSPACE(*cp); ++cp) {
	switch (*cp) {
	case 'i':
	    toggle = DICT_PCRE_CASELESS;
	    break;
	case 'm':
	    toggle = DICT_PCRE_MULTILINE;
	    break;
	case 's':
	    toggle = DICT_PCRE_DOTALL;
	    break;
	case 'x':
	    toggle = DICT_PCRE_EXTENDED;
	    break;
	case 'A':
	    toggle = DICT_PCRE_ANCHORED;
	    break;
	case 'E':
	    toggle = DICT_PCRE_DOLLAR_ENDONLY;
	    break;
	case 'U':
	    toggle = DICT_PCRE_UNGREEDY;
	    break;
	case 'X':
#if DICT_PCRE_EXTRA != 0
	    toggle = DICT_PCRE_EXTRA;
#else
	    msg_warn("pcre map %s, line %d: ignoring obsolete regexp "
		     "option \"%c\"", mapname, lineno, *cp);
	    toggle = 0;				/* leave options unchanged */
#endif
	    break;
	default:
	    msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
		     "skipping this rule", mapname, lineno, *cp);
	    return (0);
	}
	pattern->options ^= toggle;
    }
    *bufp = cp;
    return (1);
}
651 
652 /* dict_pcre_prescan - sanity check $number instances in replacement text */
653 
static int dict_pcre_prescan(int type, VSTRING *buf, void *context)
{
    DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
    const char *cp;
    size_t  digit;
    size_t  n;

    /*
     * mac_parse() callback that is used while loading a rule. It records
     * the largest $number in ctxt->max_sub, and returns MAC_PARSE_ERROR
     * after logging a warning when a $name is not a number greater than
     * zero.
     */

    /*
     * Keep a copy of literal text (with $$ already replaced by $) if and
     * only if the replacement text contains no $number expression. This way
     * we can avoid having to scan the replacement text at lookup time.
     */
    if (type == MAC_PARSE_VARNAME) {
	if (ctxt->literal) {
	    myfree(ctxt->literal);
	    ctxt->literal = 0;
	}
	if (!alldig(vstring_str(buf))) {
	    msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
	    return (MAC_PARSE_ERROR);
	}

	/*
	 * Convert the all-digit string by hand. The result of atoi() is
	 * undefined when the number does not fit in an int, and a wrapped
	 * around value could pass the range checks. Limit the result to
	 * INT_MAX, so that the number is also safe to convert with atoi()
	 * at lookup time.
	 */
	n = 0;
	for (cp = vstring_str(buf); *cp != 0; cp++) {
	    digit = (size_t) (*cp - '0');
	    if (n > ((size_t) INT_MAX - digit) / 10) {
		n = 0;				/* report as out of range */
		break;
	    }
	    n = n * 10 + digit;
	}
	if (n < 1) {
	    msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
	    return (MAC_PARSE_ERROR);
	}
	if (n > ctxt->max_sub)
	    ctxt->max_sub = n;
    } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
	if (ctxt->literal)
	    msg_panic("pcre map %s, line %d: multiple literals but no $number",
		      ctxt->mapname, ctxt->lineno);
	ctxt->literal = mystrdup(vstring_str(buf));
    }
    return (MAC_PARSE_OK);
}
690 
691 /* dict_pcre_compile - compile pattern */
692 
static int dict_pcre_compile(const char *mapname, int lineno,
			             DICT_PCRE_REGEXP *pattern,
			             DICT_PCRE_ENGINE *engine)
{

    /*
     * Compile pattern->regexp with pattern->options and store the result in
     * *engine. Returns 1 on success. Returns 0 after logging a warning when
     * the pattern is in error; in that case *engine owns no resources.
     */
#if HAS_PCRE == 1
    const char *error;
    int     errptr;

    engine->pattern = pcre_compile(pattern->regexp, pattern->options,
				   &error, &errptr, NULL);
    if (engine->pattern == 0) {
	msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
		 mapname, lineno, errptr, error);
	return (0);
    }
    engine->hints = pcre_study(engine->pattern, 0, &error);
    if (error != 0) {
	msg_warn("pcre map %s, line %d: error while studying regex: %s",
		 mapname, lineno, error);
	DICT_PCRE_CODE_FREE(engine->pattern);
	return (0);
    }
#else
    int     error;
    size_t  errptr;

    engine->pattern = pcre2_compile((unsigned char *) pattern->regexp,
				    PCRE2_ZERO_TERMINATED,
				    pattern->options, &error, &errptr, NULL);
    if (engine->pattern == 0) {
	VSTRING *buf = vstring_alloc(DICT_PCRE_GET_ERROR_BUF_LEN);

	msg_warn("pcre map %s, line %d: error in regex at offset %lu: %s",
		 mapname, lineno, (unsigned long) errptr,
		 dict_pcre_get_error(buf, error));
	vstring_free(buf);
	return (0);
    }
    engine->match_data = pcre2_match_data_create_from_pattern(
					       engine->pattern, (void *) 0);

    /*
     * Without a match data block, every later match attempt would fail.
     * The block could not be allocated; treat this like other memory
     * allocation failures instead of storing a null pointer in the rule.
     */
    if (engine->match_data == 0)
	msg_fatal("pcre map %s, line %d: insufficient memory for match data",
		  mapname, lineno);
#endif
    return (1);
}
736 
737 /* dict_pcre_rule_alloc - fill in a generic rule structure */
738 
dict_pcre_rule_alloc(int op,int lineno,size_t size)739 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int lineno, size_t size)
740 {
741     DICT_PCRE_RULE *rule;
742 
743     rule = (DICT_PCRE_RULE *) mymalloc(size);
744     rule->op = op;
745     rule->lineno = lineno;
746     rule->next = 0;
747 
748     return (rule);
749 }
750 
751 /* dict_pcre_parse_rule - parse and compile one rule */
752 
static DICT_PCRE_RULE *dict_pcre_parse_rule(DICT *dict, const char *mapname,
					            int lineno, char *line,
					            int nesting)
{
    char   *p;

#ifdef DICT_PCRE_CAPTURECOUNT_T
    DICT_PCRE_CAPTURECOUNT_T actual_sub;

#endif
#if 0
    uint32_t namecount;

#endif

    /*
     * Parse one logical line and return a newly-allocated MATCH, IF or
     * ENDIF rule, or a null pointer when the line was skipped after logging
     * a warning. The line is modified in place while the pattern is
     * extracted. The "nesting" argument is used only to reject an ENDIF
     * when no IF is open.
     */
    p = line;

    /*
     * An ordinary match rule takes one pattern and replacement text.
     */
    if (!ISALNUM(*p)) {
	DICT_PCRE_REGEXP regexp;
	DICT_PCRE_ENGINE engine;
	DICT_PCRE_PRESCAN_CONTEXT prescan_context;
	DICT_PCRE_MATCH_RULE *match_rule;

	/*
	 * Get the pattern string and options.
	 */
	if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
	    return (0);

	/*
	 * Get the replacement text.
	 */
	while (*p && ISSPACE(*p))
	    ++p;
	if (!*p)
	    msg_warn("pcre map %s, line %d: no replacement text: "
		     "using empty string", mapname, lineno);

	/*
	 * Sanity check the $number instances in the replacement text.
	 */
	prescan_context.mapname = mapname;
	prescan_context.lineno = lineno;
	prescan_context.max_sub = 0;
	prescan_context.literal = 0;

	/*
	 * The optimizer will eliminate code duplication and/or dead code.
	 * This macro releases the literal copy that the pre-scan may have
	 * made, before returning from the function.
	 */
#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
	if (prescan_context.literal) \
	    myfree(prescan_context.literal); \
	return (rval); \
    } while (0)

	/*
	 * With DICT_FLAG_SRC_RHS_IS_FILE, the text after the pattern is
	 * handed to dict_file_to_b64(), and its result is used as the
	 * replacement text. NOTE(review): the lifetime of base64_buf is not
	 * visible in this function - confirm who releases it.
	 */
	if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
	    VSTRING *base64_buf;
	    char   *err;

	    if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
		err = dict_file_get_error(dict);
		msg_warn("pcre map %s, line %d: %s: skipping this rule",
			 mapname, lineno, err);
		myfree(err);
		CREATE_MATCHOP_ERROR_RETURN(0);
	    }
	    p = vstring_str(base64_buf);
	}
	if (mac_parse(p, dict_pcre_prescan, (void *) &prescan_context)
	    & MAC_PARSE_ERROR) {
	    msg_warn("pcre map %s, line %d: bad replacement syntax: "
		     "skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}

	/*
	 * Substring replacement not possible with negative regexps.
	 */
	if (prescan_context.max_sub > 0 && regexp.match == 0) {
	    msg_warn("pcre map %s, line %d: $number found in negative match "
		   "replacement text: skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
	if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
	    msg_warn("pcre map %s, line %d: "
		     "regular expression substitution is not allowed: "
		     "skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}

	/*
	 * Compile the pattern.
	 */
	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
	    CREATE_MATCHOP_ERROR_RETURN(0);

	/*
	 * When the library can report the number of capture groups, reject
	 * a replacement text that refers to a group that the pattern does
	 * not have.
	 */
#ifdef DICT_PCRE_CAPTURECOUNT_T
#if HAS_PCRE == 1
	if (pcre_fullinfo(engine.pattern, engine.hints,
			  PCRE_INFO_CAPTURECOUNT,
			  (void *) &actual_sub) != 0)
	    msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
		      mapname, lineno);
#else						/* HAS_PCRE */
	/* Disabled code: reject patterns that use named substrings. */
#if 0
	if (pcre2_pattern_info(
		     engine.pattern, PCRE2_INFO_NAMECOUNT, &namecount) != 0)
	    msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
		      mapname, lineno);
	if (namecount > 0) {
	    msg_warn("pcre map %s, line %d: named substrings are not supported",
		     mapname, lineno);
	    if (engine.pattern)
		DICT_PCRE_CODE_FREE(engine.pattern);
	    DICT_PCRE_MATCH_HINT_FREE(&engine);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
#endif
	if (pcre2_pattern_info(engine.pattern, PCRE2_INFO_CAPTURECOUNT,
			       (void *) &actual_sub) != 0)
	    msg_panic("pcre map %s, line %d: pcre2_pattern_info failed",
		      mapname, lineno);
#endif						/* HAS_PCRE */
	if (prescan_context.max_sub > actual_sub) {
	    msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
		     "skipping this rule", mapname, lineno,
		     (int) prescan_context.max_sub);
	    /* Undo the result from dict_pcre_compile(). */
	    if (engine.pattern)
		DICT_PCRE_CODE_FREE(engine.pattern);
	    DICT_PCRE_MATCH_HINT_FREE(&engine);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
#endif						/* DICT_PCRE_CAPTURECOUNT_T */

	/*
	 * Save the result. The rule takes ownership of the compiled pattern,
	 * of the hints or match data, and of the literal copy from the
	 * pre-scan if there is one; otherwise the rule gets its own copy of
	 * the replacement text.
	 */
	match_rule = (DICT_PCRE_MATCH_RULE *)
	    dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, lineno,
				 sizeof(DICT_PCRE_MATCH_RULE));
	match_rule->match = regexp.match;
	match_rule->max_sub = prescan_context.max_sub;
	if (prescan_context.literal)
	    match_rule->replacement = prescan_context.literal;
	else
	    match_rule->replacement = mystrdup(p);
	match_rule->pattern = engine.pattern;
	DICT_PCRE_MATCH_HINT(match_rule) = DICT_PCRE_MATCH_HINT(&engine);
	return ((DICT_PCRE_RULE *) match_rule);
    }

    /*
     * The IF operator takes one pattern but no replacement text.
     */
    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
	DICT_PCRE_REGEXP regexp;
	DICT_PCRE_ENGINE engine;
	DICT_PCRE_IF_RULE *if_rule;

	p += 2;

	/*
	 * Get the pattern.
	 */
	while (*p && ISSPACE(*p))
	    p++;
	if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
	    return (0);

	/*
	 * Warn about out-of-place text.
	 */
	while (*p && ISSPACE(*p))
	    ++p;
	if (*p) {
	    msg_warn("pcre map %s, line %d: ignoring extra text after "
		     "IF statement: \"%s\"", mapname, lineno, p);
	    msg_warn("pcre map %s, line %d: do not prepend whitespace"
		     " to statements between IF and ENDIF", mapname, lineno);
	}

	/*
	 * Compile the pattern.
	 */
	if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
	    return (0);

	/*
	 * Save the result. The link to the matching ENDIF rule is not known
	 * here; it starts out as a null pointer.
	 */
	if_rule = (DICT_PCRE_IF_RULE *)
	    dict_pcre_rule_alloc(DICT_PCRE_OP_IF, lineno,
				 sizeof(DICT_PCRE_IF_RULE));
	if_rule->match = regexp.match;
	if_rule->pattern = engine.pattern;
	DICT_PCRE_MATCH_HINT(if_rule) = DICT_PCRE_MATCH_HINT(&engine);
	if_rule->endif_rule = 0;
	return ((DICT_PCRE_RULE *) if_rule);
    }

    /*
     * The ENDIF operator takes no patterns and no replacement text.
     */
    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
	DICT_PCRE_RULE *rule;

	p += 5;

	/*
	 * Warn about out-of-place ENDIFs.
	 */
	if (nesting == 0) {
	    msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
		     mapname, lineno);
	    return (0);
	}

	/*
	 * Warn about out-of-place text.
	 */
	while (*p && ISSPACE(*p))
	    ++p;
	if (*p)
	    msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
		     mapname, lineno);

	/*
	 * Save the result.
	 */
	rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, lineno,
				    sizeof(DICT_PCRE_RULE));
	return (rule);
    }

    /*
     * Unrecognized input.
     */
    else {
	msg_warn("pcre map %s, line %d: ignoring unrecognized request",
		 mapname, lineno);
	return (0);
    }
}
998 
999 /* dict_pcre_open - load and compile a file containing regular expressions */
1000 
dict_pcre_open(const char * mapname,int open_flags,int dict_flags)1001 DICT   *dict_pcre_open(const char *mapname, int open_flags, int dict_flags)
1002 {
1003     const char myname[] = "dict_pcre_open";
1004     DICT_PCRE *dict_pcre;
1005     VSTREAM *map_fp = 0;
1006     struct stat st;
1007     VSTRING *why = 0;
1008     VSTRING *line_buffer = 0;
1009     DICT_PCRE_RULE *last_rule = 0;
1010     DICT_PCRE_RULE *rule;
1011     int     last_line = 0;
1012     int     lineno;
1013     int     nesting = 0;
1014     char   *p;
1015     DICT_PCRE_RULE **rule_stack = 0;
1016     MVECT   mvect;
1017 
1018     /*
1019      * Let the optimizer worry about eliminating redundant code.
1020      */
1021 #define DICT_PCRE_OPEN_RETURN(d) do { \
1022 	DICT *__d = (d); \
1023 	if (map_fp != 0) \
1024 	    vstream_fclose(map_fp); \
1025 	if (line_buffer != 0) \
1026 	    vstring_free(line_buffer); \
1027 	if (why != 0) \
1028 	   vstring_free(why); \
1029 	return (__d); \
1030     } while (0)
1031 
1032     /*
1033      * Sanity checks.
1034      */
1035     if (open_flags != O_RDONLY)
1036 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1037 					     open_flags, dict_flags,
1038 				  "%s:%s map requires O_RDONLY access mode",
1039 					     DICT_TYPE_PCRE, mapname));
1040 
1041     /*
1042      * Open the configuration file.
1043      */
1044     if ((map_fp = dict_stream_open(DICT_TYPE_PCRE, mapname, O_RDONLY,
1045 				   dict_flags, &st, &why)) == 0)
1046 	DICT_PCRE_OPEN_RETURN(dict_surrogate(DICT_TYPE_PCRE, mapname,
1047 					     open_flags, dict_flags,
1048 					     "%s", vstring_str(why)));
1049     line_buffer = vstring_alloc(100);
1050 
1051     dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
1052 					 sizeof(*dict_pcre));
1053     dict_pcre->dict.lookup = dict_pcre_lookup;
1054     dict_pcre->dict.close = dict_pcre_close;
1055     dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
1056     if (dict_flags & DICT_FLAG_FOLD_MUL)
1057 	dict_pcre->dict.fold_buf = vstring_alloc(10);
1058     dict_pcre->head = 0;
1059     dict_pcre->expansion_buf = 0;
1060 
1061 #if HAS_PCRE == 1
1062     if (dict_pcre_init == 0) {
1063 	pcre_malloc = (void *(*) (size_t)) mymalloc;
1064 	pcre_free = (void (*) (void *)) myfree;
1065 	dict_pcre_init = 1;
1066     }
1067 #endif
1068     dict_pcre->dict.owner.uid = st.st_uid;
1069     dict_pcre->dict.owner.status = (st.st_uid != 0);
1070 
1071     /*
1072      * Parse the pcre table.
1073      */
1074     while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
1075 	p = vstring_str(line_buffer);
1076 	trimblanks(p, 0)[0] = 0;		/* Trim space at end */
1077 	if (*p == 0)
1078 	    continue;
1079 	rule = dict_pcre_parse_rule(&dict_pcre->dict, mapname, lineno,
1080 				    p, nesting);
1081 	if (rule == 0)
1082 	    continue;
1083 	if (rule->op == DICT_PCRE_OP_IF) {
1084 	    if (rule_stack == 0)
1085 		rule_stack = (DICT_PCRE_RULE **) mvect_alloc(&mvect,
1086 					   sizeof(*rule_stack), nesting + 1,
1087 						(MVECT_FN) 0, (MVECT_FN) 0);
1088 	    else
1089 		rule_stack =
1090 		    (DICT_PCRE_RULE **) mvect_realloc(&mvect, nesting + 1);
1091 	    rule_stack[nesting] = rule;
1092 	    nesting++;
1093 	} else if (rule->op == DICT_PCRE_OP_ENDIF) {
1094 	    DICT_PCRE_IF_RULE *if_rule;
1095 
1096 	    if (nesting-- <= 0)
1097 		/* Already handled in dict_pcre_parse_rule(). */
1098 		msg_panic("%s: ENDIF without IF", myname);
1099 	    if (rule_stack[nesting]->op != DICT_PCRE_OP_IF)
1100 		msg_panic("%s: unexpected rule stack element type %d",
1101 			  myname, rule_stack[nesting]->op);
1102 	    if_rule = (DICT_PCRE_IF_RULE *) rule_stack[nesting];
1103 	    if_rule->endif_rule = rule;
1104 	}
1105 	if (last_rule == 0)
1106 	    dict_pcre->head = rule;
1107 	else
1108 	    last_rule->next = rule;
1109 	last_rule = rule;
1110     }
1111 
1112     while (nesting-- > 0)
1113 	msg_warn("pcre map %s, line %d: IF has no matching ENDIF",
1114 		 mapname, rule_stack[nesting]->lineno);
1115 
1116     if (rule_stack)
1117 	(void) mvect_free(&mvect);
1118 
1119     dict_file_purge_buffers(&dict_pcre->dict);
1120     DICT_PCRE_OPEN_RETURN(DICT_DEBUG (&dict_pcre->dict));
1121 }
1122 
1123 #endif					/* HAS_PCRE */
1124