xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_thash.c (revision 67b9b338a7386232ac596b5fd0cd5a9cc8a03c71)
1 /*	$NetBSD: dict_thash.c,v 1.4 2022/10/08 16:12:50 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_thash 3
6 /* SUMMARY
7 /*	dictionary manager interface to hashed flat text files
8 /* SYNOPSIS
9 /*	#include <dict_thash.h>
10 /*
11 /*	DICT	*dict_thash_open(path, open_flags, dict_flags)
12 /*	const char *name;
13 /*	const char *path;
14 /*	int	open_flags;
15 /*	int	dict_flags;
16 /* DESCRIPTION
17 /*	dict_thash_open() opens the named flat text file, creates
18 /*	an in-memory hash table, and makes it available via the
19 /*	generic interface described in dict_open(3). The input
20 /*	format is as with postmap(1).
21 /* DIAGNOSTICS
22 /*	Fatal errors: fstat or read error, out of memory. Open errors and unpermitted duplicate keys return a dict_surrogate() result instead.
23 /* SEE ALSO
24 /*	dict(3) generic dictionary manager
25 /* LICENSE
26 /* .ad
27 /* .fi
28 /*	The Secure Mailer license must be distributed with this software.
29 /* AUTHOR(S)
30 /*	Wietse Venema
31 /*	IBM T.J. Watson Research
32 /*	P.O. Box 704
33 /*	Yorktown Heights, NY 10598, USA
34 /*
35 /*	Wietse Venema
36 /*	Google, Inc.
37 /*	111 8th Avenue
38 /*	New York, NY 10011, USA
39 /*--*/
40 
41 /* System library. */
42 
43 #include <sys_defs.h>
44 #include <sys/stat.h>
45 #include <ctype.h>
46 #include <string.h>
47 
48 /* Utility library. */
49 
50 #include <msg.h>
51 #include <mymalloc.h>
52 #include <iostuff.h>
53 #include <vstring.h>
54 #include <stringops.h>
55 #include <readlline.h>
56 #include <dict.h>
57 #include <dict_ht.h>
58 #include <dict_thash.h>
59 
60 /* Application-specific. */
61 
62 #define STR	vstring_str
63 #define LEN	VSTRING_LEN
64 
65 /* dict_thash_open - open flat text data base */
66 
dict_thash_open(const char * path,int open_flags,int dict_flags)67 DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
68 {
69     DICT   *dict;
70     VSTREAM *fp = 0;			/* DICT_THASH_OPEN_RETURN() */
71     struct stat st;
72     time_t  before;
73     time_t  after;
74     VSTRING *line_buffer = 0;		/* DICT_THASH_OPEN_RETURN() */
75     int     lineno;
76     int     last_line;
77     char   *key;
78     char   *value;
79 
80     /*
81      * Let the optimizer worry about eliminating redundant code.
82      */
83 #define DICT_THASH_OPEN_RETURN(d) do { \
84 	DICT *__d = (d); \
85 	if (fp != 0) \
86 	    vstream_fclose(fp); \
87 	if (line_buffer != 0) \
88 	    vstring_free(line_buffer); \
89 	return (__d); \
90     } while (0)
91 
92     /*
93      * Sanity checks.
94      */
95     if (open_flags != O_RDONLY)
96 	DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
97 					      open_flags, dict_flags,
98 				  "%s:%s map requires O_RDONLY access mode",
99 					      DICT_TYPE_THASH, path));
100 
101     /*
102      * Read the flat text file into in-memory hash. Read the file again if it
103      * may have changed while we were reading.
104      */
105     for (before = time((time_t *) 0); /* see below */ ; before = after) {
106 	if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
107 	    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
108 						  open_flags, dict_flags,
109 					     "open database %s: %m", path));
110 	}
111 
112 	/*
113 	 * Reuse the "internal" dictionary type.
114 	 */
115 	dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
116 	dict_type_override(dict, DICT_TYPE_THASH);
117 
118 	/*
119 	 * XXX This duplicates the parser in postmap.c.
120 	 */
121 	if (line_buffer == 0)
122 	    line_buffer = vstring_alloc(100);
123 	last_line = 0;
124 	while (readllines(line_buffer, fp, &last_line, &lineno)) {
125 	    int     in_quotes = 0;
126 
127 	    /*
128 	     * First some UTF-8 checks sans casefolding.
129 	     */
130 	    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
131 		&& allascii(STR(line_buffer)) == 0
132 	    && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
133 		msg_warn("%s, line %d: non-UTF-8 input \"%s\""
134 			 " -- ignoring this line",
135 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
136 		continue;
137 	    }
138 
139 	    /*
140 	     * Split on the first whitespace character, then trim leading and
141 	     * trailing whitespace from key and value.
142 	     */
143 	    for (value = STR(line_buffer); *value; value++) {
144 		if (*value == '\\') {
145 		    if (*++value == 0)
146 			break;
147 		} else if (ISSPACE(*value)) {
148 		    if (!in_quotes)
149 			break;
150 		} else if (*value == '"') {
151 		    in_quotes = !in_quotes;
152 		}
153 	    }
154 	    if (in_quotes) {
155 		msg_warn("%s, line %d: unbalanced '\"' in '%s'"
156 			 " -- ignoring this line",
157 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
158 		continue;
159 	    }
160 	    if (*value)
161 		*value++ = 0;
162 	    while (ISSPACE(*value))
163 		value++;
164 	    trimblanks(value, 0)[0] = 0;
165 
166 	    /*
167 	     * Leave the key in quoted form, for consistency with postmap.c
168 	     * and dict_inline.c.
169 	     */
170 	    key = STR(line_buffer);
171 
172 	    /*
173 	     * Enforce the "key whitespace value" format. Disallow missing
174 	     * keys or missing values.
175 	     */
176 	    if (*key == 0 || *value == 0) {
177 		msg_warn("%s, line %d: expected format: key whitespace value"
178 			 " -- ignoring this line", path, lineno);
179 		continue;
180 	    }
181 	    if (key[strlen(key) - 1] == ':')
182 		msg_warn("%s, line %d: record is in \"key: value\" format;"
183 			 " is this an alias file?", path, lineno);
184 
185 	    /*
186 	     * Optionally treat the value as a filename, and replace the value
187 	     * with the BASE64-encoded content of the named file.
188 	     */
189 	    if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
190 		VSTRING *base64_buf;
191 		char   *err;
192 
193 		if ((base64_buf = dict_file_to_b64(dict, value)) == 0) {
194 		    err = dict_file_get_error(dict);
195 		    msg_warn("%s, line %d: %s: skipping this entry",
196 			     VSTREAM_PATH(fp), lineno, err);
197 		    myfree(err);
198 		    continue;
199 		}
200 		value = vstring_str(base64_buf);
201 	    }
202 
203 	    /*
204 	     * Store the value under the key. Handle duplicates
205 	     * appropriately. XXX Move this into dict_ht, but 1) that map
206 	     * ignores duplicates by default and we would have to check that
207 	     * we won't break existing code that depends on such behavior; 2)
208 	     * by inlining the checks here we can degrade gracefully instead
209 	     * of terminating with a fatal error. See comment in
210 	     * dict_inline.c.
211 	     */
212 	    if (dict->lookup(dict, key) != 0) {
213 		if (dict_flags & DICT_FLAG_DUP_IGNORE) {
214 		     /* void */ ;
215 		} else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
216 		    dict->update(dict, key, value);
217 		} else if (dict_flags & DICT_FLAG_DUP_WARN) {
218 		    msg_warn("%s, line %d: duplicate entry: \"%s\"",
219 			     path, lineno, key);
220 		} else {
221 		    dict->close(dict);
222 		    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
223 						     open_flags, dict_flags,
224 				     "%s, line %d: duplicate entry: \"%s\"",
225 							path, lineno, key));
226 		}
227 	    } else {
228 		dict->update(dict, key, value);
229 	    }
230 	}
231 
232 	/*
233 	 * See if the source file is hot.
234 	 */
235 	if (fstat(vstream_fileno(fp), &st) < 0)
236 	    msg_fatal("fstat %s: %m", path);
237 	if (vstream_fclose(fp))
238 	    msg_fatal("read %s: %m", path);
239 	fp = 0;					/* DICT_THASH_OPEN_RETURN() */
240 	after = time((time_t *) 0);
241 	if (st.st_mtime < before - 1 || st.st_mtime > after)
242 	    break;
243 
244 	/*
245 	 * Yes, it is hot. Discard the result and read the file again.
246 	 */
247 	dict->close(dict);
248 	if (msg_verbose > 1)
249 	    msg_info("pausing to let file %s cool down", path);
250 	doze(300000);
251     }
252 
253     dict->owner.uid = st.st_uid;
254     dict->owner.status = (st.st_uid != 0);
255 
256     DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
257 }
258