xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_thash.c (revision d536862b7d93d77932ef5de7eebdc48d76921b77)
1 /*	$NetBSD: dict_thash.c,v 1.3 2020/03/18 19:05:21 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_thash 3
6 /* SUMMARY
7 /*	dictionary manager interface to hashed flat text files
8 /* SYNOPSIS
9 /*	#include <dict_thash.h>
10 /*
11 /*	DICT	*dict_thash_open(path, open_flags, dict_flags)
13 /*	const char *path;
14 /*	int	open_flags;
15 /*	int	dict_flags;
16 /* DESCRIPTION
17 /*	dict_thash_open() opens the named flat text file, creates
18 /*	an in-memory hash table, and makes it available via the
19 /*	generic interface described in dict_open(3). The input
20 /*	format is as with postmap(1).
21 /* DIAGNOSTICS
22 /*	Fatal errors: cannot open file, out of memory.
23 /* SEE ALSO
24 /*	dict(3) generic dictionary manager
25 /* LICENSE
26 /* .ad
27 /* .fi
28 /*	The Secure Mailer license must be distributed with this software.
29 /* AUTHOR(S)
30 /*	Wietse Venema
31 /*	IBM T.J. Watson Research
32 /*	P.O. Box 704
33 /*	Yorktown Heights, NY 10598, USA
34 /*
35 /*	Wietse Venema
36 /*	Google, Inc.
37 /*	111 8th Avenue
38 /*	New York, NY 10011, USA
39 /*--*/
40 
41 /* System library. */
42 
43 #include <sys_defs.h>
44 #include <sys/stat.h>
45 #include <ctype.h>
46 #include <string.h>
47 
48 /* Utility library. */
49 
50 #include <msg.h>
51 #include <iostuff.h>
52 #include <vstring.h>
53 #include <stringops.h>
54 #include <readlline.h>
55 #include <dict.h>
56 #include <dict_ht.h>
57 #include <dict_thash.h>
58 
59 /* Application-specific. */
60 
/*
 * Local shorthand: STR expands to vstring_str and LEN expands to
 * VSTRING_LEN. Both are applied to the line buffer in dict_thash_open().
 */
61 #define STR	vstring_str
62 #define LEN	VSTRING_LEN
63 
64 /* dict_thash_open - open flat text data base */
65 
66 DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
67 {
68     DICT   *dict;
69     VSTREAM *fp = 0;			/* DICT_THASH_OPEN_RETURN() */
70     struct stat st;
71     time_t  before;
72     time_t  after;
73     VSTRING *line_buffer = 0;		/* DICT_THASH_OPEN_RETURN() */
74     int     lineno;
75     int     last_line;
76     char   *key;
77     char   *value;
78 
79     /*
80      * Let the optimizer worry about eliminating redundant code.
81      */
82 #define DICT_THASH_OPEN_RETURN(d) do { \
83 	DICT *__d = (d); \
84 	if (fp != 0) \
85 	    vstream_fclose(fp); \
86 	if (line_buffer != 0) \
87 	    vstring_free(line_buffer); \
88 	return (__d); \
89     } while (0)
90 
91     /*
92      * Sanity checks.
93      */
94     if (open_flags != O_RDONLY)
95 	DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
96 					      open_flags, dict_flags,
97 				  "%s:%s map requires O_RDONLY access mode",
98 					      DICT_TYPE_THASH, path));
99 
100     /*
101      * Read the flat text file into in-memory hash. Read the file again if it
102      * may have changed while we were reading.
103      */
104     for (before = time((time_t *) 0); /* see below */ ; before = after) {
105 	if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
106 	    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
107 						  open_flags, dict_flags,
108 					     "open database %s: %m", path));
109 	}
110 
111 	/*
112 	 * Reuse the "internal" dictionary type.
113 	 */
114 	dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
115 	dict_type_override(dict, DICT_TYPE_THASH);
116 
117 	/*
118 	 * XXX This duplicates the parser in postmap.c.
119 	 */
120 	if (line_buffer == 0)
121 	    line_buffer = vstring_alloc(100);
122 	last_line = 0;
123 	while (readllines(line_buffer, fp, &last_line, &lineno)) {
124 	    int     in_quotes = 0;
125 
126 	    /*
127 	     * First some UTF-8 checks sans casefolding.
128 	     */
129 	    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
130 		&& allascii(STR(line_buffer)) == 0
131 	    && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
132 		msg_warn("%s, line %d: non-UTF-8 input \"%s\""
133 			 " -- ignoring this line",
134 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
135 		continue;
136 	    }
137 
138 	    /*
139 	     * Split on the first whitespace character, then trim leading and
140 	     * trailing whitespace from key and value.
141 	     */
142 	    for (value = STR(line_buffer); *value; value++) {
143 		if (*value == '\\') {
144 		    if (*++value == 0)
145 			break;
146 		} else if (ISSPACE(*value)) {
147 		    if (!in_quotes)
148 			break;
149 		} else if (*value == '"') {
150 		    in_quotes = !in_quotes;
151 		}
152 	    }
153 	    if (in_quotes) {
154 		msg_warn("%s, line %d: unbalanced '\"' in '%s'"
155 			 " -- ignoring this line",
156 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
157 		continue;
158 	    }
159 	    if (*value)
160 		*value++ = 0;
161 	    while (ISSPACE(*value))
162 		value++;
163 	    trimblanks(value, 0)[0] = 0;
164 
165 	    /*
166 	     * Leave the key in quoted form, for consistency with postmap.c
167 	     * and dict_inline.c.
168 	     */
169 	    key = STR(line_buffer);
170 
171 	    /*
172 	     * Enforce the "key whitespace value" format. Disallow missing
173 	     * keys or missing values.
174 	     */
175 	    if (*key == 0 || *value == 0) {
176 		msg_warn("%s, line %d: expected format: key whitespace value"
177 			 " -- ignoring this line", path, lineno);
178 		continue;
179 	    }
180 	    if (key[strlen(key) - 1] == ':')
181 		msg_warn("%s, line %d: record is in \"key: value\" format;"
182 			 " is this an alias file?", path, lineno);
183 
184 	    /*
185 	     * Store the value under the key. Handle duplicates
186 	     * appropriately. XXX Move this into dict_ht, but 1) that map
187 	     * ignores duplicates by default and we would have to check that
188 	     * we won't break existing code that depends on such benavior; 2)
189 	     * by inlining the checks here we can degrade gracefully instead
190 	     * of terminating with a fatal error. See comment in
191 	     * dict_inline.c.
192 	     */
193 	    if (dict->lookup(dict, key) != 0) {
194 		if (dict_flags & DICT_FLAG_DUP_IGNORE) {
195 		     /* void */ ;
196 		} else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
197 		    dict->update(dict, key, value);
198 		} else if (dict_flags & DICT_FLAG_DUP_WARN) {
199 		    msg_warn("%s, line %d: duplicate entry: \"%s\"",
200 			     path, lineno, key);
201 		} else {
202 		    dict->close(dict);
203 		    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
204 						     open_flags, dict_flags,
205 				     "%s, line %d: duplicate entry: \"%s\"",
206 							path, lineno, key));
207 		}
208 	    } else {
209 		dict->update(dict, key, value);
210 	    }
211 	}
212 
213 	/*
214 	 * See if the source file is hot.
215 	 */
216 	if (fstat(vstream_fileno(fp), &st) < 0)
217 	    msg_fatal("fstat %s: %m", path);
218 	if (vstream_fclose(fp))
219 	    msg_fatal("read %s: %m", path);
220 	fp = 0;					/* DICT_THASH_OPEN_RETURN() */
221 	after = time((time_t *) 0);
222 	if (st.st_mtime < before - 1 || st.st_mtime > after)
223 	    break;
224 
225 	/*
226 	 * Yes, it is hot. Discard the result and read the file again.
227 	 */
228 	dict->close(dict);
229 	if (msg_verbose > 1)
230 	    msg_info("pausing to let file %s cool down", path);
231 	doze(300000);
232     }
233 
234     dict->owner.uid = st.st_uid;
235     dict->owner.status = (st.st_uid != 0);
236 
237     DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
238 }
239