1 /* $NetBSD: dict_thash.c,v 1.4 2022/10/08 16:12:50 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* dict_thash 3
6 /* SUMMARY
7 /* dictionary manager interface to hashed flat text files
8 /* SYNOPSIS
9 /* #include <dict_thash.h>
10 /*
11 /* DICT *dict_thash_open(path, open_flags, dict_flags)
13 /* const char *path;
14 /* int open_flags;
15 /* int dict_flags;
16 /* DESCRIPTION
17 /* dict_thash_open() opens the named flat text file, creates
18 /* an in-memory hash table, and makes it available via the
19 /* generic interface described in dict_open(3). The input
20 /* format is as with postmap(1).
21 /* DIAGNOSTICS
22 /* Fatal errors: cannot open file, out of memory.
23 /* SEE ALSO
24 /* dict(3) generic dictionary manager
25 /* LICENSE
26 /* .ad
27 /* .fi
28 /* The Secure Mailer license must be distributed with this software.
29 /* AUTHOR(S)
30 /* Wietse Venema
31 /* IBM T.J. Watson Research
32 /* P.O. Box 704
33 /* Yorktown Heights, NY 10598, USA
34 /*
35 /* Wietse Venema
36 /* Google, Inc.
37 /* 111 8th Avenue
38 /* New York, NY 10011, USA
39 /*--*/
40
41 /* System library. */
42
43 #include <sys_defs.h>
44 #include <sys/stat.h>
45 #include <ctype.h>
46 #include <string.h>
47
48 /* Utility library. */
49
50 #include <msg.h>
51 #include <mymalloc.h>
52 #include <iostuff.h>
53 #include <vstring.h>
54 #include <stringops.h>
55 #include <readlline.h>
56 #include <dict.h>
57 #include <dict_ht.h>
58 #include <dict_thash.h>
59
60 /* Application-specific. */
61
62 #define STR vstring_str
63 #define LEN VSTRING_LEN
64
65 /* dict_thash_open - open flat text data base */
66
dict_thash_open(const char * path,int open_flags,int dict_flags)67 DICT *dict_thash_open(const char *path, int open_flags, int dict_flags)
68 {
69 DICT *dict;
70 VSTREAM *fp = 0; /* DICT_THASH_OPEN_RETURN() */
71 struct stat st;
72 time_t before;
73 time_t after;
74 VSTRING *line_buffer = 0; /* DICT_THASH_OPEN_RETURN() */
75 int lineno;
76 int last_line;
77 char *key;
78 char *value;
79
80 /*
81 * Let the optimizer worry about eliminating redundant code.
82 */
83 #define DICT_THASH_OPEN_RETURN(d) do { \
84 DICT *__d = (d); \
85 if (fp != 0) \
86 vstream_fclose(fp); \
87 if (line_buffer != 0) \
88 vstring_free(line_buffer); \
89 return (__d); \
90 } while (0)
91
92 /*
93 * Sanity checks.
94 */
95 if (open_flags != O_RDONLY)
96 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
97 open_flags, dict_flags,
98 "%s:%s map requires O_RDONLY access mode",
99 DICT_TYPE_THASH, path));
100
101 /*
102 * Read the flat text file into in-memory hash. Read the file again if it
103 * may have changed while we were reading.
104 */
105 for (before = time((time_t *) 0); /* see below */ ; before = after) {
106 if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
107 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
108 open_flags, dict_flags,
109 "open database %s: %m", path));
110 }
111
112 /*
113 * Reuse the "internal" dictionary type.
114 */
115 dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
116 dict_type_override(dict, DICT_TYPE_THASH);
117
118 /*
119 * XXX This duplicates the parser in postmap.c.
120 */
121 if (line_buffer == 0)
122 line_buffer = vstring_alloc(100);
123 last_line = 0;
124 while (readllines(line_buffer, fp, &last_line, &lineno)) {
125 int in_quotes = 0;
126
127 /*
128 * First some UTF-8 checks sans casefolding.
129 */
130 if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
131 && allascii(STR(line_buffer)) == 0
132 && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
133 msg_warn("%s, line %d: non-UTF-8 input \"%s\""
134 " -- ignoring this line",
135 VSTREAM_PATH(fp), lineno, STR(line_buffer));
136 continue;
137 }
138
139 /*
140 * Split on the first whitespace character, then trim leading and
141 * trailing whitespace from key and value.
142 */
143 for (value = STR(line_buffer); *value; value++) {
144 if (*value == '\\') {
145 if (*++value == 0)
146 break;
147 } else if (ISSPACE(*value)) {
148 if (!in_quotes)
149 break;
150 } else if (*value == '"') {
151 in_quotes = !in_quotes;
152 }
153 }
154 if (in_quotes) {
155 msg_warn("%s, line %d: unbalanced '\"' in '%s'"
156 " -- ignoring this line",
157 VSTREAM_PATH(fp), lineno, STR(line_buffer));
158 continue;
159 }
160 if (*value)
161 *value++ = 0;
162 while (ISSPACE(*value))
163 value++;
164 trimblanks(value, 0)[0] = 0;
165
166 /*
167 * Leave the key in quoted form, for consistency with postmap.c
168 * and dict_inline.c.
169 */
170 key = STR(line_buffer);
171
172 /*
173 * Enforce the "key whitespace value" format. Disallow missing
174 * keys or missing values.
175 */
176 if (*key == 0 || *value == 0) {
177 msg_warn("%s, line %d: expected format: key whitespace value"
178 " -- ignoring this line", path, lineno);
179 continue;
180 }
181 if (key[strlen(key) - 1] == ':')
182 msg_warn("%s, line %d: record is in \"key: value\" format;"
183 " is this an alias file?", path, lineno);
184
185 /*
186 * Optionally treat the value as a filename, and replace the value
187 * with the BASE64-encoded content of the named file.
188 */
189 if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
190 VSTRING *base64_buf;
191 char *err;
192
193 if ((base64_buf = dict_file_to_b64(dict, value)) == 0) {
194 err = dict_file_get_error(dict);
195 msg_warn("%s, line %d: %s: skipping this entry",
196 VSTREAM_PATH(fp), lineno, err);
197 myfree(err);
198 continue;
199 }
200 value = vstring_str(base64_buf);
201 }
202
203 /*
204 * Store the value under the key. Handle duplicates
205 * appropriately. XXX Move this into dict_ht, but 1) that map
206 * ignores duplicates by default and we would have to check that
207 * we won't break existing code that depends on such behavior; 2)
208 * by inlining the checks here we can degrade gracefully instead
209 * of terminating with a fatal error. See comment in
210 * dict_inline.c.
211 */
212 if (dict->lookup(dict, key) != 0) {
213 if (dict_flags & DICT_FLAG_DUP_IGNORE) {
214 /* void */ ;
215 } else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
216 dict->update(dict, key, value);
217 } else if (dict_flags & DICT_FLAG_DUP_WARN) {
218 msg_warn("%s, line %d: duplicate entry: \"%s\"",
219 path, lineno, key);
220 } else {
221 dict->close(dict);
222 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
223 open_flags, dict_flags,
224 "%s, line %d: duplicate entry: \"%s\"",
225 path, lineno, key));
226 }
227 } else {
228 dict->update(dict, key, value);
229 }
230 }
231
232 /*
233 * See if the source file is hot.
234 */
235 if (fstat(vstream_fileno(fp), &st) < 0)
236 msg_fatal("fstat %s: %m", path);
237 if (vstream_fclose(fp))
238 msg_fatal("read %s: %m", path);
239 fp = 0; /* DICT_THASH_OPEN_RETURN() */
240 after = time((time_t *) 0);
241 if (st.st_mtime < before - 1 || st.st_mtime > after)
242 break;
243
244 /*
245 * Yes, it is hot. Discard the result and read the file again.
246 */
247 dict->close(dict);
248 if (msg_verbose > 1)
249 msg_info("pausing to let file %s cool down", path);
250 doze(300000);
251 }
252
253 dict->owner.uid = st.st_uid;
254 dict->owner.status = (st.st_uid != 0);
255
256 DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
257 }
258