xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_thash.c (revision 87d689fb734c654d2486f87f7be32f1b53ecdbec)
1 /*	$NetBSD: dict_thash.c,v 1.2 2017/02/14 01:16:49 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_thash 3
6 /* SUMMARY
7 /*	dictionary manager interface to hashed flat text files
8 /* SYNOPSIS
9 /*	#include <dict_thash.h>
10 /*
11 /*	DICT	*dict_thash_open(path, open_flags, dict_flags)
13 /*	const char *path;
14 /*	int	open_flags;
15 /*	int	dict_flags;
16 /* DESCRIPTION
17 /*	dict_thash_open() opens the named flat text file, creates
18 /*	an in-memory hash table, and makes it available via the
19 /*	generic interface described in dict_open(3). The input
20 /*	format is as with postmap(1).
21 /* DIAGNOSTICS
22 /*	Fatal errors: cannot open file, out of memory.
23 /* SEE ALSO
24 /*	dict(3) generic dictionary manager
25 /* LICENSE
26 /* .ad
27 /* .fi
28 /*	The Secure Mailer license must be distributed with this software.
29 /* AUTHOR(S)
30 /*	Wietse Venema
31 /*	IBM T.J. Watson Research
32 /*	P.O. Box 704
33 /*	Yorktown Heights, NY 10598, USA
34 /*--*/
35 
36 /* System library. */
37 
38 #include <sys_defs.h>
39 #include <sys/stat.h>
40 #include <ctype.h>
41 #include <string.h>
42 
43 /* Utility library. */
44 
45 #include <msg.h>
46 #include <iostuff.h>
47 #include <vstring.h>
48 #include <stringops.h>
49 #include <readlline.h>
50 #include <dict.h>
51 #include <dict_ht.h>
52 #include <dict_thash.h>
53 
54 /* Application-specific. */
55 
56 #define STR	vstring_str
57 #define LEN	VSTRING_LEN
58 
59 /* dict_thash_open - open flat text data base */
60 
61 DICT   *dict_thash_open(const char *path, int open_flags, int dict_flags)
62 {
63     DICT   *dict;
64     VSTREAM *fp = 0;			/* DICT_THASH_OPEN_RETURN() */
65     struct stat st;
66     time_t  before;
67     time_t  after;
68     VSTRING *line_buffer = 0;		/* DICT_THASH_OPEN_RETURN() */
69     int     lineno;
70     int     last_line;
71     char   *key;
72     char   *value;
73 
74     /*
75      * Let the optimizer worry about eliminating redundant code.
76      */
77 #define DICT_THASH_OPEN_RETURN(d) do { \
78 	DICT *__d = (d); \
79 	if (fp != 0) \
80 	    vstream_fclose(fp); \
81 	if (line_buffer != 0) \
82 	    vstring_free(line_buffer); \
83 	return (__d); \
84     } while (0)
85 
86     /*
87      * Sanity checks.
88      */
89     if (open_flags != O_RDONLY)
90 	DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
91 					      open_flags, dict_flags,
92 				  "%s:%s map requires O_RDONLY access mode",
93 					      DICT_TYPE_THASH, path));
94 
95     /*
96      * Read the flat text file into in-memory hash. Read the file again if it
97      * may have changed while we were reading.
98      */
99     for (before = time((time_t *) 0); /* see below */ ; before = after) {
100 	if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) {
101 	    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
102 						  open_flags, dict_flags,
103 					     "open database %s: %m", path));
104 	    }
105 
106 	/*
107 	 * Reuse the "internal" dictionary type.
108 	 */
109 	dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags);
110 	dict_type_override(dict, DICT_TYPE_THASH);
111 
112 	if (line_buffer == 0)
113 	    line_buffer = vstring_alloc(100);
114 	last_line = 0;
115 	while (readllines(line_buffer, fp, &last_line, &lineno)) {
116 
117 	    /*
118 	     * First some UTF-8 checks sans casefolding.
119 	     */
120 	    if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
121 		&& allascii(STR(line_buffer)) == 0
122 		&& valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
123 		msg_warn("%s, line %d: non-UTF-8 input \"%s\""
124 			 " -- ignoring this line",
125 			 VSTREAM_PATH(fp), lineno, STR(line_buffer));
126 		continue;
127 	    }
128 
129 	    /*
130 	     * Split on the first whitespace character, then trim leading and
131 	     * trailing whitespace from key and value.
132 	     */
133 	    key = STR(line_buffer);
134 	    value = key + strcspn(key, CHARS_SPACE);
135 	    if (*value)
136 		*value++ = 0;
137 	    while (ISSPACE(*value))
138 		value++;
139 	    trimblanks(key, 0)[0] = 0;
140 	    trimblanks(value, 0)[0] = 0;
141 
142 	    /*
143 	     * Enforce the "key whitespace value" format. Disallow missing
144 	     * keys or missing values.
145 	     */
146 	    if (*key == 0 || *value == 0) {
147 		msg_warn("%s, line %d: expected format: key whitespace value"
148 			 " -- ignoring this line", path, lineno);
149 		continue;
150 	    }
151 	    if (key[strlen(key) - 1] == ':')
152 		msg_warn("%s, line %d: record is in \"key: value\" format;"
153 			 " is this an alias file?", path, lineno);
154 
155 	    /*
156 	     * Store the value under the key. Handle duplicates
157 	     * appropriately. XXX Move this into dict_ht, but 1) that map
158 	     * ignores duplicates by default and we would have to check that
159 	     * we won't break existing code that depends on such benavior; 2)
160 	     * by inlining the checks here we can degrade gracefully instead
161 	     * of terminating with a fatal error. See comment in dict_inline.c.
162 	     */
163 	    if (dict->lookup(dict, key) != 0) {
164 		if (dict_flags & DICT_FLAG_DUP_IGNORE) {
165 		     /* void */ ;
166 		} else if (dict_flags & DICT_FLAG_DUP_REPLACE) {
167 		    dict->update(dict, key, value);
168 		} else if (dict_flags & DICT_FLAG_DUP_WARN) {
169 		    msg_warn("%s, line %d: duplicate entry: \"%s\"",
170 			     path, lineno, key);
171 		} else {
172 		    dict->close(dict);
173 		    DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path,
174 						     open_flags, dict_flags,
175 				     "%s, line %d: duplicate entry: \"%s\"",
176 							path, lineno, key));
177 		}
178 	    } else {
179 		dict->update(dict, key, value);
180 	    }
181 	}
182 
183 	/*
184 	 * See if the source file is hot.
185 	 */
186 	if (fstat(vstream_fileno(fp), &st) < 0)
187 	    msg_fatal("fstat %s: %m", path);
188 	if (vstream_fclose(fp))
189 	    msg_fatal("read %s: %m", path);
190 	fp = 0;					/* DICT_THASH_OPEN_RETURN() */
191 	after = time((time_t *) 0);
192 	if (st.st_mtime < before - 1 || st.st_mtime > after)
193 	    break;
194 
195 	/*
196 	 * Yes, it is hot. Discard the result and read the file again.
197 	 */
198 	dict->close(dict);
199 	if (msg_verbose > 1)
200 	    msg_info("pausing to let file %s cool down", path);
201 	doze(300000);
202     }
203 
204     dict->owner.uid = st.st_uid;
205     dict->owner.status = (st.st_uid != 0);
206 
207     DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict));
208 }
209