1 /* $NetBSD: dict_thash.c,v 1.3 2020/03/18 19:05:21 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_thash 3 6 /* SUMMARY 7 /* dictionary manager interface to hashed flat text files 8 /* SYNOPSIS 9 /* #include <dict_thash.h> 10 /* 11 /* DICT *dict_thash_open(path, open_flags, dict_flags) 12 /* const char *name; 13 /* const char *path; 14 /* int open_flags; 15 /* int dict_flags; 16 /* DESCRIPTION 17 /* dict_thash_open() opens the named flat text file, creates 18 /* an in-memory hash table, and makes it available via the 19 /* generic interface described in dict_open(3). The input 20 /* format is as with postmap(1). 21 /* DIAGNOSTICS 22 /* Fatal errors: cannot open file, out of memory. 23 /* SEE ALSO 24 /* dict(3) generic dictionary manager 25 /* LICENSE 26 /* .ad 27 /* .fi 28 /* The Secure Mailer license must be distributed with this software. 29 /* AUTHOR(S) 30 /* Wietse Venema 31 /* IBM T.J. Watson Research 32 /* P.O. Box 704 33 /* Yorktown Heights, NY 10598, USA 34 /* 35 /* Wietse Venema 36 /* Google, Inc. 37 /* 111 8th Avenue 38 /* New York, NY 10011, USA 39 /*--*/ 40 41 /* System library. */ 42 43 #include <sys_defs.h> 44 #include <sys/stat.h> 45 #include <ctype.h> 46 #include <string.h> 47 48 /* Utility library. */ 49 50 #include <msg.h> 51 #include <iostuff.h> 52 #include <vstring.h> 53 #include <stringops.h> 54 #include <readlline.h> 55 #include <dict.h> 56 #include <dict_ht.h> 57 #include <dict_thash.h> 58 59 /* Application-specific. */ 60 61 #define STR vstring_str 62 #define LEN VSTRING_LEN 63 64 /* dict_thash_open - open flat text data base */ 65 66 DICT *dict_thash_open(const char *path, int open_flags, int dict_flags) 67 { 68 DICT *dict; 69 VSTREAM *fp = 0; /* DICT_THASH_OPEN_RETURN() */ 70 struct stat st; 71 time_t before; 72 time_t after; 73 VSTRING *line_buffer = 0; /* DICT_THASH_OPEN_RETURN() */ 74 int lineno; 75 int last_line; 76 char *key; 77 char *value; 78 79 /* 80 * Let the optimizer worry about eliminating redundant code. 81 */ 82 #define DICT_THASH_OPEN_RETURN(d) do { \ 83 DICT *__d = (d); \ 84 if (fp != 0) \ 85 vstream_fclose(fp); \ 86 if (line_buffer != 0) \ 87 vstring_free(line_buffer); \ 88 return (__d); \ 89 } while (0) 90 91 /* 92 * Sanity checks. 93 */ 94 if (open_flags != O_RDONLY) 95 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 96 open_flags, dict_flags, 97 "%s:%s map requires O_RDONLY access mode", 98 DICT_TYPE_THASH, path)); 99 100 /* 101 * Read the flat text file into in-memory hash. Read the file again if it 102 * may have changed while we were reading. 103 */ 104 for (before = time((time_t *) 0); /* see below */ ; before = after) { 105 if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) { 106 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 107 open_flags, dict_flags, 108 "open database %s: %m", path)); 109 } 110 111 /* 112 * Reuse the "internal" dictionary type. 113 */ 114 dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags); 115 dict_type_override(dict, DICT_TYPE_THASH); 116 117 /* 118 * XXX This duplicates the parser in postmap.c. 119 */ 120 if (line_buffer == 0) 121 line_buffer = vstring_alloc(100); 122 last_line = 0; 123 while (readllines(line_buffer, fp, &last_line, &lineno)) { 124 int in_quotes = 0; 125 126 /* 127 * First some UTF-8 checks sans casefolding. 128 */ 129 if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) 130 && allascii(STR(line_buffer)) == 0 131 && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) { 132 msg_warn("%s, line %d: non-UTF-8 input \"%s\"" 133 " -- ignoring this line", 134 VSTREAM_PATH(fp), lineno, STR(line_buffer)); 135 continue; 136 } 137 138 /* 139 * Split on the first whitespace character, then trim leading and 140 * trailing whitespace from key and value. 141 */ 142 for (value = STR(line_buffer); *value; value++) { 143 if (*value == '\\') { 144 if (*++value == 0) 145 break; 146 } else if (ISSPACE(*value)) { 147 if (!in_quotes) 148 break; 149 } else if (*value == '"') { 150 in_quotes = !in_quotes; 151 } 152 } 153 if (in_quotes) { 154 msg_warn("%s, line %d: unbalanced '\"' in '%s'" 155 " -- ignoring this line", 156 VSTREAM_PATH(fp), lineno, STR(line_buffer)); 157 continue; 158 } 159 if (*value) 160 *value++ = 0; 161 while (ISSPACE(*value)) 162 value++; 163 trimblanks(value, 0)[0] = 0; 164 165 /* 166 * Leave the key in quoted form, for consistency with postmap.c 167 * and dict_inline.c. 168 */ 169 key = STR(line_buffer); 170 171 /* 172 * Enforce the "key whitespace value" format. Disallow missing 173 * keys or missing values. 174 */ 175 if (*key == 0 || *value == 0) { 176 msg_warn("%s, line %d: expected format: key whitespace value" 177 " -- ignoring this line", path, lineno); 178 continue; 179 } 180 if (key[strlen(key) - 1] == ':') 181 msg_warn("%s, line %d: record is in \"key: value\" format;" 182 " is this an alias file?", path, lineno); 183 184 /* 185 * Store the value under the key. Handle duplicates 186 * appropriately. XXX Move this into dict_ht, but 1) that map 187 * ignores duplicates by default and we would have to check that 188 * we won't break existing code that depends on such benavior; 2) 189 * by inlining the checks here we can degrade gracefully instead 190 * of terminating with a fatal error. See comment in 191 * dict_inline.c. 192 */ 193 if (dict->lookup(dict, key) != 0) { 194 if (dict_flags & DICT_FLAG_DUP_IGNORE) { 195 /* void */ ; 196 } else if (dict_flags & DICT_FLAG_DUP_REPLACE) { 197 dict->update(dict, key, value); 198 } else if (dict_flags & DICT_FLAG_DUP_WARN) { 199 msg_warn("%s, line %d: duplicate entry: \"%s\"", 200 path, lineno, key); 201 } else { 202 dict->close(dict); 203 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 204 open_flags, dict_flags, 205 "%s, line %d: duplicate entry: \"%s\"", 206 path, lineno, key)); 207 } 208 } else { 209 dict->update(dict, key, value); 210 } 211 } 212 213 /* 214 * See if the source file is hot. 215 */ 216 if (fstat(vstream_fileno(fp), &st) < 0) 217 msg_fatal("fstat %s: %m", path); 218 if (vstream_fclose(fp)) 219 msg_fatal("read %s: %m", path); 220 fp = 0; /* DICT_THASH_OPEN_RETURN() */ 221 after = time((time_t *) 0); 222 if (st.st_mtime < before - 1 || st.st_mtime > after) 223 break; 224 225 /* 226 * Yes, it is hot. Discard the result and read the file again. 227 */ 228 dict->close(dict); 229 if (msg_verbose > 1) 230 msg_info("pausing to let file %s cool down", path); 231 doze(300000); 232 } 233 234 dict->owner.uid = st.st_uid; 235 dict->owner.status = (st.st_uid != 0); 236 237 DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict)); 238 } 239