1 /* $NetBSD: dict_thash.c,v 1.4 2022/10/08 16:12:50 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_thash 3 6 /* SUMMARY 7 /* dictionary manager interface to hashed flat text files 8 /* SYNOPSIS 9 /* #include <dict_thash.h> 10 /* 11 /* DICT *dict_thash_open(path, open_flags, dict_flags) 12 /* const char *name; 13 /* const char *path; 14 /* int open_flags; 15 /* int dict_flags; 16 /* DESCRIPTION 17 /* dict_thash_open() opens the named flat text file, creates 18 /* an in-memory hash table, and makes it available via the 19 /* generic interface described in dict_open(3). The input 20 /* format is as with postmap(1). 21 /* DIAGNOSTICS 22 /* Fatal errors: cannot open file, out of memory. 23 /* SEE ALSO 24 /* dict(3) generic dictionary manager 25 /* LICENSE 26 /* .ad 27 /* .fi 28 /* The Secure Mailer license must be distributed with this software. 29 /* AUTHOR(S) 30 /* Wietse Venema 31 /* IBM T.J. Watson Research 32 /* P.O. Box 704 33 /* Yorktown Heights, NY 10598, USA 34 /* 35 /* Wietse Venema 36 /* Google, Inc. 37 /* 111 8th Avenue 38 /* New York, NY 10011, USA 39 /*--*/ 40 41 /* System library. */ 42 43 #include <sys_defs.h> 44 #include <sys/stat.h> 45 #include <ctype.h> 46 #include <string.h> 47 48 /* Utility library. */ 49 50 #include <msg.h> 51 #include <mymalloc.h> 52 #include <iostuff.h> 53 #include <vstring.h> 54 #include <stringops.h> 55 #include <readlline.h> 56 #include <dict.h> 57 #include <dict_ht.h> 58 #include <dict_thash.h> 59 60 /* Application-specific. */ 61 62 #define STR vstring_str 63 #define LEN VSTRING_LEN 64 65 /* dict_thash_open - open flat text data base */ 66 67 DICT *dict_thash_open(const char *path, int open_flags, int dict_flags) 68 { 69 DICT *dict; 70 VSTREAM *fp = 0; /* DICT_THASH_OPEN_RETURN() */ 71 struct stat st; 72 time_t before; 73 time_t after; 74 VSTRING *line_buffer = 0; /* DICT_THASH_OPEN_RETURN() */ 75 int lineno; 76 int last_line; 77 char *key; 78 char *value; 79 80 /* 81 * Let the optimizer worry about eliminating redundant code. 82 */ 83 #define DICT_THASH_OPEN_RETURN(d) do { \ 84 DICT *__d = (d); \ 85 if (fp != 0) \ 86 vstream_fclose(fp); \ 87 if (line_buffer != 0) \ 88 vstring_free(line_buffer); \ 89 return (__d); \ 90 } while (0) 91 92 /* 93 * Sanity checks. 94 */ 95 if (open_flags != O_RDONLY) 96 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 97 open_flags, dict_flags, 98 "%s:%s map requires O_RDONLY access mode", 99 DICT_TYPE_THASH, path)); 100 101 /* 102 * Read the flat text file into in-memory hash. Read the file again if it 103 * may have changed while we were reading. 104 */ 105 for (before = time((time_t *) 0); /* see below */ ; before = after) { 106 if ((fp = vstream_fopen(path, open_flags, 0644)) == 0) { 107 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 108 open_flags, dict_flags, 109 "open database %s: %m", path)); 110 } 111 112 /* 113 * Reuse the "internal" dictionary type. 114 */ 115 dict = dict_open3(DICT_TYPE_HT, path, open_flags, dict_flags); 116 dict_type_override(dict, DICT_TYPE_THASH); 117 118 /* 119 * XXX This duplicates the parser in postmap.c. 120 */ 121 if (line_buffer == 0) 122 line_buffer = vstring_alloc(100); 123 last_line = 0; 124 while (readllines(line_buffer, fp, &last_line, &lineno)) { 125 int in_quotes = 0; 126 127 /* 128 * First some UTF-8 checks sans casefolding. 129 */ 130 if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) 131 && allascii(STR(line_buffer)) == 0 132 && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) { 133 msg_warn("%s, line %d: non-UTF-8 input \"%s\"" 134 " -- ignoring this line", 135 VSTREAM_PATH(fp), lineno, STR(line_buffer)); 136 continue; 137 } 138 139 /* 140 * Split on the first whitespace character, then trim leading and 141 * trailing whitespace from key and value. 142 */ 143 for (value = STR(line_buffer); *value; value++) { 144 if (*value == '\\') { 145 if (*++value == 0) 146 break; 147 } else if (ISSPACE(*value)) { 148 if (!in_quotes) 149 break; 150 } else if (*value == '"') { 151 in_quotes = !in_quotes; 152 } 153 } 154 if (in_quotes) { 155 msg_warn("%s, line %d: unbalanced '\"' in '%s'" 156 " -- ignoring this line", 157 VSTREAM_PATH(fp), lineno, STR(line_buffer)); 158 continue; 159 } 160 if (*value) 161 *value++ = 0; 162 while (ISSPACE(*value)) 163 value++; 164 trimblanks(value, 0)[0] = 0; 165 166 /* 167 * Leave the key in quoted form, for consistency with postmap.c 168 * and dict_inline.c. 169 */ 170 key = STR(line_buffer); 171 172 /* 173 * Enforce the "key whitespace value" format. Disallow missing 174 * keys or missing values. 175 */ 176 if (*key == 0 || *value == 0) { 177 msg_warn("%s, line %d: expected format: key whitespace value" 178 " -- ignoring this line", path, lineno); 179 continue; 180 } 181 if (key[strlen(key) - 1] == ':') 182 msg_warn("%s, line %d: record is in \"key: value\" format;" 183 " is this an alias file?", path, lineno); 184 185 /* 186 * Optionally treat the value as a filename, and replace the value 187 * with the BASE64-encoded content of the named file. 188 */ 189 if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) { 190 VSTRING *base64_buf; 191 char *err; 192 193 if ((base64_buf = dict_file_to_b64(dict, value)) == 0) { 194 err = dict_file_get_error(dict); 195 msg_warn("%s, line %d: %s: skipping this entry", 196 VSTREAM_PATH(fp), lineno, err); 197 myfree(err); 198 continue; 199 } 200 value = vstring_str(base64_buf); 201 } 202 203 /* 204 * Store the value under the key. Handle duplicates 205 * appropriately. XXX Move this into dict_ht, but 1) that map 206 * ignores duplicates by default and we would have to check that 207 * we won't break existing code that depends on such behavior; 2) 208 * by inlining the checks here we can degrade gracefully instead 209 * of terminating with a fatal error. See comment in 210 * dict_inline.c. 211 */ 212 if (dict->lookup(dict, key) != 0) { 213 if (dict_flags & DICT_FLAG_DUP_IGNORE) { 214 /* void */ ; 215 } else if (dict_flags & DICT_FLAG_DUP_REPLACE) { 216 dict->update(dict, key, value); 217 } else if (dict_flags & DICT_FLAG_DUP_WARN) { 218 msg_warn("%s, line %d: duplicate entry: \"%s\"", 219 path, lineno, key); 220 } else { 221 dict->close(dict); 222 DICT_THASH_OPEN_RETURN(dict_surrogate(DICT_TYPE_THASH, path, 223 open_flags, dict_flags, 224 "%s, line %d: duplicate entry: \"%s\"", 225 path, lineno, key)); 226 } 227 } else { 228 dict->update(dict, key, value); 229 } 230 } 231 232 /* 233 * See if the source file is hot. 234 */ 235 if (fstat(vstream_fileno(fp), &st) < 0) 236 msg_fatal("fstat %s: %m", path); 237 if (vstream_fclose(fp)) 238 msg_fatal("read %s: %m", path); 239 fp = 0; /* DICT_THASH_OPEN_RETURN() */ 240 after = time((time_t *) 0); 241 if (st.st_mtime < before - 1 || st.st_mtime > after) 242 break; 243 244 /* 245 * Yes, it is hot. Discard the result and read the file again. 246 */ 247 dict->close(dict); 248 if (msg_verbose > 1) 249 msg_info("pausing to let file %s cool down", path); 250 doze(300000); 251 } 252 253 dict->owner.uid = st.st_uid; 254 dict->owner.status = (st.st_uid != 0); 255 256 DICT_THASH_OPEN_RETURN(DICT_DEBUG (dict)); 257 } 258