1 /* $NetBSD: dict_utf8.c,v 1.3 2020/03/18 19:05:21 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* dict_utf8 3 6 /* SUMMARY 7 /* dictionary UTF-8 helpers 8 /* SYNOPSIS 9 /* #include <dict.h> 10 /* 11 /* DICT *dict_utf8_activate( 12 /* DICT *dict) 13 /* DESCRIPTION 14 /* dict_utf8_activate() wraps a dictionary's lookup/update/delete 15 /* methods with code that enforces UTF-8 checks on keys and 16 /* values, and that logs a warning when incorrect UTF-8 is 17 /* encountered. The original dictionary handle becomes invalid. 18 /* 19 /* The wrapper code enforces a policy that maximizes application 20 /* robustness (it avoids the need for new error-handling code 21 /* paths in application code). Attempts to store non-UTF-8 22 /* keys or values are skipped while reporting a non-error 23 /* status, attempts to look up or delete non-UTF-8 keys are 24 /* skipped while reporting a non-error status, and lookup 25 /* results that contain a non-UTF-8 value are blocked while 26 /* reporting a configuration error. 27 /* BUGS 28 /* dict_utf8_activate() does not nest. 29 /* LICENSE 30 /* .ad 31 /* .fi 32 /* The Secure Mailer license must be distributed with this software. 33 /* AUTHOR(S) 34 /* Wietse Venema 35 /* IBM T.J. Watson Research 36 /* P.O. Box 704 37 /* Yorktown Heights, NY 10598, USA 38 /* 39 /* Wietse Venema 40 /* Google, Inc. 41 /* 111 8th Avenue 42 /* New York, NY 10011, USA 43 /*--*/ 44 45 /* 46 * System library. 47 */ 48 #include <sys_defs.h> 49 #include <string.h> 50 51 /* 52 * Utility library. 53 */ 54 #include <msg.h> 55 #include <stringops.h> 56 #include <dict.h> 57 #include <mymalloc.h> 58 #include <msg.h> 59 60 /* 61 * The goal is to maximize robustness: bad UTF-8 should not appear in keys, 62 * because those are derived from controlled inputs, and values should be 63 * printable before they are stored. But if we failed to check something 64 * then it should not result in fatal errors and thus open up the system for 65 * a denial-of-service attack. 66 * 67 * Proposed over-all policy: skip attempts to store invalid UTF-8 lookup keys 68 * or values. Rationale: some storage may not permit malformed UTF-8. This 69 * maximizes program robustness. If we get an invalid lookup result, report 70 * a configuration error. 71 * 72 * LOOKUP 73 * 74 * If the key is invalid, log a warning and skip the request. Rationale: the 75 * item cannot exist. 76 * 77 * If the lookup result is invalid, log a warning and return a configuration 78 * error. 79 * 80 * UPDATE 81 * 82 * If the key is invalid, then log a warning and skip the request. Rationale: 83 * the item cannot exist. 84 * 85 * If the value is invalid, log a warning and skip the request. Rationale: 86 * storage may not permit malformed UTF-8. This maximizes program 87 * robustness. 88 * 89 * DELETE 90 * 91 * If the key is invalid, then skip the request. Rationale: the item cannot 92 * exist. 93 */ 94 95 /* dict_utf8_check_fold - casefold or validate string */ 96 97 static char *dict_utf8_check_fold(DICT *dict, const char *string, 98 CONST_CHAR_STAR *err) 99 { 100 int fold_flag = (dict->flags & DICT_FLAG_FOLD_ANY); 101 102 /* 103 * Validate UTF-8 without casefolding. 104 */ 105 if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) { 106 if (err) 107 *err = "malformed UTF-8 or invalid codepoint"; 108 return (0); 109 } 110 111 /* 112 * Casefold UTF-8. 113 */ 114 if (fold_flag != 0 115 && (fold_flag & ((dict->flags & DICT_FLAG_FIXED) ? 116 DICT_FLAG_FOLD_FIX : DICT_FLAG_FOLD_MUL))) { 117 if (dict->fold_buf == 0) 118 dict->fold_buf = vstring_alloc(10); 119 return (casefold(dict->fold_buf, string)); 120 } 121 return ((char *) string); 122 } 123 124 /* dict_utf8_check validate UTF-8 string */ 125 126 static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err) 127 { 128 if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) { 129 if (err) 130 *err = "malformed UTF-8 or invalid codepoint"; 131 return (0); 132 } 133 return (1); 134 } 135 136 /* dict_utf8_lookup - UTF-8 lookup method wrapper */ 137 138 static const char *dict_utf8_lookup(DICT *dict, const char *key) 139 { 140 DICT_UTF8_BACKUP *backup; 141 const char *utf8_err; 142 const char *fold_res; 143 const char *value; 144 int saved_flags; 145 146 /* 147 * Validate and optionally fold the key, and if invalid skip the request. 148 */ 149 if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) { 150 msg_warn("%s:%s: non-UTF-8 key \"%s\": %s", 151 dict->type, dict->name, key, utf8_err); 152 dict->error = DICT_ERR_NONE; 153 return (0); 154 } 155 156 /* 157 * Proxy the request with casefolding turned off. 158 */ 159 saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY); 160 dict->flags &= ~DICT_FLAG_FOLD_ANY; 161 backup = dict->utf8_backup; 162 value = backup->lookup(dict, fold_res); 163 dict->flags |= saved_flags; 164 165 /* 166 * Validate the result, and if invalid fail the request. 167 */ 168 if (value != 0 && dict_utf8_check(value, &utf8_err) == 0) { 169 msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s", 170 dict->type, dict->name, key, value, utf8_err); 171 dict->error = DICT_ERR_CONFIG; 172 return (0); 173 } else { 174 return (value); 175 } 176 } 177 178 /* dict_utf8_update - UTF-8 update method wrapper */ 179 180 static int dict_utf8_update(DICT *dict, const char *key, const char *value) 181 { 182 DICT_UTF8_BACKUP *backup; 183 const char *utf8_err; 184 const char *fold_res; 185 int saved_flags; 186 int status; 187 188 /* 189 * Validate or fold the key, and if invalid skip the request. 190 */ 191 if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) { 192 msg_warn("%s:%s: non-UTF-8 key \"%s\": %s", 193 dict->type, dict->name, key, utf8_err); 194 dict->error = DICT_ERR_NONE; 195 return (DICT_STAT_SUCCESS); 196 } 197 198 /* 199 * Validate the value, and if invalid skip the request. 200 */ 201 else if (dict_utf8_check(value, &utf8_err) == 0) { 202 msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s", 203 dict->type, dict->name, key, value, utf8_err); 204 dict->error = DICT_ERR_NONE; 205 return (DICT_STAT_SUCCESS); 206 } 207 208 /* 209 * Proxy the request with casefolding turned off. 210 */ 211 else { 212 saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY); 213 dict->flags &= ~DICT_FLAG_FOLD_ANY; 214 backup = dict->utf8_backup; 215 status = backup->update(dict, fold_res, value); 216 dict->flags |= saved_flags; 217 return (status); 218 } 219 } 220 221 /* dict_utf8_delete - UTF-8 delete method wrapper */ 222 223 static int dict_utf8_delete(DICT *dict, const char *key) 224 { 225 DICT_UTF8_BACKUP *backup; 226 const char *utf8_err; 227 const char *fold_res; 228 int saved_flags; 229 int status; 230 231 /* 232 * Validate and optionally fold the key, and if invalid skip the request. 233 */ 234 if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) { 235 msg_warn("%s:%s: non-UTF-8 key \"%s\": %s", 236 dict->type, dict->name, key, utf8_err); 237 dict->error = DICT_ERR_NONE; 238 return (DICT_STAT_SUCCESS); 239 } 240 241 /* 242 * Proxy the request with casefolding turned off. 243 */ 244 else { 245 saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY); 246 dict->flags &= ~DICT_FLAG_FOLD_ANY; 247 backup = dict->utf8_backup; 248 status = backup->delete(dict, fold_res); 249 dict->flags |= saved_flags; 250 return (status); 251 } 252 } 253 254 /* dict_utf8_activate - wrap a legacy dict object for UTF-8 processing */ 255 256 DICT *dict_utf8_activate(DICT *dict) 257 { 258 const char myname[] = "dict_utf8_activate"; 259 DICT_UTF8_BACKUP *backup; 260 261 /* 262 * Sanity check. 263 */ 264 if (util_utf8_enable == 0) 265 msg_panic("%s: Unicode support is not available", myname); 266 if ((dict->flags & DICT_FLAG_UTF8_REQUEST) == 0) 267 msg_panic("%s: %s:%s does not request Unicode support", 268 myname, dict->type, dict->name); 269 if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) || dict->utf8_backup != 0) 270 msg_panic("%s: %s:%s Unicode support is already activated", 271 myname, dict->type, dict->name); 272 273 /* 274 * Unlike dict_debug(3) we do not put a proxy dict object in front of the 275 * encapsulated object, because then we would have to bidirectionally 276 * propagate changes in the data members (errors, flags, jbuf, and so on) 277 * between proxy object and encapsulated object. 278 * 279 * Instead we attach ourselves behind the encapsulated dict object, and 280 * redirect some function pointers to ourselves. 281 */ 282 backup = dict->utf8_backup = (DICT_UTF8_BACKUP *) mymalloc(sizeof(*backup)); 283 284 /* 285 * Interpose on the lookup/update/delete methods. It is a conscious 286 * decision not to tinker with the iterator or destructor. 287 */ 288 backup->lookup = dict->lookup; 289 backup->update = dict->update; 290 backup->delete = dict->delete; 291 292 dict->lookup = dict_utf8_lookup; 293 dict->update = dict_utf8_update; 294 dict->delete = dict_utf8_delete; 295 296 /* 297 * Leave our mark. See sanity check above. 298 */ 299 dict->flags |= DICT_FLAG_UTF8_ACTIVE; 300 301 return (dict); 302 } 303