1 /* $NetBSD: dict_utf8.c,v 1.3 2020/03/18 19:05:21 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* dict_utf8 3
6 /* SUMMARY
7 /* dictionary UTF-8 helpers
8 /* SYNOPSIS
9 /* #include <dict.h>
10 /*
11 /* DICT *dict_utf8_activate(
12 /* DICT *dict)
13 /* DESCRIPTION
14 /* dict_utf8_activate() wraps a dictionary's lookup/update/delete
15 /* methods with code that enforces UTF-8 checks on keys and
16 /* values, and that logs a warning when incorrect UTF-8 is
17 /* encountered. The original dictionary handle becomes invalid.
18 /*
19 /* The wrapper code enforces a policy that maximizes application
20 /* robustness (it avoids the need for new error-handling code
21 /* paths in application code). Attempts to store non-UTF-8
22 /* keys or values are skipped while reporting a non-error
23 /* status, attempts to look up or delete non-UTF-8 keys are
24 /* skipped while reporting a non-error status, and lookup
25 /* results that contain a non-UTF-8 value are blocked while
26 /* reporting a configuration error.
27 /* BUGS
28 /* dict_utf8_activate() does not nest.
29 /* LICENSE
30 /* .ad
31 /* .fi
32 /* The Secure Mailer license must be distributed with this software.
33 /* AUTHOR(S)
34 /* Wietse Venema
35 /* IBM T.J. Watson Research
36 /* P.O. Box 704
37 /* Yorktown Heights, NY 10598, USA
38 /*
39 /* Wietse Venema
40 /* Google, Inc.
41 /* 111 8th Avenue
42 /* New York, NY 10011, USA
43 /*--*/
44
45 /*
46 * System library.
47 */
48 #include <sys_defs.h>
49 #include <string.h>
50
51 /*
52 * Utility library.
53 */
54 #include <msg.h>
55 #include <stringops.h>
56 #include <dict.h>
57 #include <mymalloc.h>
58 #include <msg.h>
59
60 /*
61 * The goal is to maximize robustness: bad UTF-8 should not appear in keys,
62 * because those are derived from controlled inputs, and values should be
63 * printable before they are stored. But if we failed to check something
64 * then it should not result in fatal errors and thus open up the system for
65 * a denial-of-service attack.
66 *
67 * Proposed over-all policy: skip attempts to store invalid UTF-8 lookup keys
68 * or values. Rationale: some storage may not permit malformed UTF-8. This
69 * maximizes program robustness. If we get an invalid lookup result, report
70 * a configuration error.
71 *
72 * LOOKUP
73 *
74 * If the key is invalid, log a warning and skip the request. Rationale: the
75 * item cannot exist.
76 *
77 * If the lookup result is invalid, log a warning and return a configuration
78 * error.
79 *
80 * UPDATE
81 *
82 * If the key is invalid, then log a warning and skip the request. Rationale:
83 * the item cannot exist.
84 *
85 * If the value is invalid, log a warning and skip the request. Rationale:
86 * storage may not permit malformed UTF-8. This maximizes program
87 * robustness.
88 *
89 * DELETE
90 *
91 * If the key is invalid, then skip the request. Rationale: the item cannot
92 * exist.
93 */
94
95 /* dict_utf8_check_fold - casefold or validate string */
96
/*
 * Validate a string as UTF-8 and, when the dictionary's fold flags ask for
 * it, hand back a casefolded version.
 *
 * dict    dictionary whose flags select casefolding and whose fold_buf
 *         receives the folded text (allocated here on first use).
 * string  null-terminated text to validate.
 * err     optional; on validation failure it is pointed at a constant
 *         explanation string.
 *
 * Returns a null pointer when the string is neither all-ASCII nor accepted
 * by valid_utf8_string(). Otherwise returns the value of
 * casefold(dict->fold_buf, string) when folding applies, or the caller's
 * own string (const cast away) when it does not.
 */
static char *dict_utf8_check_fold(DICT *dict, const char *string,
                                  CONST_CHAR_STAR *err)
{
    int     applicable_bit;
    int     fold_wanted;

    /*
     * Pure ASCII is trivially fine; everything else must pass the UTF-8
     * validator.
     */
    if (!allascii(string)
        && valid_utf8_string(string, strlen(string)) == 0) {
        if (err != 0)
            *err = "malformed UTF-8 or invalid codepoint";
        return (0);
    }

    /*
     * Only one of the two fold flags is relevant for a given table:
     * DICT_FLAG_FOLD_FIX when DICT_FLAG_FIXED is set, DICT_FLAG_FOLD_MUL
     * otherwise.
     */
    applicable_bit = (dict->flags & DICT_FLAG_FIXED) ?
        DICT_FLAG_FOLD_FIX : DICT_FLAG_FOLD_MUL;
    fold_wanted = (dict->flags & DICT_FLAG_FOLD_ANY) & applicable_bit;

    /* No folding requested for this table: pass the input through. */
    if (fold_wanted == 0)
        return ((char *) string);

    /* Create the per-dictionary fold buffer the first time it is needed. */
    if (dict->fold_buf == 0)
        dict->fold_buf = vstring_alloc(10);
    return (casefold(dict->fold_buf, string));
}
123
/* dict_utf8_check - validate UTF-8 string */
125
/*
 * Validate a string as UTF-8 without any casefolding.
 *
 * string  null-terminated text to validate.
 * err     optional; on failure it is pointed at a constant explanation
 *         string.
 *
 * Returns 1 when the string is all-ASCII or accepted by
 * valid_utf8_string(), and 0 otherwise.
 */
static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
{
    int     acceptable;

    /* The validator is consulted only when the text is not plain ASCII. */
    acceptable = (allascii(string)
                  || valid_utf8_string(string, strlen(string)) != 0);

    if (!acceptable && err != 0)
        *err = "malformed UTF-8 or invalid codepoint";

    return (acceptable ? 1 : 0);
}
135
136 /* dict_utf8_lookup - UTF-8 lookup method wrapper */
137
/*
 * Lookup wrapper installed by dict_utf8_activate().
 *
 * The key is validated (and possibly casefolded) first. An invalid key is
 * logged, dict->error is set to DICT_ERR_NONE, and a null pointer is
 * returned without calling the saved lookup method. Otherwise the saved
 * method is called with the fold flags temporarily cleared. A non-null
 * result that fails UTF-8 validation is logged, dict->error is set to
 * DICT_ERR_CONFIG, and a null pointer is returned; any other result is
 * passed through unchanged.
 */
static const char *dict_utf8_lookup(DICT *dict, const char *key)
{
    DICT_UTF8_BACKUP *saved_methods;
    const char *reason;
    const char *checked_key;
    const char *result;
    int     fold_bits;

    /*
     * Bad key: warn and pretend the entry does not exist.
     */
    checked_key = dict_utf8_check_fold(dict, key, &reason);
    if (checked_key == 0) {
        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
                 dict->type, dict->name, key, reason);
        dict->error = DICT_ERR_NONE;
        return (0);
    }

    /*
     * Call the saved lookup method with the fold flags masked out, then
     * put back whichever fold flags were set.
     */
    saved_methods = dict->utf8_backup;
    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
    dict->flags &= ~DICT_FLAG_FOLD_ANY;
    result = saved_methods->lookup(dict, checked_key);
    dict->flags |= fold_bits;

    /*
     * A miss, or a hit with a well-formed value, is returned as is.
     */
    if (result == 0 || dict_utf8_check(result, &reason) != 0)
        return (result);

    /*
     * The stored value is not valid UTF-8: block it and flag a
     * configuration error.
     */
    msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
             dict->type, dict->name, key, result, reason);
    dict->error = DICT_ERR_CONFIG;
    return (0);
}
177
178 /* dict_utf8_update - UTF-8 update method wrapper */
179
/*
 * Update wrapper installed by dict_utf8_activate().
 *
 * The key is validated (and possibly casefolded), then the value is
 * validated. If either check fails a warning is logged, dict->error is set
 * to DICT_ERR_NONE, and DICT_STAT_SUCCESS is returned without calling the
 * saved update method. Otherwise the saved method is called with the fold
 * flags temporarily cleared and its status is returned.
 */
static int dict_utf8_update(DICT *dict, const char *key, const char *value)
{
    DICT_UTF8_BACKUP *saved_methods;
    const char *reason;
    const char *checked_key;
    int     fold_bits;
    int     result;

    /*
     * Bad key: warn, skip the store, report success.
     */
    checked_key = dict_utf8_check_fold(dict, key, &reason);
    if (checked_key == 0) {
        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
                 dict->type, dict->name, key, reason);
        dict->error = DICT_ERR_NONE;
        return (DICT_STAT_SUCCESS);
    }

    /*
     * Bad value: warn, skip the store, report success.
     */
    if (dict_utf8_check(value, &reason) == 0) {
        msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
                 dict->type, dict->name, key, value, reason);
        dict->error = DICT_ERR_NONE;
        return (DICT_STAT_SUCCESS);
    }

    /*
     * Call the saved update method with the fold flags masked out, then
     * put back whichever fold flags were set.
     */
    saved_methods = dict->utf8_backup;
    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
    dict->flags &= ~DICT_FLAG_FOLD_ANY;
    result = saved_methods->update(dict, checked_key, value);
    dict->flags |= fold_bits;

    return (result);
}
220
221 /* dict_utf8_delete - UTF-8 delete method wrapper */
222
/*
 * Delete wrapper installed by dict_utf8_activate().
 *
 * The key is validated (and possibly casefolded). An invalid key is
 * logged, dict->error is set to DICT_ERR_NONE, and DICT_STAT_SUCCESS is
 * returned without calling the saved delete method. Otherwise the saved
 * method is called with the fold flags temporarily cleared and its status
 * is returned.
 */
static int dict_utf8_delete(DICT *dict, const char *key)
{
    DICT_UTF8_BACKUP *saved_methods;
    const char *reason;
    const char *checked_key;
    int     fold_bits;
    int     result;

    /*
     * Bad key: warn, skip the delete, report success.
     */
    checked_key = dict_utf8_check_fold(dict, key, &reason);
    if (checked_key == 0) {
        msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
                 dict->type, dict->name, key, reason);
        dict->error = DICT_ERR_NONE;
        return (DICT_STAT_SUCCESS);
    }

    /*
     * Call the saved delete method with the fold flags masked out, then
     * put back whichever fold flags were set.
     */
    saved_methods = dict->utf8_backup;
    fold_bits = (dict->flags & DICT_FLAG_FOLD_ANY);
    dict->flags &= ~DICT_FLAG_FOLD_ANY;
    result = saved_methods->delete(dict, checked_key);
    dict->flags |= fold_bits;

    return (result);
}
253
254 /* dict_utf8_activate - wrap a legacy dict object for UTF-8 processing */
255
/*
 * Interpose the UTF-8 checking wrappers on a dictionary's lookup, update
 * and delete methods.
 *
 * The dictionary's current method pointers are saved in a newly allocated
 * DICT_UTF8_BACKUP attached to dict->utf8_backup, the three method
 * pointers are redirected to the wrappers in this file, and
 * DICT_FLAG_UTF8_ACTIVE is set. The same dict pointer is returned.
 *
 * msg_panic() is called when util_utf8_enable is zero, when the dictionary
 * lacks DICT_FLAG_UTF8_REQUEST, or when it already carries
 * DICT_FLAG_UTF8_ACTIVE or a non-null utf8_backup.
 */
DICT   *dict_utf8_activate(DICT *dict)
{
    const char myname[] = "dict_utf8_activate";
    DICT_UTF8_BACKUP *saved_methods;
    int     request_bit;
    int     active_bit;

    /*
     * Preconditions, checked in a fixed order so that the first violated
     * one determines the panic message.
     */
    if (util_utf8_enable == 0)
        msg_panic("%s: Unicode support is not available", myname);

    request_bit = (dict->flags & DICT_FLAG_UTF8_REQUEST);
    if (request_bit == 0)
        msg_panic("%s: %s:%s does not request Unicode support",
                  myname, dict->type, dict->name);

    active_bit = (dict->flags & DICT_FLAG_UTF8_ACTIVE);
    if (active_bit != 0 || dict->utf8_backup != 0)
        msg_panic("%s: %s:%s Unicode support is already activated",
                  myname, dict->type, dict->name);

    /*
     * No proxy object is placed in front of the dictionary. Instead the
     * original methods are remembered in a side structure hanging off the
     * dictionary itself, so the data members never need to be kept in sync
     * between two objects.
     */
    saved_methods = (DICT_UTF8_BACKUP *) mymalloc(sizeof(*saved_methods));
    dict->utf8_backup = saved_methods;

    /*
     * Remember the original lookup/update/delete methods. The iterator and
     * destructor are deliberately left alone.
     */
    saved_methods->lookup = dict->lookup;
    saved_methods->update = dict->update;
    saved_methods->delete = dict->delete;

    /*
     * Route the three methods through the UTF-8 wrappers.
     */
    dict->lookup = dict_utf8_lookup;
    dict->update = dict_utf8_update;
    dict->delete = dict_utf8_delete;

    /*
     * Record that activation happened; the precondition check above relies
     * on this flag to refuse a second activation.
     */
    dict->flags |= DICT_FLAG_UTF8_ACTIVE;

    return (dict);
}
303