xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/dict_utf8.c (revision 33881f779a77dce6440bdc44610d94de75bebefe)
1 /*	$NetBSD: dict_utf8.c,v 1.3 2020/03/18 19:05:21 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	dict_utf8 3
6 /* SUMMARY
7 /*	dictionary UTF-8 helpers
8 /* SYNOPSIS
9 /*	#include <dict.h>
10 /*
11 /*	DICT	*dict_utf8_activate(
12 /*	DICT	*dict)
13 /* DESCRIPTION
14 /*	dict_utf8_activate() wraps a dictionary's lookup/update/delete
15 /*	methods with code that enforces UTF-8 checks on keys and
16 /*	values, and that logs a warning when incorrect UTF-8 is
17 /*	encountered. The original dictionary handle becomes invalid.
18 /*
19 /*	The wrapper code enforces a policy that maximizes application
20 /*	robustness (it avoids the need for new error-handling code
21 /*	paths in application code).  Attempts to store non-UTF-8
22 /*	keys or values are skipped while reporting a non-error
23 /*	status, attempts to look up or delete non-UTF-8 keys are
24 /*	skipped while reporting a non-error status, and lookup
25 /*	results that contain a non-UTF-8 value are blocked while
26 /*	reporting a configuration error.
27 /* BUGS
28 /*	dict_utf8_activate() does not nest.
29 /* LICENSE
30 /* .ad
31 /* .fi
32 /*	The Secure Mailer license must be distributed with this software.
33 /* AUTHOR(S)
34 /*	Wietse Venema
35 /*	IBM T.J. Watson Research
36 /*	P.O. Box 704
37 /*	Yorktown Heights, NY 10598, USA
38 /*
39 /*	Wietse Venema
40 /*	Google, Inc.
41 /*	111 8th Avenue
42 /*	New York, NY 10011, USA
43 /*--*/
44 
45  /*
46   * System library.
47   */
48 #include <sys_defs.h>
49 #include <string.h>
50 
51  /*
52   * Utility library.
53   */
54 #include <msg.h>
55 #include <stringops.h>
56 #include <dict.h>
57 #include <mymalloc.h>
58 #include <msg.h>
59 
60  /*
61   * The goal is to maximize robustness: bad UTF-8 should not appear in keys,
62   * because those are derived from controlled inputs, and values should be
63   * printable before they are stored. But if we failed to check something
64   * then it should not result in fatal errors and thus open up the system for
65   * a denial-of-service attack.
66   *
67   * Proposed over-all policy: skip attempts to store invalid UTF-8 lookup keys
68   * or values. Rationale: some storage may not permit malformed UTF-8. This
69   * maximizes program robustness. If we get an invalid lookup result, report
70   * a configuration error.
71   *
72   * LOOKUP
73   *
74   * If the key is invalid, log a warning and skip the request. Rationale: the
75   * item cannot exist.
76   *
77   * If the lookup result is invalid, log a warning and return a configuration
78   * error.
79   *
80   * UPDATE
81   *
82   * If the key is invalid, then log a warning and skip the request. Rationale:
83   * the item cannot exist.
84   *
85   * If the value is invalid, log a warning and skip the request. Rationale:
86   * storage may not permit malformed UTF-8. This maximizes program
87   * robustness.
88   *
89   * DELETE
90   *
  * If the key is invalid, then log a warning and skip the request.
  * Rationale: the item cannot exist.
93   */
94 
95 /* dict_utf8_check_fold - casefold or validate string */
96 
dict_utf8_check_fold(DICT * dict,const char * string,CONST_CHAR_STAR * err)97 static char *dict_utf8_check_fold(DICT *dict, const char *string,
98 				          CONST_CHAR_STAR *err)
99 {
100     int     fold_flag = (dict->flags & DICT_FLAG_FOLD_ANY);
101 
102     /*
103      * Validate UTF-8 without casefolding.
104      */
105     if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
106 	if (err)
107 	    *err = "malformed UTF-8 or invalid codepoint";
108 	return (0);
109     }
110 
111     /*
112      * Casefold UTF-8.
113      */
114     if (fold_flag != 0
115 	&& (fold_flag & ((dict->flags & DICT_FLAG_FIXED) ?
116 			 DICT_FLAG_FOLD_FIX : DICT_FLAG_FOLD_MUL))) {
117 	if (dict->fold_buf == 0)
118 	    dict->fold_buf = vstring_alloc(10);
119 	return (casefold(dict->fold_buf, string));
120     }
121     return ((char *) string);
122 }
123 
124 /* dict_utf8_check validate UTF-8 string */
125 
dict_utf8_check(const char * string,CONST_CHAR_STAR * err)126 static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
127 {
128     if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
129 	if (err)
130 	    *err = "malformed UTF-8 or invalid codepoint";
131 	return (0);
132     }
133     return (1);
134 }
135 
136 /* dict_utf8_lookup - UTF-8 lookup method wrapper */
137 
dict_utf8_lookup(DICT * dict,const char * key)138 static const char *dict_utf8_lookup(DICT *dict, const char *key)
139 {
140     DICT_UTF8_BACKUP *backup;
141     const char *utf8_err;
142     const char *fold_res;
143     const char *value;
144     int     saved_flags;
145 
146     /*
147      * Validate and optionally fold the key, and if invalid skip the request.
148      */
149     if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
150 	msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
151 		 dict->type, dict->name, key, utf8_err);
152 	dict->error = DICT_ERR_NONE;
153 	return (0);
154     }
155 
156     /*
157      * Proxy the request with casefolding turned off.
158      */
159     saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
160     dict->flags &= ~DICT_FLAG_FOLD_ANY;
161     backup = dict->utf8_backup;
162     value = backup->lookup(dict, fold_res);
163     dict->flags |= saved_flags;
164 
165     /*
166      * Validate the result, and if invalid fail the request.
167      */
168     if (value != 0 && dict_utf8_check(value, &utf8_err) == 0) {
169 	msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
170 		 dict->type, dict->name, key, value, utf8_err);
171 	dict->error = DICT_ERR_CONFIG;
172 	return (0);
173     } else {
174 	return (value);
175     }
176 }
177 
178 /* dict_utf8_update - UTF-8 update method wrapper */
179 
dict_utf8_update(DICT * dict,const char * key,const char * value)180 static int dict_utf8_update(DICT *dict, const char *key, const char *value)
181 {
182     DICT_UTF8_BACKUP *backup;
183     const char *utf8_err;
184     const char *fold_res;
185     int     saved_flags;
186     int     status;
187 
188     /*
189      * Validate or fold the key, and if invalid skip the request.
190      */
191     if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
192 	msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
193 		 dict->type, dict->name, key, utf8_err);
194 	dict->error = DICT_ERR_NONE;
195 	return (DICT_STAT_SUCCESS);
196     }
197 
198     /*
199      * Validate the value, and if invalid skip the request.
200      */
201     else if (dict_utf8_check(value, &utf8_err) == 0) {
202 	msg_warn("%s:%s: key \"%s\": non-UTF-8 value \"%s\": %s",
203 		 dict->type, dict->name, key, value, utf8_err);
204 	dict->error = DICT_ERR_NONE;
205 	return (DICT_STAT_SUCCESS);
206     }
207 
208     /*
209      * Proxy the request with casefolding turned off.
210      */
211     else {
212 	saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
213 	dict->flags &= ~DICT_FLAG_FOLD_ANY;
214 	backup = dict->utf8_backup;
215 	status = backup->update(dict, fold_res, value);
216 	dict->flags |= saved_flags;
217 	return (status);
218     }
219 }
220 
221 /* dict_utf8_delete - UTF-8 delete method wrapper */
222 
dict_utf8_delete(DICT * dict,const char * key)223 static int dict_utf8_delete(DICT *dict, const char *key)
224 {
225     DICT_UTF8_BACKUP *backup;
226     const char *utf8_err;
227     const char *fold_res;
228     int     saved_flags;
229     int     status;
230 
231     /*
232      * Validate and optionally fold the key, and if invalid skip the request.
233      */
234     if ((fold_res = dict_utf8_check_fold(dict, key, &utf8_err)) == 0) {
235 	msg_warn("%s:%s: non-UTF-8 key \"%s\": %s",
236 		 dict->type, dict->name, key, utf8_err);
237 	dict->error = DICT_ERR_NONE;
238 	return (DICT_STAT_SUCCESS);
239     }
240 
241     /*
242      * Proxy the request with casefolding turned off.
243      */
244     else {
245 	saved_flags = (dict->flags & DICT_FLAG_FOLD_ANY);
246 	dict->flags &= ~DICT_FLAG_FOLD_ANY;
247 	backup = dict->utf8_backup;
248 	status = backup->delete(dict, fold_res);
249 	dict->flags |= saved_flags;
250 	return (status);
251     }
252 }
253 
254 /* dict_utf8_activate - wrap a legacy dict object for UTF-8 processing */
255 
dict_utf8_activate(DICT * dict)256 DICT   *dict_utf8_activate(DICT *dict)
257 {
258     const char myname[] = "dict_utf8_activate";
259     DICT_UTF8_BACKUP *backup;
260 
261     /*
262      * Sanity check.
263      */
264     if (util_utf8_enable == 0)
265 	msg_panic("%s: Unicode support is not available", myname);
266     if ((dict->flags & DICT_FLAG_UTF8_REQUEST) == 0)
267 	msg_panic("%s: %s:%s does not request Unicode support",
268 		  myname, dict->type, dict->name);
269     if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) || dict->utf8_backup != 0)
270 	msg_panic("%s: %s:%s Unicode support is already activated",
271 		  myname, dict->type, dict->name);
272 
273     /*
274      * Unlike dict_debug(3) we do not put a proxy dict object in front of the
275      * encapsulated object, because then we would have to bidirectionally
276      * propagate changes in the data members (errors, flags, jbuf, and so on)
277      * between proxy object and encapsulated object.
278      *
279      * Instead we attach ourselves behind the encapsulated dict object, and
280      * redirect some function pointers to ourselves.
281      */
282     backup = dict->utf8_backup = (DICT_UTF8_BACKUP *) mymalloc(sizeof(*backup));
283 
284     /*
285      * Interpose on the lookup/update/delete methods. It is a conscious
286      * decision not to tinker with the iterator or destructor.
287      */
288     backup->lookup = dict->lookup;
289     backup->update = dict->update;
290     backup->delete = dict->delete;
291 
292     dict->lookup = dict_utf8_lookup;
293     dict->update = dict_utf8_update;
294     dict->delete = dict_utf8_delete;
295 
296     /*
297      * Leave our mark. See sanity check above.
298      */
299     dict->flags |= DICT_FLAG_UTF8_ACTIVE;
300 
301     return (dict);
302 }
303