xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/midna_domain.c (revision 8b87d2a0a03ccefdf8b7f2b84267d70512de4059)
1 /*	$NetBSD: midna_domain.c,v 1.4 2020/05/25 23:47:14 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	midna_domain 3
6 /* SUMMARY
7 /*	ASCII/UTF-8 domain name conversion
8 /* SYNOPSIS
9 /*	#include <midna_domain.h>
10 /*
11 /*	int midna_domain_cache_size;
12 /*	int midna_domain_transitional;
13 /*
14 /*	const char *midna_domain_to_ascii(
15 /*	const char *name)
16 /*
17 /*	const char *midna_domain_to_utf8(
18 /*	const char *name)
19 /*
20 /*	const char *midna_domain_suffix_to_ascii(
21 /*	const char *name)
22 /*
23 /*	const char *midna_domain_suffix_to_utf8(
24 /*	const char *name)
25 /* AUXILIARY FUNCTIONS
26 /*	void midna_domain_pre_chroot(void)
27 /* DESCRIPTION
28 /*	The functions in this module transform domain names from/to
29 /*	ASCII and UTF-8 form. The result is cached to avoid repeated
30 /*	conversion.
31 /*
32 /*	This module builds on the ICU library implementation of the
33 /*	UTS #46 specification, using default ICU library options
34 /*	because those are likely best tested: with transitional
35 /*	processing, with case mapping, with normalization, with
36 /*	limited IDNA2003 compatibility, without STD3 ASCII rules.
37 /*
38 /*	midna_domain_to_ascii() converts a UTF-8 or ASCII domain
39 /*	name to ASCII.  The result is a null pointer in case of
40 /*	error.  This function verifies that the result passes
41 /*	valid_hostname().
42 /*
43 /*	midna_domain_to_utf8() converts a UTF-8 or ASCII domain
44 /*	name to UTF-8.  The result is a null pointer in case of
45 /*	error.  This function verifies that the result, after
46 /*	conversion to ASCII, passes valid_hostname().
47 /*
48 /*	midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
49 /*	take a name that starts with '.' and otherwise perform the
50 /*	same operations as midna_domain_to_ascii() and
51 /*	midna_domain_to_utf8().
52 /*
53 /*	midna_domain_cache_size specifies the size of the conversion
54 /*	result cache.  This value is used only once, upon the first
55 /*	lookup request.
56 /*
57 /*	midna_domain_transitional enables transitional conversion
58 /*	between UTF8 and ASCII labels.
59 /*
60 /*	midna_domain_pre_chroot() does some pre-chroot initialization.
61 /* SEE ALSO
62 /*	http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
63 /*	msg(3) diagnostics interface
64 /* DIAGNOSTICS
65 /*	Fatal errors: memory allocation problem.
66 /*	Warnings: conversion error or result validation error.
67 /* LICENSE
68 /* .ad
69 /* .fi
70 /*	The Secure Mailer license must be distributed with this software.
71 /* AUTHOR(S)
72 /*	Arnt Gulbrandsen
73 /*
74 /*	Wietse Venema
75 /*	IBM T.J. Watson Research
76 /*	P.O. Box 704
77 /*	Yorktown Heights, NY 10598, USA
78 /*
79 /*	Wietse Venema
80 /*	Google, Inc.
81 /*	111 8th Avenue
82 /*	New York, NY 10011, USA
83 /*--*/
84 
85  /*
86   * System library.
87   */
88 #include <sys_defs.h>
89 #include <string.h>
90 #include <ctype.h>
91 
92 #ifndef NO_EAI
93 #include <unicode/uidna.h>
94 
95  /*
96   * Utility library.
97   */
98 #include <mymalloc.h>
99 #include <msg.h>
100 #include <ctable.h>
101 #include <stringops.h>
102 #include <valid_hostname.h>
103 #include <name_mask.h>
104 #include <midna_domain.h>
105 
106  /*
107   * Application-specific.
108   */
109 #define DEF_MIDNA_CACHE_SIZE	256	/* initial midna_domain_cache_size */
110 
111 int     midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE;	/* handed to ctable_create() */
112 int     midna_domain_transitional = 0;	/* non-zero: open ICU with UIDNA_DEFAULT */
113 static VSTRING *midna_domain_buf;	/* x.suffix */
114 
115 #define STR(x)	vstring_str(x)	/* shorthand used throughout this file */
116 
117 /* midna_domain_strerror - pick one for error reporting */
118 
midna_domain_strerror(UErrorCode error,int info_errors)119 static const char *midna_domain_strerror(UErrorCode error, int info_errors)
120 {
121 
122     /*
123      * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so
124      * we can't use #ifdef to dynamically determine which names exist.
125      */
126     static LONG_NAME_MASK uidna_errors[] = {
127 	"UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL,
128 	"UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG,
129 	"UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG,
130 	"UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN,
131 	"UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN,
132 	"UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4,
133 	"UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK,
134 	"UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED,
135 	"UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE,
136 	"UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT,
137 	"UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL,
138 	"UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI,
139 	"UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ,
140 	/* The above errors are defined with ICU 46 and later. */
141 	0,
142     };
143 
144     if (info_errors) {
145 	return (str_long_name_mask_opt((VSTRING *) 0, "idna error",
146 				       uidna_errors, info_errors,
147 				       NAME_MASK_NUMBER | NAME_MASK_COMMA));
148     } else {
149 	return u_errorName(error);
150     }
151 }
152 
153 /* midna_domain_pre_chroot - pre-chroot initialization */
154 
midna_domain_pre_chroot(void)155 void    midna_domain_pre_chroot(void)
156 {
157     UErrorCode error = U_ZERO_ERROR;
158     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
159     UIDNA  *idna;
160 
161     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
162 			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
163     if (U_FAILURE(error))
164 	msg_warn("ICU library initialization failed: %s",
165 		 midna_domain_strerror(error, info.errors));
166     uidna_close(idna);
167 }
168 
169 /* midna_domain_to_ascii_create - convert domain to ASCII */
170 
midna_domain_to_ascii_create(const char * name,void * unused_context)171 static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
172 {
173     static const char myname[] = "midna_domain_to_ascii_create";
174     char    buf[1024];			/* XXX */
175     UErrorCode error = U_ZERO_ERROR;
176     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
177     UIDNA  *idna;
178     int     anl;
179 
180     /*
181      * Paranoia: do not expose uidna_*() to unfiltered network data.
182      */
183     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
184 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
185 		 myname, name, "malformed UTF-8");
186 	return (0);
187     }
188 
189     /*
190      * Perform the requested conversion.
191      */
192     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
193 			   : UIDNA_NONTRANSITIONAL_TO_ASCII, &error);
194     anl = uidna_nameToASCII_UTF8(idna,
195 				 name, strlen(name),
196 				 buf, sizeof(buf) - 1,
197 				 &info,
198 				 &error);
199     uidna_close(idna);
200 
201     /*
202      * Paranoia: verify that the result passes valid_hostname(). A quick
203      * check shows that UTS46 ToASCII by default rejects inputs with labels
204      * that start or end in '-', with names or labels that are over-long, or
205      * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
206      * valid_hostname() on the output side just to be sure.
207      */
208     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
209 	buf[anl] = 0;				/* XXX */
210 	if (!valid_hostname(buf, DONT_GRIPE)) {
211 	    msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
212 		     myname, name, "malformed ASCII label(s)");
213 	    return (0);
214 	}
215 	return (mystrndup(buf, anl));
216     } else {
217 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
218 		 myname, name, midna_domain_strerror(error, info.errors));
219 	return (0);
220     }
221 }
222 
223 /* midna_domain_to_utf8_create - convert domain to UTF8 */
224 
midna_domain_to_utf8_create(const char * name,void * unused_context)225 static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
226 {
227     static const char myname[] = "midna_domain_to_utf8_create";
228     char    buf[1024];			/* XXX */
229     UErrorCode error = U_ZERO_ERROR;
230     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
231     UIDNA  *idna;
232     int     anl;
233 
234     /*
235      * Paranoia: do not expose uidna_*() to unfiltered network data.
236      */
237     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
238 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
239 		 myname, name, "malformed UTF-8");
240 	return (0);
241     }
242 
243     /*
244      * Perform the requested conversion.
245      */
246     idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT
247 			   : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error);
248     anl = uidna_nameToUnicodeUTF8(idna,
249 				  name, strlen(name),
250 				  buf, sizeof(buf) - 1,
251 				  &info,
252 				  &error);
253     uidna_close(idna);
254 
255     /*
256      * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
257      * name or a name that contains an over-long NR-LDH label (and perhaps
258      * other invalid forms that are not covered in UTS 46, section 4.1). We
259      * rely on midna_domain_to_ascii() to validate the output.
260      */
261     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
262 	buf[anl] = 0;				/* XXX */
263 	if (midna_domain_to_ascii(buf) == 0)
264 	    return (0);
265 	return (mystrndup(buf, anl));
266     } else {
267 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
268 		 myname, name, midna_domain_strerror(error, info.errors));
269 	return (0);
270     }
271 }
272 
/* midna_domain_cache_free - cache element destructor */

static void midna_domain_cache_free(void *value, void *unused_context)
{

    /*
     * The create callbacks in this file return a null pointer when a
     * conversion fails; there is nothing to release in that case.
     */
    if (value == 0)
	return;
    myfree(value);
}
280 
281 /* midna_domain_to_ascii - convert name to ASCII */
282 
midna_domain_to_ascii(const char * name)283 const char *midna_domain_to_ascii(const char *name)
284 {
285     static CTABLE *midna_domain_to_ascii_cache = 0;
286 
287     if (midna_domain_to_ascii_cache == 0)
288 	midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
289 					       midna_domain_to_ascii_create,
290 						    midna_domain_cache_free,
291 						    (void *) 0);
292     return (ctable_locate(midna_domain_to_ascii_cache, name));
293 }
294 
295 /* midna_domain_to_utf8 - convert name to UTF8 */
296 
midna_domain_to_utf8(const char * name)297 const char *midna_domain_to_utf8(const char *name)
298 {
299     static CTABLE *midna_domain_to_utf8_cache = 0;
300 
301     if (midna_domain_to_utf8_cache == 0)
302 	midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
303 						midna_domain_to_utf8_create,
304 						   midna_domain_cache_free,
305 						   (void *) 0);
306     return (ctable_locate(midna_domain_to_utf8_cache, name));
307 }
308 
309 /* midna_domain_suffix_to_ascii - convert .name to ASCII */
310 
midna_domain_suffix_to_ascii(const char * suffix)311 const char *midna_domain_suffix_to_ascii(const char *suffix)
312 {
313     const char *cache_res;
314 
315     /*
316      * If prepending x to .name causes the result to become too long, then
317      * the suffix is bad.
318      */
319     if (midna_domain_buf == 0)
320 	midna_domain_buf = vstring_alloc(100);
321     vstring_sprintf(midna_domain_buf, "x%s", suffix);
322     if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
323 	return (0);
324     else
325 	return (cache_res + 1);
326 }
327 
328 /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
329 
midna_domain_suffix_to_utf8(const char * name)330 const char *midna_domain_suffix_to_utf8(const char *name)
331 {
332     const char *cache_res;
333 
334     /*
335      * If prepending x to .name causes the result to become too long, then
336      * the suffix is bad.
337      */
338     if (midna_domain_buf == 0)
339 	midna_domain_buf = vstring_alloc(100);
340     vstring_sprintf(midna_domain_buf, "x%s", name);
341     if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
342 	return (0);
343     else
344 	return (cache_res + 1);
345 }
346 
347 #ifdef TEST
348 
349  /*
350   * Test program - reads names from stdin, reports invalid names to stderr.
351   */
352 #include <unistd.h>
353 #include <stdlib.h>
354 #include <locale.h>
355 
356 #include <stringops.h>			/* XXX util_utf8_enable */
357 #include <vstring.h>
358 #include <vstream.h>
359 #include <vstring_vstream.h>
360 #include <msg_vstream.h>
361 
main(int argc,char ** argv)362 int     main(int argc, char **argv)
363 {
364     VSTRING *buffer = vstring_alloc(1);
365     const char *bp;
366     const char *ascii;
367     const char *utf8;
368 
369     if (setlocale(LC_ALL, "C") == 0)
370 	msg_fatal("setlocale(LC_ALL, C) failed: %m");
371 
372     msg_vstream_init(argv[0], VSTREAM_ERR);
373     /* msg_verbose = 1; */
374     util_utf8_enable = 1;
375 
376     if (geteuid() == 0) {
377 	midna_domain_pre_chroot();
378 	if (chroot(".") != 0)
379 	    msg_fatal("chroot(\".\"): %m");
380     }
381     while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
382 	bp = STR(buffer);
383 	msg_info("> %s", bp);
384 	while (ISSPACE(*bp))
385 	    bp++;
386 	if (*bp == '#' || *bp == 0)
387 	    continue;
388 	msg_info("unconditional conversions:");
389 	utf8 = midna_domain_to_utf8(bp);
390 	msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
391 	ascii = midna_domain_to_ascii(bp);
392 	msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
393 	msg_info("conditional conversions:");
394 	if (!allascii(bp)) {
395 	    if (ascii != 0) {
396 		utf8 = midna_domain_to_utf8(ascii);
397 		msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
398 			 bp, ascii, utf8 ? utf8 : "(error)");
399 		if (utf8 != 0) {
400 		    if (strcmp(utf8, bp) != 0)
401 			msg_warn("\"%s\" != \"%s\"", bp, utf8);
402 		}
403 	    }
404 	} else {
405 	    if (utf8 != 0) {
406 		ascii = midna_domain_to_ascii(utf8);
407 		msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
408 			 bp, utf8, ascii ? ascii : "(error)");
409 		if (ascii != 0) {
410 		    if (strcmp(ascii, bp) != 0)
411 			msg_warn("\"%s\" != \"%s\"", bp, ascii);
412 		}
413 	    }
414 	}
415     }
416     exit(0);
417 }
418 
419 #endif					/* TEST */
420 
421 #endif					/* NO_EAI */
422