xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/midna_domain.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /*	$NetBSD: midna_domain.c,v 1.2 2017/02/14 01:16:49 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	midna_domain 3
6 /* SUMMARY
7 /*	ASCII/UTF-8 domain name conversion
8 /* SYNOPSIS
9 /*	#include <midna_domain.h>
10 /*
11 /*	int midna_domain_cache_size;
12 /*
13 /*	const char *midna_domain_to_ascii(
14 /*	const char *name)
15 /*
16 /*	const char *midna_domain_to_utf8(
17 /*	const char *name)
18 /*
19 /*	const char *midna_domain_suffix_to_ascii(
20 /*	const char *name)
21 /*
22 /*	const char *midna_domain_suffix_to_utf8(
23 /*	const char *name)
24 /* DESCRIPTION
25 /*	The functions in this module transform domain names from/to
26 /*	ASCII and UTF-8 form. The result is cached to avoid repeated
27 /*	conversion.
28 /*
29 /*	This module builds on the ICU library implementation of the
30 /*	UTS #46 specification, using default ICU library options
31 /*	because those are likely best tested: with transitional
32 /*	processing, with case mapping, with normalization, with
33 /*	limited IDNA2003 compatibility, without STD3 ASCII rules.
34 /*
35 /*	midna_domain_to_ascii() converts a UTF-8 or ASCII domain
36 /*	name to ASCII.  The result is a null pointer in case of
37 /*	error.  This function verifies that the result passes
38 /*	valid_hostname().
39 /*
40 /*	midna_domain_to_utf8() converts a UTF-8 or ASCII domain
41 /*	name to UTF-8.  The result is a null pointer in case of
42 /*	error.  This function verifies that the result, after
43 /*	conversion to ASCII, passes valid_hostname().
44 /*
45 /*	midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8()
46 /*	take a name that starts with '.' and otherwise perform the
47 /*	same operations as midna_domain_to_ascii() and
48 /*	midna_domain_to_utf8().
49 /*
50 /*	midna_domain_cache_size specifies the size of the conversion
51 /*	result cache.  This value is used only once, upon the first
52 /*	lookup
53 /*	request.
54 /* SEE ALSO
55 /*	http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing
56 /*	msg(3) diagnostics interface
57 /* DIAGNOSTICS
58 /*	Fatal errors: memory allocation problem.
59 /*	Warnings: conversion error or result validation error.
60 /* LICENSE
61 /* .ad
62 /* .fi
63 /*	The Secure Mailer license must be distributed with this software.
64 /* AUTHOR(S)
65 /*	Arnt Gulbrandsen
66 /*
67 /*	Wietse Venema
68 /*	IBM T.J. Watson Research
69 /*	P.O. Box 704
70 /*	Yorktown Heights, NY 10598, USA
71 /*--*/
72 
73  /*
74   * System library.
75   */
76 #include <sys_defs.h>
77 #include <string.h>
78 #include <ctype.h>
79 
80 #ifndef NO_EAI
81 #include <unicode/uidna.h>
82 
83  /*
84   * Utility library.
85   */
86 #include <mymalloc.h>
87 #include <msg.h>
88 #include <ctable.h>
89 #include <stringops.h>
90 #include <valid_hostname.h>
91 #include <midna_domain.h>
92 
93  /*
94   * Application-specific.
95   */
96 #define DEF_MIDNA_CACHE_SIZE	256	/* default value for midna_domain_cache_size */
97 
98 int     midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE;	/* passed to ctable_create() by the lookup functions */
99 static VSTRING *midna_domain_buf;	/* x.suffix; scratch buffer shared by the suffix conversion functions */
100 
101 #define STR(x)	vstring_str(x)	/* shorthand for vstring_str() */
102 
103 /* midna_domain_to_ascii_create - convert domain to ASCII */
104 
105 static void *midna_domain_to_ascii_create(const char *name, void *unused_context)
106 {
107     static const char myname[] = "midna_domain_to_ascii_create";
108     char    buf[1024];			/* XXX */
109     UErrorCode error = U_ZERO_ERROR;
110     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
111     UIDNA  *idna;
112     int     anl;
113 
114     /*
115      * Paranoia: do not expose uidna_*() to unfiltered network data.
116      */
117     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
118 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
119 		 myname, name, "malformed UTF-8");
120 	return (0);
121     }
122 
123     /*
124      * Perform the requested conversion.
125      */
126     idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */
127     anl = uidna_nameToASCII_UTF8(idna,
128 				 name, strlen(name),
129 				 buf, sizeof(buf) - 1,
130 				 &info,
131 				 &error);
132     uidna_close(idna);
133 
134     /*
135      * Paranoia: verify that the result passes valid_hostname(). A quick
136      * check shows that UTS46 ToASCII by default rejects inputs with labels
137      * that start or end in '-', with names or labels that are over-long, or
138      * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on
139      * valid_hostname() on the output side just to be sure.
140      */
141     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
142 	buf[anl] = 0;				/* XXX */
143 	if (!valid_hostname(buf, DONT_GRIPE)) {
144 	    msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
145 		     myname, name, "malformed ASCII label(s)");
146 	    return (0);
147 	}
148 	return (mystrndup(buf, anl));
149     } else {
150 	msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
151 		 myname, name, u_errorName(info.errors));
152 	return (0);
153     }
154 }
155 
156 /* midna_domain_to_utf8_create - convert domain to UTF8 */
157 
158 static void *midna_domain_to_utf8_create(const char *name, void *unused_context)
159 {
160     static const char myname[] = "midna_domain_to_utf8_create";
161     char    buf[1024];			/* XXX */
162     UErrorCode error = U_ZERO_ERROR;
163     UIDNAInfo info = UIDNA_INFO_INITIALIZER;
164     UIDNA  *idna;
165     int     anl;
166 
167     /*
168      * Paranoia: do not expose uidna_*() to unfiltered network data.
169      */
170     if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
171 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
172 		 myname, name, "malformed UTF-8");
173 	return (0);
174     }
175 
176     /*
177      * Perform the requested conversion.
178      */
179     idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */
180     anl = uidna_nameToUnicodeUTF8(idna,
181 				  name, strlen(name),
182 				  buf, sizeof(buf) - 1,
183 				  &info,
184 				  &error);
185     uidna_close(idna);
186 
187     /*
188      * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long
189      * name or a name that contains an over-long NR-LDH label (and perhaps
190      * other invalid forms that are not covered in UTS 46, section 4.1). We
191      * rely on midna_domain_to_ascii() to validate the output.
192      */
193     if (U_SUCCESS(error) && info.errors == 0 && anl > 0) {
194 	buf[anl] = 0;				/* XXX */
195 	if (midna_domain_to_ascii(buf) == 0)
196 	    return (0);
197 	return (mystrndup(buf, anl));
198     } else {
199 	msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s",
200 		 myname, name, u_errorName(info.errors));
201 	return (0);
202     }
203 }
204 
/* midna_domain_cache_free - cache element destructor */

static void midna_domain_cache_free(void *value, void *unused_context)
{

    /*
     * The create callbacks in this file return a null pointer when a
     * conversion fails, so a null value is skipped instead of being handed
     * to myfree().
     */
    if (value == 0)
	return;
    myfree(value);
}
212 
213 /* midna_domain_to_ascii - convert name to ASCII */
214 
215 const char *midna_domain_to_ascii(const char *name)
216 {
217     static CTABLE *midna_domain_to_ascii_cache = 0;
218 
219     if (midna_domain_to_ascii_cache == 0)
220 	midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size,
221 					       midna_domain_to_ascii_create,
222 						    midna_domain_cache_free,
223 						    (void *) 0);
224     return (ctable_locate(midna_domain_to_ascii_cache, name));
225 }
226 
227 /* midna_domain_to_utf8 - convert name to UTF8 */
228 
229 const char *midna_domain_to_utf8(const char *name)
230 {
231     static CTABLE *midna_domain_to_utf8_cache = 0;
232 
233     if (midna_domain_to_utf8_cache == 0)
234 	midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size,
235 						midna_domain_to_utf8_create,
236 						   midna_domain_cache_free,
237 						   (void *) 0);
238     return (ctable_locate(midna_domain_to_utf8_cache, name));
239 }
240 
241 /* midna_domain_suffix_to_ascii - convert .name to ASCII */
242 
243 const char *midna_domain_suffix_to_ascii(const char *suffix)
244 {
245     const char *cache_res;
246 
247     /*
248      * If prepending x to .name causes the result to become too long, then
249      * the suffix is bad.
250      */
251     if (midna_domain_buf == 0)
252 	midna_domain_buf = vstring_alloc(100);
253     vstring_sprintf(midna_domain_buf, "x%s", suffix);
254     if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0)
255 	return (0);
256     else
257 	return (cache_res + 1);
258 }
259 
260 /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */
261 
262 const char *midna_domain_suffix_to_utf8(const char *name)
263 {
264     const char *cache_res;
265 
266     /*
267      * If prepending x to .name causes the result to become too long, then
268      * the suffix is bad.
269      */
270     if (midna_domain_buf == 0)
271 	midna_domain_buf = vstring_alloc(100);
272     vstring_sprintf(midna_domain_buf, "x%s", name);
273     if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0)
274 	return (0);
275     else
276 	return (cache_res + 1);
277 }
278 
279 #ifdef TEST
280 
281  /*
282   * Test program - reads names from stdin, reports invalid names to stderr.
283   */
284 #include <stdlib.h>
285 #include <locale.h>
286 
287 #include <stringops.h>			/* XXX util_utf8_enable */
288 #include <vstring.h>
289 #include <vstream.h>
290 #include <vstring_vstream.h>
291 #include <msg_vstream.h>
292 
293 int     main(int argc, char **argv)
294 {
295     VSTRING *buffer = vstring_alloc(1);
296     const char *bp;
297     const char *ascii;
298     const char *utf8;
299 
300     if (setlocale(LC_ALL, "C") == 0)
301 	msg_fatal("setlocale(LC_ALL, C) failed: %m");
302 
303     msg_vstream_init(argv[0], VSTREAM_ERR);
304     /* msg_verbose = 1; */
305     util_utf8_enable = 1;
306 
307     while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
308 	bp = STR(buffer);
309 	msg_info("> %s", bp);
310 	while (ISSPACE(*bp))
311 	    bp++;
312 	if (*bp == '#' || *bp == 0)
313 	    continue;
314 	msg_info("unconditional conversions:");
315 	utf8 = midna_domain_to_utf8(bp);
316 	msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)");
317 	ascii = midna_domain_to_ascii(bp);
318 	msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)");
319 	msg_info("conditional conversions:");
320 	if (!allascii(bp)) {
321 	    if (ascii != 0) {
322 		utf8 = midna_domain_to_utf8(ascii);
323 		msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"",
324 			 bp, ascii, utf8 ? utf8 : "(error)");
325 		if (utf8 != 0) {
326 		    if (strcmp(utf8, bp) != 0)
327 			msg_warn("\"%s\" != \"%s\"", bp, utf8);
328 		}
329 	    }
330 	} else {
331 	    if (utf8 != 0) {
332 		ascii = midna_domain_to_ascii(utf8);
333 		msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"",
334 			 bp, utf8, ascii ? ascii : "(error)");
335 		if (ascii != 0) {
336 		    if (strcmp(ascii, bp) != 0)
337 			msg_warn("\"%s\" != \"%s\"", bp, ascii);
338 		}
339 	    }
340 	}
341     }
342     exit(0);
343 }
344 
345 #endif					/* TEST */
346 
347 #endif					/* NO_EAI */
348