1 /* $NetBSD: midna_domain.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* midna_domain 3 6 /* SUMMARY 7 /* ASCII/UTF-8 domain name conversion 8 /* SYNOPSIS 9 /* #include <midna_domain.h> 10 /* 11 /* int midna_domain_cache_size; 12 /* 13 /* const char *midna_domain_to_ascii( 14 /* const char *name) 15 /* 16 /* const char *midna_domain_to_utf8( 17 /* const char *name) 18 /* 19 /* const char *midna_domain_suffix_to_ascii( 20 /* const char *name) 21 /* 22 /* const char *midna_domain_suffix_to_utf8( 23 /* const char *name) 24 /* DESCRIPTION 25 /* The functions in this module transform domain names from/to 26 /* ASCII and UTF-8 form. The result is cached to avoid repeated 27 /* conversion. 28 /* 29 /* This module builds on the ICU library implementation of the 30 /* UTS #46 specification, using default ICU library options 31 /* because those are likely best tested: with transitional 32 /* processing, with case mapping, with normalization, with 33 /* limited IDNA2003 compatibility, without STD3 ASCII rules. 34 /* 35 /* midna_domain_to_ascii() converts an UTF-8 or ASCII domain 36 /* name to ASCII. The result is a null pointer in case of 37 /* error. This function verifies that the result passes 38 /* valid_hostname(). 39 /* 40 /* midna_domain_to_utf8() converts an UTF-8 or ASCII domain 41 /* name to UTF-8. The result is a null pointer in case of 42 /* error. This function verifies that the result, after 43 /* conversion to ASCII, passes valid_hostname(). 44 /* 45 /* midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8() 46 /* take a name that starts with '.' and otherwise perform the 47 /* same operations as midna_domain_to_ascii() and 48 /* midna_domain_to_utf8(). 49 /* 50 /* midna_domain_cache_size specifies the size of the conversion 51 /* result cache. This value is used only once, upon the first 52 /* lookup 53 /* request. 54 /* SEE ALSO 55 /* http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing 56 /* msg(3) diagnostics interface 57 /* DIAGNOSTICS 58 /* Fatal errors: memory allocation problem. 59 /* Warnings: conversion error or result validation error. 60 /* LICENSE 61 /* .ad 62 /* .fi 63 /* The Secure Mailer license must be distributed with this software. 64 /* AUTHOR(S) 65 /* Arnt Gulbrandsen 66 /* 67 /* Wietse Venema 68 /* IBM T.J. Watson Research 69 /* P.O. Box 704 70 /* Yorktown Heights, NY 10598, USA 71 /*--*/ 72 73 /* 74 * System library. 75 */ 76 #include <sys_defs.h> 77 #include <string.h> 78 #include <ctype.h> 79 80 #ifndef NO_EAI 81 #include <unicode/uidna.h> 82 83 /* 84 * Utility library. 85 */ 86 #include <mymalloc.h> 87 #include <msg.h> 88 #include <ctable.h> 89 #include <stringops.h> 90 #include <valid_hostname.h> 91 #include <midna_domain.h> 92 93 /* 94 * Application-specific. 95 */ 96 #define DEF_MIDNA_CACHE_SIZE 256 97 98 int midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE; 99 static VSTRING *midna_domain_buf; /* x.suffix */ 100 101 #define STR(x) vstring_str(x) 102 103 /* midna_domain_to_ascii_create - convert domain to ASCII */ 104 105 static void *midna_domain_to_ascii_create(const char *name, void *unused_context) 106 { 107 static const char myname[] = "midna_domain_to_ascii_create"; 108 char buf[1024]; /* XXX */ 109 UErrorCode error = U_ZERO_ERROR; 110 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 111 UIDNA *idna; 112 int anl; 113 114 /* 115 * Paranoia: do not expose uidna_*() to unfiltered network data. 116 */ 117 if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { 118 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 119 myname, name, "malformed UTF-8"); 120 return (0); 121 } 122 123 /* 124 * Perform the requested conversion. 125 */ 126 idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */ 127 anl = uidna_nameToASCII_UTF8(idna, 128 name, strlen(name), 129 buf, sizeof(buf) - 1, 130 &info, 131 &error); 132 uidna_close(idna); 133 134 /* 135 * Paranoia: verify that the result passes valid_hostname(). A quick 136 * check shows that UTS46 ToASCII by default rejects inputs with labels 137 * that start or end in '-', with names or labels that are over-long, or 138 * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on 139 * valid_hostname() on the output side just to be sure. 140 */ 141 if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { 142 buf[anl] = 0; /* XXX */ 143 if (!valid_hostname(buf, DONT_GRIPE)) { 144 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 145 myname, name, "malformed ASCII label(s)"); 146 return (0); 147 } 148 return (mystrndup(buf, anl)); 149 } else { 150 msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", 151 myname, name, u_errorName(info.errors)); 152 return (0); 153 } 154 } 155 156 /* midna_domain_to_utf8_create - convert domain to UTF8 */ 157 158 static void *midna_domain_to_utf8_create(const char *name, void *unused_context) 159 { 160 static const char myname[] = "midna_domain_to_utf8_create"; 161 char buf[1024]; /* XXX */ 162 UErrorCode error = U_ZERO_ERROR; 163 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 164 UIDNA *idna; 165 int anl; 166 167 /* 168 * Paranoia: do not expose uidna_*() to unfiltered network data. 169 */ 170 if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { 171 msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s", 172 myname, name, "malformed UTF-8"); 173 return (0); 174 } 175 176 /* 177 * Perform the requested conversion. 178 */ 179 idna = uidna_openUTS46(UIDNA_DEFAULT, &error);/* XXX check error */ 180 anl = uidna_nameToUnicodeUTF8(idna, 181 name, strlen(name), 182 buf, sizeof(buf) - 1, 183 &info, 184 &error); 185 uidna_close(idna); 186 187 /* 188 * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long 189 * name or a name that contains an over-long NR-LDH label (and perhaps 190 * other invalid forms that are not covered in UTS 46, section 4.1). We 191 * rely on midna_domain_to_ascii() to validate the output. 192 */ 193 if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { 194 buf[anl] = 0; /* XXX */ 195 if (midna_domain_to_ascii(buf) == 0) 196 return (0); 197 return (mystrndup(buf, anl)); 198 } else { 199 msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s", 200 myname, name, u_errorName(info.errors)); 201 return (0); 202 } 203 } 204 205 /* midna_domain_cache_free - cache element destructor */ 206 207 static void midna_domain_cache_free(void *value, void *unused_context) 208 { 209 if (value) 210 myfree(value); 211 } 212 213 /* midna_domain_to_ascii - convert name to ASCII */ 214 215 const char *midna_domain_to_ascii(const char *name) 216 { 217 static CTABLE *midna_domain_to_ascii_cache = 0; 218 219 if (midna_domain_to_ascii_cache == 0) 220 midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size, 221 midna_domain_to_ascii_create, 222 midna_domain_cache_free, 223 (void *) 0); 224 return (ctable_locate(midna_domain_to_ascii_cache, name)); 225 } 226 227 /* midna_domain_to_utf8 - convert name to UTF8 */ 228 229 const char *midna_domain_to_utf8(const char *name) 230 { 231 static CTABLE *midna_domain_to_utf8_cache = 0; 232 233 if (midna_domain_to_utf8_cache == 0) 234 midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size, 235 midna_domain_to_utf8_create, 236 midna_domain_cache_free, 237 (void *) 0); 238 return (ctable_locate(midna_domain_to_utf8_cache, name)); 239 } 240 241 /* midna_domain_suffix_to_ascii - convert .name to ASCII */ 242 243 const char *midna_domain_suffix_to_ascii(const char *suffix) 244 { 245 const char *cache_res; 246 247 /* 248 * If prepending x to .name causes the result to become too long, then 249 * the suffix is bad. 250 */ 251 if (midna_domain_buf == 0) 252 midna_domain_buf = vstring_alloc(100); 253 vstring_sprintf(midna_domain_buf, "x%s", suffix); 254 if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0) 255 return (0); 256 else 257 return (cache_res + 1); 258 } 259 260 /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */ 261 262 const char *midna_domain_suffix_to_utf8(const char *name) 263 { 264 const char *cache_res; 265 266 /* 267 * If prepending x to .name causes the result to become too long, then 268 * the suffix is bad. 269 */ 270 if (midna_domain_buf == 0) 271 midna_domain_buf = vstring_alloc(100); 272 vstring_sprintf(midna_domain_buf, "x%s", name); 273 if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0) 274 return (0); 275 else 276 return (cache_res + 1); 277 } 278 279 #ifdef TEST 280 281 /* 282 * Test program - reads names from stdin, reports invalid names to stderr. 283 */ 284 #include <stdlib.h> 285 #include <locale.h> 286 287 #include <stringops.h> /* XXX util_utf8_enable */ 288 #include <vstring.h> 289 #include <vstream.h> 290 #include <vstring_vstream.h> 291 #include <msg_vstream.h> 292 293 int main(int argc, char **argv) 294 { 295 VSTRING *buffer = vstring_alloc(1); 296 const char *bp; 297 const char *ascii; 298 const char *utf8; 299 300 if (setlocale(LC_ALL, "C") == 0) 301 msg_fatal("setlocale(LC_ALL, C) failed: %m"); 302 303 msg_vstream_init(argv[0], VSTREAM_ERR); 304 /* msg_verbose = 1; */ 305 util_utf8_enable = 1; 306 307 while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { 308 bp = STR(buffer); 309 msg_info("> %s", bp); 310 while (ISSPACE(*bp)) 311 bp++; 312 if (*bp == '#' || *bp == 0) 313 continue; 314 msg_info("unconditional conversions:"); 315 utf8 = midna_domain_to_utf8(bp); 316 msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)"); 317 ascii = midna_domain_to_ascii(bp); 318 msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)"); 319 msg_info("conditional conversions:"); 320 if (!allascii(bp)) { 321 if (ascii != 0) { 322 utf8 = midna_domain_to_utf8(ascii); 323 msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"", 324 bp, ascii, utf8 ? utf8 : "(error)"); 325 if (utf8 != 0) { 326 if (strcmp(utf8, bp) != 0) 327 msg_warn("\"%s\" != \"%s\"", bp, utf8); 328 } 329 } 330 } else { 331 if (utf8 != 0) { 332 ascii = midna_domain_to_ascii(utf8); 333 msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"", 334 bp, utf8, ascii ? ascii : "(error)"); 335 if (ascii != 0) { 336 if (strcmp(ascii, bp) != 0) 337 msg_warn("\"%s\" != \"%s\"", bp, ascii); 338 } 339 } 340 } 341 } 342 exit(0); 343 } 344 345 #endif /* TEST */ 346 347 #endif /* NO_EAI */ 348