xref: /onnv-gate/usr/src/cmd/ssh/libssh/common/g11n.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  *
22*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
23*0Sstevel@tonic-gate  * Use is subject to license terms.
24*0Sstevel@tonic-gate  */
25*0Sstevel@tonic-gate 
26*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*0Sstevel@tonic-gate 
28*0Sstevel@tonic-gate #include <errno.h>
29*0Sstevel@tonic-gate #include <locale.h>
30*0Sstevel@tonic-gate #include <langinfo.h>
31*0Sstevel@tonic-gate #include <iconv.h>
32*0Sstevel@tonic-gate #include <ctype.h>
33*0Sstevel@tonic-gate #include <strings.h>
34*0Sstevel@tonic-gate #include <string.h>
35*0Sstevel@tonic-gate #include <stdio.h>
36*0Sstevel@tonic-gate #include <stdlib.h>
37*0Sstevel@tonic-gate #include "includes.h"
38*0Sstevel@tonic-gate #include "xmalloc.h"
39*0Sstevel@tonic-gate #include "xlist.h"
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate #ifdef MIN
42*0Sstevel@tonic-gate #undef MIN
43*0Sstevel@tonic-gate #endif /* MIN */
44*0Sstevel@tonic-gate 
45*0Sstevel@tonic-gate #define MIN(x, y)		    ((x) < (y) ? (x) : (y))
46*0Sstevel@tonic-gate 
47*0Sstevel@tonic-gate #define LOCALE_PATH "/usr/bin/locale"
48*0Sstevel@tonic-gate 
49*0Sstevel@tonic-gate #define LANGTAG_MAX 5 /* two-char language code, '-' and two-char country code */
50*0Sstevel@tonic-gate 
51*0Sstevel@tonic-gate static u_char * do_iconv(iconv_t cd, u_int *mul_ptr,
52*0Sstevel@tonic-gate 		       const void *buf, u_int len,
53*0Sstevel@tonic-gate 		       u_int *outlen, int *err,
54*0Sstevel@tonic-gate 		       u_char **err_str);
55*0Sstevel@tonic-gate 
56*0Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2);
57*0Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale);
58*0Sstevel@tonic-gate 
59*0Sstevel@tonic-gate u_int
60*0Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str);
61*0Sstevel@tonic-gate 
62*0Sstevel@tonic-gate u_int
63*0Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str);
64*0Sstevel@tonic-gate 
65*0Sstevel@tonic-gate static
66*0Sstevel@tonic-gate char *
67*0Sstevel@tonic-gate g11n_locale2langtag(char *locale)
68*0Sstevel@tonic-gate {
69*0Sstevel@tonic-gate     char *langtag;
70*0Sstevel@tonic-gate 
71*0Sstevel@tonic-gate     /* base cases */
72*0Sstevel@tonic-gate     if (!locale || !*locale) return NULL;
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate     if (strcmp(locale, "POSIX") == 0 ||
75*0Sstevel@tonic-gate 	strcmp(locale, "C") == 0) return "i-default";
76*0Sstevel@tonic-gate 
77*0Sstevel@tonic-gate     /* Punt for language codes which are not exactly 2 letters */
78*0Sstevel@tonic-gate     if (strlen(locale) < 2 ||
79*0Sstevel@tonic-gate 	!isalpha(locale[0]) ||
80*0Sstevel@tonic-gate 	!isalpha(locale[1]) ||
81*0Sstevel@tonic-gate 	(locale[2] != '\0' &&
82*0Sstevel@tonic-gate 	locale[2] != '_' &&
83*0Sstevel@tonic-gate 	locale[2] != '.' &&
84*0Sstevel@tonic-gate 	locale[2] != '@'))
85*0Sstevel@tonic-gate 	return NULL;
86*0Sstevel@tonic-gate 
87*0Sstevel@tonic-gate 
88*0Sstevel@tonic-gate     /* We have a primary language sub-tag */
89*0Sstevel@tonic-gate     langtag = (char *) xmalloc(LANGTAG_MAX + 1);
90*0Sstevel@tonic-gate 
91*0Sstevel@tonic-gate     strncpy(langtag, locale, 2);
92*0Sstevel@tonic-gate     langtag[2] = '\0';
93*0Sstevel@tonic-gate 
94*0Sstevel@tonic-gate     /* Do we have country sub-tag? */
95*0Sstevel@tonic-gate     if (locale[2] == '_') {
96*0Sstevel@tonic-gate 	if (strlen(locale) < 5 ||
97*0Sstevel@tonic-gate 	    !isalpha(locale[3]) ||
98*0Sstevel@tonic-gate 	    !isalpha(locale[4]) ||
99*0Sstevel@tonic-gate 	    (locale[5] != '\0' && (locale[5] != '.' && locale[5] != '@'))) {
100*0Sstevel@tonic-gate 	    return langtag;
101*0Sstevel@tonic-gate 	}
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate 	/* yes, we do */
104*0Sstevel@tonic-gate 	/* if (snprintf(langtag, 6, "%s-%s,%s", lang_subtag,
105*0Sstevel@tonic-gate 		     country_subtag, langtag) == 8) */
106*0Sstevel@tonic-gate 	if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale,
107*0Sstevel@tonic-gate 		     2, locale+3) == 5)
108*0Sstevel@tonic-gate 	    return langtag;
109*0Sstevel@tonic-gate     }
110*0Sstevel@tonic-gate 
111*0Sstevel@tonic-gate     /* In all other cases we just use the primary language sub-tag */
112*0Sstevel@tonic-gate     return langtag;
113*0Sstevel@tonic-gate }
114*0Sstevel@tonic-gate 
/*
 * Tell whether langtag is the default language tag.
 *
 * Returns 1 when langtag is exactly "i-default" (case-sensitive compare),
 * 0 otherwise, including when langtag is NULL.
 */
u_int
g11n_langtag_is_default(char *langtag)
{
    if (langtag == NULL)
        return 0;

    return (strcmp(langtag, "i-default") == 0);
}
120*0Sstevel@tonic-gate 
/*
 * Rate how well a language tag matches a locale name.
 *
 * This lang tag / locale matching function works only for two-character
 * language primary sub-tags and two-character country sub-tags.
 *
 * Returns:
 *   0  no match: a NULL argument, a malformed langtag or locale, or the
 *      languages differ; also "i-default" against any locale other than
 *      "C"/"POSIX"
 *   1  partial match: the languages agree but the country parts are
 *      absent on one side, malformed, or different; also "i-default"
 *      against "C"/"POSIX"
 *   2  exact match: the languages agree and either neither side has a
 *      country part or both country parts agree
 *
 * Language and country comparisons ignore case.
 */
u_int
g11n_langtag_matches_locale(char *langtag, char *locale)
{
    int locale_has_country;

    if (langtag == NULL || locale == NULL)
        return 0;

    /* Match "i-default" to the process' current locale if possible */
    if (g11n_langtag_is_default(langtag)) {
        if (strcasecmp(locale, "POSIX") == 0 ||
            strcasecmp(locale, "C") == 0)
            return 1;
        return 0;
    }

    /*
     * locale must be at least 2 chars long and the lang part must be
     * exactly two characters.  <ctype.h> functions need an unsigned char
     * value, hence the casts.
     */
    if (strlen(locale) < 2 ||
        !isalpha((unsigned char) locale[0]) ||
        !isalpha((unsigned char) locale[1]) ||
        (locale[2] != '\0' && locale[2] != '_' &&
        locale[2] != '.' && locale[2] != '@'))
        return 0;

    /* same thing with the langtag */
    if (strlen(langtag) < 2 ||
        !isalpha((unsigned char) langtag[0]) ||
        !isalpha((unsigned char) langtag[1]) ||
        (langtag[2] != '\0' && langtag[2] != '-'))
        return 0;

    /* primary language sub-tag and the locale's language part must match */
    if (strncasecmp(langtag, locale, 2) != 0)
        return 0;

    /*
     * The languages match, now fuzzy check the country part.  After the
     * checks above locale[2] is one of '\0', '_', '.' or '@'; only '_'
     * introduces a country code.
     */
    locale_has_country = (locale[2] == '_');

    /* langtag has only one sub-tag: exact only if the locale has too */
    if (langtag[2] == '\0')
        return locale_has_country ? 1 : 2;

    /* locale has no country code... */
    if (!locale_has_country)
        return 1;

    /* langtag has more than one subtag and the locale has a country code */

    /* ignore second subtag if not two chars */
    if (strlen(langtag) < 5 ||
        !isalpha((unsigned char) langtag[3]) ||
        !isalpha((unsigned char) langtag[4]) ||
        (langtag[5] != '\0' && langtag[5] != '-'))
        return 1;

    /* ignore rest of locale if there is no two-character country part */
    if (strlen(locale) < 5 ||
        !isalpha((unsigned char) locale[3]) ||
        !isalpha((unsigned char) locale[4]) ||
        (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
        return 1;

    /* if the country part matches, return 2 */
    if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
        return 2;

    return 1;
}
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate char *
196*0Sstevel@tonic-gate g11n_getlocale()
197*0Sstevel@tonic-gate {
198*0Sstevel@tonic-gate     /* We have one text domain - always set it */
199*0Sstevel@tonic-gate     (void) textdomain(TEXT_DOMAIN);
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate     /* If the locale is not set, set it from the env vars */
202*0Sstevel@tonic-gate     if (!setlocale(LC_CTYPE, NULL))
203*0Sstevel@tonic-gate 	(void) setlocale(LC_CTYPE, "");
204*0Sstevel@tonic-gate 
205*0Sstevel@tonic-gate     return setlocale(LC_CTYPE, NULL);
206*0Sstevel@tonic-gate }
207*0Sstevel@tonic-gate 
208*0Sstevel@tonic-gate void
209*0Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
210*0Sstevel@tonic-gate {
211*0Sstevel@tonic-gate     char *curr;
212*0Sstevel@tonic-gate 
213*0Sstevel@tonic-gate     /* We have one text domain - always set it */
214*0Sstevel@tonic-gate     (void) textdomain(TEXT_DOMAIN);
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate     if (!locale)
217*0Sstevel@tonic-gate 	return;
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate     if (*locale && ((curr = setlocale(category, NULL))) &&
220*0Sstevel@tonic-gate 	strcmp(curr, locale) == 0)
221*0Sstevel@tonic-gate 	return;
222*0Sstevel@tonic-gate 
223*0Sstevel@tonic-gate     /*
224*0Sstevel@tonic-gate      * If <category> is bogus, setlocale() will do nothing and will
225*0Sstevel@tonic-gate      * return NULL.
226*0Sstevel@tonic-gate      */
227*0Sstevel@tonic-gate     if (!setlocale(category, locale))
228*0Sstevel@tonic-gate 	return;
229*0Sstevel@tonic-gate 
230*0Sstevel@tonic-gate     /* If setting the locale from the environment, then we're done */
231*0Sstevel@tonic-gate     if (!*locale)
232*0Sstevel@tonic-gate 	return;
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate     /*
235*0Sstevel@tonic-gate      * If setting a locale from the <locale> argument, then set the
236*0Sstevel@tonic-gate      * related env vars.
237*0Sstevel@tonic-gate      */
238*0Sstevel@tonic-gate     switch (category) {
239*0Sstevel@tonic-gate     case LC_ALL:
240*0Sstevel@tonic-gate 	setenv("LANG", locale, 1);
241*0Sstevel@tonic-gate 	setenv("LC_ALL", locale, 1);
242*0Sstevel@tonic-gate 	break;
243*0Sstevel@tonic-gate     case LC_CTYPE:
244*0Sstevel@tonic-gate 	setenv("LC_CTYPE", locale, 1);
245*0Sstevel@tonic-gate 	break;
246*0Sstevel@tonic-gate     case LC_NUMERIC:
247*0Sstevel@tonic-gate 	setenv("LC_NUMERIC", locale, 1);
248*0Sstevel@tonic-gate 	break;
249*0Sstevel@tonic-gate     case LC_TIME:
250*0Sstevel@tonic-gate 	setenv("LC_TIME", locale, 1);
251*0Sstevel@tonic-gate 	break;
252*0Sstevel@tonic-gate     case LC_COLLATE:
253*0Sstevel@tonic-gate 	setenv("LC_COLLATE", locale, 1);
254*0Sstevel@tonic-gate 	break;
255*0Sstevel@tonic-gate     case LC_MONETARY:
256*0Sstevel@tonic-gate 	setenv("LC_MONETARY", locale, 1);
257*0Sstevel@tonic-gate 	break;
258*0Sstevel@tonic-gate     case LC_MESSAGES:
259*0Sstevel@tonic-gate 	setenv("LC_MESSAGES", locale, 1);
260*0Sstevel@tonic-gate 	break;
261*0Sstevel@tonic-gate     }
262*0Sstevel@tonic-gate     return;
263*0Sstevel@tonic-gate }
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate char **
266*0Sstevel@tonic-gate g11n_getlocales()
267*0Sstevel@tonic-gate {
268*0Sstevel@tonic-gate     FILE *locale_out;
269*0Sstevel@tonic-gate     u_int n_elems, list_size, long_line = 0;
270*0Sstevel@tonic-gate     char **list;
271*0Sstevel@tonic-gate     char locale[64];	/* 64 bytes is plenty for locale names */
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate     if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) {
274*0Sstevel@tonic-gate 	return NULL;
275*0Sstevel@tonic-gate     }
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate     /*
278*0Sstevel@tonic-gate      * Start with enough room for 65 locales - that's a lot fewer than
279*0Sstevel@tonic-gate      * all the locales available for installation, but a lot more than
280*0Sstevel@tonic-gate      * what most users will need and install
281*0Sstevel@tonic-gate      */
282*0Sstevel@tonic-gate     n_elems=0;
283*0Sstevel@tonic-gate     list_size=192;
284*0Sstevel@tonic-gate     list = (char **) xmalloc(sizeof(char *) * (list_size + 1));
285*0Sstevel@tonic-gate     memset(list, 0, sizeof(char *) * (list_size + 1));
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate     while (fgets(locale, sizeof(locale), locale_out)) {
288*0Sstevel@tonic-gate 	/* skip long locale names (if any) */
289*0Sstevel@tonic-gate 	if (!strchr(locale, '\n')) {
290*0Sstevel@tonic-gate 	    long_line = 1;
291*0Sstevel@tonic-gate 	    continue;
292*0Sstevel@tonic-gate 	}
293*0Sstevel@tonic-gate 	else if (long_line) {
294*0Sstevel@tonic-gate 	    long_line = 0;
295*0Sstevel@tonic-gate 	    continue;
296*0Sstevel@tonic-gate 	}
297*0Sstevel@tonic-gate 	if (strncmp(locale, "iso_8859", 8) == 0)
298*0Sstevel@tonic-gate 	    continue;		    /* ignore locale names like "iso_8859-1" */
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate 	if (n_elems == list_size) {
301*0Sstevel@tonic-gate 	    list_size *= 2;
302*0Sstevel@tonic-gate 	    list = (char **) xrealloc((void *) list, (list_size + 1) * sizeof(char *));
303*0Sstevel@tonic-gate 	    memset(&list[n_elems+1], 0, sizeof(char *) * (list_size - n_elems + 1));
304*0Sstevel@tonic-gate 	}
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate 	*(strchr(locale, '\n')) = '\0';      /* remove the trailing \n */
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate 	list[n_elems++] = xstrdup(locale);
309*0Sstevel@tonic-gate     }
310*0Sstevel@tonic-gate     list[n_elems] = NULL;
311*0Sstevel@tonic-gate     (void) pclose(locale_out);
312*0Sstevel@tonic-gate 
313*0Sstevel@tonic-gate     qsort(list, n_elems - 1, sizeof(char *), locale_cmp);
314*0Sstevel@tonic-gate     return list;
315*0Sstevel@tonic-gate }
316*0Sstevel@tonic-gate 
/*
 * Return the language tag list to offer for this process.
 *
 * If the SSH_LANGS environment variable is set, a copy of its value is
 * returned.  Otherwise the tag is derived from g11n_getlocale(): an
 * unset or empty locale, "C" and "POSIX" all yield "i-default"; any
 * other locale is converted by g11n_locale2langtag().
 *
 * Every non-NULL result is heap-allocated.  NULL is returned when the
 * locale name cannot be converted to a language tag.
 */
char *
g11n_getlangs()
{
    char *langs, *locale;

    if ((langs = getenv("SSH_LANGS")) != NULL)
        return xstrdup(langs);

    locale = g11n_getlocale();

    /*
     * Resolve the default locales here so that the result is a heap
     * copy, like on every other path of this function.
     */
    if (locale == NULL || *locale == '\0' ||
        strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0)
        return xstrdup("i-default");

    return g11n_locale2langtag(locale);
}
332*0Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated array of locale names into a comma-separated
 * list of distinct language tags, in order of first appearance.
 *
 * Locales that g11n_locale2langtag() cannot convert are left out.  The
 * result is whatever xjoin() returns for the collected tags; the
 * temporary tag list is released before returning.
 */
char *
g11n_locales2langs(char **locale_set)
{
    char **p, **q, **tags;
    char *langtag, *langs;
    int locales, n_tags, dup;

    for (locales = 0, p = locale_set ; p && *p ; p++)
        locales++;

    /* At most one tag per locale, plus the NULL terminator */
    tags = (char **) xmalloc((locales + 1) * sizeof(char *));
    memset(tags, 0, (locales + 1) * sizeof(char *));
    n_tags = 0;

    for (p = locale_set ; p && *p ; p++) {
        /*
         * Resolve the default locales here so that every tag stored in
         * the list is known to be heap-allocated and can be freed.
         */
        if (strcmp(*p, "C") == 0 || strcmp(*p, "POSIX") == 0)
            langtag = xstrdup("i-default");
        else if ((langtag = g11n_locale2langtag(*p)) == NULL)
            continue;

        dup = 0;
        for (q = tags ; *q != NULL ; q++) {
            if (strcmp(*q, langtag) == 0) {
                dup = 1;
                break;
            }
        }

        if (dup)
            xfree(langtag);	/* already listed - don't leak the copy */
        else
            tags[n_tags++] = langtag;
    }
    tags[n_tags] = NULL;

    langs = xjoin(tags, ',');
    xfree_split_list(tags);

    return langs;
}
361*0Sstevel@tonic-gate 
/*
 * qsort() comparison callback for arrays of C strings: d1 and d2 each
 * point at a "char *" element, and the strings are ordered by strcmp().
 */
static
int
sortcmp(const void *d1, const void *d2)
{
    return strcmp(*(char * const *) d1, *(char * const *) d2);
}
371*0Sstevel@tonic-gate 
/*
 * Length of the leading sub-tag of tag: the number of bytes before the
 * first '-', or the whole string when there is no '-'.
 */
static size_t
langtag_leading_subtag_len(const char *tag)
{
    const char *dash = strchr(tag, '-');

    return (dash != NULL) ? (size_t) (dash - tag) : strlen(tag);
}

/*
 * Rate how well two language tags match, looking at the first two
 * sub-tags only.  Sub-tags are compared without regard to case, as
 * language tags are case-insensitive.
 *
 * Returns:
 *   0  the primary (language) sub-tags differ, or an argument is NULL
 *   1  the languages agree but the second sub-tags differ, or only one
 *      of the tags has a second sub-tag
 *   2  the languages agree and either neither tag has a second sub-tag
 *      or the second sub-tags agree
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
    size_t len1, len2;

    if (langtag1 == NULL || langtag2 == NULL)
        return 0;

    len1 = langtag_leading_subtag_len(langtag1);
    len2 = langtag_leading_subtag_len(langtag2);

    /* no match */
    if (len1 != len2 || strncasecmp(langtag1, langtag2, len1) != 0)
        return 0;

    /*
     * By construction the byte following each primary sub-tag is either
     * '\0' or '-'.
     */

    /* no country sub-tags - exact match */
    if (langtag1[len1] == '\0' && langtag2[len2] == '\0')
        return 2;

    /* one langtag has a country sub-tag, the other doesn't */
    if (langtag1[len1] == '\0' || langtag2[len2] == '\0')
        return 1;

    /* compare country subtags */
    langtag1 += len1 + 1;
    langtag2 += len2 + 1;

    len1 = langtag_leading_subtag_len(langtag1);
    len2 = langtag_leading_subtag_len(langtag2);

    if (len1 != len2 || strncasecmp(langtag1, langtag2, len1) != 0)
        return 1;

    /* country tags matched - exact match */
    return 2;
}
425*0Sstevel@tonic-gate 
426*0Sstevel@tonic-gate char *
427*0Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
428*0Sstevel@tonic-gate {
429*0Sstevel@tonic-gate     char **list1, **list2, **list3, **p, **q, **r;
430*0Sstevel@tonic-gate     char *set3, *lang_subtag;
431*0Sstevel@tonic-gate     u_int n1, n2, n3;
432*0Sstevel@tonic-gate     u_int do_append;
433*0Sstevel@tonic-gate 
434*0Sstevel@tonic-gate     list1 = xsplit(set1, ',');
435*0Sstevel@tonic-gate     list2 = xsplit(set2, ',');
436*0Sstevel@tonic-gate     for (n1 = 0, p = list1 ; p && *p ; p++, n1++) ;
437*0Sstevel@tonic-gate     for (n2 = 0, p = list2 ; p && *p ; p++, n2++) ;
438*0Sstevel@tonic-gate 
439*0Sstevel@tonic-gate     list3 = (char **) xmalloc(sizeof(char *) * (n1 + n2 + 1));
440*0Sstevel@tonic-gate     *list3 = NULL;
441*0Sstevel@tonic-gate 
442*0Sstevel@tonic-gate     /* we must not sort the user langtags - sorting or not the server's
443*0Sstevel@tonic-gate      * should not affect the outcome
444*0Sstevel@tonic-gate      */
445*0Sstevel@tonic-gate     qsort(list2, n2, sizeof(char *), sortcmp);
446*0Sstevel@tonic-gate 
447*0Sstevel@tonic-gate     for (n3 = 0, p = list1 ; p && *p ; p++) {
448*0Sstevel@tonic-gate 	do_append = 0;
449*0Sstevel@tonic-gate 	for (q = list2 ; q && *q ; q++) {
450*0Sstevel@tonic-gate 	    if (g11n_langtag_match(*p, *q) != 2) continue;
451*0Sstevel@tonic-gate 	    /* append element */
452*0Sstevel@tonic-gate 	    for (r = list3; (r - list3) <= (n1 + n2) ; r++) {
453*0Sstevel@tonic-gate 		do_append = 1;
454*0Sstevel@tonic-gate 		if (!*r) break;
455*0Sstevel@tonic-gate 		if (strcmp(*p, *r) == 0) {
456*0Sstevel@tonic-gate 		    do_append = 0;
457*0Sstevel@tonic-gate 		    break;
458*0Sstevel@tonic-gate 		}
459*0Sstevel@tonic-gate 	    }
460*0Sstevel@tonic-gate 	    if (do_append && n3 <= (n1 + n2)) {
461*0Sstevel@tonic-gate 		list3[n3++] = xstrdup(*p);
462*0Sstevel@tonic-gate 		list3[n3] = NULL;
463*0Sstevel@tonic-gate 	    }
464*0Sstevel@tonic-gate 	}
465*0Sstevel@tonic-gate     }
466*0Sstevel@tonic-gate 
467*0Sstevel@tonic-gate     for (p = list1 ; p && *p ; p++) {
468*0Sstevel@tonic-gate 	do_append = 0;
469*0Sstevel@tonic-gate 	for (q = list2 ; q && *q ; q++) {
470*0Sstevel@tonic-gate 	    if (g11n_langtag_match(*p, *q) != 1) continue;
471*0Sstevel@tonic-gate 	    /* append element */
472*0Sstevel@tonic-gate 	    lang_subtag = xstrdup(*p);
473*0Sstevel@tonic-gate 	    if (strchr(lang_subtag, '-'))
474*0Sstevel@tonic-gate 		*(strchr(lang_subtag, '-')) = '\0';
475*0Sstevel@tonic-gate 	    for (r = list3; (r - list3) <= (n1 + n2) ; r++) {
476*0Sstevel@tonic-gate 		do_append = 1;
477*0Sstevel@tonic-gate 		if (!*r) break;
478*0Sstevel@tonic-gate 		if (strcmp(lang_subtag, *r) == 0) {
479*0Sstevel@tonic-gate 		    do_append = 0;
480*0Sstevel@tonic-gate 		    break;
481*0Sstevel@tonic-gate 		}
482*0Sstevel@tonic-gate 	    }
483*0Sstevel@tonic-gate 	    if (do_append && n3 <= (n1 + n2)) {
484*0Sstevel@tonic-gate 		list3[n3++] = lang_subtag;
485*0Sstevel@tonic-gate 		list3[n3] = NULL;
486*0Sstevel@tonic-gate 	    }
487*0Sstevel@tonic-gate 	    else
488*0Sstevel@tonic-gate 		xfree(lang_subtag);
489*0Sstevel@tonic-gate 	}
490*0Sstevel@tonic-gate     }
491*0Sstevel@tonic-gate 
492*0Sstevel@tonic-gate     set3 = xjoin(list3, ',');
493*0Sstevel@tonic-gate     xfree_split_list(list1);
494*0Sstevel@tonic-gate     xfree_split_list(list2);
495*0Sstevel@tonic-gate     xfree_split_list(list3);
496*0Sstevel@tonic-gate 
497*0Sstevel@tonic-gate     return set3;
498*0Sstevel@tonic-gate }
499*0Sstevel@tonic-gate 
/*
 * Pick the language tag a client should use: the first entry of
 * g11n_langtag_set_intersect(clnt_langtags, srvr_langtags).
 *
 * Returns a copy (from xstrdup()) of that first tag, or NULL when the
 * intersection is empty.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
    char *list, *result = NULL;
    char **xlist;

    /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
    list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

    if (list == NULL)
        return NULL;

    xlist = xsplit(list, ',');

    xfree(list);

    if (xlist == NULL)
        return NULL;

    if (*xlist != NULL)
        result = xstrdup(*xlist);

    /* Release the split list on the empty-list path as well */
    xfree_split_list(xlist);

    return result;
}
525*0Sstevel@tonic-gate 
/*
 * Tell whether a locale name selects the UTF-8 codeset: the text after
 * its first '.' must be "UTF-8", followed by end-of-string or '@'.
 */
static
int
locale_cmp_is_utf8(const char *name)
{
    const char *dot = strchr(name, '.');

    if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
        return 0;

    return (dot[6] == '\0' || dot[6] == '@');
}

/*
 * Tell whether a locale name is one of the default locales: "C", "POSIX"
 * or "common".
 */
static
int
locale_cmp_is_default(const char *name)
{
    return (strcmp(name, "C") == 0 ||
        strcmp(name, "POSIX") == 0 ||
        strcmp(name, "common") == 0);
}

/*
 * Compare locales for qsort(): UTF-8 codesets sort before all others and
 * the default locales sort after all others; within the same class the
 * order is a straight strcmp().
 */
static
int
locale_cmp(const void *d1, const void *d2)
{
    const char *left = *(char * const *) d1;
    const char *right = *(char * const *) d2;
    int left_utf8 = locale_cmp_is_utf8(left);
    int right_utf8 = locale_cmp_is_utf8(right);
    int left_default, right_default;

    /* prefer UTF-8 locales */
    if (left_utf8 != right_utf8)
        return left_utf8 ? -1 : 1;

    /* prefer any locale over the default locales */
    left_default = locale_cmp_is_default(left);
    right_default = locale_cmp_is_default(right);
    if (left_default != right_default)
        return left_default ? 1 : -1;

    return strcmp(left, right);
}
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 
581*0Sstevel@tonic-gate char **
582*0Sstevel@tonic-gate g11n_langtag_set_locale_set_intersect(char *langtag_set,
583*0Sstevel@tonic-gate 				      char **locale_set)
584*0Sstevel@tonic-gate {
585*0Sstevel@tonic-gate     char **langtag_list, **result, **p, **q, **r;
586*0Sstevel@tonic-gate     char *s;
587*0Sstevel@tonic-gate     u_int do_append, n_langtags, n_locales, n_results, max_results;
588*0Sstevel@tonic-gate 
589*0Sstevel@tonic-gate     /* Count lang tags and locales */
590*0Sstevel@tonic-gate     for (n_locales = 0, p = locale_set ; p && *p ; p++) n_locales++;
591*0Sstevel@tonic-gate     n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
592*0Sstevel@tonic-gate     for ( ; s = strchr(s, ',') ; s++, n_langtags++) ;
593*0Sstevel@tonic-gate     /*
594*0Sstevel@tonic-gate     while ((s = strchr(s, ','))) {
595*0Sstevel@tonic-gate 	n_langtags++;
596*0Sstevel@tonic-gate 	s++;
597*0Sstevel@tonic-gate     }
598*0Sstevel@tonic-gate      */
599*0Sstevel@tonic-gate 
600*0Sstevel@tonic-gate     qsort(locale_set, n_locales, sizeof(char *), locale_cmp);
601*0Sstevel@tonic-gate 
602*0Sstevel@tonic-gate     langtag_list = xsplit(langtag_set, ',');
603*0Sstevel@tonic-gate     for ( n_langtags = 0, p = langtag_list ; p && *p ; p++, n_langtags++);
604*0Sstevel@tonic-gate 
605*0Sstevel@tonic-gate     max_results = MIN(n_locales, n_langtags) * 2;
606*0Sstevel@tonic-gate     result = (char **) xmalloc(sizeof(char *) * (max_results + 1));
607*0Sstevel@tonic-gate     *result = NULL;
608*0Sstevel@tonic-gate     n_results = 0;
609*0Sstevel@tonic-gate 
610*0Sstevel@tonic-gate     /* More specific matches first */
611*0Sstevel@tonic-gate     for (p = langtag_list ; p && *p ; p++) {
612*0Sstevel@tonic-gate 	do_append = 0;
613*0Sstevel@tonic-gate 	for (q = locale_set ; q && *q ; q++) {
614*0Sstevel@tonic-gate 	    if (g11n_langtag_matches_locale(*p, *q) == 2) {
615*0Sstevel@tonic-gate 		do_append = 1;
616*0Sstevel@tonic-gate 		for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) {
617*0Sstevel@tonic-gate 		    if (!*r) break;
618*0Sstevel@tonic-gate 		    if (strcmp(*q, *r) == 0) {
619*0Sstevel@tonic-gate 			do_append = 0;
620*0Sstevel@tonic-gate 			break;
621*0Sstevel@tonic-gate 		    }
622*0Sstevel@tonic-gate 		}
623*0Sstevel@tonic-gate 		if (do_append && n_results < max_results) {
624*0Sstevel@tonic-gate 		    result[n_results++] = xstrdup(*q);
625*0Sstevel@tonic-gate 		    result[n_results] = NULL;
626*0Sstevel@tonic-gate 		}
627*0Sstevel@tonic-gate 		break;
628*0Sstevel@tonic-gate 	    }
629*0Sstevel@tonic-gate 	}
630*0Sstevel@tonic-gate     }
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate     for (p = langtag_list ; p && *p ; p++) {
633*0Sstevel@tonic-gate 	do_append = 0;
634*0Sstevel@tonic-gate 	for (q = locale_set ; q && *q ; q++) {
635*0Sstevel@tonic-gate 	    if (g11n_langtag_matches_locale(*p, *q) == 1) {
636*0Sstevel@tonic-gate 		do_append = 1;
637*0Sstevel@tonic-gate 		for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) {
638*0Sstevel@tonic-gate 		    if (!*r) break;
639*0Sstevel@tonic-gate 		    if (strcmp(*q, *r) == 0) {
640*0Sstevel@tonic-gate 			do_append = 0;
641*0Sstevel@tonic-gate 			break;
642*0Sstevel@tonic-gate 		    }
643*0Sstevel@tonic-gate 		}
644*0Sstevel@tonic-gate 		if (do_append && n_results < max_results) {
645*0Sstevel@tonic-gate 		    result[n_results++] = xstrdup(*q);
646*0Sstevel@tonic-gate 		    result[n_results] = NULL;
647*0Sstevel@tonic-gate 		}
648*0Sstevel@tonic-gate 		break;
649*0Sstevel@tonic-gate 	    }
650*0Sstevel@tonic-gate 	}
651*0Sstevel@tonic-gate     }
652*0Sstevel@tonic-gate     xfree_split_list(langtag_list);
653*0Sstevel@tonic-gate 
654*0Sstevel@tonic-gate     return result;
655*0Sstevel@tonic-gate }
656*0Sstevel@tonic-gate 
/*
 * Pick the server locale that best suits the client's language tags: the
 * first entry of g11n_langtag_set_locale_set_intersect().
 *
 * When srvr_locales is NULL the locale list is obtained from
 * g11n_getlocales() and released again before returning.  A list passed
 * in by the caller is not freed (it is, however, sorted in place by the
 * intersect function).
 *
 * Returns a copy (from xstrdup()) of the chosen locale name, or NULL
 * when no locale matches.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
    char **results, **own_locales = NULL;
    char *result = NULL;

    if (srvr_locales == NULL)
        srvr_locales = own_locales = g11n_getlocales();

    results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
        srvr_locales);

    if (results != NULL) {
        if (*results != NULL)
            result = xstrdup(*results);
        xfree_split_list(results);
    }

    /* The fallback list belongs to this function - don't leak it */
    if (own_locales != NULL)
        xfree_split_list(own_locales);

    return result;
}
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate 
675*0Sstevel@tonic-gate /*
676*0Sstevel@tonic-gate  * Functions for validating ASCII and UTF-8 strings
677*0Sstevel@tonic-gate  *
678*0Sstevel@tonic-gate  * The error_str parameter is an optional pointer to a char variable
679*0Sstevel@tonic-gate  * where to store a string suitable for use with error() or fatal() or
680*0Sstevel@tonic-gate  * friends.
681*0Sstevel@tonic-gate  *
682*0Sstevel@tonic-gate  * The return value is 0 if success, EILSEQ or EINVAL.
683*0Sstevel@tonic-gate  *
684*0Sstevel@tonic-gate  */
685*0Sstevel@tonic-gate 
/*
 * Check that a string consists solely of 7-bit (ASCII) characters.
 *
 * str		string to check
 * len		number of bytes to check; 0 means "use strlen(str)"
 * error_str	optional; on failure it is pointed at a static message
 *
 * Returns 0 if the first len bytes are all non-NUL 7-bit characters.
 * Returns EILSEQ if a byte with the high bit set is found, if a NUL
 * appears before len bytes have been seen, or if str is NULL while len
 * is non-zero.  No byte beyond str[len - 1] is ever read.
 */
u_int
g11n_validate_ascii(const char *str, u_int len, u_char **error_str)
{
    const u_char *p = (const u_char *) str;
    u_int i;

    if (p == NULL) {
	if (len == 0)
	    return 0;
	goto invalid;
    }

    /* A zero length must still validate the whole string */
    if (len == 0)
	len = strlen(str);

    for (i = 0 ; i < len ; i++) {
	if (p[i] == '\0' || (p[i] & 0x80))
	    goto invalid;
    }
    return 0;

invalid:
    if (error_str)
	*error_str = (u_char *) "String is not valid ASCII";
    return EILSEQ;
}
698*0Sstevel@tonic-gate 
/*
 * Check that a string is well-formed UTF-8.
 *
 * str		string to check
 * len		number of bytes to check; 0 means "use strlen(str)"
 * error_str	optional; on failure it is pointed at a static message
 *
 * Scanning stops at the first NUL byte or after len bytes, whichever
 * comes first.  Returns 0 if everything scanned is valid (a NULL str
 * is treated as empty), EILSEQ otherwise.  Rejected are: stray
 * continuation bytes, the lead bytes 0xfe/0xff, sequences truncated by
 * the end of the input, bad continuation bytes, overlong encodings,
 * UTF-16 surrogates (U+D800..U+DFFF) and U+FFFE/U+FFFF.
 *
 * NOTE(review): 5- and 6-byte sequences (code points above U+10FFFF)
 * are still accepted, as before; RFC 3629 no longer allows them --
 * confirm whether they should be rejected.
 */
u_int
g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str)
{
    /* min_cp[l]: smallest code point that needs an l-byte sequence */
    static const u_int min_cp[] = {
	0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };
    const u_char *p, *end;
    u_int c, l, i;

    if (str == NULL)
	return 0;

    if (len == 0) len = strlen((const char *)str);
    end = str + len;

    for (p = str ; p < end && *p ; ) {
	/* 7-bit chars are fine */
	if (!(*p & 0x80)) {
	    p++;
	    continue;
	}

	/* 8-bit chars begin a UTF-8 sequence: get its length */
	if (*p < 0xc0)
	    goto invalid;	/* continuation byte without a lead */
	else if (*p < 0xe0)
	    l = 2;
	else if (*p < 0xf0)
	    l = 3;
	else if (*p < 0xf8)
	    l = 4;
	else if (*p < 0xfc)
	    l = 5;
	else if (*p < 0xfe)
	    l = 6;
	else
	    goto invalid;

	/* The whole sequence must fit; ending exactly at 'end' is OK */
	if ((u_int)(end - p) < l)
	    goto invalid;

	/*
	 * Build the code point.  An l-byte lead carries (7 - l)
	 * payload bits, hence the length-dependent mask.
	 */
	c = *p & (0xff >> (l + 1));
	for (i = 1 ; i < l ; i++) {
	    if ((p[i] & 0xc0) != 0x80)
		goto invalid;
	    c = (c << 6) | (p[i] & 0x3f);
	}

	/* Overlong detection */
	if (c < min_cp[l])
	    goto invalid;

	/* UTF-16 surrogates and other illegal code points */
	if (((c <= 0xdfff) && (c >= 0xd800)) ||
	    (c == 0xfffe) || (c == 0xffff))
	    goto invalid;

	p += l;
    }
    return 0;

invalid:
    if (error_str)
	*error_str = (u_char *) "String is not valid UTF-8";
    return EILSEQ;
}
786*0Sstevel@tonic-gate 
787*0Sstevel@tonic-gate /*
788*0Sstevel@tonic-gate  * Functions for converting to ASCII or UTF-8 from the local codeset
789*0Sstevel@tonic-gate  * Functions for converting from ASCII or UTF-8 to the local codeset
790*0Sstevel@tonic-gate  *
791*0Sstevel@tonic-gate  * The error_str parameter is an optional pointer to a char variable
792*0Sstevel@tonic-gate  * where to store a string suitable for use with error() or fatal() or
793*0Sstevel@tonic-gate  * friends.
794*0Sstevel@tonic-gate  *
795*0Sstevel@tonic-gate  * The err parameter is an optional pointer to an integer where 0
796*0Sstevel@tonic-gate  * (success) or EILSEQ or EINVAL will be stored (failure).
797*0Sstevel@tonic-gate  *
798*0Sstevel@tonic-gate  * These functions return NULL if the conversion fails.
799*0Sstevel@tonic-gate  *
800*0Sstevel@tonic-gate  */
801*0Sstevel@tonic-gate 
/*
 * Convert an ASCII string to the local codeset.
 *
 * str		NUL-terminated ASCII string
 * err_ptr	optional; receives an errno value on failure
 * error_str	optional; receives a static error message on failure
 *
 * When the local codeset is itself ASCII the string is only validated
 * and duplicated with xstrdup(); otherwise it is run through
 * do_iconv().  Returns NULL on failure.
 *
 * The codeset test and the conversion descriptor are set up on the
 * first call and reused afterwards, so the descriptor must have static
 * storage.
 */
u_char *
g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;		/* valid only when do_convert is set */
    static int open_errno = 0;	/* errno of a failed iconv_open() */
    const char *codeset;
    int err;

    if (!initialized) {
	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	codeset = nl_langinfo(CODESET);
	if (strcmp("646", codeset) == 0 ||
	    strcmp("ASCII", codeset) == 0 ||
	    strcmp("US-ASCII", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open(codeset, "646");
	    if (cd == (iconv_t) -1)
		open_errno = errno;
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert) {
	if ((err = g11n_validate_ascii(str, 0, error_str))) {
	    if (err_ptr) *err_ptr = err;
	    return NULL;
	}
	else
	    return (u_char *) xstrdup(str);
    }

    /* Never hand an invalid descriptor to iconv() */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert ASCII strings to the local codeset";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
843*0Sstevel@tonic-gate 
/*
 * Convert a UTF-8 string to the local codeset.
 *
 * str		NUL-terminated UTF-8 string
 * err_ptr	optional; receives an errno value on failure
 * error_str	optional; receives a static error message on failure
 *
 * When the local codeset is itself UTF-8 the string is only validated
 * and duplicated with xstrdup(); otherwise it is run through
 * do_iconv().  Returns NULL on failure.
 *
 * The codeset test and the conversion descriptor are set up on the
 * first call and reused afterwards, so the descriptor must have static
 * storage.
 */
u_char *
g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;		/* valid only when do_convert is set */
    static int open_errno = 0;	/* errno of a failed iconv_open() */
    const char *codeset;
    int err;

    if (!initialized) {
	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	codeset = nl_langinfo(CODESET);
	if (strcmp("UTF-8", codeset) == 0 ||
	    strcmp("UTF8", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open(codeset, "UTF-8");
	    if (cd == (iconv_t) -1)
		open_errno = errno;
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert) {
	if ((err = g11n_validate_utf8(str, 0, error_str))) {
	    if (err_ptr) *err_ptr = err;
	    return NULL;
	}
	else
	    return (u_char *) xstrdup((char *) str);
    }

    /* Never hand an invalid descriptor to iconv() */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert UTF-8 strings to the local codeset";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
884*0Sstevel@tonic-gate 
/*
 * Convert a string in the local codeset to ASCII.
 *
 * str		NUL-terminated string in the local codeset
 * err_ptr	optional; receives an errno value on failure
 * error_str	optional; receives a static error message on failure
 *
 * When the local codeset is itself ASCII the string is simply
 * duplicated with xstrdup() (no validation is done); otherwise it is
 * run through do_iconv().  Returns NULL on failure.
 *
 * The codeset test and the conversion descriptor are set up on the
 * first call and reused afterwards, so the descriptor must have static
 * storage.
 */
char *
g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;		/* valid only when do_convert is set */
    static int open_errno = 0;	/* errno of a failed iconv_open() */
    const char *codeset;

    if (!initialized) {
	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	codeset = nl_langinfo(CODESET);
	if (strcmp("646", codeset) == 0 ||
	    strcmp("ASCII", codeset) == 0 ||
	    strcmp("US-ASCII", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open("646", codeset);
	    if (cd == (iconv_t) -1)
		open_errno = errno;
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert)
	return xstrdup((char *) str);

    /* Never hand an invalid descriptor to iconv() */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert strings from the local codeset to ASCII";
	return NULL;
    }
    return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
919*0Sstevel@tonic-gate 
/*
 * Convert a string in the local codeset to UTF-8.
 *
 * str		NUL-terminated string in the local codeset
 * err_ptr	optional; receives an errno value on failure
 * error_str	optional; receives a static error message on failure
 *
 * When the local codeset is itself UTF-8 the string is simply
 * duplicated with xstrdup() (no validation is done); otherwise it is
 * run through do_iconv().  Returns NULL on failure.
 *
 * The codeset test and the conversion descriptor are set up on the
 * first call and reused afterwards, so the descriptor must have static
 * storage.
 */
u_char *
g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;		/* valid only when do_convert is set */
    static int open_errno = 0;	/* errno of a failed iconv_open() */
    const char *codeset;

    if (!initialized) {
	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	codeset = nl_langinfo(CODESET);
	if (strcmp("UTF-8", codeset) == 0 ||
	    strcmp("UTF8", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open("UTF-8", codeset);
	    if (cd == (iconv_t) -1)
		open_errno = errno;
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert)
	return (u_char *) xstrdup((char *) str);

    /* Never hand an invalid descriptor to iconv() */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert strings from the local codeset to UTF-8";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
953*0Sstevel@tonic-gate 
954*0Sstevel@tonic-gate 
/*
 * Wrapper around iconv()
 *
 * cd		open conversion descriptor
 * mul_ptr	optional, in/out: the output buffer is sized as
 *		(len << mul); the initial mul is read from here (0 if
 *		NULL) and the value finally used is stored back on success
 * buf		input bytes
 * len		input length; 0 means "use strlen(buf)"
 * outlen	optional; receives the number of bytes produced
 * err		optional; receives an errno value on failure
 * err_str	optional; receives a static message when the output
 *		would need more than 8 times the input size
 *
 * Returns a NUL-terminated buffer holding the converted text, or NULL
 * on failure.  NULL is also returned, with err untouched, when buf is
 * NULL or starts with a NUL byte.
 *
 * The caller is responsible for freeing the result and for handling
 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
 */

static
u_char *
do_iconv(iconv_t cd, u_int *mul_ptr,
	 const void *buf, u_int len,
	 u_int *outlen, int *err,
	 u_char **err_str)
{
    const u_int max_mul = 3;	/* never grow beyond len << 3 */
    size_t inbytesleft, outbytesleft, converted_size, used;
    char *outbuf;
    u_char *converted;
    const char *inbuf;
    u_int mul = 0;
    int saved_errno;

    if (!buf || !(*(const char *)buf)) return NULL;
    if (len == 0) len = strlen(buf);

    /*
     * Reset the conversion descriptor to its initial state.  All
     * buffer arguments are NULL so that nothing is written anywhere.
     */
    /* XXX Do we need initial shift sequences for UTF-8??? */
    (void) iconv(cd, NULL, NULL, NULL, NULL);

    inbuf = (const char *) buf;
    inbytesleft = len;
    if (mul_ptr) mul = *mul_ptr;
    if (mul > max_mul) mul = max_mul;	/* keep the shift sane */
    converted_size = ((size_t) len << mul);
    converted = (u_char *) xmalloc(converted_size + 1); /* for null */
    outbuf = (char *) converted;
    outbytesleft = converted_size;

    do {
	/*
	 * The second argument of iconv() is "const char **" on some
	 * systems and "char **" on others; going through void *
	 * satisfies both prototypes.
	 */
	if (iconv(cd, (void *) &inbuf, &inbytesleft, &outbuf, &outbytesleft) ==
		(size_t) -1) {
	    if (errno == E2BIG) {
		/* UTF-8 codepoints are at most 8 bytes long. */
		if (mul >= max_mul) {
		    if (err_str)
			*err_str = (u_char *) "Conversion to UTF-8 failed due to "
				  "preposterous space requirements";
		    if (err)
			*err = EILSEQ;
		    xfree(converted);
		    return NULL;
		}

		/*
		 * Double the output buffer, keeping what has been
		 * produced so far, and re-point outbuf/outbytesleft
		 * at the free space of the new buffer.
		 */
		used = converted_size - outbytesleft;
		converted_size = ((size_t) len << ++mul);
		converted = xrealloc(converted, converted_size + 1);
		outbuf = (char *) converted + used;
		outbytesleft = converted_size - used;
	    }
	    else {
		/*
		 * Let the caller deal with iconv() errors, probably by
		 * calling fatal().
		 */
		saved_errno = errno;
		xfree(converted);
		if (err) *err = saved_errno;
		return NULL;
	    }
	}
    } while (inbytesleft);
    *outbuf = '\0'; /* ensure null-termination */
    if (outlen) *outlen = converted_size - outbytesleft;
    if (mul_ptr) *mul_ptr = mul;
    return converted;
}
1025