xref: /netbsd-src/external/bsd/openldap/dist/libraries/libldap/t61.c (revision 549b59ed3ccf0d36d3097190a0db27b770f3a839)
1*549b59edSchristos /*	$NetBSD: t61.c,v 1.3 2021/08/14 16:14:56 christos Exp $	*/
24e6df137Slukem 
3d11b170bStron /* $OpenLDAP$ */
42de962bdSlukem /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
52de962bdSlukem  *
6*549b59edSchristos  * Copyright 2002-2021 The OpenLDAP Foundation.
72de962bdSlukem  * All rights reserved.
82de962bdSlukem  *
92de962bdSlukem  * Redistribution and use in source and binary forms, with or without
102de962bdSlukem  * modification, are permitted only as authorized by the OpenLDAP
112de962bdSlukem  * Public License.
122de962bdSlukem  *
132de962bdSlukem  * A copy of this license is available in the file LICENSE in the
142de962bdSlukem  * top-level directory of the distribution or, alternatively, at
152de962bdSlukem  * <http://www.OpenLDAP.org/license.html>.
162de962bdSlukem  */
172de962bdSlukem /* ACKNOWLEDGEMENTS:
182de962bdSlukem  * This work was initially developed by Howard Chu for inclusion in
192de962bdSlukem  * OpenLDAP Software.
202de962bdSlukem  */
212de962bdSlukem 
222de962bdSlukem /*
232de962bdSlukem  * Basic T.61 <-> UTF-8 conversion
242de962bdSlukem  *
252de962bdSlukem  * These routines will perform a lossless translation from T.61 to UTF-8
262de962bdSlukem  * and a lossy translation from UTF-8 to T.61.
272de962bdSlukem  */
282de962bdSlukem 
29376af7d7Schristos #include <sys/cdefs.h>
30*549b59edSchristos __RCSID("$NetBSD: t61.c,v 1.3 2021/08/14 16:14:56 christos Exp $");
31376af7d7Schristos 
322de962bdSlukem #include "portable.h"
332de962bdSlukem 
342de962bdSlukem #include <stdio.h>
352de962bdSlukem 
362de962bdSlukem #include <ac/stdlib.h>
372de962bdSlukem 
382de962bdSlukem #include <ac/socket.h>
392de962bdSlukem #include <ac/string.h>
402de962bdSlukem #include <ac/time.h>
412de962bdSlukem 
422de962bdSlukem #include "ldap-int.h"
432de962bdSlukem #include "ldap_utf8.h"
442de962bdSlukem 
452de962bdSlukem #include "ldap_defaults.h"
462de962bdSlukem 
472de962bdSlukem /*
482de962bdSlukem  * T.61 is somewhat braindead; even in the 7-bit space it is not
492de962bdSlukem  * completely equivalent to 7-bit US-ASCII. Our definition of the
502de962bdSlukem  * character set comes from RFC 1345 with a slightly more readable
512de962bdSlukem  * rendition at http://std.dkuug.dk/i18n/charmaps/T.61-8BIT.
522de962bdSlukem  *
532de962bdSlukem  * Even though '#' and '$' are present in the 7-bit US-ASCII space,
542de962bdSlukem  * (x23 and x24, resp.) in T.61 they are mapped to 8-bit characters
552de962bdSlukem  * xA6 and xA4.
562de962bdSlukem  *
572de962bdSlukem  * Also T.61 lacks
582de962bdSlukem  *	backslash 	\	(x5C)
592de962bdSlukem  *	caret		^	(x5E)
602de962bdSlukem  *	backquote	`	(x60)
612de962bdSlukem  *	left brace	{	(x7B)
622de962bdSlukem  *	right brace	}	(x7D)
632de962bdSlukem  *	tilde		~	(x7E)
642de962bdSlukem  *
652de962bdSlukem  * In T.61, the codes xC1 to xCF (excluding xC9, unused) are non-spacing
662de962bdSlukem  * accents of some form or another. There are predefined combinations
672de962bdSlukem  * for certain characters, but they can also be used arbitrarily. The
682de962bdSlukem  * table at dkuug.dk maps these accents to the E000 "private use" range
692de962bdSlukem  * of the Unicode space, but I believe they more properly belong in the
702de962bdSlukem  * 0300 range (non-spacing accents). The transformation is complicated
712de962bdSlukem  * slightly because Unicode wants the non-spacing character to follow
722de962bdSlukem  * the base character, while T.61 has the non-spacing character leading.
732de962bdSlukem  * Also, T.61 specifically recognizes certain combined pairs as "characters"
742de962bdSlukem  * but doesn't specify how to treat unrecognized pairs. This code will
752de962bdSlukem  * always attempt to combine pairs when a known Unicode composite exists.
762de962bdSlukem  */
772de962bdSlukem 
/* Forward map: the array index is a T.61 byte, the value is the Unicode
 * code point it translates to.  A 0 entry marks a byte that has no T.61
 * meaning; ldap_t61s_valid() and ldap_t61s_to_utf8s() test for 0 to detect
 * invalid input.  Entry 0 is itself 0, so a NUL byte counts as invalid.
 * Entries 0xc1-0xcf (except the unused 0xc9) hold the combining form of
 * each non-spacing accent.
 */
782de962bdSlukem static const wchar_t t61_tab[] = {
792de962bdSlukem 	0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007,
802de962bdSlukem 	0x008, 0x009, 0x00a, 0x00b, 0x00c, 0x00d, 0x00e, 0x00f,
812de962bdSlukem 	0x010, 0x011, 0x012, 0x013, 0x014, 0x015, 0x016, 0x017,
822de962bdSlukem 	0x018, 0x019, 0x01a, 0x01b, 0x01c, 0x01d, 0x01e, 0x01f,
832de962bdSlukem 	0x020, 0x021, 0x022, 0x000, 0x000, 0x025, 0x026, 0x027,
842de962bdSlukem 	0x028, 0x029, 0x02a, 0x02b, 0x02c, 0x02d, 0x02e, 0x02f,
852de962bdSlukem 	0x030, 0x031, 0x032, 0x033, 0x034, 0x035, 0x036, 0x037,
862de962bdSlukem 	0x038, 0x039, 0x03a, 0x03b, 0x03c, 0x03d, 0x03e, 0x03f,
872de962bdSlukem 	0x040, 0x041, 0x042, 0x043, 0x044, 0x045, 0x046, 0x047,
882de962bdSlukem 	0x048, 0x049, 0x04a, 0x04b, 0x04c, 0x04d, 0x04e, 0x04f,
892de962bdSlukem 	0x050, 0x051, 0x052, 0x053, 0x054, 0x055, 0x056, 0x057,
902de962bdSlukem 	0x058, 0x059, 0x05a, 0x05b, 0x000, 0x05d, 0x000, 0x05f,
912de962bdSlukem 	0x000, 0x061, 0x062, 0x063, 0x064, 0x065, 0x066, 0x067,
922de962bdSlukem 	0x068, 0x069, 0x06a, 0x06b, 0x06c, 0x06d, 0x06e, 0x06f,
932de962bdSlukem 	0x070, 0x071, 0x072, 0x073, 0x074, 0x075, 0x076, 0x077,
942de962bdSlukem 	0x078, 0x079, 0x07a, 0x000, 0x07c, 0x000, 0x000, 0x07f,
952de962bdSlukem 	0x080, 0x081, 0x082, 0x083, 0x084, 0x085, 0x086, 0x087,
962de962bdSlukem 	0x088, 0x089, 0x08a, 0x08b, 0x08c, 0x08d, 0x08e, 0x08f,
972de962bdSlukem 	0x090, 0x091, 0x092, 0x093, 0x094, 0x095, 0x096, 0x097,
982de962bdSlukem 	0x098, 0x099, 0x09a, 0x09b, 0x09c, 0x09d, 0x09e, 0x09f,
992de962bdSlukem 	0x0a0, 0x0a1, 0x0a2, 0x0a3, 0x024, 0x0a5, 0x023, 0x0a7,
1002de962bdSlukem 	0x0a4, 0x000, 0x000, 0x0ab, 0x000, 0x000, 0x000, 0x000,
1012de962bdSlukem 	0x0b0, 0x0b1, 0x0b2, 0x0b3, 0x0d7, 0x0b5, 0x0b6, 0x0b7,
1022de962bdSlukem 	0x0f7, 0x000, 0x000, 0x0bb, 0x0bc, 0x0bd, 0x0be, 0x0bf,
1032de962bdSlukem 	0x000, 0x300, 0x301, 0x302, 0x303, 0x304, 0x306, 0x307,
1042de962bdSlukem 	0x308, 0x000, 0x30a, 0x327, 0x332, 0x30b, 0x328, 0x30c,
1052de962bdSlukem 	0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
1062de962bdSlukem 	0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
1072de962bdSlukem 	0x2126, 0xc6, 0x0d0, 0x0aa, 0x126, 0x000, 0x132, 0x13f,
1082de962bdSlukem 	0x141, 0x0d8, 0x152, 0x0ba, 0x0de, 0x166, 0x14a, 0x149,
1092de962bdSlukem 	0x138, 0x0e6, 0x111, 0x0f0, 0x127, 0x131, 0x133, 0x140,
1102de962bdSlukem 	0x142, 0x0f8, 0x153, 0x0df, 0x0fe, 0x167, 0x14b, 0x000
1112de962bdSlukem };
1122de962bdSlukem 
/* Fixed-size rows for the lookup tables in this file: wvec16 holds the
 * spacing accents, wvec32 holds one 32-byte slice of accent composites,
 * and wvec64 holds one 64-code-point slice of the UTF-8 -> T.61 mapping.
 */
1132de962bdSlukem typedef wchar_t wvec16[16];
1142de962bdSlukem typedef wchar_t wvec32[32];
1152de962bdSlukem typedef wchar_t wvec64[64];
1162de962bdSlukem 
1172de962bdSlukem /* Substitutions when 0xc1-0xcf appears by itself or with space 0x20 */
/* Indexed by the low nibble of the accent byte (byte & 0x0f).  The value
 * is the Unicode spacing form of the accent; 0 means no spacing form is
 * provided for that accent.
 */
1182de962bdSlukem static const wvec16 accents = {
1192de962bdSlukem 	0x000, 0x060, 0x0b4, 0x05e, 0x07e, 0x0af, 0x2d8, 0x2d9,
1202de962bdSlukem 	0x0a8, 0x000, 0x2da, 0x0b8, 0x000, 0x2dd, 0x2db, 0x2c7};
1212de962bdSlukem 
1222de962bdSlukem /* In the following tables, base characters commented in (parentheses)
1232de962bdSlukem  * are not defined by T.61 but are mapped anyway since their Unicode
1242de962bdSlukem  * composite exists.
1252de962bdSlukem  */
1262de962bdSlukem 
1272de962bdSlukem /* Grave accented chars AEIOU (NWY) */
/* Composites for T.61 accent byte 0xc1.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c1_grave is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, and a NULL slot has no composites at all.
 */
1282de962bdSlukem static const wvec32 c1_vec1 = {
1292de962bdSlukem 	/* Upper case */
1302de962bdSlukem 	0, 0xc0, 0, 0, 0, 0xc8, 0, 0, 0, 0xcc, 0, 0, 0, 0, 0x1f8, 0xd2,
1312de962bdSlukem 	0, 0, 0, 0, 0, 0xd9, 0, 0x1e80, 0, 0x1ef2, 0, 0, 0, 0, 0, 0};
1322de962bdSlukem static const wvec32 c1_vec2 = {
1332de962bdSlukem 	/* Lower case */
1342de962bdSlukem 	0, 0xe0, 0, 0, 0, 0xe8, 0, 0, 0, 0xec, 0, 0, 0, 0, 0x1f9, 0xf2,
1352de962bdSlukem 	0, 0, 0, 0, 0, 0xf9, 0, 0x1e81, 0, 0x1ef3, 0, 0, 0, 0, 0, 0};
1362de962bdSlukem 
1372de962bdSlukem static const wvec32 *c1_grave[] = {
1382de962bdSlukem 	NULL, NULL, &c1_vec1, &c1_vec2, NULL, NULL, NULL, NULL
1392de962bdSlukem };
1402de962bdSlukem 
1412de962bdSlukem /* Acute accented chars AEIOUYCLNRSZ (GKMPW) */
/* Composites for T.61 accent byte 0xc2.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c2_acute is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, and slot 7 covers 0xe0-0xff (only the T.61
 * AE/ae bytes 0xe1 and 0xf1 are populated there).  NULL slots have no
 * composites.
 */
1422de962bdSlukem static const wvec32 c2_vec1 = {
1432de962bdSlukem 	/* Upper case */
1442de962bdSlukem 	0, 0xc1, 0, 0x106, 0, 0xc9, 0, 0x1f4,
1452de962bdSlukem 	0, 0xcd, 0, 0x1e30, 0x139, 0x1e3e, 0x143, 0xd3,
1462de962bdSlukem 	0x1e54, 0, 0x154, 0x15a, 0, 0xda, 0, 0x1e82,
1472de962bdSlukem 	0, 0xdd, 0x179, 0, 0, 0, 0, 0};
1482de962bdSlukem static const wvec32 c2_vec2 = {
1492de962bdSlukem 	/* Lower case */
1502de962bdSlukem 	0, 0xe1, 0, 0x107, 0, 0xe9, 0, 0x1f5,
1512de962bdSlukem 	0, 0xed, 0, 0x1e31, 0x13a, 0x1e3f, 0x144, 0xf3,
1522de962bdSlukem 	0x1e55, 0, 0x155, 0x15b, 0, 0xfa, 0, 0x1e83,
1532de962bdSlukem 	0, 0xfd, 0x17a, 0, 0, 0, 0, 0};
1542de962bdSlukem static const wvec32 c2_vec3 = {
1552de962bdSlukem 	/* (AE and ae) */
1562de962bdSlukem 	0, 0x1fc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1572de962bdSlukem 	0, 0x1fd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1582de962bdSlukem 
1592de962bdSlukem static const wvec32 *c2_acute[] = {
1602de962bdSlukem 	NULL, NULL, &c2_vec1, &c2_vec2, NULL, NULL, NULL, &c2_vec3
1612de962bdSlukem };
1622de962bdSlukem 
1632de962bdSlukem /* Circumflex AEIOUYCGHJSW (Z) */
/* Composites for T.61 accent byte 0xc3.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c3_circumflex is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
1642de962bdSlukem static const wvec32 c3_vec1 = {
1652de962bdSlukem 	/* Upper case */
1662de962bdSlukem 	0, 0xc2, 0, 0x108, 0, 0xca, 0, 0x11c,
1672de962bdSlukem 	0x124, 0xce, 0x134, 0, 0, 0, 0, 0xd4,
1682de962bdSlukem 	0, 0, 0, 0x15c, 0, 0xdb, 0, 0x174,
1692de962bdSlukem 	0, 0x176, 0x1e90, 0, 0, 0, 0, 0};
1702de962bdSlukem static const wvec32 c3_vec2 = {
1712de962bdSlukem 	/* Lower case */
1722de962bdSlukem 	0, 0xe2, 0, 0x109, 0, 0xea, 0, 0x11d,
1732de962bdSlukem 	0x125, 0xee, 0x135, 0, 0, 0, 0, 0xf4,
1742de962bdSlukem 	0, 0, 0, 0x15d, 0, 0xfb, 0, 0x175,
1752de962bdSlukem 	0, 0x177, 0x1e91, 0, 0, 0, 0, 0};
1762de962bdSlukem static const wvec32 *c3_circumflex[] = {
1772de962bdSlukem 	NULL, NULL, &c3_vec1, &c3_vec2, NULL, NULL, NULL, NULL
1782de962bdSlukem };
1792de962bdSlukem 
1802de962bdSlukem /* Tilde AIOUN (EVY) */
/* Composites for T.61 accent byte 0xc4.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c4_tilde is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
1812de962bdSlukem static const wvec32 c4_vec1 = {
1822de962bdSlukem 	/* Upper case */
1832de962bdSlukem 	0, 0xc3, 0, 0, 0, 0x1ebc, 0, 0, 0, 0x128, 0, 0, 0, 0, 0xd1, 0xd5,
1842de962bdSlukem 	0, 0, 0, 0, 0, 0x168, 0x1e7c, 0, 0, 0x1ef8, 0, 0, 0, 0, 0, 0};
1852de962bdSlukem static const wvec32 c4_vec2 = {
1862de962bdSlukem 	/* Lower case */
1872de962bdSlukem 	0, 0xe3, 0, 0, 0, 0x1ebd, 0, 0, 0, 0x129, 0, 0, 0, 0, 0xf1, 0xf5,
1882de962bdSlukem 	0, 0, 0, 0, 0, 0x169, 0x1e7d, 0, 0, 0x1ef9, 0, 0, 0, 0, 0, 0};
1892de962bdSlukem static const wvec32 *c4_tilde[] = {
1902de962bdSlukem 	NULL, NULL, &c4_vec1, &c4_vec2, NULL, NULL, NULL, NULL
1912de962bdSlukem };
1922de962bdSlukem 
1932de962bdSlukem /* Macron AEIOU (YG) */
/* Composites for T.61 accent byte 0xc5.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c5_macron is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, and slot 7 covers 0xe0-0xff (only the T.61
 * AE/ae bytes 0xe1 and 0xf1 are populated there).  NULL slots have no
 * composites.
 */
1942de962bdSlukem static const wvec32 c5_vec1 = {
1952de962bdSlukem 	/* Upper case */
1962de962bdSlukem 	0, 0x100, 0, 0, 0, 0x112, 0, 0x1e20, 0, 0x12a, 0, 0, 0, 0, 0, 0x14c,
1972de962bdSlukem 	0, 0, 0, 0, 0, 0x16a, 0, 0, 0, 0x232, 0, 0, 0, 0, 0, 0};
1982de962bdSlukem static const wvec32 c5_vec2 = {
1992de962bdSlukem 	/* Lower case */
2002de962bdSlukem 	0, 0x101, 0, 0, 0, 0x113, 0, 0x1e21, 0, 0x12b, 0, 0, 0, 0, 0, 0x14d,
2012de962bdSlukem 	0, 0, 0, 0, 0, 0x16b, 0, 0, 0, 0x233, 0, 0, 0, 0, 0, 0};
2022de962bdSlukem static const wvec32 c5_vec3 = {
2032de962bdSlukem 	/* (AE and ae) */
2042de962bdSlukem 	0, 0x1e2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2052de962bdSlukem 	0, 0x1e3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2062de962bdSlukem static const wvec32 *c5_macron[] = {
2072de962bdSlukem 	NULL, NULL, &c5_vec1, &c5_vec2, NULL, NULL, NULL, &c5_vec3
2082de962bdSlukem };
2092de962bdSlukem 
2102de962bdSlukem /* Breve AUG (EIO) */
/* Composites for T.61 accent byte 0xc6.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c6_breve is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2112de962bdSlukem static const wvec32 c6_vec1 = {
2122de962bdSlukem 	/* Upper case */
2132de962bdSlukem 	0, 0x102, 0, 0, 0, 0x114, 0, 0x11e, 0, 0x12c, 0, 0, 0, 0, 0, 0x14e,
2142de962bdSlukem 	0, 0, 0, 0, 0, 0x16c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2152de962bdSlukem static const wvec32 c6_vec2 = {
2162de962bdSlukem 	/* Lower case */
2172de962bdSlukem 	0, 0x103, 0, 0, 0, 0x115, 0, 0x11f, 0, 0x12d, 0, 0, 0, 0, 0, 0x14f,
2182de962bdSlukem 	0, 0, 0, 0, 0, 0x16d, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2192de962bdSlukem static const wvec32 *c6_breve[] = {
2202de962bdSlukem 	NULL, NULL, &c6_vec1, &c6_vec2, NULL, NULL, NULL, NULL
2212de962bdSlukem };
2222de962bdSlukem 
2232de962bdSlukem /* Dot Above CEGIZ (AOBDFHMNPRSTWXY) */
/* Composites for T.61 accent byte 0xc7.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c7_dotabove is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2242de962bdSlukem static const wvec32 c7_vec1 = {
2252de962bdSlukem 	/* Upper case */
2262de962bdSlukem 	0, 0x226, 0x1e02, 0x10a, 0x1e0a, 0x116, 0x1e1e, 0x120,
2272de962bdSlukem 	0x1e22, 0x130, 0, 0, 0, 0x1e40, 0x1e44, 0x22e,
2282de962bdSlukem 	0x1e56, 0, 0x1e58, 0x1e60, 0x1e6a, 0, 0, 0x1e86,
2292de962bdSlukem 	0x1e8a, 0x1e8e, 0x17b, 0, 0, 0, 0, 0};
2302de962bdSlukem static const wvec32 c7_vec2 = {
2312de962bdSlukem 	/* Lower case */
2322de962bdSlukem 	0, 0x227, 0x1e03, 0x10b, 0x1e0b, 0x117, 0x1e1f, 0x121,
2332de962bdSlukem 	0x1e23, 0, 0, 0, 0, 0x1e41, 0x1e45, 0x22f,
2342de962bdSlukem 	0x1e57, 0, 0x1e59, 0x1e61, 0x1e6b, 0, 0, 0x1e87,
2352de962bdSlukem 	0x1e8b, 0x1e8f, 0x17c, 0, 0, 0, 0, 0};
2362de962bdSlukem static const wvec32 *c7_dotabove[] = {
2372de962bdSlukem 	NULL, NULL, &c7_vec1, &c7_vec2, NULL, NULL, NULL, NULL
2382de962bdSlukem };
2392de962bdSlukem 
2402de962bdSlukem /* Diaeresis AEIOUY (HWXt) */
/* Composites for T.61 accent byte 0xc8.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * c8_diaeresis is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2412de962bdSlukem static const wvec32 c8_vec1 = {
2422de962bdSlukem 	/* Upper case */
2432de962bdSlukem 	0, 0xc4, 0, 0, 0, 0xcb, 0, 0, 0x1e26, 0xcf, 0, 0, 0, 0, 0, 0xd6,
2442de962bdSlukem 	0, 0, 0, 0, 0, 0xdc, 0, 0x1e84, 0x1e8c, 0x178, 0, 0, 0, 0, 0, 0};
2452de962bdSlukem static const wvec32 c8_vec2 = {
2462de962bdSlukem 	/* Lower case */
2472de962bdSlukem 	0, 0xe4, 0, 0, 0, 0xeb, 0, 0, 0x1e27, 0xef, 0, 0, 0, 0, 0, 0xf6,
2482de962bdSlukem 	0, 0, 0, 0, 0x1e97, 0xfc, 0, 0x1e85, 0x1e8d, 0xff, 0, 0, 0, 0, 0, 0};
2492de962bdSlukem static const wvec32 *c8_diaeresis[] = {
2502de962bdSlukem 	NULL, NULL, &c8_vec1, &c8_vec2, NULL, NULL, NULL, NULL
2512de962bdSlukem };
2522de962bdSlukem 
2532de962bdSlukem /* Ring Above AU (wy) */
/* Composites for T.61 accent byte 0xca.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * ca_ringabove is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2542de962bdSlukem static const wvec32 ca_vec1 = {
2552de962bdSlukem 	/* Upper case */
2562de962bdSlukem 	0, 0xc5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2572de962bdSlukem 	0, 0, 0, 0, 0, 0x16e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2582de962bdSlukem static const wvec32 ca_vec2 = {
2592de962bdSlukem 	/* Lower case */
2602de962bdSlukem 	0, 0xe5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2612de962bdSlukem 	0, 0, 0, 0, 0, 0x16f, 0, 0x1e98, 0, 0x1e99, 0, 0, 0, 0, 0, 0};
2622de962bdSlukem static const wvec32 *ca_ringabove[] = {
2632de962bdSlukem 	NULL, NULL, &ca_vec1, &ca_vec2, NULL, NULL, NULL, NULL
2642de962bdSlukem };
2652de962bdSlukem 
2662de962bdSlukem /* Cedilla CGKLNRST (EDH) */
/* Composites for T.61 accent byte 0xcb.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * cb_cedilla is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2672de962bdSlukem static const wvec32 cb_vec1 = {
2682de962bdSlukem 	/* Upper case */
2692de962bdSlukem 	0, 0, 0, 0xc7, 0x1e10, 0x228, 0, 0x122,
2702de962bdSlukem 	0x1e28, 0, 0, 0x136, 0x13b, 0, 0x145, 0,
2712de962bdSlukem 	0, 0, 0x156, 0x15e, 0x162, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2722de962bdSlukem static const wvec32 cb_vec2 = {
2732de962bdSlukem 	/* Lower case */
2742de962bdSlukem 	0, 0, 0, 0xe7, 0x1e11, 0x229, 0, 0x123,
2752de962bdSlukem 	0x1e29, 0, 0, 0x137, 0x13c, 0, 0x146, 0,
2762de962bdSlukem 	0, 0, 0x157, 0x15f, 0x163, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2772de962bdSlukem static const wvec32 *cb_cedilla[] = {
2782de962bdSlukem 	NULL, NULL, &cb_vec1, &cb_vec2, NULL, NULL, NULL, NULL
2792de962bdSlukem };
2802de962bdSlukem 
2812de962bdSlukem /* Double Acute Accent OU */
/* Composites for T.61 accent byte 0xcd.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * cd_doubleacute is indexed by (base byte >> 5): slot 2 covers bases
 * 0x40-0x5f, slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2822de962bdSlukem static const wvec32 cd_vec1 = {
2832de962bdSlukem 	/* Upper case */
2842de962bdSlukem 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x150,
2852de962bdSlukem 	0, 0, 0, 0, 0, 0x170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2862de962bdSlukem static const wvec32 cd_vec2 = {
2872de962bdSlukem 	/* Lower case */
2882de962bdSlukem 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x151,
2892de962bdSlukem 	0, 0, 0, 0, 0, 0x171, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2902de962bdSlukem static const wvec32 *cd_doubleacute[] = {
2912de962bdSlukem 	NULL, NULL, &cd_vec1, &cd_vec2, NULL, NULL, NULL, NULL
2922de962bdSlukem };
2932de962bdSlukem 
2942de962bdSlukem /* Ogonek AEIU (O) */
/* Composites for T.61 accent byte 0xce.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * ce_ogonek is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
2952de962bdSlukem static const wvec32 ce_vec1 = {
2962de962bdSlukem 	/* Upper case */
2972de962bdSlukem 	0, 0x104, 0, 0, 0, 0x118, 0, 0, 0, 0x12e, 0, 0, 0, 0, 0, 0x1ea,
2982de962bdSlukem 	0, 0, 0, 0, 0, 0x172, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2992de962bdSlukem static const wvec32 ce_vec2 = {
3002de962bdSlukem 	/* Lower case */
3012de962bdSlukem 	0, 0x105, 0, 0, 0, 0x119, 0, 0, 0, 0x12f, 0, 0, 0, 0, 0, 0x1eb,
3022de962bdSlukem 	0, 0, 0, 0, 0, 0x173, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3032de962bdSlukem static const wvec32 *ce_ogonek[] = {
3042de962bdSlukem 	NULL, NULL, &ce_vec1, &ce_vec2, NULL, NULL, NULL, NULL
3052de962bdSlukem };
3062de962bdSlukem 
3072de962bdSlukem /* Caron CDELNRSTZ (AIOUGKjH) */
/* Composites for T.61 accent byte 0xcf.  Each vector is indexed by
 * (base byte & 0x1f); a 0 entry means no precomposed form exists.
 * cf_caron is indexed by (base byte >> 5): slot 2 covers bases 0x40-0x5f,
 * slot 3 covers 0x60-0x7f, NULL slots have no composites.
 */
3082de962bdSlukem static const wvec32 cf_vec1 = {
3092de962bdSlukem 	/* Upper case */
3102de962bdSlukem 	0, 0x1cd, 0, 0x10c, 0x10e, 0x11a, 0, 0x1e6,
3112de962bdSlukem 	0x21e, 0x1cf, 0, 0x1e8, 0x13d, 0, 0x147, 0x1d1,
3122de962bdSlukem 	0, 0, 0x158, 0x160, 0x164, 0x1d3, 0, 0,
3132de962bdSlukem 	0, 0, 0x17d, 0, 0, 0, 0, 0};
3142de962bdSlukem static const wvec32 cf_vec2 = {
3152de962bdSlukem 	/* Lower case */
3162de962bdSlukem 	0, 0x1ce, 0, 0x10d, 0x10f, 0x11b, 0, 0x1e7,
3172de962bdSlukem 	0x21f, 0x1d0, 0x1f0, 0x1e9, 0x13e, 0, 0x148, 0x1d2,
3182de962bdSlukem 	0, 0, 0x159, 0x161, 0x165, 0x1d4, 0, 0,
3192de962bdSlukem 	0, 0, 0x17e, 0, 0, 0, 0, 0};
3202de962bdSlukem static const wvec32 *cf_caron[] = {
3212de962bdSlukem 	NULL, NULL, &cf_vec1, &cf_vec2, NULL, NULL, NULL, NULL
3222de962bdSlukem };
3232de962bdSlukem 
/* Top level of the composite lookup, indexed by the low nibble of the
 * accent byte (0xc1 -> entry 1 ... 0xcf -> entry 15).  NULL entries are
 * accents with no composite tables.  ldap_t61s_to_utf8s() resolves an
 * accent/base pair as
 *	(*cx_tab[accent & 0x0f][base >> 5])[base & 0x1f]
 * after checking that the first two levels are non-NULL; a 0 result means
 * no precomposed Unicode character exists for the pair.
 */
3242de962bdSlukem static const wvec32 **cx_tab[] = {
3252de962bdSlukem 	NULL, c1_grave, c2_acute, c3_circumflex, c4_tilde, c5_macron,
3262de962bdSlukem 	c6_breve, c7_dotabove, c8_diaeresis, NULL, ca_ringabove,
3272de962bdSlukem 	cb_cedilla, NULL, cd_doubleacute, ce_ogonek, cf_caron };
3282de962bdSlukem 
ldap_t61s_valid(struct berval * str)3292de962bdSlukem int ldap_t61s_valid( struct berval *str )
3302de962bdSlukem {
3312de962bdSlukem 	unsigned char *c = (unsigned char *)str->bv_val;
3322de962bdSlukem 	int i;
3332de962bdSlukem 
3342de962bdSlukem 	for (i=0; i < str->bv_len; c++,i++)
3352de962bdSlukem 		if (!t61_tab[*c])
3362de962bdSlukem 			return 0;
3372de962bdSlukem 	return 1;
3382de962bdSlukem }
3392de962bdSlukem 
3402de962bdSlukem /* Transform a T.61 string to UTF-8.
3412de962bdSlukem  */
ldap_t61s_to_utf8s(struct berval * src,struct berval * dst)3422de962bdSlukem int ldap_t61s_to_utf8s( struct berval *src, struct berval *dst )
3432de962bdSlukem {
3442de962bdSlukem 	unsigned char *c;
3452de962bdSlukem 	char *d;
3462de962bdSlukem 	int i, wlen = 0;
3472de962bdSlukem 
3482de962bdSlukem 	/* Just count the length of the UTF-8 result first */
3492de962bdSlukem 	for (i=0,c=(unsigned char *)src->bv_val; i < src->bv_len; c++,i++) {
3502de962bdSlukem 		/* Invalid T.61 characters? */
3512de962bdSlukem 		if (!t61_tab[*c])
3522de962bdSlukem 			return LDAP_INVALID_SYNTAX;
3532de962bdSlukem 		if ((*c & 0xf0) == 0xc0) {
3542de962bdSlukem 			int j = *c & 0x0f;
3552de962bdSlukem 			/* If this is the end of the string, or if the base
3562de962bdSlukem 			 * character is just a space, treat this as a regular
3572de962bdSlukem 			 * spacing character.
3582de962bdSlukem 			 */
3592de962bdSlukem 			if ((!c[1] || c[1] == 0x20) && accents[j]) {
3602de962bdSlukem 				wlen += ldap_x_wc_to_utf8(NULL, accents[j], 0);
3612de962bdSlukem 			} else if (cx_tab[j] && cx_tab[j][c[1]>>5] &&
3622de962bdSlukem 			/* We have a composite mapping for this pair */
3632de962bdSlukem 				(*cx_tab[j][c[1]>>5])[c[1]&0x1f]) {
3642de962bdSlukem 				wlen += ldap_x_wc_to_utf8( NULL,
3652de962bdSlukem 					(*cx_tab[j][c[1]>>5])[c[1]&0x1f], 0);
3662de962bdSlukem 			} else {
3672de962bdSlukem 			/* No mapping, just swap it around so the base
3682de962bdSlukem 			 * character comes first.
3692de962bdSlukem 			 */
3702de962bdSlukem 			 	wlen += ldap_x_wc_to_utf8(NULL, c[1], 0);
3712de962bdSlukem 				wlen += ldap_x_wc_to_utf8(NULL,
3722de962bdSlukem 					t61_tab[*c], 0);
3732de962bdSlukem 			}
3742de962bdSlukem 			c++; i++;
3752de962bdSlukem 			continue;
3762de962bdSlukem 		} else {
3772de962bdSlukem 			wlen += ldap_x_wc_to_utf8(NULL, t61_tab[*c], 0);
3782de962bdSlukem 		}
3792de962bdSlukem 	}
3802de962bdSlukem 
3812de962bdSlukem 	/* Now transform the string */
3822de962bdSlukem 	dst->bv_len = wlen;
3832de962bdSlukem 	dst->bv_val = LDAP_MALLOC( wlen+1 );
3842de962bdSlukem 	d = dst->bv_val;
3852de962bdSlukem 	if (!d)
3862de962bdSlukem 		return LDAP_NO_MEMORY;
3872de962bdSlukem 
3882de962bdSlukem 	for (i=0,c=(unsigned char *)src->bv_val; i < src->bv_len; c++,i++) {
3892de962bdSlukem 		if ((*c & 0xf0) == 0xc0) {
3902de962bdSlukem 			int j = *c & 0x0f;
3912de962bdSlukem 			/* If this is the end of the string, or if the base
3922de962bdSlukem 			 * character is just a space, treat this as a regular
3932de962bdSlukem 			 * spacing character.
3942de962bdSlukem 			 */
3952de962bdSlukem 			if ((!c[1] || c[1] == 0x20) && accents[j]) {
3962de962bdSlukem 				d += ldap_x_wc_to_utf8(d, accents[j], 6);
3972de962bdSlukem 			} else if (cx_tab[j] && cx_tab[j][c[1]>>5] &&
3982de962bdSlukem 			/* We have a composite mapping for this pair */
3992de962bdSlukem 				(*cx_tab[j][c[1]>>5])[c[1]&0x1f]) {
4002de962bdSlukem 				d += ldap_x_wc_to_utf8(d,
4012de962bdSlukem 				(*cx_tab[j][c[1]>>5])[c[1]&0x1f], 6);
4022de962bdSlukem 			} else {
4032de962bdSlukem 			/* No mapping, just swap it around so the base
4042de962bdSlukem 			 * character comes first.
4052de962bdSlukem 			 */
4062de962bdSlukem 				d += ldap_x_wc_to_utf8(d, c[1], 6);
4072de962bdSlukem 				d += ldap_x_wc_to_utf8(d, t61_tab[*c], 6);
4082de962bdSlukem 			}
4092de962bdSlukem 			c++; i++;
4102de962bdSlukem 			continue;
4112de962bdSlukem 		} else {
4122de962bdSlukem 			d += ldap_x_wc_to_utf8(d, t61_tab[*c], 6);
4132de962bdSlukem 		}
4142de962bdSlukem 	}
4152de962bdSlukem 	*d = '\0';
4162de962bdSlukem 	return LDAP_SUCCESS;
4172de962bdSlukem }
4182de962bdSlukem 
4192de962bdSlukem /* For the reverse mapping, we just pay attention to the Latin-oriented
4202de962bdSlukem  * code blocks. These are
4212de962bdSlukem  *	0000 - 007f Basic Latin
4222de962bdSlukem  *	0080 - 00ff Latin-1 Supplement
4232de962bdSlukem  *	0100 - 017f Latin Extended-A
4242de962bdSlukem  *	0180 - 024f Latin Extended-B
4252de962bdSlukem  *	1e00 - 1eff Latin Extended Additional
4262de962bdSlukem  *
4272de962bdSlukem  * We have a special case to map Ohm U2126 back to T.61 0xe0. All other
4282de962bdSlukem  * unrecognized characters are replaced with '?' 0x3f.
4292de962bdSlukem  */
4302de962bdSlukem 
/* T.61 equivalents for U+0000-U+003F, one entry per code point in order.
 * '#' (U+0023) and '$' (U+0024) become the T.61 bytes 0xa6 and 0xa4.
 * NOTE(review): the code that reads this table is outside this view;
 * presumably it indexes with (code point & 0x3f) -- confirm.
 */
4312de962bdSlukem static const wvec64 u000 = {
4322de962bdSlukem 	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
4332de962bdSlukem 	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
4342de962bdSlukem 	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
4352de962bdSlukem 	0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
4362de962bdSlukem 	0x0020, 0x0021, 0x0022, 0x00a6, 0x00a4, 0x0025, 0x0026, 0x0027,
4372de962bdSlukem 	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
4382de962bdSlukem 	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
4392de962bdSlukem 	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f};
4402de962bdSlukem 
4412de962bdSlukem /* In this range, we've mapped caret to xc3/x20, backquote to xc1/x20,
4422de962bdSlukem  * and tilde to xc4/x20. T.61 (stupidly!) doesn't define these characters
4432de962bdSlukem  * on their own, even though it provides them as combiners for other
4442de962bdSlukem  * letters. T.61 doesn't define these pairings either, so this may just
4452de962bdSlukem  * have to be replaced with '?' 0x3f if other software can't cope with it.
4462de962bdSlukem  */
/* T.61 equivalents for U+0040-U+007F, one entry per code point in order.
 * A value above 0xff packs two T.61 bytes: the accent in the high byte
 * and the base in the low byte (0xc320 is 0xc3 followed by 0x20).
 * 0x003f ('?') stands in for characters T.61 cannot represent.
 */
4472de962bdSlukem static const wvec64 u001 = {
4482de962bdSlukem 	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
4492de962bdSlukem 	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
4502de962bdSlukem 	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
4512de962bdSlukem 	0x0058, 0x0059, 0x005a, 0x005b, 0x003f, 0x005d, 0xc320, 0x005f,
4522de962bdSlukem 	0xc120, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
4532de962bdSlukem 	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
4542de962bdSlukem 	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
4552de962bdSlukem 	0x0078, 0x0079, 0x007a, 0x003f, 0x007c, 0x003f, 0xc420, 0x007f};
4562de962bdSlukem 
/* T.61 equivalents for U+0080-U+00BF, one entry per code point in order.
 * Spacing accents (e.g. U+00A8, U+00AF, U+00B4, U+00B8) are written as a
 * T.61 accent byte followed by a space, packed as (accent << 8) | 0x20.
 * 0x003f ('?') stands in for characters T.61 cannot represent.
 */
4572de962bdSlukem static const wvec64 u002 = {
4582de962bdSlukem 	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
4592de962bdSlukem 	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
4602de962bdSlukem 	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
4612de962bdSlukem 	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
4622de962bdSlukem 	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a8, 0x00a5, 0x003f, 0x00a7,
4632de962bdSlukem 	0xc820, 0x003f, 0x00e3, 0x00ab, 0x003f, 0x003f, 0x003f, 0xc520,
4642de962bdSlukem 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0xc220, 0x00b5, 0x00b6, 0x00b7,
4652de962bdSlukem 	0xcb20, 0x003f, 0x00eb, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf};
4662de962bdSlukem 
/* T.61 equivalents for U+00C0-U+00FF, one entry per code point in order
 * (entry 0 is 0xc141: grave accent 0xc1 followed by 'A').  Values above
 * 0xff pack the accent byte in the high byte and the base byte in the low
 * byte; values below 0x100 are single T.61 bytes.
 */
4672de962bdSlukem static const wvec64 u003 = {
4682de962bdSlukem 	0xc141, 0xc241, 0xc341, 0xc441, 0xc841, 0xca41, 0x00e1, 0xcb43,
4692de962bdSlukem 	0xc145, 0xc245, 0xc345, 0xc845, 0xc149, 0xc249, 0xc349, 0xc849,
4702de962bdSlukem 	0x00e2, 0xc44e, 0xc14f, 0xc24f, 0xc34f, 0xc44f, 0xc84f, 0x00b4,
4712de962bdSlukem 	0x00e9, 0xc155, 0xc255, 0xc355, 0xc855, 0xc259, 0x00ec, 0x00fb,
4722de962bdSlukem 	0xc161, 0xc261, 0xc361, 0xc461, 0xc861, 0xca61, 0x00f1, 0xcb63,
4732de962bdSlukem 	0xc165, 0xc265, 0xc365, 0xc865, 0xc169, 0xc269, 0xc369, 0xc869,
4742de962bdSlukem 	0x00f3, 0xc46e, 0xc16f, 0xc26f, 0xc36f, 0xc46f, 0xc86f, 0x00b8,
4752de962bdSlukem 	0x00f9, 0xc175, 0xc275, 0xc375, 0xc875, 0xc279, 0x00fc, 0xc879};
4762de962bdSlukem 
4772de962bdSlukem /* These codes are used here but not defined by T.61:
4782de962bdSlukem  * x114 = xc6/x45, x115 = xc6/x65, x12c = xc6/x49, x12d = xc6/x69
4792de962bdSlukem  */
/* T.61 equivalents for U+0100-U+013F, one entry per code point in order
 * (entry 0 is 0xc541: macron 0xc5 followed by 'A').  Values above 0xff
 * pack the accent byte in the high byte and the base byte in the low
 * byte; 0x003f ('?') marks a character T.61 cannot represent.
 */
4802de962bdSlukem static const wvec64 u010 = {
4812de962bdSlukem 	0xc541, 0xc561, 0xc641, 0xc661, 0xce41, 0xce61, 0xc243, 0xc263,
4822de962bdSlukem 	0xc343, 0xc363, 0xc743, 0xc763, 0xcf43, 0xcf63, 0xcf44, 0xcf64,
4832de962bdSlukem 	0x003f, 0x00f2, 0xc545, 0xc565, 0xc645, 0xc665, 0xc745, 0xc765,
4842de962bdSlukem 	0xce45, 0xce65, 0xcf45, 0xcf65, 0xc347, 0xc367, 0xc647, 0xc667,
4852de962bdSlukem 	0xc747, 0xc767, 0xcb47, 0xcb67, 0xc348, 0xc368, 0x00e4, 0x00f4,
4862de962bdSlukem 	0xc449, 0xc469, 0xc549, 0xc569, 0xc649, 0xc669, 0xce49, 0xce69,
4872de962bdSlukem 	0xc749, 0x00f5, 0x00e6, 0x00f6, 0xc34a, 0xc36a, 0xcb4b, 0xcb6b,
4882de962bdSlukem 	0x00f0, 0xc24c, 0xc26c, 0xcb4c, 0xcb6c, 0xcf4c, 0xcf6c, 0x00e7};
4892de962bdSlukem 
4902de962bdSlukem /* These codes are used here but not defined by T.61:
4912de962bdSlukem  * x14e = xc6/x4f, x14f = xc6/x6f
4922de962bdSlukem  */
/* T.61 equivalents for U+0140-U+017F, one entry per code point in order
 * (entry 0 is the single T.61 byte 0xf7, which t61_tab maps to U+0140).
 * Values above 0xff pack the accent byte in the high byte and the base
 * byte in the low byte; 0x003f ('?') marks an unrepresentable character.
 */
4932de962bdSlukem static const wvec64 u011 = {
4942de962bdSlukem 	0x00f7, 0x00e8, 0x00f8, 0xc24e, 0xc26e, 0xcb4e, 0xcb6e, 0xcf4e,
4952de962bdSlukem 	0xcf6e, 0x00ef, 0x00ee, 0x00fe, 0xc54f, 0xc56f, 0xc64f, 0xc66f,
4962de962bdSlukem 	0xcd4f, 0xcd6f, 0x00ea, 0x00fa, 0xc252, 0xc272, 0xcb52, 0xcb72,
4972de962bdSlukem 	0xcf52, 0xcf72, 0xc253, 0xc273, 0xc353, 0xc373, 0xcb53, 0xcb73,
4982de962bdSlukem 	0xcf53, 0xcf73, 0xcb54, 0xcb74, 0xcf54, 0xcf74, 0x00ed, 0x00fd,
4992de962bdSlukem 	0xc455, 0xc475, 0xc555, 0xc575, 0xc655, 0xc675, 0xca55, 0xca75,
5002de962bdSlukem 	0xcd55, 0xcd75, 0xce55, 0xce75, 0xc357, 0xc377, 0xc359, 0xc379,
5012de962bdSlukem 	0xc859, 0xc25a, 0xc27a, 0xc75a, 0xc77a, 0xcf5a, 0xcf7a, 0x003f};
5022de962bdSlukem 
5032de962bdSlukem /* All of the codes in this block are undefined in T.61.
5042de962bdSlukem  */
/* T.61 equivalents for U+01C0-U+01FF, one entry per code point in order
 * (entry 0x0d is 0xcf41: caron 0xcf followed by 'A', i.e. U+01CD).
 * Values above 0xff pack the accent byte in the high byte and the base
 * byte in the low byte; 0x003f ('?') marks an unrepresentable character.
 */
5052de962bdSlukem static const wvec64 u013 = {
5062de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5072de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xcf41, 0xcf61, 0xcf49,
5082de962bdSlukem 	0xcf69, 0xcf4f, 0xcf6f, 0xcf55, 0xcf75, 0x003f, 0x003f, 0x003f,
5092de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5102de962bdSlukem 	0x003f, 0x003f, 0xc5e1, 0xc5f1, 0x003f, 0x003f, 0xcf47, 0xcf67,
5112de962bdSlukem 	0xcf4b, 0xcf6b, 0xce4f, 0xce6f, 0x003f, 0x003f, 0x003f, 0x003f,
5122de962bdSlukem 	0xcf6a, 0x003f, 0x003f, 0x003f, 0xc247, 0xc267, 0x003f, 0x003f,
5132de962bdSlukem 	0xc14e, 0xc16e, 0x003f, 0x003f, 0xc2e1, 0xc2f1, 0x003f, 0x003f};
5142de962bdSlukem 
5152de962bdSlukem /* All of the codes in this block are undefined in T.61.
5162de962bdSlukem  */
/* T.61 equivalents for U+0200-U+023F, one entry per code point in order
 * (entry 0x1e is 0xcf48: caron 0xcf followed by 'H', i.e. U+021E).
 * Values above 0xff pack the accent byte in the high byte and the base
 * byte in the low byte; 0x003f ('?') marks an unrepresentable character.
 */
5172de962bdSlukem static const wvec64 u020 = {
5182de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5192de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5202de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5212de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xcf48, 0xcf68,
5222de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xc741, 0xc761,
5232de962bdSlukem 	0xcb45, 0xcb65, 0x003f, 0x003f, 0x003f, 0x003f, 0xc74f, 0xc76f,
5242de962bdSlukem 	0x003f, 0x003f, 0xc559, 0xc579, 0x003f, 0x003f, 0x003f, 0x003f,
5252de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f};
5262de962bdSlukem 
/* T.61 equivalents for U+02C0-U+02FF, one entry per code point in order.
 * The populated entries are the spacing accents from the accents table
 * (U+02C7, U+02D8-U+02DB, U+02DD), each written as a T.61 accent byte
 * followed by a space: (accent << 8) | 0x20.  0x003f ('?') marks an
 * unrepresentable character.
 */
5272de962bdSlukem static const wvec64 u023 = {
5282de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xcf20,
5292de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5302de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5312de962bdSlukem 	0xc620, 0xc720, 0xca20, 0xce20, 0x003f, 0xcd20, 0x003f, 0x003f,
5322de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5332de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5342de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5352de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f};
5362de962bdSlukem 
5372de962bdSlukem /* These are the non-spacing characters by themselves. They should
5382de962bdSlukem  * never appear by themselves in actual text.
5392de962bdSlukem  */
/* T.61 equivalents for U+0300-U+033F, one entry per code point in order.
 * This is the inverse of t61_tab entries 0xc1-0xcf: each combining mark
 * maps back to its bare T.61 accent byte.  0x003f ('?') marks a combining
 * mark with no T.61 accent.
 */
5402de962bdSlukem static const wvec64 u030 = {
5412de962bdSlukem 	0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x003f, 0x00c6, 0x00c7,
5422de962bdSlukem 	0x00c8, 0x003f, 0x00ca, 0x00cd, 0x00cf, 0x003f, 0x003f, 0x003f,
5432de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5442de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5452de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x00cb,
5462de962bdSlukem 	0x00ce, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5472de962bdSlukem 	0x003f, 0x003f, 0x00cc, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5482de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f};
5492de962bdSlukem 
5502de962bdSlukem /* None of the following blocks are defined in T.61.
5512de962bdSlukem  */
/* T.61 equivalents for U+1E00-U+1E3F, one entry per code point in order
 * (entry 2 is 0xc742: dot-above 0xc7 followed by 'B', i.e. U+1E02).
 * Values above 0xff pack the accent byte in the high byte and the base
 * byte in the low byte; 0x003f ('?') marks an unrepresentable character.
 */
5522de962bdSlukem static const wvec64 u1e0 = {
5532de962bdSlukem 	0x003f, 0x003f, 0xc742, 0xc762, 0x003f, 0x003f, 0x003f, 0x003f,
5542de962bdSlukem 	0x003f, 0x003f, 0xc744, 0xc764, 0x003f, 0x003f, 0x003f, 0x003f,
5552de962bdSlukem 	0xcb44, 0xcb64, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5562de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xc746, 0xc766,
5572de962bdSlukem 	0xc547, 0xc567, 0xc748, 0xc768, 0x003f, 0x003f, 0xc848, 0xc868,
5582de962bdSlukem 	0xcb48, 0xcb68, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5592de962bdSlukem 	0xc24b, 0xc26b, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5602de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xc24d, 0xc26d,
5612de962bdSlukem };
5622de962bdSlukem 
/* T.61 equivalents for U+1E40-U+1E7F, one entry per code point in order
 * (entry 0 is 0xc74d: dot-above 0xc7 followed by 'M', i.e. U+1E40).
 * Values above 0xff pack the accent byte in the high byte and the base
 * byte in the low byte; 0x003f ('?') marks an unrepresentable character.
 */
5632de962bdSlukem static const wvec64 u1e1 = {
5642de962bdSlukem 	0xc74d, 0xc76d, 0x003f, 0x003f, 0xc74e, 0xc76e, 0x003f, 0x003f,
5652de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5662de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0xc250, 0xc270, 0xc750, 0xc770,
5672de962bdSlukem 	0xc752, 0xc772, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5682de962bdSlukem 	0xc753, 0xc773, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5692de962bdSlukem 	0x003f, 0x003f, 0xc754, 0xc774, 0x003f, 0x003f, 0x003f, 0x003f,
5702de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5712de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0xc456, 0xc476, 0x003f, 0x003f,
5722de962bdSlukem };
5732de962bdSlukem 
5742de962bdSlukem static const wvec64 u1e2 = {
5752de962bdSlukem 	0xc157, 0xc177, 0xc257, 0xc277, 0xc857, 0xc877, 0xc757, 0xc777,
5762de962bdSlukem 	0x003f, 0x003f, 0xc758, 0xc778, 0xc858, 0xc878, 0xc759, 0xc779,
5772de962bdSlukem 	0xc35a, 0xc37a, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0xc874,
5782de962bdSlukem 	0xca77, 0xca79, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5792de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5802de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5812de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5822de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0xc445, 0xc465, 0x003f, 0x003f,
5832de962bdSlukem };
5842de962bdSlukem 
5852de962bdSlukem static const wvec64 u1e3 = {
5862de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5872de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5882de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5892de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5902de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5912de962bdSlukem 	0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5922de962bdSlukem 	0x003f, 0x003f, 0xc159, 0xc179, 0x003f, 0x003f, 0x003f, 0x003f,
5932de962bdSlukem 	0xc459, 0xc479, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f, 0x003f,
5942de962bdSlukem };
5952de962bdSlukem 
5962de962bdSlukem static const wvec64 *wc00[] = {
5972de962bdSlukem 	&u000, &u001, &u002, &u003,
5982de962bdSlukem 	&u010, &u011, NULL, &u013,
5992de962bdSlukem 	&u020, NULL, NULL, &u023,
6002de962bdSlukem 	&u030, NULL, NULL, NULL};
6012de962bdSlukem 
6022de962bdSlukem static const wvec64 *wc1e[] = {
6032de962bdSlukem 	&u1e0, &u1e1, &u1e2, &u1e3};
6042de962bdSlukem 
6052de962bdSlukem 
ldap_utf8s_to_t61s(struct berval * src,struct berval * dst)6062de962bdSlukem int ldap_utf8s_to_t61s( struct berval *src, struct berval *dst )
6072de962bdSlukem {
6082de962bdSlukem 	char *c, *d;
6092de962bdSlukem 	wchar_t tmp;
6102de962bdSlukem 	int i, j, tlen = 0;
6112de962bdSlukem 
6122de962bdSlukem 	/* Just count the length of the T.61 result first */
6132de962bdSlukem 	for (i=0,c=src->bv_val; i < src->bv_len;) {
6142de962bdSlukem 		j = ldap_x_utf8_to_wc( &tmp, c );
6152de962bdSlukem 		if (j == -1)
6162de962bdSlukem 			return LDAP_INVALID_SYNTAX;
6172de962bdSlukem 		switch (tmp >> 8) {
6182de962bdSlukem 		case 0x00:
6192de962bdSlukem 		case 0x01:
6202de962bdSlukem 		case 0x02:
6212de962bdSlukem 		case 0x03:
6222de962bdSlukem 			if (wc00[tmp >> 6] &&
6232de962bdSlukem 				((*wc00[tmp >> 6])[tmp & 0x3f] & 0xff00)) {
6242de962bdSlukem 				tlen++;
6252de962bdSlukem 			}
6262de962bdSlukem 			tlen++;
6272de962bdSlukem 			break;
6282de962bdSlukem 		case 0x1e:
6292de962bdSlukem 			if ((*wc1e[(tmp >> 6) & 3])[tmp & 0x3f] & 0xff00) {
6302de962bdSlukem 				tlen++;
6312de962bdSlukem 			}
6322de962bdSlukem 		case 0x21:
6332de962bdSlukem 		default:
6342de962bdSlukem 			tlen ++;
6352de962bdSlukem 			break;
6362de962bdSlukem 		}
6372de962bdSlukem 		i += j;
6382de962bdSlukem 		c += j;
6392de962bdSlukem 	}
6402de962bdSlukem 	dst->bv_len = tlen;
6412de962bdSlukem 	dst->bv_val = LDAP_MALLOC( tlen+1 );
6422de962bdSlukem 	if (!dst->bv_val)
6432de962bdSlukem 		return LDAP_NO_MEMORY;
6442de962bdSlukem 
6452de962bdSlukem 	d = dst->bv_val;
6462de962bdSlukem 	for (i=0,c=src->bv_val; i < src->bv_len;) {
6472de962bdSlukem 		j = ldap_x_utf8_to_wc( &tmp, c );
6482de962bdSlukem 		switch (tmp >> 8) {
6492de962bdSlukem 		case 0x00:
6502de962bdSlukem 		case 0x01:
6512de962bdSlukem 		case 0x02:
6522de962bdSlukem 			if (wc00[tmp >> 6]) {
6532de962bdSlukem 				tmp = (*wc00[tmp >> 6])[tmp & 0x3f];
6542de962bdSlukem 				if (tmp & 0xff00)
6552de962bdSlukem 					*d++ = (tmp >> 8);
6562de962bdSlukem 				*d++ = tmp & 0xff;
6572de962bdSlukem 			} else {
6582de962bdSlukem 				*d++ = 0x3f;
6592de962bdSlukem 			}
6602de962bdSlukem 			break;
6612de962bdSlukem 		case 0x03:
6622de962bdSlukem 			/* swap order of non-spacing characters */
6632de962bdSlukem 			if (wc00[tmp >> 6]) {
6642de962bdSlukem 				wchar_t t2 = (*wc00[tmp >> 6])[tmp & 0x3f];
6652de962bdSlukem 				if (t2 != 0x3f) {
6662de962bdSlukem 					d[0] = d[-1];
6672de962bdSlukem 					d[-1] = t2;
6682de962bdSlukem 					d++;
6692de962bdSlukem 				} else {
6702de962bdSlukem 					*d++ = 0x3f;
6712de962bdSlukem 				}
6722de962bdSlukem 			} else {
6732de962bdSlukem 				*d++ = 0x3f;
6742de962bdSlukem 			}
6752de962bdSlukem 			break;
6762de962bdSlukem 		case 0x1e:
6772de962bdSlukem 			tmp = (*wc1e[(tmp >> 6) & 3])[tmp & 0x3f];
6782de962bdSlukem 			if (tmp & 0xff00)
6792de962bdSlukem 				*d++ = (tmp >> 8);
6802de962bdSlukem 			*d++ = tmp & 0xff;
6812de962bdSlukem 			break;
6822de962bdSlukem 		case 0x21:
6832de962bdSlukem 			if (tmp == 0x2126) {
6842de962bdSlukem 				*d++ = 0xe0;
6852de962bdSlukem 				break;
6862de962bdSlukem 			}
6872de962bdSlukem 			/* FALLTHRU */
6882de962bdSlukem 		default:
6892de962bdSlukem 			*d++ = 0x3f;
6902de962bdSlukem 			break;
6912de962bdSlukem 		}
692bb30016cSlukem 		i += j;
693bb30016cSlukem 		c += j;
6942de962bdSlukem 	}
6952de962bdSlukem 	*d = '\0';
6962de962bdSlukem 	return LDAP_SUCCESS;
6972de962bdSlukem }
698