13e12c5d1SDavid du Colombier #include <stdlib.h>
2*e94a8e9bSDavid du Colombier #include <limits.h>
33e12c5d1SDavid du Colombier
/*
 * Use the FSS-UTF transformation proposed by POSIX.
 *	We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx Tx Tx Tx	31
 */
/* mbtowc is defined further down; declare it so mblen can call it. */
int mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * Length in bytes of the multibyte character starting at s,
 * looking at no more than n bytes.
 *
 * This is mbtowc with the decoded value thrown away, so the
 * result is whatever mbtowc returns for the same s and n.
 */
int
mblen(const char *s, size_t n)
{
	int len;

	len = mbtowc((wchar_t *)0, s, n);
	return len;
}
323e12c5d1SDavid du Colombier
/*
 * Bit patterns and shift counts used by mbtowc and wctomb.
 */
enum {
	C0MSK = 0x7F,		/* the 7 free bits of a T0 byte (not referenced in the code shown) */
	C1MSK = 0x7FF,		/* the 11 value bits of a two-byte sequence */
	T1 = 0xC0,		/* lead byte tag 110xxxxx */
	T2 = 0xE0,		/* lead byte tag 1110xxxx */
	NT1BITS = 11,		/* value bits carried by T1 Tx */
	NSHFT = 5,		/* value bits gained by each further byte in a sequence */
	NCSHFT = NSHFT + 1,	/* value bits held in one continuation byte */
	/*
	 * Every value the encoding can express fits in 31 bits.
	 * Spell the mask out: deriving it as 1 << (8*MB_LEN_MAX - 1)
	 * shifts an int by 31 bits or more, which overflows when
	 * MB_LEN_MAX is 4 and is not a valid constant expression
	 * when MB_LEN_MAX is larger.
	 */
	WCHARMSK = 0x7FFFFFFF,
};
43*e94a8e9bSDavid du Colombier
443e12c5d1SDavid du Colombier int
mbtowc(wchar_t * pwc,const char * s,size_t n)453e12c5d1SDavid du Colombier mbtowc(wchar_t *pwc, const char *s, size_t n)
463e12c5d1SDavid du Colombier {
47*e94a8e9bSDavid du Colombier unsigned long long c[MB_LEN_MAX];
48*e94a8e9bSDavid du Colombier unsigned long long l, m, wm, b;
49*e94a8e9bSDavid du Colombier int i;
503e12c5d1SDavid du Colombier
513e12c5d1SDavid du Colombier if(!s)
523e12c5d1SDavid du Colombier return 0;
533e12c5d1SDavid du Colombier
543e12c5d1SDavid du Colombier if(n < 1)
553e12c5d1SDavid du Colombier goto bad;
56*e94a8e9bSDavid du Colombier
57*e94a8e9bSDavid du Colombier c[0] = s[0] & 0xff; /* first one is special */
58*e94a8e9bSDavid du Colombier if((c[0] & 0x80) == 0x00) {
593e12c5d1SDavid du Colombier if(pwc)
60*e94a8e9bSDavid du Colombier *pwc = c[0];
61*e94a8e9bSDavid du Colombier if(c[0] == 0)
623e12c5d1SDavid du Colombier return 0;
633e12c5d1SDavid du Colombier return 1;
643e12c5d1SDavid du Colombier }
653e12c5d1SDavid du Colombier
66*e94a8e9bSDavid du Colombier m = T2;
67*e94a8e9bSDavid du Colombier b = m^0x20;
68*e94a8e9bSDavid du Colombier l = c[0];
69*e94a8e9bSDavid du Colombier wm = C1MSK;
70*e94a8e9bSDavid du Colombier for(i = 1; i < MB_LEN_MAX + 1; i++){
71*e94a8e9bSDavid du Colombier if(n < i+1)
723e12c5d1SDavid du Colombier goto bad;
73*e94a8e9bSDavid du Colombier c[i] = (s[i] ^ 0x80) & 0xff;
74*e94a8e9bSDavid du Colombier l = (l << NCSHFT) | c[i];
75*e94a8e9bSDavid du Colombier if((c[i] & 0xC0) != 0x00)
763e12c5d1SDavid du Colombier goto bad;
77*e94a8e9bSDavid du Colombier if((c[0] & m) == b) {
783e12c5d1SDavid du Colombier if(pwc)
79*e94a8e9bSDavid du Colombier *pwc = l & wm;
80*e94a8e9bSDavid du Colombier return i + 1;
813e12c5d1SDavid du Colombier }
82*e94a8e9bSDavid du Colombier b = m;
83*e94a8e9bSDavid du Colombier m = (m >> 1) | 0x80;
84*e94a8e9bSDavid du Colombier wm = (wm << NSHFT) | wm;
853e12c5d1SDavid du Colombier }
863e12c5d1SDavid du Colombier
873e12c5d1SDavid du Colombier /*
883e12c5d1SDavid du Colombier * bad decoding
893e12c5d1SDavid du Colombier */
903e12c5d1SDavid du Colombier bad:
913e12c5d1SDavid du Colombier return -1;
923e12c5d1SDavid du Colombier
933e12c5d1SDavid du Colombier }
943e12c5d1SDavid du Colombier
953e12c5d1SDavid du Colombier int
wctomb(char * s,wchar_t wchar)963e12c5d1SDavid du Colombier wctomb(char *s, wchar_t wchar)
973e12c5d1SDavid du Colombier {
98*e94a8e9bSDavid du Colombier unsigned long long c, maxc, m;
99*e94a8e9bSDavid du Colombier int i, j;
1003e12c5d1SDavid du Colombier
1013e12c5d1SDavid du Colombier if(!s)
1023e12c5d1SDavid du Colombier return 0;
1033e12c5d1SDavid du Colombier
104*e94a8e9bSDavid du Colombier maxc = 0x80;
105*e94a8e9bSDavid du Colombier c = wchar & WCHARMSK;
106*e94a8e9bSDavid du Colombier if(c < maxc) {
1073e12c5d1SDavid du Colombier s[0] = c;
1083e12c5d1SDavid du Colombier return 1;
1093e12c5d1SDavid du Colombier }
1103e12c5d1SDavid du Colombier
111*e94a8e9bSDavid du Colombier m = T1;
112*e94a8e9bSDavid du Colombier for(i = 2; i < MB_LEN_MAX + 1; i++){
113*e94a8e9bSDavid du Colombier maxc <<= 4;
114*e94a8e9bSDavid du Colombier if(c < maxc || i == MB_LEN_MAX){
115*e94a8e9bSDavid du Colombier s[0] = m | (c >> ((i - 1) * NCSHFT));
116*e94a8e9bSDavid du Colombier for(j = i - 1; j >= 1; j--){
117*e94a8e9bSDavid du Colombier s[i - j] = 0x80|((c>>(6 * (j - 1)))&0x3f);
1183e12c5d1SDavid du Colombier }
119*e94a8e9bSDavid du Colombier return i;
120*e94a8e9bSDavid du Colombier }
121*e94a8e9bSDavid du Colombier m = (m >> 1) | 0x80;
122*e94a8e9bSDavid du Colombier }
123*e94a8e9bSDavid du Colombier return MB_LEN_MAX;
1243e12c5d1SDavid du Colombier }
1253e12c5d1SDavid du Colombier
/*
 * Convert the multibyte string s to wide characters, storing at
 * most n of them in pwcs.  Conversion stops after the terminating
 * NUL has been stored; if n characters are stored first, the
 * result is not terminated.
 *
 * If pwcs is null nothing is stored, n is ignored, and the whole
 * string is measured.
 *
 * Returns the number of wide characters converted, not counting
 * the terminating NUL, or (size_t)-1 if mbtowc rejects a sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t count;
	int c, d;

	count = 0;
	while(pwcs == 0 || count < n) {
		c = *s & 0xff;
		if(c < 0x80) {
			/* single byte: no need to go through mbtowc */
			if(pwcs)
				pwcs[count] = c;
			if(c == 0)
				break;
			d = 1;
		} else {
			d = mbtowc(pwcs ? pwcs + count : pwcs, s, MB_LEN_MAX);
			if(d < 0)
				return (size_t)-1;
			if(d == 0)
				break;
		}
		s += d;
		count++;
	}
	return count;
}
1483e12c5d1SDavid du Colombier
/*
 * Convert the wide string pwcs to multibyte characters, storing at
 * most n bytes in s.  A character is stored whole or not at all:
 * conversion stops at the first character that does not fit in
 * what is left of the n bytes.  The terminating NUL is stored if
 * there is room for it.
 *
 * If s is null nothing is stored, n is ignored, and the whole
 * string is measured.
 *
 * Returns the number of bytes produced, not counting the
 * terminating NUL, or (size_t)-1 if wctomb reports a character
 * it cannot encode.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[MB_LEN_MAX];
	size_t used;
	int d, k;

	used = 0;
	while(s == 0 || used < n) {
		if(*pwcs == 0) {
			if(s)
				s[used] = '\0';
			break;
		}
		/* encode into buf first so that s is never overrun */
		d = wctomb(buf, *pwcs);
		if(d < 0)
			return (size_t)-1;
		if(s) {
			if((size_t)d > n - used)
				break;
			for(k = 0; k < d; k++)
				s[used + k] = buf[k];
		}
		used += d;
		pwcs++;
	}
	return used;
}
184