1 #include <stdlib.h>
2 #include <limits.h>
3
/*
 * Use the FSS-UTF transformation proposed by POSIX.
 *	We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx Tx Tx Tx	31
 */
/* mbtowc is defined further down; declare it here for mblen. */
int
mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * mblen: length in bytes of the multibyte character at s,
 * looking at no more than n bytes.
 *
 * This is mbtowc with the decoded value thrown away, so the
 * return value is whatever mbtowc returns for the same s and n.
 */
int
mblen(const char *s, size_t n)
{
	wchar_t *discard = NULL;	/* no place to store the result */

	return mbtowc(discard, s, n);
}
32
/*
 * Constants for the FSS-UTF codec (mbtowc/wctomb).
 */
enum {
	C0MSK	= 0x7F,		/* payload mask of a 1-byte sequence (7 bits) */
	C1MSK	= 0x7FF,	/* payload mask of a 2-byte sequence (11 bits) */
	T1	= 0xC0,		/* lead-byte tag of a 2-byte sequence */
	T2	= 0xE0,		/* lead-byte tag of a 3-byte sequence */
	NT1BITS	= 11,		/* payload bits of a T1 Tx sequence */
	NSHFT	= 5,		/* payload bits gained by each extra byte */
	NCSHFT	= NSHFT + 1,	/* payload bits in one continuation byte */

	/*
	 * Longest sequence handled: the encoding stops at 6 bytes
	 * (lead byte 1111110x) and callers only provide MB_LEN_MAX
	 * bytes of room.
	 */
	MAXSEQ	= MB_LEN_MAX < 6 ? MB_LEN_MAX : 6,

	/* payload bits of a MAXSEQ-byte sequence: 7, 11, 16, 21, 26, 31 */
	NWCBITS	= MAXSEQ > 1 ? NSHFT*MAXSEQ + 1 : 7,

	/* largest value that can be encoded; always fits in an int */
	WCHARMSK = (1U << NWCBITS) - 1,
};

/*
 * mbtowc: decode the multibyte character at s, looking at no
 * more than n bytes.
 *
 * If pwc is not null the decoded value is stored through it.
 * Returns
 *	 0	if s is null (the encoding has no shift state)
 *		or s points at a NUL byte,
 *	>0	the number of bytes consumed,
 *	-1	if the first n bytes do not hold a complete, valid
 *		sequence of at most MAXSEQ bytes.
 *
 * Overlong sequences (a value encoded in more bytes than it
 * needs, e.g. C0 80) are rejected.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	unsigned long long lead, cont, l, m, b, wm, lo;
	int i;

	if(s == NULL)
		return 0;
	if(n < 1)
		return -1;

	lead = s[0] & 0xff;
	if((lead & 0x80) == 0x00) {
		/* T0: plain 7-bit character */
		if(pwc)
			*pwc = lead;
		if(lead == 0)
			return 0;
		return 1;
	}

	/*
	 * After i continuation bytes, the sequence is complete when
	 * (lead & m) == b.  wm is the payload mask for that length
	 * and lo the largest value a shorter sequence could hold.
	 */
	m = T2;
	b = T1;
	wm = C1MSK;
	lo = C0MSK;
	l = lead;
	for(i = 1; i < MAXSEQ; i++) {
		if(n < (size_t)i + 1)
			return -1;		/* sequence is cut short */
		cont = (s[i] ^ 0x80) & 0xff;	/* 10xxxxxx -> 00xxxxxx */
		if((cont & 0xC0) != 0x00)
			return -1;		/* not a continuation byte */
		l = (l << NCSHFT) | cont;
		if((lead & m) == b) {
			l &= wm;
			if(l <= lo)
				return -1;	/* overlong encoding */
			if(pwc)
				*pwc = l;
			return i + 1;
		}
		/* move on to the next longer sequence type */
		lo = wm;
		b = m;
		m = (m >> 1) | 0x80;
		wm = (wm << NSHFT) | wm;
	}

	/* stray continuation byte, 0xFE/0xFF, or sequence too long */
	return -1;
}

/*
 * wctomb: encode wchar as a multibyte sequence at s, which must
 * have room for MB_LEN_MAX bytes.
 *
 * Returns 0 if s is null (the encoding has no shift state),
 * otherwise the number of bytes stored.  The value is first
 * reduced to the encodable range with WCHARMSK, so the function
 * never fails and always emits the shortest well-formed sequence.
 */
int
wctomb(char *s, wchar_t wchar)
{
	unsigned long long c, lead, limit;
	int len, k;

	if(s == NULL)
		return 0;

	c = wchar & WCHARMSK;
	if(c <= C0MSK) {
		s[0] = c;
		return 1;
	}

	/*
	 * Find the shortest sequence whose payload holds c.
	 * limit goes 0x7FF, 0xFFFF, 0x1FFFFF, ... in step with
	 * lead going 0xC0, 0xE0, 0xF0, ...
	 * The masking above guarantees a fit by len == MAXSEQ.
	 */
	lead = T1;
	limit = C1MSK;
	for(len = 2; len < MAXSEQ && c > limit; len++) {
		lead = (lead >> 1) | 0x80;
		limit = (limit << NSHFT) | limit;
	}

	s[0] = lead | (c >> ((len - 1) * NCSHFT));
	for(k = 1; k < len; k++)
		s[k] = 0x80 | ((c >> ((len - 1 - k) * NCSHFT)) & 0x3F);
	return len;
}
125
/*
 * mbstowcs: convert the multibyte string s to wide characters.
 *
 * At most n wide characters are stored in pwcs; conversion stops
 * early after the terminating NUL has been stored.  If pwcs is
 * null nothing is stored, n is ignored and the whole string is
 * measured (the POSIX length query).
 *
 * Returns the number of wide characters converted, not counting
 * the terminator, or (size_t)-1 if s holds an invalid sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t cnt;	/* size_t, so it cannot overflow against n */
	wchar_t wc;
	int c, d;

	for(cnt = 0; pwcs == NULL || cnt < n; cnt++) {
		c = *s & 0xff;
		if(c < 0x80) {
			/* 7-bit characters need no decoding */
			wc = c;
			d = 1;
		} else {
			d = mbtowc(&wc, s, MB_LEN_MAX);
			if(d <= 0)
				return (d < 0) ? (size_t)-1 : cnt;
		}
		if(pwcs != NULL)
			pwcs[cnt] = wc;
		if(wc == 0)
			break;		/* terminator stored but not counted */
		s += d;
	}
	return cnt;
}
148
/*
 * wcstombs: convert the wide string pwcs to a multibyte string.
 *
 * At most n bytes are stored in s.  A character is only stored
 * whole: conversion stops at the first one that does not fit.
 * The terminating NUL is stored if there is room for it.  If s
 * is null nothing is stored, n is ignored and the whole string
 * is measured (the POSIX length query).
 *
 * Returns the number of bytes produced, not counting the
 * terminating NUL, or (size_t)-1 if wctomb rejects a character.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[MB_LEN_MAX];
	size_t used;
	wchar_t wc;
	int d, i;

	used = 0;
	for(;;) {
		wc = *pwcs++;
		if(wc == 0) {
			if(s != NULL && used < n)
				s[used] = '\0';
			break;
		}

		/* encode into buf first so s is never overrun */
		d = wctomb(buf, wc);
		if(d < 0)
			return (size_t)-1;

		if(s != NULL) {
			if((size_t)d > n - used)
				break;		/* no room for this character */
			for(i = 0; i < d; i++)
				s[used + i] = buf[i];
		}
		used += d;
	}
	return used;
}
184