#include <stdlib.h>
#include <limits.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 *	We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx Tx Tx Tx	31
 *
 * The encoding is stateless.  Sequences are never longer than
 * UTFMAX bytes, even when MB_LEN_MAX is larger than the 6 bytes
 * the table above needs.
 */

enum {
	UTFMAX	= MB_LEN_MAX < 6 ? MB_LEN_MAX : 6,	/* longest sequence handled */
	T0LIM	= 0x80,		/* first value that needs more than one byte */
	T1LIM	= 0x800,	/* first value that needs more than two bytes */
	TX	= 0x80,		/* marker bits of a continuation byte */
	TXMSK	= 0xC0,		/* bits that identify a continuation byte */
	TXDATA	= 0x3F,		/* payload bits of a continuation byte */
	NCSHFT	= 6,		/* payload bits per continuation byte */
	NSHFT	= 5,		/* extra bits gained by each added byte after the second */
	WCHARMSK = 0x7FFFFFFF	/* 31 bits: the largest encodable value */
};

int
mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * Return the number of bytes in the multibyte character at s,
 * looking at no more than n bytes: 0 for the null character
 * (or when s is a null pointer, since the encoding is stateless),
 * -1 for an invalid or truncated sequence.
 */
int
mblen(const char *s, size_t n)
{
	return mbtowc(0, s, n);
}

/*
 * Decode one multibyte character from s, examining at most n bytes,
 * and store it through pwc unless pwc is a null pointer.
 * Returns the number of bytes consumed, 0 if the character is the
 * null character (or s is a null pointer), and -1 if the bytes do
 * not form a complete, valid sequence.  Overlong forms, that is
 * values encoded in more bytes than the table above allows, are
 * rejected.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	unsigned long c0, cx, l, min;
	unsigned int m;
	int len, i;

	if(!s)
		return 0;
	if(n < 1)
		return -1;

	c0 = (unsigned char)s[0];	/* first one is special */
	if(c0 < T0LIM){
		if(pwc)
			*pwc = c0;
		return c0 != 0;
	}

	/* the count of leading one bits in the first byte is the length */
	len = 0;
	for(m = 0x80; m != 0 && (c0 & m) != 0; m >>= 1)
		len++;

	/* len == 1 is a stray continuation byte; 0xFE and 0xFF give len > 6 */
	if(len < 2 || len > UTFMAX)
		return -1;
	if(n < (size_t)len)
		return -1;

	l = c0 & (0x7F >> len);		/* payload bits of the first byte */
	for(i = 1; i < len; i++){
		cx = (unsigned char)s[i] ^ TX;
		if((cx & TXMSK) != 0)	/* not of the form 10xxxxxx */
			return -1;
		l = (l << NCSHFT) | cx;
	}

	/* smallest value that genuinely needs len bytes */
	min = T0LIM;
	if(len > 2)
		min = 1UL << (NSHFT*len - 4);
	if(l < min)
		return -1;

	if(pwc)
		*pwc = l;
	return len;
}

/*
 * Encode wchar into the buffer at s, which must have room for
 * UTFMAX bytes, and return the number of bytes written.
 * Returns 0 if s is a null pointer (the encoding is stateless).
 * Only the low 31 bits of wchar are used.  Returns -1 if the value
 * cannot be represented in UTFMAX bytes.
 */
int
wctomb(char *s, wchar_t wchar)
{
	unsigned long c, maxc;
	int len, i;

	if(!s)
		return 0;

	c = (unsigned long)wchar & WCHARMSK;
	if(c < T0LIM){
		s[0] = (char)c;
		return 1;
	}

	/* limits are 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000 */
	len = 2;
	for(maxc = T1LIM; c >= maxc; maxc <<= NSHFT)
		len++;
	if(len > UTFMAX)
		return -1;

	/* lead byte: len one bits, a zero bit, then the top payload bits */
	s[0] = (char)(((0xFF00 >> len) & 0xFF) | (c >> (NCSHFT*(len - 1))));
	for(i = 1; i < len; i++)
		s[i] = (char)(TX | ((c >> (NCSHFT*(len - 1 - i))) & TXDATA));
	return len;
}

/*
 * Convert the multibyte string s into wide characters, storing at
 * most n of them in pwcs.  The terminating null wide character is
 * stored if there is room for it but is not counted.
 * Returns the number of wide characters converted, or (size_t)-1
 * if s contains an invalid sequence.  If pwcs is a null pointer
 * nothing is stored, n is ignored and the length of the whole
 * converted string is returned.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;
	int d;

	for(i = 0; pwcs == NULL || i < n; i++){
		/*
		 * mbtowc stops at the first byte that is not a continuation
		 * byte, so it never reads beyond the terminating null.
		 */
		d = mbtowc(pwcs != NULL ? &pwcs[i] : NULL, s, UTFMAX);
		if(d < 0)
			return (size_t)-1;
		if(d == 0)
			break;
		s += d;
	}
	return i;
}

/*
 * Convert the wide string pwcs into multibyte form, storing at most
 * n bytes in s.  Conversion stops at the terminating null wide
 * character, or before the first character whose bytes would not
 * all fit; a character is never stored partially.  The terminating
 * null byte is stored if there is room for it but is not counted.
 * Returns the number of bytes stored, or (size_t)-1 if a character
 * cannot be encoded.  If s is a null pointer nothing is stored,
 * n is ignored and the length of the whole converted string is
 * returned.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[UTFMAX];
	size_t used;
	int d, i;

	used = 0;
	for(;;){
		if(*pwcs == 0){
			if(s != NULL && used < n)
				s[used] = '\0';
			break;
		}
		d = wctomb(buf, *pwcs);
		if(d < 0)
			return (size_t)-1;
		if(s != NULL){
			if(n - used < (size_t)d)
				break;
			for(i = 0; i < d; i++)
				s[used + i] = buf[i];
		}
		used += d;
		pwcs++;
	}
	return used;
}