1 #include "lib9.h" 2 3 enum 4 { 5 Bit1 = 7, 6 Bitx = 6, 7 Bit2 = 5, 8 Bit3 = 4, 9 Bit4 = 3, 10 11 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 12 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 13 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 14 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 15 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 16 17 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ 18 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ 19 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ 20 21 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 22 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 23 24 Bad = Runeerror, 25 }; 26 27 int 28 chartorune(Rune *rune, char *str) 29 { 30 int c, c1, c2; 31 long l; 32 33 /* 34 * one character sequence 35 * 00000-0007F => T1 36 */ 37 c = *(uchar*)str; 38 if(c < Tx) { 39 *rune = c; 40 return 1; 41 } 42 43 /* 44 * two character sequence 45 * 0080-07FF => T2 Tx 46 */ 47 c1 = *(uchar*)(str+1) ^ Tx; 48 if(c1 & Testx) 49 goto bad; 50 if(c < T3) { 51 if(c < T2) 52 goto bad; 53 l = ((c << Bitx) | c1) & Rune2; 54 if(l <= Rune1) 55 goto bad; 56 *rune = l; 57 return 2; 58 } 59 60 /* 61 * three character sequence 62 * 0800-FFFF => T3 Tx Tx 63 */ 64 c2 = *(uchar*)(str+2) ^ Tx; 65 if(c2 & Testx) 66 goto bad; 67 if(c < T4) { 68 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; 69 if(l <= Rune2) 70 goto bad; 71 *rune = l; 72 return 3; 73 } 74 75 /* 76 * bad decoding 77 */ 78 bad: 79 *rune = Bad; 80 return 1; 81 } 82 83 int 84 runetochar(char *str, Rune *rune) 85 { 86 long c; 87 88 /* 89 * one character sequence 90 * 00000-0007F => 00-7F 91 */ 92 c = *rune; 93 if(c <= Rune1) { 94 str[0] = c; 95 return 1; 96 } 97 98 /* 99 * two character sequence 100 * 0080-07FF => T2 Tx 101 */ 102 if(c <= Rune2) { 103 str[0] = T2 | (c >> 1*Bitx); 104 str[1] = Tx | (c & Maskx); 105 return 2; 106 } 107 108 /* 109 * three character sequence 110 * 0800-FFFF => T3 Tx Tx 111 */ 112 str[0] = T3 | (c >> 2*Bitx); 113 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 114 str[2] = Tx | (c & Maskx); 115 return 3; 116 } 117 118 int 119 runelen(long c) 120 { 121 if(c <= Rune1) 122 return 1; 123 if(c <= Rune2) 124 return 2; 125 return 3; 126 } 127 128 int 129 runenlen(Rune *r, int l) 130 { 131 int n; 132 long c; 133 134 n = 0; 135 while(l--) { 136 c = *r++; 137 if(c <= Rune1) 138 n += 1; 139 else 140 if(c <= Rune2) 141 n += 2; 142 else 143 n += 3; 144 } 145 return n; 146 } 147 148 int 149 fullrune(char *str, int n) 150 { 151 int c; 152 153 if(n > 0) { 154 c = *(uchar*)str; 155 if(c < Tx) 156 return 1; 157 if(n > 1) 158 if(c < T3 || n > 2) 159 return 1; 160 } 161 return 0; 162 } 163