1 #include <u.h> 2 #include <libc.h> 3 4 enum 5 { 6 Bit1 = 7, 7 Bitx = 6, 8 Bit2 = 5, 9 Bit3 = 4, 10 Bit4 = 3, 11 12 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 13 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 14 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 15 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 16 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 17 18 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ 19 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ 20 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ 21 22 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 23 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 24 25 Bad = Runeerror, 26 }; 27 28 int 29 chartorune(Rune *rune, char *str) 30 { 31 int c, c1, c2; 32 long l; 33 34 /* 35 * one character sequence 36 * 00000-0007F => T1 37 */ 38 c = *(uchar*)str; 39 if(c < Tx) { 40 *rune = c; 41 return 1; 42 } 43 44 /* 45 * two character sequence 46 * 0080-07FF => T2 Tx 47 */ 48 c1 = *(uchar*)(str+1) ^ Tx; 49 if(c1 & Testx) 50 goto bad; 51 if(c < T3) { 52 if(c < T2) 53 goto bad; 54 l = ((c << Bitx) | c1) & Rune2; 55 if(l <= Rune1) 56 goto bad; 57 *rune = l; 58 return 2; 59 } 60 61 /* 62 * three character sequence 63 * 0800-FFFF => T3 Tx Tx 64 */ 65 c2 = *(uchar*)(str+2) ^ Tx; 66 if(c2 & Testx) 67 goto bad; 68 if(c < T4) { 69 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; 70 if(l <= Rune2) 71 goto bad; 72 *rune = l; 73 return 3; 74 } 75 76 /* 77 * bad decoding 78 */ 79 bad: 80 *rune = Bad; 81 return 1; 82 } 83 84 int 85 runetochar(char *str, Rune *rune) 86 { 87 long c; 88 89 /* 90 * one character sequence 91 * 00000-0007F => 00-7F 92 */ 93 c = *rune; 94 if(c <= Rune1) { 95 str[0] = c; 96 return 1; 97 } 98 99 /* 100 * two character sequence 101 * 0080-07FF => T2 Tx 102 */ 103 if(c <= Rune2) { 104 str[0] = T2 | (c >> 1*Bitx); 105 str[1] = Tx | (c & Maskx); 106 return 2; 107 } 108 109 /* 110 * three character sequence 111 * 0800-FFFF => T3 Tx Tx 112 */ 113 str[0] = T3 | (c >> 2*Bitx); 114 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 115 str[2] = Tx | (c & Maskx); 116 return 3; 117 } 118 119 int 120 runelen(long c) 121 { 122 Rune rune; 123 char str[10]; 124 125 rune = c; 126 return runetochar(str, &rune); 127 } 128 129 int 130 runenlen(Rune *r, int nrune) 131 { 132 int nb, c; 133 134 nb = 0; 135 while(nrune--) { 136 c = *r++; 137 if(c <= Rune1) 138 nb++; 139 else 140 if(c <= Rune2) 141 nb += 2; 142 else 143 nb += 3; 144 } 145 return nb; 146 } 147 148 int 149 fullrune(char *str, int n) 150 { 151 int c; 152 153 if(n > 0) { 154 c = *(uchar*)str; 155 if(c < Tx) 156 return 1; 157 if(n > 1) 158 if(c < T3 || n > 2) 159 return 1; 160 } 161 return 0; 162 } 163