1 /* 2 * The authors of this software are Rob Pike and Ken Thompson. 3 * Copyright (c) 2002 by Lucent Technologies. 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose without fee is hereby granted, provided that this entire notice 6 * is included in all copies of any software which is or includes a copy 7 * or modification of this software and in all copies of the supporting 8 * documentation for such software. 9 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED 10 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY 11 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY 12 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 13 */ 14 #include <stdarg.h> 15 #include <string.h> 16 #include "utf.h" 17 #include "utfdef.h" 18 19 enum 20 { 21 Bit1 = 7, 22 Bitx = 6, 23 Bit2 = 5, 24 Bit3 = 4, 25 Bit4 = 3, 26 27 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 28 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 29 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 30 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 31 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 32 33 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ 34 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ 35 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ 36 37 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 38 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 39 40 Bad = Runeerror, 41 }; 42 43 int 44 chartorune(Rune *rune, char *str) 45 { 46 int c, c1, c2; 47 long l; 48 49 /* 50 * one character sequence 51 * 00000-0007F => T1 52 */ 53 c = *(uchar*)str; 54 if(c < Tx) { 55 *rune = c; 56 return 1; 57 } 58 59 /* 60 * two character sequence 61 * 0080-07FF => T2 Tx 62 */ 63 c1 = *(uchar*)(str+1) ^ Tx; 64 if(c1 & Testx) 65 goto bad; 66 if(c < T3) { 67 if(c < T2) 68 goto bad; 69 l = ((c << Bitx) | c1) & Rune2; 70 if(l <= Rune1) 71 goto bad; 72 *rune = l; 73 return 2; 74 } 75 76 /* 77 * three character sequence 78 * 0800-FFFF => T3 Tx Tx 79 */ 80 c2 = *(uchar*)(str+2) ^ Tx; 81 if(c2 & Testx) 82 goto bad; 83 if(c < T4) { 84 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; 85 if(l <= Rune2) 86 goto bad; 87 *rune = l; 88 return 3; 89 } 90 91 /* 92 * bad decoding 93 */ 94 bad: 95 *rune = Bad; 96 return 1; 97 } 98 99 int 100 runetochar(char *str, Rune *rune) 101 { 102 long c; 103 104 /* 105 * one character sequence 106 * 00000-0007F => 00-7F 107 */ 108 c = *rune; 109 if(c <= Rune1) { 110 str[0] = c; 111 return 1; 112 } 113 114 /* 115 * two character sequence 116 * 0080-07FF => T2 Tx 117 */ 118 if(c <= Rune2) { 119 str[0] = T2 | (c >> 1*Bitx); 120 str[1] = Tx | (c & Maskx); 121 return 2; 122 } 123 124 /* 125 * three character sequence 126 * 0800-FFFF => T3 Tx Tx 127 */ 128 str[0] = T3 | (c >> 2*Bitx); 129 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 130 str[2] = Tx | (c & Maskx); 131 return 3; 132 } 133 134 int 135 runelen(long c) 136 { 137 Rune rune; 138 char str[10]; 139 140 rune = c; 141 return runetochar(str, &rune); 142 } 143 144 int 145 runenlen(Rune *r, int nrune) 146 { 147 int nb, c; 148 149 nb = 0; 150 while(nrune--) { 151 c = *r++; 152 if(c <= Rune1) 153 nb++; 154 else 155 if(c <= Rune2) 156 nb += 2; 157 else 158 nb += 3; 159 } 160 return nb; 161 } 162 163 int 164 fullrune(char *str, int n) 165 { 166 int c; 167 168 if(n > 0) { 169 c = *(uchar*)str; 170 if(c < Tx) 171 return 1; 172 if(n > 1) 173 if(c < T3 || n > 2) 174 return 1; 175 } 176 return 0; 177 } 178