18ccd4a63SDavid du Colombier #include <u.h> 28ccd4a63SDavid du Colombier #include <libc.h> 38ccd4a63SDavid du Colombier 48ccd4a63SDavid du Colombier enum 58ccd4a63SDavid du Colombier { 68ccd4a63SDavid du Colombier Bit1 = 7, 78ccd4a63SDavid du Colombier Bitx = 6, 88ccd4a63SDavid du Colombier Bit2 = 5, 98ccd4a63SDavid du Colombier Bit3 = 4, 108ccd4a63SDavid du Colombier Bit4 = 3, 118ccd4a63SDavid du Colombier 128ccd4a63SDavid du Colombier T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 138ccd4a63SDavid du Colombier Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 148ccd4a63SDavid du Colombier T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 158ccd4a63SDavid du Colombier T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 168ccd4a63SDavid du Colombier T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 17*82726826SDavid du Colombier T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 188ccd4a63SDavid du Colombier 19*82726826SDavid du Colombier Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 20*82726826SDavid du Colombier Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 21*82726826SDavid du Colombier Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 22*82726826SDavid du Colombier Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */ 238ccd4a63SDavid du Colombier 248ccd4a63SDavid du Colombier Maskx = (1<<Bitx)-1, /* 0011 1111 */ 258ccd4a63SDavid du Colombier Testx = Maskx ^ 0xFF, /* 1100 0000 */ 268ccd4a63SDavid du Colombier 27*82726826SDavid du Colombier SurrogateMin = 0xD800, 28*82726826SDavid du Colombier SurrogateMax = 0xDFFF, 29*82726826SDavid du Colombier 308ccd4a63SDavid du Colombier Bad = Runeerror, 318ccd4a63SDavid du Colombier }; 328ccd4a63SDavid du Colombier 338ccd4a63SDavid du Colombier int 348ccd4a63SDavid du Colombier chartorune(Rune *rune, char *str) 358ccd4a63SDavid du Colombier { 36*82726826SDavid du Colombier int c, c1, c2, c3; 378ccd4a63SDavid du Colombier long l; 388ccd4a63SDavid du Colombier 398ccd4a63SDavid du Colombier /* 408ccd4a63SDavid du Colombier * one character sequence 418ccd4a63SDavid du Colombier * 00000-0007F => T1 428ccd4a63SDavid du Colombier */ 438ccd4a63SDavid du Colombier c = *(uchar*)str; 448ccd4a63SDavid du Colombier if(c < Tx) { 458ccd4a63SDavid du Colombier *rune = c; 468ccd4a63SDavid du Colombier return 1; 478ccd4a63SDavid du Colombier } 488ccd4a63SDavid du Colombier 498ccd4a63SDavid du Colombier /* 508ccd4a63SDavid du Colombier * two character sequence 51*82726826SDavid du Colombier * 00080-007FF => T2 Tx 528ccd4a63SDavid du Colombier */ 538ccd4a63SDavid du Colombier c1 = *(uchar*)(str+1) ^ Tx; 548ccd4a63SDavid du Colombier if(c1 & Testx) 558ccd4a63SDavid du Colombier goto bad; 568ccd4a63SDavid du Colombier if(c < T3) { 578ccd4a63SDavid du Colombier if(c < T2) 588ccd4a63SDavid du Colombier goto bad; 598ccd4a63SDavid du Colombier l = ((c << Bitx) | c1) & Rune2; 608ccd4a63SDavid du Colombier if(l <= Rune1) 618ccd4a63SDavid du Colombier goto bad; 628ccd4a63SDavid du Colombier *rune = l; 638ccd4a63SDavid du Colombier return 2; 648ccd4a63SDavid du Colombier } 658ccd4a63SDavid du Colombier 668ccd4a63SDavid du Colombier /* 678ccd4a63SDavid du Colombier * three character sequence 68*82726826SDavid du Colombier * 00800-0FFFF => T3 Tx Tx 698ccd4a63SDavid du Colombier */ 708ccd4a63SDavid du Colombier c2 = *(uchar*)(str+2) ^ Tx; 71*82726826SDavid du Colombier 728ccd4a63SDavid du Colombier if(c2 & Testx) 738ccd4a63SDavid du Colombier goto bad; 748ccd4a63SDavid du Colombier if(c < T4) { 758ccd4a63SDavid du Colombier l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; 768ccd4a63SDavid du Colombier if(l <= Rune2) 778ccd4a63SDavid du Colombier goto bad; 78*82726826SDavid du Colombier if (SurrogateMin <= l && l <= SurrogateMax) 79*82726826SDavid du Colombier goto bad; 808ccd4a63SDavid du Colombier *rune = l; 818ccd4a63SDavid du Colombier return 3; 828ccd4a63SDavid du Colombier } 838ccd4a63SDavid du Colombier 848ccd4a63SDavid du Colombier /* 85*82726826SDavid du Colombier * four character sequence 86*82726826SDavid du Colombier * 10000-10FFFF => T4 Tx Tx Tx 87*82726826SDavid du Colombier */ 88*82726826SDavid du Colombier if(UTFmax >= 4) { 89*82726826SDavid du Colombier c3 = *(uchar*)(str+3) ^ Tx; 90*82726826SDavid du Colombier if(c3 & Testx) 91*82726826SDavid du Colombier goto bad; 92*82726826SDavid du Colombier if(c < T5) { 93*82726826SDavid du Colombier l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; 94*82726826SDavid du Colombier if(l <= Rune3) 95*82726826SDavid du Colombier goto bad; 96*82726826SDavid du Colombier if(l > Runemax) 97*82726826SDavid du Colombier goto bad; 98*82726826SDavid du Colombier *rune = l; 99*82726826SDavid du Colombier return 4; 100*82726826SDavid du Colombier } 101*82726826SDavid du Colombier } 102*82726826SDavid du Colombier 103*82726826SDavid du Colombier /* 1048ccd4a63SDavid du Colombier * bad decoding 1058ccd4a63SDavid du Colombier */ 1068ccd4a63SDavid du Colombier bad: 1078ccd4a63SDavid du Colombier *rune = Bad; 1088ccd4a63SDavid du Colombier return 1; 1098ccd4a63SDavid du Colombier } 1108ccd4a63SDavid du Colombier 1118ccd4a63SDavid du Colombier int 1128ccd4a63SDavid du Colombier runetochar(char *str, Rune *rune) 1138ccd4a63SDavid du Colombier { 1148ccd4a63SDavid du Colombier long c; 1158ccd4a63SDavid du Colombier 1168ccd4a63SDavid du Colombier /* 1178ccd4a63SDavid du Colombier * one character sequence 1188ccd4a63SDavid du Colombier * 00000-0007F => 00-7F 1198ccd4a63SDavid du Colombier */ 1208ccd4a63SDavid du Colombier c = *rune; 1218ccd4a63SDavid du Colombier if(c <= Rune1) { 1228ccd4a63SDavid du Colombier str[0] = c; 1238ccd4a63SDavid du Colombier return 1; 1248ccd4a63SDavid du Colombier } 1258ccd4a63SDavid du Colombier 1268ccd4a63SDavid du Colombier /* 1278ccd4a63SDavid du Colombier * two character sequence 1288ccd4a63SDavid du Colombier * 0080-07FF => T2 Tx 1298ccd4a63SDavid du Colombier */ 1308ccd4a63SDavid du Colombier if(c <= Rune2) { 1318ccd4a63SDavid du Colombier str[0] = T2 | (c >> 1*Bitx); 1328ccd4a63SDavid du Colombier str[1] = Tx | (c & Maskx); 1338ccd4a63SDavid du Colombier return 2; 1348ccd4a63SDavid du Colombier } 135*82726826SDavid du Colombier /* 136*82726826SDavid du Colombier * If the Rune is out of range or a surrogate half, convert it to the error rune. 137*82726826SDavid du Colombier * Do this test here because the error rune encodes to three bytes. 138*82726826SDavid du Colombier * Doing it earlier would duplicate work, since an out of range 139*82726826SDavid du Colombier * Rune wouldn't have fit in one or two bytes. 140*82726826SDavid du Colombier */ 141*82726826SDavid du Colombier if (c > Runemax) 142*82726826SDavid du Colombier c = Runeerror; 143*82726826SDavid du Colombier if (SurrogateMin <= c && c <= SurrogateMax) 144*82726826SDavid du Colombier c = Runeerror; 1458ccd4a63SDavid du Colombier 1468ccd4a63SDavid du Colombier /* 1478ccd4a63SDavid du Colombier * three character sequence 1488ccd4a63SDavid du Colombier * 0800-FFFF => T3 Tx Tx 1498ccd4a63SDavid du Colombier */ 150*82726826SDavid du Colombier if (c <= Rune3) { 1518ccd4a63SDavid du Colombier str[0] = T3 | (c >> 2*Bitx); 1528ccd4a63SDavid du Colombier str[1] = Tx | ((c >> 1*Bitx) & Maskx); 1538ccd4a63SDavid du Colombier str[2] = Tx | (c & Maskx); 1548ccd4a63SDavid du Colombier return 3; 1558ccd4a63SDavid du Colombier } 1568ccd4a63SDavid du Colombier 157*82726826SDavid du Colombier /* 158*82726826SDavid du Colombier * four character sequence (21-bit value) 159*82726826SDavid du Colombier * 10000-1FFFFF => T4 Tx Tx Tx 160*82726826SDavid du Colombier */ 161*82726826SDavid du Colombier str[0] = T4 | (c >> 3*Bitx); 162*82726826SDavid du Colombier str[1] = Tx | ((c >> 2*Bitx) & Maskx); 163*82726826SDavid du Colombier str[2] = Tx | ((c >> 1*Bitx) & Maskx); 164*82726826SDavid du Colombier str[3] = Tx | (c & Maskx); 165*82726826SDavid du Colombier return 4; 166*82726826SDavid du Colombier } 167*82726826SDavid du Colombier 1688ccd4a63SDavid du Colombier int 1698ccd4a63SDavid du Colombier runelen(long c) 1708ccd4a63SDavid du Colombier { 1718ccd4a63SDavid du Colombier Rune rune; 1728ccd4a63SDavid du Colombier char str[10]; 1738ccd4a63SDavid du Colombier 1748ccd4a63SDavid du Colombier rune = c; 1758ccd4a63SDavid du Colombier return runetochar(str, &rune); 1768ccd4a63SDavid du Colombier } 1778ccd4a63SDavid du Colombier 1788ccd4a63SDavid du Colombier int 1798ccd4a63SDavid du Colombier runenlen(Rune *r, int nrune) 1808ccd4a63SDavid du Colombier { 1818ccd4a63SDavid du Colombier int nb, c; 1828ccd4a63SDavid du Colombier 1838ccd4a63SDavid du Colombier nb = 0; 1848ccd4a63SDavid du Colombier while(nrune--) { 1858ccd4a63SDavid du Colombier c = *r++; 1868ccd4a63SDavid du Colombier if(c <= Rune1) 1878ccd4a63SDavid du Colombier nb++; 188*82726826SDavid du Colombier else if(c <= Rune2) 1898ccd4a63SDavid du Colombier nb += 2; 190*82726826SDavid du Colombier else if(c <= Rune3) 1918ccd4a63SDavid du Colombier nb += 3; 192*82726826SDavid du Colombier else 193*82726826SDavid du Colombier nb += 4; 1948ccd4a63SDavid du Colombier } 1958ccd4a63SDavid du Colombier return nb; 1968ccd4a63SDavid du Colombier } 1978ccd4a63SDavid du Colombier 1988ccd4a63SDavid du Colombier int 1998ccd4a63SDavid du Colombier fullrune(char *str, int n) 2008ccd4a63SDavid du Colombier { 2018ccd4a63SDavid du Colombier int c; 202*82726826SDavid du Colombier if(n <= 0) 203*82726826SDavid du Colombier return 0; 2048ccd4a63SDavid du Colombier c = *(uchar*)str; 2058ccd4a63SDavid du Colombier if(c < Tx) 2068ccd4a63SDavid du Colombier return 1; 207*82726826SDavid du Colombier if(c < T3) 208*82726826SDavid du Colombier return n >= 2; 209*82726826SDavid du Colombier if(c < T4) 210*82726826SDavid du Colombier return n >= 3; 211*82726826SDavid du Colombier return n >= 4; 2128ccd4a63SDavid du Colombier } 213