18ccd4a63SDavid du Colombier #include <u.h> 28ccd4a63SDavid du Colombier #include <libc.h> 38ccd4a63SDavid du Colombier #include <bio.h> 48ccd4a63SDavid du Colombier #include "hdr.h" 58ccd4a63SDavid du Colombier #include "conv.h" 68ccd4a63SDavid du Colombier 78ccd4a63SDavid du Colombier typedef struct Hchar Hchar; 88ccd4a63SDavid du Colombier struct Hchar 98ccd4a63SDavid du Colombier { 108ccd4a63SDavid du Colombier char *s; 118ccd4a63SDavid du Colombier Rune r; 128ccd4a63SDavid du Colombier }; 138ccd4a63SDavid du Colombier 148ccd4a63SDavid du Colombier /* <, >, ", & intentionally omitted */ 158ccd4a63SDavid du Colombier 168ccd4a63SDavid du Colombier static Hchar byname[] = 178ccd4a63SDavid du Colombier { 188ccd4a63SDavid du Colombier {"AElig", 198}, 198ccd4a63SDavid du Colombier {"Aacute", 193}, 208ccd4a63SDavid du Colombier {"Acirc", 194}, 218ccd4a63SDavid du Colombier {"Agrave", 192}, 228ccd4a63SDavid du Colombier {"Aring", 197}, 238ccd4a63SDavid du Colombier {"Atilde", 195}, 248ccd4a63SDavid du Colombier {"Auml", 196}, 258ccd4a63SDavid du Colombier {"Ccedil", 199}, 268ccd4a63SDavid du Colombier {"ETH", 208}, 278ccd4a63SDavid du Colombier {"Eacute", 201}, 288ccd4a63SDavid du Colombier {"Ecirc", 202}, 298ccd4a63SDavid du Colombier {"Egrave", 200}, 308ccd4a63SDavid du Colombier {"Euml", 203}, 318ccd4a63SDavid du Colombier {"Iacute", 205}, 328ccd4a63SDavid du Colombier {"Icirc", 206}, 338ccd4a63SDavid du Colombier {"Igrave", 204}, 348ccd4a63SDavid du Colombier {"Iuml", 207}, 358ccd4a63SDavid du Colombier {"Ntilde", 209}, 368ccd4a63SDavid du Colombier {"Oacute", 211}, 378ccd4a63SDavid du Colombier {"Ocirc", 212}, 388ccd4a63SDavid du Colombier {"Ograve", 210}, 398ccd4a63SDavid du Colombier {"Oslash", 216}, 408ccd4a63SDavid du Colombier {"Otilde", 213}, 418ccd4a63SDavid du Colombier {"Ouml", 214}, 428ccd4a63SDavid du Colombier {"THORN", 222}, 438ccd4a63SDavid du Colombier {"Uacute", 218}, 448ccd4a63SDavid du Colombier {"Ucirc", 219}, 458ccd4a63SDavid du Colombier {"Ugrave", 217}, 468ccd4a63SDavid du Colombier {"Uuml", 220}, 478ccd4a63SDavid du Colombier {"Yacute", 221}, 488ccd4a63SDavid du Colombier {"aacute", 225}, 498ccd4a63SDavid du Colombier {"acirc", 226}, 508ccd4a63SDavid du Colombier {"acute", 180}, 518ccd4a63SDavid du Colombier {"aelig", 230}, 528ccd4a63SDavid du Colombier {"agrave", 224}, 538ccd4a63SDavid du Colombier {"alpha", 945}, 548ccd4a63SDavid du Colombier {"aring", 229}, 558ccd4a63SDavid du Colombier {"atilde", 227}, 568ccd4a63SDavid du Colombier {"auml", 228}, 578ccd4a63SDavid du Colombier {"beta", 946}, 588ccd4a63SDavid du Colombier {"brvbar", 166}, 598ccd4a63SDavid du Colombier {"ccedil", 231}, 608ccd4a63SDavid du Colombier {"cdots", 8943}, 618ccd4a63SDavid du Colombier {"cedil", 184}, 628ccd4a63SDavid du Colombier {"cent", 162}, 638ccd4a63SDavid du Colombier {"chi", 967}, 648ccd4a63SDavid du Colombier {"copy", 169}, 658ccd4a63SDavid du Colombier {"curren", 164}, 668ccd4a63SDavid du Colombier {"ddots", 8945}, 678ccd4a63SDavid du Colombier {"deg", 176}, 688ccd4a63SDavid du Colombier {"delta", 948}, 698ccd4a63SDavid du Colombier {"divide", 247}, 708ccd4a63SDavid du Colombier {"eacute", 233}, 718ccd4a63SDavid du Colombier {"ecirc", 234}, 728ccd4a63SDavid du Colombier {"egrave", 232}, 738ccd4a63SDavid du Colombier {"emdash", 8212}, /* non-standard but commonly used */ 748ccd4a63SDavid du Colombier {"emsp", 8195}, 758ccd4a63SDavid du Colombier {"endash", 8211}, /* non-standard but commonly used */ 768ccd4a63SDavid du Colombier {"ensp", 8194}, 778ccd4a63SDavid du Colombier {"epsilon", 949}, 788ccd4a63SDavid du Colombier {"eta", 951}, 798ccd4a63SDavid du Colombier {"eth", 240}, 808ccd4a63SDavid du Colombier {"euml", 235}, 818ccd4a63SDavid du Colombier {"frac12", 189}, 828ccd4a63SDavid du Colombier {"frac14", 188}, 838ccd4a63SDavid du Colombier {"frac34", 190}, 848ccd4a63SDavid du Colombier {"gamma", 947}, 858ccd4a63SDavid du Colombier {"iacute", 237}, 868ccd4a63SDavid du Colombier {"icirc", 238}, 878ccd4a63SDavid du Colombier {"iexcl", 161}, 888ccd4a63SDavid du Colombier {"igrave", 236}, 898ccd4a63SDavid du Colombier {"iota", 953}, 908ccd4a63SDavid du Colombier {"iquest", 191}, 918ccd4a63SDavid du Colombier {"iuml", 239}, 928ccd4a63SDavid du Colombier {"kappa", 954}, 938ccd4a63SDavid du Colombier {"lambda", 955}, 948ccd4a63SDavid du Colombier {"laquo", 171}, 958ccd4a63SDavid du Colombier {"ldquo", 8220}, 968ccd4a63SDavid du Colombier {"ldots", 8230}, 978ccd4a63SDavid du Colombier {"lsquo", 8216}, 988ccd4a63SDavid du Colombier {"macr", 175}, 998ccd4a63SDavid du Colombier {"mdash", 8212}, 1008ccd4a63SDavid du Colombier {"micro", 181}, 1018ccd4a63SDavid du Colombier {"middot", 183}, 1028ccd4a63SDavid du Colombier {"mu", 956}, 1038ccd4a63SDavid du Colombier {"nbsp", 160}, 1048ccd4a63SDavid du Colombier {"ndash", 8211}, 1058ccd4a63SDavid du Colombier {"not", 172}, 1068ccd4a63SDavid du Colombier {"ntilde", 241}, 1078ccd4a63SDavid du Colombier {"nu", 957}, 1088ccd4a63SDavid du Colombier {"oacute", 243}, 1098ccd4a63SDavid du Colombier {"ocirc", 244}, 1108ccd4a63SDavid du Colombier {"ograve", 242}, 1118ccd4a63SDavid du Colombier {"omega", 969}, 1128ccd4a63SDavid du Colombier {"omicron", 959}, 1138ccd4a63SDavid du Colombier {"ordf", 170}, 1148ccd4a63SDavid du Colombier {"ordm", 186}, 1158ccd4a63SDavid du Colombier {"oslash", 248}, 1168ccd4a63SDavid du Colombier {"otilde", 245}, 1178ccd4a63SDavid du Colombier {"ouml", 246}, 1188ccd4a63SDavid du Colombier {"para", 182}, 1198ccd4a63SDavid du Colombier {"phi", 966}, 1208ccd4a63SDavid du Colombier {"pi", 960}, 1218ccd4a63SDavid du Colombier {"plusmn", 177}, 1228ccd4a63SDavid du Colombier {"pound", 163}, 1238ccd4a63SDavid du Colombier {"psi", 968}, 1248ccd4a63SDavid du Colombier {"quad", 8193}, 1258ccd4a63SDavid du Colombier {"raquo", 187}, 1268ccd4a63SDavid du Colombier {"rdquo", 8221}, 1278ccd4a63SDavid du Colombier {"reg", 174}, 1288ccd4a63SDavid du Colombier {"rho", 961}, 1298ccd4a63SDavid du Colombier {"rsquo", 8217}, 1308ccd4a63SDavid du Colombier {"sect", 167}, 1318ccd4a63SDavid du Colombier {"shy", 173}, 1328ccd4a63SDavid du Colombier {"sigma", 963}, 1338ccd4a63SDavid du Colombier {"sp", 8194}, 1348ccd4a63SDavid du Colombier {"sup1", 185}, 1358ccd4a63SDavid du Colombier {"sup2", 178}, 1368ccd4a63SDavid du Colombier {"sup3", 179}, 1378ccd4a63SDavid du Colombier {"szlig", 223}, 1388ccd4a63SDavid du Colombier {"tau", 964}, 1398ccd4a63SDavid du Colombier {"theta", 952}, 1408ccd4a63SDavid du Colombier {"thinsp", 8201}, 1418ccd4a63SDavid du Colombier {"thorn", 254}, 1428ccd4a63SDavid du Colombier {"times", 215}, 1438ccd4a63SDavid du Colombier {"trade", 8482}, 1448ccd4a63SDavid du Colombier {"uacute", 250}, 1458ccd4a63SDavid du Colombier {"ucirc", 251}, 1468ccd4a63SDavid du Colombier {"ugrave", 249}, 1478ccd4a63SDavid du Colombier {"uml", 168}, 1488ccd4a63SDavid du Colombier {"upsilon", 965}, 1498ccd4a63SDavid du Colombier {"uuml", 252}, 1508ccd4a63SDavid du Colombier {"varepsilon", 8712}, 1518ccd4a63SDavid du Colombier {"varphi", 981}, 1528ccd4a63SDavid du Colombier {"varpi", 982}, 1538ccd4a63SDavid du Colombier {"varrho", 1009}, 1548ccd4a63SDavid du Colombier {"vdots", 8942}, 1558ccd4a63SDavid du Colombier {"vsigma", 962}, 1568ccd4a63SDavid du Colombier {"vtheta", 977}, 1578ccd4a63SDavid du Colombier {"xi", 958}, 1588ccd4a63SDavid du Colombier {"yacute", 253}, 1598ccd4a63SDavid du Colombier {"yen", 165}, 1608ccd4a63SDavid du Colombier {"yuml", 255}, 1618ccd4a63SDavid du Colombier {"zeta", 950} 1628ccd4a63SDavid du Colombier }; 1638ccd4a63SDavid du Colombier 1648ccd4a63SDavid du Colombier static Hchar byrune[nelem(byname)]; 1658ccd4a63SDavid du Colombier 1668ccd4a63SDavid du Colombier static int 1678ccd4a63SDavid du Colombier hnamecmp(const void *va, const void *vb) 1688ccd4a63SDavid du Colombier { 1698ccd4a63SDavid du Colombier Hchar *a, *b; 1708ccd4a63SDavid du Colombier 1718ccd4a63SDavid du Colombier a = (Hchar*)va; 1728ccd4a63SDavid du Colombier b = (Hchar*)vb; 1738ccd4a63SDavid du Colombier return strcmp(a->s, b->s); 1748ccd4a63SDavid du Colombier } 1758ccd4a63SDavid du Colombier 1768ccd4a63SDavid du Colombier static int 1778ccd4a63SDavid du Colombier hrunecmp(const void *va, const void *vb) 1788ccd4a63SDavid du Colombier { 1798ccd4a63SDavid du Colombier Hchar *a, *b; 1808ccd4a63SDavid du Colombier 1818ccd4a63SDavid du Colombier a = (Hchar*)va; 1828ccd4a63SDavid du Colombier b = (Hchar*)vb; 1838ccd4a63SDavid du Colombier return a->r - b->r; 1848ccd4a63SDavid du Colombier } 1858ccd4a63SDavid du Colombier 1868ccd4a63SDavid du Colombier static void 1878ccd4a63SDavid du Colombier html_init(void) 1888ccd4a63SDavid du Colombier { 1898ccd4a63SDavid du Colombier static int init; 1908ccd4a63SDavid du Colombier 1918ccd4a63SDavid du Colombier if(init) 1928ccd4a63SDavid du Colombier return; 1938ccd4a63SDavid du Colombier init = 1; 1948ccd4a63SDavid du Colombier memmove(byrune, byname, sizeof byrune); 1958ccd4a63SDavid du Colombier qsort(byname, nelem(byname), sizeof byname[0], hnamecmp); 1968ccd4a63SDavid du Colombier qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp); 1978ccd4a63SDavid du Colombier } 1988ccd4a63SDavid du Colombier 1998ccd4a63SDavid du Colombier static Rune 2008ccd4a63SDavid du Colombier findbyname(char *s) 2018ccd4a63SDavid du Colombier { 2028ccd4a63SDavid du Colombier Hchar *h; 2038ccd4a63SDavid du Colombier int n, m, x; 2048ccd4a63SDavid du Colombier 2058ccd4a63SDavid du Colombier h = byname; 2068ccd4a63SDavid du Colombier n = nelem(byname); 2078ccd4a63SDavid du Colombier while(n > 0){ 2088ccd4a63SDavid du Colombier m = n/2; 2098ccd4a63SDavid du Colombier x = strcmp(h[m].s, s); 2108ccd4a63SDavid du Colombier if(x == 0) 2118ccd4a63SDavid du Colombier return h[m].r; 2128ccd4a63SDavid du Colombier if(x < 0){ 2138ccd4a63SDavid du Colombier h += m+1; 2148ccd4a63SDavid du Colombier n -= m+1; 2158ccd4a63SDavid du Colombier }else 2168ccd4a63SDavid du Colombier n = m; 2178ccd4a63SDavid du Colombier } 2188ccd4a63SDavid du Colombier return Runeerror; 2198ccd4a63SDavid du Colombier } 2208ccd4a63SDavid du Colombier 2218ccd4a63SDavid du Colombier static char* 2228ccd4a63SDavid du Colombier findbyrune(Rune r) 2238ccd4a63SDavid du Colombier { 2248ccd4a63SDavid du Colombier Hchar *h; 2258ccd4a63SDavid du Colombier int n, m; 2268ccd4a63SDavid du Colombier 2278ccd4a63SDavid du Colombier h = byrune; 2288ccd4a63SDavid du Colombier n = nelem(byrune); 2298ccd4a63SDavid du Colombier while(n > 0){ 2308ccd4a63SDavid du Colombier m = n/2; 2318ccd4a63SDavid du Colombier if(h[m].r == r) 2328ccd4a63SDavid du Colombier return h[m].s; 2338ccd4a63SDavid du Colombier if(h[m].r < r){ 2348ccd4a63SDavid du Colombier h += m+1; 2358ccd4a63SDavid du Colombier n -= m+1; 2368ccd4a63SDavid du Colombier }else 2378ccd4a63SDavid du Colombier n = m; 2388ccd4a63SDavid du Colombier } 2398ccd4a63SDavid du Colombier return nil; 2408ccd4a63SDavid du Colombier } 2418ccd4a63SDavid du Colombier 2428ccd4a63SDavid du Colombier void 2438ccd4a63SDavid du Colombier html_in(int fd, long *x, struct convert *out) 2448ccd4a63SDavid du Colombier { 2458ccd4a63SDavid du Colombier char buf[100], *p; 2468ccd4a63SDavid du Colombier Biobuf b; 2478ccd4a63SDavid du Colombier Rune rbuf[N]; 2488ccd4a63SDavid du Colombier Rune *r, *er; 2498ccd4a63SDavid du Colombier int c, i; 2508ccd4a63SDavid du Colombier 2518ccd4a63SDavid du Colombier USED(x); 2528ccd4a63SDavid du Colombier 2538ccd4a63SDavid du Colombier html_init(); 2548ccd4a63SDavid du Colombier r = rbuf; 2558ccd4a63SDavid du Colombier er = rbuf+N; 2568ccd4a63SDavid du Colombier Binit(&b, fd, OREAD); 2578ccd4a63SDavid du Colombier while((c = Bgetrune(&b)) != Beof){ 2588ccd4a63SDavid du Colombier if(r >= er){ 2598ccd4a63SDavid du Colombier OUT(out, rbuf, r-rbuf); 2608ccd4a63SDavid du Colombier r = rbuf; 2618ccd4a63SDavid du Colombier } 2628ccd4a63SDavid du Colombier if(c == '&'){ 2638ccd4a63SDavid du Colombier buf[0] = c; 2648ccd4a63SDavid du Colombier for(i=1; i<nelem(buf)-1;){ 2658ccd4a63SDavid du Colombier c = Bgetc(&b); 2668ccd4a63SDavid du Colombier if(c == Beof) 2678ccd4a63SDavid du Colombier break; 2688ccd4a63SDavid du Colombier buf[i++] = c; 2698ccd4a63SDavid du Colombier if(strchr("; \t\r\n", c)) 2708ccd4a63SDavid du Colombier break; 2718ccd4a63SDavid du Colombier } 2728ccd4a63SDavid du Colombier buf[i] = 0; 2738ccd4a63SDavid du Colombier if(buf[i-1] == ';'){ 2748ccd4a63SDavid du Colombier buf[i-1] = 0; 2758ccd4a63SDavid du Colombier if((c = findbyname(buf+1)) != Runeerror){ 2768ccd4a63SDavid du Colombier *r++ = c; 2778ccd4a63SDavid du Colombier continue; 2788ccd4a63SDavid du Colombier } 2798ccd4a63SDavid du Colombier buf[i-1] = ';'; 2808ccd4a63SDavid du Colombier if(buf[1] == '#'){ 2818ccd4a63SDavid du Colombier if(buf[2] == 'x') 2828ccd4a63SDavid du Colombier c = strtol(buf+3, &p, 16); 2838ccd4a63SDavid du Colombier else 2848ccd4a63SDavid du Colombier c = strtol(buf+2, &p, 10); 2858ccd4a63SDavid du Colombier if(*p != ';' || c >= NRUNE || c < 0) 2868ccd4a63SDavid du Colombier goto bad; 2878ccd4a63SDavid du Colombier *r++ = c; 2888ccd4a63SDavid du Colombier continue; 2898ccd4a63SDavid du Colombier } 2908ccd4a63SDavid du Colombier } 2918ccd4a63SDavid du Colombier bad: 2928ccd4a63SDavid du Colombier for(p=buf; p<buf+i; ){ 2938ccd4a63SDavid du Colombier p += chartorune(r++, p); 2948ccd4a63SDavid du Colombier if(r >= er){ 2958ccd4a63SDavid du Colombier OUT(out, rbuf, r-rbuf); 2968ccd4a63SDavid du Colombier r = rbuf; 2978ccd4a63SDavid du Colombier } 2988ccd4a63SDavid du Colombier } 2998ccd4a63SDavid du Colombier continue; 3008ccd4a63SDavid du Colombier } 3018ccd4a63SDavid du Colombier *r++ = c; 3028ccd4a63SDavid du Colombier } 3038ccd4a63SDavid du Colombier if(r > rbuf) 3048ccd4a63SDavid du Colombier OUT(out, rbuf, r-rbuf); 3058ccd4a63SDavid du Colombier } 3068ccd4a63SDavid du Colombier 3078ccd4a63SDavid du Colombier /* 3088ccd4a63SDavid du Colombier * use biobuf because can use more than UTFmax bytes per rune 3098ccd4a63SDavid du Colombier */ 3108ccd4a63SDavid du Colombier void 3118ccd4a63SDavid du Colombier html_out(Rune *r, int n, long *x) 3128ccd4a63SDavid du Colombier { 3138ccd4a63SDavid du Colombier char *s; 3148ccd4a63SDavid du Colombier Biobuf b; 3158ccd4a63SDavid du Colombier Rune *er; 3168ccd4a63SDavid du Colombier 317*1fa40b8eSDavid du Colombier USED(x); 3188ccd4a63SDavid du Colombier html_init(); 3198ccd4a63SDavid du Colombier Binit(&b, 1, OWRITE); 3208ccd4a63SDavid du Colombier er = r+n; 3218ccd4a63SDavid du Colombier for(; r<er; r++){ 3228ccd4a63SDavid du Colombier if(*r < Runeself) 3238ccd4a63SDavid du Colombier Bputrune(&b, *r); 3248ccd4a63SDavid du Colombier else if((s = findbyrune(*r)) != nil) 3258ccd4a63SDavid du Colombier Bprint(&b, "&%s;", s); 3268ccd4a63SDavid du Colombier else 3278ccd4a63SDavid du Colombier Bprint(&b, "&#x%04x;", *r); 3288ccd4a63SDavid du Colombier } 3298ccd4a63SDavid du Colombier Bflush(&b); 3308ccd4a63SDavid du Colombier } 3318ccd4a63SDavid du Colombier 332