1 #include <u.h> 2 #include <libc.h> 3 #include <ctype.h> 4 #include <bio.h> 5 6 enum 7 { 8 SSIZE = 10, 9 10 /* list types */ 11 Lordered = 0, 12 Lunordered, 13 Lmenu, 14 Ldir, 15 16 }; 17 18 Biobuf in, out; 19 int lastc = '\n'; 20 int inpre = 0; 21 22 /* stack for fonts */ 23 char *fontstack[SSIZE]; 24 char *font = "R"; 25 int fsp; 26 27 /* stack for lists */ 28 struct 29 { 30 int type; 31 int ord; 32 } liststack[SSIZE]; 33 int lsp; 34 35 int quoting; 36 37 typedef struct Goobie Goobie; 38 struct Goobie 39 { 40 char *name; 41 void (*f)(Goobie*, char*); 42 void (*ef)(Goobie*, char*); 43 }; 44 45 void eatwhite(void); 46 void escape(void); 47 48 typedef void Action(Goobie*, char*); 49 50 Action g_ignore; 51 Action g_unexpected; 52 Action g_title; 53 Action g_p; 54 Action g_h; 55 Action g_li; 56 Action g_list, g_listend; 57 Action g_pre; 58 Action g_fpush, g_fpop; 59 Action g_indent, g_exdent; 60 Action g_dt; 61 Action g_display; 62 Action g_displayend; 63 Action g_table, g_tableend, g_caption, g_captionend; 64 Action g_br, g_hr; 65 66 Goobie gtab[] = 67 { 68 "!--", g_ignore, g_unexpected, 69 "!doctype", g_ignore, g_unexpected, 70 "a", g_ignore, g_ignore, 71 "address", g_display, g_displayend, 72 "b", g_fpush, g_fpop, 73 "base", g_ignore, g_unexpected, 74 "blink", g_ignore, g_ignore, 75 "blockquote", g_ignore, g_ignore, 76 "body", g_ignore, g_ignore, 77 "br", g_br, g_unexpected, 78 "caption", g_caption, g_captionend, 79 "center", g_ignore, g_ignore, 80 "cite", g_ignore, g_ignore, 81 "code", g_ignore, g_ignore, 82 "dd", g_ignore, g_unexpected, 83 "dfn", g_ignore, g_ignore, 84 "dir", g_list, g_listend, 85 "dl", g_indent, g_exdent, 86 "dt", g_dt, g_unexpected, 87 "em", g_ignore, g_ignore, 88 "font", g_ignore, g_ignore, 89 "form", g_ignore, g_ignore, 90 "h1", g_h, g_p, 91 "h2", g_h, g_p, 92 "h3", g_h, g_p, 93 "h4", g_h, g_p, 94 "h5", g_h, g_p, 95 "h6", g_h, g_p, 96 "head", g_ignore, g_ignore, 97 "hr", g_hr, g_unexpected, 98 "html", g_ignore, g_ignore, 99 "i", g_fpush, g_fpop, 100 "input", g_ignore, g_unexpected, 101 "img", g_ignore, g_unexpected, 102 "isindex", g_ignore, g_unexpected, 103 "kbd", g_fpush, g_fpop, 104 "key", g_ignore, g_ignore, 105 "li", g_li, g_unexpected, 106 "link", g_ignore, g_unexpected, 107 "listing", g_ignore, g_ignore, 108 "menu", g_list, g_listend, 109 "meta", g_ignore, g_unexpected, 110 "nextid", g_ignore, g_unexpected, 111 "ol", g_list, g_listend, 112 "option", g_ignore, g_unexpected, 113 "p", g_p, g_ignore, 114 "plaintext", g_ignore, g_unexpected, 115 "pre", g_pre, g_displayend, 116 "samp", g_ignore, g_ignore, 117 "select", g_ignore, g_ignore, 118 "strong", g_ignore, g_ignore, 119 "table", g_table, g_tableend, 120 "textarea", g_ignore, g_ignore, 121 "title", g_title, g_ignore, 122 "tt", g_fpush, g_fpop, 123 "u", g_ignore, g_ignore, 124 "ul", g_list, g_listend, 125 "var", g_ignore, g_ignore, 126 "xmp", g_ignore, g_ignore, 127 0, 0, 0, 128 }; 129 130 typedef struct Entity Entity; 131 struct Entity 132 { 133 char *name; 134 Rune value; 135 }; 136 137 Entity pl_entity[]= 138 { 139 "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"', 140 "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å', 141 "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É', 142 "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î', 143 "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô', 144 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ', 145 "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý', 146 "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&', 147 "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é', 148 "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>', 149 "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<', 150 "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø', 151 "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú', 152 "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ', 153 0 154 }; 155 156 int 157 cistrcmp(char *a, char *b) 158 { 159 int c, d; 160 161 for(;; a++, b++){ 162 d = tolower(*a); 163 c = d - tolower(*b); 164 if(c) 165 break; 166 if(d == 0) 167 break; 168 } 169 return c; 170 } 171 172 int 173 readupto(char *buf, int n, char d, char notme) 174 { 175 char *p; 176 int c; 177 178 buf[0] = 0; 179 for(p = buf;; p++){ 180 c = Bgetc(&in); 181 if(c < 0){ 182 *p = 0; 183 return -1; 184 } 185 if(c == notme){ 186 Bungetc(&in); 187 return -1; 188 } 189 if(c == d){ 190 *p = 0; 191 return 0; 192 } 193 *p = c; 194 if(p == buf + n){ 195 *p = 0; 196 Bprint(&out, "<%s", buf); 197 return -1; 198 } 199 } 200 } 201 202 void 203 dogoobie(void) 204 { 205 char *arg, *type; 206 Goobie *g; 207 char buf[1024]; 208 int closing; 209 210 if(readupto(buf, sizeof(buf), '>', '<') < 0){ 211 Bprint(&out, "<%s", buf); 212 return; 213 } 214 type = buf; 215 if(*type == '/'){ 216 type++; 217 closing = 1; 218 } else 219 closing = 0; 220 arg = strchr(type, ' '); 221 if(arg == 0) 222 arg = strchr(type, '\r'); 223 if(arg == 0) 224 arg = strchr(type, '\n'); 225 if(arg) 226 *arg++ = 0; 227 for(g = gtab; g->name; g++) 228 if(cistrcmp(type, g->name) == 0){ 229 if(closing){ 230 if(g->ef){ 231 (*g->ef)(g, arg); 232 return; 233 } 234 } else { 235 if(g->f){ 236 (*g->f)(g, arg); 237 return; 238 } 239 } 240 } 241 if(closing) 242 type--; 243 if(arg) 244 Bprint(&out, "<%s %s>\n", type, arg); 245 else 246 Bprint(&out, "<%s>\n", type); 247 } 248 249 void 250 main(void) 251 { 252 int c; 253 254 Binit(&in, 0, OREAD); 255 Binit(&out, 1, OWRITE); 256 257 for(;;){ 258 c = Bgetc(&in); 259 if(c < 0) 260 return; 261 switch(c){ 262 case '<': 263 dogoobie(); 264 break; 265 case '&': 266 escape(); 267 break; 268 case '\r': 269 break; 270 case '\n': 271 if(quoting){ 272 Bputc(&out, '"'); 273 quoting = 0; 274 } 275 if(lastc != '\n') 276 Bputc(&out, '\n'); 277 /* can't emit leading spaces in filled troff docs */ 278 if (!inpre) 279 eatwhite(); 280 lastc = c; 281 break; 282 default: 283 Bputc(&out, c); 284 lastc = c; 285 break; 286 } 287 } 288 } 289 290 void 291 escape(void) 292 { 293 Entity *e; 294 char buf[8]; 295 296 if(readupto(buf, sizeof(buf), ';', '\n') < 0){ 297 Bprint(&out, "&%s", buf); 298 return; 299 } 300 for(e = pl_entity; e->name; e++) 301 if(strcmp(buf, e->name) == 0){ 302 Bprint(&out, "%C", e->value); 303 return; 304 } 305 Bprint(&out, "&%s;", buf); 306 } 307 308 /* 309 * whitespace is not significant to HTML, but newlines 310 * and leading spaces are significant to troff. 311 */ 312 void 313 eatwhite(void) 314 { 315 int c; 316 317 for(;;){ 318 c = Bgetc(&in); 319 if(c < 0) 320 break; 321 if(!isspace(c)){ 322 Bungetc(&in); 323 break; 324 } 325 } 326 } 327 328 /* 329 * print at start of line 330 */ 331 void 332 printsol(char *fmt, ...) 333 { 334 char buf[8*1024], *s; 335 va_list arg; 336 337 if(quoting){ 338 Bputc(&out, '"'); 339 quoting = 0; 340 } 341 if(lastc != '\n') 342 Bputc(&out, '\n'); 343 va_start(arg, fmt); 344 s = doprint(buf, buf + (sizeof(buf)-1) / sizeof(*buf), fmt, arg); 345 va_end(arg); 346 Bwrite(&out, buf, s-buf); 347 lastc = *(s-1); 348 } 349 350 void 351 g_ignore(Goobie *g, char *arg) 352 { 353 USED(g, arg); 354 } 355 356 void 357 g_unexpected(Goobie *g, char *arg) 358 { 359 USED(arg); 360 fprint(2, "unexpected %s ending\n", g->name); 361 } 362 363 void 364 g_title(Goobie *g, char *arg) 365 { 366 USED(arg); 367 printsol(".TL\n", g->name); 368 } 369 370 void 371 g_p(Goobie *g, char *arg) 372 { 373 USED(arg); 374 printsol(".LP\n", g->name); 375 } 376 377 void 378 g_h(Goobie *g, char *arg) 379 { 380 USED(arg); 381 printsol(".SH %c\n", g->name[1]); 382 } 383 384 void 385 g_list(Goobie *g, char *arg) 386 { 387 USED(arg); 388 389 if(lsp != SSIZE){ 390 switch(g->name[0]){ 391 case 'o': 392 liststack[lsp].type = Lordered; 393 liststack[lsp].ord = 0; 394 break; 395 default: 396 liststack[lsp].type = Lunordered; 397 break; 398 } 399 } 400 lsp++; 401 } 402 403 void 404 g_br(Goobie *g, char *arg) 405 { 406 USED(g, arg); 407 printsol(".br\n"); 408 } 409 410 void 411 g_li(Goobie *g, char *arg) 412 { 413 USED(g, arg); 414 if(lsp <= 0 || lsp > SSIZE){ 415 printsol(".IP \\(bu\n"); 416 return; 417 } 418 switch(liststack[lsp-1].type){ 419 case Lunordered: 420 printsol(".IP \\(bu\n"); 421 break; 422 case Lordered: 423 printsol(".IP %d\n", ++liststack[lsp-1].ord); 424 break; 425 } 426 } 427 428 void 429 g_listend(Goobie *g, char *arg) 430 { 431 USED(g, arg); 432 if(--lsp < 0) 433 lsp = 0; 434 printsol(".LP\n"); 435 } 436 437 void 438 g_display(Goobie *g, char *arg) 439 { 440 USED(g, arg); 441 printsol(".DS\n"); 442 } 443 444 void 445 g_pre(Goobie *g, char *arg) 446 { 447 USED(g, arg); 448 printsol(".DS L\n"); 449 inpre = 1; 450 } 451 452 void 453 g_displayend(Goobie *g, char *arg) 454 { 455 USED(g, arg); 456 printsol(".DE\n"); 457 inpre = 0; 458 } 459 460 void 461 g_fpush(Goobie *g, char *arg) 462 { 463 USED(arg); 464 if(fsp < SSIZE) 465 fontstack[fsp] = font; 466 fsp++; 467 switch(g->name[0]){ 468 case 'b': 469 font = "B"; 470 break; 471 case 'i': 472 font = "I"; 473 break; 474 case 'k': /* kbd */ 475 case 't': /* tt */ 476 font = "(CW"; 477 break; 478 } 479 Bprint(&out, "\\f%s", font); 480 } 481 482 void 483 g_fpop(Goobie *g, char *arg) 484 { 485 USED(g, arg); 486 fsp--; 487 if(fsp < SSIZE) 488 font = fontstack[fsp]; 489 else 490 font = "R"; 491 492 Bprint(&out, "\\f%s", font); 493 } 494 495 void 496 g_indent(Goobie *g, char *arg) 497 { 498 USED(g, arg); 499 printsol(".RS\n"); 500 } 501 502 void 503 g_exdent(Goobie *g, char *arg) 504 { 505 USED(g, arg); 506 printsol(".RE\n"); 507 } 508 509 void 510 g_dt(Goobie *g, char *arg) 511 { 512 USED(g, arg); 513 printsol(".IP \""); 514 quoting = 1; 515 } 516 517 void 518 g_hr(Goobie *g, char *arg) 519 { 520 USED(g, arg); 521 printsol(".br\n"); 522 printsol("\\l'5i'\n"); 523 } 524 525 526 /* 527 <table border> 528 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption> 529 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th> 530 </tr> 531 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th> 532 </tr> 533 <tr align=center> 534 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 535 </tr> 536 <tr align=center> 537 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 538 </tr> 539 <tr align=center> 540 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 541 </tr> 542 <tr align=center> 543 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 544 </tr> 545 <tr align=center> 546 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 547 </tr> 548 <tr align=center> 549 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 550 </tr> 551 <tr align=center> 552 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 553 </tr> 554 <tr align=center> 555 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 556 </tr> 557 <tr align=center> 558 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 559 </tr> 560 </table> 561 */ 562 563 void 564 g_table(Goobie *g, char *arg) 565 { 566 USED(g, arg); 567 printsol(".TS\ncenter ;\n"); 568 } 569 570 void 571 g_tableend(Goobie *g, char *arg) 572 { 573 USED(g, arg); 574 printsol(".TE\n"); 575 } 576 577 void 578 g_caption(Goobie *g, char *arg) 579 { 580 USED(g, arg); 581 } 582 583 void 584 g_captionend(Goobie *g, char *arg) 585 { 586 USED(g, arg); 587 } 588