1 #include <u.h> 2 #include <libc.h> 3 #include <ctype.h> 4 #include <bio.h> 5 6 enum 7 { 8 SSIZE = 10, 9 10 /* list types */ 11 Lordered = 0, 12 Lunordered, 13 Lmenu, 14 Ldir, 15 16 }; 17 18 Biobuf in, out; 19 int lastc = '\n'; 20 int inpre = 0; 21 22 /* stack for fonts */ 23 char *fontstack[SSIZE]; 24 char *font = "R"; 25 int fsp; 26 27 /* stack for lists */ 28 struct 29 { 30 int type; 31 int ord; 32 } liststack[SSIZE]; 33 int lsp; 34 35 int quoting; 36 37 typedef struct Goobie Goobie; 38 struct Goobie 39 { 40 char *name; 41 void (*f)(Goobie*, char*); 42 void (*ef)(Goobie*, char*); 43 }; 44 45 void eatwhite(void); 46 void escape(void); 47 48 typedef void Action(Goobie*, char*); 49 50 Action g_ignore; 51 Action g_unexpected; 52 Action g_title; 53 Action g_p; 54 Action g_h; 55 Action g_li; 56 Action g_list, g_listend; 57 Action g_pre; 58 Action g_fpush, g_fpop; 59 Action g_indent, g_exdent; 60 Action g_dt; 61 Action g_display; 62 Action g_displayend; 63 Action g_table, g_tableend, g_caption, g_captionend; 64 Action g_br, g_hr; 65 66 Goobie gtab[] = 67 { 68 "!--", g_ignore, g_unexpected, 69 "!doctype", g_ignore, g_unexpected, 70 "a", g_ignore, g_ignore, 71 "address", g_display, g_displayend, 72 "b", g_fpush, g_fpop, 73 "base", g_ignore, g_unexpected, 74 "blink", g_ignore, g_ignore, 75 "blockquote", g_ignore, g_ignore, 76 "body", g_ignore, g_ignore, 77 "br", g_br, g_unexpected, 78 "caption", g_caption, g_captionend, 79 "center", g_ignore, g_ignore, 80 "cite", g_ignore, g_ignore, 81 "code", g_ignore, g_ignore, 82 "dd", g_ignore, g_unexpected, 83 "dfn", g_ignore, g_ignore, 84 "dir", g_list, g_listend, 85 "dl", g_indent, g_exdent, 86 "dt", g_dt, g_unexpected, 87 "em", g_ignore, g_ignore, 88 "font", g_ignore, g_ignore, 89 "form", g_ignore, g_ignore, 90 "h1", g_h, g_p, 91 "h2", g_h, g_p, 92 "h3", g_h, g_p, 93 "h4", g_h, g_p, 94 "h5", g_h, g_p, 95 "h6", g_h, g_p, 96 "head", g_ignore, g_ignore, 97 "hr", g_hr, g_unexpected, 98 "html", g_ignore, g_ignore, 99 "i", g_fpush, g_fpop, 100 "input", g_ignore, g_unexpected, 101 "img", g_ignore, g_unexpected, 102 "isindex", g_ignore, g_unexpected, 103 "kbd", g_fpush, g_fpop, 104 "key", g_ignore, g_ignore, 105 "li", g_li, g_unexpected, 106 "link", g_ignore, g_unexpected, 107 "listing", g_ignore, g_ignore, 108 "menu", g_list, g_listend, 109 "meta", g_ignore, g_unexpected, 110 "nextid", g_ignore, g_unexpected, 111 "ol", g_list, g_listend, 112 "option", g_ignore, g_unexpected, 113 "p", g_p, g_ignore, 114 "plaintext", g_ignore, g_unexpected, 115 "pre", g_pre, g_displayend, 116 "samp", g_ignore, g_ignore, 117 "select", g_ignore, g_ignore, 118 "strong", g_ignore, g_ignore, 119 "table", g_table, g_tableend, 120 "textarea", g_ignore, g_ignore, 121 "title", g_title, g_ignore, 122 "tt", g_fpush, g_fpop, 123 "u", g_ignore, g_ignore, 124 "ul", g_list, g_listend, 125 "var", g_ignore, g_ignore, 126 "xmp", g_ignore, g_ignore, 127 0, 0, 0, 128 }; 129 130 typedef struct Entity Entity; 131 struct Entity 132 { 133 char *name; 134 Rune value; 135 }; 136 137 Entity pl_entity[]= 138 { 139 "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"', 140 "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å', 141 "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É', 142 "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î', 143 "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô', 144 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ', 145 "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý', 146 "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&', 147 "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é', 148 "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>', 149 "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<', 150 "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø', 151 "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú', 152 "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ', 153 0 154 }; 155 156 int 157 cistrcmp(char *a, char *b) 158 { 159 int c, d; 160 161 for(;; a++, b++){ 162 d = tolower(*a); 163 c = d - tolower(*b); 164 if(c) 165 break; 166 if(d == 0) 167 break; 168 } 169 return c; 170 } 171 172 int 173 readupto(char *buf, int n, char d, char notme) 174 { 175 char *p; 176 int c; 177 178 buf[0] = 0; 179 for(p = buf;; p++){ 180 c = Bgetc(&in); 181 if(c < 0){ 182 *p = 0; 183 return -1; 184 } 185 if(c == notme){ 186 Bungetc(&in); 187 return -1; 188 } 189 if(c == d){ 190 *p = 0; 191 return 0; 192 } 193 *p = c; 194 if(p == buf + n){ 195 *p = 0; 196 Bprint(&out, "<%s", buf); 197 return -1; 198 } 199 } 200 } 201 202 void 203 dogoobie(void) 204 { 205 char *arg, *type; 206 Goobie *g; 207 char buf[1024]; 208 int closing; 209 210 if(readupto(buf, sizeof(buf), '>', '<') < 0){ 211 Bprint(&out, "<%s", buf); 212 return; 213 } 214 type = buf; 215 if(*type == '/'){ 216 type++; 217 closing = 1; 218 } else 219 closing = 0; 220 arg = strchr(type, ' '); 221 if(arg == 0) 222 arg = strchr(type, '\r'); 223 if(arg == 0) 224 arg = strchr(type, '\n'); 225 if(arg) 226 *arg++ = 0; 227 for(g = gtab; g->name; g++) 228 if(cistrcmp(type, g->name) == 0){ 229 if(closing){ 230 if(g->ef){ 231 (*g->ef)(g, arg); 232 return; 233 } 234 } else { 235 if(g->f){ 236 (*g->f)(g, arg); 237 return; 238 } 239 } 240 } 241 if(closing) 242 type--; 243 if(arg) 244 Bprint(&out, "<%s %s>\n", type, arg); 245 else 246 Bprint(&out, "<%s>\n", type); 247 } 248 249 void 250 main(void) 251 { 252 int c; 253 254 Binit(&in, 0, OREAD); 255 Binit(&out, 1, OWRITE); 256 257 for(;;){ 258 c = Bgetc(&in); 259 if(c < 0) 260 return; 261 switch(c){ 262 case '<': 263 dogoobie(); 264 break; 265 case '&': 266 escape(); 267 break; 268 case '\r': 269 break; 270 case '\n': 271 if(quoting){ 272 Bputc(&out, '"'); 273 quoting = 0; 274 } 275 if(lastc != '\n') 276 Bputc(&out, '\n'); 277 /* can't emit leading spaces in filled troff docs */ 278 if (!inpre) 279 eatwhite(); 280 lastc = c; 281 break; 282 default: 283 Bputc(&out, c); 284 lastc = c; 285 break; 286 } 287 } 288 } 289 290 void 291 escape(void) 292 { 293 Entity *e; 294 char buf[8]; 295 296 if(readupto(buf, sizeof(buf), ';', '\n') < 0){ 297 Bprint(&out, "&%s", buf); 298 return; 299 } 300 for(e = pl_entity; e->name; e++) 301 if(strcmp(buf, e->name) == 0){ 302 Bprint(&out, "%C", e->value); 303 return; 304 } 305 Bprint(&out, "&%s;", buf); 306 } 307 308 /* 309 * whitespace is not significant to HTML, but newlines 310 * and leading spaces are significant to troff. 311 */ 312 void 313 eatwhite(void) 314 { 315 int c; 316 317 for(;;){ 318 c = Bgetc(&in); 319 if(c < 0) 320 break; 321 if(!isspace(c)){ 322 Bungetc(&in); 323 break; 324 } 325 } 326 } 327 328 /* 329 * print at start of line 330 */ 331 void 332 printsol(char *fmt, ...) 333 { 334 va_list arg; 335 336 if(quoting){ 337 Bputc(&out, '"'); 338 quoting = 0; 339 } 340 if(lastc != '\n') 341 Bputc(&out, '\n'); 342 va_start(arg, fmt); 343 Bvprint(&out, fmt, arg); 344 va_end(arg); 345 lastc = '\n'; 346 } 347 348 void 349 g_ignore(Goobie *g, char *arg) 350 { 351 USED(g, arg); 352 } 353 354 void 355 g_unexpected(Goobie *g, char *arg) 356 { 357 USED(arg); 358 fprint(2, "unexpected %s ending\n", g->name); 359 } 360 361 void 362 g_title(Goobie *g, char *arg) 363 { 364 USED(arg); 365 printsol(".TL\n", g->name); 366 } 367 368 void 369 g_p(Goobie *g, char *arg) 370 { 371 USED(arg); 372 printsol(".LP\n", g->name); 373 } 374 375 void 376 g_h(Goobie *g, char *arg) 377 { 378 USED(arg); 379 printsol(".SH %c\n", g->name[1]); 380 } 381 382 void 383 g_list(Goobie *g, char *arg) 384 { 385 USED(arg); 386 387 if(lsp != SSIZE){ 388 switch(g->name[0]){ 389 case 'o': 390 liststack[lsp].type = Lordered; 391 liststack[lsp].ord = 0; 392 break; 393 default: 394 liststack[lsp].type = Lunordered; 395 break; 396 } 397 } 398 lsp++; 399 } 400 401 void 402 g_br(Goobie *g, char *arg) 403 { 404 USED(g, arg); 405 printsol(".br\n"); 406 } 407 408 void 409 g_li(Goobie *g, char *arg) 410 { 411 USED(g, arg); 412 if(lsp <= 0 || lsp > SSIZE){ 413 printsol(".IP \\(bu\n"); 414 return; 415 } 416 switch(liststack[lsp-1].type){ 417 case Lunordered: 418 printsol(".IP \\(bu\n"); 419 break; 420 case Lordered: 421 printsol(".IP %d\n", ++liststack[lsp-1].ord); 422 break; 423 } 424 } 425 426 void 427 g_listend(Goobie *g, char *arg) 428 { 429 USED(g, arg); 430 if(--lsp < 0) 431 lsp = 0; 432 printsol(".LP\n"); 433 } 434 435 void 436 g_display(Goobie *g, char *arg) 437 { 438 USED(g, arg); 439 printsol(".DS\n"); 440 } 441 442 void 443 g_pre(Goobie *g, char *arg) 444 { 445 USED(g, arg); 446 printsol(".DS L\n"); 447 inpre = 1; 448 } 449 450 void 451 g_displayend(Goobie *g, char *arg) 452 { 453 USED(g, arg); 454 printsol(".DE\n"); 455 inpre = 0; 456 } 457 458 void 459 g_fpush(Goobie *g, char *arg) 460 { 461 USED(arg); 462 if(fsp < SSIZE) 463 fontstack[fsp] = font; 464 fsp++; 465 switch(g->name[0]){ 466 case 'b': 467 font = "B"; 468 break; 469 case 'i': 470 font = "I"; 471 break; 472 case 'k': /* kbd */ 473 case 't': /* tt */ 474 font = "(CW"; 475 break; 476 } 477 Bprint(&out, "\\f%s", font); 478 } 479 480 void 481 g_fpop(Goobie *g, char *arg) 482 { 483 USED(g, arg); 484 fsp--; 485 if(fsp < SSIZE) 486 font = fontstack[fsp]; 487 else 488 font = "R"; 489 490 Bprint(&out, "\\f%s", font); 491 } 492 493 void 494 g_indent(Goobie *g, char *arg) 495 { 496 USED(g, arg); 497 printsol(".RS\n"); 498 } 499 500 void 501 g_exdent(Goobie *g, char *arg) 502 { 503 USED(g, arg); 504 printsol(".RE\n"); 505 } 506 507 void 508 g_dt(Goobie *g, char *arg) 509 { 510 USED(g, arg); 511 printsol(".IP \""); 512 quoting = 1; 513 } 514 515 void 516 g_hr(Goobie *g, char *arg) 517 { 518 USED(g, arg); 519 printsol(".br\n"); 520 printsol("\\l'5i'\n"); 521 } 522 523 524 /* 525 <table border> 526 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption> 527 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th> 528 </tr> 529 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th> 530 </tr> 531 <tr align=center> 532 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 533 </tr> 534 <tr align=center> 535 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 536 </tr> 537 <tr align=center> 538 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 539 </tr> 540 <tr align=center> 541 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 542 </tr> 543 <tr align=center> 544 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 545 </tr> 546 <tr align=center> 547 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 548 </tr> 549 <tr align=center> 550 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 551 </tr> 552 <tr align=center> 553 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 554 </tr> 555 <tr align=center> 556 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 557 </tr> 558 </table> 559 */ 560 561 void 562 g_table(Goobie *g, char *arg) 563 { 564 USED(g, arg); 565 printsol(".TS\ncenter ;\n"); 566 } 567 568 void 569 g_tableend(Goobie *g, char *arg) 570 { 571 USED(g, arg); 572 printsol(".TE\n"); 573 } 574 575 void 576 g_caption(Goobie *g, char *arg) 577 { 578 USED(g, arg); 579 } 580 581 void 582 g_captionend(Goobie *g, char *arg) 583 { 584 USED(g, arg); 585 } 586