1 #include <u.h> 2 #include <libc.h> 3 #include <ctype.h> 4 #include <bio.h> 5 6 enum 7 { 8 SSIZE = 10, 9 10 /* list types */ 11 Lordered = 0, 12 Lunordered, 13 Lmenu, 14 Ldir, 15 16 }; 17 18 Biobuf in, out; 19 int lastc = '\n'; 20 int inpre = 0; 21 22 /* stack for fonts */ 23 char *fontstack[SSIZE]; 24 char *font = "R"; 25 int fsp; 26 27 /* stack for lists */ 28 struct 29 { 30 int type; 31 int ord; 32 } liststack[SSIZE]; 33 int lsp; 34 35 int quoting; 36 37 typedef struct Goobie Goobie; 38 struct Goobie 39 { 40 char *name; 41 void (*f)(Goobie*, char*); 42 void (*ef)(Goobie*, char*); 43 }; 44 45 void eatwhite(void); 46 void escape(void); 47 48 typedef void Action(Goobie*, char*); 49 50 Action g_ignore; 51 Action g_unexpected; 52 Action g_title; 53 Action g_p; 54 Action g_h; 55 Action g_li; 56 Action g_list, g_listend; 57 Action g_pre; 58 Action g_fpush, g_fpop; 59 Action g_indent, g_exdent; 60 Action g_dt; 61 Action g_display; 62 Action g_displayend; 63 Action g_table, g_tableend, g_caption, g_captionend; 64 Action g_br, g_hr; 65 66 Goobie gtab[] = 67 { 68 "!--", g_ignore, g_unexpected, 69 "!doctype", g_ignore, g_unexpected, 70 "a", g_ignore, g_ignore, 71 "address", g_display, g_displayend, 72 "b", g_fpush, g_fpop, 73 "base", g_ignore, g_unexpected, 74 "blink", g_ignore, g_ignore, 75 "blockquote", g_ignore, g_ignore, 76 "body", g_ignore, g_ignore, 77 "br", g_br, g_unexpected, 78 "caption", g_caption, g_captionend, 79 "center", g_ignore, g_ignore, 80 "cite", g_ignore, g_ignore, 81 "code", g_ignore, g_ignore, 82 "dd", g_ignore, g_unexpected, 83 "dfn", g_ignore, g_ignore, 84 "dir", g_list, g_listend, 85 "div", g_ignore, g_br, 86 "dl", g_indent, g_exdent, 87 "dt", g_dt, g_unexpected, 88 "em", g_ignore, g_ignore, 89 "font", g_ignore, g_ignore, 90 "form", g_ignore, g_ignore, 91 "h1", g_h, g_p, 92 "h2", g_h, g_p, 93 "h3", g_h, g_p, 94 "h4", g_h, g_p, 95 "h5", g_h, g_p, 96 "h6", g_h, g_p, 97 "head", g_ignore, g_ignore, 98 "hr", g_hr, g_unexpected, 99 "html", g_ignore, g_ignore, 100 "i", g_fpush, g_fpop, 101 "input", g_ignore, g_unexpected, 102 "img", g_ignore, g_unexpected, 103 "isindex", g_ignore, g_unexpected, 104 "kbd", g_fpush, g_fpop, 105 "key", g_ignore, g_ignore, 106 "li", g_li, g_unexpected, 107 "link", g_ignore, g_unexpected, 108 "listing", g_ignore, g_ignore, 109 "menu", g_list, g_listend, 110 "meta", g_ignore, g_unexpected, 111 "nextid", g_ignore, g_unexpected, 112 "ol", g_list, g_listend, 113 "option", g_ignore, g_unexpected, 114 "p", g_p, g_ignore, 115 "plaintext", g_ignore, g_unexpected, 116 "pre", g_pre, g_displayend, 117 "samp", g_ignore, g_ignore, 118 "script", g_ignore, g_ignore, 119 "select", g_ignore, g_ignore, 120 "span", g_ignore, g_ignore, 121 "strong", g_ignore, g_ignore, 122 "table", g_table, g_tableend, 123 "textarea", g_ignore, g_ignore, 124 "title", g_title, g_ignore, 125 "tt", g_fpush, g_fpop, 126 "u", g_ignore, g_ignore, 127 "ul", g_list, g_listend, 128 "var", g_ignore, g_ignore, 129 "xmp", g_ignore, g_ignore, 130 0, 0, 0, 131 }; 132 133 typedef struct Entity Entity; 134 struct Entity 135 { 136 char *name; 137 Rune value; 138 }; 139 140 Entity pl_entity[]= 141 { 142 "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"', 143 "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å', 144 "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É', 145 "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î', 146 "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô', 147 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ', 148 "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý', 149 "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&', 150 "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é', 151 "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>', 152 "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<', 153 "nbsp", L' ', 154 "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø', 155 "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú', 156 "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ', 157 0 158 }; 159 160 int 161 cistrcmp(char *a, char *b) 162 { 163 int c, d; 164 165 for(;; a++, b++){ 166 d = tolower(*a); 167 c = d - tolower(*b); 168 if(c) 169 break; 170 if(d == 0) 171 break; 172 } 173 return c; 174 } 175 176 int 177 readupto(char *buf, int n, char d, char notme) 178 { 179 char *p; 180 int c; 181 182 buf[0] = 0; 183 for(p = buf;; p++){ 184 c = Bgetc(&in); 185 if(c < 0){ 186 *p = 0; 187 return -1; 188 } 189 if(c == notme){ 190 Bungetc(&in); 191 return -1; 192 } 193 if(c == d){ 194 *p = 0; 195 return 0; 196 } 197 *p = c; 198 if(p == buf + n){ 199 *p = 0; 200 Bprint(&out, "<%s", buf); 201 return -1; 202 } 203 } 204 } 205 206 void 207 dogoobie(void) 208 { 209 char *arg, *type; 210 Goobie *g; 211 char buf[1024]; 212 int closing; 213 214 if(readupto(buf, sizeof(buf), '>', '<') < 0){ 215 Bprint(&out, "<%s", buf); 216 return; 217 } 218 type = buf; 219 if(*type == '/'){ 220 type++; 221 closing = 1; 222 } else 223 closing = 0; 224 arg = strchr(type, ' '); 225 if(arg == 0) 226 arg = strchr(type, '\r'); 227 if(arg == 0) 228 arg = strchr(type, '\n'); 229 if(arg) 230 *arg++ = 0; 231 for(g = gtab; g->name; g++) 232 if(cistrcmp(type, g->name) == 0){ 233 if(closing){ 234 if(g->ef){ 235 (*g->ef)(g, arg); 236 return; 237 } 238 } else { 239 if(g->f){ 240 (*g->f)(g, arg); 241 return; 242 } 243 } 244 } 245 if(closing) 246 type--; 247 if(arg) 248 Bprint(&out, "<%s %s>\n", type, arg); 249 else 250 Bprint(&out, "<%s>\n", type); 251 } 252 253 void 254 main(void) 255 { 256 int c, pos; 257 258 Binit(&in, 0, OREAD); 259 Binit(&out, 1, OWRITE); 260 261 pos = 0; 262 for(;;){ 263 c = Bgetc(&in); 264 if(c < 0) 265 return; 266 switch(c){ 267 case '<': 268 dogoobie(); 269 break; 270 case '&': 271 escape(); 272 break; 273 case '\r': 274 pos = 0; 275 break; 276 case '\n': 277 if(quoting){ 278 Bputc(&out, '"'); 279 quoting = 0; 280 } 281 if(lastc != '\n') 282 Bputc(&out, '\n'); 283 /* can't emit leading spaces in filled troff docs */ 284 if (!inpre) 285 eatwhite(); 286 lastc = c; 287 break; 288 default: 289 ++pos; 290 if(!inpre && isascii(c) && isspace(c) && pos > 80){ 291 Bputc(&out, '\n'); 292 eatwhite(); 293 pos = 0; 294 }else 295 Bputc(&out, c); 296 lastc = c; 297 break; 298 } 299 } 300 } 301 302 void 303 escape(void) 304 { 305 int c; 306 Entity *e; 307 char buf[8]; 308 309 if(readupto(buf, sizeof(buf), ';', '\n') < 0){ 310 Bprint(&out, "&%s", buf); 311 return; 312 } 313 for(e = pl_entity; e->name; e++) 314 if(strcmp(buf, e->name) == 0){ 315 Bprint(&out, "%C", e->value); 316 return; 317 } 318 if(*buf == '#'){ 319 c = atoi(buf+1); 320 if(isascii(c) && isprint(c)){ 321 Bputc(&out, c); 322 return; 323 } 324 } 325 Bprint(&out, "&%s;", buf); 326 } 327 328 /* 329 * whitespace is not significant to HTML, but newlines 330 * and leading spaces are significant to troff. 331 */ 332 void 333 eatwhite(void) 334 { 335 int c; 336 337 for(;;){ 338 c = Bgetc(&in); 339 if(c < 0) 340 break; 341 if(!isspace(c)){ 342 Bungetc(&in); 343 break; 344 } 345 } 346 } 347 348 /* 349 * print at start of line 350 */ 351 void 352 printsol(char *fmt, ...) 353 { 354 va_list arg; 355 356 if(quoting){ 357 Bputc(&out, '"'); 358 quoting = 0; 359 } 360 if(lastc != '\n') 361 Bputc(&out, '\n'); 362 va_start(arg, fmt); 363 Bvprint(&out, fmt, arg); 364 va_end(arg); 365 lastc = '\n'; 366 } 367 368 void 369 g_ignore(Goobie *g, char *arg) 370 { 371 USED(g, arg); 372 } 373 374 void 375 g_unexpected(Goobie *g, char *arg) 376 { 377 USED(arg); 378 fprint(2, "unexpected %s ending\n", g->name); 379 } 380 381 void 382 g_title(Goobie *g, char *arg) 383 { 384 USED(arg); 385 printsol(".TL\n", g->name); 386 } 387 388 void 389 g_p(Goobie *g, char *arg) 390 { 391 USED(arg); 392 printsol(".LP\n", g->name); 393 } 394 395 void 396 g_h(Goobie *g, char *arg) 397 { 398 USED(arg); 399 printsol(".SH %c\n", g->name[1]); 400 } 401 402 void 403 g_list(Goobie *g, char *arg) 404 { 405 USED(arg); 406 407 if(lsp != SSIZE){ 408 switch(g->name[0]){ 409 case 'o': 410 liststack[lsp].type = Lordered; 411 liststack[lsp].ord = 0; 412 break; 413 default: 414 liststack[lsp].type = Lunordered; 415 break; 416 } 417 } 418 lsp++; 419 } 420 421 void 422 g_br(Goobie *g, char *arg) 423 { 424 USED(g, arg); 425 printsol(".br\n"); 426 } 427 428 void 429 g_li(Goobie *g, char *arg) 430 { 431 USED(g, arg); 432 if(lsp <= 0 || lsp > SSIZE){ 433 printsol(".IP \\(bu\n"); 434 return; 435 } 436 switch(liststack[lsp-1].type){ 437 case Lunordered: 438 printsol(".IP \\(bu\n"); 439 break; 440 case Lordered: 441 printsol(".IP %d\n", ++liststack[lsp-1].ord); 442 break; 443 } 444 } 445 446 void 447 g_listend(Goobie *g, char *arg) 448 { 449 USED(g, arg); 450 if(--lsp < 0) 451 lsp = 0; 452 printsol(".LP\n"); 453 } 454 455 void 456 g_display(Goobie *g, char *arg) 457 { 458 USED(g, arg); 459 printsol(".DS\n"); 460 } 461 462 void 463 g_pre(Goobie *g, char *arg) 464 { 465 USED(g, arg); 466 printsol(".DS L\n"); 467 inpre = 1; 468 } 469 470 void 471 g_displayend(Goobie *g, char *arg) 472 { 473 USED(g, arg); 474 printsol(".DE\n"); 475 inpre = 0; 476 } 477 478 void 479 g_fpush(Goobie *g, char *arg) 480 { 481 USED(arg); 482 if(fsp < SSIZE) 483 fontstack[fsp] = font; 484 fsp++; 485 switch(g->name[0]){ 486 case 'b': 487 font = "B"; 488 break; 489 case 'i': 490 font = "I"; 491 break; 492 case 'k': /* kbd */ 493 case 't': /* tt */ 494 font = "(CW"; 495 break; 496 } 497 Bprint(&out, "\\f%s", font); 498 } 499 500 void 501 g_fpop(Goobie *g, char *arg) 502 { 503 USED(g, arg); 504 fsp--; 505 if(fsp < SSIZE) 506 font = fontstack[fsp]; 507 else 508 font = "R"; 509 510 Bprint(&out, "\\f%s", font); 511 } 512 513 void 514 g_indent(Goobie *g, char *arg) 515 { 516 USED(g, arg); 517 printsol(".RS\n"); 518 } 519 520 void 521 g_exdent(Goobie *g, char *arg) 522 { 523 USED(g, arg); 524 printsol(".RE\n"); 525 } 526 527 void 528 g_dt(Goobie *g, char *arg) 529 { 530 USED(g, arg); 531 printsol(".IP \""); 532 quoting = 1; 533 } 534 535 void 536 g_hr(Goobie *g, char *arg) 537 { 538 USED(g, arg); 539 printsol(".br\n"); 540 printsol("\\l'5i'\n"); 541 } 542 543 544 /* 545 <table border> 546 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption> 547 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th> 548 </tr> 549 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th> 550 </tr> 551 <tr align=center> 552 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 553 </tr> 554 <tr align=center> 555 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 556 </tr> 557 <tr align=center> 558 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 559 </tr> 560 <tr align=center> 561 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 562 </tr> 563 <tr align=center> 564 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 565 </tr> 566 <tr align=center> 567 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 568 </tr> 569 <tr align=center> 570 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 571 </tr> 572 <tr align=center> 573 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 574 </tr> 575 <tr align=center> 576 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 577 </tr> 578 </table> 579 */ 580 581 void 582 g_table(Goobie *g, char *arg) 583 { 584 USED(g, arg); 585 printsol(".TS\ncenter ;\n"); 586 } 587 588 void 589 g_tableend(Goobie *g, char *arg) 590 { 591 USED(g, arg); 592 printsol(".TE\n"); 593 } 594 595 void 596 g_caption(Goobie *g, char *arg) 597 { 598 USED(g, arg); 599 } 600 601 void 602 g_captionend(Goobie *g, char *arg) 603 { 604 USED(g, arg); 605 } 606