1 #include <u.h> 2 #include <libc.h> 3 #include <ctype.h> 4 #include <bio.h> 5 6 enum 7 { 8 SSIZE = 10, 9 10 /* list types */ 11 Lordered = 0, 12 Lunordered, 13 Lmenu, 14 Ldir, 15 16 }; 17 18 Biobuf in, out; 19 int lastc = '\n'; 20 int inpre = 0; 21 22 /* stack for fonts */ 23 char *fontstack[SSIZE]; 24 char *font = "R"; 25 int fsp; 26 27 /* stack for lists */ 28 struct 29 { 30 int type; 31 int ord; 32 } liststack[SSIZE]; 33 int lsp; 34 35 int quoting; 36 37 typedef struct Goobie Goobie; 38 struct Goobie 39 { 40 char *name; 41 void (*f)(Goobie*, char*); 42 void (*ef)(Goobie*, char*); 43 }; 44 45 void eatwhite(void); 46 void escape(void); 47 48 typedef void Action(Goobie*, char*); 49 50 Action g_ignore; 51 Action g_unexpected; 52 Action g_title; 53 Action g_p; 54 Action g_h; 55 Action g_li; 56 Action g_list, g_listend; 57 Action g_pre; 58 Action g_fpush, g_fpop; 59 Action g_indent, g_exdent; 60 Action g_dt; 61 Action g_display; 62 Action g_displayend; 63 Action g_table, g_tableend, g_caption, g_captionend; 64 Action g_br, g_hr; 65 66 Goobie gtab[] = 67 { 68 "!--", g_ignore, g_unexpected, 69 "!doctype", g_ignore, g_unexpected, 70 "a", g_ignore, g_ignore, 71 "address", g_display, g_displayend, 72 "b", g_fpush, g_fpop, 73 "base", g_ignore, g_unexpected, 74 "blink", g_ignore, g_ignore, 75 "blockquote", g_ignore, g_ignore, 76 "body", g_ignore, g_ignore, 77 "br", g_br, g_unexpected, 78 "caption", g_caption, g_captionend, 79 "center", g_ignore, g_ignore, 80 "cite", g_ignore, g_ignore, 81 "code", g_ignore, g_ignore, 82 "dd", g_ignore, g_unexpected, 83 "dfn", g_ignore, g_ignore, 84 "dir", g_list, g_listend, 85 "dl", g_indent, g_exdent, 86 "dt", g_dt, g_unexpected, 87 "em", g_ignore, g_ignore, 88 "font", g_ignore, g_ignore, 89 "form", g_ignore, g_ignore, 90 "h1", g_h, g_p, 91 "h2", g_h, g_p, 92 "h3", g_h, g_p, 93 "h4", g_h, g_p, 94 "h5", g_h, g_p, 95 "h6", g_h, g_p, 96 "head", g_ignore, g_ignore, 97 "hr", g_hr, g_unexpected, 98 "html", g_ignore, g_ignore, 99 "i", g_fpush, g_fpop, 100 "input", g_ignore, g_unexpected, 101 "img", g_ignore, g_unexpected, 102 "isindex", g_ignore, g_unexpected, 103 "kbd", g_fpush, g_fpop, 104 "key", g_ignore, g_ignore, 105 "li", g_li, g_unexpected, 106 "link", g_ignore, g_unexpected, 107 "listing", g_ignore, g_ignore, 108 "menu", g_list, g_listend, 109 "meta", g_ignore, g_unexpected, 110 "nextid", g_ignore, g_unexpected, 111 "ol", g_list, g_listend, 112 "option", g_ignore, g_unexpected, 113 "p", g_p, g_ignore, 114 "plaintext", g_ignore, g_unexpected, 115 "pre", g_pre, g_displayend, 116 "samp", g_ignore, g_ignore, 117 "select", g_ignore, g_ignore, 118 "strong", g_ignore, g_ignore, 119 "table", g_table, g_tableend, 120 "textarea", g_ignore, g_ignore, 121 "title", g_title, g_ignore, 122 "tt", g_fpush, g_fpop, 123 "u", g_ignore, g_ignore, 124 "ul", g_list, g_listend, 125 "var", g_ignore, g_ignore, 126 "xmp", g_ignore, g_ignore, 127 0, 0, 0, 128 }; 129 130 typedef struct Entity Entity; 131 struct Entity 132 { 133 char *name; 134 Rune value; 135 }; 136 137 Entity pl_entity[]= 138 { 139 "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"', 140 "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å', 141 "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É', 142 "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î', 143 "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô', 144 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ', 145 "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý', 146 "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&', 147 "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é', 148 "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>', 149 "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<', 150 "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø', 151 "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú', 152 "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ', 153 0 154 }; 155 156 int 157 cistrcmp(char *a, char *b) 158 { 159 int c, d; 160 161 for(;; a++, b++){ 162 d = tolower(*a); 163 c = d - tolower(*b); 164 if(c) 165 break; 166 if(d == 0) 167 break; 168 } 169 return c; 170 } 171 172 int 173 readupto(char *buf, int n, char d, char notme) 174 { 175 char *p; 176 int c; 177 178 buf[0] = 0; 179 for(p = buf;; p++){ 180 c = Bgetc(&in); 181 if(c < 0){ 182 *p = 0; 183 return -1; 184 } 185 if(c == notme){ 186 Bungetc(&in); 187 return -1; 188 } 189 if(c == d){ 190 *p = 0; 191 return 0; 192 } 193 *p = c; 194 if(p == buf + n){ 195 *p = 0; 196 Bprint(&out, "<%s", buf); 197 return -1; 198 } 199 } 200 } 201 202 void 203 dogoobie(void) 204 { 205 char *arg, *type; 206 Goobie *g; 207 char buf[1024]; 208 int closing; 209 210 if(readupto(buf, sizeof(buf), '>', '<') < 0){ 211 Bprint(&out, "<%s", buf); 212 return; 213 } 214 type = buf; 215 if(*type == '/'){ 216 type++; 217 closing = 1; 218 } else 219 closing = 0; 220 arg = strchr(type, ' '); 221 if(arg == 0) 222 arg = strchr(type, '\r'); 223 if(arg == 0) 224 arg = strchr(type, '\n'); 225 if(arg) 226 *arg++ = 0; 227 for(g = gtab; g->name; g++) 228 if(cistrcmp(type, g->name) == 0){ 229 if(closing){ 230 if(g->ef){ 231 (*g->ef)(g, arg); 232 return; 233 } 234 } else { 235 if(g->f){ 236 (*g->f)(g, arg); 237 return; 238 } 239 } 240 } 241 if(closing) 242 type--; 243 if(arg) 244 Bprint(&out, "<%s %s>\n", type, arg); 245 else 246 Bprint(&out, "<%s>\n", type); 247 } 248 249 void 250 main(void) 251 { 252 int c, pos; 253 254 Binit(&in, 0, OREAD); 255 Binit(&out, 1, OWRITE); 256 257 pos = 0; 258 for(;;){ 259 c = Bgetc(&in); 260 if(c < 0) 261 return; 262 switch(c){ 263 case '<': 264 dogoobie(); 265 break; 266 case '&': 267 escape(); 268 break; 269 case '\r': 270 pos = 0; 271 break; 272 case '\n': 273 if(quoting){ 274 Bputc(&out, '"'); 275 quoting = 0; 276 } 277 if(lastc != '\n') 278 Bputc(&out, '\n'); 279 /* can't emit leading spaces in filled troff docs */ 280 if (!inpre) 281 eatwhite(); 282 lastc = c; 283 break; 284 default: 285 ++pos; 286 if(!inpre && isascii(c) && isspace(c) && pos > 80){ 287 Bputc(&out, '\n'); 288 eatwhite(); 289 pos = 0; 290 }else 291 Bputc(&out, c); 292 lastc = c; 293 break; 294 } 295 } 296 } 297 298 void 299 escape(void) 300 { 301 int c; 302 Entity *e; 303 char buf[8]; 304 305 if(readupto(buf, sizeof(buf), ';', '\n') < 0){ 306 Bprint(&out, "&%s", buf); 307 return; 308 } 309 for(e = pl_entity; e->name; e++) 310 if(strcmp(buf, e->name) == 0){ 311 Bprint(&out, "%C", e->value); 312 return; 313 } 314 if(*buf == '#'){ 315 c = atoi(buf+1); 316 if(isascii(c) && isprint(c)){ 317 Bputc(&out, c); 318 return; 319 } 320 } 321 Bprint(&out, "&%s;", buf); 322 } 323 324 /* 325 * whitespace is not significant to HTML, but newlines 326 * and leading spaces are significant to troff. 327 */ 328 void 329 eatwhite(void) 330 { 331 int c; 332 333 for(;;){ 334 c = Bgetc(&in); 335 if(c < 0) 336 break; 337 if(!isspace(c)){ 338 Bungetc(&in); 339 break; 340 } 341 } 342 } 343 344 /* 345 * print at start of line 346 */ 347 void 348 printsol(char *fmt, ...) 349 { 350 va_list arg; 351 352 if(quoting){ 353 Bputc(&out, '"'); 354 quoting = 0; 355 } 356 if(lastc != '\n') 357 Bputc(&out, '\n'); 358 va_start(arg, fmt); 359 Bvprint(&out, fmt, arg); 360 va_end(arg); 361 lastc = '\n'; 362 } 363 364 void 365 g_ignore(Goobie *g, char *arg) 366 { 367 USED(g, arg); 368 } 369 370 void 371 g_unexpected(Goobie *g, char *arg) 372 { 373 USED(arg); 374 fprint(2, "unexpected %s ending\n", g->name); 375 } 376 377 void 378 g_title(Goobie *g, char *arg) 379 { 380 USED(arg); 381 printsol(".TL\n", g->name); 382 } 383 384 void 385 g_p(Goobie *g, char *arg) 386 { 387 USED(arg); 388 printsol(".LP\n", g->name); 389 } 390 391 void 392 g_h(Goobie *g, char *arg) 393 { 394 USED(arg); 395 printsol(".SH %c\n", g->name[1]); 396 } 397 398 void 399 g_list(Goobie *g, char *arg) 400 { 401 USED(arg); 402 403 if(lsp != SSIZE){ 404 switch(g->name[0]){ 405 case 'o': 406 liststack[lsp].type = Lordered; 407 liststack[lsp].ord = 0; 408 break; 409 default: 410 liststack[lsp].type = Lunordered; 411 break; 412 } 413 } 414 lsp++; 415 } 416 417 void 418 g_br(Goobie *g, char *arg) 419 { 420 USED(g, arg); 421 printsol(".br\n"); 422 } 423 424 void 425 g_li(Goobie *g, char *arg) 426 { 427 USED(g, arg); 428 if(lsp <= 0 || lsp > SSIZE){ 429 printsol(".IP \\(bu\n"); 430 return; 431 } 432 switch(liststack[lsp-1].type){ 433 case Lunordered: 434 printsol(".IP \\(bu\n"); 435 break; 436 case Lordered: 437 printsol(".IP %d\n", ++liststack[lsp-1].ord); 438 break; 439 } 440 } 441 442 void 443 g_listend(Goobie *g, char *arg) 444 { 445 USED(g, arg); 446 if(--lsp < 0) 447 lsp = 0; 448 printsol(".LP\n"); 449 } 450 451 void 452 g_display(Goobie *g, char *arg) 453 { 454 USED(g, arg); 455 printsol(".DS\n"); 456 } 457 458 void 459 g_pre(Goobie *g, char *arg) 460 { 461 USED(g, arg); 462 printsol(".DS L\n"); 463 inpre = 1; 464 } 465 466 void 467 g_displayend(Goobie *g, char *arg) 468 { 469 USED(g, arg); 470 printsol(".DE\n"); 471 inpre = 0; 472 } 473 474 void 475 g_fpush(Goobie *g, char *arg) 476 { 477 USED(arg); 478 if(fsp < SSIZE) 479 fontstack[fsp] = font; 480 fsp++; 481 switch(g->name[0]){ 482 case 'b': 483 font = "B"; 484 break; 485 case 'i': 486 font = "I"; 487 break; 488 case 'k': /* kbd */ 489 case 't': /* tt */ 490 font = "(CW"; 491 break; 492 } 493 Bprint(&out, "\\f%s", font); 494 } 495 496 void 497 g_fpop(Goobie *g, char *arg) 498 { 499 USED(g, arg); 500 fsp--; 501 if(fsp < SSIZE) 502 font = fontstack[fsp]; 503 else 504 font = "R"; 505 506 Bprint(&out, "\\f%s", font); 507 } 508 509 void 510 g_indent(Goobie *g, char *arg) 511 { 512 USED(g, arg); 513 printsol(".RS\n"); 514 } 515 516 void 517 g_exdent(Goobie *g, char *arg) 518 { 519 USED(g, arg); 520 printsol(".RE\n"); 521 } 522 523 void 524 g_dt(Goobie *g, char *arg) 525 { 526 USED(g, arg); 527 printsol(".IP \""); 528 quoting = 1; 529 } 530 531 void 532 g_hr(Goobie *g, char *arg) 533 { 534 USED(g, arg); 535 printsol(".br\n"); 536 printsol("\\l'5i'\n"); 537 } 538 539 540 /* 541 <table border> 542 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption> 543 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th> 544 </tr> 545 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th> 546 </tr> 547 <tr align=center> 548 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 549 </tr> 550 <tr align=center> 551 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 552 </tr> 553 <tr align=center> 554 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 555 </tr> 556 <tr align=center> 557 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 558 </tr> 559 <tr align=center> 560 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 561 </tr> 562 <tr align=center> 563 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 564 </tr> 565 <tr align=center> 566 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 567 </tr> 568 <tr align=center> 569 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 570 </tr> 571 <tr align=center> 572 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td> 573 </tr> 574 </table> 575 */ 576 577 void 578 g_table(Goobie *g, char *arg) 579 { 580 USED(g, arg); 581 printsol(".TS\ncenter ;\n"); 582 } 583 584 void 585 g_tableend(Goobie *g, char *arg) 586 { 587 USED(g, arg); 588 printsol(".TE\n"); 589 } 590 591 void 592 g_caption(Goobie *g, char *arg) 593 { 594 USED(g, arg); 595 } 596 597 void 598 g_captionend(Goobie *g, char *arg) 599 { 600 USED(g, arg); 601 } 602