1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 5 enum{ 6 Nfont = 11, 7 Wid = 20, /* tmac.anhtml sets page width to 20" so we can recognize .nf text */ 8 }; 9 10 typedef uintptr Char; 11 typedef struct Troffchar Troffchar; 12 typedef struct Htmlchar Htmlchar; 13 typedef struct Font Font; 14 typedef struct HTMLfont HTMLfont; 15 16 /* 17 * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes. 18 * must be able to hold a pointer. 19 */ 20 enum 21 { 22 Italic = 16, 23 Bold, 24 CW, 25 Indent1, 26 Indent2, 27 Indent3, 28 Heading = 25, 29 Anchor = 26, /* must be last */ 30 }; 31 32 enum /* magic emissions */ 33 { 34 Estring = 0, 35 Epp = 1<<16, 36 }; 37 38 int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW }; 39 40 int nest[10]; 41 int nnest; 42 43 struct Troffchar 44 { 45 char *name; 46 char *value; 47 }; 48 49 struct Htmlchar 50 { 51 char *utf; 52 char *name; 53 int value; 54 }; 55 56 #include "chars.h" 57 58 struct Font{ 59 char *name; 60 HTMLfont *htmlfont; 61 }; 62 63 struct HTMLfont{ 64 char *name; 65 char *htmlname; 66 int bit; 67 }; 68 69 /* R must be first; it's the default representation for fonts we don't recognize */ 70 HTMLfont htmlfonts[] = 71 { 72 "R", nil, 0, 73 "LucidaSans", nil, 0, 74 "I", "i", Italic, 75 "LucidaSansI", "i", Italic, 76 "CW", "tt", CW, 77 "LucidaCW", "tt", CW, 78 nil, nil, 79 }; 80 81 #define TABLE "<table border=0 cellpadding=0 cellspacing=0>" 82 83 char* 84 onattr[8*sizeof(int)] = 85 { 86 0, 0, 0, 0, 0, 0, 0, 0, 87 0, 0, 0, 0, 0, 0, 0, 0, 88 "<i>", /* italic */ 89 "<b>", /* bold */ 90 "<tt><font size=+1>", /* cw */ 91 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent1 */ 92 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent2 */ 93 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent3 */ 94 0, 95 0, 96 0, 97 "<p><font size=+1><b>", /* 
heading 25 */ 98 "<unused>", /* anchor 26 */ 99 }; 100 101 char* 102 offattr[8*sizeof(int)] = 103 { 104 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 106 "</i>", /* italic */ 107 "</b>", /* bold */ 108 "</font></tt>", /* cw */ 109 "<-/table>", /* indent1 */ 110 "<-/table>", /* indent2 */ 111 "<-/table>", /* indent3 */ 112 0, 113 0, 114 0, 115 "</b></font>", /* heading 25 */ 116 "</a>", /* anchor 26 */ 117 }; 118 119 Font *font[Nfont]; 120 121 Biobuf bout; 122 int debug = 0; 123 124 /* troff state */ 125 int page = 1; 126 int ft = 1; 127 int vp = 0; 128 int hp = 0; 129 int ps = 1; 130 int res = 720; 131 132 int didP = 0; 133 int atnewline = 1; 134 int prevlineH = 0; 135 Char attr = 0; /* or'ed into each Char */ 136 137 Char *chars; 138 int nchars; 139 int nalloc; 140 char** anchors; /* allocated in order */ 141 int nanchors; 142 143 char *filename; 144 int cno; 145 char buf[8192]; 146 char *title = "Plan 9 man page"; 147 148 void process(Biobuf*, char*); 149 void mountfont(int, char*); 150 void switchfont(int); 151 void header(char*); 152 void flush(void); 153 void trailer(void); 154 155 void* 156 emalloc(ulong n) 157 { 158 void *p; 159 160 p = malloc(n); 161 if(p == nil) 162 sysfatal("malloc failed: %r"); 163 return p; 164 } 165 166 void* 167 erealloc(void *p, ulong n) 168 { 169 170 p = realloc(p, n); 171 if(p == nil) 172 sysfatal("realloc failed: %r"); 173 return p; 174 } 175 176 char* 177 estrdup(char *s) 178 { 179 char *t; 180 181 t = strdup(s); 182 if(t == nil) 183 sysfatal("strdup failed: %r"); 184 return t; 185 } 186 187 void 188 usage(void) 189 { 190 fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n"); 191 exits("usage"); 192 } 193 194 int 195 hccmp(const void *va, const void *vb) 196 { 197 Htmlchar *a, *b; 198 199 a = (Htmlchar*)va; 200 b = (Htmlchar*)vb; 201 return a->value - b->value; 202 } 203 204 void 205 main(int argc, char *argv[]) 206 { 207 int i; 208 Biobuf in, *inp; 209 Rune r; 210 211 for(i=0; i<nelem(htmlchars); i++){ 212 
chartorune(&r, htmlchars[i].utf); 213 htmlchars[i].value = r; 214 } 215 qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp); 216 217 ARGBEGIN{ 218 case 't': 219 title = ARGF(); 220 if(title == nil) 221 usage(); 222 break; 223 case 'd': 224 debug++; 225 break; 226 default: 227 usage(); 228 }ARGEND 229 230 Binit(&bout, 1, OWRITE); 231 if(argc == 0){ 232 header(title); 233 Binit(&in, 0, OREAD); 234 process(&in, "<stdin>"); 235 }else{ 236 header(title); 237 for(i=0; i<argc; i++){ 238 inp = Bopen(argv[i], OREAD); 239 if(inp == nil) 240 sysfatal("can't open %s: %r", argv[i]); 241 process(inp, argv[i]); 242 Bterm(inp); 243 } 244 } 245 flush(); 246 trailer(); 247 exits(nil); 248 } 249 250 void 251 emitchar(Char c) 252 { 253 if(nalloc == nchars){ 254 nalloc += 10000; 255 chars = realloc(chars, nalloc*sizeof(chars[0])); 256 if(chars == nil) 257 sysfatal("malloc failed: %r"); 258 } 259 chars[nchars++] = c; 260 } 261 262 void 263 emit(Rune r) 264 { 265 emitchar(r | attr); 266 /* 267 * Close man page references early, so that 268 * .IR proof (1), 269 * doesn't make the comma part of the link. 
270 */ 271 if(r == ')') 272 attr &= ~(1<<Anchor); 273 } 274 275 void 276 emitstr(char *s) 277 { 278 emitchar(Estring); 279 emitchar((Char)s); 280 } 281 282 int indentlevel; 283 int linelen; 284 285 void 286 iputrune(Biobuf *b, Rune r) 287 { 288 int i; 289 290 if(linelen++ > 60 && r == ' ') 291 r = '\n'; 292 Bputrune(b, r); 293 if(r == '\n'){ 294 for(i=0; i<indentlevel; i++) 295 Bprint(b, " "); 296 linelen = 0; 297 } 298 } 299 300 void 301 iputs(Biobuf *b, char *s) 302 { 303 if(s[0]=='<' && s[1]=='+'){ 304 iputrune(b, '\n'); 305 Bprint(b, "<%s", s+2); 306 indentlevel++; 307 iputrune(b, '\n'); 308 }else if(s[0]=='<' && s[1]=='-'){ 309 indentlevel--; 310 iputrune(b, '\n'); 311 Bprint(b, "<%s", s+2); 312 iputrune(b, '\n'); 313 }else 314 Bprint(b, "%s", s); 315 } 316 317 void 318 setattr(Char a) 319 { 320 Char on, off; 321 int i, j; 322 323 on = a & ~attr; 324 off = attr & ~a; 325 326 /* walk up the nest stack until we reach something we need to turn off. */ 327 for(i=0; i<nnest; i++) 328 if(off&(1<<nest[i])) 329 break; 330 331 /* turn off everything above that */ 332 for(j=nnest-1; j>=i; j--) 333 iputs(&bout, offattr[nest[j]]); 334 335 /* turn on everything we just turned off but didn't want to */ 336 for(j=i; j<nnest; j++) 337 if(a&(1<<nest[j])) 338 iputs(&bout, onattr[nest[j]]); 339 else 340 nest[j] = 0; 341 342 /* shift the zeros (turned off things) up */ 343 for(i=j=0; i<nnest; i++) 344 if(nest[i] != 0) 345 nest[j++] = nest[i]; 346 nnest = j; 347 348 /* now turn on the new attributes */ 349 for(i=0; i<nelem(attrorder); i++){ 350 j = attrorder[i]; 351 if(on&(1<<j)){ 352 if(j == Anchor) 353 onattr[j] = anchors[nanchors++]; 354 iputs(&bout, onattr[j]); 355 if(nnest >= nelem(nest)) 356 sysfatal("nesting too deep"); 357 nest[nnest++] = j; 358 } 359 } 360 attr = a; 361 } 362 363 void 364 flush(void) 365 { 366 int i; 367 Char c, a; 368 369 nanchors = 0; 370 for(i=0; i<nchars; i++){ 371 c = chars[i]; 372 if(c == Estring){ 373 /* next word is string to print */ 374 
iputs(&bout, (char*)chars[++i]); 375 continue; 376 } 377 if(c == Epp){ 378 iputrune(&bout, '\n'); 379 iputs(&bout, TABLE "<tr height=5><td></table>"); 380 iputrune(&bout, '\n'); 381 continue; 382 } 383 a = c & ~0xFFFF; 384 c &= 0xFFFF; 385 /* 386 * If we're going to something off after a space, 387 * let's just turn it off before. 388 */ 389 if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32) 390 a ^= a & ~chars[i+1]; 391 setattr(a); 392 iputrune(&bout, c & 0xFFFF); 393 } 394 } 395 396 void 397 header(char *s) 398 { 399 Bprint(&bout, "<head>\n"); 400 Bprint(&bout, "<title>%s</title>\n", s); 401 Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n"); 402 Bprint(&bout, "</head>\n"); 403 Bprint(&bout, "<body bgcolor=#ffffff>\n"); 404 } 405 406 void 407 trailer(void) 408 { 409 410 #ifdef LUCENT 411 Tm *t; 412 t = localtime(time(nil)); 413 Bprint(&bout, TABLE "<tr height=20><td></table>\n"); 414 Bprint(&bout, "<font size=-1><a href=\"http://www.lucent.com/copyright.html\">\n"); 415 Bprint(&bout, "Copyright</A> © %d Alcatel-Lucent. 
All rights reserved.</font>\n", t->year+1900); 416 #endif 417 Bprint(&bout, "</body></html>\n"); 418 } 419 420 int 421 getc(Biobuf *b) 422 { 423 cno++; 424 return Bgetrune(b); 425 } 426 427 void 428 ungetc(Biobuf *b) 429 { 430 cno--; 431 Bungetrune(b); 432 } 433 434 char* 435 getline(Biobuf *b) 436 { 437 int i, c; 438 439 for(i=0; i<sizeof buf; i++){ 440 c = getc(b); 441 if(c == Beof) 442 return nil; 443 buf[i] = c; 444 if(c == '\n'){ 445 buf[i] = '\0'; 446 break; 447 } 448 } 449 return buf; 450 } 451 452 int 453 getnum(Biobuf *b) 454 { 455 int i, c; 456 457 i = 0; 458 for(;;){ 459 c = getc(b); 460 if(c<'0' || '9'<c){ 461 ungetc(b); 462 break; 463 } 464 i = i*10 + (c-'0'); 465 } 466 return i; 467 } 468 469 char* 470 getstr(Biobuf *b) 471 { 472 int i, c; 473 474 for(i=0; i<sizeof buf; i++){ 475 /* must get bytes not runes */ 476 cno++; 477 c = Bgetc(b); 478 if(c == Beof) 479 return nil; 480 buf[i] = c; 481 if(c == '\n' || c==' ' || c=='\t'){ 482 ungetc(b); 483 buf[i] = '\0'; 484 break; 485 } 486 } 487 return buf; 488 } 489 490 int 491 setnum(Biobuf *b, char *name, int min, int max) 492 { 493 int i; 494 495 i = getnum(b); 496 if(debug > 2) 497 fprint(2, "set %s = %d\n", name, i); 498 if(min<=i && i<max) 499 return i; 500 sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno); 501 return i; 502 } 503 504 void 505 xcmd(Biobuf *b) 506 { 507 char *p, *fld[16], buf[1024]; 508 509 int i, nfld; 510 511 p = getline(b); 512 if(p == nil) 513 sysfatal("xcmd error: %r"); 514 if(debug) 515 fprint(2, "x command '%s'\n", p); 516 nfld = tokenize(p, fld, nelem(fld)); 517 if(nfld == 0) 518 return; 519 switch(fld[0][0]){ 520 case 'f': 521 /* mount font */ 522 if(nfld != 3) 523 break; 524 i = atoi(fld[1]); 525 if(i<0 || Nfont<=i) 526 sysfatal("font %d out of range at %s:#%d", i, filename, cno); 527 mountfont(i, fld[2]); 528 return; 529 case 'i': 530 /* init */ 531 return; 532 case 'r': 533 if(nfld<2 || atoi(fld[1])!=res) 534 sysfatal("typesetter has 
unexpected resolution %s", fld[1]? fld[1] : "<unspecified>"); 535 return; 536 case 's': 537 /* stop */ 538 return; 539 case 't': 540 /* trailer */ 541 return; 542 case 'T': 543 if(nfld!=2 || strcmp(fld[1], "utf")!=0) 544 sysfatal("output for unknown typesetter type %s", fld[1]); 545 return; 546 case 'X': 547 if(nfld<3 || strcmp(fld[1], "html")!=0) 548 break; 549 /* is it a man reference of the form cp(1)? */ 550 /* X manref start/end cp (1) */ 551 if(nfld==6 && strcmp(fld[2], "manref")==0){ 552 /* was the right macro; is it the right form? */ 553 if(strlen(fld[5])>=3 && 554 fld[5][0]=='(' && fld[5][2]==')' && 555 '0'<=fld[5][1] && fld[5][1]<='9'){ 556 if(strcmp(fld[3], "start") == 0){ 557 /* set anchor attribute and remember string */ 558 attr |= (1<<Anchor); 559 snprint(buf, sizeof buf, 560 "<a href=\"/magic/man2html/%c/%s\">", 561 fld[5][1], fld[4]); 562 nanchors++; 563 anchors = erealloc(anchors, nanchors*sizeof(char*)); 564 anchors[nanchors-1] = estrdup(buf); 565 }else if(strcmp(fld[3], "end") == 0) 566 attr &= ~(1<<Anchor); 567 } 568 }else if(strcmp(fld[2], "manPP") == 0){ 569 didP = 1; 570 emitchar(Epp); 571 }else if(nfld<4 || strcmp(fld[2], "manref")!=0){ 572 if(nfld>2 && strcmp(fld[2], "<P>")==0){ /* avoid triggering extra <br> */ 573 didP = 1; 574 /* clear all font attributes before paragraph */ 575 emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW))))); 576 emitstr("<P>"); 577 /* next emittec char will turn font attributes back on */ 578 }else if(nfld>2 && strcmp(fld[2], "<H4>")==0) 579 attr |= (1<<Heading); 580 else if(nfld>2 && strcmp(fld[2], "</H4>")==0) 581 attr &= ~(1<<Heading); 582 else if(debug) 583 fprint(2, "unknown in-line html %s... 
at %s:%#d\n", 584 fld[2], filename, cno); 585 } 586 return; 587 } 588 if(debug) 589 fprint(2, "unknown or badly formatted x command %s\n", fld[0]); 590 } 591 592 int 593 lookup(int c, Htmlchar tab[], int ntab) 594 { 595 int low, high, mid; 596 597 low = 0; 598 high = ntab - 1; 599 while(low <= high){ 600 mid = (low+high)/2; 601 if(c < tab[mid].value) 602 high = mid - 1; 603 else if(c > tab[mid].value) 604 low = mid + 1; 605 else 606 return mid; 607 } 608 return -1; /* no match */ 609 } 610 611 void 612 emithtmlchar(int r) 613 { 614 static char buf[10]; 615 int i; 616 617 i = lookup(r, htmlchars, nelem(htmlchars)); 618 if(i >= 0) 619 emitstr(htmlchars[i].name); 620 else 621 emit(r); 622 } 623 624 char* 625 troffchar(char *s) 626 { 627 int i; 628 629 for(i=0; troffchars[i].name!=nil; i++) 630 if(strcmp(s, troffchars[i].name) == 0) 631 return troffchars[i].value; 632 return "??"; 633 } 634 635 void 636 indent(void) 637 { 638 int nind; 639 640 didP = 0; 641 if(atnewline){ 642 if(hp != prevlineH){ 643 prevlineH = hp; 644 /* these most peculiar numbers appear in the troff -man output */ 645 nind = ((prevlineH-1*res)+323)/324; 646 attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3)); 647 if(nind >= 1) 648 attr |= (1<<Indent1); 649 if(nind >= 2) 650 attr |= (1<<Indent2); 651 if(nind >= 3) 652 attr |= (1<<Indent3); 653 } 654 atnewline = 0; 655 } 656 } 657 658 void 659 process(Biobuf *b, char *name) 660 { 661 int c, r, v, i; 662 char *p; 663 664 cno = 0; 665 prevlineH = res; 666 filename = name; 667 for(;;){ 668 c = getc(b); 669 switch(c){ 670 case Beof: 671 /* go to ground state */ 672 attr = 0; 673 emit('\n'); 674 return; 675 case '\n': 676 break; 677 case '0': case '1': case '2': case '3': case '4': 678 case '5': case '6': case '7': case '8': case '9': 679 v = c-'0'; 680 c = getc(b); 681 if(c<'0' || '9'<c) 682 sysfatal("illegal character motion at %s:#%d", filename, cno); 683 v = v*10 + (c-'0'); 684 hp += v; 685 /* fall through to character case */ 686 case 'c': 687 
indent(); 688 r = getc(b); 689 emithtmlchar(r); 690 break; 691 case 'D': 692 /* draw line; ignore */ 693 do 694 c = getc(b); 695 while(c!='\n' && c!= Beof); 696 break; 697 case 'f': 698 v = setnum(b, "font", 0, Nfont); 699 switchfont(v); 700 break; 701 case 'h': 702 v = setnum(b, "hpos", -20000, 20000); 703 /* generate spaces if motion is large and within a line */ 704 if(!atnewline && v>2*72) 705 for(i=0; i<v; i+=72) 706 emitstr(" "); 707 hp += v; 708 break; 709 case 'n': 710 setnum(b, "n1", -10000, 10000); 711 //Bprint(&bout, " N1=%d", v); 712 getc(b); /* space separates */ 713 setnum(b, "n2", -10000, 10000); 714 atnewline = 1; 715 if(!didP && hp < (Wid-1)*res) /* if line is less than 19" long, probably need a line break */ 716 emitstr("<br>"); 717 emit('\n'); 718 break; 719 case 'p': 720 page = setnum(b, "ps", -10000, 10000); 721 break; 722 case 's': 723 ps = setnum(b, "ps", 1, 1000); 724 break; 725 case 'v': 726 vp += setnum(b, "vpos", -10000, 10000); 727 /* BUG: ignore motion */ 728 break; 729 case 'x': 730 xcmd(b); 731 break; 732 case 'w': 733 emit(' '); 734 break; 735 case 'C': 736 indent(); 737 p = getstr(b); 738 emitstr(troffchar(p)); 739 break; 740 case 'H': 741 hp = setnum(b, "hpos", 0, 20000); 742 //Bprint(&bout, " H=%d ", hp); 743 break; 744 case 'V': 745 vp = setnum(b, "vpos", 0, 10000); 746 break; 747 default: 748 fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno); 749 return; 750 } 751 } 752 } 753 754 HTMLfont* 755 htmlfont(char *name) 756 { 757 int i; 758 759 for(i=0; htmlfonts[i].name!=nil; i++) 760 if(strcmp(name, htmlfonts[i].name) == 0) 761 return &htmlfonts[i]; 762 return &htmlfonts[0]; 763 } 764 765 void 766 mountfont(int pos, char *name) 767 { 768 if(debug) 769 fprint(2, "mount font %s on %d\n", name, pos); 770 if(font[pos] != nil){ 771 free(font[pos]->name); 772 free(font[pos]); 773 } 774 font[pos] = emalloc(sizeof(Font)); 775 font[pos]->name = estrdup(name); 776 font[pos]->htmlfont = htmlfont(name); 777 } 