1 #pragma lib "libhtml.a" 2 #pragma src "/sys/src/libhtml" 3 4 /* UTILS */ 5 extern uchar* fromStr(Rune* buf, int n, int chset); 6 extern Rune* toStr(uchar* buf, int n, int chset); 7 8 /* Common LEX and BUILD enums */ 9 10 /* Media types */ 11 enum 12 { 13 ApplMsword, 14 ApplOctets, 15 ApplPdf, 16 ApplPostscript, 17 ApplRtf, 18 ApplFramemaker, 19 ApplMsexcel, 20 ApplMspowerpoint, 21 UnknownType, 22 Audio32kadpcm, 23 AudioBasic, 24 ImageCgm, 25 ImageG3fax, 26 ImageGif, 27 ImageIef, 28 ImageJpeg, 29 ImagePng, 30 ImageTiff, 31 ImageXBit, 32 ImageXBit2, 33 ImageXBitmulti, 34 ImageXXBitmap, 35 ModelVrml, 36 MultiDigest, 37 MultiMixed, 38 TextCss, 39 TextEnriched, 40 TextHtml, 41 TextJavascript, 42 TextPlain, 43 TextRichtext, 44 TextSgml, 45 TextTabSeparatedValues, 46 TextXml, 47 VideoMpeg, 48 VideoQuicktime, 49 NMEDIATYPES 50 }; 51 52 /* HTTP methods */ 53 enum 54 { 55 HGet, 56 HPost 57 }; 58 59 /* Charsets */ 60 enum 61 { 62 UnknownCharset, 63 US_Ascii, 64 ISO_8859_1, 65 UTF_8, 66 Unicode, 67 NCHARSETS 68 }; 69 70 /* Frame Target IDs */ 71 enum { 72 FTtop, 73 FTself, 74 FTparent, 75 FTblank 76 }; 77 78 /* LEX */ 79 typedef struct Token Token; 80 typedef struct Attr Attr; 81 82 #pragma incomplete Token 83 84 /* BUILD */ 85 86 typedef struct Item Item; 87 typedef struct Itext Itext; 88 typedef struct Irule Irule; 89 typedef struct Iimage Iimage; 90 typedef struct Iformfield Iformfield; 91 typedef struct Itable Itable; 92 typedef struct Ifloat Ifloat; 93 typedef struct Ispacer Ispacer; 94 typedef struct Genattr Genattr; 95 typedef struct SEvent SEvent; 96 typedef struct Formfield Formfield; 97 typedef struct Option Option; 98 typedef struct Form Form; 99 typedef struct Table Table; 100 typedef struct Tablecol Tablecol; 101 typedef struct Tablerow Tablerow; 102 typedef struct Tablecell Tablecell; 103 typedef struct Align Align; 104 typedef struct Dimen Dimen; 105 typedef struct Anchor Anchor; 106 typedef struct DestAnchor DestAnchor; 107 typedef struct Map Map; 108 typedef struct Area Area; 109 typedef struct Background Background; 110 typedef struct Kidinfo Kidinfo; 111 typedef struct Docinfo Docinfo; 112 typedef struct Stack Stack; 113 typedef struct Pstate Pstate; 114 typedef struct ItemSource ItemSource; 115 typedef struct Lay Lay; /* defined in Layout module */ 116 117 #pragma incomplete Lay 118 119 120 /* Alignment types */ 121 enum { 122 ALnone = 0, ALleft, ALcenter, ALright, ALjustify, 123 ALchar, ALtop, ALmiddle, ALbottom, ALbaseline, 124 }; 125 126 struct Align 127 { 128 uchar halign; /* one of ALnone, ALleft, etc. */ 129 uchar valign; /* one of ALnone, ALtop, etc. */ 130 }; 131 132 /* 133 * A Dimen holds a dimension specification, especially for those 134 * cases when a number can be followed by a % or a * to indicate 135 * percentage of total or relative weight. 136 * Dnone means no dimension was specified 137 */ 138 139 /* To fit in a word, use top bits to identify kind, rest for value */ 140 enum { 141 Dnone = 0, 142 Dpixels = (1<<29), 143 Dpercent = (2<<29), 144 Drelative = (3<<29), 145 Dkindmask = (3<<29), 146 Dspecmask = (~Dkindmask) 147 }; 148 149 struct Dimen 150 { 151 int kindspec; /* kind | spec */ 152 }; 153 154 /* 155 * Background is either an image or a color. 156 * If both are set, the image has precedence. 157 */ 158 struct Background 159 { 160 Rune* image; /* url */ 161 int color; 162 }; 163 164 165 /* 166 * There are about a half dozen Item variants. 167 * The all look like this at the start (using Plan 9 C's 168 * anonymous structure member mechanism), 169 * and then the tag field dictates what extra fields there are. 170 */ 171 struct Item 172 { 173 Item* next; /* successor in list of items */ 174 int width; /* width in pixels (0 for floating items) */ 175 int height; /* height in pixels */ 176 int ascent; /* ascent (from top to baseline) in pixels */ 177 int anchorid; /* if nonzero, which anchor we're in */ 178 int state; /* flags and values (see below) */ 179 Genattr*genattr; /* generic attributes and events */ 180 int tag; /* variant discriminator: Itexttag, etc. */ 181 }; 182 183 /* Item variant tags */ 184 enum { 185 Itexttag, 186 Iruletag, 187 Iimagetag, 188 Iformfieldtag, 189 Itabletag, 190 Ifloattag, 191 Ispacertag 192 }; 193 194 struct Itext 195 { 196 Item; /* (with tag ==Itexttag) */ 197 Rune* s; /* the characters */ 198 int fnt; /* style*NumSize+size (see font stuff, below) */ 199 int fg; /* Pixel (color) for text */ 200 uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */ 201 uchar ul; /* ULnone, ULunder, or ULmid */ 202 }; 203 204 struct Irule 205 { 206 Item; /* (with tag ==Iruletag) */ 207 uchar align; /* alignment spec */ 208 uchar noshade; /* if true, don't shade */ 209 int size; /* size attr (rule height) */ 210 int color; /* color attr */ 211 Dimen wspec; /* width spec */ 212 }; 213 214 215 struct Iimage 216 { 217 Item; /* (with tag ==Iimagetag) */ 218 Rune* imsrc; /* image src url */ 219 int imwidth; /* spec width (actual, if no spec) */ 220 int imheight; /* spec height (actual, if no spec) */ 221 Rune* altrep; /* alternate representation, in absence of image */ 222 Map* map; /* if non-nil, client side map */ 223 int ctlid; /* if animated */ 224 uchar align; /* vertical alignment */ 225 uchar hspace; /* in pixels; buffer space on each side */ 226 uchar vspace; /* in pixels; buffer space on top and bottom */ 227 uchar border; /* in pixels: border width to draw around image */ 228 Iimage* nextimage; /* next in list of document's images */ 229 void* aux; 230 }; 231 232 233 struct Iformfield 234 { 235 Item; /* (with tag ==Iformfieldtag) */ 236 Formfield*formfield; 237 void* aux; 238 }; 239 240 241 struct Itable 242 { 243 Item; /* (with tag ==Itabletag) */ 244 Table* table; 245 }; 246 247 248 struct Ifloat 249 { 250 Item; /* (with tag ==Ifloattag) */ 251 Item* item; /* table or image item that floats */ 252 int x; /* x coord of top (from right, if ALright) */ 253 int y; /* y coord of top */ 254 uchar side; /* margin it floats to: ALleft or ALright */ 255 uchar infloats; /* true if this has been added to a lay.floats */ 256 Ifloat* nextfloat; /* in list of floats */ 257 }; 258 259 260 struct Ispacer 261 { 262 Item; /* (with tag ==Ispacertag) */ 263 int spkind; /* ISPnull, etc. */ 264 }; 265 266 /* Item state flags and value fields */ 267 enum { 268 IFbrk = 0x80000000, /* forced break before this item */ 269 IFbrksp = 0x40000000, /* add 1 line space to break (IFbrk set too) */ 270 IFnobrk = 0x20000000, /* break not allowed before this item */ 271 IFcleft = 0x10000000, /* clear left floats (IFbrk set too) */ 272 IFcright= 0x08000000, /* clear right floats (IFbrk set too) */ 273 IFwrap = 0x04000000, /* in a wrapping (non-pre) line */ 274 IFhang = 0x02000000, /* in a hanging (into left indent) item */ 275 IFrjust = 0x01000000, /* right justify current line */ 276 IFcjust = 0x00800000, /* center justify current line */ 277 IFsmap = 0x00400000, /* image is server-side map */ 278 IFindentshift = 8, 279 IFindentmask = (255<<IFindentshift), /* current indent, in tab stops */ 280 IFhangmask = 255 /* current hang into left indent, in 1/10th tabstops */ 281 }; 282 283 /* Bias added to Itext's voff field */ 284 enum { Voffbias = 128 }; 285 286 /* Spacer kinds */ 287 enum { 288 ISPnull, /* 0 height and width */ 289 ISPvline, /* height and ascent of current font */ 290 ISPhspace, /* width of space in current font */ 291 ISPgeneral /* other purposes (e.g., between markers and list) */ 292 }; 293 294 /* Generic attributes and events (not many elements will have any of these set) */ 295 struct Genattr 296 { 297 Rune* id; 298 Rune* class; 299 Rune* style; 300 Rune* title; 301 SEvent* events; 302 }; 303 304 struct SEvent 305 { 306 SEvent* next; /* in list of events */ 307 int type; /* SEonblur, etc. */ 308 Rune* script; 309 }; 310 311 enum { 312 SEonblur, SEonchange, SEonclick, SEondblclick, 313 SEonfocus, SEonkeypress, SEonkeyup, SEonload, 314 SEonmousedown, SEonmousemove, SEonmouseout, 315 SEonmouseover, SEonmouseup, SEonreset, SEonselect, 316 SEonsubmit, SEonunload, 317 Numscriptev 318 }; 319 320 /* Form field types */ 321 enum { 322 Ftext, 323 Fpassword, 324 Fcheckbox, 325 Fradio, 326 Fsubmit, 327 Fhidden, 328 Fimage, 329 Freset, 330 Ffile, 331 Fbutton, 332 Fselect, 333 Ftextarea 334 }; 335 336 /* Information about a field in a form */ 337 struct Formfield 338 { 339 Formfield*next; /* in list of fields for a form */ 340 int ftype; /* Ftext, Fpassword, etc. */ 341 int fieldid; /* serial no. of field within its form */ 342 Form* form; /* containing form */ 343 Rune* name; /* name attr */ 344 Rune* value; /* value attr */ 345 int size; /* size attr */ 346 int maxlength; /* maxlength attr */ 347 int rows; /* rows attr */ 348 int cols; /* cols attr */ 349 uchar flags; /* FFchecked, etc. */ 350 Option* options; /* for Fselect fields */ 351 Item* image; /* image item, for Fimage fields */ 352 int ctlid; /* identifies control for this field in layout */ 353 SEvent* events; /* same as genattr->events of containing item */ 354 }; 355 356 enum { 357 FFchecked = (1<<7), 358 FFmultiple = (1<<6) 359 }; 360 361 /* Option holds info about an option in a "select" form field */ 362 struct Option 363 { 364 Option* next; /* next in list of options for a field */ 365 int selected; /* true if selected initially */ 366 Rune* value; /* value attr */ 367 Rune* display; /* display string */ 368 }; 369 370 /* Form holds info about a form */ 371 struct Form 372 { 373 Form* next; /* in list of forms for document */ 374 int formid; /* serial no. of form within its doc */ 375 Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */ 376 Rune* action; /* action attr */ 377 int target; /* target attr as targetid */ 378 int method; /* HGet or HPost */ 379 int nfields; /* number of fields */ 380 Formfield*fields; /* field's forms, in input order */ 381 }; 382 383 /* Flags used in various table structures */ 384 enum { 385 TFparsing = (1<<7), 386 TFnowrap = (1<<6), 387 TFisth = (1<<5) 388 }; 389 390 391 /* Information about a table */ 392 struct Table 393 { 394 Table* next; /* next in list of document's tables */ 395 int tableid; /* serial no. of table within its doc */ 396 Tablerow*rows; /* array of row specs (list during parsing) */ 397 int nrow; /* total number of rows */ 398 Tablecol*cols; /* array of column specs */ 399 int ncol; /* total number of columns */ 400 Tablecell*cells; /* list of unique cells */ 401 int ncell; /* total number of cells */ 402 Tablecell***grid; /* 2-D array of cells */ 403 Align align; /* alignment spec for whole table */ 404 Dimen width; /* width spec for whole table */ 405 int border; /* border attr */ 406 int cellspacing; /* cellspacing attr */ 407 int cellpadding; /* cellpadding attr */ 408 Background background; /* table background */ 409 Item* caption; /* linked list of Items, giving caption */ 410 uchar caption_place; /* ALtop or ALbottom */ 411 Lay* caption_lay; /* layout of caption */ 412 int totw; /* total width */ 413 int toth; /* total height */ 414 int caph; /* caption height */ 415 int availw; /* used for previous 3 sizes */ 416 Token* tabletok; /* token that started the table */ 417 uchar flags; /* Lchanged, perhaps */ 418 }; 419 420 421 struct Tablecol 422 { 423 int width; 424 Align align; 425 Point pos; 426 }; 427 428 429 struct Tablerow 430 { 431 Tablerow*next; /* Next in list of rows, during parsing */ 432 Tablecell*cells; /* Cells in row, linked through nextinrow */ 433 int height; 434 int ascent; 435 Align align; 436 Background background; 437 Point pos; 438 uchar flags; /* 0 or TFparsing */ 439 }; 440 441 /* 442 * A Tablecell is one cell of a table. 443 * It may span multiple rows and multiple columns. 444 * Cells are linked on two lists: the list for all the cells of 445 * a document (the next pointers), and the list of all the 446 * cells that start in a given row (the nextinrow pointers) 447 */ 448 struct Tablecell 449 { 450 Tablecell*next; /* next in list of table's cells */ 451 Tablecell*nextinrow; /* next in list of row's cells */ 452 int cellid; /* serial no. of cell within table */ 453 Item* content; /* contents before layout */ 454 Lay* lay; /* layout of cell */ 455 int rowspan; /* number of rows spanned by this cell */ 456 int colspan; /* number of cols spanned by this cell */ 457 Align align; /* alignment spec */ 458 uchar flags; /* TFparsing, TFnowrap, TFisth */ 459 Dimen wspec; /* suggested width */ 460 int hspec; /* suggested height */ 461 Background background; /* cell background */ 462 int minw; /* minimum possible width */ 463 int maxw; /* maximum width */ 464 int ascent; /* cell's ascent */ 465 int row; /* row of upper left corner */ 466 int col; /* col of upper left corner */ 467 Point pos; /* nw corner of cell contents, in cell */ 468 }; 469 470 /* Anchor is for info about hyperlinks that go somewhere */ 471 struct Anchor 472 { 473 Anchor* next; /* next in list of document's anchors */ 474 int index; /* serial no. of anchor within its doc */ 475 Rune* name; /* name attr */ 476 Rune* href; /* href attr */ 477 int target; /* target attr as targetid */ 478 }; 479 480 481 /* DestAnchor is for info about hyperlinks that are destinations */ 482 struct DestAnchor 483 { 484 DestAnchor*next; /* next in list of document's destanchors */ 485 int index; /* serial no. of anchor within its doc */ 486 Rune* name; /* name attr */ 487 Item* item; /* the destination */ 488 }; 489 490 491 /* Maps (client side) */ 492 struct Map 493 { 494 Map* next; /* next in list of document's maps */ 495 Rune* name; /* map name */ 496 Area* areas; /* list of map areas */ 497 }; 498 499 500 struct Area 501 { 502 Area* next; /* next in list of a map's areas */ 503 int shape; /* SHrect, etc. */ 504 Rune* href; /* associated hypertext link */ 505 int target; /* associated target frame */ 506 Dimen* coords; /* array of coords for shape */ 507 int ncoords; /* size of coords array */ 508 }; 509 510 /* Area shapes */ 511 enum { 512 SHrect, SHcircle, SHpoly 513 }; 514 515 /* Fonts are represented by integers: style*NumSize + size */ 516 517 /* Font styles */ 518 enum { 519 FntR, /* roman */ 520 FntI, /* italic */ 521 FntB, /* bold */ 522 FntT, /* typewriter */ 523 NumStyle 524 }; 525 526 /* Font sizes */ 527 enum { 528 Tiny, 529 Small, 530 Normal, 531 Large, 532 Verylarge, 533 NumSize 534 }; 535 536 enum { 537 NumFnt = NumStyle*NumSize, 538 DefFnt = FntR*NumSize+Normal, 539 }; 540 541 /* Lines are needed through some text items, for underlining or strikethrough */ 542 enum { 543 ULnone, ULunder, ULmid 544 }; 545 546 /* Kidinfo flags */ 547 enum { 548 FRnoresize = (1<<0), 549 FRnoscroll = (1<<1), 550 FRhscroll = (1<<2), 551 FRvscroll = (1<<3), 552 FRhscrollauto = (1<<4), 553 FRvscrollauto = (1<<5) 554 }; 555 556 /* Information about child frame or frameset */ 557 struct Kidinfo 558 { 559 Kidinfo*next; /* in list of kidinfos for a frameset */ 560 int isframeset; 561 562 /* fields for "frame" */ 563 Rune* src; /* only nil if a "dummy" frame or this is frameset */ 564 Rune* name; /* always non-empty if this isn't frameset */ 565 int marginw; 566 int marginh; 567 int framebd; 568 int flags; 569 570 /* fields for "frameset" */ 571 Dimen* rows; /* array of row dimensions */ 572 int nrows; /* length of rows */ 573 Dimen* cols; /* array of col dimensions */ 574 int ncols; /* length of cols */ 575 Kidinfo*kidinfos; 576 Kidinfo*nextframeset; /* parsing stack */ 577 }; 578 579 580 /* Document info (global information about HTML page) */ 581 struct Docinfo 582 { 583 /* stuff from HTTP headers, doc head, and body tag */ 584 Rune* src; /* original source of doc */ 585 Rune* base; /* base URL of doc */ 586 Rune* doctitle; /* from <title> element */ 587 Background background; /* background specification */ 588 Iimage* backgrounditem; /* Image Item for doc background image, or nil */ 589 int text; /* doc foreground (text) color */ 590 int link; /* unvisited hyperlink color */ 591 int vlink; /* visited hyperlink color */ 592 int alink; /* highlighting hyperlink color */ 593 int target; /* target frame default */ 594 int chset; /* ISO_8859, etc. */ 595 int mediatype; /* TextHtml, etc. */ 596 int scripttype; /* TextJavascript, etc. */ 597 int hasscripts; /* true if scripts used */ 598 Rune* refresh; /* content of <http-equiv=Refresh ...> */ 599 Kidinfo*kidinfo; /* if a frameset */ 600 int frameid; /* id of document frame */ 601 602 /* info needed to respond to user actions */ 603 Anchor* anchors; /* list of href anchors */ 604 DestAnchor*dests; /* list of destination anchors */ 605 Form* forms; /* list of forms */ 606 Table* tables; /* list of tables */ 607 Map* maps; /* list of maps */ 608 Iimage* images; /* list of image items (through nextimage links) */ 609 }; 610 611 extern int dimenkind(Dimen d); 612 extern int dimenspec(Dimen d); 613 extern void freedocinfo(Docinfo* d); 614 extern void freeitems(Item* ithead); 615 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); 616 extern void printitems(Item* items, char* msg); 617 extern int targetid(Rune* s); 618 extern Rune* targetname(int targid); 619 extern int validitems(Item* i); 620 621 #pragma varargck type "I" Item* 622 623 /* Control print output */ 624 extern int warn; 625 extern int dbglex; 626 extern int dbgbuild; 627 628 /* 629 * To be provided by caller 630 * emalloc and erealloc should not return if can't get memory. 631 * emalloc should zero its memory. 632 */ 633 extern void* emalloc(ulong); 634 extern void* erealloc(void* p, ulong size); 635