// Public declarations for libhtml: enums shared by the lexer (LEX) and
// the item builder (BUILD), followed by forward typedefs for the
// structures declared later in this header.
#pragma lib "libhtml.a"
#pragma src "/sys/src/libhtml"

// UTILS
// NOTE(review): presumably these convert between Rune strings and byte
// buffers, with n the length of buf and chset one of the Charsets values
// below -- confirm against the libhtml source.
extern uchar*	fromStr(Rune* buf, int n, int chset);
extern Rune*	toStr(uchar* buf, int n, int chset);

// Common LEX and BUILD enums

// Media types
// (Docinfo's mediatype and scripttype fields hold values from this enum.)
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES	// number of media types above; keep last
};

// HTTP methods
// (used for Form's method field)
enum
{
	HGet,
	HPost
};

// Charsets
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS	// number of charsets above; keep last
};

// Frame Target IDs
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};

// LEX
typedef struct Token Token;
typedef struct Attr Attr;

// Token is used only through pointers in this header; no definition
// of struct Token appears here.
#pragma incomplete Token

// BUILD

typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;		// defined in Layout module

// Lay is used only through pointers in this header.
#pragma incomplete Lay


// Alignment types
enum {
	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
};

// Horizontal and vertical alignment, each stored as one AL* value.
struct Align
{
	uchar	halign;	// one of ALnone, ALleft, etc.
	uchar	valign;	// one of ALnone, ALtop, etc.
};

// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value
enum {
	Dnone =		0,
	Dpixels =	(1<<29),
	Dpercent =	(2<<29),
	Drelative =	(3<<29),
	Dkindmask =	(3<<29),	// selects the kind bits
	Dspecmask =	(~Dkindmask)	// everything outside the kind bits
};

struct Dimen
{
	int	kindspec;	// kind | spec
};

// Background is either an image or a color.
// If both are set, the image has precedence.
struct Background
{
	Rune*	image;	// url
	int	color;
};


// There are about a half dozen Item variants.
// They all look like this at the start (using Plan 9 C's
// anonymous structure member mechanism),
// and then the tag field dictates what extra fields there are.
struct Item
{
	Item*	next;		// successor in list of items
	int	width;		// width in pixels (0 for floating items)
	int	height;		// height in pixels
	int	ascent;		// ascent (from top to baseline) in pixels
	int	anchorid;	// if nonzero, which anchor we're in
	int	state;		// flags and values (see below)
	Genattr*	genattr;	// generic attributes and events
	int	tag;		// variant discriminator: Itexttag, etc.
};

// Item variant tags
// (one per variant struct below; stored in Item's tag field)
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};

// Text item.
struct Itext
{
	Item;			// (with tag ==Itexttag)
	Rune*	s;		// the characters
	int	fnt;		// style*NumSize+size (see font stuff, below)
	int	fg;		// Pixel (color) for text
	uchar	voff;		// Voffbias+vertical offset from baseline, in pixels (+ve == down)
	uchar	ul;		// ULnone, ULunder, or ULmid
};

// Rule item.
struct Irule
{
	Item;			// (with tag ==Iruletag)
	uchar	align;		// alignment spec
	uchar	noshade;	// if true, don't shade
	int	size;		// size attr (rule height)
	Dimen	wspec;		// width spec
};


// Image item.
struct Iimage
{
	Item;			// (with tag ==Iimagetag)
	Rune*	imsrc;		// image src url
	int	imwidth;	// spec width (actual, if no spec)
	int	imheight;	// spec height (actual, if no spec)
	Rune*	altrep;		// alternate representation, in absence of image
	Map*	map;		// if non-nil, client side map
	int	ctlid;		// if animated
	uchar	align;		// vertical alignment
	uchar	hspace;		// in pixels; buffer space on each side
	uchar	vspace;		// in pixels; buffer space on top and bottom
	uchar	border;		// in pixels: border width to draw around image
	Iimage*	nextimage;	// next in list of document's images
	void*	aux;		// NOTE(review): use not documented here -- presumably for the library's client; confirm
};


// Form field item; the field's details live in the Formfield it points to.
struct Iformfield
{
	Item;			// (with tag ==Iformfieldtag)
	Formfield*	formfield;
	void*	aux;		// NOTE(review): use not documented here -- presumably for the library's client; confirm
};


// Table item; the table's details live in the Table it points to.
struct Itable
{
	Item;			// (with tag ==Itabletag)
	Table*	table;
};


// Floating item: wraps a table or image item that floats to a margin.
struct Ifloat
{
	Item;			// (with tag ==Ifloattag)
	Item*	item;		// table or image item that floats
	int	x;		// x coord of top (from right, if ALright)
	int	y;		// y coord of top
	uchar	side;		// margin it floats to: ALleft or ALright
	uchar	infloats;	// true if this has been added to a lay.floats
	Ifloat*	nextfloat;	// in list of floats
};


// Spacer item.
struct Ispacer
{
	Item;			// (with tag ==Ispacertag)
	int	spkind;		// ISPnull, etc.
};

// Item state flags and value fields
// (bit layout of Item's state field: flag bits at the top, indent in
// bits 8-15, hang in bits 0-7)
enum {
	IFbrk =		0x80000000,	// forced break before this item
	IFbrksp =	0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk =	0x20000000,	// break not allowed before this item
	IFcleft =	0x10000000,	// clear left floats (IFbrk set too)
	IFcright =	0x08000000,	// clear right floats (IFbrk set too)
	IFwrap =	0x04000000,	// in a wrapping (non-pre) line
	IFhang =	0x02000000,	// in a hanging (into left indent) item
	IFrjust =	0x01000000,	// right justify current line
	IFcjust =	0x00800000,	// center justify current line
	IFsmap =	0x00400000,	// image is server-side map
	IFindentshift =	8,
	IFindentmask =	(255<<IFindentshift),	// current indent, in tab stops
	IFhangmask =	255			// current hang into left indent, in 1/10th tabstops
};

// Bias added to Itext's voff field
enum { Voffbias = 128 };

// Spacer kinds
enum {
	ISPnull,	// 0 height and width
	ISPvline,	// height and ascent of current font
	ISPhspace,	// width of space in current font
	ISPgeneral	// other purposes (e.g., between markers and list)
};

// Generic attributes and events (not many elements will have any of these set)
struct Genattr
{
	Rune*	id;
	Rune*	class;
	Rune*	style;
	Rune*	title;
	SEvent*	events;
};

// One script event handler, linked into a list.
struct SEvent
{
	SEvent*	next;		// in list of events
	int	type;		// SEonblur, etc.
	Rune*	script;
};

// Script event types (values for SEvent's type field)
enum {
	SEonblur, SEonchange, SEonclick, SEondblclick,
	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
	SEonmousedown, SEonmousemove, SEonmouseout,
	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
	SEonsubmit, SEonunload,
	Numscriptev	// number of event types above; keep last
};

// Form field types
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};

// Information about a field in a form
struct Formfield
{
	Formfield*	next;		// in list of fields for a form
	int	ftype;		// Ftext, Fpassword, etc.
	int	fieldid;	// serial no. of field within its form
	Form*	form;		// containing form
	Rune*	name;		// name attr
	Rune*	value;		// value attr
	int	size;		// size attr
	int	maxlength;	// maxlength attr
	int	rows;		// rows attr
	int	cols;		// cols attr
	uchar	flags;		// FFchecked, etc.
	Option*	options;	// for Fselect fields
	Item*	image;		// image item, for Fimage fields
	int	ctlid;		// identifies control for this field in layout
	SEvent*	events;		// same as genattr->events of containing item
};

// Formfield flags (bits in Formfield's flags field)
enum {
	FFchecked =	(1<<7),
	FFmultiple =	(1<<6)
};

// Option holds info about an option in a "select" form field
struct Option
{
	Option*	next;		// next in list of options for a field
	int	selected;	// true if selected initially
	Rune*	value;		// value attr
	Rune*	display;	// display string
};

// Form holds info about a form
struct Form
{
	Form*	next;		// in list of forms for document
	int	formid;		// serial no. of form within its doc
	Rune*	name;		// name or id attr (netscape uses name, HTML 4.0 uses id)
	Rune*	action;		// action attr
	int	target;		// target attr as targetid
	int	method;		// HGet or HPost
	int	nfields;	// number of fields
	Formfield*	fields;	// form's fields, in input order
};

// Flags used in various table structures
enum {
	TFparsing =	(1<<7),
	TFnowrap =	(1<<6),
	TFisth =	(1<<5)
};


// Information about a table
struct Table
{
	Table*	next;		// next in list of document's tables
	int	tableid;	// serial no. of table within its doc
	Tablerow*	rows;	// array of row specs (list during parsing)
	int	nrow;		// total number of rows
	Tablecol*	cols;	// array of column specs
	int	ncol;		// total number of columns
	Tablecell*	cells;	// list of unique cells
	int	ncell;		// total number of cells
	Tablecell***	grid;	// 2-D array of cells
	Align	align;		// alignment spec for whole table
	Dimen	width;		// width spec for whole table
	int	border;		// border attr
	int	cellspacing;	// cellspacing attr
	int	cellpadding;	// cellpadding attr
	Background	background;	// table background
	Item*	caption;	// linked list of Items, giving caption
	uchar	caption_place;	// ALtop or ALbottom
	Lay*	caption_lay;	// layout of caption
	int	totw;		// total width
	int	toth;		// total height
	int	caph;		// caption height
	int	availw;		// used for previous 3 sizes
	Token*	tabletok;	// token that started the table
	uchar	flags;		// Lchanged, perhaps
				// NOTE(review): Lchanged is not declared in this header -- confirm where it comes from
};


// Per-column information for a Table (elements of the Table's cols array).
struct Tablecol
{
	int	width;
	Align	align;
	Point	pos;
};


// Per-row information for a Table.
struct Tablerow
{
	Tablerow*	next;	// Next in list of rows, during parsing
	Tablecell*	cells;	// Cells in row, linked through nextinrow
	int	height;
	int	ascent;
	Align	align;
	Background	background;
	Point	pos;
	uchar	flags;		// 0 or TFparsing
};


// A Tablecell is one cell of a
// table.
// It may span multiple rows and multiple columns.
// Cells are linked on two lists: the list for all the cells of
// a document (the next pointers), and the list of all the
// cells that start in a given row (the nextinrow pointers)
// NOTE(review): the comment on the next field below says that list is
// per table, not per document -- confirm which is right.
struct Tablecell
{
	Tablecell*	next;		// next in list of table's cells
	Tablecell*	nextinrow;	// next in list of row's cells
	int	cellid;		// serial no. of cell within table
	Item*	content;	// contents before layout
	Lay*	lay;		// layout of cell
	int	rowspan;	// number of rows spanned by this cell
	int	colspan;	// number of cols spanned by this cell
	Align	align;		// alignment spec
	uchar	flags;		// TFparsing, TFnowrap, TFisth
	Dimen	wspec;		// suggested width
	int	hspec;		// suggested height
	Background	background;	// cell background
	int	minw;		// minimum possible width
	int	maxw;		// maximum width
	int	ascent;		// cell's ascent
	int	row;		// row of upper left corner
	int	col;		// col of upper left corner
	Point	pos;		// nw corner of cell contents, in cell
};

// Anchor is for info about hyperlinks that go somewhere
struct Anchor
{
	Anchor*	next;		// next in list of document's anchors
	int	index;		// serial no. of anchor within its doc
	Rune*	name;		// name attr
	Rune*	href;		// href attr
	int	target;		// target attr as targetid
};


// DestAnchor is for info about hyperlinks that are destinations
struct DestAnchor
{
	DestAnchor*	next;	// next in list of document's destanchors
	int	index;		// serial no. of anchor within its doc
	Rune*	name;		// name attr
	Item*	item;		// the destination
};


// Maps (client side)
struct Map
{
	Map*	next;		// next in list of document's maps
	Rune*	name;		// map name
	Area*	areas;		// list of map areas
};


// One area of a client-side Map.
struct Area
{
	Area*	next;		// next in list of a map's areas
	int	shape;		// SHrect, etc.
	Rune*	href;		// associated hypertext link
	int	target;		// associated target frame
	Dimen*	coords;		// array of coords for shape
	int	ncoords;	// size of coords array
};

// Area shapes
enum {
	SHrect, SHcircle, SHpoly
};

// Fonts are represented by integers: style*NumSize + size

// Font styles
enum {
	FntR,		// roman
	FntI,		// italic
	FntB,		// bold
	FntT,		// typewriter
	NumStyle	// number of styles above; keep last
};

// Font sizes
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize		// number of sizes above; keep last
};

// Total number of style/size combinations, and the default font
// (roman style, Normal size) in the style*NumSize+size encoding.
enum {
	NumFnt = (NumStyle*NumSize),
	DefFnt = (FntR*NumSize+Normal)
};

// Lines are needed through some text items, for underlining or strikethrough
enum {
	ULnone, ULunder, ULmid
};

// Kidinfo flags
enum {
	FRnoresize =	(1<<0),
	FRnoscroll =	(1<<1),
	FRhscroll =	(1<<2),
	FRvscroll =	(1<<3),
	FRhscrollauto =	(1<<4),
	FRvscrollauto =	(1<<5)
};

// Information about child frame or frameset
struct Kidinfo
{
	Kidinfo*	next;		// in list of kidinfos for a frameset
	int	isframeset;

	// fields for "frame"
	Rune*	src;		// only nil if a "dummy" frame or this is frameset
	Rune*	name;		// always non-empty if this isn't frameset
	int	marginw;
	int	marginh;
	int	framebd;
	int	flags;		// FRnoresize, etc.

	// fields for "frameset"
	Dimen*	rows;		// array of row dimensions
	int	nrows;		// length of rows
	Dimen*	cols;		// array of col dimensions
	int	ncols;		// length of cols
	Kidinfo*	kidinfos;
	Kidinfo*	nextframeset;	// parsing stack
};


// Document info (global information about HTML page)
struct Docinfo
{
	// stuff from HTTP headers, doc head, and body tag
	Rune*	src;		// original source of doc
	Rune*	base;		// base URL of doc
	Rune*	doctitle;	// from <title> element
	Background	background;	// background specification
	Iimage*	backgrounditem;	// Image Item for doc background image, or nil
	int	text;		// doc foreground (text) color
	int	link;		// unvisited hyperlink color
	int	vlink;		// visited hyperlink color
	int	alink;		// highlighting hyperlink color
	int	target;		// target frame default
	int	chset;		// ISO_8859_1, etc.
	int	mediatype;	// TextHtml, etc.
	int	scripttype;	// TextJavascript, etc.
	int	hasscripts;	// true if scripts used
	Rune*	refresh;	// content of <http-equiv=Refresh ...>
	Kidinfo*	kidinfo;	// if a frameset
	int	frameid;	// id of document frame

	// info needed to respond to user actions
	Anchor*	anchors;	// list of href anchors
	DestAnchor*	dests;	// list of destination anchors
	Form*	forms;		// list of forms
	Table*	tables;		// list of tables
	Map*	maps;		// list of maps
	Iimage*	images;		// list of image items (through nextimage links)
};

// Library entry points. Their definitions are not in this header.
// NOTE(review): in parsehtml, mtype is presumably a media type
// (TextHtml, etc.), chset a charset (UTF_8, etc.), and *pdi presumably
// receives the document's Docinfo -- confirm against the libhtml source.
extern int	dimenkind(Dimen d);
extern int	dimenspec(Dimen d);
extern void	freedocinfo(Docinfo* d);
extern void	freeitems(Item* ithead);
extern Item*	parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
extern void	printitems(Item* items, char* msg);
extern int	targetid(Rune* s);
extern Rune*	targetname(int targid);
extern int	validitems(Item* i);

// Declares to the compiler's print-format checker that the %I verb
// takes an Item* argument.
#pragma varargck type "I" Item*

// Control print output
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;

// To be provided by caller
// emalloc and erealloc should not return if can't get memory.
// emalloc should zero its memory.
extern void*	emalloc(ulong);
extern void*	erealloc(void* p, ulong size);