1 #pragma lib "libhtml.a" 2 #pragma src "/sys/src/libhtml" 3 4 /* UTILS */ 5 extern uchar* fromStr(Rune* buf, int n, int chset); 6 extern Rune* toStr(uchar* buf, int n, int chset); 7 8 /* Common LEX and BUILD enums */ 9 10 /* Media types */ 11 enum 12 { 13 ApplMsword, 14 ApplOctets, 15 ApplPdf, 16 ApplPostscript, 17 ApplRtf, 18 ApplFramemaker, 19 ApplMsexcel, 20 ApplMspowerpoint, 21 UnknownType, 22 Audio32kadpcm, 23 AudioBasic, 24 ImageCgm, 25 ImageG3fax, 26 ImageGif, 27 ImageIef, 28 ImageJpeg, 29 ImagePng, 30 ImageTiff, 31 ImageXBit, 32 ImageXBit2, 33 ImageXBitmulti, 34 ImageXXBitmap, 35 ModelVrml, 36 MultiDigest, 37 MultiMixed, 38 TextCss, 39 TextEnriched, 40 TextHtml, 41 TextJavascript, 42 TextPlain, 43 TextRichtext, 44 TextSgml, 45 TextTabSeparatedValues, 46 TextXml, 47 VideoMpeg, 48 VideoQuicktime, 49 NMEDIATYPES 50 }; 51 52 /* HTTP methods */ 53 enum 54 { 55 HGet, 56 HPost 57 }; 58 59 /* Charsets */ 60 enum 61 { 62 UnknownCharset, 63 US_Ascii, 64 ISO_8859_1, 65 UTF_8, 66 Unicode, 67 NCHARSETS 68 }; 69 70 /* Frame Target IDs */ 71 enum { 72 FTtop, 73 FTself, 74 FTparent, 75 FTblank 76 }; 77 78 /* LEX */ 79 typedef struct Token Token; 80 typedef struct Attr Attr; 81 82 #pragma incomplete Token 83 84 /* BUILD */ 85 86 typedef struct Item Item; 87 typedef struct Itext Itext; 88 typedef struct Irule Irule; 89 typedef struct Iimage Iimage; 90 typedef struct Iformfield Iformfield; 91 typedef struct Itable Itable; 92 typedef struct Ifloat Ifloat; 93 typedef struct Ispacer Ispacer; 94 typedef struct Genattr Genattr; 95 typedef struct SEvent SEvent; 96 typedef struct Formfield Formfield; 97 typedef struct Option Option; 98 typedef struct Form Form; 99 typedef struct Table Table; 100 typedef struct Tablecol Tablecol; 101 typedef struct Tablerow Tablerow; 102 typedef struct Tablecell Tablecell; 103 typedef struct Align Align; 104 typedef struct Dimen Dimen; 105 typedef struct Anchor Anchor; 106 typedef struct DestAnchor DestAnchor; 107 typedef struct Map Map; 108 typedef struct Area Area; 109 typedef struct Background Background; 110 typedef struct Kidinfo Kidinfo; 111 typedef struct Docinfo Docinfo; 112 typedef struct Stack Stack; 113 typedef struct Pstate Pstate; 114 typedef struct ItemSource ItemSource; 115 typedef struct Lay Lay; /* defined in Layout module */ 116 117 #pragma incomplete Lay 118 119 120 /* Alignment types */ 121 enum { 122 ALnone = 0, ALleft, ALcenter, ALright, ALjustify, 123 ALchar, ALtop, ALmiddle, ALbottom, ALbaseline, 124 }; 125 126 struct Align 127 { 128 uchar halign; /* one of ALnone, ALleft, etc. */ 129 uchar valign; /* one of ALnone, ALtop, etc. */ 130 }; 131 132 /* 133 * A Dimen holds a dimension specification, especially for those 134 * cases when a number can be followed by a % or a * to indicate 135 * percentage of total or relative weight. 136 * Dnone means no dimension was specified 137 */ 138 139 /* To fit in a word, use top bits to identify kind, rest for value */ 140 enum { 141 Dnone = 0, 142 Dpixels = (1<<29), 143 Dpercent = (2<<29), 144 Drelative = (3<<29), 145 Dkindmask = (3<<29), 146 Dspecmask = (~Dkindmask) 147 }; 148 149 struct Dimen 150 { 151 int kindspec; /* kind | spec */ 152 }; 153 154 /* 155 * Background is either an image or a color. 156 * If both are set, the image has precedence. 157 */ 158 struct Background 159 { 160 Rune* image; /* url */ 161 int color; 162 }; 163 164 165 /* 166 * There are about a half dozen Item variants. 167 * The all look like this at the start (using Plan 9 C's 168 * anonymous structure member mechanism), 169 * and then the tag field dictates what extra fields there are. 170 */ 171 struct Item 172 { 173 Item* next; /* successor in list of items */ 174 int width; /* width in pixels (0 for floating items) */ 175 int height; /* height in pixels */ 176 int ascent; /* ascent (from top to baseline) in pixels */ 177 int anchorid; /* if nonzero, which anchor we're in */ 178 int state; /* flags and values (see below) */ 179 Genattr*genattr; /* generic attributes and events */ 180 int tag; /* variant discriminator: Itexttag, etc. */ 181 }; 182 183 /* Item variant tags */ 184 enum { 185 Itexttag, 186 Iruletag, 187 Iimagetag, 188 Iformfieldtag, 189 Itabletag, 190 Ifloattag, 191 Ispacertag 192 }; 193 194 struct Itext 195 { 196 Item; /* (with tag ==Itexttag) */ 197 Rune* s; /* the characters */ 198 int fnt; /* style*NumSize+size (see font stuff, below) */ 199 int fg; /* Pixel (color) for text */ 200 uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */ 201 uchar ul; /* ULnone, ULunder, or ULmid */ 202 }; 203 204 struct Irule 205 { 206 Item; /* (with tag ==Iruletag) */ 207 uchar align; /* alignment spec */ 208 uchar noshade; /* if true, don't shade */ 209 int size; /* size attr (rule height) */ 210 Dimen wspec; /* width spec */ 211 }; 212 213 214 struct Iimage 215 { 216 Item; /* (with tag ==Iimagetag) */ 217 Rune* imsrc; /* image src url */ 218 int imwidth; /* spec width (actual, if no spec) */ 219 int imheight; /* spec height (actual, if no spec) */ 220 Rune* altrep; /* alternate representation, in absence of image */ 221 Map* map; /* if non-nil, client side map */ 222 int ctlid; /* if animated */ 223 uchar align; /* vertical alignment */ 224 uchar hspace; /* in pixels; buffer space on each side */ 225 uchar vspace; /* in pixels; buffer space on top and bottom */ 226 uchar border; /* in pixels: border width to draw around image */ 227 Iimage* nextimage; /* next in list of document's images */ 228 void* aux; 229 }; 230 231 232 struct Iformfield 233 { 234 Item; /* (with tag ==Iformfieldtag) */ 235 Formfield*formfield; 236 void* aux; 237 }; 238 239 240 struct Itable 241 { 242 Item; /* (with tag ==Itabletag) */ 243 Table* table; 244 }; 245 246 247 struct Ifloat 248 { 249 Item; /* (with tag ==Ifloattag) */ 250 Item* item; /* table or image item that floats */ 251 int x; /* x coord of top (from right, if ALright) */ 252 int y; /* y coord of top */ 253 uchar side; /* margin it floats to: ALleft or ALright */ 254 uchar infloats; /* true if this has been added to a lay.floats */ 255 Ifloat* nextfloat; /* in list of floats */ 256 }; 257 258 259 struct Ispacer 260 { 261 Item; /* (with tag ==Ispacertag) */ 262 int spkind; /* ISPnull, etc. */ 263 }; 264 265 /* Item state flags and value fields */ 266 enum { 267 IFbrk = 0x80000000, /* forced break before this item */ 268 IFbrksp = 0x40000000, /* add 1 line space to break (IFbrk set too) */ 269 IFnobrk = 0x20000000, /* break not allowed before this item */ 270 IFcleft = 0x10000000, /* clear left floats (IFbrk set too) */ 271 IFcright= 0x08000000, /* clear right floats (IFbrk set too) */ 272 IFwrap = 0x04000000, /* in a wrapping (non-pre) line */ 273 IFhang = 0x02000000, /* in a hanging (into left indent) item */ 274 IFrjust = 0x01000000, /* right justify current line */ 275 IFcjust = 0x00800000, /* center justify current line */ 276 IFsmap = 0x00400000, /* image is server-side map */ 277 IFindentshift = 8, 278 IFindentmask = (255<<IFindentshift), /* current indent, in tab stops */ 279 IFhangmask = 255 /* current hang into left indent, in 1/10th tabstops */ 280 }; 281 282 /* Bias added to Itext's voff field */ 283 enum { Voffbias = 128 }; 284 285 /* Spacer kinds */ 286 enum { 287 ISPnull, /* 0 height and width */ 288 ISPvline, /* height and ascent of current font */ 289 ISPhspace, /* width of space in current font */ 290 ISPgeneral /* other purposes (e.g., between markers and list) */ 291 }; 292 293 /* Generic attributes and events (not many elements will have any of these set) */ 294 struct Genattr 295 { 296 Rune* id; 297 Rune* class; 298 Rune* style; 299 Rune* title; 300 SEvent* events; 301 }; 302 303 struct SEvent 304 { 305 SEvent* next; /* in list of events */ 306 int type; /* SEonblur, etc. */ 307 Rune* script; 308 }; 309 310 enum { 311 SEonblur, SEonchange, SEonclick, SEondblclick, 312 SEonfocus, SEonkeypress, SEonkeyup, SEonload, 313 SEonmousedown, SEonmousemove, SEonmouseout, 314 SEonmouseover, SEonmouseup, SEonreset, SEonselect, 315 SEonsubmit, SEonunload, 316 Numscriptev 317 }; 318 319 /* Form field types */ 320 enum { 321 Ftext, 322 Fpassword, 323 Fcheckbox, 324 Fradio, 325 Fsubmit, 326 Fhidden, 327 Fimage, 328 Freset, 329 Ffile, 330 Fbutton, 331 Fselect, 332 Ftextarea 333 }; 334 335 /* Information about a field in a form */ 336 struct Formfield 337 { 338 Formfield*next; /* in list of fields for a form */ 339 int ftype; /* Ftext, Fpassword, etc. */ 340 int fieldid; /* serial no. of field within its form */ 341 Form* form; /* containing form */ 342 Rune* name; /* name attr */ 343 Rune* value; /* value attr */ 344 int size; /* size attr */ 345 int maxlength; /* maxlength attr */ 346 int rows; /* rows attr */ 347 int cols; /* cols attr */ 348 uchar flags; /* FFchecked, etc. */ 349 Option* options; /* for Fselect fields */ 350 Item* image; /* image item, for Fimage fields */ 351 int ctlid; /* identifies control for this field in layout */ 352 SEvent* events; /* same as genattr->events of containing item */ 353 }; 354 355 enum { 356 FFchecked = (1<<7), 357 FFmultiple = (1<<6) 358 }; 359 360 /* Option holds info about an option in a "select" form field */ 361 struct Option 362 { 363 Option* next; /* next in list of options for a field */ 364 int selected; /* true if selected initially */ 365 Rune* value; /* value attr */ 366 Rune* display; /* display string */ 367 }; 368 369 /* Form holds info about a form */ 370 struct Form 371 { 372 Form* next; /* in list of forms for document */ 373 int formid; /* serial no. of form within its doc */ 374 Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */ 375 Rune* action; /* action attr */ 376 int target; /* target attr as targetid */ 377 int method; /* HGet or HPost */ 378 int nfields; /* number of fields */ 379 Formfield*fields; /* field's forms, in input order */ 380 }; 381 382 /* Flags used in various table structures */ 383 enum { 384 TFparsing = (1<<7), 385 TFnowrap = (1<<6), 386 TFisth = (1<<5) 387 }; 388 389 390 /* Information about a table */ 391 struct Table 392 { 393 Table* next; /* next in list of document's tables */ 394 int tableid; /* serial no. of table within its doc */ 395 Tablerow*rows; /* array of row specs (list during parsing) */ 396 int nrow; /* total number of rows */ 397 Tablecol*cols; /* array of column specs */ 398 int ncol; /* total number of columns */ 399 Tablecell*cells; /* list of unique cells */ 400 int ncell; /* total number of cells */ 401 Tablecell***grid; /* 2-D array of cells */ 402 Align align; /* alignment spec for whole table */ 403 Dimen width; /* width spec for whole table */ 404 int border; /* border attr */ 405 int cellspacing; /* cellspacing attr */ 406 int cellpadding; /* cellpadding attr */ 407 Background background; /* table background */ 408 Item* caption; /* linked list of Items, giving caption */ 409 uchar caption_place; /* ALtop or ALbottom */ 410 Lay* caption_lay; /* layout of caption */ 411 int totw; /* total width */ 412 int toth; /* total height */ 413 int caph; /* caption height */ 414 int availw; /* used for previous 3 sizes */ 415 Token* tabletok; /* token that started the table */ 416 uchar flags; /* Lchanged, perhaps */ 417 }; 418 419 420 struct Tablecol 421 { 422 int width; 423 Align align; 424 Point pos; 425 }; 426 427 428 struct Tablerow 429 { 430 Tablerow*next; /* Next in list of rows, during parsing */ 431 Tablecell*cells; /* Cells in row, linked through nextinrow */ 432 int height; 433 int ascent; 434 Align align; 435 Background background; 436 Point pos; 437 uchar flags; /* 0 or TFparsing */ 438 }; 439 440 /* 441 * A Tablecell is one cell of a table. 442 * It may span multiple rows and multiple columns. 443 * Cells are linked on two lists: the list for all the cells of 444 * a document (the next pointers), and the list of all the 445 * cells that start in a given row (the nextinrow pointers) 446 */ 447 struct Tablecell 448 { 449 Tablecell*next; /* next in list of table's cells */ 450 Tablecell*nextinrow; /* next in list of row's cells */ 451 int cellid; /* serial no. of cell within table */ 452 Item* content; /* contents before layout */ 453 Lay* lay; /* layout of cell */ 454 int rowspan; /* number of rows spanned by this cell */ 455 int colspan; /* number of cols spanned by this cell */ 456 Align align; /* alignment spec */ 457 uchar flags; /* TFparsing, TFnowrap, TFisth */ 458 Dimen wspec; /* suggested width */ 459 int hspec; /* suggested height */ 460 Background background; /* cell background */ 461 int minw; /* minimum possible width */ 462 int maxw; /* maximum width */ 463 int ascent; /* cell's ascent */ 464 int row; /* row of upper left corner */ 465 int col; /* col of upper left corner */ 466 Point pos; /* nw corner of cell contents, in cell */ 467 }; 468 469 /* Anchor is for info about hyperlinks that go somewhere */ 470 struct Anchor 471 { 472 Anchor* next; /* next in list of document's anchors */ 473 int index; /* serial no. of anchor within its doc */ 474 Rune* name; /* name attr */ 475 Rune* href; /* href attr */ 476 int target; /* target attr as targetid */ 477 }; 478 479 480 /* DestAnchor is for info about hyperlinks that are destinations */ 481 struct DestAnchor 482 { 483 DestAnchor*next; /* next in list of document's destanchors */ 484 int index; /* serial no. of anchor within its doc */ 485 Rune* name; /* name attr */ 486 Item* item; /* the destination */ 487 }; 488 489 490 /* Maps (client side) */ 491 struct Map 492 { 493 Map* next; /* next in list of document's maps */ 494 Rune* name; /* map name */ 495 Area* areas; /* list of map areas */ 496 }; 497 498 499 struct Area 500 { 501 Area* next; /* next in list of a map's areas */ 502 int shape; /* SHrect, etc. */ 503 Rune* href; /* associated hypertext link */ 504 int target; /* associated target frame */ 505 Dimen* coords; /* array of coords for shape */ 506 int ncoords; /* size of coords array */ 507 }; 508 509 /* Area shapes */ 510 enum { 511 SHrect, SHcircle, SHpoly 512 }; 513 514 /* Fonts are represented by integers: style*NumSize + size */ 515 516 /* Font styles */ 517 enum { 518 FntR, /* roman */ 519 FntI, /* italic */ 520 FntB, /* bold */ 521 FntT, /* typewriter */ 522 NumStyle 523 }; 524 525 /* Font sizes */ 526 enum { 527 Tiny, 528 Small, 529 Normal, 530 Large, 531 Verylarge, 532 NumSize 533 }; 534 535 enum { 536 NumFnt = NumStyle*NumSize, 537 DefFnt = FntR*NumSize+Normal, 538 }; 539 540 /* Lines are needed through some text items, for underlining or strikethrough */ 541 enum { 542 ULnone, ULunder, ULmid 543 }; 544 545 /* Kidinfo flags */ 546 enum { 547 FRnoresize = (1<<0), 548 FRnoscroll = (1<<1), 549 FRhscroll = (1<<2), 550 FRvscroll = (1<<3), 551 FRhscrollauto = (1<<4), 552 FRvscrollauto = (1<<5) 553 }; 554 555 /* Information about child frame or frameset */ 556 struct Kidinfo 557 { 558 Kidinfo*next; /* in list of kidinfos for a frameset */ 559 int isframeset; 560 561 /* fields for "frame" */ 562 Rune* src; /* only nil if a "dummy" frame or this is frameset */ 563 Rune* name; /* always non-empty if this isn't frameset */ 564 int marginw; 565 int marginh; 566 int framebd; 567 int flags; 568 569 /* fields for "frameset" */ 570 Dimen* rows; /* array of row dimensions */ 571 int nrows; /* length of rows */ 572 Dimen* cols; /* array of col dimensions */ 573 int ncols; /* length of cols */ 574 Kidinfo*kidinfos; 575 Kidinfo*nextframeset; /* parsing stack */ 576 }; 577 578 579 /* Document info (global information about HTML page) */ 580 struct Docinfo 581 { 582 /* stuff from HTTP headers, doc head, and body tag */ 583 Rune* src; /* original source of doc */ 584 Rune* base; /* base URL of doc */ 585 Rune* doctitle; /* from <title> element */ 586 Background background; /* background specification */ 587 Iimage* backgrounditem; /* Image Item for doc background image, or nil */ 588 int text; /* doc foreground (text) color */ 589 int link; /* unvisited hyperlink color */ 590 int vlink; /* visited hyperlink color */ 591 int alink; /* highlighting hyperlink color */ 592 int target; /* target frame default */ 593 int chset; /* ISO_8859, etc. */ 594 int mediatype; /* TextHtml, etc. */ 595 int scripttype; /* TextJavascript, etc. */ 596 int hasscripts; /* true if scripts used */ 597 Rune* refresh; /* content of <http-equiv=Refresh ...> */ 598 Kidinfo*kidinfo; /* if a frameset */ 599 int frameid; /* id of document frame */ 600 601 /* info needed to respond to user actions */ 602 Anchor* anchors; /* list of href anchors */ 603 DestAnchor*dests; /* list of destination anchors */ 604 Form* forms; /* list of forms */ 605 Table* tables; /* list of tables */ 606 Map* maps; /* list of maps */ 607 Iimage* images; /* list of image items (through nextimage links) */ 608 }; 609 610 extern int dimenkind(Dimen d); 611 extern int dimenspec(Dimen d); 612 extern void freedocinfo(Docinfo* d); 613 extern void freeitems(Item* ithead); 614 extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); 615 extern void printitems(Item* items, char* msg); 616 extern int targetid(Rune* s); 617 extern Rune* targetname(int targid); 618 extern int validitems(Item* i); 619 620 #pragma varargck type "I" Item* 621 622 /* Control print output */ 623 extern int warn; 624 extern int dbglex; 625 extern int dbgbuild; 626 627 /* 628 * To be provided by caller 629 * emalloc and erealloc should not return if can't get memory. 630 * emalloc should zero its memory. 631 */ 632 extern void* emalloc(ulong); 633 extern void* erealloc(void* p, ulong size); 634