xref: /plan9/sys/include/html.h (revision 684b447ecdf07ea000d8deca0fbcf5ac24894c7f)
1 #pragma lib "libhtml.a"
2 #pragma src "/sys/src/libhtml"
3 
4 /* UTILS: judging by the signatures, conversions between Rune strings and byte buffers */
5 extern uchar*	fromStr(Rune* buf, int n, int chset);	/* chset: presumably a charset constant (US_Ascii, etc.) -- confirm */
6 extern Rune*	toStr(uchar* buf, int n, int chset);	/* chset: as for fromStr */
7 
8 /* Common LEX and BUILD enums */
9 
10 /* Media types (values for Docinfo.mediatype and Docinfo.scripttype) */
11 enum
12 {
13 	ApplMsword,
14 	ApplOctets,
15 	ApplPdf,
16 	ApplPostscript,
17 	ApplRtf,
18 	ApplFramemaker,
19 	ApplMsexcel,
20 	ApplMspowerpoint,
21 	UnknownType,
22 	Audio32kadpcm,
23 	AudioBasic,
24 	ImageCgm,
25 	ImageG3fax,
26 	ImageGif,
27 	ImageIef,
28 	ImageJpeg,
29 	ImagePng,
30 	ImageTiff,
31 	ImageXBit,
32 	ImageXBit2,
33 	ImageXBitmulti,
34 	ImageXXBitmap,
35 	ModelVrml,
36 	MultiDigest,
37 	MultiMixed,
38 	TextCss,
39 	TextEnriched,
40 	TextHtml,
41 	TextJavascript,
42 	TextPlain,
43 	TextRichtext,
44 	TextSgml,
45 	TextTabSeparatedValues,
46 	TextXml,
47 	VideoMpeg,
48 	VideoQuicktime,
49 	NMEDIATYPES	/* not a type: the number of entries above */
50 };
51 
52 /* HTTP methods (values for Form.method) */
53 enum
54 {
55 	HGet,
56 	HPost
57 };
58 
59 /* Charsets (values for Docinfo.chset) */
60 enum
61 {
62 	UnknownCharset,
63 	US_Ascii,
64 	ISO_8859_1,
65 	UTF_8,
66 	Unicode,
67 	NCHARSETS	/* not a charset: the number of entries above, UnknownCharset included */
68 };
69 
70 /* Frame Target IDs: named values for the int target fields (Anchor, Area, Form, Docinfo); presumably what targetid() gives for the reserved frame names -- confirm */
71 enum {
72 	FTtop,
73 	FTself,
74 	FTparent,
75 	FTblank
76 };
77 
78 /* LEX: lexer types; no definition of struct Token or struct Attr appears in this header */
79 typedef struct Token Token;
80 typedef struct Attr Attr;
81 
82 #pragma incomplete Token
83 
84 /* BUILD: typedefs come first so the struct declarations below can name each other */
85 
86 typedef struct Item Item;
87 typedef struct Itext Itext;
88 typedef struct Irule Irule;
89 typedef struct Iimage Iimage;
90 typedef struct Iformfield Iformfield;
91 typedef struct Itable Itable;
92 typedef struct Ifloat Ifloat;
93 typedef struct Ispacer Ispacer;
94 typedef struct Genattr Genattr;
95 typedef struct SEvent SEvent;
96 typedef struct Formfield Formfield;
97 typedef struct Option Option;
98 typedef struct Form Form;
99 typedef struct Table Table;
100 typedef struct Tablecol Tablecol;
101 typedef struct Tablerow Tablerow;
102 typedef struct Tablecell Tablecell;
103 typedef struct Align Align;
104 typedef struct Dimen Dimen;
105 typedef struct Anchor Anchor;
106 typedef struct DestAnchor DestAnchor;
107 typedef struct Map Map;
108 typedef struct Area Area;
109 typedef struct Background Background;
110 typedef struct Kidinfo Kidinfo;
111 typedef struct Docinfo Docinfo;
112 typedef struct Stack Stack;		/* no struct definition in this header */
113 typedef struct Pstate Pstate;		/* no struct definition in this header */
114 typedef struct ItemSource ItemSource;	/* no struct definition in this header */
115 typedef struct Lay Lay;		/* defined in Layout module; only Lay* is used here */
116 
117 #pragma incomplete Lay
118 
119 
120 /* Alignment types (values for Align.halign, Align.valign and the other align/side fields) */
121 enum {
122 	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
123 	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline,
124 };
125 
126 struct Align
127 {
128 	uchar	halign;		/* horizontal: one of ALnone, ALleft, etc. */
129 	uchar	valign;		/* vertical: one of ALnone, ALtop, etc. */
130 };
131 
132 /*
133  * A Dimen holds a dimension specification, especially for those
134  * cases when a number can be followed by a % or a * to indicate
135  * percentage of total or relative weight.
136  * Dnone means no dimension was specified.
137  */
138 
139 /* To fit in a word, bits 29-30 identify the kind, the remaining bits hold the value */
140 enum {
141 	Dnone =		0,
142 	Dpixels =	(1<<29),
143 	Dpercent =	(2<<29),	/* percentage of total (%) */
144 	Drelative =	(3<<29),	/* relative weight (*) */
145 	Dkindmask =	(3<<29),	/* selects the kind bits */
146 	Dspecmask =	(~Dkindmask)	/* selects every bit outside the kind (bit 31 included) */
147 };
148 
149 struct Dimen
150 {
151 	int	kindspec;	/* kind | spec: one of Dnone, Dpixels, etc. ORed with the value */
152 };
153 
154 /*
155  * Background is either an image or a color.
156  * If both are set, the image takes precedence.
157  */
158 struct Background
159 {
160 	Rune*	image;		/* url of the background image */
161 	int	color;		/* background color; the image wins if both are set */
162 };
163 
164 
165 /*
166  * There are seven Item variants, one per Item variant tag.
167  * They all look like this at the start (using Plan 9 C's
168  * anonymous structure member mechanism),
169  * and then the tag field dictates what extra fields there are.
170  */
171 struct Item
172 {
173 	Item*	next;		/* successor in list of items */
174 	int	width;		/* width in pixels (0 for floating items) */
175 	int	height;		/* height in pixels */
176 	int	ascent;		/* ascent (from top to baseline) in pixels */
177 	int	anchorid;	/* if nonzero, which anchor we're in */
178 	int	state;		/* flags and values (IFbrk, etc., below) */
179 	Genattr*genattr;	/* generic attributes and events */
180 	int	tag;		/* variant discriminator: Itexttag, etc. */
181 };
182 
183 /* Item variant tags (values for Item.tag; each selects one variant struct) */
184 enum {
185 	Itexttag,	/* struct Itext */
186 	Iruletag,	/* struct Irule */
187 	Iimagetag,	/* struct Iimage */
188 	Iformfieldtag,	/* struct Iformfield */
189 	Itabletag,	/* struct Itable */
190 	Ifloattag,	/* struct Ifloat */
191 	Ispacertag	/* struct Ispacer */
192 };
193 
194 struct Itext
195 {
196 	Item;			/* common Item fields (with tag ==Itexttag) */
197 	Rune*	s;		/* the characters */
198 	int	fnt;		/* style*NumSize+size (FntR, etc. and Tiny, etc., below) */
199 	int	fg;		/* Pixel (color) for text */
200 	uchar	voff;		/* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
201 	uchar	ul;		/* ULnone, ULunder, or ULmid */
202 };
203 
204 struct Irule
205 {
206 	Item;			/* common Item fields (with tag ==Iruletag) */
207 	uchar	align;		/* alignment spec (presumably ALleft, etc. -- confirm) */
208 	uchar	noshade;	/* if true, don't shade */
209 	int	size;		/* size attr (rule height) */
210 	int	color;		/* color attr */
211 	Dimen	wspec;		/* width spec, as a Dimen (kind | value) */
212 };
213 
214 
215 struct Iimage
216 {
217 	Item;			/* common Item fields (with tag ==Iimagetag) */
218 	Rune*	imsrc;		/* image src url */
219 	int	imwidth;	/* spec width (actual, if no spec) */
220 	int	imheight;	/* spec height (actual, if no spec) */
221 	Rune*	altrep;		/* alternate representation, in absence of image */
222 	Map*	map;		/* if non-nil, client side map */
223 	int	ctlid;		/* if animated */
224 	uchar	align;		/* vertical alignment */
225 	uchar	hspace;		/* in pixels; buffer space on each side */
226 	uchar	vspace;		/* in pixels; buffer space on top and bottom */
227 	uchar	border;		/* in pixels; border width to draw around image */
228 	Iimage*	nextimage;	/* next in list of document's images (head is Docinfo.images) */
229 	void*	aux;		/* not described in this header; presumably for the client's use -- confirm */
230 };
231 
232 
233 struct Iformfield
234 {
235 	Item;			/* common Item fields (with tag ==Iformfieldtag) */
236 	Formfield*formfield;	/* the form field for this item (see struct Formfield) */
237 	void*	aux;		/* not described in this header; presumably for the client's use -- confirm */
238 };
239 
240 
241 struct Itable
242 {
243 	Item;			/* common Item fields (with tag ==Itabletag) */
244 	Table*	table;		/* the table for this item (see struct Table) */
245 };
246 
247 
248 struct Ifloat
249 {
250 	Item;			/* common Item fields (with tag ==Ifloattag) */
251 	Item*	item;		/* table or image item that floats */
252 	int	x;		/* x coord of top (from right, if side is ALright) */
253 	int	y;		/* y coord of top */
254 	uchar	side;		/* margin it floats to: ALleft or ALright */
255 	uchar	infloats;	/* true if this has been added to a lay.floats */
256 	Ifloat*	nextfloat;	/* next in list of floats */
257 };
258 
259 
260 struct Ispacer
261 {
262 	Item;			/* common Item fields (with tag ==Ispacertag) */
263 	int	spkind;		/* ISPnull, ISPvline, ISPhspace, or ISPgeneral */
264 };
265 
266 /* Item state flags and value fields (for Item.state): single-bit flags in bits 22-31, two 8-bit values in bits 0-15 */
267 enum {
268 	IFbrk	= 0x80000000,	/* forced break before this item */
269 	IFbrksp	= 0x40000000,	/* add 1 line space to break (IFbrk set too) */
270 	IFnobrk	= 0x20000000,	/* break not allowed before this item */
271 	IFcleft	= 0x10000000,	/* clear left floats (IFbrk set too) */
272 	IFcright= 0x08000000,	/* clear right floats (IFbrk set too) */
273 	IFwrap	= 0x04000000,	/* in a wrapping (non-pre) line */
274 	IFhang	= 0x02000000,	/* in a hanging (into left indent) item */
275 	IFrjust	= 0x01000000,	/* right justify current line */
276 	IFcjust	= 0x00800000,	/* center justify current line */
277 	IFsmap	= 0x00400000,	/* image is server-side map */
278 	IFindentshift	= 8,	/* bit position of the indent value */
279 	IFindentmask	= (255<<IFindentshift),	/* bits 8-15: current indent, in tab stops */
280 	IFhangmask	= 255	/* bits 0-7: current hang into left indent, in 1/10th tabstops */
281 };
282 
283 /* Bias added to Itext's voff field: the uchar voff holds Voffbias + offset */
284 enum { Voffbias = 128 };
285 
286 /* Spacer kinds (values for Ispacer.spkind) */
287 enum {
288 	ISPnull,	/* 0 height and width */
289 	ISPvline,	/* height and ascent of current font */
290 	ISPhspace,	/* width of space in current font */
291 	ISPgeneral	/* other purposes (e.g., between markers and list) */
292 };
293 
294 /* Generic attributes and events (not many elements will have any of these set); reached through Item.genattr */
295 struct Genattr
296 {
297 	Rune*	id;		/* presumably the id attr -- confirm */
298 	Rune*	class;		/* presumably the class attr -- confirm */
299 	Rune*	style;		/* presumably the style attr -- confirm */
300 	Rune*	title;		/* presumably the title attr -- confirm */
301 	SEvent*	events;		/* list of events, linked through SEvent.next */
302 };
303 
304 struct SEvent
305 {
306 	SEvent*	next;		/* next in list of events */
307 	int	type;		/* SEonblur, etc. */
308 	Rune*	script;		/* presumably the script text for this event -- confirm */
309 };
310 
311 enum {	/* script event types (values for SEvent.type) */
312 	SEonblur, SEonchange, SEonclick, SEondblclick,
313 	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
314 	SEonmousedown, SEonmousemove, SEonmouseout,
315 	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
316 	SEonsubmit, SEonunload,
317 	Numscriptev	/* not an event: the number of entries above */
318 };
319 
320 /* Form field types (values for Formfield.ftype) */
321 enum {
322 	Ftext,
323 	Fpassword,
324 	Fcheckbox,
325 	Fradio,
326 	Fsubmit,
327 	Fhidden,
328 	Fimage,
329 	Freset,
330 	Ffile,
331 	Fbutton,
332 	Fselect,
333 	Ftextarea
334 };
335 
336 /* Information about a field in a form */
337 struct Formfield
338 {
339 	Formfield*next;		/* next in list of fields for a form (head is Form.fields) */
340 	int	ftype;		/* Ftext, Fpassword, etc. */
341 	int	fieldid;	/* serial no. of field within its form */
342 	Form*	form;		/* containing form */
343 	Rune*	name;		/* name attr */
344 	Rune*	value;		/* value attr */
345 	int	size;		/* size attr */
346 	int	maxlength;	/* maxlength attr */
347 	int	rows;		/* rows attr */
348 	int	cols;		/* cols attr */
349 	uchar	flags;		/* FFchecked, FFmultiple */
350 	Option*	options;	/* list of options, for Fselect fields */
351 	Item*	image;		/* image item, for Fimage fields */
352 	int	ctlid;		/* identifies control for this field in layout */
353 	SEvent*	events;		/* same as genattr->events of containing item */
354 };
355 
356 enum {	/* bits for Formfield.flags */
357 	FFchecked =	(1<<7),
358 	FFmultiple =	(1<<6)
359 };
360 
361 /* Option holds info about an option in a "select" form field (listed from Formfield.options) */
362 struct Option
363 {
364 	Option*	next;		/* next in list of options for a field */
365 	int	selected;	/* true if selected initially */
366 	Rune*	value;		/* value attr */
367 	Rune*	display;	/* display string */
368 };
369 
370 /* Form holds info about a form */
371 struct Form
372 {
373 	Form*	next;		/* next in list of forms for document */
374 	int	formid;		/* serial no. of form within its doc */
375 	Rune*	name;		/* name or id attr (netscape uses name, HTML 4.0 uses id) */
376 	Rune*	action;		/* action attr */
377 	int	target;		/* target attr as targetid */
378 	int	method;		/* HGet or HPost */
379 	int	nfields;	/* number of fields */
380 	Formfield*fields;	/* form's fields, in input order */
381 };
382 
383 /* Flags used in various table structures (the uchar flags of Tablerow and Tablecell) */
384 enum {
385 	TFparsing =	(1<<7),
386 	TFnowrap =	(1<<6),
387 	TFisth =	(1<<5)
388 };
389 
390 
391 /* Information about a table */
392 struct Table
393 {
394 	Table*	next;		/* next in list of document's tables */
395 	int	tableid;	/* serial no. of table within its doc */
396 	Tablerow*rows;		/* array of row specs (list during parsing) */
397 	int	nrow;		/* total number of rows */
398 	Tablecol*cols;		/* array of column specs */
399 	int	ncol;		/* total number of columns */
400 	Tablecell*cells;	/* list of unique cells (linked through Tablecell.next) */
401 	int	ncell;		/* total number of cells */
402 	Tablecell***grid;	/* 2-D array of cells */
403 	Align	align;		/* alignment spec for whole table */
404 	Dimen	width;		/* width spec for whole table */
405 	int	border;		/* border attr */
406 	int	cellspacing;	/* cellspacing attr */
407 	int	cellpadding;	/* cellpadding attr */
408 	Background background;	/* table background */
409 	Item*	caption;	/* linked list of Items, giving caption */
410 	uchar	caption_place;	/* ALtop or ALbottom */
411 	Lay*	caption_lay;	/* layout of caption */
412 	int	totw;		/* total width */
413 	int	toth;		/* total height */
414 	int	caph;		/* caption height */
415 	int	availw;		/* used for previous 3 sizes (totw, toth, caph) */
416 	Token*	tabletok;	/* token that started the table */
417 	uchar	flags;		/* Lchanged, perhaps (Lchanged is not defined in this header) */
418 };
419 
420 
421 struct Tablecol
422 {
423 	int	width;		/* NOTE(review): units not stated here; presumably pixels -- confirm */
424 	Align	align;		/* alignment spec for the column */
425 	Point	pos;		/* Point is not declared in this header; it must already be in scope */
426 };
427 
428 
429 struct Tablerow
430 {
431 	Tablerow*next;		/* next in list of rows, during parsing */
432 	Tablecell*cells;	/* cells in row, linked through nextinrow */
433 	int	height;
434 	int	ascent;
435 	Align	align;		/* alignment spec for the row */
436 	Background background;	/* row background */
437 	Point	pos;		/* Point is not declared in this header; it must already be in scope */
438 	uchar	flags;		/* 0 or TFparsing */
439 };
440 
441 /*
442  * A Tablecell is one cell of a table.
443  * It may span multiple rows and multiple columns.
444  * Cells are linked on two lists: the list for all the cells of
445  * a table (the next pointers), and the list of all the
446  * cells that start in a given row (the nextinrow pointers).
447  */
448 struct Tablecell
449 {
450 	Tablecell*next;		/* next in list of table's cells */
451 	Tablecell*nextinrow;	/* next in list of row's cells */
452 	int	cellid;		/* serial no. of cell within table */
453 	Item*	content;	/* contents before layout */
454 	Lay*	lay;		/* layout of cell */
455 	int	rowspan;	/* number of rows spanned by this cell */
456 	int	colspan;	/* number of cols spanned by this cell */
457 	Align	align;		/* alignment spec */
458 	uchar	flags;		/* TFparsing, TFnowrap, TFisth */
459 	Dimen	wspec;		/* suggested width */
460 	int	hspec;		/* suggested height */
461 	Background background;	/* cell background */
462 	int	minw;		/* minimum possible width */
463 	int	maxw;		/* maximum width */
464 	int	ascent;		/* cell's ascent */
465 	int	row;		/* row of upper left corner */
466 	int	col;		/* col of upper left corner */
467 	Point	pos;		/* nw corner of cell contents, in cell */
468 };
469 
470 /* Anchor is for info about hyperlinks that go somewhere (listed from Docinfo.anchors) */
471 struct Anchor
472 {
473 	Anchor*	next;		/* next in list of document's anchors */
474 	int	index;		/* serial no. of anchor within its doc */
475 	Rune*	name;		/* name attr */
476 	Rune*	href;		/* href attr */
477 	int	target;		/* target attr as targetid */
478 };
479 
480 
481 /* DestAnchor is for info about hyperlinks that are destinations (listed from Docinfo.dests) */
482 struct DestAnchor
483 {
484 	DestAnchor*next;	/* next in list of document's destanchors */
485 	int	index;		/* serial no. of anchor within its doc */
486 	Rune*	name;		/* name attr */
487 	Item*	item;		/* the destination */
488 };
489 
490 
491 /* Maps (client side); an Iimage refers to one through its map field */
492 struct Map
493 {
494 	Map*	next;		/* next in list of document's maps */
495 	Rune*	name;		/* map name */
496 	Area*	areas;		/* list of map areas (linked through Area.next) */
497 };
498 
499 
500 struct Area
501 {
502 	Area*	next;		/* next in list of a map's areas */
503 	int	shape;		/* SHrect, SHcircle, or SHpoly */
504 	Rune*	href;		/* associated hypertext link */
505 	int	target;		/* associated target frame */
506 	Dimen*	coords;		/* array of coords for shape */
507 	int	ncoords;	/* number of entries in coords array */
508 };
509 
510 /* Area shapes (values for Area.shape) */
511 enum {
512 	SHrect, SHcircle, SHpoly
513 };
514 
515 /* Fonts are represented by integers: style*NumSize + size (as stored in Itext.fnt) */
516 
517 /* Font styles */
518 enum {
519 	FntR,		/* roman */
520 	FntI,		/* italic */
521 	FntB,		/* bold */
522 	FntT,		/* typewriter */
523 	NumStyle	/* not a style: the number of entries above */
524 };
525 
526 /* Font sizes (the size part of a font number) */
527 enum {
528 	Tiny,
529 	Small,
530 	Normal,
531 	Large,
532 	Verylarge,
533 	NumSize		/* not a size: the number of entries above */
534 };
535 
536 enum {	/* values derived from the font style and size enums */
537 	NumFnt = NumStyle*NumSize,	/* number of distinct style/size font numbers */
538 	DefFnt = FntR*NumSize+Normal,	/* font number for roman style at Normal size */
539 };
540 
541 /* Lines are needed through some text items, for underlining or strikethrough (values for Itext.ul) */
542 enum {
543 	ULnone, ULunder, ULmid
544 };
545 
546 /* Kidinfo flags (bits for Kidinfo.flags) */
547 enum {
548 	FRnoresize =	(1<<0),
549 	FRnoscroll =	(1<<1),
550 	FRhscroll = 	(1<<2),
551 	FRvscroll =	(1<<3),
552 	FRhscrollauto = (1<<4),
553 	FRvscrollauto =	(1<<5)
554 };
555 
556 /* Information about child frame or frameset */
557 struct Kidinfo
558 {
559 	Kidinfo*next;		/* next in list of kidinfos for a frameset */
560 	int	isframeset;	/* presumably true when this entry is a frameset, not a frame -- confirm */
561 
562 	/* fields for "frame" */
563 	Rune*	src;		/* only nil if a "dummy" frame or this is frameset */
564 	Rune*	name;		/* always non-empty if this isn't frameset */
565 	int	marginw;
566 	int	marginh;
567 	int	framebd;
568 	int	flags;		/* FRnoresize, etc. */
569 
570 	/* fields for "frameset" */
571 	Dimen*	rows;		/* array of row dimensions */
572 	int	nrows;		/* length of rows */
573 	Dimen*	cols;		/* array of col dimensions */
574 	int	ncols;		/* length of cols */
575 	Kidinfo*kidinfos;	/* presumably the frameset's children, linked through next -- confirm */
576 	Kidinfo*nextframeset;	/* parsing stack */
577 };
578 
579 
580 /* Document info (global information about HTML page) */
581 struct Docinfo
582 {
583 	/* stuff from HTTP headers, doc head, and body tag */
584 	Rune*	src;		/* original source of doc */
585 	Rune*	base;		/* base URL of doc */
586 	Rune*	doctitle;	/* from <title> element */
587 	Background background;	/* background specification */
588 	Iimage*	backgrounditem;	/* Image Item for doc background image, or nil */
589 	int	text;		/* doc foreground (text) color */
590 	int	link;		/* unvisited hyperlink color */
591 	int	vlink;		/* visited hyperlink color */
592 	int	alink;		/* highlighting hyperlink color */
593 	int	target;		/* target frame default */
594 	int	chset;		/* ISO_8859_1, etc. */
595 	int	mediatype;	/* TextHtml, etc. */
596 	int	scripttype;	/* TextJavascript, etc. */
597 	int	hasscripts;	/* true if scripts used */
598 	Rune*	refresh;	/* content of <meta http-equiv=Refresh ...> */
599 	Kidinfo*kidinfo;	/* if a frameset */
600 	int	frameid;	/* id of document frame */
601 
602 	/* info needed to respond to user actions */
603 	Anchor*	anchors;	/* list of href anchors */
604 	DestAnchor*dests;	/* list of destination anchors */
605 	Form*	forms;		/* list of forms */
606 	Table*	tables;		/* list of tables */
607 	Map*	maps;		/* list of maps */
608 	Iimage*	images;		/* list of image items (through nextimage links) */
609 };
610 
611 extern int	dimenkind(Dimen d);	/* presumably the kind part of d.kindspec (Dpixels, etc.) -- confirm */
612 extern int	dimenspec(Dimen d);	/* presumably the value part of d.kindspec -- confirm */
613 extern void	freedocinfo(Docinfo* d);
614 extern void	freeitems(Item* ithead);	/* ithead: head of a list of Items */
615 extern Item*	parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);	/* mtype, chset: presumably a media type (TextHtml, etc.) and a charset (UTF_8, etc.) -- confirm */
616 extern void	printitems(Item* items, char* msg);
617 extern int	targetid(Rune* s);	/* by its signature, maps a target name to an int id */
618 extern Rune*	targetname(int targid);	/* by its signature, maps an int id back to a name */
619 extern int	validitems(Item* i);
620 
621 #pragma varargck	type "I"	Item*
622 
623 /* Control print output (declared here, defined elsewhere) */
624 extern int	warn;
625 extern int	dbglex;		/* presumably debugging output for the LEX part -- confirm */
626 extern int	dbgbuild;	/* presumably debugging output for the BUILD part -- confirm */
627 
628 /*
629  * To be provided by the caller.
630  * emalloc and erealloc must not return if they can't get memory.
631  * emalloc must zero the memory it returns.
632  */
633 extern void*	emalloc(ulong);
634 extern void*	erealloc(void* p, ulong size);
635