xref: /plan9/sys/include/html.h (revision f9e1cf08d3be51592e03e639fc848a68dc31a55e)
1 #pragma lib "libhtml.a"
2 #pragma src "/sys/src/libhtml"
3 
4 /* UTILS */
5 extern uchar*	fromStr(Rune* buf, int n, int chset);
6 extern Rune*	toStr(uchar* buf, int n, int chset);
7 
8 /* Common LEX and BUILD enums */
9 
/*
 * Media types.
 * Values are assigned by position, starting at 0, so the order must
 * not change.  NMEDIATYPES comes last and therefore equals the number
 * of media types.
 */
enum {
	/* Appl* */
	ApplMsword = 0,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,

	UnknownType,

	/* Audio* */
	Audio32kadpcm,
	AudioBasic,

	/* Image* */
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,

	/* Model*, Multi* */
	ModelVrml,
	MultiDigest,
	MultiMixed,

	/* Text* */
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,

	/* Video* */
	VideoMpeg,
	VideoQuicktime,

	NMEDIATYPES
};
51 
/* HTTP methods (stored in Form.method) */
enum {
	HGet = 0,
	HPost
};
58 
/*
 * Charsets.
 * NCHARSETS comes last and so equals the number of charsets,
 * UnknownCharset included.
 */
enum {
	UnknownCharset = 0,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};
69 
/* Frame Target IDs */
enum {
	FTtop = 0,
	FTself,
	FTparent,
	FTblank
};
77 
/*
 * LEX
 * Token is declared incomplete: no definition of struct Token is
 * given in this header.
 */
typedef struct Attr Attr;
typedef struct Token Token;

#pragma incomplete Token
83 
/*
 * BUILD
 * Forward typedefs for the structure tags used by this header.
 */

/* items: the common header and its variants */
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;

/* generic attributes and script events */
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;

/* forms */
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;

/* tables */
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;

/* small value types */
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Background Background;

/* anchors and client-side maps */
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;

/* frames and whole-document info */
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;

/* tags with no definition in the visible part of this header */
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;		/* defined in Layout module */

#pragma incomplete Lay
118 
119 
/* Alignment types (values for Align.halign and Align.valign) */
enum {
	ALnone = 0,
	ALleft,
	ALcenter,
	ALright,
	ALjustify,
	ALchar,
	ALtop,
	ALmiddle,
	ALbottom,
	ALbaseline
};
125 
126 struct Align
127 {
128 	uchar	halign;		/* one of ALnone, ALleft, etc. */
129 	uchar	valign;		/* one of ALnone, ALtop, etc. */
130 };
131 
132 /*
133  * A Dimen holds a dimension specification, especially for those
134  * cases when a number can be followed by a % or a * to indicate
135  * percentage of total or relative weight.
136  * Dnone means no dimension was specified
137  */
138 
/*
 * Dimen kinds.
 * So that a Dimen fits in one word, the kind is kept in bits 29-30
 * (Dkindmask) and every remaining bit (Dspecmask) holds the value.
 */
enum {
	Dnone		= 0,
	Dpixels		= 1<<29,
	Dpercent	= 2<<29,
	Drelative	= 3<<29,
	Dkindmask	= 3<<29,
	Dspecmask	= ~Dkindmask
};
148 
/* One dimension specification, packed into a single int */
struct Dimen {
	int	kindspec;	/* kind | spec: a D* kind or'ed with the value */
};
153 
154 /*
155  * Background is either an image or a color.
156  * If both are set, the image has precedence.
157  */
158 struct Background
159 {
160 	Rune*	image;		/* url */
161 	int	color;
162 };
163 
164 
165 /*
166  * There are about a half dozen Item variants.
167  * The all look like this at the start (using Plan 9 C's
168  * anonymous structure member mechanism),
169  * and then the tag field dictates what extra fields there are.
170  */
171 struct Item
172 {
173 	Item*	next;		/* successor in list of items */
174 	int	width;		/* width in pixels (0 for floating items) */
175 	int	height;		/* height in pixels */
176 	int	ascent;		/* ascent (from top to baseline) in pixels */
177 	int	anchorid;	/* if nonzero, which anchor we're in */
178 	int	state;		/* flags and values (see below) */
179 	Genattr*genattr;	/* generic attributes and events */
180 	int	tag;		/* variant discriminator: Itexttag, etc. */
181 };
182 
/* Item variant tags: the values of Item.tag, one per variant struct */
enum {
	Itexttag = 0,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
193 
194 struct Itext
195 {
196 	Item;			/* (with tag ==Itexttag) */
197 	Rune*	s;		/* the characters */
198 	int	fnt;		/* style*NumSize+size (see font stuff, below) */
199 	int	fg;		/* Pixel (color) for text */
200 	uchar	voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
201 	uchar	ul;		/* ULnone, ULunder, or ULmid */
202 };
203 
204 struct Irule
205 {
206 	Item;			/* (with tag ==Iruletag) */
207 	uchar	align;		/* alignment spec */
208 	uchar	noshade;	/* if true, don't shade */
209 	int	size;		/* size attr (rule height) */
210 	Dimen	wspec;		/* width spec */
211 };
212 
213 
214 struct Iimage
215 {
216 	Item;			/* (with tag ==Iimagetag) */
217 	Rune*	imsrc;	/* image src url */
218 	int	imwidth;	/* spec width (actual, if no spec) */
219 	int	imheight;	/* spec height (actual, if no spec) */
220 	Rune*	altrep;		/* alternate representation, in absence of image */
221 	Map*	map;		/* if non-nil, client side map */
222 	int	ctlid;		/* if animated */
223 	uchar	align;		/* vertical alignment */
224 	uchar	hspace;		/* in pixels; buffer space on each side */
225 	uchar	vspace;		/* in pixels; buffer space on top and bottom */
226 	uchar	border;		/* in pixels: border width to draw around image */
227 	Iimage*	nextimage;	/* next in list of document's images */
228 	void*	aux;
229 };
230 
231 
232 struct Iformfield
233 {
234 	Item;			/* (with tag ==Iformfieldtag) */
235 	Formfield*formfield;
236 	void*	aux;
237 };
238 
239 
240 struct Itable
241 {
242 	Item;			/* (with tag ==Itabletag) */
243 	Table*	table;
244 };
245 
246 
247 struct Ifloat
248 {
249 	Item;			/* (with tag ==Ifloattag) */
250 	Item*	item;		/* table or image item that floats */
251 	int	x;		/* x coord of top (from right, if ALright) */
252 	int	y;		/* y coord of top */
253 	uchar	side;		/* margin it floats to: ALleft or ALright */
254 	uchar	infloats;	/* true if this has been added to a lay.floats */
255 	Ifloat*	nextfloat;	/* in list of floats */
256 };
257 
258 
259 struct Ispacer
260 {
261 	Item;			/* (with tag ==Ispacertag) */
262 	int	spkind;		/* ISPnull, etc. */
263 };
264 
/*
 * Item state flags and value fields.
 * The ten flags occupy bits 31 down to 22 (IFbrk is bit 31, the sign
 * bit of a 32-bit int).  The low 16 bits hold two 8-bit values:
 * the indent (bits 8-15) and the hang (bits 0-7).
 */
enum {
	/* flags */
	IFbrk		= 0x80000000,	/* forced break before this item */
	IFbrksp		= 0x40000000,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk		= 0x20000000,	/* break not allowed before this item */
	IFcleft		= 0x10000000,	/* clear left floats (IFbrk set too) */
	IFcright	= 0x08000000,	/* clear right floats (IFbrk set too) */
	IFwrap		= 0x04000000,	/* in a wrapping (non-pre) line */
	IFhang		= 0x02000000,	/* in a hanging (into left indent) item */
	IFrjust		= 0x01000000,	/* right justify current line */
	IFcjust		= 0x00800000,	/* center justify current line */
	IFsmap		= 0x00400000,	/* image is server-side map */

	/* value fields */
	IFindentshift	= 8,
	IFindentmask	= 255<<IFindentshift,	/* current indent, in tab stops */
	IFhangmask	= 255			/* current hang into left indent, in 1/10th tabstops */
};
281 
/* Bias added to Itext's voff field */
enum {
	Voffbias = 128
};
284 
/* Spacer kinds (values for Ispacer.spkind) */
enum {
	ISPnull = 0,	/* 0 height and width */
	ISPvline,	/* height and ascent of current font */
	ISPhspace,	/* width of space in current font */
	ISPgeneral	/* other purposes (e.g., between markers and list) */
};
292 
293 /* Generic attributes and events (not many elements will have any of these set) */
294 struct Genattr
295 {
296 	Rune*	id;
297 	Rune*	class;
298 	Rune*	style;
299 	Rune*	title;
300 	SEvent*	events;
301 };
302 
303 struct SEvent
304 {
305 	SEvent*	next;		/* in list of events */
306 	int	type;		/* SEonblur, etc. */
307 	Rune*	script;
308 };
309 
/*
 * Script event types (values for SEvent.type).
 * Numscriptev comes last and so equals the number of event types.
 */
enum {
	SEonblur = 0,
	SEonchange,
	SEonclick,
	SEondblclick,
	SEonfocus,
	SEonkeypress,
	SEonkeyup,
	SEonload,
	SEonmousedown,
	SEonmousemove,
	SEonmouseout,
	SEonmouseover,
	SEonmouseup,
	SEonreset,
	SEonselect,
	SEonsubmit,
	SEonunload,
	Numscriptev
};
318 
/* Form field types (values for Formfield.ftype) */
enum {
	Ftext = 0,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};
334 
335 /* Information about a field in a form */
336 struct Formfield
337 {
338 	Formfield*next;		/* in list of fields for a form */
339 	int	ftype;		/* Ftext, Fpassword, etc. */
340 	int	fieldid;	/* serial no. of field within its form */
341 	Form*	form;		/* containing form */
342 	Rune*	name;		/* name attr */
343 	Rune*	value;		/* value attr */
344 	int	size;		/* size attr */
345 	int	maxlength;	/* maxlength attr */
346 	int	rows;		/* rows attr */
347 	int	cols;		/* cols attr */
348 	uchar	flags;		/* FFchecked, etc. */
349 	Option*	options;	/* for Fselect fields */
350 	Item*	image;		/* image item, for Fimage fields */
351 	int	ctlid;		/* identifies control for this field in layout */
352 	SEvent*	events;		/* same as genattr->events of containing item */
353 };
354 
/* Formfield.flags bits */
enum {
	FFchecked	= 0x80,
	FFmultiple	= 0x40
};
359 
360 /* Option holds info about an option in a "select" form field */
361 struct Option
362 {
363 	Option*	next;		/* next in list of options for a field */
364 	int	selected;	/* true if selected initially */
365 	Rune*	value;		/* value attr */
366 	Rune*	display;	/* display string */
367 };
368 
369 /* Form holds info about a form */
370 struct Form
371 {
372 	Form*	next;		/* in list of forms for document */
373 	int	formid;		/* serial no. of form within its doc */
374 	Rune*	name;	/* name or id attr (netscape uses name, HTML 4.0 uses id) */
375 	Rune*	action;		/* action attr */
376 	int	target;		/* target attr as targetid */
377 	int	method;		/* HGet or HPost */
378 	int	nfields;	/* number of fields */
379 	Formfield*fields;	/* field's forms, in input order */
380 };
381 
/* Flag bits used in the flags fields of various table structures */
enum {
	TFparsing	= 0x80,
	TFnowrap	= 0x40,
	TFisth		= 0x20
};
388 
389 
390 /* Information about a table */
391 struct Table
392 {
393 	Table*	next;		/* next in list of document's tables */
394 	int	tableid;	/* serial no. of table within its doc */
395 	Tablerow*rows;		/* array of row specs (list during parsing) */
396 	int	nrow;		/* total number of rows */
397 	Tablecol*cols;		/* array of column specs */
398 	int	ncol;		/* total number of columns */
399 	Tablecell*cells;	/* list of unique cells */
400 	int	ncell;		/* total number of cells */
401 	Tablecell***grid;	/* 2-D array of cells */
402 	Align	align;		/* alignment spec for whole table */
403 	Dimen	width;		/* width spec for whole table */
404 	int	border;		/* border attr */
405 	int	cellspacing;	/* cellspacing attr */
406 	int	cellpadding;	/* cellpadding attr */
407 	Background background;	/* table background */
408 	Item*	caption;	/* linked list of Items, giving caption */
409 	uchar	caption_place;	/* ALtop or ALbottom */
410 	Lay*	caption_lay;	/* layout of caption */
411 	int	totw;		/* total width */
412 	int	toth;		/* total height */
413 	int	caph;		/* caption height */
414 	int	availw;		/* used for previous 3 sizes */
415 	Token*	tabletok;	/* token that started the table */
416 	uchar	flags;		/* Lchanged, perhaps */
417 };
418 
419 
420 struct Tablecol
421 {
422 	int	width;
423 	Align	align;
424 	Point	pos;
425 };
426 
427 
428 struct Tablerow
429 {
430 	Tablerow*next;		/* Next in list of rows, during parsing */
431 	Tablecell*cells;	/* Cells in row, linked through nextinrow */
432 	int	height;
433 	int	ascent;
434 	Align	align;
435 	Background background;
436 	Point	pos;
437 	uchar	flags;		/* 0 or TFparsing */
438 };
439 
440 /*
441  * A Tablecell is one cell of a table.
442  * It may span multiple rows and multiple columns.
443  * Cells are linked on two lists: the list for all the cells of
444  * a document (the next pointers), and the list of all the
445  * cells that start in a given row (the nextinrow pointers)
446  */
447 struct Tablecell
448 {
449 	Tablecell*next;		/* next in list of table's cells */
450 	Tablecell*nextinrow;	/* next in list of row's cells */
451 	int	cellid;		/* serial no. of cell within table */
452 	Item*	content;	/* contents before layout */
453 	Lay*	lay;		/* layout of cell */
454 	int	rowspan;	/* number of rows spanned by this cell */
455 	int	colspan;	/* number of cols spanned by this cell */
456 	Align	align;		/* alignment spec */
457 	uchar	flags;		/* TFparsing, TFnowrap, TFisth */
458 	Dimen	wspec;		/* suggested width */
459 	int	hspec;		/* suggested height */
460 	Background background;	/* cell background */
461 	int	minw;		/* minimum possible width */
462 	int	maxw;		/* maximum width */
463 	int	ascent;		/* cell's ascent */
464 	int	row;		/* row of upper left corner */
465 	int	col;		/* col of upper left corner */
466 	Point	pos;		/* nw corner of cell contents, in cell */
467 };
468 
469 /* Anchor is for info about hyperlinks that go somewhere */
470 struct Anchor
471 {
472 	Anchor*	next;		/* next in list of document's anchors */
473 	int	index;		/* serial no. of anchor within its doc */
474 	Rune*	name;		/* name attr */
475 	Rune*	href;		/* href attr */
476 	int	target;		/* target attr as targetid */
477 };
478 
479 
480 /* DestAnchor is for info about hyperlinks that are destinations */
481 struct DestAnchor
482 {
483 	DestAnchor*next;	/* next in list of document's destanchors */
484 	int	index;		/* serial no. of anchor within its doc */
485 	Rune*	name;		/* name attr */
486 	Item*	item;		/* the destination */
487 };
488 
489 
490 /* Maps (client side) */
491 struct Map
492 {
493 	Map*	next;		/* next in list of document's maps */
494 	Rune*	name;		/* map name */
495 	Area*	areas;		/* list of map areas */
496 };
497 
498 
499 struct Area
500 {
501 	Area*	next;		/* next in list of a map's areas */
502 	int	shape;		/* SHrect, etc. */
503 	Rune*	href;		/* associated hypertext link */
504 	int	target;		/* associated target frame */
505 	Dimen*	coords;		/* array of coords for shape */
506 	int	ncoords;	/* size of coords array */
507 };
508 
/* Area shapes (values for Area.shape) */
enum {
	SHrect = 0,
	SHcircle,
	SHpoly
};
513 
514 /* Fonts are represented by integers: style*NumSize + size */
515 
/*
 * Font styles.
 * NumStyle comes last and so equals the number of styles.
 */
enum {
	FntR = 0,	/* roman */
	FntI,		/* italic */
	FntB,		/* bold */
	FntT,		/* typewriter */
	NumStyle
};
524 
/*
 * Font sizes.
 * NumSize comes last and so equals the number of sizes.
 */
enum {
	Tiny = 0,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};
534 
535 enum {
536 	NumFnt = NumStyle*NumSize,
537 	DefFnt = FntR*NumSize+Normal,
538 };
539 
/*
 * Some text items need a line drawn through them, for underlining
 * or strikethrough (values for Itext.ul).
 */
enum {
	ULnone = 0,
	ULunder,
	ULmid
};
544 
/* Kidinfo flags: one bit each, starting at bit 0 */
enum {
	FRnoresize	= 1<<0,
	FRnoscroll	= 1<<1,
	FRhscroll	= 1<<2,
	FRvscroll	= 1<<3,
	FRhscrollauto	= 1<<4,
	FRvscrollauto	= 1<<5
};
554 
555 /* Information about child frame or frameset */
556 struct Kidinfo
557 {
558 	Kidinfo*next;		/* in list of kidinfos for a frameset */
559 	int	isframeset;
560 
561 	/* fields for "frame" */
562 	Rune*	src;		/* only nil if a "dummy" frame or this is frameset */
563 	Rune*	name;		/* always non-empty if this isn't frameset */
564 	int	marginw;
565 	int	marginh;
566 	int	framebd;
567 	int	flags;
568 
569 	/* fields for "frameset" */
570 	Dimen*	rows;		/* array of row dimensions */
571 	int	nrows;		/* length of rows */
572 	Dimen*	cols;		/* array of col dimensions */
573 	int	ncols;		/* length of cols */
574 	Kidinfo*kidinfos;
575 	Kidinfo*nextframeset;	/* parsing stack */
576 };
577 
578 
579 /* Document info (global information about HTML page) */
580 struct Docinfo
581 {
582 	/* stuff from HTTP headers, doc head, and body tag */
583 	Rune*	src;		/* original source of doc */
584 	Rune*	base;		/* base URL of doc */
585 	Rune*	doctitle;	/* from <title> element */
586 	Background background;	/* background specification */
587 	Iimage*	backgrounditem;	/* Image Item for doc background image, or nil */
588 	int	text;		/* doc foreground (text) color */
589 	int	link;		/* unvisited hyperlink color */
590 	int	vlink;		/* visited hyperlink color */
591 	int	alink;		/* highlighting hyperlink color */
592 	int	target;		/* target frame default */
593 	int	chset;		/* ISO_8859, etc. */
594 	int	mediatype;	/* TextHtml, etc. */
595 	int	scripttype;	/* TextJavascript, etc. */
596 	int	hasscripts;	/* true if scripts used */
597 	Rune*	refresh;	/* content of <http-equiv=Refresh ...> */
598 	Kidinfo*kidinfo;	/* if a frameset */
599 	int	frameid;	/* id of document frame */
600 
601 	/* info needed to respond to user actions */
602 	Anchor*	anchors;	/* list of href anchors */
603 	DestAnchor*dests;	/* list of destination anchors */
604 	Form*	forms;		/* list of forms */
605 	Table*	tables;		/* list of tables */
606 	Map*	maps;		/* list of maps */
607 	Iimage*	images;		/* list of image items (through nextimage links) */
608 };
609 
610 extern int	dimenkind(Dimen d);
611 extern int	dimenspec(Dimen d);
612 extern void	freedocinfo(Docinfo* d);
613 extern void	freeitems(Item* ithead);
614 extern Item*	parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
615 extern void	printitems(Item* items, char* msg);
616 extern int	targetid(Rune* s);
617 extern Rune*	targetname(int targid);
618 extern int	validitems(Item* i);
619 
620 #pragma varargck	type "I"	Item*
621 
/* Flags that control print output */
extern int	warn, dbglex, dbgbuild;
626 
627 /*
628  * To be provided by caller
629  * emalloc and erealloc should not return if can't get memory.
630  * emalloc should zero its memory.
631  */
632 extern void*	emalloc(ulong);
633 extern void*	erealloc(void* p, ulong size);
634