xref: /plan9/sys/include/html.h (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #pragma lib "libhtml.a"
2 #pragma src "/sys/src/libhtml"
3 
4 // UTILS
5 extern uchar*	fromStr(Rune* buf, int n, int chset);	// NOTE(review): presumably converts the n Runes at buf to bytes in charset chset -- confirm in /sys/src/libhtml
6 extern Rune*	toStr(uchar* buf, int n, int chset);	// NOTE(review): presumably converts the n bytes at buf (in charset chset) to Runes -- confirm in /sys/src/libhtml
7 
8 // Common LEX and BUILD enums
9 
10 // Media types (values for Docinfo.mediatype and Docinfo.scripttype)
11 enum
12 {
13 	ApplMsword,
14 	ApplOctets,
15 	ApplPdf,
16 	ApplPostscript,
17 	ApplRtf,
18 	ApplFramemaker,
19 	ApplMsexcel,
20 	ApplMspowerpoint,
21 	UnknownType,
22 	Audio32kadpcm,
23 	AudioBasic,
24 	ImageCgm,
25 	ImageG3fax,
26 	ImageGif,
27 	ImageIef,
28 	ImageJpeg,
29 	ImagePng,
30 	ImageTiff,
31 	ImageXBit,
32 	ImageXBit2,
33 	ImageXBitmulti,
34 	ImageXXBitmap,
35 	ModelVrml,
36 	MultiDigest,
37 	MultiMixed,
38 	TextCss,
39 	TextEnriched,
40 	TextHtml,
41 	TextJavascript,
42 	TextPlain,
43 	TextRichtext,
44 	TextSgml,
45 	TextTabSeparatedValues,
46 	TextXml,
47 	VideoMpeg,
48 	VideoQuicktime,
49 	NMEDIATYPES	// number of media types listed above; must remain the last enumerator
50 };
51 
52 // HTTP methods (values for Form.method)
53 enum
54 {
55 	HGet,
56 	HPost
57 };
58 
59 // Charsets (values for Docinfo.chset and, presumably, the chset arguments of fromStr, toStr and parsehtml)
60 enum
61 {
62 	UnknownCharset,
63 	US_Ascii,
64 	ISO_8859_1,
65 	UTF_8,
66 	Unicode,
67 	NCHARSETS	// number of charsets listed above; must remain the last enumerator
68 };
69 
70 // Frame Target IDs (NOTE(review): presumably the predefined ids that targetid() yields for the reserved frame names -- confirm in /sys/src/libhtml)
71 enum {
72 	FTtop,
73 	FTself,
74 	FTparent,
75 	FTblank
76 };
77 
78 // LEX
79 typedef struct Token Token;	// opaque to users of this header (declared incomplete below)
80 typedef struct Attr Attr;	// no struct definition appears in this header
81 
82 #pragma incomplete Token
83 
84 // BUILD: forward typedefs for the structures used by the item-building interface
85 
86 typedef struct Item Item;
87 typedef struct Itext Itext;
88 typedef struct Irule Irule;
89 typedef struct Iimage Iimage;
90 typedef struct Iformfield Iformfield;
91 typedef struct Itable Itable;
92 typedef struct Ifloat Ifloat;
93 typedef struct Ispacer Ispacer;
94 typedef struct Genattr Genattr;
95 typedef struct SEvent SEvent;
96 typedef struct Formfield Formfield;
97 typedef struct Option Option;
98 typedef struct Form Form;
99 typedef struct Table Table;
100 typedef struct Tablecol Tablecol;
101 typedef struct Tablerow Tablerow;
102 typedef struct Tablecell Tablecell;
103 typedef struct Align Align;
104 typedef struct Dimen Dimen;
105 typedef struct Anchor Anchor;
106 typedef struct DestAnchor DestAnchor;
107 typedef struct Map Map;
108 typedef struct Area Area;
109 typedef struct Background Background;
110 typedef struct Kidinfo Kidinfo;
111 typedef struct Docinfo Docinfo;
112 typedef struct Stack Stack;	// no struct definition appears in this header
113 typedef struct Pstate Pstate;	// no struct definition appears in this header
114 typedef struct ItemSource ItemSource;	// no struct definition appears in this header
115 typedef struct Lay Lay;	// defined in Layout module; only used through pointers here
116 
117 #pragma incomplete Lay
118 
119 
120 // Alignment types (values for Align.halign, Align.valign and the other align/side fields)
121 enum {
122 	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
123 	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
124 };
125 
126 struct Align	// a horizontal/vertical alignment pair
127 {
128 	uchar	halign;	// horizontal alignment: one of ALnone, ALleft, etc.
129 	uchar	valign;	// vertical alignment: one of ALnone, ALtop, etc.
130 };
131 
132 // A Dimen holds a dimension specification, especially for those
133 // cases when a number can be followed by a % or a * to indicate
134 // percentage of total or relative weight.
135 // Dnone means no dimension was specified.
136 
137 // To fit in a word, use top bits to identify kind, rest for value
138 enum {
139 	Dnone =		0,	// no dimension specified
140 	Dpixels =		(1<<29),	// kind: plain number
141 	Dpercent =	(2<<29),	// kind: number followed by % (percentage of total)
142 	Drelative =	(3<<29),	// kind: number followed by * (relative weight)
143 	Dkindmask =	(3<<29),	// selects the kind (bits 29 and 30)
144 	Dspecmask =	(~Dkindmask)	// selects the value: every bit outside Dkindmask, sign bit included
145 };
146 
147 struct Dimen
148 {
149 	int	kindspec;		// kind | spec (split with Dkindmask and Dspecmask)
150 };
151 
152 // Background is either an image or a color.
153 // If both are set, the image has precedence.
154 struct Background
155 {
156 	Rune*	image;	// url of the background image
157 	int		color;	// background color (encoding is not specified in this header)
158 };
159 
160 
161 // There are about a half dozen Item variants.
162 // They all look like this at the start (using Plan 9 C's
163 // anonymous structure member mechanism),
164 // and then the tag field dictates what extra fields there are.
165 struct Item
166 {
167 	Item*	next;		// successor in list of items
168 	int		width;	// width in pixels (0 for floating items)
169 	int		height;	// height in pixels
170 	int		ascent;	// ascent (from top to baseline) in pixels
171 	int		anchorid;	// if nonzero, which anchor we're in
172 	int		state;	// flags and values (IFbrk, IFindentmask, etc.; see below)
173 	Genattr*	genattr;	// generic attributes and events
174 	int		tag;		// variant discriminator: Itexttag, etc.
175 };
176 
177 // Item variant tags (values for Item.tag; each selects one of the structs below)
178 enum {
179 	Itexttag,	// struct Itext
180 	Iruletag,	// struct Irule
181 	Iimagetag,	// struct Iimage
182 	Iformfieldtag,	// struct Iformfield
183 	Itabletag,	// struct Itable
184 	Ifloattag,	// struct Ifloat
185 	Ispacertag	// struct Ispacer
186 };
187 
188 struct Itext	// Item variant holding a run of text
189 {
190 	Item;				// common Item header (with tag ==Itexttag)
191 	Rune*	s;			// the characters
192 	int		fnt;			// style*NumSize+size (see font stuff, below)
193 	int		fg;			// Pixel (color) for text
194 	uchar	voff;			// Voffbias+vertical offset from baseline, in pixels (+ve == down)
195 	uchar	ul;			// ULnone, ULunder, or ULmid
196 };
197 
198 struct Irule	// Item variant describing a rule
199 {
200 	Item;				// common Item header (with tag ==Iruletag)
201 	uchar	align;		// alignment spec
202 	uchar	noshade;		// if true, don't shade
203 	int		size;			// size attr (rule height)
204 	Dimen	wspec;		// width spec (kind and value packed as described for Dimen)
205 };
206 
207 
208 struct Iimage	// Item variant describing an image
209 {
210 	Item;				// common Item header (with tag ==Iimagetag)
211 	Rune*	imsrc;		// image src url
212 	int		imwidth;		// spec width (actual, if no spec)
213 	int		imheight;		// spec height (actual, if no spec)
214 	Rune*	altrep;		// alternate representation, in absence of image
215 	Map*	map;			// if non-nil, client side map
216 	int		ctlid;			// if animated
217 	uchar	align;		// vertical alignment
218 	uchar	hspace;		// in pixels; buffer space on each side
219 	uchar	vspace;		// in pixels; buffer space on top and bottom
220 	uchar	border;		// in pixels: border width to draw around image
221 	Iimage*	nextimage;	// next in list of document's images (see Docinfo.images)
222 	void*	aux;			// NOTE(review): purpose not stated here; presumably reserved for the client -- confirm
223 };
224 
225 
226 struct Iformfield	// Item variant that places a form field
227 {
228 	Item;				// common Item header (with tag ==Iformfieldtag)
229 	Formfield*	formfield;	// description of the field
230 	void*	aux;			// NOTE(review): purpose not stated here; presumably reserved for the client -- confirm
231 };
232 
233 
234 struct Itable	// Item variant that places a table
235 {
236 	Item;				// common Item header (with tag ==Itabletag)
237 	Table*	table;		// description of the table
238 };
239 
240 
241 struct Ifloat	// Item variant wrapping a table or image that floats to a margin
242 {
243 	Item;				// common Item header (with tag ==Ifloattag)
244 	Item*	item;			// table or image item that floats
245 	int		x;			// x coord of top (from right, if ALright)
246 	int		y;			// y coord of top
247 	uchar	side;			// margin it floats to: ALleft or ALright
248 	uchar	infloats;		// true if this has been added to a lay.floats
249 	Ifloat*	nextfloat;		// in list of floats
250 };
251 
252 
253 struct Ispacer	// Item variant for spacing; see the spacer kinds below
254 {
255 	Item;				// common Item header (with tag ==Ispacertag)
256 	int		spkind;		// ISPnull, etc.
257 };
258 
259 // Item state flags and value fields (all packed into Item.state):
260 enum {
261 	IFbrk =			0x80000000,	// forced break before this item (NOTE(review): 0x80000000 is outside the range of a 32-bit int, which ISO C requires of enum constants; this relies on the Plan 9 compiler -- confirm before porting)
262 	IFbrksp =			0x40000000,	// add 1 line space to break (IFbrk set too)
263 	IFnobrk =			0x20000000,	// break not allowed before this item
264 	IFcleft =			0x10000000,	// clear left floats (IFbrk set too)
265 	IFcright =			0x08000000,	// clear right floats (IFbrk set too)
266 	IFwrap =			0x04000000,	// in a wrapping (non-pre) line
267 	IFhang =			0x02000000,	// in a hanging (into left indent) item
268 	IFrjust =			0x01000000,	// right justify current line
269 	IFcjust =			0x00800000,	// center justify current line
270 	IFsmap =			0x00400000,	// image is server-side map
271 	IFindentshift =		8,	// bit position of the indent field within state
272 	IFindentmask =		(255<<IFindentshift),	// current indent, in tab stops (bits 8-15)
273 	IFhangmask =		255			// current hang into left indent, in 1/10th tabstops (bits 0-7)
274 };
275 
276 // Bias added to Itext's voff field (voff is a uchar and holds Voffbias+offset)
277 enum { Voffbias = 128 };
278 
279 // Spacer kinds (values for Ispacer.spkind)
280 enum {
281 	ISPnull,			// 0 height and width
282 	ISPvline,			// height and ascent of current font
283 	ISPhspace,		// width of space in current font
284 	ISPgeneral		// other purposes (e.g., between markers and list)
285 };
286 
287 // Generic attributes and events (not many elements will have any of these set);
288 struct Genattr	// reached through Item.genattr
289 {
290 	Rune*	id;		// NOTE(review): presumably the element's id attribute -- confirm
291 	Rune*	class;	// NOTE(review): presumably the class attribute -- confirm
292 	Rune*	style;	// NOTE(review): presumably the style attribute -- confirm
293 	Rune*	title;	// NOTE(review): presumably the title attribute -- confirm
294 	SEvent*	events;	// list of script events, linked through SEvent.next
295 };
296 
297 struct SEvent	// one script event handler attached to an element
298 {
299 	SEvent*	next;		// in list of events
300 	int		type;		// SEonblur, etc.
301 	Rune*	script;	// text of the script for this event
302 };
303 
304 enum {	// script event types (values for SEvent.type)
305 	SEonblur, SEonchange, SEonclick, SEondblclick,
306 	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
307 	SEonmousedown, SEonmousemove, SEonmouseout,
308 	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
309 	SEonsubmit, SEonunload,
310 	Numscriptev	// number of event types listed above; must remain the last enumerator
311 };
312 
313 // Form field types (values for Formfield.ftype)
314 enum {
315 	Ftext,
316 	Fpassword,
317 	Fcheckbox,
318 	Fradio,
319 	Fsubmit,
320 	Fhidden,
321 	Fimage,
322 	Freset,
323 	Ffile,
324 	Fbutton,
325 	Fselect,
326 	Ftextarea
327 };
328 
329 // Information about a field in a form
330 struct Formfield
331 {
332 	Formfield*	next;		// in list of fields for a form
333 	int			ftype;	// Ftext, Fpassword, etc.
334 	int			fieldid;	// serial no. of field within its form
335 	Form*		form;	// containing form
336 	Rune*		name;	// name attr
337 	Rune*		value;	// value attr
338 	int			size;		// size attr
339 	int			maxlength;	// maxlength attr
340 	int			rows;	// rows attr
341 	int			cols;		// cols attr
342 	uchar		flags;	// FFchecked, etc.
343 	Option*		options;	// for Fselect fields: list linked through Option.next
344 	Item*		image;	// image item, for Fimage fields
345 	int			ctlid;		// identifies control for this field in layout
346 	SEvent*		events;	// same as genattr->events of containing item
347 };
348 
349 enum {	// bits for Formfield.flags
350 	FFchecked =	(1<<7),
351 	FFmultiple =	(1<<6)
352 };
353 
354 // Option holds info about an option in a "select" form field (see Formfield.options)
355 struct Option
356 {
357 	Option*	next;			// next in list of options for a field
358 	int		selected;		// true if selected initially
359 	Rune*	value;		// value attr
360 	Rune*	display;		// display string
361 };
362 
363 // Form holds info about a form
364 struct Form
365 {
366 	Form*		next;		// in list of forms for document
367 	int			formid;	// serial no. of form within its doc
368 	Rune*		name;	// name or id attr (netscape uses name, HTML 4.0 uses id)
369 	Rune*		action;	// action attr
370 	int			target;	// target attr as targetid
371 	int			method;	// HGet or HPost
372 	int			nfields;	// number of fields
373 	Formfield*	fields;	// form's fields, in input order
374 };
375 
376 // Flags used in various table structures (the flags fields of Tablerow and Tablecell name them)
377 enum {
378 	TFparsing =	(1<<7),
379 	TFnowrap =	(1<<6),
380 	TFisth =		(1<<5)
381 };
382 
383 
384 // Information about a table
385 struct Table
386 {
387 	Table*		next;			// next in list of document's tables
388 	int			tableid;		// serial no. of table within its doc
389 	Tablerow*	rows;		// array of row specs (list during parsing)
390 	int			nrow;		// total number of rows
391 	Tablecol*		cols;			// array of column specs
392 	int			ncol;			// total number of columns
393 	Tablecell*		cells;			// list of unique cells (linked through Tablecell.next)
394 	int			ncell;		// total number of cells
395 	Tablecell***	grid;			// 2-D array of cells
396 	Align		align;		// alignment spec for whole table
397 	Dimen		width;		// width spec for whole table
398 	int			border;		// border attr
399 	int			cellspacing;	// cellspacing attr
400 	int			cellpadding;	// cellpadding attr
401 	Background	background;	// table background
402 	Item*		caption;		// linked list of Items, giving caption
403 	uchar		caption_place;	// ALtop or ALbottom
404 	Lay*			caption_lay;	// layout of caption
405 	int			totw;			// total width
406 	int			toth;			// total height
407 	int			caph;		// caption height
408 	int			availw;		// used for previous 3 sizes
409 	Token*		tabletok;		// token that started the table
410 	uchar		flags;		// Lchanged, perhaps (NOTE(review): Lchanged is not defined in this header -- confirm which flags apply)
411 };
412 
413 
414 struct Tablecol	// per-column information (see Table.cols)
415 {
416 	int		width;
417 	Align	align;
418 	Point		pos;	// NOTE(review): Point is not declared in this header; presumably the including file must declare it first -- confirm
419 };
420 
421 
422 struct Tablerow	// per-row information (see Table.rows)
423 {
424 	Tablerow*	next;			// Next in list of rows, during parsing
425 	Tablecell*		cells;			// Cells in row, linked through nextinrow
426 	int			height;
427 	int			ascent;
428 	Align		align;
429 	Background	background;
430 	Point			pos;
431 	uchar		flags;		// 0 or TFparsing
432 };
433 
434 
435 // A Tablecell is one cell of a table.
436 // It may span multiple rows and multiple columns.
437 // Cells are linked on two lists: the list for all the cells of
438 // a table (the next pointers), and the list of all the
439 // cells that start in a given row (the nextinrow pointers)
440 struct Tablecell
441 {
442 	Tablecell*		next;			// next in list of table's cells
443 	Tablecell*		nextinrow;	// next in list of row's cells
444 	int			cellid;		// serial no. of cell within table
445 	Item*		content;		// contents before layout
446 	Lay*			lay;			// layout of cell
447 	int			rowspan;		// number of rows spanned by this cell
448 	int			colspan;		// number of cols spanned by this cell
449 	Align		align;		// alignment spec
450 	uchar		flags;		// TFparsing, TFnowrap, TFisth
451 	Dimen		wspec;		// suggested width
452 	int			hspec;		// suggested height
453 	Background	background;	// cell background
454 	int			minw;		// minimum possible width
455 	int			maxw;		// maximum width
456 	int			ascent;		// cell's ascent
457 	int			row;			// row of upper left corner
458 	int			col;			// col of upper left corner
459 	Point			pos;			// nw corner of cell contents, in cell
460 };
461 
462 // Anchor is for info about hyperlinks that go somewhere (see Docinfo.anchors)
463 struct Anchor
464 {
465 	Anchor*		next;		// next in list of document's anchors
466 	int			index;	// serial no. of anchor within its doc
467 	Rune*		name;	// name attr
468 	Rune*		href;		// href attr
469 	int			target;	// target attr as targetid
470 };
471 
472 
473 // DestAnchor is for info about hyperlinks that are destinations (see Docinfo.dests)
474 struct DestAnchor
475 {
476 	DestAnchor*	next;		// next in list of document's destanchors
477 	int			index;	// serial no. of anchor within its doc
478 	Rune*		name;	// name attr
479 	Item*		item;		// the destination
480 };
481 
482 
483 // Maps (client side); see Docinfo.maps and Iimage.map
484 struct Map
485 {
486 	Map*	next;			// next in list of document's maps
487 	Rune*	name;		// map name
488 	Area*	areas;		// list of map areas (linked through Area.next)
489 };
490 
491 
492 struct Area	// one area of a client-side Map
493 {
494 	Area*		next;		// next in list of a map's areas
495 	int			shape;	// SHrect, etc.
496 	Rune*		href;		// associated hypertext link
497 	int			target;	// associated target frame
498 	Dimen*		coords;	// array of coords for shape
499 	int			ncoords;	// size of coords array
500 };
501 
502 // Area shapes (values for Area.shape)
503 enum {
504 	SHrect, SHcircle, SHpoly
505 };
506 
507 // Fonts are represented by integers: style*NumSize + size (as stored in Itext.fnt)
508 
509 // Font styles
510 enum {
511 	FntR,			// roman
512 	FntI,			// italic
513 	FntB,			// bold
514 	FntT,			// typewriter
515 	NumStyle		// number of styles listed above; must remain the last enumerator
516 };
517 
518 // Font sizes
519 enum {
520 	Tiny,
521 	Small,
522 	Normal,
523 	Large,
524 	Verylarge,
525 	NumSize	// number of sizes listed above; must remain the last enumerator
526 };
527 
528 enum {
529 	NumFnt = (NumStyle*NumSize),	// total number of style/size combinations
530 	DefFnt = (FntR*NumSize+Normal)	// default font: roman style, Normal size
531 };
532 
533 // Lines are needed through some text items, for underlining or strikethrough (values for Itext.ul)
534 enum {
535 	ULnone, ULunder, ULmid
536 };
537 
538 // Kidinfo flags (bits for Kidinfo.flags)
539 enum {
540 	FRnoresize =	(1<<0),
541 	FRnoscroll =	(1<<1),
542 	FRhscroll = 	(1<<2),
543 	FRvscroll =	(1<<3),
544 	FRhscrollauto = (1<<4),
545 	FRvscrollauto =	(1<<5)
546 };
547 
548 // Information about child frame or frameset
549 struct Kidinfo
550 {
551 	Kidinfo*		next;		// in list of kidinfos for a frameset
552 	int			isframeset;	// selects which group of fields below applies
553 
554 	// fields for "frame"
555 	Rune*		src;		// only nil if a "dummy" frame or this is frameset
556 	Rune*		name;	// always non-empty if this isn't frameset
557 	int			marginw;	// NOTE(review): presumably the frame's margin width -- confirm
558 	int			marginh;	// NOTE(review): presumably the frame's margin height -- confirm
559 	int			framebd;	// NOTE(review): presumably the frame border setting -- confirm
560 	int			flags;	// FRnoresize, etc.
561 
562 	// fields for "frameset"
563 	Dimen*		rows;	// array of row dimensions
564 	int			nrows;	// length of rows
565 	Dimen*		cols;		// array of col dimensions
566 	int			ncols;	// length of cols
567 	Kidinfo*		kidinfos;	// children of this frameset, linked through next
568 	Kidinfo*		nextframeset;	// parsing stack
569 };
570 
571 
572 // Document info (global information about HTML page)
573 struct Docinfo
574 {
575 	// stuff from HTTP headers, doc head, and body tag
576 	Rune*		src;				// original source of doc
577 	Rune*		base;			// base URL of doc
578 	Rune*		doctitle;			// from <title> element
579 	Background	background;		// background specification
580 	Iimage*		backgrounditem;	// Image Item for doc background image, or nil
581 	int			text;				// doc foreground (text) color
582 	int			link;				// unvisited hyperlink color
583 	int			vlink;			// visited hyperlink color
584 	int			alink;			// highlighting hyperlink color
585 	int			target;			// target frame default
586 	int			chset;			// ISO_8859_1, etc.
587 	int			mediatype;		// TextHtml, etc.
588 	int			scripttype;		// TextJavascript, etc.
589 	int			hasscripts;		// true if scripts used
590 	Rune*		refresh;			// content of <http-equiv=Refresh ...>
591 	Kidinfo*		kidinfo;			// if a frameset
592 	int			frameid;			// id of document frame
593 
594 	// info needed to respond to user actions
595 	Anchor*		anchors;			// list of href anchors
596 	DestAnchor*	dests;			// list of destination anchors
597 	Form*		forms;			// list of forms
598 	Table*		tables;			// list of tables
599 	Map*		maps;			// list of maps
600 	Iimage*		images;			// list of image items (through nextimage links)
601 };
602 
603 extern int			dimenkind(Dimen d);	// NOTE(review): presumably the kind part of d.kindspec (Dpixels, etc.) -- confirm
604 extern int			dimenspec(Dimen d);	// NOTE(review): presumably the value part of d.kindspec -- confirm
605 extern void		freedocinfo(Docinfo* d);	// NOTE(review): presumably releases d and what it owns -- confirm
606 extern void		freeitems(Item* ithead);	// NOTE(review): presumably releases the item list starting at ithead -- confirm
607 extern Item*		parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);	// NOTE(review): presumably parses datalen bytes at data into an Item list and sets *pdi; mtype/chset look like media type and charset values -- confirm
608 extern void		printitems(Item* items, char* msg);	// NOTE(review): presumably a debugging dump of the item list -- confirm
609 extern int			targetid(Rune* s);	// NOTE(review): presumably maps a target frame name to its id -- confirm
610 extern Rune*		targetname(int targid);	// NOTE(review): presumably the inverse of targetid -- confirm
611 extern int			validitems(Item* i);	// NOTE(review): presumably a consistency check on an item list -- confirm
612 
613 #pragma varargck	type "I"	Item*
614 
615 // Control print output (these are only declared here; the definitions live elsewhere)
616 extern int			warn;	// NOTE(review): presumably enables warning messages -- confirm
617 extern int			dbglex;	// NOTE(review): presumably enables lexer debugging output -- confirm
618 extern int			dbgbuild;	// NOTE(review): presumably enables item-builder debugging output -- confirm
619 
620 // To be provided by caller (this header only declares them).
621 // emalloc and erealloc should not return if they can't get memory.
622 // emalloc should zero its memory.
623 extern void*	emalloc(ulong);	// the argument is unnamed here; by analogy with erealloc it is presumably the size in bytes
624 extern void*	erealloc(void* p, ulong size);
625