xref: /plan9-contrib/sys/src/libhtml/build.c (revision d46c239f8612929b7dbade67d0d071633df3a15d)
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <ctype.h>
5 #include <html.h>
6 #include "impl.h"
7 
8 // A stack for holding integer values
9 enum {
10 	Nestmax = 40	// max nesting level of lists, font styles, etc.; capacity of Stack.slots
11 };
12 
13 struct Stack {	// fixed-capacity stack of ints; n == 0 means empty
14 	int		n;				// next available slot (top of stack is slots[n-1])
15 	int		slots[Nestmax];	// stack entries; only slots[0..n-1] are in use
16 };
17 
18 // Parsing state
// getitems pushes a new Pstate (newpstate(ps)) when it starts a nested item
// list such as a table caption, and goes back to ps->next when that list ends.
19 struct Pstate
20 {
21 	Pstate*	next;			// next (outer) entry in stack of Pstates; nil at the bottom
22 	int		skipping;		// true when we shouldn't add items
23 	int		skipwhite;		// true when we should strip leading space (cleared once text is added)
24 	int		curfont;		// font index for current font
25 	int		curfg;		// current foreground color
26 	Background	curbg;	// current background
27 	int		curvoff;		// current baseline offset
28 	uchar	curul;		// current underline/strike state
29 	uchar	curjust;		// current justify state
30 	int		curanchor;	// current (href) anchor id (if in one), or 0
31 	int		curstate;		// current value of item state
32 	int		literal;		// current literal state; when set, newlines next to tags are kept
33 	int		inpar;		// true when in a paragraph-like construct
34 	int		adjsize;		// current font size adjustment (<BASEFONT size=n> sets it to n-3)
35 	Item*	items;		// dummy head of item list we're building (first real item is items->next)
36 	Item*	lastit;		// tail of item list we're building
37 	Item*	prelastit;		// item before lastit
38 	Stack	fntstylestk;	// style stack
39 	Stack	fntsizestk;		// size stack
40 	Stack	fgstk;		// text color stack
41 	Stack	ulstk;		// underline stack
42 	Stack	voffstk;		// vertical offset stack
43 	Stack	listtypestk;	// list type stack (LT* values)
44 	Stack	listcntstk;		// list counter stack (seeded from the start attribute, default 1)
45 	Stack	juststk;		// justification stack
46 	Stack	hangstk;		// hanging stack, maintained by <DL>, <DT> and <DD>
47 };
48 
// State carried through one parse: the document being filled in, plus the
// counters and open-element pointers that getitems consults.
49 struct ItemSource
50 {
51 	Docinfo*		doc;		// document info being built (getitems reads it as di)
52 	Pstate*		psstk;	// parsing-state stack; getitems starts from this entry
53 	int			nforms;	// forms seen so far; ++nforms becomes the next form id
54 	int			ntables;	// starts at 0; presumably counts tables (use not visible here)
55 	int			nanchors;	// index counter shared by Anchor and DestAnchor (++nanchors)
56 	int			nframes;	// counter used to name unnamed frames "_fr<n>"
57 	Form*		curform;	// form currently open, nil when outside <FORM>
58 	Map*		curmap;	// starts nil; presumably the <MAP> being filled (TODO confirm)
59 	Table*		tabstk;	// getitems' curtab: current table, nil when outside <TABLE>
60 	Kidinfo*		kidstk;	// innermost open <FRAMESET>, chained via nextframeset
61 };
62 
63 // Some layout parameters
64 enum {
65 	FRKIDMARGIN = 6,	// default margin around kid frames
66 	IMGHSPACE = 0,	// default hspace for images (0 matches IE, Netscape)
67 	IMGVSPACE = 0,	// default vspace for images
68 	FLTIMGHSPACE = 2,	// default hspace for float images
69 	TABSP = 5,		// default cellspacing for tables
70 	TABPAD = 1,		// default cell padding for tables
71 	LISTTAB = 1,		// number of tabs to indent lists
72 	BQTAB = 1,		// number of tabs to indent blockquotes
73 	HRSZ = 2,			// thickness of horizontal rules when <HR> gives no size
74 	SUBOFF = 4,		// vertical offset for subscripts
75 	SUPOFF = 6,		// vertical offset for superscripts
76 	NBSP = 160		// non-breaking space character (0xA0)
77 };
78 
79 // These tables must be kept sorted in ascending order of their key strings
// Keywords accepted for the align attribute, mapped to AL* codes.
// getitems looks them up through atabval for <CAPTION>, <DIV>, <H1>-<H6>, <HR> and <IMG>.
80 static StringInt align_tab[] = {
81 	{L"baseline",	ALbaseline},
82 	{L"bottom",	ALbottom},
83 	{L"center",	ALcenter},
84 	{L"char",		ALchar},
85 	{L"justify",	ALjustify},
86 	{L"left",		ALleft},
87 	{L"middle",	ALmiddle},
88 	{L"right",		ALright},
89 	{L"top",		ALtop}
90 };
91 #define NALIGNTAB (sizeof(align_tab)/sizeof(StringInt))
92 
// Keywords naming form-field kinds, mapped to F* codes.
// Presumably matched against the type attribute of <INPUT>; the lookup is not in view here.
93 static StringInt input_tab[] = {
94 	{L"button",	Fbutton},
95 	{L"checkbox",	Fcheckbox},
96 	{L"file",		Ffile},
97 	{L"hidden",	Fhidden},
98 	{L"image",	Fimage},
99 	{L"password",	Fpassword},
100 	{L"radio",		Fradio},
101 	{L"reset",		Freset},
102 	{L"submit",	Fsubmit},
103 	{L"text",		Ftext}
104 };
105 #define NINPUTTAB (sizeof(input_tab)/sizeof(StringInt))
106 
// Values of the clear attribute on <BR>, mapped to the IFcleft/IFcright
// flags that getitems hands to addlinebrk; "all" sets both.
107 static StringInt clear_tab[] = {
108 	{L"all",	IFcleft|IFcright},
109 	{L"left",	IFcleft},
110 	{L"right",	IFcright}
111 };
112 #define NCLEARTAB (sizeof(clear_tab)/sizeof(StringInt))
113 
// Values of the scrolling attribute on <FRAME>, mapped to the FR* flag
// bits that getitems stores in the frame's Kidinfo flags.
114 static StringInt fscroll_tab[] = {
115 	{L"auto",	FRhscrollauto|FRvscrollauto},
116 	{L"no",	FRnoscroll},
117 	{L"yes",	FRhscroll|FRvscroll},
118 };
119 #define NFSCROLLTAB (sizeof(fscroll_tab)/sizeof(StringInt))
120 
// Values of the shape attribute on <AREA>; the short and long spellings
// of each shape map to the same SH* code.
121 static StringInt shape_tab[] = {
122 	{L"circ",		SHcircle},
123 	{L"circle",		SHcircle},
124 	{L"poly",		SHpoly},
125 	{L"polygon",	SHpoly},
126 	{L"rect",		SHrect},
127 	{L"rectangle",	SHrect}
128 };
129 #define NSHAPETAB (sizeof(shape_tab)/sizeof(StringInt))
130 
// Values of the method attribute on <FORM>, mapped to HGet/HPost.
131 static StringInt method_tab[] = {
132 	{L"get",		HGet},
133 	{L"post",		HPost}
134 };
135 #define NMETHODTAB (sizeof(method_tab)/sizeof(StringInt))
136 
// Upper-case Roman numerals for 1 through 15: roman[i] is the numeral for i+1.
// NROMAN must be kept equal to the array length.
137 static Rune* roman[15]= {
138 	L"I", L"II", L"III", L"IV", L"V", L"VI", L"VII", L"VIII", L"IX", L"X",
139 	L"XI", L"XII", L"XIII", L"XIV", L"XV"
140 };
141 #define NROMAN 15
142 
143 // List number types
144 enum {
145 	LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI	// getitems defaults <OL> to LT1 and the other lists to LTdisc
146 };
147 
// Bit flags stored in blockbrk[] entries.
148 enum {
149 	SPBefore = 2,	// blank space wanted before the element's start tag
150 	SPAfter = 4,	// blank space wanted after the element's end tag
151 	BL = 1,	// element forces a break
152 	BLBA = (BL|SPBefore|SPAfter)	// break with space both before and after
153 };
154 
155 // blockbrk[tag] is break info for a block level element, or one
156 // of a few others that get the same treatment re ending open paragraphs
157 // and requiring a line break / vertical space before them.
158 // If we want a line of space before the given element, SPBefore is OR'd in.
159 // If we want a line of space after the given element, SPAfter is OR'd in.
160 
// getitems consults this table for every tag: a nonzero entry triggers addbrk
// and closes any open paragraph. Start tags honor SPBefore; end tags are
// looked up as tag - RBRA and honor SPAfter. Tags not listed are 0.
160 static uchar blockbrk[Numtags]= {
161 	[Taddress] BLBA, [Tblockquote] BLBA, [Tcenter] BL,
162 	[Tdir] BLBA, [Tdiv] BL, [Tdd] BL, [Tdl] BLBA,
163 	[Tdt] BL, [Tform] BLBA,
164 	// headings and tables get breaks added manually
165 	[Th1] BL, [Th2] BL, [Th3] BL,
166 	[Th4] BL, [Th5] BL, [Th6] BL,
167 	[Thr] BL, [Tisindex] BLBA, [Tli] BL, [Tmenu] BLBA,
168 	[Tol] BLBA, [Tp] BLBA, [Tpre] BLBA,
169 	[Tul] BLBA
170 };
172 
172 enum {
173 	AGEN = 1	// attrinfo[] flag: attribute is generic
174 };
176 
177 // attrinfo is information about attributes.
178 // The AGEN value means that the attribute is generic (applies to almost all elements)
// Indexed by attribute id; attributes not listed here have the value 0.
178 static uchar attrinfo[Numattrs]= {
179 	[Aid] AGEN, [Aclass] AGEN, [Astyle] AGEN, [Atitle] AGEN,
180 	[Aonblur] AGEN, [Aonchange] AGEN, [Aonclick] AGEN,
181 	[Aondblclick] AGEN, [Aonfocus] AGEN, [Aonkeypress] AGEN,
182 	[Aonkeyup] AGEN, [Aonload] AGEN, [Aonmousedown] AGEN,
183 	[Aonmousemove] AGEN, [Aonmouseout] AGEN, [Aonmouseover] AGEN,
184 	[Aonmouseup] AGEN, [Aonreset] AGEN, [Aonselect] AGEN,
185 	[Aonsubmit] AGEN, [Aonunload] AGEN
186 };
188 
// Maps each event-handler attribute id (Aon*) to its SE* script event code.
// Indexed by attribute id; attributes not listed here have the value 0.
188 static uchar scriptev[Numattrs]= {
189 	[Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick,
190 	[Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress,
191 	[Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown,
192 	[Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover,
193 	[Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect,
194 	[Aonsubmit] SEonsubmit, [Aonunload] SEonunload
195 };
197 
198 // Color lookup table
// Color names mapped to 24-bit values written as 0xRRGGBB
// (red is 0xFF0000, lime is 0x00FF00). Entries are in ascending name order.
// NOTE(review): "blue" is 0x0000CC rather than 0x0000FF; presumably deliberate, confirm before changing.
199 static StringInt color_tab[] = {
200 	{L"aqua", 0x00FFFF},
201 	{L"black",  0x000000},
202 	{L"blue", 0x0000CC},
203 	{L"fuchsia", 0xFF00FF},
204 	{L"gray", 0x808080},
205 	{L"green", 0x008000},
206 	{L"lime", 0x00FF00},
207 	{L"maroon", 0x800000},
208 	{L"navy", 0x000080,},
209 	{L"olive", 0x808000},
210 	{L"purple", 0x800080},
211 	{L"red", 0xFF0000},
212 	{L"silver", 0xC0C0C0},
213 	{L"teal", 0x008080},
214 	{L"white", 0xFFFFFF},
215 	{L"yellow", 0xFFFF00}
216 };
217 #define NCOLORS (sizeof(color_tab)/sizeof(StringInt))
218 
// File-scope state. The three targetmap variables are not used in the part of
// the file in view; presumably they are set up by targetmapinit (TODO confirm).
219 static StringInt 		*targetmap;
220 static int			targetmapsize;
221 static int			ntargets;
222 
223 static int buildinited = 0;	// set to 1 by buildinit; getitems calls buildinit while this is 0
224 
225 #define SMALLBUFSIZE 240
226 #define BIGBUFSIZE 2000
227 
228 int	dbgbuild = 0;	// when > 1, getitems prints the current state and each token on fd 2
229 int	warn = 0;	// when nonzero, getitems prints warnings about bad markup on fd 2
230 
// Forward declarations for this file's static helpers, kept in roughly
// alphabetical order so they can be defined and called in any order.
231 static Align		aalign(Token* tok);
232 static int			acolorval(Token* tok, int attid, int dflt);
233 static void			addbrk(Pstate* ps, int sp, int clr);
234 static void			additem(Pstate* ps, Item* it, Token* tok);
235 static void			addlinebrk(Pstate* ps, int clr);
236 static void			addnbsp(Pstate* ps);
237 static void			addtext(Pstate* ps, Rune* s);
238 static Dimen		adimen(Token* tok, int attid);
239 static int			aflagval(Token* tok, int attid);
240 static int			aintval(Token* tok, int attid, int dflt);
241 static Rune*		astrval(Token* tok, int attid, Rune* dflt);
242 static int			atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt);
243 static int			atargval(Token* tok, int dflt);
244 static int			auintval(Token* tok, int attid, int dflt);
245 static Rune*		aurlval(Token* tok, int attid, Rune* dflt, Rune* base);
246 static Rune*		aval(Token* tok, int attid);
247 static void			buildinit(void);
248 static Pstate*		cell_pstate(Pstate* oldps, int ishead);
249 static void			changehang(Pstate* ps, int delta);
250 static void			changeindent(Pstate* ps, int delta);
251 static int			color(Rune* s, int dflt);
252 static void			copystack(Stack* tostk, Stack* fromstk);
253 static int			dimprint(char* buf, int nbuf, Dimen d);
254 static Pstate*		finishcell(Table* curtab, Pstate* psstk);
255 static void			finish_table(Table* t);
256 static void			freeanchor(Anchor* a);
257 static void			freedestanchor(DestAnchor* da);
258 static void			freeform(Form* f);
259 static void			freeformfield(Formfield* ff);
260 static void			freeitem(Item* it);
261 static void			freepstate(Pstate* p);
262 static void			freepstatestack(Pstate* pshead);
263 static void			freescriptevents(SEvent* ehead);
264 static void			freetable(Table* t);
265 static Map*		getmap(Docinfo* di, Rune* name);
266 static Rune*		getpcdata(Token* toks, int tokslen, int* ptoki);
267 static Pstate*		lastps(Pstate* psl);
268 static Rune*		listmark(uchar ty, int n);
269 static int			listtyval(Token* tok, int dflt);
270 static Align		makealign(int halign, int valign);
271 static Background	makebackground(Rune* imgurl, int color);
272 static Dimen		makedimen(int kind, int spec);
273 static Anchor*		newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
274 static Area*		newarea(int shape, Rune* href, int target, Area* link);
275 static DestAnchor*	newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
276 static Docinfo*		newdocinfo(void);
277 static Genattr*		newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, Attr* events);
278 static Form*		newform(int formid, Rune* name, Rune* action,
279 					int target, int method, Form* link);
280 static Formfield*	newformfield(int ftype, int fieldid, Form* form, Rune* name,
281 					Rune* value, int size, int maxlength, Formfield* link);
282 static Item*		newifloat(Item* it, int side);
283 static Item*		newiformfield(Formfield* ff);
284 static Item*		newiimage(Rune* src, Rune* altrep, int align, int width, int height,
285 					int hspace, int vspace, int border, int ismap, Map* map);
286 static Item*		newirule(int align, int size, int noshade, Dimen wspec);
287 static Item*		newispacer(int spkind);
288 static Item*		newitable(Table* t);
289 static ItemSource*	newitemsource(Docinfo* di);
290 static Item*		newitext(Rune* s, int fnt, int fg, int voff, int ul);
291 static Kidinfo*		newkidinfo(int isframeset, Kidinfo* link);
292 static Option*		newoption(int selected, Rune* value, Rune* display, Option* link);
293 static Pstate*		newpstate(Pstate* link);
294 static SEvent*		newscriptevent(int type, Rune* script, SEvent* link);
295 static Table*		newtable(int tableid, Align align, Dimen width, int border,
296 					int cellspacing, int cellpadding, Background bg, Token* tok, Table* link);
297 static Tablecell*	newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec,
298 					int hspec, Background bg, int flags, Tablecell* link);
299 static Tablerow*	newtablerow(Align align, Background bg, int flags, Tablerow* link);
300 static Dimen		parsedim(Rune* s, int ns);
301 static void			pop(Stack* stk);
302 static void			popfontsize(Pstate* ps);
303 static void			popfontstyle(Pstate* ps);
304 static void			popjust(Pstate* ps);
305 static int			popretnewtop(Stack* stk, int dflt);
306 static int			push(Stack* stk, int val);
307 static void			pushfontsize(Pstate* ps, int sz);
308 static void			pushfontstyle(Pstate* ps, int sty);
309 static void			pushjust(Pstate* ps, int j);
310 static Item*		textit(Pstate* ps, Rune* s);
311 static Rune*		removeallwhite(Rune* s);
312 static void			resetdocinfo(Docinfo* d);
313 static void			setcurfont(Pstate* ps);
314 static void			setcurjust(Pstate* ps);
315 static void			setdimarray(Token* tok, int attid, Dimen** pans, int* panslen);
316 static Rune*		stringalign(int a);
317 static void			targetmapinit(void);
318 static int			toint(Rune* s);
319 static int			top(Stack* stk, int dflt);
320 static void			trim_cell(Tablecell* c);
321 static int			validalign(Align a);
322 static int			validdimen(Dimen d);
323 static int			validformfield(Formfield* f);
324 static int			validhalign(int a);
325 static int			validptr(void* p);
326 static int			validStr(Rune* s);
327 static int			validtable(Table* t);
328 static int			validtablerow(Tablerow* r);
329 static int			validtablecol(Tablecol* c);
330 static int			validtablecell(Tablecell* c);
331 static int			validvalign(int a);
332 static int			Iconv(Fmt *f);
333 
// One-time setup for this file: installs Iconv as the handler for the 'I'
// print verb, runs targetmapinit, and records completion in buildinited.
// getitems calls this on entry while buildinited is still 0.
334 static void
335 buildinit(void)
336 {
337 	fmtinstall('I', Iconv);
338 	targetmapinit();
339 	buildinited = 1;
340 }
341 
342 // Make the ItemSource for one parse of di: a single fresh Pstate on the
343 // state stack, counters at zero, nothing open.  Any media type other than
344 // TextHtml starts literal, with IFwrap cleared and font style FntT pushed.
345 static ItemSource*
346 newitemsource(Docinfo* di)
347 {
348 	Pstate*	base;
349 	ItemSource*	src;
350 
351 	base = newpstate(nil);
352 	if(di->mediatype != TextHtml) {
353 		base->literal = 1;
354 		base->curstate &= ~IFwrap;
355 		pushfontstyle(base, FntT);
356 	}
357 	src = (ItemSource*)emalloc(sizeof *src);
358 	src->doc = di;
359 	src->psstk = base;
360 	src->nforms = src->ntables = src->nanchors = src->nframes = 0;
361 	src->curform = nil;
362 	src->curmap = nil;
363 	src->tabstk = nil;
364 	src->kidstk = nil;
365 	return src;
366 }
367 
368 static Item *getitems(ItemSource* is, uchar* data, int datalen);	// defined below; declared here so parsehtml can call it
369 
370 // Parse the document passed as data/datalen into a list of layout items.
371 // A new Docinfo is allocated and handed back through *pdi; its src and
372 // base both start as copies of pagesrc, and mtype/chset are recorded in it.
373 // When the caller is done with the items it should call freeitems on the
374 // returned list, and then freedocinfo(*pdi).
375 Item*
376 parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi)
377 {
378 	Docinfo*	doc;
379 	ItemSource*	src;
380 	Item*	items;
381 
382 	doc = newdocinfo();
383 	doc->mediatype = mtype;
384 	doc->chset = chset;
385 	doc->src = _Strdup(pagesrc);
386 	doc->base = _Strdup(pagesrc);
387 	*pdi = doc;
388 	src = newitemsource(doc);
389 	items = getitems(src, data, datalen);
390 	freepstatestack(src->psstk);	// the ItemSource and its Pstates are only needed during the parse
391 	free(src);
392 	return items;
393 }
394 
395 // Get a group of tokens for lexer, parse them, and create
396 // a list of layout items.
397 // When caller is done with the items, it should call
398 // freeitems on the returned result.
399 static Item*
400 getitems(ItemSource* is, uchar* data, int datalen)
401 {
402 	int	i;
403 	int	j;
404 	int	nt;
405 	int	pt;
406 	int	doscripts;
407 	int	tokslen;
408 	int	toki;
409 	int	h;
410 	int	sz;
411 	int	method;
412 	int	n;
413 	int	nblank;
414 	int	norsz;
415 	int	bramt;
416 	int	sty;
417 	int	nosh;
418 	int	oldcuranchor;
419 	int	dfltbd;
420 	int	v;
421 	int	hang;
422 	int	isempty;
423 	int	tag;
424 	int	brksp;
425 	int	target;
426 	uchar	brk;
427 	uchar	flags;
428 	uchar	align;
429 	uchar	al;
430 	uchar	ty;
431 	uchar	ty2;
432 	Pstate*	ps;
433 	Pstate*	nextps;
434 	Pstate*	outerps;
435 	Table*	curtab;
436 	Token*	tok;
437 	Token*	toks;
438 	Docinfo*	di;
439 	Item*	ans;
440 	Item*	img;
441 	Item*	ffit;
442 	Item*	tabitem;
443 	Rune*	s;
444 	Rune*	t;
445 	Rune*	name;
446 	Rune*	enctype;
447 	Rune*	usemap;
448 	Rune*	prompt;
449 	Rune*	equiv;
450 	Rune*	val;
451 	Rune*	nsz;
452 	Rune*	script;
453 	Map*	map;
454 	Form*	frm;
455 	Iimage*	ii;
456 	Kidinfo*	kd;
457 	Kidinfo*	ks;
458 	Kidinfo*	pks;
459 	Dimen	wd;
460 	Option*	option;
461 	Table*	tab;
462 	Tablecell*	c;
463 	Tablerow*	tr;
464 	Formfield*	field;
465 	Formfield*	ff;
466 	Rune*	href;
467 	Rune*	src;
468 	Rune*	scriptsrc;
469 	Rune*	bgurl;
470 	Rune*	action;
471 	Background	bg;
472 
473 	if(!buildinited)
474 		buildinit();
475 	doscripts = 0;	// for now
476 	ps = is->psstk;
477 	curtab = is->tabstk;
478 	di = is->doc;
479 	toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
480 	toki = 0;
481 	for(; toki < tokslen; toki++) {
482 		tok = &toks[toki];
483 		if(dbgbuild > 1)
484 			fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok);
485 		tag = tok->tag;
486 		brk = 0;
487 		brksp = 0;
488 		if(tag < Numtags) {
489 			brk = blockbrk[tag];
490 			if(brk&SPBefore)
491 				brksp = 1;
492 		}
493 		else if(tag < Numtags + RBRA) {
494 			brk = blockbrk[tag - RBRA];
495 			if(brk&SPAfter)
496 				brksp = 1;
497 		}
498 		if(brk) {
499 			addbrk(ps, brksp, 0);
500 			if(ps->inpar) {
501 				popjust(ps);
502 				ps->inpar = 0;
503 			}
504 		}
505 		// check common case first (Data), then switch statement on tag
506 		if(tag == Data) {
507 			// Lexing didn't pay attention to SGML record boundary rules:
508 			// \n after start tag or before end tag to be discarded.
509 			// (Lex has already discarded all \r's).
510 			// Some pages assume this doesn't happen in <PRE> text,
511 			// so we won't do it if literal is true.
512 			// BUG: won't discard \n before a start tag that begins
513 			// the next bufferful of tokens.
514 			s = tok->text;
515 			n = _Strlen(s);
516 			if(!ps->literal) {
517 				i = 0;
518 				j = n;
519 				if(toki > 0) {
520 					pt = toks[toki - 1].tag;
521 					// IE and Netscape both ignore this rule (contrary to spec)
522 					// if previous tag was img
523 					if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
524 						i++;
525 				}
526 				if(toki < tokslen - 1) {
527 					nt = toks[toki + 1].tag;
528 					if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
529 						j--;
530 				}
531 				if(i > 0 || j < n) {
532 					t = s;
533 					s = _Strsubstr(s, i, j);
534 					free(t);
535 					n = j-i;
536 				}
537 			}
538 			if(ps->skipwhite) {
539 				_trimwhite(s, n, &t, &nt);
540 				if(t == nil) {
541 					free(s);
542 					s = nil;
543 				}
544 				else if(t != s) {
545 					t = _Strndup(t, nt);
546 					free(s);
547 					s = t;
548 				}
549 				if(s != nil)
550 					ps->skipwhite = 0;
551 			}
552 			tok->text = nil;		// token doesn't own string anymore
553 			if(s != nil)
554 				addtext(ps, s);
555 		}
556 		else
557 			switch(tag) {
558 			// Some abbrevs used in following DTD comments
559 			// %text = 	#PCDATA
560 			//		| TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
561 			//		| EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
562 			//		| A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
563 			//		| INPUT | SELECT | TEXTAREA
564 			// %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
565 			//		| BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
566 			// %flow = (%text | %block)*
567 			// %body.content = (%heading | %text | %block | ADDRESS)*
568 
569 			// <!ELEMENT A - - (%text) -(A)>
570 			// Anchors are not supposed to be nested, but you sometimes see
571 			// href anchors inside destination anchors.
572 			case Ta:
573 				if(ps->curanchor != 0) {
574 					if(warn)
575 						fprint(2, "warning: nested <A> or missing </A>\n");
576 					ps->curanchor = 0;
577 				}
578 				name = aval(tok, Aname);
579 				href = aurlval(tok, Ahref, nil, di->base);
580 				// ignore rel, rev, and title attrs
581 				if(href != nil) {
582 					target = atargval(tok, di->target);
583 					di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
584 					if(name != nil)
585 						name = _Strdup(name);	// for DestAnchor construction, below
586 					ps->curanchor = is->nanchors;
587 					ps->curfg = push(&ps->fgstk, di->link);
588 					ps->curul = push(&ps->ulstk, ULunder);
589 				}
590 				if(name != nil) {
591 					// add a null item to be destination
592 					additem(ps, newispacer(ISPnull), tok);
593 					di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
594 				}
595 				break;
596 
597 			case Ta+RBRA :
598 				if(ps->curanchor != 0) {
599 					ps->curfg = popretnewtop(&ps->fgstk, di->text);
600 					ps->curul = popretnewtop(&ps->ulstk, ULnone);
601 					ps->curanchor = 0;
602 				}
603 				break;
604 
605 			// <!ELEMENT APPLET - - (PARAM | %text)* >
606 			// We can't do applets, so ignore PARAMS, and let
607 			// the %text contents appear for the alternative rep
608 			case Tapplet:
609 			case Tapplet+RBRA:
610 				if(warn && tag == Tapplet)
611 					fprint(2, "warning: <APPLET> ignored\n");
612 				break;
613 
614 			// <!ELEMENT AREA - O EMPTY>
615 			case Tarea:
616 				map = di->maps;
617 				if(map == nil) {
618 					if(warn)
619 						fprint(2, "warning: <AREA> not inside <MAP>\n");
620 					continue;
621 				}
622 				map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
623 					aurlval(tok, Ahref, nil, di->base),
624 					atargval(tok, di->target),
625 					map->areas);
626 				setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
627 				break;
628 
629 			// <!ELEMENT (B|STRONG) - - (%text)*>
630 			case Tb:
631 			case Tstrong:
632 				pushfontstyle(ps, FntB);
633 				break;
634 
635 			case Tb+RBRA:
636 			case Tcite+RBRA:
637 			case Tcode+RBRA:
638 			case Tdfn+RBRA:
639 			case Tem+RBRA:
640 			case Tkbd+RBRA:
641 			case Ti+RBRA:
642 			case Tsamp+RBRA:
643 			case Tstrong+RBRA:
644 			case Ttt+RBRA:
645 			case Tvar+RBRA :
646 			case Taddress+RBRA:
647 				popfontstyle(ps);
648 				break;
649 
650 			// <!ELEMENT BASE - O EMPTY>
651 			case Tbase:
652 				t = di->base;
653 				di->base = aurlval(tok, Ahref, di->base, di->base);
654 				if(t != nil)
655 					free(t);
656 				di->target = atargval(tok, di->target);
657 				break;
658 
659 			// <!ELEMENT BASEFONT - O EMPTY>
660 			case Tbasefont:
661 				ps->adjsize = aintval(tok, Asize, 3) - 3;
662 				break;
663 
664 			// <!ELEMENT (BIG|SMALL) - - (%text)*>
665 			case Tbig:
666 			case Tsmall:
667 				sz = ps->adjsize;
668 				if(tag == Tbig)
669 					sz += Large;
670 				else
671 					sz += Small;
672 				pushfontsize(ps, sz);
673 				break;
674 
675 			case Tbig+RBRA:
676 			case Tsmall+RBRA:
677 				popfontsize(ps);
678 				break;
679 
680 			// <!ELEMENT BLOCKQUOTE - - %body.content>
681 			case Tblockquote:
682 				changeindent(ps, BQTAB);
683 				break;
684 
685 			case Tblockquote+RBRA:
686 				changeindent(ps, -BQTAB);
687 				break;
688 
689 			// <!ELEMENT BODY O O %body.content>
690 			case Tbody:
691 				ps->skipping = 0;
692 				bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
693 				bgurl = aurlval(tok, Abackground, nil, di->base);
694 				if(bgurl != nil) {
695 					if(di->backgrounditem != nil)
696 						freeitem((Item*)di->backgrounditem);
697 						// really should remove old item from di->images list,
698 						// but there should only be one BODY element ...
699 					di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
700 					di->backgrounditem->nextimage = di->images;
701 					di->images = di->backgrounditem;
702 				}
703 				ps->curbg = bg;
704 				di->background = bg;
705 				di->text = acolorval(tok, Atext, di->text);
706 				di->link = acolorval(tok, Alink, di->link);
707 				di->vlink = acolorval(tok, Avlink, di->vlink);
708 				di->alink = acolorval(tok, Aalink, di->alink);
709 				if(di->text != ps->curfg) {
710 					ps->curfg = di->text;
711 					ps->fgstk.n = 0;
712 				}
713 				break;
714 
715 			case Tbody+RBRA:
716 				// HTML spec says ignore things after </body>,
717 				// but IE and Netscape don't
718 				// ps.skipping = 1;
719 				break;
720 
721 			// <!ELEMENT BR - O EMPTY>
722 			case Tbr:
723 				addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
724 				break;
725 
726 			// <!ELEMENT CAPTION - - (%text;)*>
727 			case Tcaption:
728 				if(curtab == nil) {
729 					if(warn)
730 						fprint(2, "warning: <CAPTION> outside <TABLE>\n");
731 					continue;
732 				}
733 				if(curtab->caption != nil) {
734 					if(warn)
735 						fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
736 					continue;
737 				}
738 				ps = newpstate(ps);
739 				curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
740 				break;
741 
742 			case Tcaption+RBRA:
743 				nextps = ps->next;
744 				if(curtab == nil || nextps == nil) {
745 					if(warn)
746 						fprint(2, "warning: unexpected </CAPTION>\n");
747 					continue;
748 				}
749 				curtab->caption = ps->items->next;
750 				free(ps);
751 				ps = nextps;
752 				break;
753 
754 			case Tcenter:
755 			case Tdiv:
756 				if(tag == Tcenter)
757 					al = ALcenter;
758 				else
759 					al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust);
760 				pushjust(ps, al);
761 				break;
762 
763 			case Tcenter+RBRA:
764 			case Tdiv+RBRA:
765 				popjust(ps);
766 				break;
767 
768 			// <!ELEMENT DD - O  %flow >
769 			case Tdd:
770 				if(ps->hangstk.n == 0) {
771 					if(warn)
772 						fprint(2, "warning: <DD> not inside <DL\n");
773 					continue;
774 				}
775 				h = top(&ps->hangstk, 0);
776 				if(h != 0)
777 					changehang(ps, -10*LISTTAB);
778 				else
779 					addbrk(ps, 0, 0);
780 				push(&ps->hangstk, 0);
781 				break;
782 
783 			//<!ELEMENT (DIR|MENU) - - (LI)+ -(%block) >
784 			//<!ELEMENT (OL|UL) - - (LI)+>
785 			case Tdir:
786 			case Tmenu:
787 			case Tol:
788 			case Tul:
789 				changeindent(ps, LISTTAB);
790 				push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc));
791 				push(&ps->listcntstk, aintval(tok, Astart, 1));
792 				break;
793 
794 			case Tdir+RBRA:
795 			case Tmenu+RBRA:
796 			case Tol+RBRA:
797 			case Tul+RBRA:
798 				if(ps->listtypestk.n == 0) {
799 					if(warn)
800 						fprint(2, "warning: %T ended no list\n", tok);
801 					continue;
802 				}
803 				addbrk(ps, 0, 0);
804 				pop(&ps->listtypestk);
805 				pop(&ps->listcntstk);
806 				changeindent(ps, -LISTTAB);
807 				break;
808 
809 			// <!ELEMENT DL - - (DT|DD)+ >
810 			case Tdl:
811 				changeindent(ps, LISTTAB);
812 				push(&ps->hangstk, 0);
813 				break;
814 
815 			case Tdl+RBRA:
816 				if(ps->hangstk.n == 0) {
817 					if(warn)
818 						fprint(2, "warning: unexpected </DL>\n");
819 					continue;
820 				}
821 				changeindent(ps, -LISTTAB);
822 				if(top(&ps->hangstk, 0) != 0)
823 					changehang(ps, -10*LISTTAB);
824 				pop(&ps->hangstk);
825 				break;
826 
827 			// <!ELEMENT DT - O (%text)* >
828 			case Tdt:
829 				if(ps->hangstk.n == 0) {
830 					if(warn)
831 						fprint(2, "warning: <DT> not inside <DL>\n");
832 					continue;
833 				}
834 				h = top(&ps->hangstk, 0);
835 				pop(&ps->hangstk);
836 				if(h != 0)
837 					changehang(ps, -10*LISTTAB);
838 				changehang(ps, 10*LISTTAB);
839 				push(&ps->hangstk, 1);
840 				break;
841 
842 			// <!ELEMENT FONT - - (%text)*>
843 			case Tfont:
844 				sz = top(&ps->fntsizestk, Normal);
845 				if(_tokaval(tok, Asize, &nsz, 0)) {
846 					if(_prefix(L"+", nsz))
847 						sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize;
848 					else if(_prefix(L"-", nsz))
849 						sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize;
850 					else if(nsz != nil)
851 						sz = Normal + (_Strtol(nsz, nil, 10) - 3);
852 				}
853 				ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg));
854 				pushfontsize(ps, sz);
855 				break;
856 
857 			case Tfont+RBRA:
858 				if(ps->fgstk.n == 0) {
859 					if(warn)
860 						fprint(2, "warning: unexpected </FONT>\n");
861 					continue;
862 				}
863 				ps->curfg = popretnewtop(&ps->fgstk, di->text);
864 				popfontsize(ps);
865 				break;
866 
867 			// <!ELEMENT FORM - - %body.content -(FORM) >
868 			case Tform:
869 				if(is->curform != nil) {
870 					if(warn)
871 						fprint(2, "warning: <FORM> nested inside another\n");
872 					continue;
873 				}
874 				action = aurlval(tok, Aaction, di->base, di->base);
875 				s = aval(tok, Aid);
876 				name = astrval(tok, Aname, s);
877 				if(s)
878 					free(s);
879 				target = atargval(tok, di->target);
880 				method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet);
881 				if(warn && _tokaval(tok, Aenctype, &enctype, 0) &&
882 						_Strcmp(enctype, L"application/x-www-form-urlencoded"))
883 					fprint(2, "form enctype %S not handled\n", enctype);
884 				frm = newform(++is->nforms, name, action, target, method, di->forms);
885 				di->forms = frm;
886 				is->curform = frm;
887 				break;
888 
889 			case Tform+RBRA:
890 				if(is->curform == nil) {
891 					if(warn)
892 						fprint(2, "warning: unexpected </FORM>\n");
893 					continue;
894 				}
895 				// put fields back in input order
896 				is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields);
897 				is->curform = nil;
898 				break;
899 
900 			// <!ELEMENT FRAME - O EMPTY>
901 			case Tframe:
902 				ks = is->kidstk;
903 				if(ks == nil) {
904 					if(warn)
905 						fprint(2, "warning: <FRAME> not in <FRAMESET>\n");
906 					continue;
907 				}
908 				ks->kidinfos = kd = newkidinfo(0, ks->kidinfos);
909 				kd->src = aurlval(tok, Asrc, nil, di->base);
910 				kd->name = aval(tok, Aname);
911 				if(kd->name == nil) {
912 					s = _ltoStr(++is->nframes);
913 					kd->name = _Strdup2(L"_fr", s);
914 					free(s);
915 				}
916 				kd->marginw = auintval(tok, Amarginwidth, 0);
917 				kd->marginh = auintval(tok, Amarginheight, 0);
918 				kd->framebd = auintval(tok, Aframeborder, 1);
919 				kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags);
920 				norsz = aflagval(tok, Anoresize);
921 				if(norsz)
922 					kd->flags |= FRnoresize;
923 				break;
924 
925 			// <!ELEMENT FRAMESET - - (FRAME|FRAMESET)+>
926 			case Tframeset:
927 				ks = newkidinfo(1, nil);
928 				pks = is->kidstk;
929 				if(pks == nil)
930 					di->kidinfo = ks;
931 				else  {
932 					ks->next = pks->kidinfos;
933 					pks->kidinfos = ks;
934 				}
935 				ks->nextframeset = pks;
936 				is->kidstk = ks;
937 				setdimarray(tok, Arows, &ks->rows, &ks->nrows);
938 				if(ks->nrows == 0) {
939 					ks->rows = (Dimen*)emalloc(sizeof(Dimen));
940 					ks->nrows = 1;
941 					ks->rows[0] = makedimen(Dpercent, 100);
942 				}
943 				setdimarray(tok, Acols, &ks->cols, &ks->ncols);
944 				if(ks->ncols == 0) {
945 					ks->cols = (Dimen*)emalloc(sizeof(Dimen));
946 					ks->ncols = 1;
947 					ks->cols[0] = makedimen(Dpercent, 100);
948 				}
949 				break;
950 
951 			case Tframeset+RBRA:
952 				if(is->kidstk == nil) {
953 					if(warn)
954 						fprint(2, "warning: unexpected </FRAMESET>\n");
955 					continue;
956 				}
957 				ks = is->kidstk;
958 				// put kids back in original order
959 				// and add blank frames to fill out cells
960 				n = ks->nrows*ks->ncols;
961 				nblank = n - _listlen((List*)ks->kidinfos);
962 				while(nblank-- > 0)
963 					ks->kidinfos = newkidinfo(0, ks->kidinfos);
964 				ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos);
965 				is->kidstk = is->kidstk->nextframeset;
966 				if(is->kidstk == nil) {
967 					// end input
968 					ans = nil;
969 					goto return_ans;
970 				}
971 				break;
972 
973 			// <!ELEMENT H1 - - (%text;)*>, etc.
974 			case Th1:
975 			case Th2:
976 			case Th3:
977 			case Th4:
978 			case Th5:
979 			case Th6:
980 				bramt = 1;
981 				if(ps->items == ps->lastit)
982 					bramt = 0;
983 				addbrk(ps, bramt, IFcleft|IFcright);
984 				sz = Verylarge - (tag - Th1);
985 				if(sz < Tiny)
986 					sz = Tiny;
987 				pushfontsize(ps, sz);
988 				sty = top(&ps->fntstylestk, FntR);
989 				if(tag == Th1)
990 					sty = FntB;
991 				pushfontstyle(ps, sty);
992 				pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
993 				ps->skipwhite = 1;
994 				break;
995 
996 			case Th1+RBRA:
997 			case Th2+RBRA:
998 			case Th3+RBRA:
999 			case Th4+RBRA:
1000 			case Th5+RBRA:
1001 			case Th6+RBRA:
1002 				addbrk(ps, 1, IFcleft|IFcright);
1003 				popfontsize(ps);
1004 				popfontstyle(ps);
1005 				popjust(ps);
1006 				break;
1007 
1008 			case Thead:
1009 				// HTML spec says ignore regular markup in head,
1010 				// but Netscape and IE don't
1011 				// ps.skipping = 1;
1012 				break;
1013 
1014 			case Thead+RBRA:
1015 				ps->skipping = 0;
1016 				break;
1017 
1018 			// <!ELEMENT HR - O EMPTY>
1019 			case Thr:
1020 				al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter);
1021 				sz = auintval(tok, Asize, HRSZ);
1022 				wd = adimen(tok, Awidth);
1023 				if(dimenkind(wd) == Dnone)
1024 					wd = makedimen(Dpercent, 100);
1025 				nosh = aflagval(tok, Anoshade);
1026 				additem(ps, newirule(al, sz, nosh, wd), tok);
1027 				addbrk(ps, 0, 0);
1028 				break;
1029 
1030 			case Ti:
1031 			case Tcite:
1032 			case Tdfn:
1033 			case Tem:
1034 			case Tvar:
1035 			case Taddress:
1036 				pushfontstyle(ps, FntI);
1037 				break;
1038 
1039 			// <!ELEMENT IMG - O EMPTY>
1040 			case Timg:
1041 				map = nil;
1042 				oldcuranchor = ps->curanchor;
1043 				if(_tokaval(tok, Ausemap, &usemap, 0)) {
1044 					if(!_prefix(L"#", usemap)) {
1045 						if(warn)
1046 							fprint(2, "warning: can't handle non-local map %S\n", usemap);
1047 					}
1048 					else {
1049 						map = getmap(di, usemap+1);
1050 						if(ps->curanchor == 0) {
1051 							di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
1052 							ps->curanchor = is->nanchors;
1053 						}
1054 					}
1055 				}
1056 				align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
1057 				dfltbd = 0;
1058 				if(ps->curanchor != 0)
1059 					dfltbd = 2;
1060 				src = aurlval(tok, Asrc, nil, di->base);
1061 				if(src == nil) {
1062 					if(warn)
1063 						fprint(2, "warning: <img> has no src attribute\n");
1064 					ps->curanchor = oldcuranchor;
1065 					continue;
1066 				}
1067 				img = newiimage(src,
1068 						aval(tok, Aalt),
1069 						align,
1070 						auintval(tok, Awidth, 0),
1071 						auintval(tok, Aheight, 0),
1072 						auintval(tok, Ahspace, IMGHSPACE),
1073 						auintval(tok, Avspace, IMGVSPACE),
1074 						auintval(tok, Aborder, dfltbd),
1075 						aflagval(tok, Aismap),
1076 						map);
1077 				if(align == ALleft || align == ALright) {
1078 					additem(ps, newifloat(img, align), tok);
1079 					// if no hspace specified, use FLTIMGHSPACE
1080 					if(!_tokaval(tok, Ahspace, &val, 0))
1081 						((Iimage*)img)->hspace = FLTIMGHSPACE;
1082 				}
1083 				else {
1084 					ps->skipwhite = 0;
1085 					additem(ps, img, tok);
1086 				}
1087 				if(!ps->skipping) {
1088 					((Iimage*)img)->nextimage = di->images;
1089 					di->images = (Iimage*)img;
1090 				}
1091 				ps->curanchor = oldcuranchor;
1092 				break;
1093 
1094 			// <!ELEMENT INPUT - O EMPTY>
1095 			case Tinput:
1096 				ps->skipwhite = 0;
1097 				if(is->curform == nil) {
1098 					if(warn)
1099 						fprint(2, "<INPUT> not inside <FORM>\n");
1100 					continue;
1101 				}
1102 				is->curform->fields = field = newformfield(
1103 						atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
1104 						++is->curform->nfields,
1105 						is->curform,
1106 						aval(tok, Aname),
1107 						aval(tok, Avalue),
1108 						auintval(tok, Asize, 0),
1109 						auintval(tok, Amaxlength, 1000),
1110 						is->curform->fields);
1111 				if(aflagval(tok, Achecked))
1112 					field->flags = FFchecked;
1113 
1114 				switch(field->ftype) {
1115 				case Ftext:
1116 				case Fpassword:
1117 				case Ffile:
1118 					if(field->size == 0)
1119 						field->size = 20;
1120 					break;
1121 
1122 				case Fcheckbox:
1123 					if(field->name == nil) {
1124 						if(warn)
1125 							fprint(2, "warning: checkbox form field missing name\n");
1126 						continue;
1127 					}
1128 					if(field->value == nil)
1129 						field->value = _Strdup(L"1");
1130 					break;
1131 
1132 				case Fradio:
1133 					if(field->name == nil || field->value == nil) {
1134 						if(warn)
1135 							fprint(2, "warning: radio form field missing name or value\n");
1136 						continue;
1137 					}
1138 					break;
1139 
1140 				case Fsubmit:
1141 					if(field->value == nil)
1142 						field->value = _Strdup(L"Submit");
1143 					if(field->name == nil)
1144 						field->name = _Strdup(L"_no_name_submit_");
1145 					break;
1146 
1147 				case Fimage:
1148 					src = aurlval(tok, Asrc, nil, di->base);
1149 					if(src == nil) {
1150 						if(warn)
1151 							fprint(2, "warning: image form field missing src\n");
1152 						continue;
1153 					}
1154 					// width and height attrs aren't specified in HTML 3.2,
1155 					// but some people provide them and they help avoid
1156 					// a relayout
1157 					field->image = newiimage(src,
1158 						astrval(tok, Aalt, L"Submit"),
1159 						atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom),
1160 						auintval(tok, Awidth, 0), auintval(tok, Aheight, 0),
1161 						0, 0, 0, 0, nil);
1162 					ii = (Iimage*)field->image;
1163 					ii->nextimage = di->images;
1164 					di->images = ii;
1165 					break;
1166 
1167 				case Freset:
1168 					if(field->value == nil)
1169 						field->value = _Strdup(L"Reset");
1170 					break;
1171 
1172 				case Fbutton:
1173 					if(field->value == nil)
1174 						field->value = _Strdup(L" ");
1175 					break;
1176 				}
1177 				ffit = newiformfield(field);
1178 				additem(ps, ffit, tok);
1179 				if(ffit->genattr != nil)
1180 					field->events = ffit->genattr->events;
1181 				break;
1182 
1183 			// <!ELEMENT ISINDEX - O EMPTY>
1184 			case Tisindex:
1185 				ps->skipwhite = 0;
1186 				prompt = astrval(tok, Aprompt, L"Index search terms:");
1187 				target = atargval(tok, di->target);
1188 				additem(ps, textit(ps, prompt), tok);
1189 				frm = newform(++is->nforms,
1190 						nil,
1191 						di->base,
1192 						target,
1193 						HGet,
1194 						di->forms);
1195 				di->forms = frm;
1196 				ff = newformfield(Ftext,
1197 						1,
1198 						frm,
1199 						_Strdup(L"_ISINDEX_"),
1200 						nil,
1201 						50,
1202 						1000,
1203 						nil);
1204 				frm->fields = ff;
1205 				frm->nfields = 1;
1206 				additem(ps, newiformfield(ff), tok);
1207 				addbrk(ps, 1, 0);
1208 				break;
1209 
1210 			// <!ELEMENT LI - O %flow>
1211 			case Tli:
1212 				if(ps->listtypestk.n == 0) {
1213 					if(warn)
1214 						fprint(2, "<LI> not in list\n");
1215 					continue;
1216 				}
1217 				ty = top(&ps->listtypestk, 0);
1218 				ty2 = listtyval(tok, ty);
1219 				if(ty != ty2) {
1220 					ty = ty2;
1221 					push(&ps->listtypestk, ty2);
1222 				}
1223 				v = aintval(tok, Avalue, top(&ps->listcntstk, 1));
1224 				if(ty == LTdisc || ty == LTsquare || ty == LTcircle)
1225 					hang = 10*LISTTAB - 3;
1226 				else
1227 					hang = 10*LISTTAB - 1;
1228 				changehang(ps, hang);
1229 				addtext(ps, listmark(ty, v));
1230 				push(&ps->listcntstk, v + 1);
1231 				changehang(ps, -hang);
1232 				ps->skipwhite = 1;
1233 				break;
1234 
1235 			// <!ELEMENT MAP - - (AREA)+>
1236 			case Tmap:
1237 				if(_tokaval(tok, Aname, &name, 0))
1238 					is->curmap = getmap(di, name);
1239 				break;
1240 
1241 			case Tmap+RBRA:
1242 				map = is->curmap;
1243 				if(map == nil) {
1244 					if(warn)
1245 						fprint(2, "warning: unexpected </MAP>\n");
1246 					continue;
1247 				}
1248 				map->areas = (Area*)_revlist((List*)map->areas);
1249 				break;
1250 
1251 			case Tmeta:
1252 				if(ps->skipping)
1253 					continue;
1254 				if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) {
1255 					val = aval(tok, Acontent);
1256 					n = _Strlen(equiv);
1257 					if(!_Strncmpci(equiv, n, L"refresh"))
1258 						di->refresh = val;
1259 					else if(!_Strncmpci(equiv, n, L"content-script-type")) {
1260 						n = _Strlen(val);
1261 						if(!_Strncmpci(val, n, L"javascript")
1262 						   || !_Strncmpci(val, n, L"jscript1.1")
1263 						   || !_Strncmpci(val, n, L"jscript"))
1264 							di->scripttype = TextJavascript;
1265 						else {
1266 							if(warn)
1267 								fprint(2, "unimplemented script type %S\n", val);
1268 							di->scripttype = UnknownType;
1269 						}
1270 					}
1271 				}
1272 				break;
1273 
1274 			// Nobr is NOT in HTML 4.0, but it is ubiquitous on the web
1275 			case Tnobr:
1276 				ps->skipwhite = 0;
1277 				ps->curstate &= ~IFwrap;
1278 				break;
1279 
1280 			case Tnobr+RBRA:
1281 				ps->curstate |= IFwrap;
1282 				break;
1283 
1284 			// We do frames, so skip stuff in noframes
1285 			case Tnoframes:
1286 				ps->skipping = 1;
1287 				break;
1288 
1289 			case Tnoframes+RBRA:
1290 				ps->skipping = 0;
1291 				break;
1292 
1293 			// We do scripts (if enabled), so skip stuff in noscripts
1294 			case Tnoscript:
1295 				if(doscripts)
1296 					ps->skipping = 1;
1297 				break;
1298 
1299 			case Tnoscript+RBRA:
1300 				if(doscripts)
1301 					ps->skipping = 0;
1302 				break;
1303 
1304 			// <!ELEMENT OPTION - O (#PCDATA)>
1305 			case Toption:
1306 				if(is->curform == nil || is->curform->fields == nil) {
1307 					if(warn)
1308 						fprint(2, "warning: <OPTION> not in <SELECT>\n");
1309 					continue;
1310 				}
1311 				field = is->curform->fields;
1312 				if(field->ftype != Fselect) {
1313 					if(warn)
1314 						fprint(2, "warning: <OPTION> not in <SELECT>\n");
1315 					continue;
1316 				}
1317 				val = aval(tok, Avalue);
1318 				option = newoption(aflagval(tok, Aselected), val, nil, field->options);
1319 				field->options = option;
1320 				option->display =  getpcdata(toks, tokslen, &toki);
1321 				if(val == nil)
1322 					option->value = _Strdup(option->display);
1323 				break;
1324 
1325 			// <!ELEMENT P - O (%text)* >
1326 			case Tp:
1327 				pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
1328 				ps->inpar = 1;
1329 				ps->skipwhite = 1;
1330 				break;
1331 
1332 			case Tp+RBRA:
1333 				break;
1334 
1335 			// <!ELEMENT PARAM - O EMPTY>
1336 			// Do something when we do applets...
1337 			case Tparam:
1338 				break;
1339 
1340 			// <!ELEMENT PRE - - (%text)* -(IMG|BIG|SMALL|SUB|SUP|FONT) >
1341 			case Tpre:
1342 				ps->curstate &= ~IFwrap;
1343 				ps->literal = 1;
1344 				ps->skipwhite = 0;
1345 				pushfontstyle(ps, FntT);
1346 				break;
1347 
1348 			case Tpre+RBRA:
1349 				ps->curstate |= IFwrap;
1350 				if(ps->literal) {
1351 					popfontstyle(ps);
1352 					ps->literal = 0;
1353 				}
1354 				break;
1355 
1356 			// <!ELEMENT SCRIPT - - CDATA>
1357 			case Tscript:
1358 				if(doscripts) {
1359 					if(!di->hasscripts) {
1360 						if(di->scripttype == TextJavascript) {
1361 							// TODO: initialize script if nec.
1362 							// initjscript(di);
1363 							di->hasscripts = 1;
1364 						}
1365 					}
1366 				}
1367 				if(!di->hasscripts) {
1368 					if(warn)
1369 						fprint(2, "warning: <SCRIPT> ignored\n");
1370 					ps->skipping = 1;
1371 				}
1372 				else {
1373 					scriptsrc = aurlval(tok, Asrc, nil, di->base);
1374 					script = nil;
1375 					if(scriptsrc != nil) {
1376 						if(warn)
1377 							fprint(2, "warning: non-local <SCRIPT> ignored\n");
1378 						free(scriptsrc);
1379 					}
1380 					else {
1381 						script = getpcdata(toks, tokslen, &toki);
1382 					}
1383 					if(script != nil) {
1384 						if(warn)
1385 							fprint(2, "script ignored\n");
1386 						free(script);
1387 					}
1388 				}
1389 				break;
1390 
1391 			case Tscript+RBRA:
1392 				ps->skipping = 0;
1393 				break;
1394 
1395 			// <!ELEMENT SELECT - - (OPTION+)>
1396 			case Tselect:
1397 				if(is->curform == nil) {
1398 					if(warn)
1399 						fprint(2, "<SELECT> not inside <FORM>\n");
1400 					continue;
1401 				}
1402 				field = newformfield(Fselect,
1403 					++is->curform->nfields,
1404 					is->curform,
1405 					aval(tok, Aname),
1406 					nil,
1407 					auintval(tok, Asize, 0),
1408 					0,
1409 					is->curform->fields);
1410 				is->curform->fields = field;
1411 				if(aflagval(tok, Amultiple))
1412 					field->flags = FFmultiple;
1413 				ffit = newiformfield(field);
1414 				additem(ps, ffit, tok);
1415 				if(ffit->genattr != nil)
1416 					field->events = ffit->genattr->events;
1417 				// throw away stuff until next tag (should be <OPTION>)
1418 				s = getpcdata(toks, tokslen, &toki);
1419 				if(s != nil)
1420 					free(s);
1421 				break;
1422 
1423 			case Tselect+RBRA:
1424 				if(is->curform == nil || is->curform->fields == nil) {
1425 					if(warn)
1426 						fprint(2, "warning: unexpected </SELECT>\n");
1427 					continue;
1428 				}
1429 				field = is->curform->fields;
1430 				if(field->ftype != Fselect)
1431 					continue;
1432 				// put options back in input order
1433 				field->options = (Option*)_revlist((List*)field->options);
1434 				break;
1435 
1436 			// <!ELEMENT (STRIKE|U) - - (%text)*>
1437 			case Tstrike:
1438 			case Tu:
1439 				ps->curul = push(&ps->ulstk, (tag==Tstrike)? ULmid : ULunder);
1440 				break;
1441 
1442 			case Tstrike+RBRA:
1443 			case Tu+RBRA:
1444 				if(ps->ulstk.n == 0) {
1445 					if(warn)
1446 						fprint(2, "warning: unexpected %T\n", tok);
1447 					continue;
1448 				}
1449 				ps->curul = popretnewtop(&ps->ulstk, ULnone);
1450 				break;
1451 
1452 			// <!ELEMENT STYLE - - CDATA>
1453 			case Tstyle:
1454 				if(warn)
1455 					fprint(2, "warning: unimplemented <STYLE>\n");
1456 				ps->skipping = 1;
1457 				break;
1458 
1459 			case Tstyle+RBRA:
1460 				ps->skipping = 0;
1461 				break;
1462 
1463 			// <!ELEMENT (SUB|SUP) - - (%text)*>
1464 			case Tsub:
1465 			case Tsup:
1466 				if(tag == Tsub)
1467 					ps->curvoff += SUBOFF;
1468 				else
1469 					ps->curvoff -= SUPOFF;
1470 				push(&ps->voffstk, ps->curvoff);
1471 				sz = top(&ps->fntsizestk, Normal);
1472 				pushfontsize(ps, sz - 1);
1473 				break;
1474 
1475 			case Tsub+RBRA:
1476 			case Tsup+RBRA:
1477 				if(ps->voffstk.n == 0) {
1478 					if(warn)
1479 						fprint(2, "warning: unexpected %T\n", tok);
1480 					continue;
1481 				}
1482 				ps->curvoff = popretnewtop(&ps->voffstk, 0);
1483 				popfontsize(ps);
1484 				break;
1485 
1486 			// <!ELEMENT TABLE - - (CAPTION?, TR+)>
1487 			case Ttable:
1488 				ps->skipwhite = 0;
1489 				tab = newtable(++is->ntables,
1490 						aalign(tok),
1491 						adimen(tok, Awidth),
1492 						aflagval(tok, Aborder),
1493 						auintval(tok, Acellspacing, TABSP),
1494 						auintval(tok, Acellpadding, TABPAD),
1495 						makebackground(nil, acolorval(tok, Abgcolor, ps->curbg.color)),
1496 						tok,
1497 						is->tabstk);
1498 				is->tabstk = tab;
1499 				curtab = tab;
1500 				break;
1501 
1502 			case Ttable+RBRA:
1503 				if(curtab == nil) {
1504 					if(warn)
1505 						fprint(2, "warning: unexpected </TABLE>\n");
1506 					continue;
1507 				}
1508 				isempty = (curtab->cells == nil);
1509 				if(isempty) {
1510 					if(warn)
1511 						fprint(2, "warning: <TABLE> has no cells\n");
1512 				}
1513 				else {
1514 					ps = finishcell(curtab, ps);
1515 					if(curtab->rows != nil)
1516 						curtab->rows->flags = 0;
1517 					finish_table(curtab);
1518 				}
1519 				ps->skipping = 0;
1520 				if(!isempty) {
1521 					tabitem = newitable(curtab);
1522 					al = curtab->align.halign;
1523 					switch(al) {
1524 					case ALleft:
1525 					case ALright:
1526 						additem(ps, newifloat(tabitem, al), tok);
1527 						break;
1528 					default:
1529 						if(al == ALcenter)
1530 							pushjust(ps, ALcenter);
1531 						addbrk(ps, 0, 0);
1532 						if(ps->inpar) {
1533 							popjust(ps);
1534 							ps->inpar = 0;
1535 						}
1536 						additem(ps, tabitem, curtab->tabletok);
1537 						if(al == ALcenter)
1538 							popjust(ps);
1539 						break;
1540 					}
1541 				}
1542 				if(is->tabstk == nil) {
1543 					if(warn)
1544 						fprint(2, "warning: table stack is wrong\n");
1545 				}
1546 				else
1547 					is->tabstk = is->tabstk->next;
1548 				curtab->next = di->tables;
1549 				di->tables = curtab;
1550 				curtab = is->tabstk;
1551 				if(!isempty)
1552 					addbrk(ps, 0, 0);
1553 				break;
1554 
1555 			// <!ELEMENT (TH|TD) - O %body.content>
1556 			// Cells for a row are accumulated in reverse order.
1557 			// We push ps on a stack, and use a new one to accumulate
1558 			// the contents of the cell.
1559 			case Ttd:
1560 			case Tth:
1561 				if(curtab == nil) {
1562 					if(warn)
1563 						fprint(2, "%T outside <TABLE>\n", tok);
1564 					continue;
1565 				}
1566 				if(ps->inpar) {
1567 					popjust(ps);
1568 					ps->inpar = 0;
1569 				}
1570 				ps = finishcell(curtab, ps);
1571 				tr = nil;
1572 				if(curtab->rows != nil)
1573 					tr = curtab->rows;
1574 				if(tr == nil || !tr->flags) {
1575 					if(warn)
1576 						fprint(2, "%T outside row\n", tok);
1577 					tr = newtablerow(makealign(ALnone, ALnone),
1578 							makebackground(nil, curtab->background.color),
1579 							TFparsing,
1580 							curtab->rows);
1581 					curtab->rows = tr;
1582 				}
1583 				ps = cell_pstate(ps, tag == Tth);
1584 				flags = TFparsing;
1585 				if(aflagval(tok, Anowrap)) {
1586 					flags |= TFnowrap;
1587 					ps->curstate &= ~IFwrap;
1588 				}
1589 				if(tag == Tth)
1590 					flags |= TFisth;
1591 				c = newtablecell(curtab->cells==nil? 1 : curtab->cells->cellid+1,
1592 						auintval(tok, Arowspan, 1),
1593 						auintval(tok, Acolspan, 1),
1594 						aalign(tok),
1595 						adimen(tok, Awidth),
1596 						auintval(tok, Aheight, 0),
1597 						makebackground(nil, acolorval(tok, Abgcolor, tr->background.color)),
1598 						flags,
1599 						curtab->cells);
1600 				curtab->cells = c;
1601 				ps->curbg = c->background;
1602 				if(c->align.halign == ALnone) {
1603 					if(tr->align.halign != ALnone)
1604 						c->align.halign = tr->align.halign;
1605 					else if(tag == Tth)
1606 						c->align.halign = ALcenter;
1607 					else
1608 						c->align.halign = ALleft;
1609 				}
1610 				if(c->align.valign == ALnone) {
1611 					if(tr->align.valign != ALnone)
1612 						c->align.valign = tr->align.valign;
1613 					else
1614 						c->align.valign = ALmiddle;
1615 				}
1616 				c->nextinrow = tr->cells;
1617 				tr->cells = c;
1618 				break;
1619 
1620 			case Ttd+RBRA:
1621 			case Tth+RBRA:
1622 				if(curtab == nil || curtab->cells == nil) {
1623 					if(warn)
1624 						fprint(2, "unexpected %T\n", tok);
1625 					continue;
1626 				}
1627 				ps = finishcell(curtab, ps);
1628 				break;
1629 
1630 			// <!ELEMENT TEXTAREA - - (#PCDATA)>
1631 			case Ttextarea:
1632 				if(is->curform == nil) {
1633 					if(warn)
1634 						fprint(2, "<TEXTAREA> not inside <FORM>\n");
1635 					continue;
1636 				}
1637 				field = newformfield(Ftextarea,
1638 					++is->curform->nfields,
1639 					is->curform,
1640 					aval(tok, Aname),
1641 					nil,
1642 					0,
1643 					0,
1644 					is->curform->fields);
1645 				is->curform->fields = field;
1646 				field->rows = auintval(tok, Arows, 3);
1647 				field->cols = auintval(tok, Acols, 50);
1648 				field->value = getpcdata(toks, tokslen, &toki);
1649 				if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttextarea + RBRA)
1650 					fprint(2, "warning: <TEXTAREA> data ended by %T\n", &toks[toki + 1]);
1651 				ffit = newiformfield(field);
1652 				additem(ps, ffit, tok);
1653 				if(ffit->genattr != nil)
1654 					field->events = ffit->genattr->events;
1655 				break;
1656 
1657 			// <!ELEMENT TITLE - - (#PCDATA)* -(%head.misc)>
1658 			case Ttitle:
1659 				di->doctitle = getpcdata(toks, tokslen, &toki);
1660 				if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttitle + RBRA)
1661 					fprint(2, "warning: <TITLE> data ended by %T\n", &toks[toki + 1]);
1662 				break;
1663 
1664 			// <!ELEMENT TR - O (TH|TD)+>
1665 			// rows are accumulated in reverse order in curtab->rows
1666 			case Ttr:
1667 				if(curtab == nil) {
1668 					if(warn)
1669 						fprint(2, "warning: <TR> outside <TABLE>\n");
1670 					continue;
1671 				}
1672 				if(ps->inpar) {
1673 					popjust(ps);
1674 					ps->inpar = 0;
1675 				}
1676 				ps = finishcell(curtab, ps);
1677 				if(curtab->rows != nil)
1678 					curtab->rows->flags = 0;
1679 				curtab->rows = newtablerow(aalign(tok),
1680 					makebackground(nil, acolorval(tok, Abgcolor, curtab->background.color)),
1681 					TFparsing,
1682 					curtab->rows);
1683 				break;
1684 
1685 			case Ttr+RBRA:
1686 				if(curtab == nil || curtab->rows == nil) {
1687 					if(warn)
1688 						fprint(2, "warning: unexpected </TR>\n");
1689 					continue;
1690 				}
1691 				ps = finishcell(curtab, ps);
1692 				tr = curtab->rows;
1693 				if(tr->cells == nil) {
1694 					if(warn)
1695 						fprint(2, "warning: empty row\n");
1696 					curtab->rows = tr->next;
1697 					tr->next = nil;
1698 				}
1699 				else
1700 					tr->flags = 0;
1701 				break;
1702 
1703 			// <!ELEMENT (TT|CODE|KBD|SAMP) - - (%text)*>
1704 			case Ttt:
1705 			case Tcode:
1706 			case Tkbd:
1707 			case Tsamp:
1708 				pushfontstyle(ps, FntT);
1709 				break;
1710 
1711 			// Tags that have empty action
1712 			case Tabbr:
1713 			case Tabbr+RBRA:
1714 			case Tacronym:
1715 			case Tacronym+RBRA:
1716 			case Tarea+RBRA:
1717 			case Tbase+RBRA:
1718 			case Tbasefont+RBRA:
1719 			case Tbr+RBRA:
1720 			case Tdd+RBRA:
1721 			case Tdt+RBRA:
1722 			case Tframe+RBRA:
1723 			case Thr+RBRA:
1724 			case Thtml:
1725 			case Thtml+RBRA:
1726 			case Timg+RBRA:
1727 			case Tinput+RBRA:
1728 			case Tisindex+RBRA:
1729 			case Tli+RBRA:
1730 			case Tlink:
1731 			case Tlink+RBRA:
1732 			case Tmeta+RBRA:
1733 			case Toption+RBRA:
1734 			case Tparam+RBRA:
1735 			case Ttextarea+RBRA:
1736 			case Ttitle+RBRA:
1737 				break;
1738 
1739 
1740 			// Tags not implemented
1741 			case Tbdo:
1742 			case Tbdo+RBRA:
1743 			case Tbutton:
1744 			case Tbutton+RBRA:
1745 			case Tdel:
1746 			case Tdel+RBRA:
1747 			case Tfieldset:
1748 			case Tfieldset+RBRA:
1749 			case Tiframe:
1750 			case Tiframe+RBRA:
1751 			case Tins:
1752 			case Tins+RBRA:
1753 			case Tlabel:
1754 			case Tlabel+RBRA:
1755 			case Tlegend:
1756 			case Tlegend+RBRA:
1757 			case Tobject:
1758 			case Tobject+RBRA:
1759 			case Toptgroup:
1760 			case Toptgroup+RBRA:
1761 			case Tspan:
1762 			case Tspan+RBRA:
1763 				if(warn) {
1764 					if(tag > RBRA)
1765 						tag -= RBRA;
1766 					fprint(2, "warning: unimplemented HTML tag: %S\n", tagnames[tag]);
1767 				}
1768 				break;
1769 
1770 			default:
1771 				if(warn)
1772 					fprint(2, "warning: unknown HTML tag: %S\n", tok->text);
1773 				break;
1774 			}
1775 	}
1776 	// some pages omit trailing </table>
1777 	while(curtab != nil) {
1778 		if(warn)
1779 			fprint(2, "warning: <TABLE> not closed\n");
1780 		if(curtab->cells != nil) {
1781 			ps = finishcell(curtab, ps);
1782 			if(curtab->cells == nil) {
1783 				if(warn)
1784 					fprint(2, "warning: empty table\n");
1785 			}
1786 			else {
1787 				if(curtab->rows != nil)
1788 					curtab->rows->flags = 0;
1789 				finish_table(curtab);
1790 				ps->skipping = 0;
1791 				additem(ps, newitable(curtab), curtab->tabletok);
1792 				addbrk(ps, 0, 0);
1793 			}
1794 		}
1795 		if(is->tabstk != nil)
1796 			is->tabstk = is->tabstk->next;
1797 		curtab->next = di->tables;
1798 		di->tables = curtab;
1799 		curtab = is->tabstk;
1800 	}
1801 	outerps = lastps(ps);
1802 	ans = outerps->items->next;
1803 	// note: ans may be nil and di->kids not nil, if there's a frameset!
1804 	outerps->items = newispacer(ISPnull);
1805 	outerps->lastit = outerps->items;
1806 	is->psstk = ps;
1807 	if(ans != nil && di->hasscripts) {
1808 		// TODO evalscript(nil);
1809 		;
1810 	}
1811 
1812 return_ans:
1813 	if(dbgbuild) {
1814 		assert(validitems(ans));
1815 		if(ans == nil)
1816 			fprint(2, "getitems returning nil\n");
1817 		else
1818 			printitems(ans, "getitems returning:");
1819 	}
1820 	return ans;
1821 }
1822 
1823 // Concatenate together maximal set of Data tokens, starting at toks[toki+1].
1824 // Lexer has ensured that there will either be a following non-data token or
1825 // we will be at eof.
1826 // Return emallocd trimmed concatenation, and update *ptoki to last used toki
1827 static Rune*
1828 getpcdata(Token* toks, int tokslen, int* ptoki)
1829 {
1830 	Rune*	ans;
1831 	Rune*	p;
1832 	Rune*	trimans;
1833 	int	anslen;
1834 	int	trimanslen;
1835 	int	toki;
1836 	Token*	tok;
1837 
1838 	ans = nil;
1839 	anslen = 0;
1840 	// first find length of answer
1841 	toki = (*ptoki) + 1;
1842 	while(toki < tokslen) {
1843 		tok = &toks[toki];
1844 		if(tok->tag == Data) {
1845 			toki++;
1846 			anslen += _Strlen(tok->text);
1847 		}
1848 		else
1849 			break;
1850 	}
1851 	// now make up the initial answer
1852 	if(anslen > 0) {
1853 		ans = _newstr(anslen);
1854 		p = ans;
1855 		toki = (*ptoki) + 1;
1856 		while(toki < tokslen) {
1857 			tok = &toks[toki];
1858 			if(tok->tag == Data) {
1859 				toki++;
1860 				p = _Stradd(p, tok->text, _Strlen(tok->text));
1861 			}
1862 			else
1863 				break;
1864 		}
1865 		*p = 0;
1866 		_trimwhite(ans, anslen, &trimans, &trimanslen);
1867 		if(trimanslen != anslen) {
1868 			p = ans;
1869 			ans = _Strndup(trimans, trimanslen);
1870 			free(p);
1871 		}
1872 	}
1873 	*ptoki = toki-1;
1874 	return ans;
1875 }
1876 
1877 // If still parsing head of curtab->cells list, finish it off
1878 // by transferring the items on the head of psstk to the cell.
1879 // Then pop the psstk and return the new psstk.
1880 static Pstate*
1881 finishcell(Table* curtab, Pstate* psstk)
1882 {
1883 	Tablecell*	c;
1884 	Pstate* psstknext;
1885 
1886 	c = curtab->cells;
1887 	if(c != nil) {
1888 		if((c->flags&TFparsing)) {
1889 			psstknext = psstk->next;
1890 			if(psstknext == nil) {
1891 				if(warn)
1892 					fprint(2, "warning: parse state stack is wrong\n");
1893 			}
1894 			else {
1895 				c->content = psstk->items->next;
1896 				c->flags &= ~TFparsing;
1897 				freepstate(psstk);
1898 				psstk = psstknext;
1899 			}
1900 		}
1901 	}
1902 	return psstk;
1903 }
1904 
1905 // Make a new Pstate for a cell, based on the old pstate, oldps.
1906 // Also, put the new ps on the head of the oldps stack.
1907 static Pstate*
1908 cell_pstate(Pstate* oldps, int ishead)
1909 {
1910 	Pstate*	ps;
1911 	int	sty;
1912 
1913 	ps = newpstate(oldps);
1914 	ps->skipwhite = 1;
1915 	ps->curanchor = oldps->curanchor;
1916 	copystack(&ps->fntstylestk, &oldps->fntstylestk);
1917 	copystack(&ps->fntsizestk, &oldps->fntsizestk);
1918 	ps->curfont = oldps->curfont;
1919 	ps->curfg = oldps->curfg;
1920 	ps->curbg = oldps->curbg;
1921 	copystack(&ps->fgstk, &oldps->fgstk);
1922 	ps->adjsize = oldps->adjsize;
1923 	if(ishead) {
1924 		sty = ps->curfont%NumSize;
1925 		ps->curfont = FntB*NumSize + sty;
1926 	}
1927 	return ps;
1928 }
1929 
1930 // Return a new Pstate with default starting state.
1931 // Use link to add it to head of a list, if any.
1932 static Pstate*
1933 newpstate(Pstate* link)
1934 {
1935 	Pstate*	ps;
1936 
1937 	ps = (Pstate*)emalloc(sizeof(Pstate));
1938 	ps->curfont = DefFnt;
1939 	ps->curfg = Black;
1940 	ps->curbg.image = nil;
1941 	ps->curbg.color = White;
1942 	ps->curul = ULnone;
1943 	ps->curjust = ALleft;
1944 	ps->curstate = IFwrap;
1945 	ps->items = newispacer(ISPnull);
1946 	ps->lastit = ps->items;
1947 	ps->prelastit = nil;
1948 	ps->next = link;
1949 	return ps;
1950 }
1951 
1952 // Return last Pstate on psl list
1953 static Pstate*
1954 lastps(Pstate* psl)
1955 {
1956 	assert(psl != nil);
1957 	while(psl->next != nil)
1958 		psl = psl->next;
1959 	return psl;
1960 }
1961 
1962 // Add it to end of ps item chain, adding in current state from ps.
1963 // Also, if tok is not nil, scan it for generic attributes and assign
1964 // the genattr field of the item accordingly.
1965 static void
1966 additem(Pstate* ps, Item* it, Token* tok)
1967 {
1968 	int	aid;
1969 	int	any;
1970 	Rune*	i;
1971 	Rune*	c;
1972 	Rune*	s;
1973 	Rune*	t;
1974 	Attr*	a;
1975 	SEvent*	e;
1976 
1977 	if(ps->skipping) {
1978 		if(warn)
1979 			fprint(2, "warning: skipping item: %I\n", it);
1980 		return;
1981 	}
1982 	it->anchorid = ps->curanchor;
1983 	it->state |= ps->curstate;
1984 	if(tok != nil) {
1985 		any = 0;
1986 		i = nil;
1987 		c = nil;
1988 		s = nil;
1989 		t = nil;
1990 		e = nil;
1991 		for(a = tok->attr; a != nil; a = a->next) {
1992 			aid = a->attid;
1993 			if(!attrinfo[aid])
1994 				continue;
1995 			switch(aid) {
1996 			case Aid:
1997 				i = a->value;
1998 				break;
1999 
2000 			case Aclass:
2001 				c = a->value;
2002 				break;
2003 
2004 			case Astyle:
2005 				s = a->value;
2006 				break;
2007 
2008 			case Atitle:
2009 				t = a->value;
2010 				break;
2011 
2012 			default:
2013 				assert(aid >= Aonblur && aid <= Aonunload);
2014 				e = newscriptevent(scriptev[a->attid], a->value, e);
2015 				break;
2016 			}
2017 			a->value = nil;
2018 			any = 1;
2019 		}
2020 		if(any)
2021 			it->genattr = newgenattr(i, c, s, t, e);
2022 	}
2023 	ps->curstate &= ~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
2024 	ps->prelastit = ps->lastit;
2025 	ps->lastit->next = it;
2026 	ps->lastit = it;
2027 }
2028 
2029 // Make a text item out of s,
2030 // using current font, foreground, vertical offset and underline state.
2031 static Item*
2032 textit(Pstate* ps, Rune* s)
2033 {
2034 	assert(s != nil);
2035 	return newitext(s, ps->curfont, ps->curfg, ps->curvoff + Voffbias, ps->curul);
2036 }
2037 
2038 // Add text item or items for s, paying attention to
2039 // current font, foreground, baseline offset, underline state,
2040 // and literal mode.  Unless we're in literal mode, compress
2041 // whitespace to single blank, and, if curstate has a break,
2042 // trim any leading whitespace.  Whether in literal mode or not,
2043 // turn nonbreaking spaces into spacer items with IFnobrk set.
2044 //
2045 // In literal mode, break up s at newlines and add breaks instead.
2046 // Also replace tabs appropriate number of spaces.
2047 // In nonliteral mode, break up the items every 100 or so characters
2048 // just to make the layout algorithm not go quadratic.
2049 //
2050 // addtext assumes ownership of s.
2051 static void
2052 addtext(Pstate* ps, Rune* s)
2053 {
2054 	int	n;
2055 	int	i;
2056 	int	j;
2057 	int	k;
2058 	int	col;
2059 	int	c;
2060 	int	nsp;
2061 	Item*	it;
2062 	Rune*	ss;
2063 	Rune*	p;
2064 	Rune	buf[SMALLBUFSIZE];
2065 
2066 	assert(s != nil);
2067 	n = runestrlen(s);
2068 	i = 0;
2069 	j = 0;
2070 	if(ps->literal) {
2071 		col = 0;
2072 		while(i < n) {
2073 			if(s[i] == '\n') {
2074 				if(i > j) {
2075 					// trim trailing blanks from line
2076 					for(k = i; k > j; k--)
2077 						if(s[k - 1] != ' ')
2078 							break;
2079 					if(k > j)
2080 						additem(ps, textit(ps, _Strndup(s+j, k-j)), nil);
2081 				}
2082 				addlinebrk(ps, 0);
2083 				j = i + 1;
2084 				col = 0;
2085 			}
2086 			else {
2087 				if(s[i] == '\t') {
2088 					col += i - j;
2089 					nsp = 8 - (col%8);
2090 					// make ss = s[j:i] + nsp spaces
2091 					ss = _newstr(i-j+nsp);
2092 					p = _Stradd(ss, s+j, i-j);
2093 					p = _Stradd(p, L"        ", nsp);
2094 					*p = 0;
2095 					additem(ps, textit(ps, ss), nil);
2096 					col += nsp;
2097 					j = i + 1;
2098 				}
2099 				else if(s[i] == NBSP) {
2100 					if(i > j)
2101 						additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
2102 					addnbsp(ps);
2103 					col += (i - j) + 1;
2104 					j = i + 1;
2105 				}
2106 			}
2107 			i++;
2108 		}
2109 		if(i > j) {
2110 			if(j == 0 && i == n) {
2111 				// just transfer s over
2112 				additem(ps, textit(ps, s), nil);
2113 			}
2114 			else {
2115 				additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
2116 				free(s);
2117 			}
2118 		}
2119 	}
2120 	else {	// not literal mode
2121 		if((ps->curstate&IFbrk) || ps->lastit == ps->items)
2122 			while(i < n) {
2123 				c = s[i];
2124 				if(c >= 256 || !isspace(c))
2125 					break;
2126 				i++;
2127 			}
2128 		p = buf;
2129 		for(j = i; i < n; i++) {
2130 			assert(p+i-j < buf+SMALLBUFSIZE-1);
2131 			c = s[i];
2132 			if(c == NBSP) {
2133 				if(i > j)
2134 					p = _Stradd(p, s+j, i-j);
2135 				if(p > buf)
2136 					additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2137 				p = buf;
2138 				addnbsp(ps);
2139 				j = i + 1;
2140 				continue;
2141 			}
2142 			if(c < 256 && isspace(c)) {
2143 				if(i > j)
2144 					p = _Stradd(p, s+j, i-j);
2145 				*p++ = ' ';
2146 				while(i < n - 1) {
2147 					c = s[i + 1];
2148 					if(c >= 256 || !isspace(c))
2149 						break;
2150 					i++;
2151 				}
2152 				j = i + 1;
2153 			}
2154 			if(i - j >= 100) {
2155 				p = _Stradd(p, s+j, i+1-j);
2156 				j = i + 1;
2157 			}
2158 			if(p-buf >= 100) {
2159 				additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2160 				p = buf;
2161 			}
2162 		}
2163 		if(i > j && j < n) {
2164 			assert(p+i-j < buf+SMALLBUFSIZE-1);
2165 			p = _Stradd(p, s+j, i-j);
2166 		}
2167 		// don't add a space if previous item ended in a space
2168 		if(p-buf == 1 && buf[0] == ' ' && ps->lastit != nil) {
2169 			it = ps->lastit;
2170 			if(it->tag == Itexttag) {
2171 				ss = ((Itext*)it)->s;
2172 				k = _Strlen(ss);
2173 				if(k > 0 && ss[k] == ' ')
2174 					p = buf;
2175 			}
2176 		}
2177 		if(p > buf)
2178 			additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2179 		free(s);
2180 	}
2181 }
2182 
2183 // Add a break to ps->curstate, with extra space if sp is true.
2184 // If there was a previous break, combine this one's parameters
2185 // with that to make the amt be the max of the two and the clr
2186 // be the most general. (amt will be 0 or 1)
2187 // Also, if the immediately preceding item was a text item,
2188 // trim any whitespace from the end of it, if not in literal mode.
2189 // Finally, if this is at the very beginning of the item list
2190 // (the only thing there is a null spacer), then don't add the space.
2191 static void
2192 addbrk(Pstate* ps, int sp, int clr)
2193 {
2194 	int	state;
2195 	Rune*	l;
2196 	int		nl;
2197 	Rune*	r;
2198 	int		nr;
2199 	Itext*	t;
2200 	Rune*	s;
2201 
2202 	state = ps->curstate;
2203 	clr = clr|(state&(IFcleft|IFcright));
2204 	if(sp && !(ps->lastit == ps->items))
2205 		sp = IFbrksp;
2206 	else
2207 		sp = 0;
2208 	ps->curstate = IFbrk|sp|(state&~(IFcleft|IFcright))|clr;
2209 	if(ps->lastit != ps->items) {
2210 		if(!ps->literal && ps->lastit->tag == Itexttag) {
2211 			t = (Itext*)ps->lastit;
2212 			_splitr(t->s, _Strlen(t->s), notwhitespace, &l, &nl, &r, &nr);
2213 			// try to avoid making empty items
2214 			// but not crucial f the occasional one gets through
2215 			if(nl == 0 && ps->prelastit != nil) {
2216 				ps->lastit = ps->prelastit;
2217 				ps->lastit->next = nil;
2218 				ps->prelastit = nil;
2219 			}
2220 			else {
2221 				s = t->s;
2222 				if(nl == 0) {
2223 					// need a non-nil pointer to empty string
2224 					// (_Strdup(L"") returns nil)
2225 					t->s = emalloc(sizeof(Rune));
2226 					t->s[0] = 0;
2227 				}
2228 				else
2229 					t->s = _Strndup(l, nl);
2230 				if(s)
2231 					free(s);
2232 			}
2233 		}
2234 	}
2235 }
2236 
2237 // Add break due to a <br> or a newline within a preformatted section.
2238 // We add a null item first, with current font's height and ascent, to make
2239 // sure that the current line takes up at least that amount of vertical space.
2240 // This ensures that <br>s on empty lines cause blank lines, and that
2241 // multiple <br>s in a row give multiple blank lines.
2242 // However don't add the spacer if the previous item was something that
2243 // takes up space itself.
2244 static void
2245 addlinebrk(Pstate* ps, int clr)
2246 {
2247 	int	obrkstate;
2248 	int	b;
2249 	int	addit;
2250 
2251 	// don't want break before our null item unless the previous item
2252 	// was also a null item for the purposes of line breaking
2253 	obrkstate = ps->curstate&(IFbrk|IFbrksp);
2254 	b = IFnobrk;
2255 	addit = 0;
2256 	if(ps->lastit != nil) {
2257 		if(ps->lastit->tag == Ispacertag) {
2258 			if(((Ispacer*)ps->lastit)->spkind == ISPvline)
2259 				b = IFbrk;
2260 			addit = 1;
2261 		}
2262 		else if(ps->lastit->tag == Ifloattag)
2263 			addit = 1;
2264 	}
2265 	if(addit) {
2266 		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|b;
2267 		additem(ps, newispacer(ISPvline), nil);
2268 		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|obrkstate;
2269 	}
2270 	addbrk(ps, 0, clr);
2271 }
2272 
2273 // Add a nonbreakable space
2274 static void
2275 addnbsp(Pstate* ps)
2276 {
2277 	// if nbsp comes right where a break was specified,
2278 	// do the break anyway (nbsp is being used to generate undiscardable
2279 	// space rather than to prevent a break)
2280 	if((ps->curstate&IFbrk) == 0)
2281 		ps->curstate |= IFnobrk;
2282 	additem(ps, newispacer(ISPhspace), nil);
2283 	// but definitely no break on next item
2284 	ps->curstate |= IFnobrk;
2285 }
2286 
2287 // Change hang in ps.curstate by delta.
2288 // The amount is in 1/10ths of tabs, and is the amount that
2289 // the current contiguous set of items with a hang value set
2290 // is to be shifted left from its normal (indented) place.
2291 static void
2292 changehang(Pstate* ps, int delta)
2293 {
2294 	int	amt;
2295 
2296 	amt = (ps->curstate&IFhangmask) + delta;
2297 	if(amt < 0) {
2298 		if(warn)
2299 			fprint(2, "warning: hang went negative\n");
2300 		amt = 0;
2301 	}
2302 	ps->curstate = (ps->curstate&~IFhangmask)|amt;
2303 }
2304 
2305 // Change indent in ps.curstate by delta.
2306 static void
2307 changeindent(Pstate* ps, int delta)
2308 {
2309 	int	amt;
2310 
2311 	amt = ((ps->curstate&IFindentmask) >> IFindentshift) + delta;
2312 	if(amt < 0) {
2313 		if(warn)
2314 			fprint(2, "warning: indent went negative\n");
2315 		amt = 0;
2316 	}
2317 	ps->curstate = (ps->curstate&~IFindentmask)|(amt << IFindentshift);
2318 }
2319 
2320 // Push val on top of stack, and also return value pushed
2321 static int
2322 push(Stack* stk, int val)
2323 {
2324 	if(stk->n == Nestmax) {
2325 		if(warn)
2326 			fprint(2, "warning: build stack overflow\n");
2327 	}
2328 	else
2329 		stk->slots[stk->n++] = val;
2330 	return val;
2331 }
2332 
2333 // Pop top of stack
2334 static void
2335 pop(Stack* stk)
2336 {
2337 	if(stk->n > 0)
2338 		--stk->n;
2339 }
2340 
2341 //Return top of stack, using dflt if stack is empty
2342 static int
2343 top(Stack* stk, int dflt)
2344 {
2345 	if(stk->n == 0)
2346 		return dflt;
2347 	return stk->slots[stk->n-1];
2348 }
2349 
2350 // pop, then return new top, with dflt if empty
2351 static int
2352 popretnewtop(Stack* stk, int dflt)
2353 {
2354 	if(stk->n == 0)
2355 		return dflt;
2356 	stk->n--;
2357 	if(stk->n == 0)
2358 		return dflt;
2359 	return stk->slots[stk->n-1];
2360 }
2361 
2362 // Copy fromstk entries into tostk
2363 static void
2364 copystack(Stack* tostk, Stack* fromstk)
2365 {
2366 	int n;
2367 
2368 	n = fromstk->n;
2369 	tostk->n = n;
2370 	memmove(tostk->slots, fromstk->slots, n*sizeof(int));
2371 }
2372 
2373 static void
2374 popfontstyle(Pstate* ps)
2375 {
2376 	pop(&ps->fntstylestk);
2377 	setcurfont(ps);
2378 }
2379 
2380 static void
2381 pushfontstyle(Pstate* ps, int sty)
2382 {
2383 	push(&ps->fntstylestk, sty);
2384 	setcurfont(ps);
2385 }
2386 
2387 static void
2388 popfontsize(Pstate* ps)
2389 {
2390 	pop(&ps->fntsizestk);
2391 	setcurfont(ps);
2392 }
2393 
2394 static void
2395 pushfontsize(Pstate* ps, int sz)
2396 {
2397 	push(&ps->fntsizestk, sz);
2398 	setcurfont(ps);
2399 }
2400 
2401 static void
2402 setcurfont(Pstate* ps)
2403 {
2404 	int	sty;
2405 	int	sz;
2406 
2407 	sty = top(&ps->fntstylestk, FntR);
2408 	sz = top(&ps->fntsizestk, Normal);
2409 	if(sz < Tiny)
2410 		sz = Tiny;
2411 	if(sz > Verylarge)
2412 		sz = Verylarge;
2413 	ps->curfont = sty*NumSize + sz;
2414 }
2415 
2416 static void
2417 popjust(Pstate* ps)
2418 {
2419 	pop(&ps->juststk);
2420 	setcurjust(ps);
2421 }
2422 
2423 static void
2424 pushjust(Pstate* ps, int j)
2425 {
2426 	push(&ps->juststk, j);
2427 	setcurjust(ps);
2428 }
2429 
2430 static void
2431 setcurjust(Pstate* ps)
2432 {
2433 	int	j;
2434 	int	state;
2435 
2436 	j = top(&ps->juststk, ALleft);
2437 	if(j != ps->curjust) {
2438 		ps->curjust = j;
2439 		state = ps->curstate;
2440 		state &= ~(IFrjust|IFcjust);
2441 		if(j == ALcenter)
2442 			state |= IFcjust;
2443 		else if(j == ALright)
2444 			state |= IFrjust;
2445 		ps->curstate = state;
2446 	}
2447 }
2448 
2449 // Do final rearrangement after table parsing is finished
2450 // and assign cells to grid points
2451 static void
2452 finish_table(Table* t)
2453 {
2454 	int	ncol;
2455 	int	nrow;
2456 	int	r;
2457 	Tablerow*	rl;
2458 	Tablecell*	cl;
2459 	int*	rowspancnt;
2460 	Tablecell**	rowspancell;
2461 	int	ri;
2462 	int	ci;
2463 	Tablecell*	c;
2464 	Tablecell*	cnext;
2465 	Tablerow*	row;
2466 	Tablerow*	rownext;
2467 	int	rcols;
2468 	int	newncol;
2469 	int	k;
2470 	int	j;
2471 	int	cspan;
2472 	int	rspan;
2473 	int	i;
2474 
2475 	rl = t->rows;
2476 	t->nrow = nrow = _listlen((List*)rl);
2477 	t->rows = (Tablerow*)emalloc(nrow * sizeof(Tablerow));
2478 	ncol = 0;
2479 	r = nrow - 1;
2480 	for(row = rl; row != nil; row = rownext) {
2481 		// copy the data from the allocated Tablerow into the array slot
2482 		t->rows[r] = *row;
2483 		rownext = row->next;
2484 		row = &t->rows[r];
2485 		r--;
2486 		rcols = 0;
2487 		c = row->cells;
2488 
2489 		// If rowspan is > 1 but this is the last row,
2490 		// reset the rowspan
2491 		if(c != nil && c->rowspan > 1 && r == nrow-2)
2492 				c->rowspan = 1;
2493 
2494 		// reverse row->cells list (along nextinrow pointers)
2495 		row->cells = nil;
2496 		while(c != nil) {
2497 			cnext = c->nextinrow;
2498 			c->nextinrow = row->cells;
2499 			row->cells = c;
2500 			rcols += c->colspan;
2501 			c = cnext;
2502 		}
2503 		if(rcols > ncol)
2504 			ncol = rcols;
2505 	}
2506 	t->ncol = ncol;
2507 	t->cols = (Tablecol*)emalloc(ncol * sizeof(Tablecol));
2508 
2509 	// Reverse cells just so they are drawn in source order.
2510 	// Also, trim their contents so they don't end in whitespace.
2511 	t->cells = (Tablecell*)_revlist((List*)t->cells);
2512 	for(c = t->cells; c != nil; c= c->next)
2513 		trim_cell(c);
2514 	t->grid = (Tablecell***)emalloc(nrow * sizeof(Tablecell**));
2515 	for(i = 0; i < nrow; i++)
2516 		t->grid[i] = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
2517 
2518 	// The following arrays keep track of cells that are spanning
2519 	// multiple rows;  rowspancnt[i] is the number of rows left
2520 	// to be spanned in column i.
2521 	// When done, cell's (row,col) is upper left grid point.
2522 	rowspancnt = (int*)emalloc(ncol * sizeof(int));
2523 	rowspancell = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
2524 	for(ri = 0; ri < nrow; ri++) {
2525 		row = &t->rows[ri];
2526 		cl = row->cells;
2527 		ci = 0;
2528 		while(ci < ncol || cl != nil) {
2529 			if(ci < ncol && rowspancnt[ci] > 0) {
2530 				t->grid[ri][ci] = rowspancell[ci];
2531 				rowspancnt[ci]--;
2532 				ci++;
2533 			}
2534 			else {
2535 				if(cl == nil) {
2536 					ci++;
2537 					continue;
2538 				}
2539 				c = cl;
2540 				cl = cl->nextinrow;
2541 				cspan = c->colspan;
2542 				rspan = c->rowspan;
2543 				if(ci + cspan > ncol) {
2544 					// because of row spanning, we calculated
2545 					// ncol incorrectly; adjust it
2546 					newncol = ci + cspan;
2547 					t->cols = (Tablecol*)erealloc(t->cols, newncol * sizeof(Tablecol));
2548 					rowspancnt = (int*)erealloc(rowspancnt, newncol * sizeof(int));
2549 					rowspancell = (Tablecell**)erealloc(rowspancell, newncol * sizeof(Tablecell*));
2550 					k = newncol-ncol;
2551 					memset(t->cols+ncol, 0, k*sizeof(Tablecol));
2552 					memset(rowspancnt+ncol, 0, k*sizeof(int));
2553 					memset(rowspancell+ncol, 0, k*sizeof(Tablecell*));
2554 					for(j = 0; j < nrow; j++) {
2555 						t->grid[j] = (Tablecell**)erealloc(t->grid[j], newncol * sizeof(Tablecell*));
2556 						memset(t->grid[j], 0, k*sizeof(Tablecell*));
2557 					}
2558 					t->ncol = ncol = newncol;
2559 				}
2560 				c->row = ri;
2561 				c->col = ci;
2562 				for(i = 0; i < cspan; i++) {
2563 					t->grid[ri][ci] = c;
2564 					if(rspan > 1) {
2565 						rowspancnt[ci] = rspan - 1;
2566 						rowspancell[ci] = c;
2567 					}
2568 					ci++;
2569 				}
2570 			}
2571 		}
2572 	}
2573 }
2574 
2575 // Remove tail of cell content until it isn't whitespace.
2576 static void
2577 trim_cell(Tablecell* c)
2578 {
2579 	int	dropping;
2580 	Rune*	s;
2581 	Rune*	x;
2582 	Rune*	y;
2583 	int		nx;
2584 	int		ny;
2585 	Item*	p;
2586 	Itext*	q;
2587 	Item*	pprev;
2588 
2589 	dropping = 1;
2590 	while(c->content != nil && dropping) {
2591 		p = c->content;
2592 		pprev = nil;
2593 		while(p->next != nil) {
2594 			pprev = p;
2595 			p = p->next;
2596 		}
2597 		dropping = 0;
2598 		if(!(p->state&IFnobrk)) {
2599 			if(p->tag == Itexttag) {
2600 				q = (Itext*)p;
2601 				s = q->s;
2602 				_splitr(s, _Strlen(s), notwhitespace, &x, &nx, &y, &ny);
2603 				if(nx != 0 && ny != 0) {
2604 					q->s = _Strndup(x, nx);
2605 					free(s);
2606 				}
2607 				break;
2608 			}
2609 		}
2610 		if(dropping) {
2611 			if(pprev == nil)
2612 				c->content = nil;
2613 			else
2614 				pprev->next = nil;
2615 			freeitem(p);
2616 		}
2617 	}
2618 }
2619 
2620 // Caller must free answer (eventually).
2621 static Rune*
2622 listmark(uchar ty, int n)
2623 {
2624 	Rune*	s;
2625 	Rune*	t;
2626 	int	n2;
2627 	int	i;
2628 
2629 	s = nil;
2630 	switch(ty) {
2631 	case LTdisc:
2632 	case LTsquare:
2633 	case LTcircle:
2634 		s = _newstr(1);
2635 		s[0] = (ty == LTdisc)? 0x2022		// bullet
2636 			: ((ty == LTsquare)? 0x220e	// filled square
2637 			    : 0x2218);				// degree
2638 		s[1] = 0;
2639 		break;
2640 
2641 	case LT1:
2642 		t = _ltoStr(n);
2643 		n2 = _Strlen(t);
2644 		s = _newstr(n2+1);
2645 		t = _Stradd(s, t, n2);
2646 		*t++ = '.';
2647 		*t = 0;
2648 		break;
2649 
2650 	case LTa:
2651 	case LTA:
2652 		n--;
2653 		i = 0;
2654 		if(n < 0)
2655 			n = 0;
2656 		s = _newstr((n <= 25)? 2 : 3);
2657 		if(n > 25) {
2658 			n2 = n%26;
2659 			n /= 26;
2660 			if(n2 > 25)
2661 				n2 = 25;
2662 			s[i++] = n2 + (ty == LTa)? 'a' : 'A';
2663 		}
2664 		s[i++] = n + (ty == LTa)? 'a' : 'A';
2665 		s[i++] = '.';
2666 		s[i] = 0;
2667 		break;
2668 
2669 	case LTi:
2670 	case LTI:
2671 		if(n >= NROMAN) {
2672 			if(warn)
2673 				fprint(2, "warning: unimplemented roman number > %d\n", NROMAN);
2674 			n = NROMAN;
2675 		}
2676 		t = roman[n - 1];
2677 		n2 = _Strlen(t);
2678 		s = _newstr(n2+1);
2679 		for(i = 0; i < n2; i++)
2680 			s[i] = (ty == LTi)? tolower(t[i]) : t[i];
2681 		s[i++] = '.';
2682 		s[i] = 0;
2683 		break;
2684 	}
2685 	return s;
2686 }
2687 
2688 // Find map with given name in di.maps.
2689 // If not there, add one, copying name.
2690 // Ownership of map remains with di->maps list.
2691 static Map*
2692 getmap(Docinfo* di, Rune* name)
2693 {
2694 	Map*	m;
2695 
2696 	for(m = di->maps; m != nil; m = m->next) {
2697 		if(!_Strcmp(name, m->name))
2698 			return m;
2699 	}
2700 	m = (Map*)emalloc(sizeof(Map));
2701 	m->name = _Strdup(name);
2702 	m->areas = nil;
2703 	m->next = di->maps;
2704 	di->maps = m;
2705 	return m;
2706 }
2707 
2708 // Transfers ownership of href to Area
2709 static Area*
2710 newarea(int shape, Rune* href, int target, Area* link)
2711 {
2712 	Area* a;
2713 
2714 	a = (Area*)emalloc(sizeof(Area));
2715 	a->shape = shape;
2716 	a->href = href;
2717 	a->target = target;
2718 	a->next = link;
2719 	return a;
2720 }
2721 
2722 // Return string value associated with attid in tok, nil if none.
2723 // Caller must free the result (eventually).
2724 static Rune*
2725 aval(Token* tok, int attid)
2726 {
2727 	Rune*	ans;
2728 
2729 	_tokaval(tok, attid, &ans, 1);	// transfers string ownership from token to ans
2730 	return ans;
2731 }
2732 
2733 // Like aval, but use dflt if there was no such attribute in tok.
2734 // Caller must free the result (eventually).
2735 static Rune*
2736 astrval(Token* tok, int attid, Rune* dflt)
2737 {
2738 	Rune*	ans;
2739 
2740 	if(_tokaval(tok, attid, &ans, 1))
2741 		return ans;	// transfers string ownership from token to ans
2742 	else
2743 		return _Strdup(dflt);
2744 }
2745 
2746 // Here we're supposed to convert to an int,
2747 // and have a default when not found
2748 static int
2749 aintval(Token* tok, int attid, int dflt)
2750 {
2751 	Rune*	ans;
2752 
2753 	if(!_tokaval(tok, attid, &ans, 0) || ans == nil)
2754 		return dflt;
2755 	else
2756 		return toint(ans);
2757 }
2758 
2759 // Like aintval, but result should be >= 0
2760 static int
2761 auintval(Token* tok, int attid, int dflt)
2762 {
2763 	Rune* ans;
2764 	int v;
2765 
2766 	if(!_tokaval(tok, attid, &ans, 0) || ans == nil)
2767 		return dflt;
2768 	else {
2769 		v = toint(ans);
2770 		return v >= 0? v : 0;
2771 	}
2772 }
2773 
2774 // int conversion, but with possible error check (if warning)
2775 static int
2776 toint(Rune* s)
2777 {
2778 	int ans;
2779 	Rune* eptr;
2780 
2781 	ans = _Strtol(s, &eptr, 10);
2782 	if(warn) {
2783 		if(*eptr != 0) {
2784 			eptr = _Strclass(eptr, notwhitespace);
2785 			if(eptr != nil)
2786 				fprint(2, "warning: expected integer, got %S\n", s);
2787 		}
2788 	}
2789 	return ans;
2790 }
2791 
2792 // Attribute value when need a table to convert strings to ints
2793 static int
2794 atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt)
2795 {
2796 	Rune*	aval;
2797 	int	ans;
2798 
2799 	ans = dflt;
2800 	if(_tokaval(tok, attid, &aval, 0)) {
2801 		if(!_lookup(tab, ntab, aval, _Strlen(aval), &ans)) {
2802 			ans = dflt;
2803 			if(warn)
2804 				fprint(2, "warning: name not found in table lookup: %S\n", aval);
2805 		}
2806 	}
2807 	return ans;
2808 }
2809 
2810 // Attribute value when supposed to be a color
2811 static int
2812 acolorval(Token* tok, int attid, int dflt)
2813 {
2814 	Rune*	aval;
2815 	int	ans;
2816 
2817 	ans = dflt;
2818 	if(_tokaval(tok, attid, &aval, 0))
2819 		ans = color(aval, dflt);
2820 	return ans;
2821 }
2822 
2823 // Attribute value when supposed to be a target frame name
2824 static int
2825 atargval(Token* tok, int dflt)
2826 {
2827 	int	ans;
2828 	Rune*	aval;
2829 
2830 	ans = dflt;
2831 	if(_tokaval(tok, Atarget, &aval, 0)){
2832 		ans = targetid(aval);
2833 	}
2834 	return ans;
2835 }
2836 
2837 // special for list types, where "i" and "I" are different,
2838 // but "square" and "SQUARE" are the same
2839 static int
2840 listtyval(Token* tok, int dflt)
2841 {
2842 	Rune*	aval;
2843 	int	ans;
2844 	int	n;
2845 
2846 	ans = dflt;
2847 	if(_tokaval(tok, Atype, &aval, 0)) {
2848 		n = _Strlen(aval);
2849 		if(n == 1) {
2850 			switch(aval[0]) {
2851 			case '1':
2852 				ans = LT1;
2853 				break;
2854 			case 'A':
2855 				ans = LTA;
2856 				break;
2857 			case 'I':
2858 				ans = LTI;
2859 				break;
2860 			case 'a':
2861 				ans = LTa;
2862 				break;
2863 			case 'i':
2864 				ans = LTi;
2865 			default:
2866 				if(warn)
2867 					fprint(2, "warning: unknown list element type %c\n", aval[0]);
2868 			}
2869 		}
2870 		else {
2871 			if(!_Strncmpci(aval, n, L"circle"))
2872 				ans = LTcircle;
2873 			else if(!_Strncmpci(aval, n, L"disc"))
2874 				ans = LTdisc;
2875 			else if(!_Strncmpci(aval, n, L"square"))
2876 				ans = LTsquare;
2877 			else {
2878 				if(warn)
2879 					fprint(2, "warning: unknown list element type %S\n", aval);
2880 			}
2881 		}
2882 	}
2883 	return ans;
2884 }
2885 
2886 // Attribute value when value is a URL, possibly relative to base.
2887 // FOR NOW: leave the url relative.
2888 // Caller must free the result (eventually).
2889 static Rune*
2890 aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
2891 {
2892 	Rune*	ans;
2893 	Rune*	url;
2894 
2895 	USED(base);
2896 	ans = nil;
2897 	if(_tokaval(tok, attid, &url, 0) && url != nil)
2898 		ans = removeallwhite(url);
2899 	if(ans == nil)
2900 		ans = _Strdup(dflt);
2901 	return ans;
2902 }
2903 
2904 // Return copy of s but with all whitespace (even internal) removed.
2905 // This fixes some buggy URL specification strings.
2906 static Rune*
2907 removeallwhite(Rune* s)
2908 {
2909 	int	j;
2910 	int	n;
2911 	int	i;
2912 	int	c;
2913 	Rune*	ans;
2914 
2915 	j = 0;
2916 	n = _Strlen(s);
2917 	for(i = 0; i < n; i++) {
2918 		c = s[i];
2919 		if(c >= 256 || !isspace(c))
2920 			j++;
2921 	}
2922 	if(j < n) {
2923 		ans = _newstr(j);
2924 		j = 0;
2925 		for(i = 0; i < n; i++) {
2926 			c = s[i];
2927 			if(c >= 256 || !isspace(c))
2928 				ans[j++] = c;
2929 		}
2930 		ans[j] = 0;
2931 	}
2932 	else
2933 		ans = _Strdup(s);
2934 	return ans;
2935 }
2936 
2937 // Attribute value when mere presence of attr implies value of 1,
2938 // but if there is an integer there, return it as the value.
2939 static int
2940 aflagval(Token* tok, int attid)
2941 {
2942 	int	val;
2943 	Rune*	sval;
2944 
2945 	val = 0;
2946 	if(_tokaval(tok, attid, &sval, 0)) {
2947 		val = 1;
2948 		if(sval != nil)
2949 			val = toint(sval);
2950 	}
2951 	return val;
2952 }
2953 
2954 static Align
2955 makealign(int halign, int valign)
2956 {
2957 	Align	al;
2958 
2959 	al.halign = halign;
2960 	al.valign = valign;
2961 	return al;
2962 }
2963 
2964 // Make an Align (two alignments, horizontal and vertical)
2965 static Align
2966 aalign(Token* tok)
2967 {
2968 	return makealign(
2969 		atabval(tok, Aalign, align_tab, NALIGNTAB, ALnone),
2970 		atabval(tok, Avalign, align_tab, NALIGNTAB, ALnone));
2971 }
2972 
2973 // Make a Dimen, based on value of attid attr
2974 static Dimen
2975 adimen(Token* tok, int attid)
2976 {
2977 	Rune*	wd;
2978 
2979 	if(_tokaval(tok, attid, &wd, 0))
2980 		return parsedim(wd, _Strlen(wd));
2981 	else
2982 		return makedimen(Dnone, 0);
2983 }
2984 
2985 // Parse s[0:n] as num[.[num]][unit][%|*]
2986 static Dimen
2987 parsedim(Rune* s, int ns)
2988 {
2989 	int	kind;
2990 	int	spec;
2991 	Rune*	l;
2992 	int	nl;
2993 	Rune*	r;
2994 	int	nr;
2995 	int	mul;
2996 	int	i;
2997 	Rune*	f;
2998 	int	nf;
2999 	int	Tkdpi;
3000 	Rune*	units;
3001 
3002 	kind = Dnone;
3003 	spec = 0;
3004 	_splitl(s, ns, L"^0-9", &l, &nl, &r, &nr);
3005 	if(nl != 0) {
3006 		spec = 1000*_Strtol(l, nil, 10);
3007 		if(nr > 0 && r[0] == '.') {
3008 			_splitl(r+1, nr-1, L"^0-9", &f, &nf, &r, &nr);
3009 			if(nf != 0) {
3010 				mul = 100;
3011 				for(i = 0; i < nf; i++) {
3012 					spec = spec + mul*(f[i]-'0');
3013 					mul = mul/10;
3014 				}
3015 			}
3016 		}
3017 		kind = Dpixels;
3018 		if(nr != 0) {
3019 			if(nr >= 2) {
3020 				Tkdpi = 100;
3021 				units = r;
3022 				r = r+2;
3023 				nr -= 2;
3024 				if(!_Strncmpci(units, 2, L"pt"))
3025 					spec = (spec*Tkdpi)/72;
3026 				else if(!_Strncmpci(units, 2, L"pi"))
3027 					spec = (spec*12*Tkdpi)/72;
3028 				else if(!_Strncmpci(units, 2, L"in"))
3029 					spec = spec*Tkdpi;
3030 				else if(!_Strncmpci(units, 2, L"cm"))
3031 					spec = (spec*100*Tkdpi)/254;
3032 				else if(!_Strncmpci(units, 2, L"mm"))
3033 					spec = (spec*10*Tkdpi)/254;
3034 				else if(!_Strncmpci(units, 2, L"em"))
3035 					spec = spec*15;
3036 				else {
3037 					if(warn)
3038 						fprint(2, "warning: unknown units %C%Cs\n", units[0], units[1]);
3039 				}
3040 			}
3041 			if(nr >= 1) {
3042 				if(r[0] == '%')
3043 					kind = Dpercent;
3044 				else if(r[0] == '*')
3045 					kind = Drelative;
3046 			}
3047 		}
3048 		spec = spec/1000;
3049 	}
3050 	else if(nr == 1 && r[0] == '*') {
3051 		spec = 1;
3052 		kind = Drelative;
3053 	}
3054 	return makedimen(kind, spec);
3055 }
3056 
3057 static void
3058 setdimarray(Token* tok, int attid, Dimen** pans, int* panslen)
3059 {
3060 	Rune*	s;
3061 	Dimen*	d;
3062 	int	k;
3063 	int	nc;
3064 	Rune* a[SMALLBUFSIZE];
3065 	int	an[SMALLBUFSIZE];
3066 
3067 	if(_tokaval(tok, attid, &s, 0)) {
3068 		nc = _splitall(s, _Strlen(s), L", ", a, an, SMALLBUFSIZE);
3069 		if(nc > 0) {
3070 			d = (Dimen*)emalloc(nc * sizeof(Dimen));
3071 			for(k = 0; k < nc; k++) {
3072 				d[k] = parsedim(a[k], an[k]);
3073 			}
3074 			*pans = d;
3075 			*panslen = nc;
3076 			return;
3077 		}
3078 	}
3079 	*pans = nil;
3080 	*panslen = 0;
3081 }
3082 
3083 static Background
3084 makebackground(Rune* imageurl, int color)
3085 {
3086 	Background bg;
3087 
3088 	bg.image = imageurl;
3089 	bg.color = color;
3090 	return bg;
3091 }
3092 
3093 static Item*
3094 newitext(Rune* s, int fnt, int fg, int voff, int ul)
3095 {
3096 	Itext* t;
3097 
3098 	assert(s != nil);
3099 	t = (Itext*)emalloc(sizeof(Itext));
3100 	t->tag = Itexttag;
3101 	t->s = s;
3102 	t->fnt = fnt;
3103 	t->fg = fg;
3104 	t->voff = voff;
3105 	t->ul = ul;
3106 	return (Item*)t;
3107 }
3108 
3109 static Item*
3110 newirule(int align, int size, int noshade, Dimen wspec)
3111 {
3112 	Irule* r;
3113 
3114 	r = (Irule*)emalloc(sizeof(Irule));
3115 	r->tag = Iruletag;
3116 	r->align = align;
3117 	r->size = size;
3118 	r->noshade = noshade;
3119 	r->wspec = wspec;
3120 	return (Item*)r;
3121 }
3122 
3123 // Map is owned elsewhere.
3124 static Item*
3125 newiimage(Rune* src, Rune* altrep, int align, int width, int height,
3126 		int hspace, int vspace, int border, int ismap, Map* map)
3127 {
3128 	Iimage* i;
3129 	int	state;
3130 
3131 	state = 0;
3132 	if(ismap)
3133 		state = IFsmap;
3134 	i = (Iimage*)emalloc(sizeof(Iimage));
3135 	i->tag = Iimagetag;
3136 	i->state = state;
3137 	i->imsrc = src;
3138 	i->altrep = altrep;
3139 	i->align = align;
3140 	i->imwidth = width;
3141 	i->imheight = height;
3142 	i->hspace = hspace;
3143 	i->vspace = vspace;
3144 	i->border = border;
3145 	i->map = map;
3146 	i->ctlid = -1;
3147 	return (Item*)i;
3148 }
3149 
3150 static Item*
3151 newiformfield(Formfield* ff)
3152 {
3153 	Iformfield* f;
3154 
3155 	f = (Iformfield*)emalloc(sizeof(Iformfield));
3156 	f->tag = Iformfieldtag;
3157 	f->formfield = ff;
3158 	return (Item*)f;
3159 }
3160 
3161 static Item*
3162 newitable(Table* tab)
3163 {
3164 	Itable* t;
3165 
3166 	t = (Itable*)emalloc(sizeof(Itable));
3167 	t->tag = Itabletag;
3168 	t->table = tab;
3169 	return (Item*)t;
3170 }
3171 
3172 static Item*
3173 newifloat(Item* it, int side)
3174 {
3175 	Ifloat* f;
3176 
3177 	f = (Ifloat*)emalloc(sizeof(Ifloat));
3178 	f->tag = Ifloattag;
3179 	f->state = IFwrap;
3180 	f->item = it;
3181 	f->side = side;
3182 	return (Item*)f;
3183 }
3184 
3185 static Item*
3186 newispacer(int spkind)
3187 {
3188 	Ispacer* s;
3189 
3190 	s = (Ispacer*)emalloc(sizeof(Ispacer));
3191 	s->tag = Ispacertag;
3192 	s->spkind = spkind;
3193 	return (Item*)s;
3194 }
3195 
3196 // Free one item (caller must deal with next pointer)
3197 static void
3198 freeitem(Item* it)
3199 {
3200 	Iimage* ii;
3201 	Genattr* ga;
3202 
3203 	if(it == nil)
3204 		return;
3205 
3206 	switch(it->tag) {
3207 	case Itexttag:
3208 		free(((Itext*)it)->s);
3209 		break;
3210 	case Iimagetag:
3211 		ii = (Iimage*)it;
3212 		free(ii->imsrc);
3213 		free(ii->altrep);
3214 		break;
3215 	case Iformfieldtag:
3216 		freeformfield(((Iformfield*)it)->formfield);
3217 		break;
3218 	case Itabletag:
3219 		freetable(((Itable*)it)->table);
3220 		break;
3221 	case Ifloattag:
3222 		freeitem(((Ifloat*)it)->item);
3223 		break;
3224 	}
3225 	ga = it->genattr;
3226 	if(ga != nil) {
3227 		free(ga->id);
3228 		free(ga->class);
3229 		free(ga->style);
3230 		free(ga->title);
3231 		freescriptevents(ga->events);
3232 	}
3233 	free(it);
3234 }
3235 
3236 // Free list of items chained through next pointer
3237 void
3238 freeitems(Item* ithead)
3239 {
3240 	Item* it;
3241 	Item* itnext;
3242 
3243 	it = ithead;
3244 	while(it != nil) {
3245 		itnext = it->next;
3246 		freeitem(it);
3247 		it = itnext;
3248 	}
3249 }
3250 
3251 static void
3252 freeformfield(Formfield* ff)
3253 {
3254 	Option* o;
3255 	Option* onext;
3256 
3257 	if(ff == nil)
3258 		return;
3259 
3260 	free(ff->name);
3261 	free(ff->value);
3262 	for(o = ff->options; o != nil; o = onext) {
3263 		onext = o->next;
3264 		free(o->value);
3265 		free(o->display);
3266 	}
3267 	free(ff);
3268 }
3269 
3270 static void
3271 freetable(Table* t)
3272 {
3273 	int i;
3274 	Tablecell* c;
3275 	Tablecell* cnext;
3276 
3277 	if(t == nil)
3278 		return;
3279 
3280 	// We'll find all the unique cells via t->cells and next pointers.
3281 	// (Other pointers to cells in the table are duplicates of these)
3282 	for(c = t->cells; c != nil; c = cnext) {
3283 		cnext = c->next;
3284 		freeitems(c->content);
3285 	}
3286 	if(t->grid != nil) {
3287 		for(i = 0; i < t->nrow; i++)
3288 			free(t->grid[i]);
3289 		free(t->grid);
3290 	}
3291 	free(t->rows);
3292 	free(t->cols);
3293 	freeitems(t->caption);
3294 	free(t);
3295 }
3296 
3297 static void
3298 freeform(Form* f)
3299 {
3300 	if(f == nil)
3301 		return;
3302 
3303 	free(f->name);
3304 	free(f->action);
3305 	// Form doesn't own its fields (Iformfield items do)
3306 	free(f);
3307 }
3308 
3309 static void
3310 freeforms(Form* fhead)
3311 {
3312 	Form* f;
3313 	Form* fnext;
3314 
3315 	for(f = fhead; f != nil; f = fnext) {
3316 		fnext = f->next;
3317 		freeform(f);
3318 	}
3319 }
3320 
3321 static void
3322 freeanchor(Anchor* a)
3323 {
3324 	if(a == nil)
3325 		return;
3326 
3327 	free(a->name);
3328 	free(a->href);
3329 	free(a);
3330 }
3331 
3332 static void
3333 freeanchors(Anchor* ahead)
3334 {
3335 	Anchor* a;
3336 	Anchor* anext;
3337 
3338 	for(a = ahead; a != nil; a = anext) {
3339 		anext = a->next;
3340 		freeanchor(a);
3341 	}
3342 }
3343 
3344 static void
3345 freedestanchor(DestAnchor* da)
3346 {
3347 	if(da == nil)
3348 		return;
3349 
3350 	free(da->name);
3351 	free(da);
3352 }
3353 
3354 static void
3355 freedestanchors(DestAnchor* dahead)
3356 {
3357 	DestAnchor* da;
3358 	DestAnchor* danext;
3359 
3360 	for(da = dahead; da != nil; da = danext) {
3361 		danext = da->next;
3362 		freedestanchor(da);
3363 	}
3364 }
3365 
3366 static void
3367 freearea(Area* a)
3368 {
3369 	if(a == nil)
3370 		return;
3371 	free(a->href);
3372 	free(a->coords);
3373 }
3374 
3375 static void freekidinfos(Kidinfo* khead);
3376 
3377 static void
3378 freekidinfo(Kidinfo* k)
3379 {
3380 	if(k->isframeset) {
3381 		free(k->rows);
3382 		free(k->cols);
3383 		freekidinfos(k->kidinfos);
3384 	}
3385 	else {
3386 		free(k->src);
3387 		free(k->name);
3388 	}
3389 	free(k);
3390 }
3391 
3392 static void
3393 freekidinfos(Kidinfo* khead)
3394 {
3395 	Kidinfo* k;
3396 	Kidinfo* knext;
3397 
3398 	for(k = khead; k != nil; k = knext) {
3399 		knext = k->next;
3400 		freekidinfo(k);
3401 	}
3402 }
3403 
3404 static void
3405 freemap(Map* m)
3406 {
3407 	Area* a;
3408 	Area* anext;
3409 
3410 	if(m == nil)
3411 		return;
3412 
3413 	free(m->name);
3414 	for(a = m->areas; a != nil; a = anext) {
3415 		anext = a->next;
3416 		freearea(a);
3417 	}
3418 	free(m);
3419 }
3420 
3421 static void
3422 freemaps(Map* mhead)
3423 {
3424 	Map* m;
3425 	Map* mnext;
3426 
3427 	for(m = mhead; m != nil; m = mnext) {
3428 		mnext = m->next;
3429 		freemap(m);
3430 	}
3431 }
3432 
3433 void
3434 freedocinfo(Docinfo* d)
3435 {
3436 	if(d == nil)
3437 		return;
3438 	free(d->src);
3439 	free(d->base);
3440 	freeitem((Item*)d->backgrounditem);
3441 	free(d->refresh);
3442 	freekidinfos(d->kidinfo);
3443 	freeanchors(d->anchors);
3444 	freedestanchors(d->dests);
3445 	freeforms(d->forms);
3446 	freemaps(d->maps);
3447 	// tables, images, and formfields are freed when
3448 	// the items pointing at them are freed
3449 	free(d);
3450 }
3451 
3452 // Currently, someone else owns all the memory
3453 // pointed to by things in a Pstate.
3454 static void
3455 freepstate(Pstate* p)
3456 {
3457 	free(p);
3458 }
3459 
3460 static void
3461 freepstatestack(Pstate* pshead)
3462 {
3463 	Pstate* p;
3464 	Pstate* pnext;
3465 
3466 	for(p = pshead; p != nil; p = pnext) {
3467 		pnext = p->next;
3468 		free(p);
3469 	}
3470 }
3471 
3472 static int
3473 Iconv(Fmt *f)
3474 {
3475 	Item*	it;
3476 	Itext*	t;
3477 	Irule*	r;
3478 	Iimage*	i;
3479 	Ifloat*	fl;
3480 	int	state;
3481 	Formfield*	ff;
3482 	Rune*	ty;
3483 	Tablecell*	c;
3484 	Table*	tab;
3485 	char*	p;
3486 	int	cl;
3487 	int	hang;
3488 	int	indent;
3489 	int	bi;
3490 	int	nbuf;
3491 	char	buf[BIGBUFSIZE];
3492 
3493 	it = va_arg(f->args, Item*);
3494 	bi = 0;
3495 	nbuf = sizeof(buf);
3496 	state = it->state;
3497 	nbuf = nbuf-1;
3498 	if(state&IFbrk) {
3499 		cl = state&(IFcleft|IFcright);
3500 		p = "";
3501 		if(cl) {
3502 			if(cl == (IFcleft|IFcright))
3503 				p = " both";
3504 			else if(cl == IFcleft)
3505 				p = " left";
3506 			else
3507 				p = " right";
3508 		}
3509 		bi = snprint(buf, nbuf, "brk(%d%s)", (state&IFbrksp)? 1 : 0, p);
3510 	}
3511 	if(state&IFnobrk)
3512 		bi += snprint(buf+bi, nbuf-bi, " nobrk");
3513 	if(!(state&IFwrap))
3514 		bi += snprint(buf+bi, nbuf-bi, " nowrap");
3515 	if(state&IFrjust)
3516 		bi += snprint(buf+bi, nbuf-bi, " rjust");
3517 	if(state&IFcjust)
3518 		bi += snprint(buf+bi, nbuf-bi, " cjust");
3519 	if(state&IFsmap)
3520 		bi += snprint(buf+bi, nbuf-bi, " smap");
3521 	indent = (state&IFindentmask) >> IFindentshift;
3522 	if(indent > 0)
3523 		bi += snprint(buf+bi, nbuf-bi, " indent=%d", indent);
3524 	hang = state&IFhangmask;
3525 	if(hang > 0)
3526 		bi += snprint(buf+bi, nbuf-bi, " hang=%d", hang);
3527 
3528 	switch(it->tag) {
3529 	case Itexttag:
3530 		t = (Itext*)it;
3531 		bi += snprint(buf+bi, nbuf-bi, " Text '%S', fnt=%d, fg=%x", t->s, t->fnt, t->fg);
3532 		break;
3533 
3534 	case Iruletag:
3535 		r = (Irule*)it;
3536 		bi += snprint(buf+bi, nbuf-bi, "Rule size=%d, al=%S, wspec=", r->size, stringalign(r->align));
3537 		bi += dimprint(buf+bi, nbuf-bi, r->wspec);
3538 		break;
3539 
3540 	case Iimagetag:
3541 		i = (Iimage*)it;
3542 		bi += snprint(buf+bi, nbuf-bi,
3543 			"Image src=%S, alt=%S, al=%S, w=%d, h=%d hsp=%d, vsp=%d, bd=%d, map=%S",
3544 			i->imsrc, i->altrep? i->altrep : L"", stringalign(i->align), i->imwidth, i->imheight,
3545 			i->hspace, i->vspace, i->border, i->map? i->map->name : L"");
3546 		break;
3547 
3548 	case Iformfieldtag:
3549 		ff = ((Iformfield*)it)->formfield;
3550 		if(ff->ftype == Ftextarea)
3551 			ty = L"textarea";
3552 		else if(ff->ftype == Fselect)
3553 			ty = L"select";
3554 		else {
3555 			ty = _revlookup(input_tab, NINPUTTAB, ff->ftype);
3556 			if(ty == nil)
3557 				ty = L"none";
3558 		}
3559 		bi += snprint(buf+bi, nbuf-bi, "Formfield %S, fieldid=%d, formid=%d, name=%S, value=%S",
3560 			ty, ff->fieldid, ff->form->formid, ff->name? ff->name : L"",
3561 			ff->value? ff->value : L"");
3562 		break;
3563 
3564 	case Itabletag:
3565 		tab = ((Itable*)it)->table;
3566 		bi += snprint(buf+bi, nbuf-bi, "Table tableid=%d, width=", tab->tableid);
3567 		bi += dimprint(buf+bi, nbuf-bi, tab->width);
3568 		bi += snprint(buf+bi, nbuf-bi, ", nrow=%d, ncol=%d, ncell=%d, totw=%d, toth=%d\n",
3569 			tab->nrow, tab->ncol, tab->ncell, tab->totw, tab->toth);
3570 		for(c = tab->cells; c != nil; c = c->next)
3571 			bi += snprint(buf+bi, nbuf-bi, "Cell %d.%d, at (%d,%d) ",
3572 					tab->tableid, c->cellid, c->row, c->col);
3573 		bi += snprint(buf+bi, nbuf-bi, "End of Table %d", tab->tableid);
3574 		break;
3575 
3576 	case Ifloattag:
3577 		fl = (Ifloat*)it;
3578 		bi += snprint(buf+bi, nbuf-bi, "Float, x=%d y=%d, side=%S, it=%I",
3579 			fl->x, fl->y, stringalign(fl->side), fl->item);
3580 		bi += snprint(buf+bi, nbuf-bi, "\n\t");
3581 		break;
3582 
3583 	case Ispacertag:
3584 		p = "";
3585 		switch(((Ispacer*)it)->spkind) {
3586 		case ISPnull:
3587 			p = "null";
3588 			break;
3589 		case ISPvline:
3590 			p = "vline";
3591 			break;
3592 		case ISPhspace:
3593 			p = "hspace";
3594 			break;
3595 		}
3596 		bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
3597 		break;
3598 	}
3599 	bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
3600 			it->width, it->height, it->ascent, it->anchorid);
3601 	buf[bi] = 0;
3602 	return fmtstrcpy(f, buf);
3603 }
3604 
3605 // String version of alignment 'a'
3606 static Rune*
3607 stringalign(int a)
3608 {
3609 	Rune*	s;
3610 
3611 	s = _revlookup(align_tab, NALIGNTAB, a);
3612 	if(s == nil)
3613 		s = L"none";
3614 	return s;
3615 }
3616 
3617 // Put at most nbuf chars of representation of d into buf,
3618 // and return number of characters put
3619 static int
3620 dimprint(char* buf, int nbuf, Dimen d)
3621 {
3622 	int	n;
3623 	int	k;
3624 
3625 	n = 0;
3626 	n += snprint(buf, nbuf, "%d", dimenspec(d));
3627 	k = dimenkind(d);
3628 	if(k == Dpercent)
3629 		buf[n++] = '%';
3630 	if(k == Drelative)
3631 		buf[n++] = '*';
3632 	return n;
3633 }
3634 
3635 void
3636 printitems(Item* items, char* msg)
3637 {
3638 	Item*	il;
3639 
3640 	fprint(2, "%s\n", msg);
3641 	il = items;
3642 	while(il != nil) {
3643 		fprint(2, "%I", il);
3644 		il = il->next;
3645 	}
3646 }
3647 
3648 static Genattr*
3649 newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events)
3650 {
3651 	Genattr* g;
3652 
3653 	g = (Genattr*)emalloc(sizeof(Genattr));
3654 	g->id = id;
3655 	g->class = class;
3656 	g->style = style;
3657 	g->title = title;
3658 	g->events = events;
3659 	return g;
3660 }
3661 
3662 static Formfield*
3663 newformfield(int ftype, int fieldid, Form* form, Rune* name,
3664 		Rune* value, int size, int maxlength, Formfield* link)
3665 {
3666 	Formfield* ff;
3667 
3668 	ff = (Formfield*)emalloc(sizeof(Formfield));
3669 	ff->ftype = ftype;
3670 	ff->fieldid = fieldid;
3671 	ff->form = form;
3672 	ff->name = name;
3673 	ff->value = value;
3674 	ff->size = size;
3675 	ff->maxlength = maxlength;
3676 	ff->ctlid = -1;
3677 	ff->next = link;
3678 	return ff;
3679 }
3680 
3681 // Transfers ownership of value and display to Option.
3682 static Option*
3683 newoption(int selected, Rune* value, Rune* display, Option* link)
3684 {
3685 	Option *o;
3686 
3687 	o = (Option*)emalloc(sizeof(Option));
3688 	o->selected = selected;
3689 	o->value = value;
3690 	o->display = display;
3691 	o->next = link;
3692 	return o;
3693 }
3694 
3695 static Form*
3696 newform(int formid, Rune* name, Rune* action, int target, int method, Form* link)
3697 {
3698 	Form* f;
3699 
3700 	f = (Form*)emalloc(sizeof(Form));
3701 	f->formid = formid;
3702 	f->name = name;
3703 	f->action = action;
3704 	f->target = target;
3705 	f->method = method;
3706 	f->nfields = 0;
3707 	f->fields = nil;
3708 	f->next = link;
3709 	return f;
3710 }
3711 
3712 static Table*
3713 newtable(int tableid, Align align, Dimen width, int border,
3714 	int cellspacing, int cellpadding, Background bg, Token* tok, Table* link)
3715 {
3716 	Table* t;
3717 
3718 	t = (Table*)emalloc(sizeof(Table));
3719 	t->tableid = tableid;
3720 	t->align = align;
3721 	t->width = width;
3722 	t->border = border;
3723 	t->cellspacing = cellspacing;
3724 	t->cellpadding = cellpadding;
3725 	t->background = bg;
3726 	t->caption_place = ALbottom;
3727 	t->caption_lay = nil;
3728 	t->tabletok = tok;
3729 	t->tabletok = nil;
3730 	t->next = link;
3731 	return t;
3732 }
3733 
3734 static Tablerow*
3735 newtablerow(Align align, Background bg, int flags, Tablerow* link)
3736 {
3737 	Tablerow* tr;
3738 
3739 	tr = (Tablerow*)emalloc(sizeof(Tablerow));
3740 	tr->align = align;
3741 	tr->background = bg;
3742 	tr->flags = flags;
3743 	tr->next = link;
3744 	return tr;
3745 }
3746 
3747 static Tablecell*
3748 newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec,
3749 		Background bg, int flags, Tablecell* link)
3750 {
3751 	Tablecell* c;
3752 
3753 	c = (Tablecell*)emalloc(sizeof(Tablecell));
3754 	c->cellid = cellid;
3755 	c->lay = nil;
3756 	c->rowspan = rowspan;
3757 	c->colspan = colspan;
3758 	c->align = align;
3759 	c->flags = flags;
3760 	c->wspec = wspec;
3761 	c->hspec = hspec;
3762 	c->background = bg;
3763 	c->next = link;
3764 	return c;
3765 }
3766 
3767 static Anchor*
3768 newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
3769 {
3770 	Anchor* a;
3771 
3772 	a = (Anchor*)emalloc(sizeof(Anchor));
3773 	a->index = index;
3774 	a->name = name;
3775 	a->href = href;
3776 	a->target = target;
3777 	a->next = link;
3778 	return a;
3779 }
3780 
3781 static DestAnchor*
3782 newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
3783 {
3784 	DestAnchor* d;
3785 
3786 	d = (DestAnchor*)emalloc(sizeof(DestAnchor));
3787 	d->index = index;
3788 	d->name = name;
3789 	d->item = item;
3790 	d->next = link;
3791 	return d;
3792 }
3793 
3794 static SEvent*
3795 newscriptevent(int type, Rune* script, SEvent* link)
3796 {
3797 	SEvent* ans;
3798 
3799 	ans = (SEvent*)emalloc(sizeof(SEvent));
3800 	ans->type = type;
3801 	ans->script = script;
3802 	ans->next = link;
3803 	return ans;
3804 }
3805 
3806 static void
3807 freescriptevents(SEvent* ehead)
3808 {
3809 	SEvent* e;
3810 	SEvent* nexte;
3811 
3812 	e = ehead;
3813 	while(e != nil) {
3814 		nexte = e->next;
3815 		free(e->script);
3816 		free(e);
3817 		e = nexte;
3818 	}
3819 }
3820 
3821 static Dimen
3822 makedimen(int kind, int spec)
3823 {
3824 	Dimen d;
3825 
3826 	if(spec&Dkindmask) {
3827 		if(warn)
3828 			fprint(2, "warning: dimension spec too big: %d\n", spec);
3829 		spec = 0;
3830 	}
3831 	d.kindspec = kind|spec;
3832 	return d;
3833 }
3834 
3835 int
3836 dimenkind(Dimen d)
3837 {
3838 	return (d.kindspec&Dkindmask);
3839 }
3840 
3841 int
3842 dimenspec(Dimen d)
3843 {
3844 	return (d.kindspec&Dspecmask);
3845 }
3846 
3847 static Kidinfo*
3848 newkidinfo(int isframeset, Kidinfo* link)
3849 {
3850 	Kidinfo*	ki;
3851 
3852 	ki = (Kidinfo*)emalloc(sizeof(Kidinfo));
3853 	ki->isframeset = isframeset;
3854 	if(!isframeset) {
3855 		ki->flags = FRhscrollauto|FRvscrollauto;
3856 		ki->marginw = FRKIDMARGIN;
3857 		ki->marginh = FRKIDMARGIN;
3858 		ki->framebd = 1;
3859 	}
3860 	ki->next = link;
3861 	return ki;
3862 }
3863 
3864 static Docinfo*
3865 newdocinfo(void)
3866 {
3867 	Docinfo*	d;
3868 
3869 	d = (Docinfo*)emalloc(sizeof(Docinfo));
3870 	resetdocinfo(d);
3871 	return d;
3872 }
3873 
3874 static void
3875 resetdocinfo(Docinfo* d)
3876 {
3877 	memset(d, 0, sizeof(Docinfo));
3878 	d->background = makebackground(nil, White);
3879 	d->text = Black;
3880 	d->link = Blue;
3881 	d->vlink = Blue;
3882 	d->alink = Blue;
3883 	d->target = FTself;
3884 	d->chset = ISO_8859_1;
3885 	d->scripttype = TextJavascript;
3886 	d->frameid = -1;
3887 }
3888 
3889 // Use targetmap array to keep track of name <-> targetid mapping.
3890 // Use real malloc(), and never free
3891 static void
3892 targetmapinit(void)
3893 {
3894 	targetmapsize = 10;
3895 	targetmap = (StringInt*)emalloc(targetmapsize*sizeof(StringInt));
3896 	memset(targetmap, 0, targetmapsize*sizeof(StringInt));
3897 	targetmap[0].key = _Strdup(L"_top");
3898 	targetmap[0].val = FTtop;
3899 	targetmap[1].key = _Strdup(L"_self");
3900 	targetmap[1].val = FTself;
3901 	targetmap[2].key = _Strdup(L"_parent");
3902 	targetmap[2].val = FTparent;
3903 	targetmap[3].key = _Strdup(L"_blank");
3904 	targetmap[3].val = FTblank;
3905 	ntargets = 4;
3906 }
3907 
3908 int
3909 targetid(Rune* s)
3910 {
3911 	int i;
3912 	int n;
3913 
3914 	n = _Strlen(s);
3915 	if(n == 0)
3916 		return FTself;
3917 	for(i = 0; i < ntargets; i++)
3918 		if(_Strcmp(s, targetmap[i].key) == 0)
3919 			return targetmap[i].val;
3920 	if(i >= targetmapsize) {
3921 		targetmapsize += 10;
3922 		targetmap = (StringInt*)erealloc(targetmap, targetmapsize*sizeof(StringInt));
3923 	}
3924 	targetmap[i].key = (Rune*)emalloc((n+1)*sizeof(Rune));
3925 	memmove(targetmap[i].key, s, (n+1)*sizeof(Rune));
3926 	targetmap[i].val = i;
3927 	ntargets++;
3928 	return i;
3929 }
3930 
3931 Rune*
3932 targetname(int targid)
3933 {
3934 	int i;
3935 
3936 	for(i = 0; i < ntargets; i++)
3937 		if(targetmap[i].val == targid)
3938 			return targetmap[i].key;
3939 	return L"?";
3940 }
3941 
3942 // Convert HTML color spec to RGB value, returning dflt if can't.
3943 // Argument is supposed to be a valid HTML color, or "".
3944 // Return the RGB value of the color, using dflt if s
3945 // is nil or an invalid color.
3946 static int
3947 color(Rune* s, int dflt)
3948 {
3949 	int v;
3950 	Rune* rest;
3951 
3952 	if(s == nil)
3953 		return dflt;
3954 	if(_lookup(color_tab, NCOLORS, s, _Strlen(s), &v))
3955 		return v;
3956 	if(s[0] == '#')
3957 		s++;
3958 	v = _Strtol(s, &rest, 16);
3959 	if(*rest == 0)
3960 		return v;
3961 	return dflt;
3962 }
3963 
3964 // Debugging
3965 
3966 #define HUGEPIX 10000
3967 
3968 // A "shallow" validitem, that doesn't follow next links
3969 // or descend into tables.
3970 static int
3971 validitem(Item* i)
3972 {
3973 	int ok;
3974 	Itext* ti;
3975 	Irule* ri;
3976 	Iimage* ii;
3977 	Ifloat* fi;
3978 	int a;
3979 
3980 	ok = (i->tag >= Itexttag && i->tag <= Ispacertag) &&
3981 		(i->next == nil || validptr(i->next)) &&
3982 		(i->width >= 0 && i->width < HUGEPIX) &&
3983 		(i->height >= 0 && i->height < HUGEPIX) &&
3984 		(i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
3985 		(i->anchorid >= 0) &&
3986 		(i->genattr == nil || validptr(i->genattr));
3987 	// also, could check state for ridiculous combinations
3988 	// also, could check anchorid for within-doc-range
3989 	if(ok)
3990 		switch(i->tag) {
3991 		case Itexttag:
3992 			ti = (Itext*)i;
3993 			ok = validStr(ti->s) &&
3994 				(ti->fnt >= 0 && ti->fnt < NumStyle*NumSize) &&
3995 				(ti->ul == ULnone || ti->ul == ULunder || ti->ul == ULmid);
3996 			break;
3997 		case Iruletag:
3998 			ri = (Irule*)i;
3999 			ok = (validvalign(ri->align) || validhalign(ri->align)) &&
4000 				(ri->size >=0 && ri->size < HUGEPIX);
4001 			break;
4002 		case Iimagetag:
4003 			ii = (Iimage*)i;
4004 			ok = (ii->imsrc == nil || validptr(ii->imsrc)) &&
4005 				(ii->width >= 0 && ii->width < HUGEPIX) &&
4006 				(ii->height >= 0 && ii->height < HUGEPIX) &&
4007 				(ii->imwidth >= 0 && ii->imwidth < HUGEPIX) &&
4008 				(ii->imheight >= 0 && ii->imheight < HUGEPIX) &&
4009 				(ii->altrep == nil || validStr(ii->altrep)) &&
4010 				(ii->map == nil || validptr(ii->map)) &&
4011 				(validvalign(ii->align) || validhalign(ii->align)) &&
4012 				(ii->nextimage == nil || validptr(ii->nextimage));
4013 			break;
4014 		case Iformfieldtag:
4015 			ok = validformfield(((Iformfield*)i)->formfield);
4016 			break;
4017 		case Itabletag:
4018 			ok = validptr((Itable*)i);
4019 			break;
4020 		case Ifloattag:
4021 			fi = (Ifloat*)i;
4022 			ok = (fi->side == ALleft || fi->side == ALright) &&
4023 				validitem(fi->item) &&
4024 				(fi->item->tag == Iimagetag || fi->item->tag == Itabletag);
4025 			break;
4026 		case Ispacertag:
4027 			a = ((Ispacer*)i)->spkind;
4028 			ok = a==ISPnull || a==ISPvline || a==ISPhspace || a==ISPgeneral;
4029 			break;
4030 		default:
4031 			ok = 0;
4032 		}
4033 	return ok;
4034 }
4035 
4036 // "deep" validation, that checks whole list of items,
4037 // and descends into tables and floated tables.
4038 // nil is ok for argument.
4039 int
4040 validitems(Item* i)
4041 {
4042 	int ok;
4043 	Item* ii;
4044 
4045 	ok = 1;
4046 	while(i != nil && ok) {
4047 		ok = validitem(i);
4048 		if(ok) {
4049 			if(i->tag == Itabletag) {
4050 				ok = validtable(((Itable*)i)->table);
4051 			}
4052 			else if(i->tag == Ifloattag) {
4053 				ii = ((Ifloat*)i)->item;
4054 				if(ii->tag == Itabletag)
4055 					ok = validtable(((Itable*)ii)->table);
4056 			}
4057 		}
4058 		if(!ok) {
4059 			fprint(2, "invalid item: %I\n", i);
4060 		}
4061 		i = i->next;
4062 	}
4063 	return ok;
4064 }
4065 
4066 static int
4067 validformfield(Formfield* f)
4068 {
4069 	int ok;
4070 
4071 	ok = (f->next == nil || validptr(f->next)) &&
4072 		(f->ftype >= 0 && f->ftype <= Ftextarea) &&
4073 		f->fieldid >= 0 &&
4074 		(f->form == nil || validptr(f->form)) &&
4075 		(f->name == nil || validStr(f->name)) &&
4076 		(f->value == nil || validStr(f->value)) &&
4077 		(f->options == nil || validptr(f->options)) &&
4078 		(f->image == nil || validitem(f->image)) &&
4079 		(f->events == nil || validptr(f->events));
4080 	// when all built, should have f->fieldid < f->form->nfields,
4081 	// but this may be called during build...
4082 	return ok;
4083 }
4084 
4085 // "deep" validation -- checks cell contents too
4086 static int
4087 validtable(Table* t)
4088 {
4089 	int ok;
4090 	int i, j;
4091 	Tablecell* c;
4092 
4093 	ok = (t->next == nil || validptr(t->next)) &&
4094 		t->nrow >= 0 &&
4095 		t->ncol >= 0 &&
4096 		t->ncell >= 0 &&
4097 		validalign(t->align) &&
4098 		validdimen(t->width) &&
4099 		(t->border >= 0 && t->border < HUGEPIX) &&
4100 		(t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
4101 		(t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
4102 		validitems(t->caption) &&
4103 		(t->caption_place == ALtop || t->caption_place == ALbottom) &&
4104 		(t->totw >= 0 && t->totw < HUGEPIX) &&
4105 		(t->toth >= 0 && t->toth < HUGEPIX) &&
4106 		(t->tabletok == nil || validptr(t->tabletok));
4107 	// during parsing, t->rows has list;
4108 	// only when parsing is done is t->nrow set > 0
4109 	if(ok && t->nrow > 0 && t->ncol > 0) {
4110 		// table is "finished"
4111 		for(i = 0; i < t->nrow && ok; i++)
4112 			ok = validtablerow(t->rows+i);
4113 		for(j = 0; j < t->ncol && ok; j++)
4114 			ok = validtablecol(t->cols+j);
4115 		for(c = t->cells; c != nil && ok; c = c->next)
4116 			ok = validtablecell(c);
4117 		for(i = 0; i < t->nrow && ok; i++)
4118 			for(j = 0; j < t->ncol && ok; j++)
4119 				ok = validptr(t->grid[i][j]);
4120 	}
4121 	return ok;
4122 }
4123 
4124 static int
4125 validvalign(int a)
4126 {
4127 	return a == ALnone || a == ALmiddle || a == ALbottom || a == ALtop || a == ALbaseline;
4128 }
4129 
4130 static int
4131 validhalign(int a)
4132 {
4133 	return a == ALnone || a == ALleft || a == ALcenter || a == ALright ||
4134 			a == ALjustify || a == ALchar;
4135 }
4136 
4137 static int
4138 validalign(Align a)
4139 {
4140 	return validhalign(a.halign) && validvalign(a.valign);
4141 }
4142 
4143 static int
4144 validdimen(Dimen d)
4145 {
4146 	int ok;
4147 	int s;
4148 
4149 	ok = 0;
4150 	s = d.kindspec&Dspecmask;
4151 	switch(d.kindspec&Dkindmask) {
4152 	case Dnone:
4153 		ok = s==0;
4154 		break;
4155 	case Dpixels:
4156 		ok = s < HUGEPIX;
4157 		break;
4158 	case Dpercent:
4159 	case Drelative:
4160 		ok = 1;
4161 		break;
4162 	}
4163 	return ok;
4164 }
4165 
4166 static int
4167 validtablerow(Tablerow* r)
4168 {
4169 	return (r->cells == nil || validptr(r->cells)) &&
4170 		(r->height >= 0 && r->height < HUGEPIX) &&
4171 		(r->ascent > -HUGEPIX && r->ascent < HUGEPIX) &&
4172 		validalign(r->align);
4173 }
4174 
4175 static int
4176 validtablecol(Tablecol* c)
4177 {
4178 	return c->width >= 0 && c->width < HUGEPIX
4179 		&& validalign(c->align);
4180 }
4181 
4182 static int
4183 validtablecell(Tablecell* c)
4184 {
4185 	int ok;
4186 
4187 	ok = (c->next == nil || validptr(c->next)) &&
4188 		(c->nextinrow == nil || validptr(c->nextinrow)) &&
4189 		(c->content == nil || validptr(c->content)) &&
4190 		(c->lay == nil || validptr(c->lay)) &&
4191 		c->rowspan >= 0 &&
4192 		c->colspan >= 0 &&
4193 		validalign(c->align) &&
4194 		validdimen(c->wspec) &&
4195 		c->row >= 0 &&
4196 		c->col >= 0;
4197 	if(ok) {
4198 		if(c->content != nil)
4199 			ok = validitems(c->content);
4200 	}
4201 	return ok;
4202 }
4203 
// Crude pointer check: read one byte through p and return 1.
// There is no failure return -- a bad pointer is expected to make
// the read itself fault ("bomb").
// TODO: a better job of this.
//
// The read goes through a volatile-qualified pointer so that an
// optimizing compiler cannot discard it as a dead load (the value
// is stored in a static that nothing ever reads).
static int
validptr(void* p)
{
	static char c;

	c = *(volatile char*)p;
	return 1;
}
4215 
4216 static int
4217 validStr(Rune* s)
4218 {
4219 	return s != nil && validptr(s);
4220 }
4221