xref: /inferno-os/appl/lib/xml.b (revision 6e425a9de8c003b5a733621a6b6730ec3cc902b8)
137da2899SCharles.Forsythimplement Xml;
237da2899SCharles.Forsyth
#
# Portions copyright © 2002 Vita Nuova Holdings Limited
#
#
# Derived from saxparser.b Copyright © 2001-2002 by John Powers or his employer
#
937da2899SCharles.Forsyth
# TO DO:
# - provide a way of getting attributes out of <?...?> (process) requests,
# so that we can process stylesheet requests given in that way.
1337da2899SCharles.Forsyth
1437da2899SCharles.Forsythinclude "sys.m";
1537da2899SCharles.Forsyth	sys: Sys;
1637da2899SCharles.Forsythinclude "bufio.m";
1737da2899SCharles.Forsyth	bufio: Bufio;
1837da2899SCharles.Forsyth	Iobuf: import bufio;
1937da2899SCharles.Forsythinclude "string.m";
2037da2899SCharles.Forsyth	str: String;
2137da2899SCharles.Forsythinclude "hash.m";
2237da2899SCharles.Forsyth	hash: Hash;
2337da2899SCharles.Forsyth	HashTable: import hash;
2437da2899SCharles.Forsythinclude "xml.m";
2537da2899SCharles.Forsyth
# Parcel: one lexical unit of the input, as produced by getparcel()
# and turned into an Item by Parser.next().
Parcel: adt {
	pick {
	Start or
	Empty =>
		# <name attrs> (Start) or <name attrs/> (Empty)
		name: string;
		attrs: Attributes;
	End =>
		# </name>
		name: string;
	Text =>
		# character data; ws1 and ws2 are the leading and trailing
		# white-space flags computed by substituteentities_sp()
		ch: string;
		ws1: int;
		ws2: int;
	Process =>
		# <?target data?>
		target: string;
		data: string;
	Error =>
		# parse error: where it was detected and what went wrong
		loc: Locator;
		msg: string;
	Doctype =>
		# <!DOCTYPE name ...>; public is 1 for a PUBLIC identifier,
		# params holds the id and uri strings
		name: string;
		public: int;
		params: list of string;
	Stylesheet =>
		# <?xml-stylesheet attrs?>
		attrs: Attributes;
	EOF =>
	}
};
5237da2899SCharles.Forsyth
# Named character entities understood by translateentity(), given as
# (entity name, replacement text) pairs. init() copies them into entdict.
entinit := array[] of {
	("AElig", "Æ"),
	("OElig", "Œ"),
	("aelig", "æ"),
	("amp", "&"),
	("apos", "\'"),
	("copy", "©"),
	("gt", ">"),
	("ldquo", "``"),
	("lt", "<"),
	("mdash", "-"),		# XXX ??
	("oelig", "œ"),
	("quot", "\""),
	("rdquo", "''"),
	("rsquo", "'"),
	("trade", "™"),
	("nbsp", "\u00a0"),
};

# entity name -> replacement text (kept in the string field of the hash value);
# filled in by init()
entdict: ref HashTable;
7237da2899SCharles.Forsyth
# Load the modules this implementation uses and build the entity dictionary.
# Returns nil on success, otherwise a message naming the module that failed to load.
init(): string
{
	sys = load Sys Sys->PATH;
	if ((bufio = load Bufio Bufio->PATH) == nil)
		return sys->sprint("cannot load %s: %r", Bufio->PATH);
	if ((str = load String String->PATH) == nil)
		return sys->sprint("cannot load %s: %r", String->PATH);
	if ((hash = load Hash Hash->PATH) == nil)
		return sys->sprint("cannot load %s: %r", Hash->PATH);

	# copy the static entity table into a hash table for lookup by name
	entdict = hash->new(23);
	i := 0;
	while (i < len entinit) {
		(entname, repl) := entinit[i];
		entdict.insert(entname, (0, 0.0, repl));
		i++;
	}
	return nil;
}
9237da2899SCharles.Forsyth
# template copied by fopen() for every new parser; never assigned in this file
blankparser: Parser;

# Open srcfile for reading and start a parser on it.
# Returns whatever fopen() returns, or (nil, message) if the file cannot be opened.
open(srcfile: string, warning: chan of (Locator, string), preelem: string): (ref Parser, string)
{
	iob := bufio->open(srcfile, Bufio->OREAD);
	if(iob != nil)
		return fopen(iob, srcfile, warning, preelem);
	return (nil, sys->sprint("cannot open %s: %r", srcfile));
}
102*6e425a9dSCharles.Forsyth
# Start a parser on an already-open Iobuf.
#	fd	input to read from
#	name	file name recorded in the parser's Locator (reporting starts at line 1)
#	warning	channel that receives (Locator, message) warnings; may be nil
#	preelem	name of the element inside which white space is preserved
# Returns (parser, "") on success, or (nil, message) when fd is nil.
fopen(fd: ref Bufio->Iobuf, name: string, warning: chan of (Locator, string), preelem: string): (ref Parser, string)
{
	# a nil Iobuf would otherwise trap on the first read below
	if(fd == nil)
		return (nil, sys->sprint("cannot parse %s: nil Iobuf", name));
	x := ref blankparser;
	x.in = fd;
	# ignore utf16 initialisation character (yuck)
	c := x.in.getc();
	if (c != 16rfffe && c != 16rfeff)
		x.in.ungetc();
	x.estack = nil;
	x.loc = Locator(1, name, "");
	x.warning = warning;
	x.preelem = preelem;
	# Parser.next() reports each item's offset from x.fileoffset, so it must
	# start at the real stream position: this is non-zero when a byte-order
	# mark was skipped above or when fd was not positioned at the start.
	x.fileoffset = int x.in.offset();
	return (x, "");
}
11737da2899SCharles.Forsyth
# Return the next item at the depth the caller is reading at.
# Anything deeper that the caller did not descend into is skipped first.
# Returns nil when an end tag or end of file is reached, or when the parser
# is already above the reading depth; parse errors come back as Item.Error.
Parser.next(x: self ref Parser): ref Item
{
	startoff := x.fileoffset;
	startloc := x.loc;

	# read up until end of current item
	while (x.actdepth > x.readdepth) {
		pick p := getparcel(x) {
		Error =>
			# note: reported at the location reached, not at startloc
			return ref Item.Error(startoff, x.loc, x.errormsg);
		EOF =>
			x.actdepth = 0;			# premature EOF closes all tags
		End =>
			x.actdepth--;
		Start =>
			x.actdepth++;
		}
	}

	# the enclosing element has already been closed: nothing more at this level
	if (x.actdepth < x.readdepth) {
		x.fileoffset = int x.in.offset();
		return nil;
	}

	result: ref Item;
	pick p := getparcel(x) {
	Text =>
		result = ref Item.Text(startoff, p.ch, p.ws1, p.ws2);
	Start =>
		x.actdepth++;
		result = ref Item.Tag(startoff, p.name, p.attrs);
	Empty =>
		# an empty element is a tag that does not change the depth
		result = ref Item.Tag(startoff, p.name, p.attrs);
	End =>
		x.actdepth--;
		result = nil;
	Process =>
		result = ref Item.Process(startoff, p.target, p.data);
	Doctype =>
		result = ref Item.Doctype(startoff, p.name, p.public, p.params);
	Stylesheet =>
		result = ref Item.Stylesheet(startoff, p.attrs);
	Error =>
		x.actdepth = 0;			# XXX is this the right thing to do?
		result = ref Item.Error(startoff, startloc, x.errormsg);
	EOF =>
		x.actdepth = 0;
		result = nil;
	}
	x.fileoffset = int x.in.offset();
	return result;
}
16837da2899SCharles.Forsyth
# Report whether the input is currently positioned at the offset recorded in m.
# Only the offset is compared.
Parser.atmark(x: self ref Parser, m: ref Mark): int
{
	here := int x.in.offset();
	return here == m.offset;
}
17337da2899SCharles.Forsyth
# Descend one level: subsequent next() calls read items one element deeper.
Parser.down(x: self ref Parser)
{
	x.readdepth += 1;
}
17837da2899SCharles.Forsyth
# Ascend one level: the next next() call first skips the rest of the current element.
Parser.up(x: self ref Parser)
{
	x.readdepth -= 1;
}
18337da2899SCharles.Forsyth
# mark is only defined after a next(), not after up() or down().
# this means that we don't have to record lots of state when going up or down levels.
#
# Capture the open-element stack, line number, input offset and reading depth
# so that goto() can return to this position later.
Parser.mark(x: self ref Parser): ref Mark
{
	offset := int x.in.offset();
	return ref Mark(x.estack, x.loc.line, offset, x.readdepth);
}
19037da2899SCharles.Forsyth
# Reposition the parser at mark m: seek the input and restore the element
# stack, line number and depths recorded in the mark.
Parser.goto(x: self ref Parser, m: ref Mark)
{
	x.in.seek(big m.offset, Sys->SEEKSTART);
	x.fileoffset = m.offset;
	x.eof = 0;
	x.estack = m.estack;
	x.loc.line = m.line;
	x.readdepth = m.readdepth;
	x.actdepth = len x.estack;

	# ispre counts the open preelem elements (see startelement/endelement) and
	# decides whether characters() squashes white space. It is not stored in
	# the mark, so rebuild it from the restored stack; otherwise it would keep
	# the value from wherever the parser was before the jump.
	npre := 0;
	for (t := x.estack; t != nil; t = tl t)
		if (hd t == x.preelem)
			npre++;
	x.ispre = npre;
}
20137da2899SCharles.Forsyth
# Convert a mark to text that Parser.str2mark() can read back.
Mark.str(m: self ref Mark): string
{
	# assume that neither the filename nor any of the tags contain spaces.
	# format:
	# offset readdepth linenum [tag...]
	# XXX would be nice if the produced string did not contain
	# any spaces so it could be treated as a word in other contexts.
	out := sys->sprint("%d %d %d", m.offset, m.readdepth, m.line);
	tags := m.estack;
	while (tags != nil) {
		out += " " + hd tags;
		tags = tl tags;
	}
	return out;
}
21437da2899SCharles.Forsyth
# Rebuild a mark from the text produced by Mark.str():
#	offset readdepth linenum [tag...]
# Returns nil if s has fewer than the three leading fields.
Parser.str2mark(p: self ref Parser, s: string): ref Mark
{
	(nfield, fields) := sys->tokenize(s, " ");
	if (nfield < 3)
		return nil;
	m := ref Mark(nil, p.loc.line, 0, 0);
	m.offset = int hd fields;
	fields = tl fields;
	m.readdepth = int hd fields;
	fields = tl fields;
	m.line = int hd fields;
	# everything after the three numbers is the open-element stack
	m.estack = tl fields;
	return m;
}
22737da2899SCharles.Forsyth
# Read the next parcel from the input.
# Constructs that yield nothing (element() or characters() returning nil) are
# skipped. Returns Parcel.EOF at end of input and Parcel.Error when one of the
# parsing routines raises a "sax:" exception.
getparcel(x: ref Parser): ref Parcel
{
	{
		parcel: ref Parcel;
		while (parcel == nil && !x.eof) {
			if (getc(x) == '<')
				parcel = element(x);
			else {
				# not markup: put the character back and read text
				ungetc(x);
				parcel = characters(x);
			}
		}
		if (parcel != nil)
			return parcel;
		return ref Parcel.EOF;
	}exception e{
	"sax:*" =>
			return ref Parcel.Error(x.loc, x.errormsg);
	}
}
24937da2899SCharles.Forsyth
# Return a short human-readable description of a parcel (for debugging).
# A nil parcel is described as "nil".
parcelstr(gi: ref Parcel): string
{
	if (gi == nil)
		return "nil";
	pick i := gi {
	Start =>
		return sys->sprint("Start: %s", i.name);
	Empty =>
		return sys->sprint("Empty: %s", i.name);
	End =>
		return "End";
	Text =>
		return "Text";
	Process =>
		# previously missing, so processing instructions printed as "Unknown"
		return sys->sprint("Process: %s", i.target);
	Doctype =>
		return sys->sprint("Doctype: %s", i.name);
	Stylesheet =>
		return "Stylesheet";
	Error =>
		return "Error: " + i.msg;
	EOF =>
		return "EOF";
	* =>
		return "Unknown";
	}
}
27537da2899SCharles.Forsyth
# Parse one markup construct; getparcel() has already consumed the '<'.
# Returns the parcel for the construct, or nil when it yields nothing
# (comments, DTD declarations, a '<?' that is not followed by a name).
# Malformed markup is reported through error(), which raises an exception.
element(x: ref Parser): ref Parcel
{
	# <tag ...>
	elemname := xmlname(x);
	c: int;
	if (elemname != "") {
		attrs := buildattrs(x);
		skipwhite(x);
		c = getc(x);
		isend := 0;
		if (c == '/')
			isend = 1;
		else
			ungetc(x);
		expect(x, '>');

		if (isend)
			return ref Parcel.Empty(elemname, attrs);
		else {
			startelement(x, elemname);
			return ref Parcel.Start(elemname, attrs);
		}
	# </tag>
	} else if ((c = getc(x)) == '/') {
		elemname = xmlname(x);
		if (elemname != "") {
			expect(x, '>');
			endelement(x, elemname);
			return ref Parcel.End(elemname);
		}
		else
			error(x, sys->sprint("illegal beginning of tag: '%c'", c));
	# <?tag ... ?>
	} else if (c == '?') {
		elemname = xmlname(x);
		if (elemname != "") {
			# this special case could be generalised if there were many
			# processing instructions that took attributes like this.
			if (elemname == "xml-stylesheet") {
				attrs := buildattrs(x);
				balancedstring(x, "?>");
				return ref Parcel.Stylesheet(attrs);
			} else {
				data := balancedstring(x, "?>");
				return ref Parcel.Process(elemname, data);
			}
		}
	} else if (c == '!') {
		c = getc(x);
		case c {
		'-' =>
			# <!-- comment -->
			# read raw: quotes have no meaning inside a comment, so an
			# apostrophe must not hide the closing "-->"
			if(getc(x) == '-'){
				rawuntil(x, "-->");
				return nil;
			}
		'[' =>
			# <![CDATA[...]]>
			# read raw: CDATA content is literal, including quotes and
			# leading white space
			s := xmlname(x);
			if(s == "CDATA" && getc(x) == '['){
				data := rawuntil(x, "]]>");
				return ref Parcel.Text(data, 0, 0);
			}
		* =>
			# <!declaration
			ungetc(x);
			s := xmlname(x);
			case s {
			"DOCTYPE" =>
				# <!DOCTYPE name (SYSTEM "filename" | PUBLIC "pubid" "uri"?)? ("[" decls "]")?>
				skipwhite(x);
				name := xmlname(x);
				if(name == nil)
					break;
				id := "";
				uri := "";
				public := 0;
				skipwhite(x);
				sort := xmlname(x);
				case sort {
				"SYSTEM" =>
					id = xmlstring(x, 1);
				"PUBLIC" =>
					public = 1;
					id = xmlstring(x, 1);
					skipwhite(x);
					c = getc(x);
					ungetc(x);
					if(c == '"' || c == '\'')
						uri = xmlstring(x, 1);
				"" =>
					# the external identifier is optional (e.g. <!DOCTYPE html>);
					# id and uri simply stay empty
					id = "";
				* =>
					error(x, sys->sprint("unknown DOCTYPE: %s", sort));
					return nil;
				}
				skipwhite(x);
				if(getc(x) == '['){
					error(x, "cannot handle DOCTYPE with declarations");
					return nil;
				}
				ungetc(x);
				skipwhite(x);
				if(getc(x) == '>')
					return ref Parcel.Doctype(name, public, id :: uri :: nil);
			"ELEMENT" or "ATTLIST" or "ATTRLIST" or "NOTATION" or "ENTITY" =>
				# don't interpret internal DTDs
				# <!ENTITY name ("value" | SYSTEM "filename")>
				# ATTLIST is the XML keyword; the misspelt ATTRLIST is still
				# accepted so that input tolerated before keeps parsing
				s = gets(x, '>');
				if(s == nil || s[len s-1] != '>')
					error(x, "end of file in declaration");
				return nil;
			* =>
				error(x, sys->sprint("unknown declaration: %s", s));
			}
		}
		# any arm above that neither returned nor raised ends up here
		error(x, "invalid XML declaration");
	} else
		error(x, sys->sprint("illegal beginning of tag: %c", c));
	return nil;
}

# Read characters up to and including the terminator eos and return the text
# before it. Quote characters and leading white space get no special treatment.
# Raises an error if the input ends first.
rawuntil(x: ref Parser, eos: string): string
{
	s := "";
	n := len eos;
	while ((c := getc(x)) != Bufio->EOF) {
		s[len s] = c;
		if (len s >= n && s[len s - n:] == eos)
			return s[0:len s - n];
	}
	error(x, sys->sprint("unexpected end of file while looking for \"%s\"", eos));
	return "";
}
39437da2899SCharles.Forsyth
# Read character data up to the next '<' (which is pushed back) or end of input.
# Entities are substituted; outside a preelem element white space is squashed too.
# Returns a Text parcel, or nil when nothing was read or only squashed-away
# content with no leading white space remains.
characters(x: ref Parser): ref Parcel
{
	text := gets(x, '<');
	n := len text;
	if (n == 0)
		return nil;
	if (text[n - 1] == '<') {
		# the '<' belongs to the following markup
		ungetc(x);
		text = text[0:n - 1];
	}
	lead := 0;
	trail := 0;
	if (x.ispre)
		text = substituteentities(x, text);
	else
		(text, lead, trail) = substituteentities_sp(x, text);
	if (text == nil && !lead)
		return nil;
	return ref Parcel.Text(text, lead, trail);
}
41537da2899SCharles.Forsyth
# Record that element 'name' has been opened: push it on the element stack
# and count it if it is the white-space-preserving element.
startelement(x: ref Parser, name: string)
{
	if (x.preelem == name)
		x.ispre += 1;
	x.estack = name :: x.estack;
}
42237da2899SCharles.Forsyth
# Record that an end tag for 'name' has been read.
# Normally it matches the innermost open element, which is popped.
# On a mismatch a warning is sent and, if 'name' is open further up the stack,
# everything down to and including it is popped.
endelement(x: ref Parser, name: string)
{
	if (x.estack != nil && name == hd x.estack) {
		x.estack = tl x.estack;
		if (name == x.preelem)
			x.ispre--;
	} else {
		starttag := "";
		if (x.estack != nil)
			starttag = hd x.estack;
		warning(x, sys->sprint("<%s></%s> mismatch", starttag, name));

		# invalid XML but try to recover anyway to reduce turnaround time on fixing errors.
		# loop back up through the tag stack to see if there's a matching tag, in which case
		# jump up in the stack to that, making some rude assumptions about the
		# way Parcels are handled at the top level.
		n := 0;
		npre := 0;	# preelem elements among those being discarded
		t := x.estack;
		while (t != nil && hd t != name) {
			if (hd t == x.preelem)
				npre++;
			n++;
			t = tl t;
		}
		if (t != nil) {
			if (name == x.preelem)
				npre++;
			x.estack = tl t;
			x.actdepth -= n;
			# keep ispre in step with the stack; without this a mismatched
			# end tag could leave white-space preservation switched on
			x.ispre -= npre;
			if (x.ispre < 0)
				x.ispre = 0;
		}
	}
}
44937da2899SCharles.Forsyth
# Read attributes (name, optionally followed by = and a quoted value) until
# something that is not a name is found. An attribute without '=' gets a nil value.
# Each attribute is added at the head of the list, so the result is in
# reverse order of appearance.
buildattrs(x: ref Parser): Attributes
{
	found: list of Attribute;
	a: Attribute;

	for (;;) {
		skipwhite(x);
		a.name = xmlname(x);
		if (a.name == nil)
			return Attributes(found);
		skipwhite(x);
		if (getc(x) == '=')
			a.value = xmlstring(x, 1);
		else {
			# no value given: leave the next character for the caller
			ungetc(x);
			a.value = nil;
		}
		found = a :: found;
	}
	return Attributes(found);
}
47137da2899SCharles.Forsyth
# Read a string quoted with ' or " after skipping white space and return its
# contents without the quotes. If dosub is non-zero, entities are substituted.
# A missing opening or closing quote is reported through error().
xmlstring(x: ref Parser, dosub: int): string
{
	skipwhite(x);
	q := getc(x);
	if (q != '\"' && q != '\'') {
		error(x, sys->sprint("illegal string delimiter: %c", q));
		return "";
	}
	body := gets(x, q);
	last := len body - 1;
	# gets() includes the delimiter; without it the input ended inside the string
	if (last < 0 || body[last] != q)
		error(x, "unclosed string at end of file");
	body = body[0:last];	# TO DO: avoid copy
	if(dosub)
		body = substituteentities(x, body);
	return body;
}
48937da2899SCharles.Forsyth
# Read an XML name from the input.
# Returns the name, or "" if the next character cannot start a name.
# In both cases the first character that is not part of the name is pushed back.
xmlname(x: ref Parser): string
{
	name := "";
	ch := getc(x);
	case ch {
	'_' or ':' or
	'a' to 'z' or
	'A' to 'Z' or
	16r100 to 16rd7ff or
	# a range, matching the name-continuation case below; this used to read
	# "16re000 or 16rfffd", which accepted only those two code points
	16re000 to 16rfffd =>
		name[0] = ch;
loop:
		for (;;) {
			case ch = getc(x) {
			'_' or '-' or ':' or '.' or
			'a' to 'z' or
			'0' to '9' or
			'A' to 'Z' or
			16r100 to 16rd7ff or
			16re000 to 16rfffd =>
				name[len name] = ch;
			* =>
				break loop;
			}
		}
	}
	ungetc(x);
	return name;
}
51937da2899SCharles.Forsyth
# Return buff with every &...; reference replaced by the text translateentity()
# gives for it. Replacement text is not rescanned. All other characters,
# including white space, are copied unchanged.
substituteentities(x: ref Parser, buff: string): string
{
	out := "";
	n := len buff;
	i := 0;
	while (i < n) {
		c := buff[i];
		if (c != '&') {
			out[len out] = c;
			i++;
		} else {
			# resume at the index translateentity() reports as following the reference
			(repl, after) := translateentity(x, buff, i);
			out += repl;
			i = after;
		}
	}
	return out;
}
53437da2899SCharles.Forsyth
# substitute entities, squashing whitespace along the way.
# Leading and trailing white space is dropped and each interior run becomes one space.
# Returns (text, 1 if there was leading white space, 1 if there was trailing white space).
substituteentities_sp(x: ref Parser, buf: string): (string, int, int)
{
	n := len buf;

	# skip initial white space
	i := 0;
	while (i < n && (buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'))
		i++;
	leading := i > 0;

	out := "";
	pending := 0;	# white space seen but not yet written out
	while (i < n) {
		c := buf[i];
		case c {
		' ' or '\t' or '\n' or '\r' =>
			pending = 1;
			i++;
		'&' =>
			if (pending) {
				out[len out] = ' ';
				pending = 0;
			}
			# should &#x20; count as whitespace?
			(ent, after) := translateentity(x, buf, i);
			out += ent;
			i = after;
		* =>
			if (pending) {
				out[len out] = ' ';
				pending = 0;
			}
			out[len out] = c;
			i++;
		}
	}
	return (out, leading, pending);
}
56937da2899SCharles.Forsyth
# Translate the entity reference that starts at s[i] (the '&').
# Returns (replacement text, index in s at which scanning should resume).
# Numeric references (&#NNN; and &#xHHH;) are converted directly; named ones
# are looked up in entdict. Anything unusable produces a warning and an empty
# replacement. If the ';' is missing, only the '&' itself is skipped.
translateentity(x: ref Parser, s: string, i: int): (string, int)
{
	i++;
	for (j := i; j < len s; j++)
		if (s[j] == ';')
			break;
	ent := s[i:j];
	if (j == len s) {
		# keep the warning short when the unterminated text is long
		if (len ent > 10)
			ent = ent[0:11] + "...";
		warning(x, sys->sprint("missing ; at end of entity (&%s)", ent));
		return (nil, i);
	}
	j++;
	if (ent == nil) {
		warning(x, "empty entity");
		return ("", j);
	}
	if (ent[0] == '#') {
		n: int;
		# rem stays non-empty unless a number is parsed completely
		rem := ent;
		if (len ent >= 3 && ent[1] == 'x')
			(n, rem) = str->toint(ent[2:], 16);
		else if (len ent >= 2)
			(n, rem) = str->toint(ent[1:], 10);
		# also reject values that are not character codes (zero, negative,
		# or beyond the Unicode range) instead of storing them in a string
		if (rem != nil || n <= 0 || n > 16r10ffff) {
			warning(x, sys->sprint("unrecognized entity (&%s)", ent));
			return (nil, j);
		}
		ch: string = nil;
		ch[0] = n;
		return (ch, j);
	}
	hv := entdict.find(ent);
	if (hv == nil) {
		warning(x, sys->sprint("unrecognized entity (&%s)", ent));
		return (nil, j);
	}
	return (hv.s, j);
}
61037da2899SCharles.Forsyth
# Skip white space, then read up to and including the terminator eos and
# return the text before it. A terminator inside a '...' or "..." quoted
# section is not recognised. Raises an error if the input ends first.
balancedstring(x: ref Parser, eos: string): string
{
	pad := len eos;

	# start with len eos blanks so the tail comparison below is always in range
	buf := "";
	while (len buf < pad)
		buf[len buf] = ' ';

	inquote := 0;
	q: int;

	skipwhite(x);
	for (;;) {
		c := getc(x);
		if (c == Bufio->EOF)
			break;
		buf[len buf] = c;
		if (inquote) {
			if (c == q)
				inquote = 0;
			continue;
		}
		if (c == '\"' || c == '\'') {
			q = c;
			inquote = 1;
			continue;
		}
		if (buf[len buf - pad:] == eos)
			return buf[pad : len buf - pad];
	}
	error(x, sys->sprint("unexpected end of file while looking for \"%s\"", eos));
	return "";
}
63537da2899SCharles.Forsyth
# Consume space, tab, newline and carriage-return characters;
# the first other character is pushed back.
skipwhite(x: ref Parser)
{
	c: int;
	do
		c = getc(x);
	while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
	ungetc(x);
}
64237da2899SCharles.Forsyth
# Require at least one white-space character, then skip any that follow.
# Raises an error if the next character is not white space.
expectwhite(x: ref Parser)
{
	c := getc(x);
	if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r'))
		error(x, "expecting white space");
	skipwhite(x);
}
64937da2899SCharles.Forsyth
# Skip white space and consume the next character, which must be ch;
# otherwise raise an error naming the expected character.
expect(x: ref Parser, ch: int)
{
	skipwhite(x);
	if (getc(x) != ch)
		error(x, sys->sprint("expecting %c", ch));
}
65737da2899SCharles.Forsyth
# Read one character, keeping the line count up to date.
# Once end of file has been seen, Bufio->EOF is returned without reading again.
getc(x: ref Parser): int
{
	if (x.eof)
		return Bufio->EOF;
	ch := x.in.getc();
	isnl := ch == '\n';
	if (isnl)
		x.loc.line++;
	else if (ch == Bufio->EOF)
		x.eof = 1;
	# remembered so that ungetc() can undo the line increment
	x.lastnl = isnl;
	return ch;
}
67037da2899SCharles.Forsyth
# Read up to and including the next delim character (or to end of input),
# counting the newlines read. An empty result marks end of file.
gets(x: ref Parser, delim: int): string
{
	if (x.eof)
		return "";
	s := x.in.gets(delim);
	n := len s;
	if (n == 0) {
		x.eof = 1;
		return s;
	}
	for (i := 0; i < n; i++)
		if (s[i] == '\n')
			x.loc.line++;
	# remembered so that ungetc() can undo the line increment
	x.lastnl = s[n - 1] == '\n';
	return s;
}
68537da2899SCharles.Forsyth
# Push back the character most recently read, undoing the line increment
# if it was a newline. Does nothing once end of file has been reached.
ungetc(x: ref Parser)
{
	if (!x.eof) {
		x.in.ungetc();
		x.loc.line -= x.lastnl;
	}
}
69337da2899SCharles.Forsyth
# Return the whole attribute list, in the order it is stored.
Attributes.all(al: self Attributes): list of Attribute
{
	everything := al.attrs;
	return everything;
}
69837da2899SCharles.Forsyth
# Return the value of the first stored attribute called name,
# or nil if there is no such attribute.
Attributes.get(attrs: self Attributes, name: string): string
{
	rest := attrs.attrs;
	while (rest != nil) {
		a := hd rest;
		if (a.name == name)
			return a.value;
		rest = tl rest;
	}
	return nil;
}
70637da2899SCharles.Forsyth
# Send a warning, tagged with the current location, on the parser's warning
# channel. Nothing is sent when no channel was supplied.
warning(x: ref Parser, msg: string)
{
	if (x.warning == nil)
		return;
	x.warning <-= (x.loc, msg);
}
71237da2899SCharles.Forsyth
# Abandon the current parse step: store msg in the parser and raise the
# "sax:error" exception, which getparcel() turns into a Parcel.Error.
error(x: ref Parser, msg: string)
{
	# the message must be stored before raising; the handler reads it from x
	x.errormsg = msg;
	raise "sax:error";
}
718