implement Xml;

#
# Portions copyright © 2002 Vita Nuova Holdings Limited
#
#
# Derived from saxparser.b Copyright © 2001-2002 by John Powers or his employer
#

# TO DO:
# - provide a way of getting attributes out of <?...?> (process) requests,
# so that we can process stylesheet requests given in that way.

include "sys.m";
	sys: Sys;
include "bufio.m";
	bufio: Bufio;
	Iobuf: import bufio;
include "string.m";
	str: String;
include "hash.m";
	hash: Hash;
	HashTable: import hash;
include "xml.m";

# Parcel is the low-level unit produced by getparcel();
# Parser.next() converts parcels into the Items handed to callers.
Parcel: adt {
	pick {
	Start or Empty =>
		name: string;
		attrs: Attributes;
	End =>
		name: string;
	Text =>
		ch: string;
		ws1, ws2: int;
	Process =>
		target: string;
		data: string;
	Error =>
		loc: Locator;
		msg: string;
	Doctype =>
		name: string;
		public: int;
		params: list of string;
	Stylesheet =>
		attrs: Attributes;
	EOF =>
	}
};

# named entities recognised by translateentity(), as (name, replacement) pairs;
# init() loads them into entdict.
entinit := array[] of {
	("AElig", "Æ"),
	("OElig", "Œ"),
	("aelig", "æ"),
	("amp", "&"),
	("apos", "\'"),
	("copy", "©"),
	("gt", ">"),
	("ldquo", "``"),
	("lt", "<"),
	("mdash", "-"),	# XXX ??
	("oelig", "œ"),
	("quot", "\""),
	("rdquo", "''"),
	("rsquo", "'"),
	("trade", "™"),
	("nbsp", "\u00a0"),
};
entdict: ref HashTable;

# init loads the modules this file depends on and fills the entity table.
# returns nil on success, otherwise a message naming the module that
# could not be loaded.
init(): string
{
	sys = load Sys Sys->PATH;
	if ((bufio = load Bufio Bufio->PATH) == nil)
		return sys->sprint("cannot load %s: %r", Bufio->PATH);
	if ((str = load String String->PATH) == nil)
		return sys->sprint("cannot load %s: %r", String->PATH);
	if ((hash = load Hash Hash->PATH) == nil)
		return sys->sprint("cannot load %s: %r", Hash->PATH);

	entdict = hash->new(23);
	for (i := 0; i < len entinit; i++) {
		(entname, text) := entinit[i];
		entdict.insert(entname, (0, 0.0, text));
	}
	return nil;
}

# zero-valued Parser; fopen() copies it to obtain a fresh parser.
blankparser: Parser;

# open opens srcfile for reading and hands the buffer to fopen().
# returns (nil, message) if the file cannot be opened.
open(srcfile: string, warning: chan of (Locator, string), preelem: string): (ref Parser, string)
{
	iob := bufio->open(srcfile, Bufio->OREAD);
	if (iob != nil)
		return fopen(iob, srcfile, warning, preelem);
	return (nil, sys->sprint("cannot open %s: %r", srcfile));
}

# fopen builds a parser reading from the already-open buffer fd.
# name is recorded in the parser's Locator, warning receives the
# (location, message) pairs sent by warning(), and preelem names the
# element tested by startelement()/endelement() when they adjust ispre.
fopen(fd: ref Bufio->Iobuf, name: string, warning: chan of (Locator, string), preelem: string): (ref Parser, string)
{
	parser := ref blankparser;
	parser.in = fd;
	parser.estack = nil;
	parser.loc = Locator(1, name, "");
	parser.warning = warning;
	parser.preelem = preelem;

	# ignore utf16 initialisation character (yuck):
	# anything else is pushed back for the parser proper.
	first := parser.in.getc();
	if (!(first == 16rfffe || first == 16rfeff))
		parser.in.ungetc();
	return (parser, "");
}

# next returns the next item at the current reading depth.
# it returns nil when the enclosing element (or the input) has ended,
# and an Item.Error when the underlying parcel reader reports an error.
Parser.next(x: self ref Parser): ref Item
{
	startoff := x.fileoffset;
	startloc := x.loc;

	# read up until end of current item
	while (x.actdepth > x.readdepth) {
		pick skipped := getparcel(x) {
		Error =>
			return ref Item.Error(startoff, x.loc, x.errormsg);
		EOF =>
			x.actdepth = 0;		# premature EOF closes all tags
		End =>
			x.actdepth--;
		Start =>
			x.actdepth++;
		}
	}

	# already above the level being read: nothing more at this level
	if (x.actdepth < x.readdepth) {
		x.fileoffset = int x.in.offset();
		return nil;
	}

	result: ref Item;
	pick p := getparcel(x) {
	Start =>
		x.actdepth++;
		result = ref Item.Tag(startoff, p.name, p.attrs);
	Empty =>
		# an empty element yields a Tag as well, but opens no new level
		result = ref Item.Tag(startoff, p.name, p.attrs);
	End =>
		x.actdepth--;
		result = nil;
	EOF =>
		x.actdepth = 0;
		result = nil;
	Error =>
		x.actdepth = 0;		# XXX is this the right thing to do?
		result = ref Item.Error(startoff, startloc, x.errormsg);
	Text =>
		result = ref Item.Text(startoff, p.ch, p.ws1, p.ws2);
	Process =>
		result = ref Item.Process(startoff, p.target, p.data);
	Doctype =>
		result = ref Item.Doctype(startoff, p.name, p.public, p.params);
	Stylesheet =>
		result = ref Item.Stylesheet(startoff, p.attrs);
	}
	x.fileoffset = int x.in.offset();
	return result;
}

# atmark reports whether the input offset equals the one recorded in m.
Parser.atmark(x: self ref Parser, m: ref Mark): int
{
	here := int x.in.offset();
	return here == m.offset;
}

# down makes subsequent next() calls read one level deeper.
Parser.down(x: self ref Parser)
{
	x.readdepth += 1;
}

# up makes subsequent next() calls read one level shallower.
Parser.up(x: self ref Parser)
{
	x.readdepth -= 1;
}

# mark is only defined after a next(), not after up() or down().
# this means that we don't have to record lots of state when going up or down levels.
# capture the element stack, line number, input offset and reading depth
# so that goto() can return to this position later.
Parser.mark(x: self ref Parser): ref Mark
{
	return ref Mark(x.estack, x.loc.line, int x.in.offset(), x.readdepth);
}

# reposition the parser at a previously recorded mark.
# NOTE(review): x.ispre is not restored here; confirm whether marks taken
# inside a preelem element are expected to keep text unsquashed after a goto.
Parser.goto(x: self ref Parser, m: ref Mark)
{
	x.in.seek(big m.offset, Sys->SEEKSTART);
	x.fileoffset = m.offset;
	x.eof = 0;
	x.estack = m.estack;
	x.loc.line = m.line;
	x.readdepth = m.readdepth;
	x.actdepth = len x.estack;
}

# render a mark as text; Parser.str2mark() parses the same format.
Mark.str(m: self ref Mark): string
{
	# assume that neither the filename nor any of the tags contain spaces.
	# format:
	#	offset readdepth linenum [tag...]
	# XXX would be nice if the produced string did not contain
	# any spaces so it could be treated as a word in other contexts.
	s := sys->sprint("%d %d %d", m.offset, m.readdepth, m.line);
	for (t := m.estack; t != nil; t = tl t)
		s += " " + hd t;
	return s;
}

# parse the format produced by Mark.str().
# returns nil if fewer than three fields are present; any fields after
# the third become the element stack.
Parser.str2mark(p: self ref Parser, s: string): ref Mark
{
	(n, toks) := sys->tokenize(s, " ");
	if (n < 3)
		return nil;
	m := ref Mark(nil, p.loc.line, 0, 0);
	(m.offset, toks) = (int hd toks, tl toks);
	(m.readdepth, toks) = (int hd toks, tl toks);
	(m.line, toks) = (int hd toks, tl toks);
	m.estack = toks;
	return m;
}

# read the next parcel from the input.
# element() and characters() may return nil (for example for a comment),
# in which case reading continues; at end of input the result is Parcel.EOF.
# a "sax:*" exception raised by error() is turned into Parcel.Error.
getparcel(x: ref Parser): ref Parcel
{
	{
		p: ref Parcel;
		while (!x.eof && p == nil) {
			c := getc(x);
			if (c == '<')
				p = element(x);
			else {
				ungetc(x);
				p = characters(x);
			}
		}
		if (p == nil)
			p = ref Parcel.EOF;
		return p;
	}exception e{
	"sax:*" =>
		return ref Parcel.Error(x.loc, x.errormsg);
	}
}

# short printable description of a parcel.
parcelstr(gi: ref Parcel): string
{
	if (gi == nil)
		return "nil";
	pick i := gi {
	Start =>
		return sys->sprint("Start: %s", i.name);
	Empty =>
		return sys->sprint("Empty: %s", i.name);
	End =>
		return "End";
	Text =>
		return "Text";
	Doctype =>
		return sys->sprint("Doctype: %s", i.name);
	Stylesheet =>
		return "Stylesheet";
	Error =>
		return "Error: " + i.msg;
	EOF =>
		return "EOF";
	* =>
		return "Unknown";
	}
}

# parse whatever follows a '<' that getparcel() has already consumed.
# returns nil for constructs that yield no parcel (comments, skipped DTD
# declarations); malformed input is reported through error(), which raises.
element(x: ref Parser): ref Parcel
{
	# <tag ...>
	elemname := xmlname(x);
	c: int;
	if (elemname != "") {
		attrs := buildattrs(x);
		skipwhite(x);
		c = getc(x);
		isend := 0;
		if (c == '/')
			isend = 1;
		else
			ungetc(x);
		expect(x, '>');

		if (isend)
			return ref Parcel.Empty(elemname, attrs);
		else {
			startelement(x, elemname);
			return ref Parcel.Start(elemname, attrs);
		}
	# </tag>
	} else if ((c = getc(x)) == '/') {
		elemname = xmlname(x);
		if (elemname != "") {
			expect(x, '>');
			endelement(x, elemname);
			return ref Parcel.End(elemname);
		}
		else
			error(x, sys->sprint("illegal beginning of tag: '%c'", c));
	# <?tag ... ?>
	} else if (c == '?') {
		elemname = xmlname(x);
		if (elemname != "") {
			# this special case could be generalised if there were many
			# processing instructions that took attributes like this.
			if (elemname == "xml-stylesheet") {
				attrs := buildattrs(x);
				balancedstring(x, "?>");
				return ref Parcel.Stylesheet(attrs);
			} else {
				data := balancedstring(x, "?>");
				return ref Parcel.Process(elemname, data);
			}
		}
	} else if (c == '!') {
		c = getc(x);
		case c {
		'-' =>
			# <!-- comment -->
			if(getc(x) == '-'){
				balancedstring(x, "-->");
				return nil;
			}
		'[' =>
			# <![CDATA[...]]>
			s := xmlname(x);
			if(s == "CDATA" && getc(x) == '['){
				data := balancedstring(x, "]]>");
				return ref Parcel.Text(data, 0, 0);
			}
		* =>
			# <!declaration
			ungetc(x);
			s := xmlname(x);
			case s {
			"DOCTYPE" =>
				# <!DOCTYPE name (SYSTEM "filename" | PUBLIC "pubid" "uri"?)? ("[" decls "]")?>
				skipwhite(x);
				name := xmlname(x);
				if(name == nil)
					break;
				id := "";
				uri := "";
				public := 0;
				skipwhite(x);
				case sort := xmlname(x) {
				"SYSTEM" =>
					id = xmlstring(x, 1);
				"PUBLIC" =>
					public = 1;
					id = xmlstring(x, 1);
					skipwhite(x);
					# peek: the uri after the public id is optional
					c = getc(x);
					ungetc(x);
					if(c == '"' || c == '\'')
						uri = xmlstring(x, 1);
				* =>
					error(x, sys->sprint("unknown DOCTYPE: %s", sort));
					return nil;
				}
				skipwhite(x);
				if(getc(x) == '['){
					error(x, "cannot handle DOCTYPE with declarations");
					return nil;
				}
				ungetc(x);
				skipwhite(x);
				if(getc(x) == '>')
					return ref Parcel.Doctype(name, public, id :: uri :: nil);
			"ELEMENT" or "ATTLIST" or "ATTRLIST" or "NOTATION" or "ENTITY" =>
				# don't interpret internal DTDs
				# <!ENTITY name ("value" | SYSTEM "filename")>
				# ATTLIST is the keyword XML defines; the ATTRLIST spelling
				# previously matched here is kept so existing input still works.
				s = gets(x, '>');
				if(s == nil || s[len s-1] != '>')
					error(x, "end of file in declaration");
				return nil;
			* =>
				error(x, sys->sprint("unknown declaration: %s", s));
			}
		}
		# reached when none of the arms above returned or raised
		error(x, "invalid XML declaration");
	} else
		error(x, sys->sprint("illegal beginning of tag: %c", c));
	return nil;
}

# read character data up to the next '<' (which is pushed back).
# returns a Text parcel, or nil when nothing but squashable white space
# remains after entity substitution and there was no leading white space.
characters(x: ref Parser): ref Parcel
{
	p: ref Parcel;
	content := gets(x, '<');
	if (len content > 0) {
		if (content[len content - 1] == '<') {
			ungetc(x);
			content = content[0:len content - 1];
		}
		ws1, ws2: int;
		if (x.ispre) {
			# inside preelem: keep white space exactly as written
			content = substituteentities(x, content);
			ws1 = ws2 = 0;
		} else
			(content, ws1, ws2) = substituteentities_sp(x, content);
		if (content != nil || ws1)
			p = ref Parcel.Text(content, ws1, ws2);
	}
	return p;
}

# push name on the element stack, counting nested preelem elements.
startelement(x: ref Parser, name: string)
{
	x.estack = name :: x.estack;
	if (name == x.preelem)
		x.ispre++;
}

# pop name from the element stack; on a mismatch, warn and try to recover.
endelement(x: ref Parser, name: string)
{
	if (x.estack != nil && name == hd x.estack) {
		x.estack = tl x.estack;
		if (name == x.preelem)
			x.ispre--;
	} else {
		starttag := "";
		if (x.estack != nil)
			starttag = hd x.estack;
		warning(x, sys->sprint("<%s></%s> mismatch", starttag, name));

		# invalid XML but try to recover anyway to reduce turnaround time on fixing errors.
		# loop back up through the tag stack to see if there's a matching tag, in which case
		# jump up in the stack to that, making some rude assumptions about the
		# way Parcels are handled at the top level.
		n := 0;
		for (t := x.estack; t != nil; (t, n) = (tl t, n + 1))
			if (hd t == name)
				break;
		if (t != nil) {
			x.estack = tl t;
			x.actdepth -= n;
		}
	}
}

# read name[=value] pairs until no further name is found.
# an attribute without '=' gets a nil value; each attribute is prepended,
# so the resulting list is in reverse document order.
buildattrs(x: ref Parser): Attributes
{
	attrs: list of Attribute;

	attr: Attribute;
	for (;;) {
		skipwhite(x);
		attr.name = xmlname(x);
		if (attr.name == nil)
			break;
		skipwhite(x);
		c := getc(x);
		if(c != '='){
			ungetc(x);
			attr.value = nil;
		}else
			attr.value = xmlstring(x, 1);
		attrs = attr :: attrs;
	}
	return Attributes(attrs);
}

# read a string delimited by single or double quotes and return its
# contents without the delimiters; entities are substituted when dosub is set.
# raises (via error) on a bad delimiter or an unterminated string.
xmlstring(x: ref Parser, dosub: int): string
{
	skipwhite(x);
	s := "";
	delim := getc(x);
	if (delim == '\"' || delim == '\'') {
		s = gets(x, delim);
		n := len s;
		if (n == 0 || s[n-1] != delim)
			error(x, "unclosed string at end of file");
		s = s[0:n-1];	# TO DO: avoid copy
		if(dosub)
			s = substituteentities(x, s);
	} else
		error(x, sys->sprint("illegal string delimiter: %c", delim));
	return s;
}

# read an XML name at the current position.
# returns "" (consuming nothing) if the next character cannot start a name.
xmlname(x: ref Parser): string
{
	name := "";
	ch := getc(x);
	case ch {
	'_' or ':' or
	'a' to 'z' or
	'A' to 'Z' or
	16r100 to 16rd7ff or
	16re000 to 16rfffd =>
		# the last qualifier is a range, matching the continuation case
		# below; it was previously the two single values 16re000 and 16rfffd.
		name[0] = ch;
loop:
		for (;;) {
			case ch = getc(x) {
			'_' or '-' or ':' or '.' or
			'a' to 'z' or
			'0' to '9' or
			'A' to 'Z' or
			16r100 to 16rd7ff or
			16re000 to 16rfffd =>
				name[len name] = ch;
			* =>
				break loop;
			}
		}
	}
	# push back the character that ended (or failed to start) the name
	ungetc(x);
	return name;
}

# replace each &...; reference in buff by its translation.
substituteentities(x: ref Parser, buff: string): string
{
	i := 0;
	while (i < len buff) {
		if (buff[i] == '&') {
			(t, j) := translateentity(x, buff, i);
			# XXX could be quicker
			buff = buff[0:i] + t + buff[j:];
			# continue after the replacement, so it is not rescanned
			i += len t;
		} else
			i++;
	}
	return buff;
}

# substitute entities, squashing whitespace along the way.
# returns (text, leading, trailing): leading white space is dropped and
# reported in the second value, each interior run becomes a single space,
# and the third value is set if the text ended in white space.
substituteentities_sp(x: ref Parser, buf: string): (string, int, int)
{
	firstwhite := 0;
	# skip initial white space
	for (i := 0; i < len buf; i++) {
		c := buf[i];
		if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
			break;
		firstwhite = 1;
	}

	lastwhite := 0;
	s := "";
	for (; i < len buf; i++) {
		c := buf[i];
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
			lastwhite = 1;
		else {
			if (lastwhite) {
				s[len s] = ' ';
				lastwhite = 0;
			}
			if (c == '&') {
				# should &x20; count as whitespace?
				(ent, j) := translateentity(x, buf, i);
				i = j - 1;	# the loop increment moves on to j
				s += ent;
			} else
				s[len s] = c;
		}
	}
	return (s, firstwhite, lastwhite);
}

# translate the entity reference whose '&' is at s[i].
# returns (replacement, index at which scanning should resume).
# problems are reported through warning() with a nil or empty replacement.
translateentity(x: ref Parser, s: string, i: int): (string, int)
{
	i++;
	for (j := i; j < len s; j++)
		if (s[j] == ';')
			break;
	ent := s[i:j];
	if (j == len s) {
		# no terminating ';': abbreviate long text in the message and
		# resume just after the '&'
		if (len ent > 10)
			ent = ent[0:11] + "...";
		warning(x, sys->sprint("missing ; at end of entity (&%s)", ent));
		return (nil, i);
	}
	j++;
	if (ent == nil) {
		warning(x, "empty entity");
		return ("", j);
	}
	if (ent[0] == '#') {
		# numeric reference: &#x<hex>; or &#<decimal>;
		n: int;
		rem := ent;
		if (len ent >= 3 && ent[1] == 'x')
			(n, rem) = str->toint(ent[2:], 16);
		else if (len ent >= 2)
			(n, rem) = str->toint(ent[1:], 10);
		# anything left unconverted means the number was malformed
		if (rem != nil) {
			warning(x, sys->sprint("unrecognized entity (&%s)", ent));
			return (nil, j);
		}
		ch: string = nil;
		ch[0] = n;
		return (ch, j);
	}
	hv := entdict.find(ent);
	if (hv == nil) {
		warning(x, sys->sprint("unrecognized entity (&%s)", ent));
		return (nil, j);
	}
	return (hv.s, j);
}

# read up to the terminator eos, ignoring any eos that appears inside a
# quoted string, and return the text before it.
# raises (via error) if the input ends first.
balancedstring(x: ref Parser, eos: string): string
{
	s := "";
	instring := 0;
	quote: int;

	# pad with len eos blanks so the tail slice below is always in range;
	# the padding is sliced off again on return
	for (i := 0; i < len eos; i++)
		s[len s] = ' ';

	skipwhite(x);
	while ((c := getc(x)) != Bufio->EOF) {
		s[len s] = c;
		if (instring) {
			if (c == quote)
				instring = 0;
		} else if (c == '\"' || c == '\'') {
			quote = c;
			instring = 1;
		} else if (s[len s - len eos : len s] == eos)
			return s[len eos : len s - len eos];
	}
	error(x, sys->sprint("unexpected end of file while looking for \"%s\"", eos));
	return "";
}

# skip blanks, tabs, newlines and carriage returns.
skipwhite(x: ref Parser)
{
	while ((c := getc(x)) == ' ' || c == '\t' || c == '\n' || c == '\r')
		;
	ungetc(x);
}

# require at least one white space character, then skip the rest.
expectwhite(x: ref Parser)
{
	if ((c := getc(x)) != ' ' && c != '\t' && c != '\n' && c != '\r')
		error(x, "expecting white space");
	skipwhite(x);
}

# skip white space, then require the character ch.
expect(x: ref Parser, ch: int)
{
	skipwhite(x);
	c := getc(x);
	if (c != ch)
		error(x, sys->sprint("expecting %c", ch));
}

# read one character, maintaining the eof flag and the line count.
# lastnl records whether the character was a newline so that ungetc()
# can undo the line count.
getc(x: ref Parser): int
{
	if (x.eof)
		return Bufio->EOF;
	ch := x.in.getc();
	if (ch == Bufio->EOF)
		x.eof = 1;
	else if (ch == '\n')
		x.loc.line++;
	x.lastnl = ch == '\n';
	return ch;
}

# read with the buffer's gets(delim), counting newlines in the result.
# callers test whether the last character of the result is delim to
# detect input that ended early; an empty result sets the eof flag.
gets(x: ref Parser, delim: int): string
{
	if (x.eof)
		return "";
	s := x.in.gets(delim);
	for (i := 0; i < len s; i++)
		if (s[i] == '\n')
			x.loc.line++;
	if (s == "")
		x.eof = 1;
	else
		x.lastnl = s[len s - 1] == '\n';
	return s;
}

# push back the last character read; does nothing once eof has been seen.
ungetc(x: ref Parser)
{
	if (x.eof)
		return;
	x.in.ungetc();
	x.loc.line -= x.lastnl;
}
# all returns the complete attribute list.
Attributes.all(al: self Attributes): list of Attribute
{
	everything := al.attrs;
	return everything;
}

# get returns the value of the first attribute called name,
# or nil if there is no such attribute.
Attributes.get(attrs: self Attributes, name: string): string
{
	rest := attrs.attrs;
	while (rest != nil) {
		a := hd rest;
		if (a.name == name)
			return a.value;
		rest = tl rest;
	}
	return nil;
}

# warning sends (current location, msg) on the parser's warning channel;
# it does nothing when the parser has no channel.
warning(x: ref Parser, msg: string)
{
	if (x.warning == nil)
		return;
	x.warning <-= (x.loc, msg);
}

# error records msg in the parser and raises "sax:error".
error(x: ref Parser, msg: string)
{
	x.errormsg = msg;
	raise "sax:error";
}