xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/xml.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1181254a7Smrg // Written in the D programming language.
2181254a7Smrg 
3181254a7Smrg /**
4181254a7Smrg $(RED Warning: This module is considered out-dated and not up to Phobos'
5*b1e83836Smrg       current standards. It will be removed from Phobos in 2.101.0.
6*b1e83836Smrg       If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7*b1e83836Smrg  */
8181254a7Smrg 
9*b1e83836Smrg /*
10181254a7Smrg Classes and functions for creating and parsing XML
11181254a7Smrg 
12181254a7Smrg The basic architecture of this module is that there are standalone functions,
13181254a7Smrg classes for constructing an XML document from scratch (Tag, Element and
14181254a7Smrg Document), and also classes for parsing a pre-existing XML file (ElementParser
15181254a7Smrg and DocumentParser). The parsing classes <i>may</i> be used to build a
16181254a7Smrg Document, but that is not their primary purpose. The handling capabilities of
17181254a7Smrg DocumentParser and ElementParser are sufficiently customizable that you can
18181254a7Smrg make them do pretty much whatever you want.
19181254a7Smrg 
20181254a7Smrg Example: This example creates a DOM (Document Object Model) tree
21181254a7Smrg     from an XML file.
22181254a7Smrg ------------------------------------------------------------------------------
23181254a7Smrg import std.xml;
24181254a7Smrg import std.stdio;
25181254a7Smrg import std.string;
26181254a7Smrg import std.file;
27181254a7Smrg 
28181254a7Smrg // books.xml is used in various samples throughout the Microsoft XML Core
29181254a7Smrg // Services (MSXML) SDK.
30181254a7Smrg //
31181254a7Smrg // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
32181254a7Smrg 
33181254a7Smrg void main()
34181254a7Smrg {
35181254a7Smrg     string s = cast(string) std.file.read("books.xml");
36181254a7Smrg 
37181254a7Smrg     // Check for well-formedness
38181254a7Smrg     check(s);
39181254a7Smrg 
40181254a7Smrg     // Make a DOM tree
41181254a7Smrg     auto doc = new Document(s);
42181254a7Smrg 
43181254a7Smrg     // Plain-print it
44181254a7Smrg     writeln(doc);
45181254a7Smrg }
46181254a7Smrg ------------------------------------------------------------------------------
47181254a7Smrg 
48181254a7Smrg Example: This example does much the same thing, except that the file is
49181254a7Smrg     deconstructed and reconstructed by hand. This is more work, but the
50181254a7Smrg     techniques involved offer vastly more power.
51181254a7Smrg ------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;
55181254a7Smrg 
56181254a7Smrg struct Book
57181254a7Smrg {
58181254a7Smrg     string id;
59181254a7Smrg     string author;
60181254a7Smrg     string title;
61181254a7Smrg     string genre;
62181254a7Smrg     string price;
63181254a7Smrg     string pubDate;
64181254a7Smrg     string description;
65181254a7Smrg }
66181254a7Smrg 
67181254a7Smrg void main()
68181254a7Smrg {
69181254a7Smrg     string s = cast(string) std.file.read("books.xml");
70181254a7Smrg 
71181254a7Smrg     // Check for well-formedness
72181254a7Smrg     check(s);
73181254a7Smrg 
74181254a7Smrg     // Take it apart
75181254a7Smrg     Book[] books;
76181254a7Smrg 
77181254a7Smrg     auto xml = new DocumentParser(s);
78181254a7Smrg     xml.onStartTag["book"] = (ElementParser xml)
79181254a7Smrg     {
80181254a7Smrg         Book book;
81181254a7Smrg         book.id = xml.tag.attr["id"];
82181254a7Smrg 
83181254a7Smrg         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
84181254a7Smrg         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
85181254a7Smrg         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
86181254a7Smrg         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
87181254a7Smrg         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
88181254a7Smrg         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
89181254a7Smrg 
90181254a7Smrg         xml.parse();
91181254a7Smrg 
92181254a7Smrg         books ~= book;
93181254a7Smrg     };
94181254a7Smrg     xml.parse();
95181254a7Smrg 
96181254a7Smrg     // Put it back together again;
97181254a7Smrg     auto doc = new Document(new Tag("catalog"));
98181254a7Smrg     foreach (book;books)
99181254a7Smrg     {
100181254a7Smrg         auto element = new Element("book");
101181254a7Smrg         element.tag.attr["id"] = book.id;
102181254a7Smrg 
103181254a7Smrg         element ~= new Element("author",      book.author);
104181254a7Smrg         element ~= new Element("title",       book.title);
105181254a7Smrg         element ~= new Element("genre",       book.genre);
106181254a7Smrg         element ~= new Element("price",       book.price);
107181254a7Smrg         element ~= new Element("publish-date",book.pubDate);
108181254a7Smrg         element ~= new Element("description", book.description);
109181254a7Smrg 
110181254a7Smrg         doc ~= element;
111181254a7Smrg     }
112181254a7Smrg 
113181254a7Smrg     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
115181254a7Smrg }
116181254a7Smrg -------------------------------------------------------------------------------
117181254a7Smrg Copyright: Copyright Janice Caron 2008 - 2009.
118181254a7Smrg License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
119181254a7Smrg Authors:   Janice Caron
120*b1e83836Smrg Source:    $(PHOBOSSRC std/xml.d)
121181254a7Smrg */
122181254a7Smrg /*
123181254a7Smrg          Copyright Janice Caron 2008 - 2009.
124181254a7Smrg Distributed under the Boost Software License, Version 1.0.
125181254a7Smrg    (See accompanying file LICENSE_1_0.txt or copy at
126181254a7Smrg          http://www.boost.org/LICENSE_1_0.txt)
127181254a7Smrg */
128*b1e83836Smrg deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
129181254a7Smrg module std.xml;
130181254a7Smrg 
// Opening delimiter of an XML CDATA section.
enum cdata = "<![CDATA[";
132181254a7Smrg 
/*
 * Tells whether a code point is a legal XML character (the "Char"
 * production of the XML grammar).
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c may appear in an XML document, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Everything below the surrogate block: printable range plus the three
    // permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // Surrogates (U+D800 .. U+DFFF) and values past U+10FFFF are never legal.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Clearing bit 0 folds U+FFFE and U+FFFF onto 0xFFFE, the only two
    // excluded code points in this range.
    return (c & 0x1FFFFE) != 0xFFFE;
}
164181254a7Smrg 
@safe @nogc nothrow pure unittest
{
    // Boundary values isChar must accept (0x4A is 'J').
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 0x4A, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];

    // Boundary values isChar must reject: other control characters, the
    // surrogate block, U+FFFE/U+FFFF, and the first value past U+10FFFF.
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (codePoint; accepted)
        assert( isChar(cast(dchar) codePoint));
    foreach (codePoint; rejected)
        assert(!isChar(cast(dchar) codePoint));

    // Optional exhaustive comparison of the hard-coded test with CharTable.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
194181254a7Smrg 
/*
 * Tells whether a character counts as whitespace in XML.
 *
 * XML recognises exactly four whitespace characters: space (U+0020),
 * tab (U+0009), line feed (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four characters above
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case ' ', '\t', '\n', '\r':
        return true;
    default:
        return false;
    }
}
210181254a7Smrg 
/*
 * Tells whether a character is a digit in the sense of the XML standard.
 *
 * ASCII '0' .. '9' are answered directly; every other character is looked
 * up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path for the ASCII digits; the table is consulted only otherwise.
    immutable bool asciiDigit = c >= 0x0030 && c <= 0x0039;
    return asciiDigit || lookup(DigitTable,c);
}
226181254a7Smrg 
@safe @nogc nothrow pure unittest
{
    // Only active with -debug=stdxml_TestHardcodedChecks: compares isDigit
    // with a plain DigitTable lookup for every value up to dchar.max.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isDigit(codePoint) == lookup(DigitTable, codePoint));
        }
    }
}
235181254a7Smrg 
/*
 * Tells whether a character is a letter in the sense of the XML standard,
 * i.e. either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; isBaseChar runs only if it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
248181254a7Smrg 
/*
 * Tells whether a character is an ideographic character in the sense of the
 * XML standard.
 *
 * The accepted code points are U+3007, U+3021 .. U+3029 and
 * U+4E00 .. U+9FA5 (all bounds inclusive).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ranges above
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}
268181254a7Smrg 
@safe @nogc nothrow pure unittest
{
    // The single code point and both ends of each accepted range.
    static immutable dchar[] samples =
        ['\u3007', '\u3021', '\u3029', '\u4E00', '\u9FA5'];

    foreach (sample; samples)
        assert(isIdeographic(sample));

    // Optional exhaustive comparison with IdeographicTable.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
283181254a7Smrg 
/*
 * Tells whether a character is a base character in the sense of the XML
 * standard. The answer is obtained by looking c up in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the BaseCharTable lookup
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(BaseCharTable,c);
    return listed;
}
297181254a7Smrg 
/*
 * Tells whether a character is a combining character in the sense of the
 * XML standard. The answer is obtained by looking c up in
 * CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the CombiningCharTable lookup
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(CombiningCharTable,c);
    return listed;
}
311181254a7Smrg 
/*
 * Tells whether a character is an extender in the sense of the XML
 * standard. The answer is obtained by looking c up in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the ExtenderTable lookup
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(ExtenderTable,c);
    return listed;
}
324181254a7Smrg 
/*
 * Escapes the five characters that have predefined XML entities.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are
 * replaced by &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt;
 * respectively; decode() performs the reverse mapping. Both functions are
 * conveniences: the std.xml classes already encode and decode for you.
 *
 * When the input contains none of those characters, the very same string is
 * returned and nothing is allocated.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t pending; // start of the run of ordinary characters not yet copied

    foreach (idx, ch; s)
    {
        // Stays empty for characters that need no escaping.
        string entity;
        switch (ch)
        {
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        default:   break;
        }

        if (entity.length != 0)
        {
            // Flush the untouched run, then emit the entity in place of ch.
            output.put(s[pending .. idx]);
            output.put(entity);
            pending = idx + 1;
        }
    }

    // Nothing was ever written: hand back the caller's string unchanged.
    if (!output.data.ptr) return s;

    output.put(s[pending .. $]);
    return output.data;
}
378181254a7Smrg 
@safe pure unittest
{
    // Input without special characters must come back as the same slice.
    auto plain = "hello";
    assert(encode(plain) is plain);

    // Each row is [raw input, expected escaped output].
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];

    foreach (row; cases)
    {
        string raw = row[0];
        string expected = row[1];
        auto actual = encode(raw);
        assert(actual == expected, actual);
    }
}
389181254a7Smrg 
/*
 * Selects how decode() treats its input.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE,   // return the input untouched
    LOOSE,  // decode; anything malformed is copied through as-is
    STRICT  // decode; anything malformed raises an exception
}
401181254a7Smrg 
/*
 * Replaces XML entities in a string by the characters they stand for.
 *
 * This is the inverse of encode(): the predefined entities &amp;amp;,
 * &amp;quot;, &amp;apos;, &amp;lt; and &amp;gt; are unescaped, and so are
 * decimal and hexadecimal character references such as &amp;#x20AC;.
 * Both functions are conveniences only; the std.xml classes already encode
 * and decode for you.
 *
 * If the string contains no ampersand, or if mode is DecodeMode.NONE, the
 * original string is returned.
 *
 * With DecodeMode.LOOSE an ampersand that does not begin a recognised entity
 * is copied to the output unchanged; with DecodeMode.STRICT it raises a
 * DecodeException.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Predefined entities and the character each one stands for, index-aligned.
    static immutable string[5] entityNames = ["&amp;", "&quot;", "&apos;", "&lt;", "&gt;"];
    static immutable char[5] entityChars = ['&', '"', '\'', '<', '>'];

    // Stays empty until the first '&' is met, so ampersand-free input is
    // never copied.
    string decoded;
    foreach (ref pos; 0 .. s.length)
    {
        immutable char ch = s[pos];
        if (ch != '&')
        {
            if (decoded.length != 0) decoded ~= ch;
            continue;
        }

        // First ampersand: start the output with everything seen so far.
        if (decoded.length == 0)
            decoded = s[0 .. pos].dup;

        string rest = s[pos .. $];
        if (startsWith(rest, "&#"))
        {
            // Numeric character reference.
            try
            {
                dchar codePoint;
                string tail = rest;
                checkCharRef(tail, codePoint);
                char[4] utf8;
                import std.utf : encode;
                decoded ~= utf8[0 .. encode(utf8, codePoint)];
                // Move to the last consumed character; the loop's own
                // increment then steps past it.
                pos = s.length - tail.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                decoded ~= '&';
            }
            continue;
        }

        // Named entity: try each predefined name in turn.
        bool recognised = false;
        foreach (k, entity; entityNames)
        {
            if (startsWith(rest, entity))
            {
                decoded ~= entityChars[k];
                // Skip the rest of the entity; the loop adds the final step.
                pos += entity.length - 1;
                recognised = true;
                break;
            }
        }

        if (!recognised)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            decoded ~= '&';
        }
    }
    return (decoded.length == 0) ? s : decoded;
}
490181254a7Smrg 
@safe pure unittest
{
    // Passes only if strict decoding of s raises a DecodeException.
    void assertNot(string s) pure
    {
        bool threw = false;
        try
        {
            decode(s,DecodeMode.STRICT);
        }
        catch (DecodeException e)
        {
            threw = true;
        }
        assert(threw,s);
    }

    // Entity-free input is handed back as the same slice.
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Well-formed input, strict mode: [input, expected output].
    static immutable string[2][] strictCases = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (row; strictCases)
    {
        string input = row[0];
        string expected = row[1];
        assert(decode(input, DecodeMode.STRICT) == expected);
    }

    // Malformed input: loose mode passes it through unchanged, strict mode
    // must throw.
    static immutable string[] malformed =
        ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (text; malformed)
    {
        string input = text;
        assert(decode(input, DecodeMode.LOOSE) == input);
    }
    foreach (text; malformed)
    {
        string input = text;
        assertNot(input);
    }
}
526181254a7Smrg 
/*
 * An entire XML document: a root Element together with the raw text found
 * before it (the prolog) and after it (the epilog).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /*
     * All text preceding the root element.
     * A fresh Document starts with &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /*
     * All text following the root element.
     * Empty unless assigned.
     */
    string epilog;

    /*
     * Builds a Document, i.e. a complete DOM (Document Object Model) tree,
     * by parsing XML text.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text; must not be empty.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        // The pointer arithmetic below assumes the root tag's text is a
        // slice of s, so its offset within s is the length of the prolog.
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Presumably the text left unread once the root element is parsed.
        epilog = *parser.s;
    }

    /*
     * Builds a Document whose root element is described by a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /*
         * Equality test: two Documents are equal when their prologs, their
         * element content and their epilogs are all equal.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /*
         * Ordering: compares prolog first, then the element content, then
         * the epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            if (int elementOrder = this.Element.opCmp(other))
                return elementOrder;

            return epilog == other.epilog ? 0 : (epilog < other.epilog ? -1 : 1);
        }

        /*
         * Hash of a Document, combining the element hash with the epilog
         * and then the prolog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            const elementHash = (cast() this).Element.toHash();
            return hash(prolog, hash(epilog, elementHash));
        }

        /*
         * The complete XML text of the document: prolog, then the root
         * element as rendered by Element, then epilog.
         */
        override string toString() scope const @safe
        {
            string rootXml = super.toString();
            return prolog ~ rootXml ~ epilog;
        }
    }
}
648181254a7Smrg 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal and are
    // interchangeable as associative array keys.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));
    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // After gaining a child element, the second document sorts after the first.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
666181254a7Smrg 
/*
 * Class representing an XML element.
 *
 * An Element holds a Tag plus an ordered list of child items. Every child is
 * recorded in $(B items) and, additionally, in the typed array matching its
 * class (texts, cdatas, comments, pis or elements).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; // The start tag of the element
    Item[] items; // The element's items
    Text[] texts; // The element's text items
    CData[] cdatas; // The element's CData items
    Comment[] comments; // The element's comments
    ProcessingInstruction[] pis; // The element's processing instructions
    Element[] elements; // The element's child elements

    /*
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. It is wrapped in a Text
     *          item; a null or empty string adds no item at all.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /*
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and original tag string), never
     * shared. The copy always starts out as TagType.EMPTY; appendItem
     * switches it to TagType.START once a non-empty item is appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    /*
     * Records item in the ordered child list. The first child that is not
     * itself empty XML turns an EMPTY tag into a START tag.
     */
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    /*
     * Fills this element from a parser.
     *
     * Installs handlers that append every text, CData, comment and processing
     * instruction reported by the parser, plus a start-tag handler under the
     * null key (presumably the catch-all slot; confirm against ElementParser)
     * that builds each child element recursively. Then runs the parser.
     */
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser child)
        {
            auto e = new Element(child.tag);
            e.parse(child);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    /*
     * Compares two Elements for equality
     *
     * Two elements are equal when they have the same number of items and the
     * items compare equal pairwise.
     *
     * NOTE(review): the tag is not consulted here, whereas toHash mixes in
     * tag.toHash(). Elements with equal items but different tags therefore
     * compare equal yet may hash differently. Left unchanged because callers
     * may depend on the current equality semantics.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /*
     * Compares two Elements
     *
     * Items are compared position by position; the first unequal pair
     * decides. If one item list is a prefix of the other, the shorter
     * element orders first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t matches the type of .length, so the index cannot wrap
        // before reaching the end of either array.
        for (size_t i = 0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /*
     * Returns the hash of an Element
     *
     * The value is the tag's hash plus the sum of the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if any item of the
         * element is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                // Keep the cast const-correct, as pretty() does; this is a
                // const method and the item is only read.
                const t = cast(const Text) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /*
         * Returns an indented string representation of this item
         *
         * An element without items yields its empty tag on one line. An
         * element whose only item is a Text yields start tag, text and end
         * tag on one line. Otherwise the start and end tags get their own
         * lines and every line produced by a child is shifted right.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent the
         *          lines of child items. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Widening the field by `indent` pads the line on the left.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        // True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
980181254a7Smrg 
/*
 * The kinds of tag a Tag object can represent.
 */
enum TagType
{
    START, // Used for start tags, rendered as <name ...>
    END,   // Used for end tags, rendered as </name>
    EMPTY  // Used for empty tags, rendered as <name ... />
}
990181254a7Smrg 
/*
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   // Type of tag
    string name;                    // Tag name
    string[string] attr;            // Associative array of attributes
    private string tagString;       // Source text of the tag; set only by the parsing constructor

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName takes its input by reference, so validate a copy.
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /*
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException carrying the text consumed so far when the input
     * is not a well-formed tag. This includes input that ends before the
     * closing '>'.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil reports "not found" as -1. Slicing with that value would
        // raise a RangeError and bypass the catch block below, so turn it
        // into the exception type that block handles.
        static size_t found(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        // Drops leading ASCII white space. Running out of input here is an
        // error: at the very least the closing '>' must still follow.
        static void skipWhite(ref string str) @safe pure
        {
            str = str[found(str.byCodeUnit.countUntil!(a => !isWhite(a))) .. $];
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter.
            size_t i = found(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            skipWhite(s);

            // Attributes: key, optional white space, '=', optional white
            // space, quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = found(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                // The value ends at the same quote character that opened it.
                immutable char quote = requireOneOf(s,"'\"");
                i = found(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // An end tag cannot also be an empty tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the part of the input that belongs to this tag.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /*
         * Compares two Tags for equality
         *
         * Tags are equal when name, attributes and type all match.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name && attr == tag.attr && type == tag.type;
        }

        /*
         * Compares two Tags
         *
         * Orders by name first, then by attributes, then by type.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            if (name != tag.name) return name < tag.name ? -1 : 1;
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            if (attr != tag.attr) return cast(void *) attr < cast(void*) tag.attr ? -1 : 1;
            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /*
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /*
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writeln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name" followed by every attribute as key="encoded value";
            // shared by the start and empty renderings.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /*
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /*
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /*
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1240181254a7Smrg 
/*
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
     * Compares two comments for equality
     *
     * Returns true only when the other object is a Comment with the same
     * body.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two comments
     *
     * Returns a negative value, zero or a positive value when this comment's
     * body orders before, equal to or after the other's. An item that is not
     * a Comment compares as 0.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // Decide with explicit branches. Folding the result into
        // `t !is null && (...)` yields a bool, which turns the -1 for
        // "less than" into 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1323181254a7Smrg 
// https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;

    // "==" contains no "--", so it is accepted and stored verbatim.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
1332181254a7Smrg 
/*
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /*
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /*
     * Compares two CDatas for equality
     *
     * Returns true only when the other object is a CData with the same body.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two CDatas
     *
     * Returns a negative value, zero or a positive value when this section's
     * body orders before, equal to or after the other's. An item that is not
     * a CData compares as 0.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // Decide with explicit branches. Folding the result into
        // `t !is null && (...)` yields a bool, which turns the -1 for
        // "less than" into 1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this CData section
     *
     * The body is placed between the module-level `cdata` prefix (declared
     * elsewhere in this file; presumably "<![CDATA[", as the constructor
     * example suggests) and the closing "]]>".
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1412181254a7Smrg 
1413*b1e83836Smrg /*
1414181254a7Smrg  * Class representing a text (aka Parsed Character Data) section
1415181254a7Smrg  */
class Text : Item
{
    private string content;

    /*
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /*
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1 if this content orders before the other's, 1 if it orders
     *          after, and 0 if the contents are equal or if o is an Item
     *          that is not a Text.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // The previous `t !is null && (...)` expression coerced the int
        // ordering to bool, so the result was only ever 0 or 1. Branch
        // explicitly so that "less than" really yields -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this Text section
     * (the content as stored by the constructor, i.e. already encoded)
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /*
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1492181254a7Smrg 
1493*b1e83836Smrg /*
1494181254a7Smrg  * Class representing an XML Instruction section
1495181254a7Smrg  */
class XMLInstruction : Item
{
    private string content;

    /*
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /*
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Returns: -1 if this content orders before the other's, 1 if it orders
     *          after, and 0 if the contents are equal or if o is an Item
     *          that is not an XMLInstruction.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // The previous `t !is null && (...)` expression coerced the int
        // ordering to bool, so the result was only ever 0 or 1. Branch
        // explicitly so that "less than" really yields -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this XMLInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1572181254a7Smrg 
1573*b1e83836Smrg /*
1574181254a7Smrg  * Class representing a Processing Instruction section
1575181254a7Smrg  */
class ProcessingInstruction : Item
{
    private string content;

    /*
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /*
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1 if this content orders before the other's, 1 if it orders
     *          after, and 0 if the contents are equal or if o is an Item
     *          that is not a ProcessingInstruction.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // The previous `t !is null && (...)` expression coerced the int
        // ordering to bool, so the result was only ever 0 or 1. Branch
        // explicitly so that "less than" really yields -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } // Returns false always
}
1652181254a7Smrg 
1653*b1e83836Smrg /*
1654181254a7Smrg  * Abstract base class for XML items
1655181254a7Smrg  */
abstract class Item
{
    // Equality test against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    // Ordering comparison against another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    // Hash value of this item
    abstract override size_t toHash() @safe scope const;

    // String representation of this item
    abstract override string toString() @safe scope const;

    /*
     * Returns an indented string representation of this item
     *
     * This base implementation strips the result of toString() and returns
     * it as a single-entry array, or an empty array when nothing is left
     * after stripping. The indent argument is not consulted here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    // True when the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1686181254a7Smrg 
1687*b1e83836Smrg /*
1688181254a7Smrg  * Class for parsing an XML Document.
1689181254a7Smrg  *
1690181254a7Smrg  * This is a subclass of ElementParser. Most of the useful functions are
1691181254a7Smrg  * documented there.
1692181254a7Smrg  *
1693181254a7Smrg  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1694181254a7Smrg  *
1695181254a7Smrg  * Bugs:
1696181254a7Smrg  *      Currently only supports UTF documents.
1697181254a7Smrg  *
1698181254a7Smrg  *      If there is an encoding attribute in the prolog, it is ignored.
1699181254a7Smrg  *
1700181254a7Smrg  */
class DocumentParser : ElementParser
{
    string xmlText;

    /*
     * Constructs a DocumentParser.
     *
     * The input MUST be non-empty, valid XML. Both conditions are verified
     * by the in contract: an empty string fails the first assertion, and a
     * CheckException raised by check() is turned into an assertion failure
     * carrying the exception text.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Validate the document up front
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // Surface the reason for the rejection to the caller
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        this.xmlText = xmlText_;
        // The base constructor dereferences s, so point it at our copy of
        // the text before calling super().
        this.s = &this.xmlText;
        super();    // Base-class initialisation
        parse();    // Consume input up to and including the root tag
    }
}
1738181254a7Smrg 
@system unittest
{
    // Building a Document from text should expose exactly one child element
    // ("child") beneath the root, and items should match elements.
    auto document = new Document("<root><child><grandchild/></child></root>");
    auto children = document.elements;
    assert(children.length == 1);
    assert(children[0].tag.name == "child");
    assert(document.items == document.elements);
}
1746181254a7Smrg 
1747*b1e83836Smrg /*
1748181254a7Smrg  * Class for parsing an XML element.
1749181254a7Smrg  *
1750181254a7Smrg  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1751181254a7Smrg  *
1752181254a7Smrg  * Note that you cannot construct instances of this class directly. You can
1753181254a7Smrg  * construct a DocumentParser (which is a subclass of ElementParser), but
1754181254a7Smrg  * otherwise, Instances of ElementParser will be created for you by the
1755181254a7Smrg  * library, and passed your way via onStartTag handlers.
1756181254a7Smrg  *
1757181254a7Smrg  */
1758181254a7Smrg class ElementParser
1759181254a7Smrg {
1760181254a7Smrg     alias Handler = void delegate(string);
1761181254a7Smrg     alias ElementHandler = void delegate(in Element element);
1762181254a7Smrg     alias ParserHandler = void delegate(ElementParser parser);
1763181254a7Smrg 
1764181254a7Smrg     private
1765181254a7Smrg     {
1766181254a7Smrg         Tag tag_;
1767181254a7Smrg         string elementStart;
1768181254a7Smrg         string* s;
1769181254a7Smrg 
1770181254a7Smrg         Handler commentHandler = null;
1771181254a7Smrg         Handler cdataHandler = null;
1772181254a7Smrg         Handler xiHandler = null;
1773181254a7Smrg         Handler piHandler = null;
1774181254a7Smrg         Handler rawTextHandler = null;
1775181254a7Smrg         Handler textHandler = null;
1776181254a7Smrg 
1777181254a7Smrg         // Private constructor for start tags
this(ElementParser parent)1778181254a7Smrg         this(ElementParser parent) @safe @nogc pure nothrow
1779181254a7Smrg         {
1780181254a7Smrg             s = parent.s;
1781181254a7Smrg             this();
1782181254a7Smrg             tag_ = parent.tag_;
1783181254a7Smrg         }
1784181254a7Smrg 
1785181254a7Smrg         // Private constructor for empty tags
this(Tag tag,string * t)1786181254a7Smrg         this(Tag tag, string* t) @safe @nogc pure nothrow
1787181254a7Smrg         {
1788181254a7Smrg             s = t;
1789181254a7Smrg             this();
1790181254a7Smrg             tag_ = tag;
1791181254a7Smrg         }
1792181254a7Smrg     }
1793181254a7Smrg 
1794*b1e83836Smrg     /*
1795181254a7Smrg      * The Tag at the start of the element being parsed. You can read this to
1796181254a7Smrg      * determine the tag's name and attributes.
1797181254a7Smrg      */
tag()1798181254a7Smrg     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1799181254a7Smrg 
1800*b1e83836Smrg     /*
1801181254a7Smrg      * Register a handler which will be called whenever a start tag is
1802181254a7Smrg      * encountered which matches the specified name. You can also pass null as
1803181254a7Smrg      * the name, in which case the handler will be called for any unmatched
1804181254a7Smrg      * start tag.
1805181254a7Smrg      *
1806181254a7Smrg      * Example:
1807181254a7Smrg      * --------------
1808181254a7Smrg      * // Call this function whenever a <podcast> start tag is encountered
1809181254a7Smrg      * onStartTag["podcast"] = (ElementParser xml)
1810181254a7Smrg      * {
1811181254a7Smrg      *     // Your code here
1812181254a7Smrg      *     //
1813181254a7Smrg      *     // This is a closure, so code here may reference
1814181254a7Smrg      *     // variables which are outside of this scope
1815181254a7Smrg      * };
1816181254a7Smrg      *
1817181254a7Smrg      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1818181254a7Smrg      * // start tag is encountered
1819181254a7Smrg      * onStartTag["episode"] = &myEpisodeStartHandler;
1820181254a7Smrg      *
1821181254a7Smrg      * // call delegate dg for all other start tags
1822181254a7Smrg      * onStartTag[null] = dg;
1823181254a7Smrg      * --------------
1824181254a7Smrg      *
1825181254a7Smrg      * This library will supply your function with a new instance of
1826181254a7Smrg      * ElementParser, which may be used to parse inside the element whose
1827181254a7Smrg      * start tag was just found, or to identify the tag attributes of the
1828181254a7Smrg      * element, etc.
1829181254a7Smrg      *
1830181254a7Smrg      * Note that your function will be called for both start tags and empty
1831181254a7Smrg      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1832181254a7Smrg      * and &lt;br/&gt;.
1833181254a7Smrg      */
1834181254a7Smrg     ParserHandler[string] onStartTag;
1835181254a7Smrg 
1836*b1e83836Smrg     /*
1837181254a7Smrg      * Register a handler which will be called whenever an end tag is
1838181254a7Smrg      * encountered which matches the specified name. You can also pass null as
1839181254a7Smrg      * the name, in which case the handler will be called for any unmatched
1840181254a7Smrg      * end tag.
1841181254a7Smrg      *
1842181254a7Smrg      * Example:
1843181254a7Smrg      * --------------
1844181254a7Smrg      * // Call this function whenever a </podcast> end tag is encountered
1845181254a7Smrg      * onEndTag["podcast"] = (in Element e)
1846181254a7Smrg      * {
1847181254a7Smrg      *     // Your code here
1848181254a7Smrg      *     //
1849181254a7Smrg      *     // This is a closure, so code here may reference
1850181254a7Smrg      *     // variables which are outside of this scope
1851181254a7Smrg      * };
1852181254a7Smrg      *
1853181254a7Smrg      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1854181254a7Smrg      * // end tag is encountered
1855181254a7Smrg      * onEndTag["episode"] = &myEpisodeEndHandler;
1856181254a7Smrg      *
1857181254a7Smrg      * // call delegate dg for all other end tags
1858181254a7Smrg      * onEndTag[null] = dg;
1859181254a7Smrg      * --------------
1860181254a7Smrg      *
1861181254a7Smrg      * Note that your function will be called for both end tags and empty
1862181254a7Smrg      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1863181254a7Smrg      * and &lt;br/&gt;.
1864181254a7Smrg      */
1865181254a7Smrg     ElementHandler[string] onEndTag;
1866181254a7Smrg 
this()1867181254a7Smrg     protected this() @safe @nogc pure nothrow
1868181254a7Smrg     {
1869181254a7Smrg         elementStart = *s;
1870181254a7Smrg     }
1871181254a7Smrg 
1872*b1e83836Smrg     /*
1873181254a7Smrg      * Register a handler which will be called whenever text is encountered.
1874181254a7Smrg      *
1875181254a7Smrg      * Example:
1876181254a7Smrg      * --------------
1877181254a7Smrg      * // Call this function whenever text is encountered
1878181254a7Smrg      * onText = (string s)
1879181254a7Smrg      * {
1880181254a7Smrg      *     // Your code here
1881181254a7Smrg      *
1882181254a7Smrg      *     // The passed parameter s will have been decoded by the time you see
1883181254a7Smrg      *     // it, and so may contain any character.
1884181254a7Smrg      *     //
1885181254a7Smrg      *     // This is a closure, so code here may reference
1886181254a7Smrg      *     // variables which are outside of this scope
1887181254a7Smrg      * };
1888181254a7Smrg      * --------------
1889181254a7Smrg      */
onText(Handler handler)1890181254a7Smrg     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1891181254a7Smrg 
1892*b1e83836Smrg     /*
1893181254a7Smrg      * Register an alternative handler which will be called whenever text
1894181254a7Smrg      * is encountered. This differs from onText in that onText will decode
1895181254a7Smrg      * the text, whereas onTextRaw will not. This allows you to make design
1896181254a7Smrg      * choices, since onText will be more accurate, but slower, while
1897181254a7Smrg      * onTextRaw will be faster, but less accurate. Of course, you can
1898181254a7Smrg      * still call decode() within your handler, if you want, but you'd
1899181254a7Smrg      * probably want to use onTextRaw only in circumstances where you
1900181254a7Smrg      * know that decoding is unnecessary.
1901181254a7Smrg      *
1902181254a7Smrg      * Example:
1903181254a7Smrg      * --------------
1904181254a7Smrg      * // Call this function whenever text is encountered
1905181254a7Smrg      * onTextRaw = (string s)
1906181254a7Smrg      * {
1907181254a7Smrg      *     // Your code here
1908181254a7Smrg      *
1909181254a7Smrg      *     // The passed parameter s will NOT have been decoded.
1910181254a7Smrg      *     //
1911181254a7Smrg      *     // This is a closure, so code here may reference
1912181254a7Smrg      *     // variables which are outside of this scope
1913181254a7Smrg      * };
1914181254a7Smrg      * --------------
1915181254a7Smrg      */
onTextRaw(Handler handler)1916181254a7Smrg     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1917181254a7Smrg 
1918*b1e83836Smrg     /*
1919181254a7Smrg      * Register a handler which will be called whenever a character data
1920181254a7Smrg      * segment is encountered.
1921181254a7Smrg      *
1922181254a7Smrg      * Example:
1923181254a7Smrg      * --------------
1924181254a7Smrg      * // Call this function whenever a CData section is encountered
1925181254a7Smrg      * onCData = (string s)
1926181254a7Smrg      * {
1927181254a7Smrg      *     // Your code here
1928181254a7Smrg      *
1929181254a7Smrg      *     // The passed parameter s does not include the opening <![CDATA[
1930181254a7Smrg      *     // nor closing ]]>
1931181254a7Smrg      *     //
1932181254a7Smrg      *     // This is a closure, so code here may reference
1933181254a7Smrg      *     // variables which are outside of this scope
1934181254a7Smrg      * };
1935181254a7Smrg      * --------------
1936181254a7Smrg      */
onCData(Handler handler)1937181254a7Smrg     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1938181254a7Smrg 
1939*b1e83836Smrg     /*
1940181254a7Smrg      * Register a handler which will be called whenever a comment is
1941181254a7Smrg      * encountered.
1942181254a7Smrg      *
1943181254a7Smrg      * Example:
1944181254a7Smrg      * --------------
1945181254a7Smrg      * // Call this function whenever a comment is encountered
1946181254a7Smrg      * onComment = (string s)
1947181254a7Smrg      * {
1948181254a7Smrg      *     // Your code here
1949181254a7Smrg      *
1950181254a7Smrg      *     // The passed parameter s does not include the opening <!-- nor
1951181254a7Smrg      *     // closing -->
1952181254a7Smrg      *     //
1953181254a7Smrg      *     // This is a closure, so code here may reference
1954181254a7Smrg      *     // variables which are outside of this scope
1955181254a7Smrg      * };
1956181254a7Smrg      * --------------
1957181254a7Smrg      */
onComment(Handler handler)1958181254a7Smrg     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1959181254a7Smrg 
1960*b1e83836Smrg     /*
1961181254a7Smrg      * Register a handler which will be called whenever a processing
1962181254a7Smrg      * instruction is encountered.
1963181254a7Smrg      *
1964181254a7Smrg      * Example:
1965181254a7Smrg      * --------------
1966181254a7Smrg      * // Call this function whenever a processing instruction is encountered
1967181254a7Smrg      * onPI = (string s)
1968181254a7Smrg      * {
1969181254a7Smrg      *     // Your code here
1970181254a7Smrg      *
1971181254a7Smrg      *     // The passed parameter s does not include the opening <? nor
1972181254a7Smrg      *     // closing ?>
1973181254a7Smrg      *     //
1974181254a7Smrg      *     // This is a closure, so code here may reference
1975181254a7Smrg      *     // variables which are outside of this scope
1976181254a7Smrg      * };
1977181254a7Smrg      * --------------
1978181254a7Smrg      */
onPI(Handler handler)1979181254a7Smrg     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1980181254a7Smrg 
1981*b1e83836Smrg     /*
1982181254a7Smrg      * Register a handler which will be called whenever an XML instruction is
1983181254a7Smrg      * encountered.
1984181254a7Smrg      *
1985181254a7Smrg      * Example:
1986181254a7Smrg      * --------------
1987181254a7Smrg      * // Call this function whenever an XML instruction is encountered
1988181254a7Smrg      * // (Note: XML instructions may only occur preceding the root tag of a
1989181254a7Smrg      * // document).
1990181254a7Smrg      * onXI = (string s)
1991181254a7Smrg      * {
1992181254a7Smrg      *     // Your code here
1993181254a7Smrg      *
1994181254a7Smrg      *     // The passed parameter s does not include the opening <! nor
1995181254a7Smrg      *     // closing >
1996181254a7Smrg      *     //
1997181254a7Smrg      *     // This is a closure, so code here may reference
1998181254a7Smrg      *     // variables which are outside of this scope
1999181254a7Smrg      * };
2000181254a7Smrg      * --------------
2001181254a7Smrg      */
onXI(Handler handler)2002181254a7Smrg     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2003181254a7Smrg 
2004*b1e83836Smrg     /*
2005181254a7Smrg      * Parse an XML element.
2006181254a7Smrg      *
2007181254a7Smrg      * Parsing will continue until the end of the current element. Any items
2008181254a7Smrg      * encountered for which a handler has been registered will invoke that
2009181254a7Smrg      * handler.
2010181254a7Smrg      *
2011181254a7Smrg      * Throws: various kinds of XMLException
2012181254a7Smrg      */
parse()2013181254a7Smrg     void parse()
2014181254a7Smrg     {
2015181254a7Smrg         import std.algorithm.searching : startsWith;
2016181254a7Smrg         import std.string : indexOf;
2017181254a7Smrg 
2018181254a7Smrg         string t;
2019181254a7Smrg         const Tag root = tag_;
2020181254a7Smrg         Tag[string] startTags;
2021181254a7Smrg         if (tag_ !is null) startTags[tag_.name] = tag_;
2022181254a7Smrg 
2023181254a7Smrg         while (s.length != 0)
2024181254a7Smrg         {
2025181254a7Smrg             if (startsWith(*s,"<!--"))
2026181254a7Smrg             {
2027181254a7Smrg                 chop(*s,4);
2028181254a7Smrg                 t = chop(*s,indexOf(*s,"-->"));
2029181254a7Smrg                 if (commentHandler.funcptr !is null) commentHandler(t);
2030181254a7Smrg                 chop(*s,3);
2031181254a7Smrg             }
2032181254a7Smrg             else if (startsWith(*s,"<![CDATA["))
2033181254a7Smrg             {
2034181254a7Smrg                 chop(*s,9);
2035181254a7Smrg                 t = chop(*s,indexOf(*s,"]]>"));
2036181254a7Smrg                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2037181254a7Smrg                 chop(*s,3);
2038181254a7Smrg             }
2039181254a7Smrg             else if (startsWith(*s,"<!"))
2040181254a7Smrg             {
2041181254a7Smrg                 chop(*s,2);
2042181254a7Smrg                 t = chop(*s,indexOf(*s,">"));
2043181254a7Smrg                 if (xiHandler.funcptr !is null) xiHandler(t);
2044181254a7Smrg                 chop(*s,1);
2045181254a7Smrg             }
2046181254a7Smrg             else if (startsWith(*s,"<?"))
2047181254a7Smrg             {
2048181254a7Smrg                 chop(*s,2);
2049181254a7Smrg                 t = chop(*s,indexOf(*s,"?>"));
2050181254a7Smrg                 if (piHandler.funcptr !is null) piHandler(t);
2051181254a7Smrg                 chop(*s,2);
2052181254a7Smrg             }
2053181254a7Smrg             else if (startsWith(*s,"<"))
2054181254a7Smrg             {
2055181254a7Smrg                 tag_ = new Tag(*s,true);
2056181254a7Smrg                 if (root is null)
2057181254a7Smrg                     return; // Return to constructor of derived class
2058181254a7Smrg 
2059181254a7Smrg                 if (tag_.isStart)
2060181254a7Smrg                 {
2061181254a7Smrg                     startTags[tag_.name] = tag_;
2062181254a7Smrg 
2063181254a7Smrg                     auto parser = new ElementParser(this);
2064181254a7Smrg 
2065181254a7Smrg                     auto handler = tag_.name in onStartTag;
2066181254a7Smrg                     if (handler !is null) (*handler)(parser);
2067181254a7Smrg                     else
2068181254a7Smrg                     {
2069181254a7Smrg                         handler = null in onStartTag;
2070181254a7Smrg                         if (handler !is null) (*handler)(parser);
2071181254a7Smrg                     }
2072181254a7Smrg                 }
2073181254a7Smrg                 else if (tag_.isEnd)
2074181254a7Smrg                 {
2075181254a7Smrg                     const startTag = startTags[tag_.name];
2076181254a7Smrg                     string text;
2077181254a7Smrg 
2078181254a7Smrg                     if (startTag.tagString.length == 0)
2079181254a7Smrg                         assert(0);
2080181254a7Smrg 
2081181254a7Smrg                     immutable(char)* p = startTag.tagString.ptr
2082181254a7Smrg                         + startTag.tagString.length;
2083181254a7Smrg                     immutable(char)* q = &tag_.tagString[0];
2084181254a7Smrg                     text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2085181254a7Smrg 
2086181254a7Smrg                     auto element = new Element(startTag);
2087181254a7Smrg                     if (text.length != 0) element ~= new Text(text);
2088181254a7Smrg 
2089181254a7Smrg                     auto handler = tag_.name in onEndTag;
2090181254a7Smrg                     if (handler !is null) (*handler)(element);
2091181254a7Smrg                     else
2092181254a7Smrg                     {
2093181254a7Smrg                         handler = null in onEndTag;
2094181254a7Smrg                         if (handler !is null) (*handler)(element);
2095181254a7Smrg                     }
2096181254a7Smrg 
2097181254a7Smrg                     if (tag_.name == root.name) return;
2098181254a7Smrg                 }
2099181254a7Smrg                 else if (tag_.isEmpty)
2100181254a7Smrg                 {
2101181254a7Smrg                     Tag startTag = new Tag(tag_.name);
2102181254a7Smrg 
2103*b1e83836Smrg                     // FIX by hed010gy
2104*b1e83836Smrg                     // https://issues.dlang.org/show_bug.cgi?id=2979
2105181254a7Smrg                     if (tag_.attr.length > 0)
2106181254a7Smrg                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2107181254a7Smrg                     // END FIX
2108181254a7Smrg 
2109181254a7Smrg                     // Handle the pretend start tag
2110181254a7Smrg                     string s2;
2111181254a7Smrg                     auto parser = new ElementParser(startTag,&s2);
2112181254a7Smrg                     auto handler1 = startTag.name in onStartTag;
2113181254a7Smrg                     if (handler1 !is null) (*handler1)(parser);
2114181254a7Smrg                     else
2115181254a7Smrg                     {
2116181254a7Smrg                         handler1 = null in onStartTag;
2117181254a7Smrg                         if (handler1 !is null) (*handler1)(parser);
2118181254a7Smrg                     }
2119181254a7Smrg 
2120181254a7Smrg                     // Handle the pretend end tag
2121181254a7Smrg                     auto element = new Element(startTag);
2122181254a7Smrg                     auto handler2 = tag_.name in onEndTag;
2123181254a7Smrg                     if (handler2 !is null) (*handler2)(element);
2124181254a7Smrg                     else
2125181254a7Smrg                     {
2126181254a7Smrg                         handler2 = null in onEndTag;
2127181254a7Smrg                         if (handler2 !is null) (*handler2)(element);
2128181254a7Smrg                     }
2129181254a7Smrg                 }
2130181254a7Smrg             }
2131181254a7Smrg             else
2132181254a7Smrg             {
2133181254a7Smrg                 t = chop(*s,indexOf(*s,"<"));
2134181254a7Smrg                 if (rawTextHandler.funcptr !is null)
2135181254a7Smrg                     rawTextHandler(t);
2136181254a7Smrg                 else if (textHandler.funcptr !is null)
2137181254a7Smrg                     textHandler(decode(t,DecodeMode.LOOSE));
2138181254a7Smrg             }
2139181254a7Smrg         }
2140181254a7Smrg     }
2141181254a7Smrg 
2142*b1e83836Smrg     /*
2143181254a7Smrg      * Returns that part of the element which has already been parsed
2144181254a7Smrg      */
toString()2145181254a7Smrg     override string toString() const @nogc @safe pure nothrow
2146181254a7Smrg     {
2147181254a7Smrg         assert(elementStart.length >= s.length);
2148181254a7Smrg         return elementStart[0 .. elementStart.length - s.length];
2149181254a7Smrg     }
2150181254a7Smrg 
2151181254a7Smrg }
2152181254a7Smrg 
2153181254a7Smrg private
2154181254a7Smrg {
Check(string msg)2155181254a7Smrg     template Check(string msg)
2156181254a7Smrg     {
2157181254a7Smrg         string old = s;
2158181254a7Smrg 
2159181254a7Smrg         void fail() @safe pure
2160181254a7Smrg         {
2161181254a7Smrg             s = old;
2162181254a7Smrg             throw new Err(s,msg);
2163181254a7Smrg         }
2164181254a7Smrg 
2165181254a7Smrg         void fail(Err e) @safe pure
2166181254a7Smrg         {
2167181254a7Smrg             s = old;
2168181254a7Smrg             throw new Err(s,msg,e);
2169181254a7Smrg         }
2170181254a7Smrg 
2171181254a7Smrg         void fail(string msg2) @safe pure
2172181254a7Smrg         {
2173181254a7Smrg             fail(new Err(s,msg2));
2174181254a7Smrg         }
2175181254a7Smrg     }
2176181254a7Smrg 
checkMisc(ref string s)2177181254a7Smrg     void checkMisc(ref string s) @safe pure // rule 27
2178181254a7Smrg     {
2179181254a7Smrg         import std.algorithm.searching : startsWith;
2180181254a7Smrg 
2181181254a7Smrg         mixin Check!("Misc");
2182181254a7Smrg 
2183181254a7Smrg         try
2184181254a7Smrg         {
2185181254a7Smrg                  if (s.startsWith("<!--")) { checkComment(s); }
2186181254a7Smrg             else if (s.startsWith("<?"))   { checkPI(s); }
2187181254a7Smrg             else                           { checkSpace(s); }
2188181254a7Smrg         }
2189181254a7Smrg         catch (Err e) { fail(e); }
2190181254a7Smrg     }
2191181254a7Smrg 
checkDocument(ref string s)2192181254a7Smrg     void checkDocument(ref string s) @safe pure // rule 1
2193181254a7Smrg     {
2194181254a7Smrg         mixin Check!("Document");
2195181254a7Smrg         try
2196181254a7Smrg         {
2197181254a7Smrg             checkProlog(s);
2198181254a7Smrg             checkElement(s);
2199181254a7Smrg             star!(checkMisc)(s);
2200181254a7Smrg         }
2201181254a7Smrg         catch (Err e) { fail(e); }
2202181254a7Smrg     }
2203181254a7Smrg 
checkChars(ref string s)2204181254a7Smrg     void checkChars(ref string s) @safe pure // rule 2
2205181254a7Smrg     {
2206181254a7Smrg         // TO DO - Fix std.utf stride and decode functions, then use those
2207181254a7Smrg         // instead
2208181254a7Smrg         import std.format : format;
2209181254a7Smrg 
2210181254a7Smrg         mixin Check!("Chars");
2211181254a7Smrg 
2212181254a7Smrg         dchar c;
2213181254a7Smrg         ptrdiff_t n = -1;
2214181254a7Smrg         // 'i' must not be smaller than size_t because size_t is used internally in
2215181254a7Smrg         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2216181254a7Smrg         foreach (size_t i, dchar d; s)
2217181254a7Smrg         {
2218181254a7Smrg             if (!isChar(d))
2219181254a7Smrg             {
2220181254a7Smrg                 c = d;
2221181254a7Smrg                 n = i;
2222181254a7Smrg                 break;
2223181254a7Smrg             }
2224181254a7Smrg         }
2225181254a7Smrg         if (n != -1)
2226181254a7Smrg         {
2227181254a7Smrg             s = s[n..$];
2228181254a7Smrg             fail(format("invalid character: U+%04X",c));
2229181254a7Smrg         }
2230181254a7Smrg     }
2231181254a7Smrg 
checkSpace(ref string s)2232181254a7Smrg     void checkSpace(ref string s) @safe pure // rule 3
2233181254a7Smrg     {
2234181254a7Smrg         import std.algorithm.searching : countUntil;
2235181254a7Smrg         import std.ascii : isWhite;
2236181254a7Smrg         import std.utf : byCodeUnit;
2237181254a7Smrg 
2238181254a7Smrg         mixin Check!("Whitespace");
2239181254a7Smrg         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2240181254a7Smrg         if (i == -1 && s.length > 0 && isWhite(s[0]))
2241181254a7Smrg             s = s[$ .. $];
2242181254a7Smrg         else if (i > -1)
2243181254a7Smrg             s = s[i .. $];
2244181254a7Smrg         if (s is old) fail();
2245181254a7Smrg     }
2246181254a7Smrg 
checkName(ref string s,out string name)2247181254a7Smrg     void checkName(ref string s, out string name) @safe pure // rule 5
2248181254a7Smrg     {
2249181254a7Smrg         mixin Check!("Name");
2250181254a7Smrg 
2251181254a7Smrg         if (s.length == 0) fail();
2252181254a7Smrg         ptrdiff_t n;
2253181254a7Smrg         // 'i' must not be smaller than size_t because size_t is used internally in
2254181254a7Smrg         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2255181254a7Smrg         foreach (size_t i, dchar c; s)
2256181254a7Smrg         {
2257181254a7Smrg             if (c == '_' || c == ':' || isLetter(c)) continue;
2258181254a7Smrg             if (i == 0) fail();
2259181254a7Smrg             if (c == '-' || c == '.' || isDigit(c)
2260181254a7Smrg                 || isCombiningChar(c) || isExtender(c)) continue;
2261181254a7Smrg             n = i;
2262181254a7Smrg             break;
2263181254a7Smrg         }
2264181254a7Smrg         name = s[0 .. n];
2265181254a7Smrg         s = s[n..$];
2266181254a7Smrg     }
2267181254a7Smrg 
checkAttValue(ref string s)2268181254a7Smrg     void checkAttValue(ref string s) @safe pure // rule 10
2269181254a7Smrg     {
2270181254a7Smrg         import std.algorithm.searching : countUntil;
2271181254a7Smrg         import std.utf : byCodeUnit;
2272181254a7Smrg 
2273181254a7Smrg         mixin Check!("AttValue");
2274181254a7Smrg 
2275181254a7Smrg         if (s.length == 0) fail();
2276181254a7Smrg         char c = s[0];
2277181254a7Smrg         if (c != '\u0022' && c != '\u0027')
2278181254a7Smrg             fail("attribute value requires quotes");
2279181254a7Smrg         s = s[1..$];
2280181254a7Smrg         for (;;)
2281181254a7Smrg         {
2282181254a7Smrg             s = s[s.byCodeUnit.countUntil(c) .. $];
2283181254a7Smrg             if (s.length == 0) fail("unterminated attribute value");
2284181254a7Smrg             if (s[0] == '<') fail("< found in attribute value");
2285181254a7Smrg             if (s[0] == c) break;
2286181254a7Smrg             try { checkReference(s); } catch (Err e) { fail(e); }
2287181254a7Smrg         }
2288181254a7Smrg         s = s[1..$];
2289181254a7Smrg     }
2290181254a7Smrg 
checkCharData(ref string s)2291181254a7Smrg     void checkCharData(ref string s) @safe pure // rule 14
2292181254a7Smrg     {
2293181254a7Smrg         import std.algorithm.searching : startsWith;
2294181254a7Smrg 
2295181254a7Smrg         mixin Check!("CharData");
2296181254a7Smrg 
2297181254a7Smrg         while (s.length != 0)
2298181254a7Smrg         {
2299181254a7Smrg             if (s.startsWith("&")) break;
2300181254a7Smrg             if (s.startsWith("<")) break;
2301181254a7Smrg             if (s.startsWith("]]>")) fail("]]> found within char data");
2302181254a7Smrg             s = s[1..$];
2303181254a7Smrg         }
2304181254a7Smrg     }
2305181254a7Smrg 
checkComment(ref string s)2306181254a7Smrg     void checkComment(ref string s) @safe pure // rule 15
2307181254a7Smrg     {
2308181254a7Smrg         import std.string : indexOf;
2309181254a7Smrg 
2310181254a7Smrg         mixin Check!("Comment");
2311181254a7Smrg 
2312181254a7Smrg         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2313181254a7Smrg         ptrdiff_t n = s.indexOf("--");
2314181254a7Smrg         if (n == -1) fail("unterminated comment");
2315181254a7Smrg         s = s[n..$];
2316181254a7Smrg         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2317181254a7Smrg     }
2318181254a7Smrg 
checkPI(ref string s)2319181254a7Smrg     void checkPI(ref string s) @safe pure // rule 16
2320181254a7Smrg     {
2321181254a7Smrg         mixin Check!("PI");
2322181254a7Smrg 
2323181254a7Smrg         try
2324181254a7Smrg         {
2325181254a7Smrg             checkLiteral("<?",s);
2326181254a7Smrg             checkEnd("?>",s);
2327181254a7Smrg         }
2328181254a7Smrg         catch (Err e) { fail(e); }
2329181254a7Smrg     }
2330181254a7Smrg 
checkCDSect(ref string s)2331181254a7Smrg     void checkCDSect(ref string s) @safe pure // rule 18
2332181254a7Smrg     {
2333181254a7Smrg         mixin Check!("CDSect");
2334181254a7Smrg 
2335181254a7Smrg         try
2336181254a7Smrg         {
2337181254a7Smrg             checkLiteral(cdata,s);
2338181254a7Smrg             checkEnd("]]>",s);
2339181254a7Smrg         }
2340181254a7Smrg         catch (Err e) { fail(e); }
2341181254a7Smrg     }
2342181254a7Smrg 
checkProlog(ref string s)2343181254a7Smrg     void checkProlog(ref string s) @safe pure // rule 22
2344181254a7Smrg     {
2345181254a7Smrg         mixin Check!("Prolog");
2346181254a7Smrg 
2347181254a7Smrg         try
2348181254a7Smrg         {
2349181254a7Smrg             /* The XML declaration is optional
2350181254a7Smrg              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2351181254a7Smrg              */
2352181254a7Smrg             opt!(checkXMLDecl)(s);
2353181254a7Smrg 
2354181254a7Smrg             star!(checkMisc)(s);
2355181254a7Smrg             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2356181254a7Smrg         }
2357181254a7Smrg         catch (Err e) { fail(e); }
2358181254a7Smrg     }
2359181254a7Smrg 
checkXMLDecl(ref string s)2360181254a7Smrg     void checkXMLDecl(ref string s) @safe pure // rule 23
2361181254a7Smrg     {
2362181254a7Smrg         mixin Check!("XMLDecl");
2363181254a7Smrg 
2364181254a7Smrg         try
2365181254a7Smrg         {
2366181254a7Smrg             checkLiteral("<?xml",s);
2367181254a7Smrg             checkVersionInfo(s);
2368181254a7Smrg             opt!(checkEncodingDecl)(s);
2369181254a7Smrg             opt!(checkSDDecl)(s);
2370181254a7Smrg             opt!(checkSpace)(s);
2371181254a7Smrg             checkLiteral("?>",s);
2372181254a7Smrg         }
2373181254a7Smrg         catch (Err e) { fail(e); }
2374181254a7Smrg     }
2375181254a7Smrg 
checkVersionInfo(ref string s)2376181254a7Smrg     void checkVersionInfo(ref string s) @safe pure // rule 24
2377181254a7Smrg     {
2378181254a7Smrg         mixin Check!("VersionInfo");
2379181254a7Smrg 
2380181254a7Smrg         try
2381181254a7Smrg         {
2382181254a7Smrg             checkSpace(s);
2383181254a7Smrg             checkLiteral("version",s);
2384181254a7Smrg             checkEq(s);
2385181254a7Smrg             quoted!(checkVersionNum)(s);
2386181254a7Smrg         }
2387181254a7Smrg         catch (Err e) { fail(e); }
2388181254a7Smrg     }
2389181254a7Smrg 
checkEq(ref string s)2390181254a7Smrg     void checkEq(ref string s) @safe pure // rule 25
2391181254a7Smrg     {
2392181254a7Smrg         mixin Check!("Eq");
2393181254a7Smrg 
2394181254a7Smrg         try
2395181254a7Smrg         {
2396181254a7Smrg             opt!(checkSpace)(s);
2397181254a7Smrg             checkLiteral("=",s);
2398181254a7Smrg             opt!(checkSpace)(s);
2399181254a7Smrg         }
2400181254a7Smrg         catch (Err e) { fail(e); }
2401181254a7Smrg     }
2402181254a7Smrg 
checkVersionNum(ref string s)2403181254a7Smrg     void checkVersionNum(ref string s) @safe pure // rule 26
2404181254a7Smrg     {
2405181254a7Smrg         import std.algorithm.searching : countUntil;
2406181254a7Smrg         import std.utf : byCodeUnit;
2407181254a7Smrg 
2408181254a7Smrg         mixin Check!("VersionNum");
2409181254a7Smrg 
2410181254a7Smrg         s = s[s.byCodeUnit.countUntil('\"') .. $];
2411181254a7Smrg         if (s is old) fail();
2412181254a7Smrg     }
2413181254a7Smrg 
checkDocTypeDecl(ref string s)2414181254a7Smrg     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2415181254a7Smrg     {
2416181254a7Smrg         mixin Check!("DocTypeDecl");
2417181254a7Smrg 
2418181254a7Smrg         try
2419181254a7Smrg         {
2420181254a7Smrg             checkLiteral("<!DOCTYPE",s);
2421181254a7Smrg             //
2422181254a7Smrg             // TO DO -- ensure DOCTYPE is well formed
2423181254a7Smrg             // (But not yet. That's one of our "future directions")
2424181254a7Smrg             //
2425181254a7Smrg             checkEnd(">",s);
2426181254a7Smrg         }
2427181254a7Smrg         catch (Err e) { fail(e); }
2428181254a7Smrg     }
2429181254a7Smrg 
checkSDDecl(ref string s)2430181254a7Smrg     void checkSDDecl(ref string s) @safe pure // rule 32
2431181254a7Smrg     {
2432181254a7Smrg         import std.algorithm.searching : startsWith;
2433181254a7Smrg 
2434181254a7Smrg         mixin Check!("SDDecl");
2435181254a7Smrg 
2436181254a7Smrg         try
2437181254a7Smrg         {
2438181254a7Smrg             checkSpace(s);
2439181254a7Smrg             checkLiteral("standalone",s);
2440181254a7Smrg             checkEq(s);
2441181254a7Smrg         }
2442181254a7Smrg         catch (Err e) { fail(e); }
2443181254a7Smrg 
2444181254a7Smrg         int n = 0;
2445181254a7Smrg              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2446181254a7Smrg         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2447181254a7Smrg         else fail("standalone attribute value must be 'yes', \"yes\","~
2448181254a7Smrg             " 'no' or \"no\"");
2449181254a7Smrg         s = s[n..$];
2450181254a7Smrg     }
2451181254a7Smrg 
checkElement(ref string s)2452181254a7Smrg     void checkElement(ref string s) @safe pure // rule 39
2453181254a7Smrg     {
2454181254a7Smrg         mixin Check!("Element");
2455181254a7Smrg 
2456181254a7Smrg         string sname,ename,t;
2457181254a7Smrg         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2458181254a7Smrg 
2459181254a7Smrg         if (t == "STag")
2460181254a7Smrg         {
2461181254a7Smrg             try
2462181254a7Smrg             {
2463181254a7Smrg                 checkContent(s);
2464181254a7Smrg                 t = s;
2465181254a7Smrg                 checkETag(s,ename);
2466181254a7Smrg             }
2467181254a7Smrg             catch (Err e) { fail(e); }
2468181254a7Smrg 
2469181254a7Smrg             if (sname != ename)
2470181254a7Smrg             {
2471181254a7Smrg                 s = t;
2472181254a7Smrg                 fail("end tag name \"" ~ ename
2473181254a7Smrg                     ~ "\" differs from start tag name \""~sname~"\"");
2474181254a7Smrg             }
2475181254a7Smrg         }
2476181254a7Smrg     }
2477181254a7Smrg 
2478181254a7Smrg     // rules 40 and 44
checkTag(ref string s,out string type,out string name)2479181254a7Smrg     void checkTag(ref string s, out string type, out string name) @safe pure
2480181254a7Smrg     {
2481181254a7Smrg         mixin Check!("Tag");
2482181254a7Smrg 
2483181254a7Smrg         try
2484181254a7Smrg         {
2485181254a7Smrg             type = "STag";
2486181254a7Smrg             checkLiteral("<",s);
2487181254a7Smrg             checkName(s,name);
2488181254a7Smrg             star!(seq!(checkSpace,checkAttribute))(s);
2489181254a7Smrg             opt!(checkSpace)(s);
2490181254a7Smrg             if (s.length != 0 && s[0] == '/')
2491181254a7Smrg             {
2492181254a7Smrg                 s = s[1..$];
2493181254a7Smrg                 type = "ETag";
2494181254a7Smrg             }
2495181254a7Smrg             checkLiteral(">",s);
2496181254a7Smrg         }
2497181254a7Smrg         catch (Err e) { fail(e); }
2498181254a7Smrg     }
2499181254a7Smrg 
checkAttribute(ref string s)2500181254a7Smrg     void checkAttribute(ref string s) @safe pure // rule 41
2501181254a7Smrg     {
2502181254a7Smrg         mixin Check!("Attribute");
2503181254a7Smrg 
2504181254a7Smrg         try
2505181254a7Smrg         {
2506181254a7Smrg             string name;
2507181254a7Smrg             checkName(s,name);
2508181254a7Smrg             checkEq(s);
2509181254a7Smrg             checkAttValue(s);
2510181254a7Smrg         }
2511181254a7Smrg         catch (Err e) { fail(e); }
2512181254a7Smrg     }
2513181254a7Smrg 
checkETag(ref string s,out string name)2514181254a7Smrg     void checkETag(ref string s, out string name) @safe pure // rule 42
2515181254a7Smrg     {
2516181254a7Smrg         mixin Check!("ETag");
2517181254a7Smrg 
2518181254a7Smrg         try
2519181254a7Smrg         {
2520181254a7Smrg             checkLiteral("</",s);
2521181254a7Smrg             checkName(s,name);
2522181254a7Smrg             opt!(checkSpace)(s);
2523181254a7Smrg             checkLiteral(">",s);
2524181254a7Smrg         }
2525181254a7Smrg         catch (Err e) { fail(e); }
2526181254a7Smrg     }
2527181254a7Smrg 
checkContent(ref string s)2528181254a7Smrg     void checkContent(ref string s) @safe pure // rule 43
2529181254a7Smrg     {
2530181254a7Smrg         import std.algorithm.searching : startsWith;
2531181254a7Smrg 
2532181254a7Smrg         mixin Check!("Content");
2533181254a7Smrg 
2534181254a7Smrg         try
2535181254a7Smrg         {
2536181254a7Smrg             while (s.length != 0)
2537181254a7Smrg             {
2538181254a7Smrg                 old = s;
2539181254a7Smrg                      if (s.startsWith("&"))        { checkReference(s); }
2540181254a7Smrg                 else if (s.startsWith("<!--"))     { checkComment(s); }
2541181254a7Smrg                 else if (s.startsWith("<?"))       { checkPI(s); }
2542181254a7Smrg                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2543181254a7Smrg                 else if (s.startsWith("</"))       { break; }
2544181254a7Smrg                 else if (s.startsWith("<"))        { checkElement(s); }
2545181254a7Smrg                 else                               { checkCharData(s); }
2546181254a7Smrg             }
2547181254a7Smrg         }
2548181254a7Smrg         catch (Err e) { fail(e); }
2549181254a7Smrg     }
2550181254a7Smrg 
checkCharRef(ref string s,out dchar c)2551181254a7Smrg     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2552181254a7Smrg     {
2553181254a7Smrg         import std.format : format;
2554181254a7Smrg 
2555181254a7Smrg         mixin Check!("CharRef");
2556181254a7Smrg 
2557181254a7Smrg         c = 0;
2558181254a7Smrg         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2559181254a7Smrg         int radix = 10;
2560181254a7Smrg         if (s.length != 0 && s[0] == 'x')
2561181254a7Smrg         {
2562181254a7Smrg             s = s[1..$];
2563181254a7Smrg             radix = 16;
2564181254a7Smrg         }
2565181254a7Smrg         if (s.length == 0) fail("unterminated character reference");
2566181254a7Smrg         if (s[0] == ';')
2567181254a7Smrg             fail("character reference must have at least one digit");
2568181254a7Smrg         while (s.length != 0)
2569181254a7Smrg         {
2570181254a7Smrg             immutable char d = s[0];
2571181254a7Smrg             int n = 0;
2572181254a7Smrg             switch (d)
2573181254a7Smrg             {
2574181254a7Smrg                 case 'F','f': ++n;      goto case;
2575181254a7Smrg                 case 'E','e': ++n;      goto case;
2576181254a7Smrg                 case 'D','d': ++n;      goto case;
2577181254a7Smrg                 case 'C','c': ++n;      goto case;
2578181254a7Smrg                 case 'B','b': ++n;      goto case;
2579181254a7Smrg                 case 'A','a': ++n;      goto case;
2580181254a7Smrg                 case '9':     ++n;      goto case;
2581181254a7Smrg                 case '8':     ++n;      goto case;
2582181254a7Smrg                 case '7':     ++n;      goto case;
2583181254a7Smrg                 case '6':     ++n;      goto case;
2584181254a7Smrg                 case '5':     ++n;      goto case;
2585181254a7Smrg                 case '4':     ++n;      goto case;
2586181254a7Smrg                 case '3':     ++n;      goto case;
2587181254a7Smrg                 case '2':     ++n;      goto case;
2588181254a7Smrg                 case '1':     ++n;      goto case;
2589181254a7Smrg                 case '0':     break;
2590181254a7Smrg                 default: n = 100; break;
2591181254a7Smrg             }
2592181254a7Smrg             if (n >= radix) break;
2593181254a7Smrg             c *= radix;
2594181254a7Smrg             c += n;
2595181254a7Smrg             s = s[1..$];
2596181254a7Smrg         }
2597181254a7Smrg         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2598181254a7Smrg         if (s.length == 0 || s[0] != ';') fail("expected ;");
2599181254a7Smrg         else s = s[1..$];
2600181254a7Smrg     }
2601181254a7Smrg 
checkReference(ref string s)2602181254a7Smrg     void checkReference(ref string s) @safe pure // rule 67
2603181254a7Smrg     {
2604181254a7Smrg         import std.algorithm.searching : startsWith;
2605181254a7Smrg 
2606181254a7Smrg         mixin Check!("Reference");
2607181254a7Smrg 
2608181254a7Smrg         try
2609181254a7Smrg         {
2610181254a7Smrg             dchar c;
2611181254a7Smrg             if (s.startsWith("&#")) checkCharRef(s,c);
2612181254a7Smrg             else checkEntityRef(s);
2613181254a7Smrg         }
2614181254a7Smrg         catch (Err e) { fail(e); }
2615181254a7Smrg     }
2616181254a7Smrg 
checkEntityRef(ref string s)2617181254a7Smrg     void checkEntityRef(ref string s) @safe pure // rule 68
2618181254a7Smrg     {
2619181254a7Smrg         mixin Check!("EntityRef");
2620181254a7Smrg 
2621181254a7Smrg         try
2622181254a7Smrg         {
2623181254a7Smrg             string name;
2624181254a7Smrg             checkLiteral("&",s);
2625181254a7Smrg             checkName(s,name);
2626181254a7Smrg             checkLiteral(";",s);
2627181254a7Smrg         }
2628181254a7Smrg         catch (Err e) { fail(e); }
2629181254a7Smrg     }
2630181254a7Smrg 
checkEncName(ref string s)2631181254a7Smrg     void checkEncName(ref string s) @safe pure // rule 81
2632181254a7Smrg     {
2633181254a7Smrg         import std.algorithm.searching : countUntil;
2634181254a7Smrg         import std.ascii : isAlpha;
2635181254a7Smrg         import std.utf : byCodeUnit;
2636181254a7Smrg 
2637181254a7Smrg         mixin Check!("EncName");
2638181254a7Smrg 
2639181254a7Smrg         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2640181254a7Smrg         if (s is old) fail();
2641181254a7Smrg         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2642181254a7Smrg     }
2643181254a7Smrg 
checkEncodingDecl(ref string s)2644181254a7Smrg     void checkEncodingDecl(ref string s) @safe pure // rule 80
2645181254a7Smrg     {
2646181254a7Smrg         mixin Check!("EncodingDecl");
2647181254a7Smrg 
2648181254a7Smrg         try
2649181254a7Smrg         {
2650181254a7Smrg             checkSpace(s);
2651181254a7Smrg             checkLiteral("encoding",s);
2652181254a7Smrg             checkEq(s);
2653181254a7Smrg             quoted!(checkEncName)(s);
2654181254a7Smrg         }
2655181254a7Smrg         catch (Err e) { fail(e); }
2656181254a7Smrg     }
2657181254a7Smrg 
2658181254a7Smrg     // Helper functions
2659181254a7Smrg 
checkLiteral(string literal,ref string s)2660181254a7Smrg     void checkLiteral(string literal,ref string s) @safe pure
2661181254a7Smrg     {
2662181254a7Smrg         import std.string : startsWith;
2663181254a7Smrg 
2664181254a7Smrg         mixin Check!("Literal");
2665181254a7Smrg 
2666181254a7Smrg         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2667181254a7Smrg         s = s[literal.length..$];
2668181254a7Smrg     }
2669181254a7Smrg 
checkEnd(string end,ref string s)2670181254a7Smrg     void checkEnd(string end,ref string s) @safe pure
2671181254a7Smrg     {
2672181254a7Smrg         import std.string : indexOf;
2673181254a7Smrg         // Deliberately no mixin Check here.
2674181254a7Smrg 
2675181254a7Smrg         auto n = s.indexOf(end);
2676181254a7Smrg         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2677181254a7Smrg         s = s[n..$];
2678181254a7Smrg         checkLiteral(end,s);
2679181254a7Smrg     }
2680181254a7Smrg 
2681181254a7Smrg     // Metafunctions -- none of these use mixin Check
2682181254a7Smrg 
opt(alias f)2683181254a7Smrg     void opt(alias f)(ref string s)
2684181254a7Smrg     {
2685181254a7Smrg         try { f(s); } catch (Err e) {}
2686181254a7Smrg     }
2687181254a7Smrg 
plus(alias f)2688181254a7Smrg     void plus(alias f)(ref string s)
2689181254a7Smrg     {
2690181254a7Smrg         f(s);
2691181254a7Smrg         star!(f)(s);
2692181254a7Smrg     }
2693181254a7Smrg 
star(alias f)2694181254a7Smrg     void star(alias f)(ref string s)
2695181254a7Smrg     {
2696181254a7Smrg         while (s.length != 0)
2697181254a7Smrg         {
2698181254a7Smrg             try { f(s); }
2699181254a7Smrg             catch (Err e) { return; }
2700181254a7Smrg         }
2701181254a7Smrg     }
2702181254a7Smrg 
quoted(alias f)2703181254a7Smrg     void quoted(alias f)(ref string s)
2704181254a7Smrg     {
2705181254a7Smrg         import std.string : startsWith;
2706181254a7Smrg 
2707181254a7Smrg         if (s.startsWith("'"))
2708181254a7Smrg         {
2709181254a7Smrg             checkLiteral("'",s);
2710181254a7Smrg             f(s);
2711181254a7Smrg             checkLiteral("'",s);
2712181254a7Smrg         }
2713181254a7Smrg         else
2714181254a7Smrg         {
2715181254a7Smrg             checkLiteral("\"",s);
2716181254a7Smrg             f(s);
2717181254a7Smrg             checkLiteral("\"",s);
2718181254a7Smrg         }
2719181254a7Smrg     }
2720181254a7Smrg 
seq(alias f,alias g)2721181254a7Smrg     void seq(alias f,alias g)(ref string s)
2722181254a7Smrg     {
2723181254a7Smrg         f(s);
2724181254a7Smrg         g(s);
2725181254a7Smrg     }
2726181254a7Smrg }
2727181254a7Smrg 
/*
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // checkDocument consumes s as it goes. Keep the untouched document so
    // the exception is completed against the whole text. Previously the
    // "Junk found after document" case passed only the trailing junk to
    // complete().
    // NOTE(review): complete() is defined outside this chunk; it is assumed
    // to expect the entire document in order to resolve positions.
    string document = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(document);
        throw e;
    }
}
2754181254a7Smrg 
// A mismatched end tag (<genre> ... </genres>) must be rejected, and the
// error text must name both tags.
@system pure unittest
{
    import std.string : indexOf;

    string malformed = q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]";

    try
    {
        check(malformed);
        assert(false);      // check() should have thrown
    }
    catch (CheckException e)
    {
        immutable expected = "end tag name \"genres\" differs"~
                             " from start tag name \"genre\"";
        immutable found = e.toString().indexOf(expected);
        assert(found != -1);
    }
}
2803181254a7Smrg 
// A small document containing a comment must pass check(); if it does not,
// the test fails and shows the exception's own report.
@system unittest
{
    string document = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";

    try
    {
        check(document);
    }
    catch (CheckException failure)
    {
        assert(0, failure.toString());
    }
}
2823181254a7Smrg 
// Attribute values may contain the quote character that is not used as their
// delimiter; the start-tag handler must see the values unchanged.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    auto documentParser = new DocumentParser(test_xml);
    // Flipped by the handler so the test can prove the handler actually ran.
    bool handlerRan = false;
    documentParser.onStartTag["stream:stream"] = (ElementParser element) {
        auto attributes = element.tag.attr;
        assert(attributes["xmlns"] == "jabber:'client'");
        assert(attributes["from"] == "jid.pl");
        assert(attributes["attr"] == "a\"b\"c");
        handlerRan = true;
    };
    documentParser.parse();
    assert(handlerRan);
}
2843181254a7Smrg 
// "&amp;" must be decoded to "&" both in attribute values and in element text.
@system unittest
{
    string source = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto documentParser = new DocumentParser(source);

    // Attribute value as seen when the start tag is reported.
    documentParser.onStartTag["Test"] = (ElementParser node) {
        assert(node.tag.attr["thing"] == "What & Up");
    };

    // Element text as seen when the end tag is reported.
    documentParser.onEndTag["Test"] = (in Element finished) {
        assert(finished.text() == "What & Up Second");
    };

    documentParser.parse();
}
2862181254a7Smrg 
// Building a Document from a tag with escaped attribute content and printing
// it again must reproduce the input text exactly.
@system unittest
{
    string source = `<tag attr="&quot;value&gt;" />`;
    auto document = new Document(source);
    string printed = document.toString();
    assert(printed == source);
}
2869181254a7Smrg 
/* Base class of the exceptions thrown by this module. */
class XMLException : Exception
{
    // Passes the description straight on to Exception.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2872181254a7Smrg 
2873181254a7Smrg // Other exceptions
2874181254a7Smrg 
// Thrown while constructing a Comment.
class CommentException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2878181254a7Smrg 
// Thrown while constructing a CData.
class CDataException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2882181254a7Smrg 
// Thrown while constructing an XMLInstruction.
class XIException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2886181254a7Smrg 
// Thrown while constructing a ProcessingInstruction.
class PIException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2890181254a7Smrg 
// Thrown while constructing a Text.
class TextException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2894181254a7Smrg 
// Thrown by decode().
class DecodeException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2898181254a7Smrg 
// Thrown when a comparison is attempted against an object of the wrong type.
class InvalidTypeException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2902181254a7Smrg 
// Thrown while parsing for Tags.
class TagException : XMLException
{
    // Private constructor; forwards the description to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2906181254a7Smrg 
/*
 * Thrown during check().
 *
 * Instances can be chained through `err`; toString() prints the chained
 * exception's report before this one's.
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy (may be null)
    private string tail; // Text that was still unparsed when this was created
    /*
     * Name of the production rule which failed to parse,
     * or a specific error message
     */
    string msg;
    size_t line = 0; // Line number of the parse failure; 0 until complete() runs
    size_t column = 0; // Column number of the parse failure; 0 until complete() runs

    // Records the unparsed tail, the message and an optional chained
    // exception. The base-class message is deliberately left null.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Fills in `line` and `column` for this exception and then for every
    // exception chained through `err`.
    //
    // Params:
    //     entire = the text `tail` is the end of; everything before the tail
    //              counts as already consumed
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Portion of the text that precedes the failure point.
        string consumed = entire[0..$-tail.length];
        // Index just past the last newline, or 0 when there is none
        // (lastIndexOf yields -1 in that case).
        ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;
        line = consumed.count("\n") + 1;
        // Converting to UTF-32 makes the column count code points, not bytes.
        dstring currentLine = toUTF32(consumed[lineStart..$]);
        column = currentLine.length + 1;

        if (err !is null)
            err.complete(entire);
    }

    // Renders "Line L, column C: msg\n" (the position prefix is omitted while
    // `line` is still 0), preceded by the report of the chained exception.
    override string toString() const @safe pure
    {
        import std.format : format;

        string position;
        if (line != 0)
            position = format("Line %d, column %d: ",line,column);

        string report = position;
        report ~= msg;
        report ~= '\n';

        if (err is null)
            return report;
        return err.toString() ~ report;
    }
}
2955181254a7Smrg 
2956181254a7Smrg private alias Err = CheckException;
2957181254a7Smrg 
2958181254a7Smrg // Private helper functions
2959181254a7Smrg 
2960181254a7Smrg private
2961181254a7Smrg {
// Casts `o` to T, throwing InvalidTypeException when the cast yields null.
inout(T) toType(T)(inout return scope Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
        return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
2972181254a7Smrg 
// Removes the first `n` code units from `s` and returns them.
// Passing -1 (i.e. size_t.max) takes the whole string.
string chop(ref string s, size_t n) @safe pure nothrow
{
    // -1 converted to size_t is size_t.max, so this is the same sentinel test.
    immutable size_t cut = (n == size_t.max) ? s.length : n;
    string taken = s[0 .. cut];
    s = s[cut .. $];
    return taken;
}
2980181254a7Smrg 
// Consumes the first character of `s` if it equals `c`.
// Returns true when a character was consumed; otherwise `s` is left untouched.
bool optc(ref string s, char c) @safe pure nothrow
{
    if (s.length == 0 || s[0] != c)
        return false;

    s = s[1..$];
    return true;
}
2987181254a7Smrg 
// Consumes the first character of `s`, which must equal `c`.
// Throws TagException (with an empty message) when `s` is empty or starts
// with a different character.
void reqc(ref string s, char c) @safe pure
{
    immutable bool matches = s.length != 0 && s[0] == c;
    if (!matches)
        throw new TagException("");

    s = s[1..$];
}
2993181254a7Smrg 
// Consumes and returns the first character of `s`, which must occur in `chars`.
// Throws TagException (with an empty message) when `s` is empty or its first
// character is not one of `chars`.
char requireOneOf(ref string s, string chars) @safe pure
{
    import std.string : indexOf;

    immutable bool accepted = s.length != 0 && indexOf(chars,s[0]) != -1;
    if (!accepted)
        throw new TagException("");

    immutable char first = s[0];
    s = s[1..$];
    return first;
}
3004181254a7Smrg 
3005*b1e83836Smrg     alias hash = .hashOf;
3006181254a7Smrg 
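// Definitions from the XML specification.
// Each table is a flat list of inclusive [low, high] code point pairs in
// ascending order, which is the layout that lookup() searches.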
3008181254a7Smrg     immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
3009181254a7Smrg         0x10000,0x10FFFF];
3010181254a7Smrg     immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3011181254a7Smrg         0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3012181254a7Smrg         0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3013181254a7Smrg         0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3014181254a7Smrg         0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3015181254a7Smrg         0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3016181254a7Smrg         0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3017181254a7Smrg         0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3018181254a7Smrg         0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3019181254a7Smrg         0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3020181254a7Smrg         0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3021181254a7Smrg         0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3022181254a7Smrg         0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3023181254a7Smrg         0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3024181254a7Smrg         0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3025181254a7Smrg         0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3026181254a7Smrg         0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3027181254a7Smrg         0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3028181254a7Smrg         0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3029181254a7Smrg         0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3030181254a7Smrg         0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3031181254a7Smrg         0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3032181254a7Smrg         0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3033181254a7Smrg         0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3034181254a7Smrg         0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3035181254a7Smrg         0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3036181254a7Smrg         0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3037181254a7Smrg         0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3038181254a7Smrg         0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3039181254a7Smrg         0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3040181254a7Smrg         0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3041181254a7Smrg         0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3042181254a7Smrg         0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3043181254a7Smrg         0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3044181254a7Smrg         0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3045181254a7Smrg         0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3046181254a7Smrg         0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3047181254a7Smrg         0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3048181254a7Smrg         0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3049181254a7Smrg         0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3050181254a7Smrg         0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3051181254a7Smrg     immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3052181254a7Smrg     immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3053181254a7Smrg         0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3054181254a7Smrg         0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3055181254a7Smrg         0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3056181254a7Smrg         0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3057181254a7Smrg         0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3058181254a7Smrg         0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3059181254a7Smrg         0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3060181254a7Smrg         0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3061181254a7Smrg         0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3062181254a7Smrg         0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3063181254a7Smrg         0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3064181254a7Smrg         0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3065181254a7Smrg         0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3066181254a7Smrg         0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3067181254a7Smrg         0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3068181254a7Smrg         0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3069181254a7Smrg         0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3070181254a7Smrg         0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3071181254a7Smrg         0x3099,0x3099,0x309A,0x309A];
3072181254a7Smrg     immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3073181254a7Smrg         0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3074181254a7Smrg         0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3075181254a7Smrg         0x0ED9,0x0F20,0x0F29];
3076181254a7Smrg     immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3077181254a7Smrg         0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3078181254a7Smrg         0x3035,0x309D,0x309E,0x30FC,0x30FE];
3079181254a7Smrg 
// Binary search over a table of inclusive [low, high] pairs stored flat
// (low at even indices, high at the following odd index).
// Returns true when `c` lies inside one of the pairs.
bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
{
    for (auto rest = table; rest.length != 0; )
    {
        // Middle of the remaining slice, rounded down to an even index so it
        // always lands on the low end of a pair.
        immutable size_t low = (rest.length / 2) & ~cast(size_t) 1;
        immutable size_t high = low + 1;

        if (c >= rest[low] && c <= rest[high])
            return true;

        // Not in this pair: keep the half that could still contain `c`.
        rest = (c < rest[low]) ? rest[0 .. low] : rest[high + 1 .. $];
    }
    return false;
}
3097181254a7Smrg 
// Builds a short preview of `s`: code units below 0x20 or above 0x7F are
// shown as '.', and once 40 units have been copied "___" is appended and
// copying stops.
string startOf(string s) @safe nothrow pure
{
    enum size_t limit = 40;

    string preview;
    foreach (char unit; s)
    {
        immutable bool keep = unit >= 0x20 && unit <= 0x7F;
        preview ~= keep ? unit : '.';
        if (preview.length >= limit)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}
3108181254a7Smrg 
// Always throws an XMLException carrying `s` as its message (null by default).
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}
3113181254a7Smrg }
3114