1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will be removed from Phobos in 2.101.0. 6 If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD)) 7 */ 8 9 /* 10 Classes and functions for creating and parsing XML 11 12 The basic architecture of this module is that there are standalone functions, 13 classes for constructing an XML document from scratch (Tag, Element and 14 Document), and also classes for parsing a pre-existing XML file (ElementParser 15 and DocumentParser). The parsing classes <i>may</i> be used to build a 16 Document, but that is not their primary purpose. The handling capabilities of 17 DocumentParser and ElementParser are sufficiently customizable that you can 18 make them do pretty much whatever you want. 19 20 Example: This example creates a DOM (Document Object Model) tree 21 from an XML file. 22 ------------------------------------------------------------------------------ 23 import std.xml; 24 import std.stdio; 25 import std.string; 26 import std.file; 27 28 // books.xml is used in various samples throughout the Microsoft XML Core 29 // Services (MSXML) SDK. 30 // 31 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 32 33 void main() 34 { 35 string s = cast(string) std.file.read("books.xml"); 36 37 // Check for well-formedness 38 check(s); 39 40 // Make a DOM tree 41 auto doc = new Document(s); 42 43 // Plain-print it 44 writeln(doc); 45 } 46 ------------------------------------------------------------------------------ 47 48 Example: This example does much the same thing, except that the file is 49 deconstructed and reconstructed by hand. This is more work, but the 50 techniques involved offer vastly more power. 
51 ------------------------------------------------------------------------------ 52 import std.xml; 53 import std.stdio; 54 import std.string; 55 56 struct Book 57 { 58 string id; 59 string author; 60 string title; 61 string genre; 62 string price; 63 string pubDate; 64 string description; 65 } 66 67 void main() 68 { 69 string s = cast(string) std.file.read("books.xml"); 70 71 // Check for well-formedness 72 check(s); 73 74 // Take it apart 75 Book[] books; 76 77 auto xml = new DocumentParser(s); 78 xml.onStartTag["book"] = (ElementParser xml) 79 { 80 Book book; 81 book.id = xml.tag.attr["id"]; 82 83 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; 84 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; 85 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; 86 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; 87 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; 88 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; 89 90 xml.parse(); 91 92 books ~= book; 93 }; 94 xml.parse(); 95 96 // Put it back together again; 97 auto doc = new Document(new Tag("catalog")); 98 foreach (book;books) 99 { 100 auto element = new Element("book"); 101 element.tag.attr["id"] = book.id; 102 103 element ~= new Element("author", book.author); 104 element ~= new Element("title", book.title); 105 element ~= new Element("genre", book.genre); 106 element ~= new Element("price", book.price); 107 element ~= new Element("publish-date",book.pubDate); 108 element ~= new Element("description", book.description); 109 110 doc ~= element; 111 } 112 113 // Pretty-print it 114 writefln(join(doc.pretty(3),"\n")); 115 } 116 ------------------------------------------------------------------------------- 117 Copyright: Copyright Janice Caron 2008 - 2009. 118 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
deprecated("Will be removed from Phobos in 2.101.0. If you still need it, go to https://github.com/DigitalMars/undeaD")
module std.xml;

enum cdata = "<![CDATA[";

/*
 * Tests whether a code point is a legal character according to the XML
 * standard (rule 2).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for TAB, LF, CR, U+0020..U+D7FF, and U+E000..U+10FFFF
 *          except U+FFFE and U+FFFF; false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // At or below U+D7FF: everything from space upwards, plus the three
    // permitted control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // Past the surrogate block: the mask singles out U+FFFE and U+FFFF,
    // which are the only excluded values in this range.
    return 0xE000 <= c && c <= 0x10FFFF && (c & 0x1FFFFE) != 0xFFFE;
}

@safe @nogc nothrow pure unittest
{
    assert(!isChar(cast(dchar) 0x8));
    assert( isChar(cast(dchar) 0x9));
    assert( isChar(cast(dchar) 0xA));
    assert(!isChar(cast(dchar) 0xB));
    assert(!isChar(cast(dchar) 0xC));
    assert( isChar(cast(dchar) 0xD));
    assert(!isChar(cast(dchar) 0xE));
    assert(!isChar(cast(dchar) 0x1F));
    assert( isChar(cast(dchar) 0x20));
    assert( isChar('J'));
    assert( isChar(cast(dchar) 0xD7FF));
    assert(!isChar(cast(dchar) 0xD800));
    assert(!isChar(cast(dchar) 0xDFFF));
    assert( isChar(cast(dchar) 0xE000));
    assert( isChar(cast(dchar) 0xFFFD));
    assert(!isChar(cast(dchar) 0xFFFE));
    assert(!isChar(cast(dchar) 0xFFFF));
    assert( isChar(cast(dchar) 0x10000));
    assert( isChar(cast(dchar) 0x10FFFF));
    assert(!isChar(cast(dchar) 0x110000));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/*
 * Tests whether a character is whitespace according to the XML standard.
 *
 * Only space, tab, carriage return and linefeed count as whitespace in XML.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020':
        case '\u0009':
        case '\u000A':
        case '\u000D':
            return true;
        default:
            return false;
    }
}

/*
 * Tests whether a character is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII '0'..'9' is answered directly; anything else goes to the table.
    return (0x0030 <= c && c <= 0x0039) || lookup(DigitTable,c);
}

@safe @nogc nothrow pure unittest
{
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/*
 * Tests whether a character is a letter according to the XML standard,
 * i.e. either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a letter
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}

/*
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for U+3007, U+3021..U+3029 and U+4E00..U+9FA5
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    assert(isIdeographic('\u4E00'));
    assert(isIdeographic('\u9FA5'));
    assert(isIdeographic('\u3007'));
    assert(isIdeographic('\u3021'));
    assert(isIdeographic('\u3029'));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/*
 * Tests whether a character is a base character according to the XML
 * standard. The answer comes from a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(BaseCharTable,c);
}

/*
 * Tests whether a character is a combining character according to the
 * XML standard. The answer comes from a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(CombiningCharTable,c);
}

/*
 * Tests whether a character is an extender according to the XML standard.
 * The answer comes from a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return lookup(ExtenderTable,c);
}

/*
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string. When no character needed escaping, s itself
 *          is returned (same slice, no copy).
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    string r;
    size_t lastI;                   // start of the not-yet-copied tail of s
    auto result = appender!S();

    foreach (i, c; s)
    {
        // Map each of the five special characters to its predefined entity.
        // The replacements must be the entity references themselves, not the
        // raw characters, otherwise encode() is a no-op.
        switch (c)
        {
        case '&':  r = "&amp;"; break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;"; break;
        case '>':  r = "&gt;"; break;
        default: continue;
        }
        // Replace with r: flush the untouched run before c, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was ever written to the appender, so nothing needed escaping:
    // hand back the original string unchanged.
    if (!result.data.ptr) return s;
    result.put(s[lastI .. $]);
    return result.data;
}

@safe pure unittest
{
    auto s = "hello";
    assert(encode(s) is s);
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}

/*
 * Mode to use for decoding.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE, LOOSE, STRICT
}

/*
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
*
 * This function decodes the entities &amp;, &quot;, &apos;,
 * &lt; and &gt;,
 * as well as decimal and hexadecimal entities such as &#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // buffer stays empty until the first '&' is met, so ampersand-free input
    // is returned as-is without copying.
    string buffer;
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: start the copy with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    char[4] temp;
                    import std.utf : encode;
                    buffer ~= temp[0 .. encode(temp, d)];
                    // t was advanced past the reference; step i to its last
                    // character (the loop increment moves past it).
                    i = s.length - t.length - 1;
                }
                catch (Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Each offset is the entity's length minus one, because the loop
            // itself advances i by one more.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}

/*
 * Class
representing an XML document.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /*
     * Everything in the source text that precedes the root element.
     * Defaults to <?xml version="1.0"?>
     */
    string prolog = "<?xml version=\"1.0\"?>";
    /*
     * Everything in the source text that follows the root element.
     * Defaults to the empty string
     */
    string epilog;

    /*
     * Builds a Document, i.e. a complete DOM (Document Object Model) tree,
     * by parsing XML text.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // The root tag's text is a slice of s, so the pointer distance is
        // the length of whatever came before it.
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has not consumed is the epilog.
        epilog = *parser.s;
    }

    /*
     * Builds a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /*
         * Tests two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /*
         * Orders two Documents by prolog, then element content, then epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable int elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog == other.epilog)
                return 0;
            return epilog < other.epilog ? -1 : 1;
        }

        /*
         * Computes the hash of a Document from its prolog, epilog and
         * element hash.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            immutable size_t elementHash = (cast() this).Element.toHash();
            immutable size_t withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /*
         * Produces the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            return prolog ~ super.toString() ~ epilog;
        }
    }
}

@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto a = new Document(xml);
    auto b = new Document(xml);
    assert(a == b);
    assert(!(a < b));
    int[Document] aa;
    aa[a] = 1;
    assert(aa[b] == 1);

    b ~= new Element("b");
    assert(a < b);
    assert(b > a);
}

/*
 * Class representing an XML element.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; // The start tag of the element
    Item[] items; // The element's items
    Text[] texts; // The element's text items
    CData[] cdatas; // The element's CData items
    Comment[] comments; // The element's comments
    ProcessingInstruction[] pis; // The element's processing instructions
    Element[] elements; // The element's child elements

    /*
     * Builds an Element from a name and, optionally, a string that becomes
     * its Text interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length == 0)
            return;
        opOpAssign!("~")(new Text(interior));
    }

    /*
     * Builds an Element from a Tag. The tag's name, attributes and tag
     * string are copied; the new element starts out as an empty tag.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (key, value; tag_.attr)
            tag.attr[key] = value;
        tag.tagString = tag_.tagString;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and, once the element holds a
    // non-empty item, switches its tag from EMPTY to START.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type != TagType.EMPTY)
            return;
        if (!item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Installs handlers on the parser that append every piece of content to
    // this element (recursing for child elements), then runs the parser.
    private void parse(ElementParser xml)
    {
        xml.onText = (string chunk) { opOpAssign!("~")(new Text(chunk)); };
        xml.onCData = (string chunk) { opOpAssign!("~")(new CData(chunk)); };
        xml.onComment = (string chunk) { opOpAssign!("~")(new Comment(chunk)); };
        xml.onPI = (string chunk) { opOpAssign!("~")(new ProcessingInstruction(chunk)); };

        xml.onStartTag[null] = (ElementParser childParser)
        {
            auto child = new Element(childParser.tag);
            child.parse(childParser);
            opOpAssign!("~")(child);
        };

        xml.parse();
    }

    /*
     * Tests two Elements for equality by comparing their items pairwise.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Element)(o);
        if (items.length != other.items.length)
            return false;
        foreach (idx, item; items)
        {
            if (!item.opEquals(other.items[idx]))
                return false;
        }
        return true;
    }

    /*
     * Orders two Elements by their first differing item; a shorter item
     * list that is a prefix of the other sorts first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        for (uint idx = 0; ; ++idx)
        {
            immutable bool mineDone = idx == items.length;
            immutable bool theirsDone = idx == other.items.length;

            if (mineDone)
                return theirsDone ? 0 : -1;
            if (theirsDone)
                return 1;
            if (!items[idx].opEquals(other.items[idx]))
                return items[idx].opCmp(other.items[idx]);
        }
    }

    /*
     * Computes the hash of an Element as the tag's hash plus the hashes of
     * all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t total = tag.toHash();
        foreach (item; items)
            total += item.toHash();
        return total;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "<title>Good &amp;
         * Bad</title>", will return "Good & Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element holds
         *         an item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string decoded;
            foreach (item; items)
            {
                Text piece = cast(Text) item;
                if (piece is null)
                    throw new DecodeException(item.toString());
                decoded ~= decode(piece.toString(),mode);
            }
            return decoded;
        }

        /*
         * Returns an indented string representation of this item, one
         * array entry per output line.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML)
                return [ tag.toEmptyString() ];

            // A lone Text child is kept on the same line as its tags.
            if (items.length == 1)
            {
                if (auto onlyText = cast(const(Text))(items[0]))
                    return [tag.toStartString() ~ onlyText.toString() ~ tag.toEndString()];
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                foreach (line; item.pretty(indent))
                    lines ~= rightJustify(line,count(line) + indent);
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML)
                return tag.toEmptyString();

            string xmlText = tag.toStartString();
            foreach (item; items)
                xmlText ~= item.toString();
            xmlText ~= tag.toEndString();
            return xmlText;
        }

        // An element is empty XML exactly when it holds no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope
        {
            return items.length == 0;
        }
    }
}

/*
 * Tag types.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType { START, END, EMPTY }

/*
 * Class representing an XML tag.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   // Type of tag
    string name;                    // Tag name
    string[string] attr;            // Associative array of attributes
    private string tagString;

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /*
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException if the text is not a well-formed tag. This
     * includes text that ends before an expected delimiter (name terminator,
     * '=', closing quote, '>') is found.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil reports "not found" as -1. Using that as a slice bound
        // would raise a RangeError, so convert it into a parse failure here.
        // NOTE(review): relies on TagException being caught by the
        // XMLException handler below, like the existing throw in this
        // constructor - confirm against the exception declarations.
        static ptrdiff_t located(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;
            ptrdiff_t i = located(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            // Skip whitespace between the name and the first attribute.
            i = located(s.byCodeUnit.countUntil!(a => !isWhite(a)));
            s = s[i .. $];

            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = located(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                i = located(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                reqc(s,'=');
                i = located(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];

                // The value runs up to the matching quote character.
                immutable char quote = requireOneOf(s,"'\"");
                i = located(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                i = located(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString to exactly the characters consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /*
         * Compares two Tags for equality (name, attributes and type)
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /*
         * Compares two Tags by name, then attributes, then type
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /*
         * Returns the hash of a Tag (computed from the name only)
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /*
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name" followed by every attribute as key="encoded value".
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /*
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /*
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /*
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}

/*
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
* Compares two comments for equality
     *
     * Returns: true only if o is a Comment with identical content
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this comment's
     * content sorts before, equal to or after the other comment's content.
     * If o is an Item of another kind, 0 is returned (unchanged from before).
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // Not a Comment: no ordering is defined, keep reporting 0.
        if (t is null) return 0;

        // The comparison must be returned as an int. Combining it with
        // `t !is null &&` turned the result into a bool, so "less than"
        // (-1) was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /*
     * Returns a string representation of this comment: the content wrapped
     * in the comment delimiters
     */
    override string toString() scope const @safe pure nothrow
    {
        return "<!--" ~ content ~ "-->";
    }

    // A comment is never considered empty XML
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return false;
    }
}

// https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;

    // "==" is a legal comment body ...
    auto comment = new Comment("==");
    assert(comment.content == "==");

    // ... whereas "--" is not
    assertThrown!CommentException(new Comment("--"));
}

/*
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /*
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        immutable terminatorAt = content.indexOf("]]>");
        if (terminatorAt != -1)
            throw new CDataException(content);
        this.content = content;
    }

    /*
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = cast(const CData) toType!(const Item)(o);
        if (other is null)
            return false;
        return content == other.content;
    }

    /*
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
*
     * Returns: a negative value, zero or a positive value when this section's
     * content sorts before, equal to or after the other section's content.
     * If o is an Item of another kind, 0 is returned (unchanged from before).
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // Not a CData: no ordering is defined, keep reporting 0.
        if (t is null) return 0;

        // Return the int comparison directly. The former
        // `t !is null && (...)` form produced a bool, so -1 came out as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}

/*
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /*
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text.
* This function encodes the text before
     *                insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // stores the encoded text: a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /*
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with identical (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * content sorts before, equal to or after the other section's content.
     * If o is an Item of another kind, 0 is returned (unchanged from before).
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // Not a Text: no ordering is defined, keep reporting 0.
        if (t is null) return 0;

        // Return the int comparison directly. The former
        // `t !is null && (...)` form produced a bool, so -1 came out as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /*
     * Returns a string representation of this Text section (the stored,
     * already encoded content)
     */
    override string toString() scope const @safe @nogc pure nothrow
    {
        return content;
    }

    /*
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return content.length == 0;
    }
}

/*
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /*
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        immutable closerAt = content.indexOf(">");
        if (closerAt != -1)
            throw new XIException(content);
        this.content = content;
    }

    /*
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = cast(const XMLInstruction) toType!(const Item)(o);
        if (other is null)
            return false;
        return content == other.content;
    }

    /*
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
*
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's.
     * If o is an Item of another kind, 0 is returned (unchanged from before).
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // Not an XMLInstruction: no ordering is defined, keep reporting 0.
        if (t is null) return 0;

        // Return the int comparison directly. The former
        // `t !is null && (...)` form produced a bool, so -1 came out as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}

/*
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /*
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /*
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with identical content
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's.
     * If o is an Item of another kind, 0 is returned (unchanged from before).
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // Not a ProcessingInstruction: no ordering is defined, keep reporting 0.
        if (t is null) return 0;

        // Return the int comparison directly. The former
        // `t !is null && (...)` form produced a bool, so -1 came out as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } // Returns false always
}

/*
 * Abstract base class for XML items
 */
abstract class Item
{
    // Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    // Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    // Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    // Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /*
     * Returns an indented string representation of this item
     *
     * This base implementation returns the stripped toString() as a single
     * line, or no lines at all if that is empty; indent is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;
        string s = strip(toString());
        return s.length == 0 ? [] : [ s ];
    }

    // Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}

/*
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /*
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
*
     * Params:
     *      xmlText_ = the entire XML document as text; must be non-empty
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        // Keep a private copy of the slice and point the inherited cursor `s`
        // at it before calling super(), whose constructor dereferences `s`.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}

// Building a Document from text: the root's children become its elements
@system unittest
{
    auto doc = new Document("<root><child><grandchild/></child></root>");
    assert(doc.elements.length == 1);
    assert(doc.elements[0].tag.name == "child");
    assert(doc.items == doc.elements);
}

/*
 * Class for parsing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
*
 */
class ElementParser
{
    // Callback receiving the text of a comment, CDATA, instruction or text run
    alias Handler = void delegate(string);
    // Callback receiving the Element built when an end (or empty) tag is reached
    alias ElementHandler = void delegate(in Element element);
    // Callback receiving a parser for the element whose start tag was found
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;               // tag of the element being parsed
        string elementStart;    // snapshot of *s taken at construction
        string* s;              // remaining unparsed input; parse() and toString() read it

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags
        // (s must be assigned before this(), which dereferences it)
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags
        // (s must be assigned before this(), which dereferences it)
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            s = t;
            this();
            tag_ = tag;
        }
    }

    /*
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }

    /*
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
*
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /*
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
*
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    // Records the input as it stands when this parser is created, so that
    // toString() can later work out how much has been consumed.
    // The cursor `s` must already be set by the calling constructor.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /*
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /*
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate.
* Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /*
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /*
     * Register a handler which will be called whenever a comment is
     * encountered.
*
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /*
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /*
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /*
     * Parse an XML element.
*
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * When both a raw text handler and a text handler are registered, only
     * the raw one is called.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        // Tag this parser was created for; null when no tag has been set yet
        // (see the early return below).
        const Tag root = tag_;
        // Most recently seen start tag for each element name
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            // NOTE(review): chop is defined elsewhere; it appears to remove and
            // return the first n characters of its argument -- confirm.
            if (startsWith(*s,"<!--"))
            {
                // Comment: hand over the text between <!-- and -->
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                // CDATA section: hand over the text between <![CDATA[ and ]]>
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                // XML instruction: hand over the text between <! and >
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                // Processing instruction: hand over the text between <? and ?>
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);

                    // Prefer the handler registered for this name, else the
                    // catch-all registered under null
                    auto handler = tag_.name in onStartTag;
                    if (handler !is null) (*handler)(parser);
                    else
                    {
                        handler = null in onStartTag;
                        if (handler !is null) (*handler)(parser);
                    }
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // Everything between the end of the start tag's source text
                    // and the beginning of the end tag's source text.
                    // NOTE(review): assumes both tagStrings are slices of the
                    // same underlying buffer -- confirm in Tag's constructor.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    auto handler = tag_.name in onEndTag;
                    if (handler !is null) (*handler)(element);
                    else
                    {
                        handler = null in onEndTag;
                        if (handler !is null) (*handler)(element);
                    }

                    // The end tag of this parser's own element ends the parse
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    // An empty tag is reported as a start tag immediately
                    // followed by an end tag
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy
                    // https://issues.dlang.org/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                    else
                    {
                        handler1 = null in onStartTag;
                        if (handler1 !is null) (*handler1)(parser);
                    }

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                    else
                    {
                        handler2 = null in onEndTag;
                        if (handler2 !is null) (*handler2)(element);
                    }
                }
            }
            else
            {
                // Plain text up to the next '<'; the raw handler, if any,
                // takes precedence over the decoding one
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /*
     * Returns that part of the element which has already been parsed
     * (the snapshot taken at construction minus the input still remaining)
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 .. elementStart.length - s.length];
    }

}

private
{
    // Mixed into each checkXxx function below. It remembers the input `s` as
    // it was on entry and provides fail() overloads which restore `s` to that
    // position and throw an Err labelled with the rule name `msg`.
    template Check(string msg)
    {
        string old = s;

        // Fail with just the rule name
        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        // Fail with the rule name, chaining the nested error e
        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        // Fail with the rule name, chaining a new error carrying msg2 at the
        // current position
        void fail(string msg2) @safe pure
        {
            fail(new Err(s,msg2));
        }
    }

    // Consumes one Misc item: a comment, a processing instruction or whitespace
    void checkMisc(ref string s) @safe pure // rule 27
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Misc");

        try
        {
                 if (s.startsWith("<!--")) { checkComment(s); }
            else if (s.startsWith("<?"))   { checkPI(s); }
            else                           { checkSpace(s); }
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a whole document: prolog, root element, then any trailing Misc
    void checkDocument(ref string s) @safe pure // rule 1
    {
        mixin Check!("Document");
        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Fails at the first character for which isChar() is false
    void checkChars(ref string s) @safe pure // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead
        import std.format : format;

        mixin Check!("Chars");

        dchar c;
        ptrdiff_t n = -1;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            // Point the error at the offending character
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    // Consumes one or more leading whitespace characters; fails if there
    // are none.
    void checkSpace(ref string s) @safe pure // rule 3
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        mixin Check!("Whitespace");
        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
        if (i == -1 && s.length > 0 && isWhite(s[0]))
            s = s[$ .. $];      // nothing but whitespace: consume it all
        else if (i > -1)
            s = s[i .. $];
        if (s is old) fail();   // nothing was consumed
    }

    /*
     * Consumes a Name from the front of s.
     *
     * Params:
     *      s    = input; advanced past the name on success
     *      name = receives the name that was consumed
     *
     * Throws: Err if s is empty or does not begin with a name start character
     */
    void checkName(ref string s, out string name) @safe pure // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();

        // Length of the name in code units. It defaults to the whole input,
        // so a name that runs to the very end of s is returned in full; the
        // previous default of 0 returned an empty name in that case.
        size_t n = s.length;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            if (i == 0) fail();     // first character must be a name start character
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0 .. n];
        s = s[n..$];
    }

    /*
     * Consumes a quoted attribute value, including both quotes.
     *
     * The value may be delimited by either single or double quotes. Inside
     * it, '<' is rejected and every '&' must begin a valid reference.
     *
     * Throws: Err if the quotes are missing or unbalanced, if '<' occurs in
     * the value, or if a reference is malformed
     */
    void checkAttValue(ref string s) @safe pure // rule 10
    {
        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for (;;)
        {
            // Skip ordinary characters up to the next one needing attention:
            // the closing quote, a '<' or the start of a reference. Scanning
            // for the quote alone made the '<' and reference checks below
            // unreachable and sliced with -1 when the quote was missing.
            size_t n = 0;
            while (n < s.length && s[n] != c && s[n] != '<' && s[n] != '&')
                ++n;
            s = s[n .. $];

            if (s.length == 0) fail("unterminated attribute value");
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            try { checkReference(s); } catch (Err e) { fail(e); }
        }
        s = s[1..$];
    }

    // Consumes character data up to the next '&' or '<' (or the end of the
    // input); fails if "]]>" is met first.
    void checkCharData(ref string s) @safe pure // rule 14
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    // Consumes a comment. The first "--" after the opening must be the
    // start of the closing "-->".
    void checkComment(ref string s) @safe pure // rule 15
    {
        import std.string : indexOf;

        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
    }

    // Consumes a processing instruction, <? ... ?>
    void checkPI(ref string s) @safe pure // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a CDATA section: the cdata opening literal through "]]>"
    void checkCDSect(ref string s) @safe pure // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the prolog: optional XML declaration, any Misc items, and an
    // optional DOCTYPE declaration followed by further Misc items
    void checkProlog(ref string s) @safe pure // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the XML declaration, <?xml version=... ?>
    void checkXMLDecl(ref string s) @safe pure // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes the version attribute of the XML declaration, including the
    // mandatory leading whitespace
    void checkVersionInfo(ref string s) @safe pure // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes an '=' with optional whitespace on either side
    void checkEq(ref string s) @safe pure // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Consumes a version number: one or more of the characters
     * a-z, A-Z, 0-9, '_', '.', ':' and '-'.
     *
     * The scan stops at the first character outside that set, so it works
     * whichever quote character encloses the value. It used to scan for a
     * double quote specifically, which broke single-quoted versions and
     * sliced with -1 when no double quote followed.
     *
     * Throws: Err if no version character is present
     */
    void checkVersionNum(ref string s) @safe pure // rule 26
    {
        import std.ascii : isAlphaNum;

        mixin Check!("VersionNum");

        size_t n = 0;
        while (n < s.length
            && (isAlphaNum(s[n]) || s[n] == '_' || s[n] == '.'
                || s[n] == ':' || s[n] == '-'))
        {
            ++n;
        }
        if (n == 0) fail();     // a version number cannot be empty
        s = s[n .. $];
    }

    // Consumes a DOCTYPE declaration; its interior is not examined
    void checkDocTypeDecl(ref string s) @safe pure // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet.
// That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes a standalone declaration: whitespace, "standalone", '=' and a
    // quoted yes/no value
    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // Number of code units taken up by the quoted value
        int n = 0;
        immutable bool isYes = s.startsWith("'yes'") || s.startsWith("\"yes\"");
        if (isYes)
        {
            n = 5;
        }
        else
        {
            immutable bool isNo = s.startsWith("'no'" ) || s.startsWith("\"no\"" );
            if (isNo)
                n = 4;
            else
                fail("standalone attribute value must be 'yes', \"yes\","~
                    " 'no' or \"no\"");
        }
        s = s[n..$];
    }

    // Consumes an element: either a self-closing tag, or a start tag, its
    // content and an end tag carrying the same name
    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string sname,ename,t;
        try { checkTag(s,t,sname); } catch (Err e) { fail(e); }

        // A self-closing tag is already a complete element
        if (t != "STag") return;

        try
        {
            checkContent(s);
            t = s;                  // remember where the end tag begins
            checkETag(s,ename);
        }
        catch (Err e) { fail(e); }

        if (sname == ename) return;

        // Report the mismatch at the end tag
        s = t;
        fail("end tag name \"" ~ ename
            ~ "\" differs from start tag name \""~sname~"\"");
    }

    // rules 40 and 44
    // Consumes a start tag or a self-closing tag. type is set to "STag" for
    // the former and "ETag" for the latter; name receives the tag name.
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);

            immutable bool selfClosing = s.length != 0 && s[0] == '/';
            if (selfClosing)
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes one attribute: name, '=' and a quoted value
    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string attrName;
            checkName(s,attrName);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes an end tag and reports its name
    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin Check!("ETag");

        try
        {
            checkLiteral("</",s);
            checkName(s,name);
            opt!(checkSpace)(s);
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes element content up to, but not including, the enclosing
    // element's end tag (or to the end of the input)
    void checkContent(ref string s) @safe pure // rule 43
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Content");

        try
        {
            while (s.length != 0)
            {
                // Errors are reported relative to the item being checked
                old = s;

                if (s.startsWith("&"))
                {
                    checkReference(s);
                }
                else if (s.startsWith("<!--"))
                {
                    checkComment(s);
                }
                else if (s.startsWith("<?"))
                {
                    checkPI(s);
                }
                else if (s.startsWith(cdata))
                {
                    checkCDSect(s);
                }
                else if (s.startsWith("</"))
                {
                    break;
                }
                else if (s.startsWith("<"))
                {
                    checkElement(s);
                }
                else
                {
                    checkCharData(s);
                }
            }
        }
        catch (Err e) { fail(e); }
    }

    void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
    {
        import std.format : format;

        mixin Check!("CharRef");

        c = 0;
        try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
        int radix = 10;
        if (s.length != 0 && s[0] == 'x')
        {
            s = s[1..$];
            radix = 16;
        }
        if (s.length == 0) fail("unterminated character reference");
        if (s[0] == ';')
            fail("character reference must have at least one digit");
        while (s.length != 0)
        {
            immutable char d = s[0];
            int n = 0;
            switch (d)
            {
                case 'F','f': ++n;      goto case;
                case 'E','e': ++n;      goto case;
                case 'D','d': ++n;      goto case;
                case 'C','c': ++n;      goto case;
                case 'B','b': ++n;      goto case;
                case 'A','a': ++n;      goto case;
                case '9':     ++n;      goto case;
                case '8':     ++n;      goto case;
                case '7':     ++n;      goto case;
                case '6':     ++n;      goto case;
                case '5':     ++n;      goto case;
                case '4':     ++n;      goto case;
                case '3':     ++n;      goto case;
                case '2':     ++n;      goto case;
                case '1':     ++n;      goto case;
                case '0':     break;
                default: n = 100; break;
            }
            if (n >= radix) break;
            c *= radix;
c += n; 2595 s = s[1..$]; 2596 } 2597 if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); 2598 if (s.length == 0 || s[0] != ';') fail("expected ;"); 2599 else s = s[1..$]; 2600 } 2601 2602 void checkReference(ref string s) @safe pure // rule 67 2603 { 2604 import std.algorithm.searching : startsWith; 2605 2606 mixin Check!("Reference"); 2607 2608 try 2609 { 2610 dchar c; 2611 if (s.startsWith("&#")) checkCharRef(s,c); 2612 else checkEntityRef(s); 2613 } 2614 catch (Err e) { fail(e); } 2615 } 2616 2617 void checkEntityRef(ref string s) @safe pure // rule 68 2618 { 2619 mixin Check!("EntityRef"); 2620 2621 try 2622 { 2623 string name; 2624 checkLiteral("&",s); 2625 checkName(s,name); 2626 checkLiteral(";",s); 2627 } 2628 catch (Err e) { fail(e); } 2629 } 2630 2631 void checkEncName(ref string s) @safe pure // rule 81 2632 { 2633 import std.algorithm.searching : countUntil; 2634 import std.ascii : isAlpha; 2635 import std.utf : byCodeUnit; 2636 2637 mixin Check!("EncName"); 2638 2639 s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; 2640 if (s is old) fail(); 2641 s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; 2642 } 2643 2644 void checkEncodingDecl(ref string s) @safe pure // rule 80 2645 { 2646 mixin Check!("EncodingDecl"); 2647 2648 try 2649 { 2650 checkSpace(s); 2651 checkLiteral("encoding",s); 2652 checkEq(s); 2653 quoted!(checkEncName)(s); 2654 } 2655 catch (Err e) { fail(e); } 2656 } 2657 2658 // Helper functions 2659 2660 void checkLiteral(string literal,ref string s) @safe pure 2661 { 2662 import std.string : startsWith; 2663 2664 mixin Check!("Literal"); 2665 2666 if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); 2667 s = s[literal.length..$]; 2668 } 2669 2670 void checkEnd(string end,ref string s) @safe pure 2671 { 2672 import std.string : indexOf; 2673 // Deliberately no mixin Check here. 
2674 2675 auto n = s.indexOf(end); 2676 if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); 2677 s = s[n..$]; 2678 checkLiteral(end,s); 2679 } 2680 2681 // Metafunctions -- none of these use mixin Check 2682 2683 void opt(alias f)(ref string s) 2684 { 2685 try { f(s); } catch (Err e) {} 2686 } 2687 2688 void plus(alias f)(ref string s) 2689 { 2690 f(s); 2691 star!(f)(s); 2692 } 2693 2694 void star(alias f)(ref string s) 2695 { 2696 while (s.length != 0) 2697 { 2698 try { f(s); } 2699 catch (Err e) { return; } 2700 } 2701 } 2702 2703 void quoted(alias f)(ref string s) 2704 { 2705 import std.string : startsWith; 2706 2707 if (s.startsWith("'")) 2708 { 2709 checkLiteral("'",s); 2710 f(s); 2711 checkLiteral("'",s); 2712 } 2713 else 2714 { 2715 checkLiteral("\"",s); 2716 f(s); 2717 checkLiteral("\"",s); 2718 } 2719 } 2720 2721 void seq(alias f,alias g)(ref string s) 2722 { 2723 f(s); 2724 g(s); 2725 } 2726 } 2727 2728 /* 2729 * Check an entire XML document for well-formedness 2730 * 2731 * Params: 2732 * s = the document to be checked, passed as a string 2733 * 2734 * Throws: CheckException if the document is not well formed 2735 * 2736 * CheckException's toString() method will yield the complete hierarchy of 2737 * parse failure (the XML equivalent of a stack trace), giving the line and 2738 * column number of every failure at every level. 
*/
void check(string s) @safe pure
{
    // The checks below advance s as they consume input, so remember the
    // complete document: CheckException.complete() derives line and column
    // numbers from the distance between the start of the document and the
    // unparsed tail. Passing the already-advanced s made the "Junk found
    // after document" error always report line 1, column 1.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}

// A mismatched end tag (</genres> closing <genre>) must be reported.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}

// A comment between sibling elements is well formed.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}

// Attribute values may contain the other kind of quote character.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                            xmlns:stream="http://etherx.'jabber'.org/streams"
                            xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                            xml:lang="en" version="1.0" attr='a"b"c'>
                            </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}

// Entity references in attribute values and in text are decoded.
// The ampersands are written as &amp; so that the fixture is well-formed
// XML; the assertions compare against the decoded text.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}

// An attribute value containing a quote and '>' must survive a round trip.
// The value has to be entity-encoded in the source text: a raw '"' would
// terminate the attribute value and the tag would not parse.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}

/* The base class for exceptions thrown by this module */
class XMLException : Exception { this(string msg) @safe pure { super(msg); } }

// Other exceptions

// Thrown during Comment constructor
class CommentException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during CData constructor
class CDataException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during XMLInstruction constructor
class XIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during Text constructor
class TextException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during decode()
class DecodeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown when parsing for Tags
class TagException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/*
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy
    private string tail; // Unparsed remainder of the input at the failure
    /*
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; // Line number at which parse failure occurred
    size_t column = 0; // Column number at which parse failure occurred

    // tail is the input remaining at the point of failure, msg the rule name
    // or message, and err an optional linked exception.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    // Fills in line and column for this exception and every linked one.
    // entire must be the complete document, of which tail is a suffix: the
    // position is computed from the text that precedes tail.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        string head = entire[0..$-tail.length];
        // Index just past the last newline in head (0 when there is none).
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Columns are counted in code points, not UTF-8 code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    // Renders the linked exceptions first, then this one, one per line. The
    // "Line x, column y" prefix only appears once complete() has run.
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}

private alias Err = CheckException;

// Private helper
// functions

private
{
    // Downcasts o to T; throws InvalidTypeException when o is not a T.
    inout(T) toType(T)(inout return scope Object o)
    {
        T converted = cast(T)(o);
        if (converted !is null) return converted;

        throw new InvalidTypeException("Attempt to compare a "
            ~ T.stringof ~ " with an instance of another type");
    }

    // Removes the first n code units from s and returns them.
    // n == -1 means "everything".
    string chop(ref string s, size_t n) @safe pure nothrow
    {
        immutable size_t cut = (n == -1) ? s.length : n;
        string front = s[0 .. cut];
        s = s[cut .. $];
        return front;
    }

    // Consumes c from the front of s when present; reports whether it did.
    bool optc(ref string s, char c) @safe pure nothrow
    {
        if (s.length == 0 || s[0] != c) return false;
        s = s[1..$];
        return true;
    }

    // Consumes c from the front of s; throws TagException when s does not
    // start with c.
    void reqc(ref string s, char c) @safe pure
    {
        immutable bool matches = s.length != 0 && s[0] == c;
        if (!matches) throw new TagException("");
        s = s[1..$];
    }

    // Consumes and returns the first code unit of s, which must be one of
    // chars; throws TagException otherwise.
    char requireOneOf(ref string s, string chars) @safe pure
    {
        import std.string : indexOf;

        immutable bool accepted = s.length != 0 && indexOf(chars,s[0]) != -1;
        if (!accepted) throw new TagException("");

        immutable char first = s[0];
        s = s[1..$];
        return first;
    }

    alias hash = .hashOf;

    // Definitions from the XML specification
    // Each table is a flat list of inclusive (first, last) code point pairs
    // in ascending order, the layout that lookup() searches.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    // Binary search over a table of (first, last) pairs: returns true when
    // c lies within one of the inclusive ranges.
    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
    {
        for (auto rest = table; rest.length != 0; )
        {
            // Middle of the remaining slice, rounded down to an even index
            // so that it always addresses the first element of a pair.
            immutable pair = (rest.length >> 1) & ~1;
            if (c < rest[pair])
                rest = rest[0 .. pair];
            else if (c > rest[pair+1])
                rest = rest[pair+2..$];
            else
                return true;
        }
        return false;
    }

    // Returns a short printable preview of s: code units outside
    // 0x20 .. 0x7F are replaced by '.', and once 40 units have been
    // collected "___" is appended and the rest is dropped.
    string startOf(string s) @safe nothrow pure
    {
        string preview;
        foreach (char unit;s)
        {
            immutable bool printable = unit >= 0x20 && unit <= 0x7F;
            preview ~= printable ? unit : '.';
            if (preview.length < 40) continue;
            preview ~= "___";
            break;
        }
        return preview;
    }

    // Aborts by throwing an XMLException carrying s as its message.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}