/* Tests in the "basic" test case for the Expat test suite
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice
and this permission notice shall be included 33bd8f1dc3Sbluhm in all copies or substantial portions of the Software. 34bd8f1dc3Sbluhm 35bd8f1dc3Sbluhm THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36bd8f1dc3Sbluhm EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37bd8f1dc3Sbluhm MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38bd8f1dc3Sbluhm NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39bd8f1dc3Sbluhm DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40bd8f1dc3Sbluhm OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41bd8f1dc3Sbluhm USE OR OTHER DEALINGS IN THE SOFTWARE. 42bd8f1dc3Sbluhm */ 43bd8f1dc3Sbluhm 44bd8f1dc3Sbluhm #if defined(NDEBUG) 45bd8f1dc3Sbluhm # undef NDEBUG /* because test suite relies on assert(...) at the moment */ 46bd8f1dc3Sbluhm #endif 47bd8f1dc3Sbluhm 48bd8f1dc3Sbluhm #include <assert.h> 49bd8f1dc3Sbluhm 50bd8f1dc3Sbluhm #include <stdio.h> 51bd8f1dc3Sbluhm #include <string.h> 52bd8f1dc3Sbluhm #include <time.h> 53bd8f1dc3Sbluhm 54bd8f1dc3Sbluhm #if ! defined(__cplusplus) 55bd8f1dc3Sbluhm # include <stdbool.h> 56bd8f1dc3Sbluhm #endif 57bd8f1dc3Sbluhm 58bd8f1dc3Sbluhm #include "expat_config.h" 59bd8f1dc3Sbluhm 60bd8f1dc3Sbluhm #include "expat.h" 61bd8f1dc3Sbluhm #include "internal.h" 62bd8f1dc3Sbluhm #include "minicheck.h" 63bd8f1dc3Sbluhm #include "structdata.h" 64bd8f1dc3Sbluhm #include "common.h" 65bd8f1dc3Sbluhm #include "dummy.h" 66bd8f1dc3Sbluhm #include "handlers.h" 67bd8f1dc3Sbluhm #include "siphash.h" 68bd8f1dc3Sbluhm #include "basic_tests.h" 69bd8f1dc3Sbluhm 70bd8f1dc3Sbluhm static void 71bd8f1dc3Sbluhm basic_setup(void) { 72bd8f1dc3Sbluhm g_parser = XML_ParserCreate(NULL); 73bd8f1dc3Sbluhm if (g_parser == NULL) 74bd8f1dc3Sbluhm fail("Parser not created."); 75bd8f1dc3Sbluhm } 76bd8f1dc3Sbluhm 77bd8f1dc3Sbluhm /* 78bd8f1dc3Sbluhm * Character & encoding tests. 
79bd8f1dc3Sbluhm */ 80bd8f1dc3Sbluhm 81bd8f1dc3Sbluhm START_TEST(test_nul_byte) { 82bd8f1dc3Sbluhm char text[] = "<doc>\0</doc>"; 83bd8f1dc3Sbluhm 84bd8f1dc3Sbluhm /* test that a NUL byte (in US-ASCII data) is an error */ 85bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 86bd8f1dc3Sbluhm == XML_STATUS_OK) 87bd8f1dc3Sbluhm fail("Parser did not report error on NUL-byte."); 88bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 89bd8f1dc3Sbluhm xml_failure(g_parser); 90bd8f1dc3Sbluhm } 91bd8f1dc3Sbluhm END_TEST 92bd8f1dc3Sbluhm 93bd8f1dc3Sbluhm START_TEST(test_u0000_char) { 94bd8f1dc3Sbluhm /* test that a NUL byte (in US-ASCII data) is an error */ 95bd8f1dc3Sbluhm expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 96bd8f1dc3Sbluhm "Parser did not report error on NUL-byte."); 97bd8f1dc3Sbluhm } 98bd8f1dc3Sbluhm END_TEST 99bd8f1dc3Sbluhm 100bd8f1dc3Sbluhm START_TEST(test_siphash_self) { 101bd8f1dc3Sbluhm if (! sip24_valid()) 102bd8f1dc3Sbluhm fail("SipHash self-test failed"); 103bd8f1dc3Sbluhm } 104bd8f1dc3Sbluhm END_TEST 105bd8f1dc3Sbluhm 106bd8f1dc3Sbluhm START_TEST(test_siphash_spec) { 107bd8f1dc3Sbluhm /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 108bd8f1dc3Sbluhm const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 109bd8f1dc3Sbluhm "\x0a\x0b\x0c\x0d\x0e"; 110bd8f1dc3Sbluhm const size_t len = sizeof(message) - 1; 111bd8f1dc3Sbluhm const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 112bd8f1dc3Sbluhm struct siphash state; 113bd8f1dc3Sbluhm struct sipkey key; 114bd8f1dc3Sbluhm 115bd8f1dc3Sbluhm sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 116bd8f1dc3Sbluhm "\x0a\x0b\x0c\x0d\x0e\x0f"); 117bd8f1dc3Sbluhm sip24_init(&state, &key); 118bd8f1dc3Sbluhm 119bd8f1dc3Sbluhm /* Cover spread across calls */ 120bd8f1dc3Sbluhm sip24_update(&state, message, 4); 121bd8f1dc3Sbluhm sip24_update(&state, message + 4, len - 4); 122bd8f1dc3Sbluhm 123bd8f1dc3Sbluhm /* 
Cover null length */ 124bd8f1dc3Sbluhm sip24_update(&state, message, 0); 125bd8f1dc3Sbluhm 126bd8f1dc3Sbluhm if (sip24_final(&state) != expected) 127bd8f1dc3Sbluhm fail("sip24_final failed spec test\n"); 128bd8f1dc3Sbluhm 129bd8f1dc3Sbluhm /* Cover wrapper */ 130bd8f1dc3Sbluhm if (siphash24(message, len, &key) != expected) 131bd8f1dc3Sbluhm fail("siphash24 failed spec test\n"); 132bd8f1dc3Sbluhm } 133bd8f1dc3Sbluhm END_TEST 134bd8f1dc3Sbluhm 135bd8f1dc3Sbluhm START_TEST(test_bom_utf8) { 136bd8f1dc3Sbluhm /* This test is really just making sure we don't core on a UTF-8 BOM. */ 137bd8f1dc3Sbluhm const char *text = "\357\273\277<e/>"; 138bd8f1dc3Sbluhm 139bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 140bd8f1dc3Sbluhm == XML_STATUS_ERROR) 141bd8f1dc3Sbluhm xml_failure(g_parser); 142bd8f1dc3Sbluhm } 143bd8f1dc3Sbluhm END_TEST 144bd8f1dc3Sbluhm 145bd8f1dc3Sbluhm START_TEST(test_bom_utf16_be) { 146bd8f1dc3Sbluhm char text[] = "\376\377\0<\0e\0/\0>"; 147bd8f1dc3Sbluhm 148bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 149bd8f1dc3Sbluhm == XML_STATUS_ERROR) 150bd8f1dc3Sbluhm xml_failure(g_parser); 151bd8f1dc3Sbluhm } 152bd8f1dc3Sbluhm END_TEST 153bd8f1dc3Sbluhm 154bd8f1dc3Sbluhm START_TEST(test_bom_utf16_le) { 155bd8f1dc3Sbluhm char text[] = "\377\376<\0e\0/\0>\0"; 156bd8f1dc3Sbluhm 157bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 158bd8f1dc3Sbluhm == XML_STATUS_ERROR) 159bd8f1dc3Sbluhm xml_failure(g_parser); 160bd8f1dc3Sbluhm } 161bd8f1dc3Sbluhm END_TEST 162bd8f1dc3Sbluhm 163bd8f1dc3Sbluhm START_TEST(test_nobom_utf16_le) { 164bd8f1dc3Sbluhm char text[] = " \0<\0e\0/\0>\0"; 165bd8f1dc3Sbluhm 166bd8f1dc3Sbluhm if (g_chunkSize == 1) { 167bd8f1dc3Sbluhm // TODO: with just the first byte, we can't tell the difference between 168bd8f1dc3Sbluhm // UTF-16-LE and UTF-8. Avoid the failure for now. 
169bd8f1dc3Sbluhm return; 170bd8f1dc3Sbluhm } 171bd8f1dc3Sbluhm 172bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 173bd8f1dc3Sbluhm == XML_STATUS_ERROR) 174bd8f1dc3Sbluhm xml_failure(g_parser); 175bd8f1dc3Sbluhm } 176bd8f1dc3Sbluhm END_TEST 177bd8f1dc3Sbluhm 178bd8f1dc3Sbluhm START_TEST(test_hash_collision) { 179bd8f1dc3Sbluhm /* For full coverage of the lookup routine, we need to ensure a 180bd8f1dc3Sbluhm * hash collision even though we can only tell that we have one 181bd8f1dc3Sbluhm * through breakpoint debugging or coverage statistics. The 182bd8f1dc3Sbluhm * following will cause a hash collision on machines with a 64-bit 183bd8f1dc3Sbluhm * long type; others will have to experiment. The full coverage 184bd8f1dc3Sbluhm * tests invoked from qa.sh usually provide a hash collision, but 185bd8f1dc3Sbluhm * not always. This is an attempt to provide insurance. 186bd8f1dc3Sbluhm */ 187bd8f1dc3Sbluhm #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 188bd8f1dc3Sbluhm const char *text 189bd8f1dc3Sbluhm = "<doc>\n" 190bd8f1dc3Sbluhm "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 191bd8f1dc3Sbluhm "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 192bd8f1dc3Sbluhm "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 193bd8f1dc3Sbluhm "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 194bd8f1dc3Sbluhm "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 195bd8f1dc3Sbluhm "<d8>This triggers the table growth and collides with b2</d8>\n" 196bd8f1dc3Sbluhm "</doc>\n"; 197bd8f1dc3Sbluhm 198bd8f1dc3Sbluhm XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 199bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 200bd8f1dc3Sbluhm == XML_STATUS_ERROR) 201bd8f1dc3Sbluhm xml_failure(g_parser); 202bd8f1dc3Sbluhm } 203bd8f1dc3Sbluhm END_TEST 204bd8f1dc3Sbluhm #undef COLLIDING_HASH_SALT 205bd8f1dc3Sbluhm 206bd8f1dc3Sbluhm /* Regression test for SF bug #491986. 
*/ 207bd8f1dc3Sbluhm START_TEST(test_danish_latin1) { 208bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 209bd8f1dc3Sbluhm "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 210bd8f1dc3Sbluhm #ifdef XML_UNICODE 211bd8f1dc3Sbluhm const XML_Char *expected 212bd8f1dc3Sbluhm = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 213bd8f1dc3Sbluhm #else 214bd8f1dc3Sbluhm const XML_Char *expected 215bd8f1dc3Sbluhm = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 216bd8f1dc3Sbluhm #endif 217bd8f1dc3Sbluhm run_character_check(text, expected); 218bd8f1dc3Sbluhm } 219bd8f1dc3Sbluhm END_TEST 220bd8f1dc3Sbluhm 221bd8f1dc3Sbluhm /* Regression test for SF bug #514281. */ 222bd8f1dc3Sbluhm START_TEST(test_french_charref_hexidecimal) { 223bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 224bd8f1dc3Sbluhm "<doc>éèàçêÈ</doc>"; 225bd8f1dc3Sbluhm #ifdef XML_UNICODE 226bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 227bd8f1dc3Sbluhm #else 228bd8f1dc3Sbluhm const XML_Char *expected 229bd8f1dc3Sbluhm = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 230bd8f1dc3Sbluhm #endif 231bd8f1dc3Sbluhm run_character_check(text, expected); 232bd8f1dc3Sbluhm } 233bd8f1dc3Sbluhm END_TEST 234bd8f1dc3Sbluhm 235bd8f1dc3Sbluhm START_TEST(test_french_charref_decimal) { 236bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 237bd8f1dc3Sbluhm "<doc>éèàçêÈ</doc>"; 238bd8f1dc3Sbluhm #ifdef XML_UNICODE 239bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 240bd8f1dc3Sbluhm #else 241bd8f1dc3Sbluhm const XML_Char *expected 242bd8f1dc3Sbluhm = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 243bd8f1dc3Sbluhm #endif 244bd8f1dc3Sbluhm run_character_check(text, expected); 245bd8f1dc3Sbluhm } 246bd8f1dc3Sbluhm END_TEST 247bd8f1dc3Sbluhm 248bd8f1dc3Sbluhm START_TEST(test_french_latin1) { 249bd8f1dc3Sbluhm 
const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 250bd8f1dc3Sbluhm "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 251bd8f1dc3Sbluhm #ifdef XML_UNICODE 252bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 253bd8f1dc3Sbluhm #else 254bd8f1dc3Sbluhm const XML_Char *expected 255bd8f1dc3Sbluhm = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 256bd8f1dc3Sbluhm #endif 257bd8f1dc3Sbluhm run_character_check(text, expected); 258bd8f1dc3Sbluhm } 259bd8f1dc3Sbluhm END_TEST 260bd8f1dc3Sbluhm 261bd8f1dc3Sbluhm START_TEST(test_french_utf8) { 262bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 263bd8f1dc3Sbluhm "<doc>\xC3\xA9</doc>"; 264bd8f1dc3Sbluhm #ifdef XML_UNICODE 265bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9"); 266bd8f1dc3Sbluhm #else 267bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xC3\xA9"); 268bd8f1dc3Sbluhm #endif 269bd8f1dc3Sbluhm run_character_check(text, expected); 270bd8f1dc3Sbluhm } 271bd8f1dc3Sbluhm END_TEST 272bd8f1dc3Sbluhm 273bd8f1dc3Sbluhm /* Regression test for SF bug #600479. 274bd8f1dc3Sbluhm XXX There should be a test that exercises all legal XML Unicode 275bd8f1dc3Sbluhm characters as PCDATA and attribute value content, and XML Name 276bd8f1dc3Sbluhm characters as part of element and attribute names. 277bd8f1dc3Sbluhm */ 278bd8f1dc3Sbluhm START_TEST(test_utf8_false_rejection) { 279bd8f1dc3Sbluhm const char *text = "<doc>\xEF\xBA\xBF</doc>"; 280bd8f1dc3Sbluhm #ifdef XML_UNICODE 281bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xfebf"); 282bd8f1dc3Sbluhm #else 283bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xEF\xBA\xBF"); 284bd8f1dc3Sbluhm #endif 285bd8f1dc3Sbluhm run_character_check(text, expected); 286bd8f1dc3Sbluhm } 287bd8f1dc3Sbluhm END_TEST 288bd8f1dc3Sbluhm 289bd8f1dc3Sbluhm /* Regression test for SF bug #477667. 
290bd8f1dc3Sbluhm This test assures that any 8-bit character followed by a 7-bit 291bd8f1dc3Sbluhm character will not be mistakenly interpreted as a valid UTF-8 292bd8f1dc3Sbluhm sequence. 293bd8f1dc3Sbluhm */ 294bd8f1dc3Sbluhm START_TEST(test_illegal_utf8) { 295bd8f1dc3Sbluhm char text[100]; 296bd8f1dc3Sbluhm int i; 297bd8f1dc3Sbluhm 298bd8f1dc3Sbluhm for (i = 128; i <= 255; ++i) { 299bd8f1dc3Sbluhm snprintf(text, sizeof(text), "<e>%ccd</e>", i); 300bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 301bd8f1dc3Sbluhm == XML_STATUS_OK) { 302bd8f1dc3Sbluhm snprintf(text, sizeof(text), 303bd8f1dc3Sbluhm "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 304bd8f1dc3Sbluhm i); 305bd8f1dc3Sbluhm fail(text); 306bd8f1dc3Sbluhm } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 307bd8f1dc3Sbluhm xml_failure(g_parser); 308bd8f1dc3Sbluhm /* Reset the parser since we use the same parser repeatedly. */ 309bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 310bd8f1dc3Sbluhm } 311bd8f1dc3Sbluhm } 312bd8f1dc3Sbluhm END_TEST 313bd8f1dc3Sbluhm 314bd8f1dc3Sbluhm /* Examples, not masks: */ 315bd8f1dc3Sbluhm #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 316bd8f1dc3Sbluhm #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 317bd8f1dc3Sbluhm #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 318bd8f1dc3Sbluhm #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 319bd8f1dc3Sbluhm #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 320bd8f1dc3Sbluhm 321bd8f1dc3Sbluhm START_TEST(test_utf8_auto_align) { 322bd8f1dc3Sbluhm struct TestCase { 323bd8f1dc3Sbluhm ptrdiff_t expectedMovementInChars; 324bd8f1dc3Sbluhm const char *input; 325bd8f1dc3Sbluhm }; 326bd8f1dc3Sbluhm 327bd8f1dc3Sbluhm struct TestCase cases[] = { 328bd8f1dc3Sbluhm {00, ""}, 329bd8f1dc3Sbluhm 330bd8f1dc3Sbluhm {00, UTF8_LEAD_1}, 331bd8f1dc3Sbluhm 332bd8f1dc3Sbluhm {-1, UTF8_LEAD_2}, 333bd8f1dc3Sbluhm {00, UTF8_LEAD_2 UTF8_FOLLOW}, 334bd8f1dc3Sbluhm 335bd8f1dc3Sbluhm {-1, UTF8_LEAD_3}, 
336bd8f1dc3Sbluhm {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 337bd8f1dc3Sbluhm {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 338bd8f1dc3Sbluhm 339bd8f1dc3Sbluhm {-1, UTF8_LEAD_4}, 340bd8f1dc3Sbluhm {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 341bd8f1dc3Sbluhm {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 342bd8f1dc3Sbluhm {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 343bd8f1dc3Sbluhm }; 344bd8f1dc3Sbluhm 345bd8f1dc3Sbluhm size_t i = 0; 346bd8f1dc3Sbluhm bool success = true; 347bd8f1dc3Sbluhm for (; i < sizeof(cases) / sizeof(*cases); i++) { 348bd8f1dc3Sbluhm const char *fromLim = cases[i].input + strlen(cases[i].input); 349bd8f1dc3Sbluhm const char *const fromLimInitially = fromLim; 350bd8f1dc3Sbluhm ptrdiff_t actualMovementInChars; 351bd8f1dc3Sbluhm 352bd8f1dc3Sbluhm _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 353bd8f1dc3Sbluhm 354bd8f1dc3Sbluhm actualMovementInChars = (fromLim - fromLimInitially); 355bd8f1dc3Sbluhm if (actualMovementInChars != cases[i].expectedMovementInChars) { 356bd8f1dc3Sbluhm size_t j = 0; 357bd8f1dc3Sbluhm success = false; 358bd8f1dc3Sbluhm printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 359bd8f1dc3Sbluhm ", actually moved by %2d chars: \"", 360bd8f1dc3Sbluhm (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 361bd8f1dc3Sbluhm (int)actualMovementInChars); 362bd8f1dc3Sbluhm for (; j < strlen(cases[i].input); j++) { 363bd8f1dc3Sbluhm printf("\\x%02x", (unsigned char)cases[i].input[j]); 364bd8f1dc3Sbluhm } 365bd8f1dc3Sbluhm printf("\"\n"); 366bd8f1dc3Sbluhm } 367bd8f1dc3Sbluhm } 368bd8f1dc3Sbluhm 369bd8f1dc3Sbluhm if (! 
success) { 370bd8f1dc3Sbluhm fail("UTF-8 auto-alignment is not bullet-proof\n"); 371bd8f1dc3Sbluhm } 372bd8f1dc3Sbluhm } 373bd8f1dc3Sbluhm END_TEST 374bd8f1dc3Sbluhm 375bd8f1dc3Sbluhm START_TEST(test_utf16) { 376bd8f1dc3Sbluhm /* <?xml version="1.0" encoding="UTF-16"?> 377bd8f1dc3Sbluhm * <doc a='123'>some {A} text</doc> 378bd8f1dc3Sbluhm * 379bd8f1dc3Sbluhm * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 380bd8f1dc3Sbluhm */ 381bd8f1dc3Sbluhm char text[] 382bd8f1dc3Sbluhm = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 383bd8f1dc3Sbluhm "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 384bd8f1dc3Sbluhm "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 385bd8f1dc3Sbluhm "\000'\000?\000>\000\n" 386bd8f1dc3Sbluhm "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 387bd8f1dc3Sbluhm "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 388bd8f1dc3Sbluhm "<\000/\000d\000o\000c\000>"; 389bd8f1dc3Sbluhm #ifdef XML_UNICODE 390bd8f1dc3Sbluhm const XML_Char *expected = XCS("some \xff21 text"); 391bd8f1dc3Sbluhm #else 392bd8f1dc3Sbluhm const XML_Char *expected = XCS("some \357\274\241 text"); 393bd8f1dc3Sbluhm #endif 394bd8f1dc3Sbluhm CharData storage; 395bd8f1dc3Sbluhm 396bd8f1dc3Sbluhm CharData_Init(&storage); 397bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 398bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 399bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 400bd8f1dc3Sbluhm == XML_STATUS_ERROR) 401bd8f1dc3Sbluhm xml_failure(g_parser); 402bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 403bd8f1dc3Sbluhm } 404bd8f1dc3Sbluhm END_TEST 405bd8f1dc3Sbluhm 406bd8f1dc3Sbluhm START_TEST(test_utf16_le_epilog_newline) { 407bd8f1dc3Sbluhm unsigned int first_chunk_bytes = 17; 408bd8f1dc3Sbluhm char text[] = "\xFF\xFE" /* BOM */ 409bd8f1dc3Sbluhm "<\000e\000/\000>\000" /* document element */ 410bd8f1dc3Sbluhm 
"\r\000\n\000\r\000\n\000"; /* epilog */ 411bd8f1dc3Sbluhm 412bd8f1dc3Sbluhm if (first_chunk_bytes >= sizeof(text) - 1) 413bd8f1dc3Sbluhm fail("bad value of first_chunk_bytes"); 414bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE) 415bd8f1dc3Sbluhm == XML_STATUS_ERROR) 416bd8f1dc3Sbluhm xml_failure(g_parser); 417bd8f1dc3Sbluhm else { 418bd8f1dc3Sbluhm enum XML_Status rc; 419bd8f1dc3Sbluhm rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 420bd8f1dc3Sbluhm sizeof(text) - first_chunk_bytes - 1, 421bd8f1dc3Sbluhm XML_TRUE); 422bd8f1dc3Sbluhm if (rc == XML_STATUS_ERROR) 423bd8f1dc3Sbluhm xml_failure(g_parser); 424bd8f1dc3Sbluhm } 425bd8f1dc3Sbluhm } 426bd8f1dc3Sbluhm END_TEST 427bd8f1dc3Sbluhm 428bd8f1dc3Sbluhm /* Test that an outright lie in the encoding is faulted */ 429bd8f1dc3Sbluhm START_TEST(test_not_utf16) { 430bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='utf-16'?>" 431bd8f1dc3Sbluhm "<doc>Hi</doc>"; 432bd8f1dc3Sbluhm 433bd8f1dc3Sbluhm /* Use a handler to provoke the appropriate code paths */ 434bd8f1dc3Sbluhm XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 435bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 436bd8f1dc3Sbluhm "UTF-16 declared in UTF-8 not faulted"); 437bd8f1dc3Sbluhm } 438bd8f1dc3Sbluhm END_TEST 439bd8f1dc3Sbluhm 440bd8f1dc3Sbluhm /* Test that an unknown encoding is rejected */ 441bd8f1dc3Sbluhm START_TEST(test_bad_encoding) { 442bd8f1dc3Sbluhm const char *text = "<doc>Hi</doc>"; 443bd8f1dc3Sbluhm 444bd8f1dc3Sbluhm if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 445bd8f1dc3Sbluhm fail("XML_SetEncoding failed"); 446bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 447bd8f1dc3Sbluhm "Unknown encoding not faulted"); 448bd8f1dc3Sbluhm } 449bd8f1dc3Sbluhm END_TEST 450bd8f1dc3Sbluhm 451bd8f1dc3Sbluhm /* Regression test for SF bug #481609, #774028. 
*/ 452bd8f1dc3Sbluhm START_TEST(test_latin1_umlauts) { 453bd8f1dc3Sbluhm const char *text 454bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 455bd8f1dc3Sbluhm "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 456bd8f1dc3Sbluhm " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 457bd8f1dc3Sbluhm #ifdef XML_UNICODE 458bd8f1dc3Sbluhm /* Expected results in UTF-16 */ 459bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 460bd8f1dc3Sbluhm XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 461bd8f1dc3Sbluhm #else 462bd8f1dc3Sbluhm /* Expected results in UTF-8 */ 463bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 464bd8f1dc3Sbluhm XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 465bd8f1dc3Sbluhm #endif 466bd8f1dc3Sbluhm 467bd8f1dc3Sbluhm run_character_check(text, expected); 468bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 469bd8f1dc3Sbluhm run_attribute_check(text, expected); 470bd8f1dc3Sbluhm /* Repeat with a default handler */ 471bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 472bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 473bd8f1dc3Sbluhm run_character_check(text, expected); 474bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 475bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 476bd8f1dc3Sbluhm run_attribute_check(text, expected); 477bd8f1dc3Sbluhm } 478bd8f1dc3Sbluhm END_TEST 479bd8f1dc3Sbluhm 480bd8f1dc3Sbluhm /* Test that an element name with a 4-byte UTF-8 character is rejected */ 481bd8f1dc3Sbluhm START_TEST(test_long_utf8_character) { 482bd8f1dc3Sbluhm const char *text 483bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='utf-8'?>\n" 484bd8f1dc3Sbluhm /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ 485bd8f1dc3Sbluhm "<do\xf0\x90\x80\x80/>"; 486bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 487bd8f1dc3Sbluhm "4-byte UTF-8 character in element name not faulted"); 488bd8f1dc3Sbluhm } 489bd8f1dc3Sbluhm END_TEST 
490bd8f1dc3Sbluhm 491bd8f1dc3Sbluhm /* Test that a long latin-1 attribute (too long to convert in one go) 492bd8f1dc3Sbluhm * is correctly converted 493bd8f1dc3Sbluhm */ 494bd8f1dc3Sbluhm START_TEST(test_long_latin1_attribute) { 495bd8f1dc3Sbluhm const char *text 496bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 497bd8f1dc3Sbluhm "<doc att='" 498bd8f1dc3Sbluhm /* 64 characters per line */ 499bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 500bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 501bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 502bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 503bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 504bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 505bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 506bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 507bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 508bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 509bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 510bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 511bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 512bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 513bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 514bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 515bd8f1dc3Sbluhm /* Last character splits across a buffer boundary */ 516bd8f1dc3Sbluhm "\xe4'>\n</doc>"; 517bd8f1dc3Sbluhm 518bd8f1dc3Sbluhm const XML_Char *expected = 519bd8f1dc3Sbluhm /* 64 characters per line */ 
520bd8f1dc3Sbluhm /* clang-format off */ 521bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 522bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 523bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 524bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 525bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 526bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 527bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 528bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 529bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 530bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 531bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 532bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 533bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 534bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 535bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 536bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 537bd8f1dc3Sbluhm /* clang-format on */ 538bd8f1dc3Sbluhm #ifdef XML_UNICODE 539bd8f1dc3Sbluhm XCS("\x00e4"); 540bd8f1dc3Sbluhm #else 541bd8f1dc3Sbluhm XCS("\xc3\xa4"); 542bd8f1dc3Sbluhm #endif 543bd8f1dc3Sbluhm 544bd8f1dc3Sbluhm run_attribute_check(text, expected); 545bd8f1dc3Sbluhm } 546bd8f1dc3Sbluhm END_TEST 547bd8f1dc3Sbluhm 548bd8f1dc3Sbluhm /* Test that a long ASCII attribute (too long to convert in one go) 549bd8f1dc3Sbluhm * is correctly converted 550bd8f1dc3Sbluhm */ 551bd8f1dc3Sbluhm 
START_TEST(test_long_ascii_attribute) { 552bd8f1dc3Sbluhm const char *text 553bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='us-ascii'?>\n" 554bd8f1dc3Sbluhm "<doc att='" 555bd8f1dc3Sbluhm /* 64 characters per line */ 556bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 557bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 558bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 559bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 560bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 561bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 562bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 563bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 564bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 565bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 566bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 567bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 568bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 569bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 570bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 571bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 572bd8f1dc3Sbluhm "01234'>\n</doc>"; 573bd8f1dc3Sbluhm const XML_Char *expected = 574bd8f1dc3Sbluhm /* 64 characters per line */ 575bd8f1dc3Sbluhm /* clang-format off */ 576bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 577bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 578bd8f1dc3Sbluhm 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 579bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 580bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 581bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 582bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 583bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 584bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 585bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 586bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 587bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 588bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 589bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 590bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 591bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 592bd8f1dc3Sbluhm XCS("01234"); 593bd8f1dc3Sbluhm /* clang-format on */ 594bd8f1dc3Sbluhm 595bd8f1dc3Sbluhm run_attribute_check(text, expected); 596bd8f1dc3Sbluhm } 597bd8f1dc3Sbluhm END_TEST 598bd8f1dc3Sbluhm 599bd8f1dc3Sbluhm /* Regression test #1 for SF bug #653180. 
*/ 600bd8f1dc3Sbluhm START_TEST(test_line_number_after_parse) { 601bd8f1dc3Sbluhm const char *text = "<tag>\n" 602bd8f1dc3Sbluhm "\n" 603bd8f1dc3Sbluhm "\n</tag>"; 604bd8f1dc3Sbluhm XML_Size lineno; 605bd8f1dc3Sbluhm 606bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 607bd8f1dc3Sbluhm == XML_STATUS_ERROR) 608bd8f1dc3Sbluhm xml_failure(g_parser); 609bd8f1dc3Sbluhm lineno = XML_GetCurrentLineNumber(g_parser); 610bd8f1dc3Sbluhm if (lineno != 4) { 611bd8f1dc3Sbluhm char buffer[100]; 612bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 613bd8f1dc3Sbluhm "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 614bd8f1dc3Sbluhm fail(buffer); 615bd8f1dc3Sbluhm } 616bd8f1dc3Sbluhm } 617bd8f1dc3Sbluhm END_TEST 618bd8f1dc3Sbluhm 619bd8f1dc3Sbluhm /* Regression test #2 for SF bug #653180. */ 620bd8f1dc3Sbluhm START_TEST(test_column_number_after_parse) { 621bd8f1dc3Sbluhm const char *text = "<tag></tag>"; 622bd8f1dc3Sbluhm XML_Size colno; 623bd8f1dc3Sbluhm 624bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 625bd8f1dc3Sbluhm == XML_STATUS_ERROR) 626bd8f1dc3Sbluhm xml_failure(g_parser); 627bd8f1dc3Sbluhm colno = XML_GetCurrentColumnNumber(g_parser); 628bd8f1dc3Sbluhm if (colno != 11) { 629bd8f1dc3Sbluhm char buffer[100]; 630bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 631bd8f1dc3Sbluhm "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 632bd8f1dc3Sbluhm fail(buffer); 633bd8f1dc3Sbluhm } 634bd8f1dc3Sbluhm } 635bd8f1dc3Sbluhm END_TEST 636bd8f1dc3Sbluhm 637bd8f1dc3Sbluhm /* Regression test #3 for SF bug #653180. 
*/ 638bd8f1dc3Sbluhm START_TEST(test_line_and_column_numbers_inside_handlers) { 639bd8f1dc3Sbluhm const char *text = "<a>\n" /* Unix end-of-line */ 640bd8f1dc3Sbluhm " <b>\r\n" /* Windows end-of-line */ 641bd8f1dc3Sbluhm " <c/>\r" /* Mac OS end-of-line */ 642bd8f1dc3Sbluhm " </b>\n" 643bd8f1dc3Sbluhm " <d>\n" 644bd8f1dc3Sbluhm " <f/>\n" 645bd8f1dc3Sbluhm " </d>\n" 646bd8f1dc3Sbluhm "</a>"; 647bd8f1dc3Sbluhm const StructDataEntry expected[] 648bd8f1dc3Sbluhm = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 649bd8f1dc3Sbluhm {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 650bd8f1dc3Sbluhm {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 651bd8f1dc3Sbluhm {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 652bd8f1dc3Sbluhm {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 653bd8f1dc3Sbluhm const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 654bd8f1dc3Sbluhm StructData storage; 655bd8f1dc3Sbluhm 656bd8f1dc3Sbluhm StructData_Init(&storage); 657bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 658bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, start_element_event_handler2); 659bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, end_element_event_handler2); 660bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 661bd8f1dc3Sbluhm == XML_STATUS_ERROR) 662bd8f1dc3Sbluhm xml_failure(g_parser); 663bd8f1dc3Sbluhm 664bd8f1dc3Sbluhm StructData_CheckItems(&storage, expected, expected_count); 665bd8f1dc3Sbluhm StructData_Dispose(&storage); 666bd8f1dc3Sbluhm } 667bd8f1dc3Sbluhm END_TEST 668bd8f1dc3Sbluhm 669bd8f1dc3Sbluhm /* Regression test #4 for SF bug #653180. 
*/ 670bd8f1dc3Sbluhm START_TEST(test_line_number_after_error) { 671bd8f1dc3Sbluhm const char *text = "<a>\n" 672bd8f1dc3Sbluhm " <b>\n" 673bd8f1dc3Sbluhm " </a>"; /* missing </b> */ 674bd8f1dc3Sbluhm XML_Size lineno; 675bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 676bd8f1dc3Sbluhm != XML_STATUS_ERROR) 677bd8f1dc3Sbluhm fail("Expected a parse error"); 678bd8f1dc3Sbluhm 679bd8f1dc3Sbluhm lineno = XML_GetCurrentLineNumber(g_parser); 680bd8f1dc3Sbluhm if (lineno != 3) { 681bd8f1dc3Sbluhm char buffer[100]; 682bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 683bd8f1dc3Sbluhm "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 684bd8f1dc3Sbluhm fail(buffer); 685bd8f1dc3Sbluhm } 686bd8f1dc3Sbluhm } 687bd8f1dc3Sbluhm END_TEST 688bd8f1dc3Sbluhm 689bd8f1dc3Sbluhm /* Regression test #5 for SF bug #653180. */ 690bd8f1dc3Sbluhm START_TEST(test_column_number_after_error) { 691bd8f1dc3Sbluhm const char *text = "<a>\n" 692bd8f1dc3Sbluhm " <b>\n" 693bd8f1dc3Sbluhm " </a>"; /* missing </b> */ 694bd8f1dc3Sbluhm XML_Size colno; 695bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 696bd8f1dc3Sbluhm != XML_STATUS_ERROR) 697bd8f1dc3Sbluhm fail("Expected a parse error"); 698bd8f1dc3Sbluhm 699bd8f1dc3Sbluhm colno = XML_GetCurrentColumnNumber(g_parser); 700bd8f1dc3Sbluhm if (colno != 4) { 701bd8f1dc3Sbluhm char buffer[100]; 702bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 703bd8f1dc3Sbluhm "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 704bd8f1dc3Sbluhm fail(buffer); 705bd8f1dc3Sbluhm } 706bd8f1dc3Sbluhm } 707bd8f1dc3Sbluhm END_TEST 708bd8f1dc3Sbluhm 709bd8f1dc3Sbluhm /* Regression test for SF bug #478332. */ 710bd8f1dc3Sbluhm START_TEST(test_really_long_lines) { 711bd8f1dc3Sbluhm /* This parses an input line longer than INIT_DATA_BUF_SIZE 712bd8f1dc3Sbluhm characters long (defined to be 1024 in xmlparse.c). 
We take a 713bd8f1dc3Sbluhm really cheesy approach to building the input buffer, because 714bd8f1dc3Sbluhm this avoids writing bugs in buffer-filling code. 715bd8f1dc3Sbluhm */ 716bd8f1dc3Sbluhm const char *text 717bd8f1dc3Sbluhm = "<e>" 718bd8f1dc3Sbluhm /* 64 chars */ 719bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 720bd8f1dc3Sbluhm /* until we have at least 1024 characters on the line: */ 721bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 722bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 723bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 724bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 725bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 726bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 727bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 728bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 729bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 730bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 731bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 732bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 733bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 734bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 735bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 736bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 737bd8f1dc3Sbluhm "</e>"; 738bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 739bd8f1dc3Sbluhm == XML_STATUS_ERROR) 740bd8f1dc3Sbluhm xml_failure(g_parser); 
741bd8f1dc3Sbluhm } 742bd8f1dc3Sbluhm END_TEST 743bd8f1dc3Sbluhm 744bd8f1dc3Sbluhm /* Test cdata processing across a buffer boundary */ 745bd8f1dc3Sbluhm START_TEST(test_really_long_encoded_lines) { 746bd8f1dc3Sbluhm /* As above, except that we want to provoke an output buffer 747bd8f1dc3Sbluhm * overflow with a non-trivial encoding. For this we need to pass 748bd8f1dc3Sbluhm * the whole cdata in one go, not byte-by-byte. 749bd8f1dc3Sbluhm */ 750bd8f1dc3Sbluhm void *buffer; 751bd8f1dc3Sbluhm const char *text 752bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='iso-8859-1'?>" 753bd8f1dc3Sbluhm "<e>" 754bd8f1dc3Sbluhm /* 64 chars */ 755bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756bd8f1dc3Sbluhm /* until we have at least 1024 characters on the line: */ 757bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 764bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 765bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 766bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 767bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 768bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 769bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 770bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 
771bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 772bd8f1dc3Sbluhm "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 773bd8f1dc3Sbluhm "</e>"; 774bd8f1dc3Sbluhm int parse_len = (int)strlen(text); 775bd8f1dc3Sbluhm 776bd8f1dc3Sbluhm /* Need a cdata handler to provoke the code path we want to test */ 777bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 778bd8f1dc3Sbluhm buffer = XML_GetBuffer(g_parser, parse_len); 779bd8f1dc3Sbluhm if (buffer == NULL) 780bd8f1dc3Sbluhm fail("Could not allocate parse buffer"); 781bd8f1dc3Sbluhm assert(buffer != NULL); 782bd8f1dc3Sbluhm memcpy(buffer, text, parse_len); 783bd8f1dc3Sbluhm if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 784bd8f1dc3Sbluhm xml_failure(g_parser); 785bd8f1dc3Sbluhm } 786bd8f1dc3Sbluhm END_TEST 787bd8f1dc3Sbluhm 788bd8f1dc3Sbluhm /* 789bd8f1dc3Sbluhm * Element event tests. 790bd8f1dc3Sbluhm */ 791bd8f1dc3Sbluhm 792bd8f1dc3Sbluhm START_TEST(test_end_element_events) { 793bd8f1dc3Sbluhm const char *text = "<a><b><c/></b><d><f/></d></a>"; 794bd8f1dc3Sbluhm const XML_Char *expected = XCS("/c/b/f/d/a"); 795bd8f1dc3Sbluhm CharData storage; 796bd8f1dc3Sbluhm 797bd8f1dc3Sbluhm CharData_Init(&storage); 798bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 799bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, end_element_event_handler); 800bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 801bd8f1dc3Sbluhm == XML_STATUS_ERROR) 802bd8f1dc3Sbluhm xml_failure(g_parser); 803bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 804bd8f1dc3Sbluhm } 805bd8f1dc3Sbluhm END_TEST 806bd8f1dc3Sbluhm 807bd8f1dc3Sbluhm /* 808bd8f1dc3Sbluhm * Attribute tests. 809bd8f1dc3Sbluhm */ 810bd8f1dc3Sbluhm 811bd8f1dc3Sbluhm /* Helper used by the following tests; this checks any "attr" and "refs" 812bd8f1dc3Sbluhm attributes to make sure whitespace has been normalized. 
813bd8f1dc3Sbluhm 814bd8f1dc3Sbluhm Return true if whitespace has been normalized in a string, using 815bd8f1dc3Sbluhm the rules for attribute value normalization. The 'is_cdata' flag 816bd8f1dc3Sbluhm is needed since CDATA attributes don't need to have multiple 817bd8f1dc3Sbluhm whitespace characters collapsed to a single space, while other 818bd8f1dc3Sbluhm attribute data types do. (Section 3.3.3 of the recommendation.) 819bd8f1dc3Sbluhm */ 820bd8f1dc3Sbluhm static int 821bd8f1dc3Sbluhm is_whitespace_normalized(const XML_Char *s, int is_cdata) { 822bd8f1dc3Sbluhm int blanks = 0; 823bd8f1dc3Sbluhm int at_start = 1; 824bd8f1dc3Sbluhm while (*s) { 825bd8f1dc3Sbluhm if (*s == XCS(' ')) 826bd8f1dc3Sbluhm ++blanks; 827bd8f1dc3Sbluhm else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 828bd8f1dc3Sbluhm return 0; 829bd8f1dc3Sbluhm else { 830bd8f1dc3Sbluhm if (at_start) { 831bd8f1dc3Sbluhm at_start = 0; 832bd8f1dc3Sbluhm if (blanks && ! is_cdata) 833bd8f1dc3Sbluhm /* illegal leading blanks */ 834bd8f1dc3Sbluhm return 0; 835bd8f1dc3Sbluhm } else if (blanks > 1 && ! is_cdata) 836bd8f1dc3Sbluhm return 0; 837bd8f1dc3Sbluhm blanks = 0; 838bd8f1dc3Sbluhm } 839bd8f1dc3Sbluhm ++s; 840bd8f1dc3Sbluhm } 841bd8f1dc3Sbluhm if (blanks && ! is_cdata) 842bd8f1dc3Sbluhm return 0; 843bd8f1dc3Sbluhm return 1; 844bd8f1dc3Sbluhm } 845bd8f1dc3Sbluhm 846bd8f1dc3Sbluhm /* Check the attribute whitespace checker: */ 847bd8f1dc3Sbluhm START_TEST(test_helper_is_whitespace_normalized) { 848bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc"), 0)); 849bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc"), 1)); 850bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 851bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 852bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 853bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 854bd8f1dc3Sbluhm assert(! 
is_whitespace_normalized(XCS("abc def ghi"), 0)); 855bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 856bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 857bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 858bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS(" "), 0)); 859bd8f1dc3Sbluhm assert(is_whitespace_normalized(XCS(" "), 1)); 860bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\t"), 0)); 861bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\t"), 1)); 862bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\n"), 0)); 863bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\n"), 1)); 864bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\r"), 0)); 865bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("\r"), 1)); 866bd8f1dc3Sbluhm assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 867bd8f1dc3Sbluhm } 868bd8f1dc3Sbluhm END_TEST 869bd8f1dc3Sbluhm 870bd8f1dc3Sbluhm static void XMLCALL 871bd8f1dc3Sbluhm check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 872bd8f1dc3Sbluhm const XML_Char **atts) { 873bd8f1dc3Sbluhm int i; 874bd8f1dc3Sbluhm UNUSED_P(userData); 875bd8f1dc3Sbluhm UNUSED_P(name); 876bd8f1dc3Sbluhm for (i = 0; atts[i] != NULL; i += 2) { 877bd8f1dc3Sbluhm const XML_Char *attrname = atts[i]; 878bd8f1dc3Sbluhm const XML_Char *value = atts[i + 1]; 879bd8f1dc3Sbluhm if (xcstrcmp(XCS("attr"), attrname) == 0 880bd8f1dc3Sbluhm || xcstrcmp(XCS("ents"), attrname) == 0 881bd8f1dc3Sbluhm || xcstrcmp(XCS("refs"), attrname) == 0) { 882bd8f1dc3Sbluhm if (! 
is_whitespace_normalized(value, 0)) { 883bd8f1dc3Sbluhm char buffer[256]; 884bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 885bd8f1dc3Sbluhm "attribute value not normalized: %" XML_FMT_STR 886bd8f1dc3Sbluhm "='%" XML_FMT_STR "'", 887bd8f1dc3Sbluhm attrname, value); 888bd8f1dc3Sbluhm fail(buffer); 889bd8f1dc3Sbluhm } 890bd8f1dc3Sbluhm } 891bd8f1dc3Sbluhm } 892bd8f1dc3Sbluhm } 893bd8f1dc3Sbluhm 894bd8f1dc3Sbluhm START_TEST(test_attr_whitespace_normalization) { 895bd8f1dc3Sbluhm const char *text 896bd8f1dc3Sbluhm = "<!DOCTYPE doc [\n" 897bd8f1dc3Sbluhm " <!ATTLIST doc\n" 898bd8f1dc3Sbluhm " attr NMTOKENS #REQUIRED\n" 899bd8f1dc3Sbluhm " ents ENTITIES #REQUIRED\n" 900bd8f1dc3Sbluhm " refs IDREFS #REQUIRED>\n" 901bd8f1dc3Sbluhm "]>\n" 902bd8f1dc3Sbluhm "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 903bd8f1dc3Sbluhm " ents=' ent-1 \t\r\n" 904bd8f1dc3Sbluhm " ent-2 ' >\n" 905bd8f1dc3Sbluhm " <e id='id-1'/>\n" 906bd8f1dc3Sbluhm " <e id='id-2'/>\n" 907bd8f1dc3Sbluhm "</doc>"; 908bd8f1dc3Sbluhm 909bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, 910bd8f1dc3Sbluhm check_attr_contains_normalized_whitespace); 911bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 912bd8f1dc3Sbluhm == XML_STATUS_ERROR) 913bd8f1dc3Sbluhm xml_failure(g_parser); 914bd8f1dc3Sbluhm } 915bd8f1dc3Sbluhm END_TEST 916bd8f1dc3Sbluhm 917bd8f1dc3Sbluhm /* 918bd8f1dc3Sbluhm * XML declaration tests. 
919bd8f1dc3Sbluhm */ 920bd8f1dc3Sbluhm 921bd8f1dc3Sbluhm START_TEST(test_xmldecl_misplaced) { 922bd8f1dc3Sbluhm expect_failure("\n" 923bd8f1dc3Sbluhm "<?xml version='1.0'?>\n" 924bd8f1dc3Sbluhm "<a/>", 925bd8f1dc3Sbluhm XML_ERROR_MISPLACED_XML_PI, 926bd8f1dc3Sbluhm "failed to report misplaced XML declaration"); 927bd8f1dc3Sbluhm } 928bd8f1dc3Sbluhm END_TEST 929bd8f1dc3Sbluhm 930bd8f1dc3Sbluhm START_TEST(test_xmldecl_invalid) { 931bd8f1dc3Sbluhm expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 932bd8f1dc3Sbluhm "Failed to report invalid XML declaration"); 933bd8f1dc3Sbluhm } 934bd8f1dc3Sbluhm END_TEST 935bd8f1dc3Sbluhm 936bd8f1dc3Sbluhm START_TEST(test_xmldecl_missing_attr) { 937bd8f1dc3Sbluhm expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 938bd8f1dc3Sbluhm "Failed to report missing XML declaration attribute"); 939bd8f1dc3Sbluhm } 940bd8f1dc3Sbluhm END_TEST 941bd8f1dc3Sbluhm 942bd8f1dc3Sbluhm START_TEST(test_xmldecl_missing_value) { 943bd8f1dc3Sbluhm expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 944bd8f1dc3Sbluhm "<doc/>", 945bd8f1dc3Sbluhm XML_ERROR_XML_DECL, 946bd8f1dc3Sbluhm "Failed to report missing attribute value"); 947bd8f1dc3Sbluhm } 948bd8f1dc3Sbluhm END_TEST 949bd8f1dc3Sbluhm 950bd8f1dc3Sbluhm /* Regression test for SF bug #584832. 
*/ 951bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_internal_entity) { 952bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 953bd8f1dc3Sbluhm "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 954bd8f1dc3Sbluhm "<test a='&foo;'/>"; 955bd8f1dc3Sbluhm 956bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 957bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 958bd8f1dc3Sbluhm == XML_STATUS_ERROR) 959bd8f1dc3Sbluhm xml_failure(g_parser); 960bd8f1dc3Sbluhm } 961bd8f1dc3Sbluhm END_TEST 962bd8f1dc3Sbluhm 963bd8f1dc3Sbluhm /* Test unrecognised encoding handler */ 964bd8f1dc3Sbluhm START_TEST(test_unrecognised_encoding_internal_entity) { 965bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 966bd8f1dc3Sbluhm "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 967bd8f1dc3Sbluhm "<test a='&foo;'/>"; 968bd8f1dc3Sbluhm 969bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 970bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 971bd8f1dc3Sbluhm != XML_STATUS_ERROR) 972bd8f1dc3Sbluhm fail("Unrecognised encoding not rejected"); 973bd8f1dc3Sbluhm } 974bd8f1dc3Sbluhm END_TEST 975bd8f1dc3Sbluhm 976bd8f1dc3Sbluhm /* Regression test for SF bug #620106. */ 977bd8f1dc3Sbluhm START_TEST(test_ext_entity_set_encoding) { 978bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 979bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 980bd8f1dc3Sbluhm "]>\n" 981bd8f1dc3Sbluhm "<doc>&en;</doc>"; 982bd8f1dc3Sbluhm ExtTest test_data 983bd8f1dc3Sbluhm = {/* This text says it's an unsupported encoding, but it's really 984bd8f1dc3Sbluhm UTF-8, which we tell Expat using XML_SetEncoding(). 
985bd8f1dc3Sbluhm */ 986bd8f1dc3Sbluhm "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 987bd8f1dc3Sbluhm #ifdef XML_UNICODE 988bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9"); 989bd8f1dc3Sbluhm #else 990bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xa9"); 991bd8f1dc3Sbluhm #endif 992bd8f1dc3Sbluhm 993bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 994bd8f1dc3Sbluhm run_ext_character_check(text, &test_data, expected); 995bd8f1dc3Sbluhm } 996bd8f1dc3Sbluhm END_TEST 997bd8f1dc3Sbluhm 998bd8f1dc3Sbluhm /* Test external entities with no handler */ 999bd8f1dc3Sbluhm START_TEST(test_ext_entity_no_handler) { 1000bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1001bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1002bd8f1dc3Sbluhm "]>\n" 1003bd8f1dc3Sbluhm "<doc>&en;</doc>"; 1004bd8f1dc3Sbluhm 1005bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 1006bd8f1dc3Sbluhm run_character_check(text, XCS("")); 1007bd8f1dc3Sbluhm } 1008bd8f1dc3Sbluhm END_TEST 1009bd8f1dc3Sbluhm 1010bd8f1dc3Sbluhm /* Test UTF-8 BOM is accepted */ 1011bd8f1dc3Sbluhm START_TEST(test_ext_entity_set_bom) { 1012bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1013bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1014bd8f1dc3Sbluhm "]>\n" 1015bd8f1dc3Sbluhm "<doc>&en;</doc>"; 1016bd8f1dc3Sbluhm ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1017bd8f1dc3Sbluhm "<?xml encoding='iso-8859-3'?>" 1018bd8f1dc3Sbluhm "\xC3\xA9", 1019bd8f1dc3Sbluhm XCS("utf-8"), NULL}; 1020bd8f1dc3Sbluhm #ifdef XML_UNICODE 1021bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9"); 1022bd8f1dc3Sbluhm #else 1023bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xa9"); 1024bd8f1dc3Sbluhm #endif 1025bd8f1dc3Sbluhm 1026bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1027bd8f1dc3Sbluhm run_ext_character_check(text, &test_data, expected); 1028bd8f1dc3Sbluhm } 
1029bd8f1dc3Sbluhm END_TEST 1030bd8f1dc3Sbluhm 1031bd8f1dc3Sbluhm /* Test that bad encodings are faulted */ 1032bd8f1dc3Sbluhm START_TEST(test_ext_entity_bad_encoding) { 1033bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1034bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1035bd8f1dc3Sbluhm "]>\n" 1036bd8f1dc3Sbluhm "<doc>&en;</doc>"; 1037bd8f1dc3Sbluhm ExtFaults fault 1038bd8f1dc3Sbluhm = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1039bd8f1dc3Sbluhm XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1040bd8f1dc3Sbluhm 1041bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1042bd8f1dc3Sbluhm XML_SetUserData(g_parser, &fault); 1043bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1044bd8f1dc3Sbluhm "Bad encoding should not have been accepted"); 1045bd8f1dc3Sbluhm } 1046bd8f1dc3Sbluhm END_TEST 1047bd8f1dc3Sbluhm 1048bd8f1dc3Sbluhm /* Try handing an invalid encoding to an external entity parser */ 1049bd8f1dc3Sbluhm START_TEST(test_ext_entity_bad_encoding_2) { 1050bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1051bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1052bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1053bd8f1dc3Sbluhm ExtFaults fault 1054bd8f1dc3Sbluhm = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1055bd8f1dc3Sbluhm XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1056bd8f1dc3Sbluhm 1057bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1058bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1059bd8f1dc3Sbluhm XML_SetUserData(g_parser, &fault); 1060bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1061bd8f1dc3Sbluhm "Bad encoding not faulted in external entity handler"); 1062bd8f1dc3Sbluhm } 1063bd8f1dc3Sbluhm END_TEST 1064bd8f1dc3Sbluhm 1065bd8f1dc3Sbluhm /* Test that no error is reported for unknown entities if we don't 
1066bd8f1dc3Sbluhm read an external subset. This was fixed in Expat 1.95.5. 1067bd8f1dc3Sbluhm */ 1068bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1069bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1070bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1071bd8f1dc3Sbluhm 1072bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1073bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1074bd8f1dc3Sbluhm xml_failure(g_parser); 1075bd8f1dc3Sbluhm } 1076bd8f1dc3Sbluhm END_TEST 1077bd8f1dc3Sbluhm 1078bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we don't 1079bd8f1dc3Sbluhm have an external subset. 1080bd8f1dc3Sbluhm */ 1081bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1082bd8f1dc3Sbluhm expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1083bd8f1dc3Sbluhm "Parser did not report undefined entity w/out a DTD."); 1084bd8f1dc3Sbluhm } 1085bd8f1dc3Sbluhm END_TEST 1086bd8f1dc3Sbluhm 1087bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we don't 1088bd8f1dc3Sbluhm read an external subset, but have been declared standalone. 1089bd8f1dc3Sbluhm */ 1090bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_standalone) { 1091bd8f1dc3Sbluhm const char *text 1092bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1093bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1094bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1095bd8f1dc3Sbluhm 1096bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1097bd8f1dc3Sbluhm "Parser did not report undefined entity (standalone)."); 1098bd8f1dc3Sbluhm } 1099bd8f1dc3Sbluhm END_TEST 1100bd8f1dc3Sbluhm 1101bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we have read 1102bd8f1dc3Sbluhm an external subset, and standalone is true. 
1103bd8f1dc3Sbluhm */ 1104bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1105bd8f1dc3Sbluhm const char *text 1106bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1107bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1108bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1109bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1110bd8f1dc3Sbluhm 1111bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1112bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 1113bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1114bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1115bd8f1dc3Sbluhm "Parser did not report undefined entity (external DTD)."); 1116bd8f1dc3Sbluhm } 1117bd8f1dc3Sbluhm END_TEST 1118bd8f1dc3Sbluhm 1119bd8f1dc3Sbluhm /* Test that external entity handling is not done if the parsing flag 1120bd8f1dc3Sbluhm * is set to UNLESS_STANDALONE 1121bd8f1dc3Sbluhm */ 1122bd8f1dc3Sbluhm START_TEST(test_entity_with_external_subset_unless_standalone) { 1123bd8f1dc3Sbluhm const char *text 1124bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1125bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1126bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1127bd8f1dc3Sbluhm ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1128bd8f1dc3Sbluhm 1129bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, 1130bd8f1dc3Sbluhm XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1131bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 1132bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1133bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1134bd8f1dc3Sbluhm "Parser did not report undefined entity"); 1135bd8f1dc3Sbluhm } 1136bd8f1dc3Sbluhm END_TEST 1137bd8f1dc3Sbluhm 1138bd8f1dc3Sbluhm /* Test that no error is reported for unknown entities if we have read 1139bd8f1dc3Sbluhm 
an external subset, and standalone is false. 1140bd8f1dc3Sbluhm */ 1141bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1142bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1143bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1144bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1145bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1146bd8f1dc3Sbluhm 1147bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1148bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1149bd8f1dc3Sbluhm run_ext_character_check(text, &test_data, XCS("")); 1150bd8f1dc3Sbluhm } 1151bd8f1dc3Sbluhm END_TEST 1152bd8f1dc3Sbluhm 1153bd8f1dc3Sbluhm /* Test that an error is reported if our NotStandalone handler fails */ 1154bd8f1dc3Sbluhm START_TEST(test_not_standalone_handler_reject) { 1155bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1156bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1157bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1158bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1159bd8f1dc3Sbluhm 1160bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1161bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 1162bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1163bd8f1dc3Sbluhm XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1164bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_NOT_STANDALONE, 1165bd8f1dc3Sbluhm "NotStandalone handler failed to reject"); 1166bd8f1dc3Sbluhm 1167bd8f1dc3Sbluhm /* Try again but without external entity handling */ 1168bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1169bd8f1dc3Sbluhm XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1170bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_NOT_STANDALONE, 1171bd8f1dc3Sbluhm "NotStandalone handler failed to reject"); 
1172bd8f1dc3Sbluhm } 1173bd8f1dc3Sbluhm END_TEST 1174bd8f1dc3Sbluhm 1175bd8f1dc3Sbluhm /* Test that no error is reported if our NotStandalone handler succeeds */ 1176bd8f1dc3Sbluhm START_TEST(test_not_standalone_handler_accept) { 1177bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1178bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 1179bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1180bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1181bd8f1dc3Sbluhm 1182bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1183bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1184bd8f1dc3Sbluhm XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1185bd8f1dc3Sbluhm run_ext_character_check(text, &test_data, XCS("")); 1186bd8f1dc3Sbluhm 1187bd8f1dc3Sbluhm /* Repeat without the external entity handler */ 1188bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1189bd8f1dc3Sbluhm XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1190bd8f1dc3Sbluhm run_character_check(text, XCS("")); 1191bd8f1dc3Sbluhm } 1192bd8f1dc3Sbluhm END_TEST 1193bd8f1dc3Sbluhm 1194bd8f1dc3Sbluhm START_TEST(test_wfc_no_recursive_entity_refs) { 1195bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1196bd8f1dc3Sbluhm " <!ENTITY entity '&entity;'>\n" 1197bd8f1dc3Sbluhm "]>\n" 1198bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1199bd8f1dc3Sbluhm 1200bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1201bd8f1dc3Sbluhm "Parser did not report recursive entity reference."); 1202bd8f1dc3Sbluhm } 1203bd8f1dc3Sbluhm END_TEST 1204bd8f1dc3Sbluhm 1205c033f770Sbluhm START_TEST(test_recursive_external_parameter_entity_2) { 1206c033f770Sbluhm struct TestCase { 1207c033f770Sbluhm const char *doc; 1208c033f770Sbluhm enum XML_Status expectedStatus; 1209c033f770Sbluhm }; 1210c033f770Sbluhm 1211c033f770Sbluhm struct TestCase cases[] = { 1212c033f770Sbluhm {"<!ENTITY % p1 
'%p1;'>", XML_STATUS_ERROR}, 1213c033f770Sbluhm {"<!ENTITY % p1 '%p1;'>" 1214c033f770Sbluhm "<!ENTITY % p1 'first declaration wins'>", 1215c033f770Sbluhm XML_STATUS_ERROR}, 1216c033f770Sbluhm {"<!ENTITY % p1 'first declaration wins'>" 1217c033f770Sbluhm "<!ENTITY % p1 '%p1;'>", 1218c033f770Sbluhm XML_STATUS_OK}, 1219c033f770Sbluhm {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1220c033f770Sbluhm }; 1221c033f770Sbluhm 1222c033f770Sbluhm for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1223c033f770Sbluhm const char *const doc = cases[i].doc; 1224c033f770Sbluhm const enum XML_Status expectedStatus = cases[i].expectedStatus; 1225c033f770Sbluhm set_subtest("%s", doc); 1226c033f770Sbluhm 1227c033f770Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 1228c033f770Sbluhm assert_true(parser != NULL); 1229c033f770Sbluhm 1230c033f770Sbluhm XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1231c033f770Sbluhm assert_true(ext_parser != NULL); 1232c033f770Sbluhm 1233c033f770Sbluhm const enum XML_Status actualStatus 1234c033f770Sbluhm = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1235c033f770Sbluhm 1236c033f770Sbluhm assert_true(actualStatus == expectedStatus); 1237c033f770Sbluhm if (actualStatus != XML_STATUS_OK) { 1238c033f770Sbluhm assert_true(XML_GetErrorCode(ext_parser) 1239c033f770Sbluhm == XML_ERROR_RECURSIVE_ENTITY_REF); 1240c033f770Sbluhm } 1241c033f770Sbluhm 1242c033f770Sbluhm XML_ParserFree(ext_parser); 1243c033f770Sbluhm XML_ParserFree(parser); 1244c033f770Sbluhm } 1245c033f770Sbluhm } 1246c033f770Sbluhm END_TEST 1247c033f770Sbluhm 1248bd8f1dc3Sbluhm /* Test incomplete external entities are faulted */ 1249bd8f1dc3Sbluhm START_TEST(test_ext_entity_invalid_parse) { 1250bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1251bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1252bd8f1dc3Sbluhm "]>\n" 1253bd8f1dc3Sbluhm "<doc>&en;</doc>"; 1254bd8f1dc3Sbluhm const ExtFaults faults[] 
1255bd8f1dc3Sbluhm = {{"<", "Incomplete element declaration not faulted", NULL, 1256bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_TOKEN}, 1257bd8f1dc3Sbluhm {"<\xe2\x82", /* First two bytes of a three-byte char */ 1258bd8f1dc3Sbluhm "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1259bd8f1dc3Sbluhm {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1260bd8f1dc3Sbluhm XML_ERROR_PARTIAL_CHAR}, 1261bd8f1dc3Sbluhm {NULL, NULL, NULL, XML_ERROR_NONE}}; 1262bd8f1dc3Sbluhm const ExtFaults *fault = faults; 1263bd8f1dc3Sbluhm 1264bd8f1dc3Sbluhm for (; fault->parse_text != NULL; fault++) { 1265bd8f1dc3Sbluhm set_subtest("\"%s\"", fault->parse_text); 1266bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1267bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1268bd8f1dc3Sbluhm XML_SetUserData(g_parser, (void *)fault); 1269bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1270bd8f1dc3Sbluhm "Parser did not report external entity error"); 1271bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1272bd8f1dc3Sbluhm } 1273bd8f1dc3Sbluhm } 1274bd8f1dc3Sbluhm END_TEST 1275bd8f1dc3Sbluhm 1276bd8f1dc3Sbluhm /* Regression test for SF bug #483514. 
*/ 1277bd8f1dc3Sbluhm START_TEST(test_dtd_default_handling) { 1278bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 1279bd8f1dc3Sbluhm "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1280bd8f1dc3Sbluhm "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1281bd8f1dc3Sbluhm "<!ELEMENT doc EMPTY>\n" 1282bd8f1dc3Sbluhm "<!ATTLIST doc a CDATA #IMPLIED>\n" 1283bd8f1dc3Sbluhm "<?pi in dtd?>\n" 1284bd8f1dc3Sbluhm "<!--comment in dtd-->\n" 1285bd8f1dc3Sbluhm "]><doc/>"; 1286bd8f1dc3Sbluhm 1287bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 1288bd8f1dc3Sbluhm XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1289bd8f1dc3Sbluhm XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1290bd8f1dc3Sbluhm XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1291bd8f1dc3Sbluhm XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1292bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1293bd8f1dc3Sbluhm XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1294bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1295bd8f1dc3Sbluhm XML_SetCommentHandler(g_parser, dummy_comment_handler); 1296bd8f1dc3Sbluhm XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1297bd8f1dc3Sbluhm XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1298bd8f1dc3Sbluhm run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1299bd8f1dc3Sbluhm } 1300bd8f1dc3Sbluhm END_TEST 1301bd8f1dc3Sbluhm 1302bd8f1dc3Sbluhm /* Test handling of attribute declarations */ 1303bd8f1dc3Sbluhm START_TEST(test_dtd_attr_handling) { 1304bd8f1dc3Sbluhm const char *prolog = "<!DOCTYPE doc [\n" 1305bd8f1dc3Sbluhm "<!ELEMENT doc EMPTY>\n"; 1306bd8f1dc3Sbluhm AttTest attr_data[] 1307bd8f1dc3Sbluhm = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1308bd8f1dc3Sbluhm "]>" 1309bd8f1dc3Sbluhm "<doc a='two'/>", 1310bd8f1dc3Sbluhm XCS("doc"), XCS("a"), 
1311bd8f1dc3Sbluhm XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1312bd8f1dc3Sbluhm NULL, XML_TRUE}, 1313bd8f1dc3Sbluhm {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1314bd8f1dc3Sbluhm "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1315bd8f1dc3Sbluhm "]>" 1316bd8f1dc3Sbluhm "<doc/>", 1317bd8f1dc3Sbluhm XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1318bd8f1dc3Sbluhm {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1319bd8f1dc3Sbluhm "]>" 1320bd8f1dc3Sbluhm "<doc/>", 1321bd8f1dc3Sbluhm XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1322bd8f1dc3Sbluhm {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1323bd8f1dc3Sbluhm "]>" 1324bd8f1dc3Sbluhm "<doc/>", 1325bd8f1dc3Sbluhm XCS("doc"), XCS("a"), XCS("CDATA"), 1326bd8f1dc3Sbluhm #ifdef XML_UNICODE 1327bd8f1dc3Sbluhm XCS("\x06f2"), 1328bd8f1dc3Sbluhm #else 1329bd8f1dc3Sbluhm XCS("\xdb\xb2"), 1330bd8f1dc3Sbluhm #endif 1331bd8f1dc3Sbluhm XML_FALSE}, 1332bd8f1dc3Sbluhm {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1333bd8f1dc3Sbluhm AttTest *test; 1334bd8f1dc3Sbluhm 1335bd8f1dc3Sbluhm for (test = attr_data; test->definition != NULL; test++) { 1336bd8f1dc3Sbluhm set_subtest("%s", test->definition); 1337bd8f1dc3Sbluhm XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1338bd8f1dc3Sbluhm XML_SetUserData(g_parser, test); 1339bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1340bd8f1dc3Sbluhm XML_FALSE) 1341bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1342bd8f1dc3Sbluhm xml_failure(g_parser); 1343bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1344bd8f1dc3Sbluhm (int)strlen(test->definition), XML_TRUE) 1345bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1346bd8f1dc3Sbluhm xml_failure(g_parser); 1347bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1348bd8f1dc3Sbluhm } 1349bd8f1dc3Sbluhm } 1350bd8f1dc3Sbluhm END_TEST 1351bd8f1dc3Sbluhm 1352bd8f1dc3Sbluhm /* See related SF bug #673791. 
1353bd8f1dc3Sbluhm When namespace processing is enabled, setting the namespace URI for 1354bd8f1dc3Sbluhm a prefix is not allowed; this test ensures that it *is* allowed 1355bd8f1dc3Sbluhm when namespace processing is not enabled. 1356bd8f1dc3Sbluhm (See Namespaces in XML, section 2.) 1357bd8f1dc3Sbluhm */ 1358bd8f1dc3Sbluhm START_TEST(test_empty_ns_without_namespaces) { 1359bd8f1dc3Sbluhm const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1360bd8f1dc3Sbluhm " <e xmlns:prefix=''/>\n" 1361bd8f1dc3Sbluhm "</doc>"; 1362bd8f1dc3Sbluhm 1363bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1364bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1365bd8f1dc3Sbluhm xml_failure(g_parser); 1366bd8f1dc3Sbluhm } 1367bd8f1dc3Sbluhm END_TEST 1368bd8f1dc3Sbluhm 1369bd8f1dc3Sbluhm /* Regression test for SF bug #824420. 1370bd8f1dc3Sbluhm Checks that an xmlns:prefix attribute set in an attribute's default 1371bd8f1dc3Sbluhm value isn't misinterpreted. 1372bd8f1dc3Sbluhm */ 1373bd8f1dc3Sbluhm START_TEST(test_ns_in_attribute_default_without_namespaces) { 1374bd8f1dc3Sbluhm const char *text = "<!DOCTYPE e:element [\n" 1375bd8f1dc3Sbluhm " <!ATTLIST e:element\n" 1376bd8f1dc3Sbluhm " xmlns:e CDATA 'http://example.org/'>\n" 1377bd8f1dc3Sbluhm " ]>\n" 1378bd8f1dc3Sbluhm "<e:element/>"; 1379bd8f1dc3Sbluhm 1380bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1381bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1382bd8f1dc3Sbluhm xml_failure(g_parser); 1383bd8f1dc3Sbluhm } 1384bd8f1dc3Sbluhm END_TEST 1385bd8f1dc3Sbluhm 1386bd8f1dc3Sbluhm /* Regression test for SF bug #1515266: missing check of stopped 1387bd8f1dc3Sbluhm parser in doContext() 'for' loop. 
*/ 1388bd8f1dc3Sbluhm START_TEST(test_stop_parser_between_char_data_calls) { 1389bd8f1dc3Sbluhm /* The sample data must be big enough that there are two calls to 1390bd8f1dc3Sbluhm the character data handler from within the inner "for" loop of 1391bd8f1dc3Sbluhm the XML_TOK_DATA_CHARS case in doContent(), and the character 1392bd8f1dc3Sbluhm handler must stop the parser and clear the character data 1393bd8f1dc3Sbluhm handler. 1394bd8f1dc3Sbluhm */ 1395bd8f1dc3Sbluhm const char *text = long_character_data_text; 1396bd8f1dc3Sbluhm 1397bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1398bd8f1dc3Sbluhm g_resumable = XML_FALSE; 1399bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1400bd8f1dc3Sbluhm != XML_STATUS_ERROR) 1401bd8f1dc3Sbluhm xml_failure(g_parser); 1402bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1403bd8f1dc3Sbluhm xml_failure(g_parser); 1404bd8f1dc3Sbluhm } 1405bd8f1dc3Sbluhm END_TEST 1406bd8f1dc3Sbluhm 1407bd8f1dc3Sbluhm /* Regression test for SF bug #1515266: missing check of stopped 1408bd8f1dc3Sbluhm parser in doContext() 'for' loop. */ 1409bd8f1dc3Sbluhm START_TEST(test_suspend_parser_between_char_data_calls) { 1410bd8f1dc3Sbluhm /* The sample data must be big enough that there are two calls to 1411bd8f1dc3Sbluhm the character data handler from within the inner "for" loop of 1412bd8f1dc3Sbluhm the XML_TOK_DATA_CHARS case in doContent(), and the character 1413bd8f1dc3Sbluhm handler must stop the parser and clear the character data 1414bd8f1dc3Sbluhm handler. 
1415bd8f1dc3Sbluhm */ 1416bd8f1dc3Sbluhm const char *text = long_character_data_text; 1417bd8f1dc3Sbluhm 1418bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1419bd8f1dc3Sbluhm g_resumable = XML_TRUE; 1420bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1421bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 1422bd8f1dc3Sbluhm xml_failure(g_parser); 1423bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1424bd8f1dc3Sbluhm xml_failure(g_parser); 1425bd8f1dc3Sbluhm /* Try parsing directly */ 1426bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1427bd8f1dc3Sbluhm != XML_STATUS_ERROR) 1428bd8f1dc3Sbluhm fail("Attempt to continue parse while suspended not faulted"); 1429bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1430bd8f1dc3Sbluhm fail("Suspended parse not faulted with correct error"); 1431bd8f1dc3Sbluhm } 1432bd8f1dc3Sbluhm END_TEST 1433bd8f1dc3Sbluhm 1434bd8f1dc3Sbluhm /* Test repeated calls to XML_StopParser are handled correctly */ 1435bd8f1dc3Sbluhm START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1436bd8f1dc3Sbluhm const char *text = long_character_data_text; 1437bd8f1dc3Sbluhm 1438bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1439bd8f1dc3Sbluhm g_resumable = XML_FALSE; 1440bd8f1dc3Sbluhm g_abortable = XML_FALSE; 1441bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1442bd8f1dc3Sbluhm != XML_STATUS_ERROR) 1443bd8f1dc3Sbluhm fail("Failed to double-stop parser"); 1444bd8f1dc3Sbluhm 1445bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1446bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1447bd8f1dc3Sbluhm g_resumable = XML_TRUE; 1448bd8f1dc3Sbluhm g_abortable = XML_FALSE; 1449bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1450bd8f1dc3Sbluhm != 
XML_STATUS_SUSPENDED) 1451bd8f1dc3Sbluhm fail("Failed to double-suspend parser"); 1452bd8f1dc3Sbluhm 1453bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1454bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1455bd8f1dc3Sbluhm g_resumable = XML_TRUE; 1456bd8f1dc3Sbluhm g_abortable = XML_TRUE; 1457bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1458bd8f1dc3Sbluhm != XML_STATUS_ERROR) 1459bd8f1dc3Sbluhm fail("Failed to suspend-abort parser"); 1460bd8f1dc3Sbluhm } 1461bd8f1dc3Sbluhm END_TEST 1462bd8f1dc3Sbluhm 1463bd8f1dc3Sbluhm START_TEST(test_good_cdata_ascii) { 1464bd8f1dc3Sbluhm const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; 1465bd8f1dc3Sbluhm const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1466bd8f1dc3Sbluhm 1467bd8f1dc3Sbluhm CharData storage; 1468bd8f1dc3Sbluhm CharData_Init(&storage); 1469bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1470bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1471bd8f1dc3Sbluhm /* Add start and end handlers for coverage */ 1472bd8f1dc3Sbluhm XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1473bd8f1dc3Sbluhm XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1474bd8f1dc3Sbluhm 1475bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1476bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1477bd8f1dc3Sbluhm xml_failure(g_parser); 1478bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1479bd8f1dc3Sbluhm 1480bd8f1dc3Sbluhm /* Try again, this time with a default handler */ 1481bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1482bd8f1dc3Sbluhm CharData_Init(&storage); 1483bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1484bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1485bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 1486bd8f1dc3Sbluhm 1487bd8f1dc3Sbluhm if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1488bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1489bd8f1dc3Sbluhm xml_failure(g_parser); 1490bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1491bd8f1dc3Sbluhm } 1492bd8f1dc3Sbluhm END_TEST 1493bd8f1dc3Sbluhm 1494bd8f1dc3Sbluhm START_TEST(test_good_cdata_utf16) { 1495bd8f1dc3Sbluhm /* Test data is: 1496bd8f1dc3Sbluhm * <?xml version='1.0' encoding='utf-16'?> 1497bd8f1dc3Sbluhm * <a><![CDATA[hello]]></a> 1498bd8f1dc3Sbluhm */ 1499bd8f1dc3Sbluhm const char text[] 1500bd8f1dc3Sbluhm = "\0<\0?\0x\0m\0l\0" 1501bd8f1dc3Sbluhm " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1502bd8f1dc3Sbluhm " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1503bd8f1dc3Sbluhm "1\0" 1504bd8f1dc3Sbluhm "6\0'" 1505bd8f1dc3Sbluhm "\0?\0>\0\n" 1506bd8f1dc3Sbluhm "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1507bd8f1dc3Sbluhm const XML_Char *expected = XCS("hello"); 1508bd8f1dc3Sbluhm 1509bd8f1dc3Sbluhm CharData storage; 1510bd8f1dc3Sbluhm CharData_Init(&storage); 1511bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1512bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1513bd8f1dc3Sbluhm 1514bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1515bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1516bd8f1dc3Sbluhm xml_failure(g_parser); 1517bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1518bd8f1dc3Sbluhm } 1519bd8f1dc3Sbluhm END_TEST 1520bd8f1dc3Sbluhm 1521bd8f1dc3Sbluhm START_TEST(test_good_cdata_utf16_le) { 1522bd8f1dc3Sbluhm /* Test data is: 1523bd8f1dc3Sbluhm * <?xml version='1.0' encoding='utf-16'?> 1524bd8f1dc3Sbluhm * <a><![CDATA[hello]]></a> 1525bd8f1dc3Sbluhm */ 1526bd8f1dc3Sbluhm const char text[] 1527bd8f1dc3Sbluhm = "<\0?\0x\0m\0l\0" 1528bd8f1dc3Sbluhm " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1529bd8f1dc3Sbluhm " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1530bd8f1dc3Sbluhm "1\0" 
1531bd8f1dc3Sbluhm "6\0'" 1532bd8f1dc3Sbluhm "\0?\0>\0\n" 1533bd8f1dc3Sbluhm "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1534bd8f1dc3Sbluhm const XML_Char *expected = XCS("hello"); 1535bd8f1dc3Sbluhm 1536bd8f1dc3Sbluhm CharData storage; 1537bd8f1dc3Sbluhm CharData_Init(&storage); 1538bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1539bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1540bd8f1dc3Sbluhm 1541bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1542bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1543bd8f1dc3Sbluhm xml_failure(g_parser); 1544bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1545bd8f1dc3Sbluhm } 1546bd8f1dc3Sbluhm END_TEST 1547bd8f1dc3Sbluhm 1548bd8f1dc3Sbluhm /* Test UTF16 conversion of a long cdata string */ 1549bd8f1dc3Sbluhm 1550bd8f1dc3Sbluhm /* 16 characters: handy macro to reduce visual clutter */ 1551bd8f1dc3Sbluhm #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1552bd8f1dc3Sbluhm 1553bd8f1dc3Sbluhm START_TEST(test_long_cdata_utf16) { 1554bd8f1dc3Sbluhm /* Test data is: 1555bd8f1dc3Sbluhm * <?xlm version='1.0' encoding='utf-16'?> 1556bd8f1dc3Sbluhm * <a><![CDATA[ 1557bd8f1dc3Sbluhm * ABCDEFGHIJKLMNOP 1558bd8f1dc3Sbluhm * ]]></a> 1559bd8f1dc3Sbluhm */ 1560bd8f1dc3Sbluhm const char text[] 1561bd8f1dc3Sbluhm = "\0<\0?\0x\0m\0l\0 " 1562bd8f1dc3Sbluhm "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1563bd8f1dc3Sbluhm "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1564bd8f1dc3Sbluhm "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1565bd8f1dc3Sbluhm /* 64 characters per line */ 1566bd8f1dc3Sbluhm /* clang-format off */ 1567bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1568bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1569bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1570bd8f1dc3Sbluhm A_TO_P_IN_UTF16 
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1571bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1572bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1573bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1574bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1575bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1576bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1577bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1578bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1579bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1580bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1581bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1582bd8f1dc3Sbluhm A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1583bd8f1dc3Sbluhm A_TO_P_IN_UTF16 1584bd8f1dc3Sbluhm /* clang-format on */ 1585bd8f1dc3Sbluhm "\0]\0]\0>\0<\0/\0a\0>"; 1586bd8f1dc3Sbluhm const XML_Char *expected = 1587bd8f1dc3Sbluhm /* clang-format off */ 1588bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1589bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1590bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1591bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1592bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1593bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1594bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1595bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1596bd8f1dc3Sbluhm 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1597bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1598bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1599bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1600bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1601bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1602bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1603bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1604bd8f1dc3Sbluhm XCS("ABCDEFGHIJKLMNOP"); 1605bd8f1dc3Sbluhm /* clang-format on */ 1606bd8f1dc3Sbluhm CharData storage; 1607bd8f1dc3Sbluhm void *buffer; 1608bd8f1dc3Sbluhm 1609bd8f1dc3Sbluhm CharData_Init(&storage); 1610bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1611bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1612bd8f1dc3Sbluhm buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1613bd8f1dc3Sbluhm if (buffer == NULL) 1614bd8f1dc3Sbluhm fail("Could not allocate parse buffer"); 1615bd8f1dc3Sbluhm assert(buffer != NULL); 1616bd8f1dc3Sbluhm memcpy(buffer, text, sizeof(text) - 1); 1617bd8f1dc3Sbluhm if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1618bd8f1dc3Sbluhm xml_failure(g_parser); 1619bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1620bd8f1dc3Sbluhm } 1621bd8f1dc3Sbluhm END_TEST 1622bd8f1dc3Sbluhm 1623bd8f1dc3Sbluhm /* Test handling of multiple unit UTF-16 characters */ 1624bd8f1dc3Sbluhm START_TEST(test_multichar_cdata_utf16) { 1625bd8f1dc3Sbluhm /* Test data is: 1626bd8f1dc3Sbluhm * <?xml version='1.0' encoding='utf-16'?> 1627bd8f1dc3Sbluhm * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1628bd8f1dc3Sbluhm * 1629bd8f1dc3Sbluhm * where {MINIM} is U+1d15e (a minim or half-note) 
1630bd8f1dc3Sbluhm * UTF-16: 0xd834 0xdd5e 1631bd8f1dc3Sbluhm * UTF-8: 0xf0 0x9d 0x85 0x9e 1632bd8f1dc3Sbluhm * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1633bd8f1dc3Sbluhm * UTF-16: 0xd834 0xdd5f 1634bd8f1dc3Sbluhm * UTF-8: 0xf0 0x9d 0x85 0x9f 1635bd8f1dc3Sbluhm */ 1636bd8f1dc3Sbluhm const char text[] = "\0<\0?\0x\0m\0l\0" 1637bd8f1dc3Sbluhm " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1638bd8f1dc3Sbluhm " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1639bd8f1dc3Sbluhm "1\0" 1640bd8f1dc3Sbluhm "6\0'" 1641bd8f1dc3Sbluhm "\0?\0>\0\n" 1642bd8f1dc3Sbluhm "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1643bd8f1dc3Sbluhm "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1644bd8f1dc3Sbluhm "\0]\0]\0>\0<\0/\0a\0>"; 1645bd8f1dc3Sbluhm #ifdef XML_UNICODE 1646bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1647bd8f1dc3Sbluhm #else 1648bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1649bd8f1dc3Sbluhm #endif 1650bd8f1dc3Sbluhm CharData storage; 1651bd8f1dc3Sbluhm 1652bd8f1dc3Sbluhm CharData_Init(&storage); 1653bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1654bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1655bd8f1dc3Sbluhm 1656bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1657bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1658bd8f1dc3Sbluhm xml_failure(g_parser); 1659bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 1660bd8f1dc3Sbluhm } 1661bd8f1dc3Sbluhm END_TEST 1662bd8f1dc3Sbluhm 1663bd8f1dc3Sbluhm /* Test that an element name with a UTF-16 surrogate pair is rejected */ 1664bd8f1dc3Sbluhm START_TEST(test_utf16_bad_surrogate_pair) { 1665bd8f1dc3Sbluhm /* Test data is: 1666bd8f1dc3Sbluhm * <?xml version='1.0' encoding='utf-16'?> 1667bd8f1dc3Sbluhm * <a><![CDATA[{BADLINB}]]></a> 1668bd8f1dc3Sbluhm * 1669bd8f1dc3Sbluhm * where {BADLINB} is U+10000 (the first Linear B character) 1670bd8f1dc3Sbluhm * with the UTF-16 surrogate pair in the 
wrong order, i.e. 1671bd8f1dc3Sbluhm * 0xdc00 0xd800 1672bd8f1dc3Sbluhm */ 1673bd8f1dc3Sbluhm const char text[] = "\0<\0?\0x\0m\0l\0" 1674bd8f1dc3Sbluhm " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1675bd8f1dc3Sbluhm " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1676bd8f1dc3Sbluhm "1\0" 1677bd8f1dc3Sbluhm "6\0'" 1678bd8f1dc3Sbluhm "\0?\0>\0\n" 1679bd8f1dc3Sbluhm "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1680bd8f1dc3Sbluhm "\xdc\x00\xd8\x00" 1681bd8f1dc3Sbluhm "\0]\0]\0>\0<\0/\0a\0>"; 1682bd8f1dc3Sbluhm 1683bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1684bd8f1dc3Sbluhm != XML_STATUS_ERROR) 1685bd8f1dc3Sbluhm fail("Reversed UTF-16 surrogate pair not faulted"); 1686bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1687bd8f1dc3Sbluhm xml_failure(g_parser); 1688bd8f1dc3Sbluhm } 1689bd8f1dc3Sbluhm END_TEST 1690bd8f1dc3Sbluhm 1691bd8f1dc3Sbluhm START_TEST(test_bad_cdata) { 1692bd8f1dc3Sbluhm struct CaseData { 1693bd8f1dc3Sbluhm const char *text; 1694bd8f1dc3Sbluhm enum XML_Error expectedError; 1695bd8f1dc3Sbluhm }; 1696bd8f1dc3Sbluhm 1697bd8f1dc3Sbluhm struct CaseData cases[] 1698bd8f1dc3Sbluhm = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1699bd8f1dc3Sbluhm {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1700bd8f1dc3Sbluhm {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1701bd8f1dc3Sbluhm {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1702bd8f1dc3Sbluhm {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1703bd8f1dc3Sbluhm {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1704bd8f1dc3Sbluhm {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1705bd8f1dc3Sbluhm {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1706bd8f1dc3Sbluhm 1707bd8f1dc3Sbluhm {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1708bd8f1dc3Sbluhm {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1709bd8f1dc3Sbluhm {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1710bd8f1dc3Sbluhm 1711bd8f1dc3Sbluhm {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1712bd8f1dc3Sbluhm {"<a><![<a/>", 
XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1713bd8f1dc3Sbluhm {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1714bd8f1dc3Sbluhm {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1715bd8f1dc3Sbluhm {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1716bd8f1dc3Sbluhm {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1717bd8f1dc3Sbluhm {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1718bd8f1dc3Sbluhm 1719bd8f1dc3Sbluhm {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1720bd8f1dc3Sbluhm {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1721bd8f1dc3Sbluhm {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1722bd8f1dc3Sbluhm 1723bd8f1dc3Sbluhm size_t i = 0; 1724bd8f1dc3Sbluhm for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1725bd8f1dc3Sbluhm set_subtest("%s", cases[i].text); 1726bd8f1dc3Sbluhm const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1727bd8f1dc3Sbluhm g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1728bd8f1dc3Sbluhm const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1729bd8f1dc3Sbluhm 1730bd8f1dc3Sbluhm assert(actualStatus == XML_STATUS_ERROR); 1731bd8f1dc3Sbluhm 1732bd8f1dc3Sbluhm if (actualError != cases[i].expectedError) { 1733bd8f1dc3Sbluhm char message[100]; 1734bd8f1dc3Sbluhm snprintf(message, sizeof(message), 1735bd8f1dc3Sbluhm "Expected error %d but got error %d for case %u: \"%s\"\n", 1736bd8f1dc3Sbluhm cases[i].expectedError, actualError, (unsigned int)i + 1, 1737bd8f1dc3Sbluhm cases[i].text); 1738bd8f1dc3Sbluhm fail(message); 1739bd8f1dc3Sbluhm } 1740bd8f1dc3Sbluhm 1741bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1742bd8f1dc3Sbluhm } 1743bd8f1dc3Sbluhm } 1744bd8f1dc3Sbluhm END_TEST 1745bd8f1dc3Sbluhm 1746bd8f1dc3Sbluhm /* Test failures in UTF-16 CDATA */ 1747bd8f1dc3Sbluhm START_TEST(test_bad_cdata_utf16) { 1748bd8f1dc3Sbluhm struct CaseData { 1749bd8f1dc3Sbluhm size_t text_bytes; 1750bd8f1dc3Sbluhm const char *text; 1751bd8f1dc3Sbluhm enum XML_Error expected_error; 1752bd8f1dc3Sbluhm }; 
1753bd8f1dc3Sbluhm 1754bd8f1dc3Sbluhm const char prolog[] = "\0<\0?\0x\0m\0l\0" 1755bd8f1dc3Sbluhm " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1756bd8f1dc3Sbluhm " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1757bd8f1dc3Sbluhm "1\0" 1758bd8f1dc3Sbluhm "6\0'" 1759bd8f1dc3Sbluhm "\0?\0>\0\n" 1760bd8f1dc3Sbluhm "\0<\0a\0>"; 1761bd8f1dc3Sbluhm struct CaseData cases[] = { 1762bd8f1dc3Sbluhm {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1763bd8f1dc3Sbluhm {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1764bd8f1dc3Sbluhm {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1765bd8f1dc3Sbluhm {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1766bd8f1dc3Sbluhm {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1767bd8f1dc3Sbluhm {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1768bd8f1dc3Sbluhm {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1769bd8f1dc3Sbluhm {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1770bd8f1dc3Sbluhm {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, 1771bd8f1dc3Sbluhm {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1772bd8f1dc3Sbluhm {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1773bd8f1dc3Sbluhm {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1774bd8f1dc3Sbluhm {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1775bd8f1dc3Sbluhm {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1776bd8f1dc3Sbluhm {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1777bd8f1dc3Sbluhm {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1778bd8f1dc3Sbluhm {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1779bd8f1dc3Sbluhm {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1780bd8f1dc3Sbluhm {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1781bd8f1dc3Sbluhm {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1782bd8f1dc3Sbluhm /* Now add a four-byte UTF-16 character */ 1783bd8f1dc3Sbluhm {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1784bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_CDATA_SECTION}, 
1785bd8f1dc3Sbluhm {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1786bd8f1dc3Sbluhm {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1787bd8f1dc3Sbluhm XML_ERROR_PARTIAL_CHAR}, 1788bd8f1dc3Sbluhm {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1789bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1790bd8f1dc3Sbluhm size_t i; 1791bd8f1dc3Sbluhm 1792bd8f1dc3Sbluhm for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1793bd8f1dc3Sbluhm set_subtest("case %lu", (long unsigned)(i + 1)); 1794bd8f1dc3Sbluhm enum XML_Status actual_status; 1795bd8f1dc3Sbluhm enum XML_Error actual_error; 1796bd8f1dc3Sbluhm 1797bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1798bd8f1dc3Sbluhm XML_FALSE) 1799bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1800bd8f1dc3Sbluhm xml_failure(g_parser); 1801bd8f1dc3Sbluhm actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1802bd8f1dc3Sbluhm (int)cases[i].text_bytes, XML_TRUE); 1803bd8f1dc3Sbluhm assert(actual_status == XML_STATUS_ERROR); 1804bd8f1dc3Sbluhm actual_error = XML_GetErrorCode(g_parser); 1805bd8f1dc3Sbluhm if (actual_error != cases[i].expected_error) { 1806bd8f1dc3Sbluhm char message[1024]; 1807bd8f1dc3Sbluhm 1808bd8f1dc3Sbluhm snprintf(message, sizeof(message), 1809bd8f1dc3Sbluhm "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1810bd8f1dc3Sbluhm ") for case %lu\n", 1811bd8f1dc3Sbluhm cases[i].expected_error, 1812bd8f1dc3Sbluhm XML_ErrorString(cases[i].expected_error), actual_error, 1813bd8f1dc3Sbluhm XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1814bd8f1dc3Sbluhm fail(message); 1815bd8f1dc3Sbluhm } 1816bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1817bd8f1dc3Sbluhm } 1818bd8f1dc3Sbluhm } 1819bd8f1dc3Sbluhm END_TEST 1820bd8f1dc3Sbluhm 1821bd8f1dc3Sbluhm /* Test stopping the parser in cdata handler */ 1822bd8f1dc3Sbluhm START_TEST(test_stop_parser_between_cdata_calls) { 1823bd8f1dc3Sbluhm const char *text = long_cdata_text; 
1824bd8f1dc3Sbluhm 1825bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1826bd8f1dc3Sbluhm g_resumable = XML_FALSE; 1827bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1828bd8f1dc3Sbluhm } 1829bd8f1dc3Sbluhm END_TEST 1830bd8f1dc3Sbluhm 1831bd8f1dc3Sbluhm /* Test suspending the parser in cdata handler */ 1832bd8f1dc3Sbluhm START_TEST(test_suspend_parser_between_cdata_calls) { 1833bd8f1dc3Sbluhm const char *text = long_cdata_text; 1834bd8f1dc3Sbluhm enum XML_Status result; 1835bd8f1dc3Sbluhm 1836bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1837bd8f1dc3Sbluhm g_resumable = XML_TRUE; 1838bd8f1dc3Sbluhm result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 1839bd8f1dc3Sbluhm if (result != XML_STATUS_SUSPENDED) { 1840bd8f1dc3Sbluhm if (result == XML_STATUS_ERROR) 1841bd8f1dc3Sbluhm xml_failure(g_parser); 1842bd8f1dc3Sbluhm fail("Parse not suspended in CDATA handler"); 1843bd8f1dc3Sbluhm } 1844bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1845bd8f1dc3Sbluhm xml_failure(g_parser); 1846bd8f1dc3Sbluhm } 1847bd8f1dc3Sbluhm END_TEST 1848bd8f1dc3Sbluhm 1849bd8f1dc3Sbluhm /* Test memory allocation functions */ 1850bd8f1dc3Sbluhm START_TEST(test_memory_allocation) { 1851bd8f1dc3Sbluhm char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1852bd8f1dc3Sbluhm char *p; 1853bd8f1dc3Sbluhm 1854bd8f1dc3Sbluhm if (buffer == NULL) { 1855bd8f1dc3Sbluhm fail("Allocation failed"); 1856bd8f1dc3Sbluhm } else { 1857bd8f1dc3Sbluhm /* Try writing to memory; some OSes try to cheat! 
*/ 1858bd8f1dc3Sbluhm buffer[0] = 'T'; 1859bd8f1dc3Sbluhm buffer[1] = 'E'; 1860bd8f1dc3Sbluhm buffer[2] = 'S'; 1861bd8f1dc3Sbluhm buffer[3] = 'T'; 1862bd8f1dc3Sbluhm buffer[4] = '\0'; 1863bd8f1dc3Sbluhm if (strcmp(buffer, "TEST") != 0) { 1864bd8f1dc3Sbluhm fail("Memory not writable"); 1865bd8f1dc3Sbluhm } else { 1866bd8f1dc3Sbluhm p = (char *)XML_MemRealloc(g_parser, buffer, 512); 1867bd8f1dc3Sbluhm if (p == NULL) { 1868bd8f1dc3Sbluhm fail("Reallocation failed"); 1869bd8f1dc3Sbluhm } else { 1870bd8f1dc3Sbluhm /* Write again, just to be sure */ 1871bd8f1dc3Sbluhm buffer = p; 1872bd8f1dc3Sbluhm buffer[0] = 'V'; 1873bd8f1dc3Sbluhm if (strcmp(buffer, "VEST") != 0) { 1874bd8f1dc3Sbluhm fail("Reallocated memory not writable"); 1875bd8f1dc3Sbluhm } 1876bd8f1dc3Sbluhm } 1877bd8f1dc3Sbluhm } 1878bd8f1dc3Sbluhm XML_MemFree(g_parser, buffer); 1879bd8f1dc3Sbluhm } 1880bd8f1dc3Sbluhm } 1881bd8f1dc3Sbluhm END_TEST 1882bd8f1dc3Sbluhm 1883bd8f1dc3Sbluhm /* Test XML_DefaultCurrent() passes handling on correctly */ 1884bd8f1dc3Sbluhm START_TEST(test_default_current) { 1885bd8f1dc3Sbluhm const char *text = "<doc>hell]</doc>"; 1886bd8f1dc3Sbluhm const char *entity_text = "<!DOCTYPE doc [\n" 1887bd8f1dc3Sbluhm "<!ENTITY entity '%'>\n" 1888bd8f1dc3Sbluhm "]>\n" 1889bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 1890bd8f1dc3Sbluhm 1891bd8f1dc3Sbluhm set_subtest("with defaulting"); 1892bd8f1dc3Sbluhm { 1893bd8f1dc3Sbluhm struct handler_record_list storage; 1894bd8f1dc3Sbluhm storage.count = 0; 1895bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, record_default_handler); 1896bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1897bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1898bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1899bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1900bd8f1dc3Sbluhm xml_failure(g_parser); 1901bd8f1dc3Sbluhm int i = 0; 1902bd8f1dc3Sbluhm assert_record_handler_called(&storage, i++, "record_default_handler", 5); 
1903bd8f1dc3Sbluhm // we should have gotten one or more cdata callbacks, totaling 5 chars 1904bd8f1dc3Sbluhm int cdata_len_remaining = 5; 1905bd8f1dc3Sbluhm while (cdata_len_remaining > 0) { 1906bd8f1dc3Sbluhm const struct handler_record_entry *c_entry 1907bd8f1dc3Sbluhm = handler_record_get(&storage, i++); 1908bd8f1dc3Sbluhm assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 1909bd8f1dc3Sbluhm assert_true(c_entry->arg > 0); 1910bd8f1dc3Sbluhm assert_true(c_entry->arg <= cdata_len_remaining); 1911bd8f1dc3Sbluhm cdata_len_remaining -= c_entry->arg; 1912bd8f1dc3Sbluhm // default handler must follow, with the exact same len argument. 1913bd8f1dc3Sbluhm assert_record_handler_called(&storage, i++, "record_default_handler", 1914bd8f1dc3Sbluhm c_entry->arg); 1915bd8f1dc3Sbluhm } 1916bd8f1dc3Sbluhm assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1917bd8f1dc3Sbluhm assert_true(storage.count == i); 1918bd8f1dc3Sbluhm } 1919bd8f1dc3Sbluhm 1920bd8f1dc3Sbluhm /* Again, without the defaulting */ 1921bd8f1dc3Sbluhm set_subtest("no defaulting"); 1922bd8f1dc3Sbluhm { 1923bd8f1dc3Sbluhm struct handler_record_list storage; 1924bd8f1dc3Sbluhm storage.count = 0; 1925bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1926bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, record_default_handler); 1927bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 1928bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1929bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1930bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1931bd8f1dc3Sbluhm xml_failure(g_parser); 1932bd8f1dc3Sbluhm int i = 0; 1933bd8f1dc3Sbluhm assert_record_handler_called(&storage, i++, "record_default_handler", 5); 1934bd8f1dc3Sbluhm // we should have gotten one or more cdata callbacks, totaling 5 chars 1935bd8f1dc3Sbluhm int cdata_len_remaining = 5; 1936bd8f1dc3Sbluhm while (cdata_len_remaining > 0) { 1937bd8f1dc3Sbluhm const struct 
handler_record_entry *c_entry 1938bd8f1dc3Sbluhm = handler_record_get(&storage, i++); 1939bd8f1dc3Sbluhm assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 1940bd8f1dc3Sbluhm assert_true(c_entry->arg > 0); 1941bd8f1dc3Sbluhm assert_true(c_entry->arg <= cdata_len_remaining); 1942bd8f1dc3Sbluhm cdata_len_remaining -= c_entry->arg; 1943bd8f1dc3Sbluhm } 1944bd8f1dc3Sbluhm assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1945bd8f1dc3Sbluhm assert_true(storage.count == i); 1946bd8f1dc3Sbluhm } 1947bd8f1dc3Sbluhm 1948bd8f1dc3Sbluhm /* Now with an internal entity to complicate matters */ 1949bd8f1dc3Sbluhm set_subtest("with internal entity"); 1950bd8f1dc3Sbluhm { 1951bd8f1dc3Sbluhm struct handler_record_list storage; 1952bd8f1dc3Sbluhm storage.count = 0; 1953bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1954bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, record_default_handler); 1955bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1956bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1957bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1958bd8f1dc3Sbluhm XML_TRUE) 1959bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1960bd8f1dc3Sbluhm xml_failure(g_parser); 1961bd8f1dc3Sbluhm /* The default handler suppresses the entity */ 1962bd8f1dc3Sbluhm assert_record_handler_called(&storage, 0, "record_default_handler", 9); 1963bd8f1dc3Sbluhm assert_record_handler_called(&storage, 1, "record_default_handler", 1); 1964bd8f1dc3Sbluhm assert_record_handler_called(&storage, 2, "record_default_handler", 3); 1965bd8f1dc3Sbluhm assert_record_handler_called(&storage, 3, "record_default_handler", 1); 1966bd8f1dc3Sbluhm assert_record_handler_called(&storage, 4, "record_default_handler", 1); 1967bd8f1dc3Sbluhm assert_record_handler_called(&storage, 5, "record_default_handler", 1); 1968bd8f1dc3Sbluhm assert_record_handler_called(&storage, 6, "record_default_handler", 8); 1969bd8f1dc3Sbluhm 
assert_record_handler_called(&storage, 7, "record_default_handler", 1); 1970bd8f1dc3Sbluhm assert_record_handler_called(&storage, 8, "record_default_handler", 6); 1971bd8f1dc3Sbluhm assert_record_handler_called(&storage, 9, "record_default_handler", 1); 1972bd8f1dc3Sbluhm assert_record_handler_called(&storage, 10, "record_default_handler", 7); 1973bd8f1dc3Sbluhm assert_record_handler_called(&storage, 11, "record_default_handler", 1); 1974bd8f1dc3Sbluhm assert_record_handler_called(&storage, 12, "record_default_handler", 1); 1975bd8f1dc3Sbluhm assert_record_handler_called(&storage, 13, "record_default_handler", 1); 1976bd8f1dc3Sbluhm assert_record_handler_called(&storage, 14, "record_default_handler", 1); 1977bd8f1dc3Sbluhm assert_record_handler_called(&storage, 15, "record_default_handler", 1); 1978bd8f1dc3Sbluhm assert_record_handler_called(&storage, 16, "record_default_handler", 5); 1979bd8f1dc3Sbluhm assert_record_handler_called(&storage, 17, "record_default_handler", 8); 1980bd8f1dc3Sbluhm assert_record_handler_called(&storage, 18, "record_default_handler", 6); 1981bd8f1dc3Sbluhm assert_true(storage.count == 19); 1982bd8f1dc3Sbluhm } 1983bd8f1dc3Sbluhm 1984bd8f1dc3Sbluhm /* Again, with a skip handler */ 1985bd8f1dc3Sbluhm set_subtest("with skip handler"); 1986bd8f1dc3Sbluhm { 1987bd8f1dc3Sbluhm struct handler_record_list storage; 1988bd8f1dc3Sbluhm storage.count = 0; 1989bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 1990bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, record_default_handler); 1991bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1992bd8f1dc3Sbluhm XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 1993bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 1994bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1995bd8f1dc3Sbluhm XML_TRUE) 1996bd8f1dc3Sbluhm == XML_STATUS_ERROR) 1997bd8f1dc3Sbluhm xml_failure(g_parser); 1998bd8f1dc3Sbluhm /* The default handler suppresses 
the entity */ 1999bd8f1dc3Sbluhm assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2000bd8f1dc3Sbluhm assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2001bd8f1dc3Sbluhm assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2002bd8f1dc3Sbluhm assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2003bd8f1dc3Sbluhm assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2004bd8f1dc3Sbluhm assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2005bd8f1dc3Sbluhm assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2006bd8f1dc3Sbluhm assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2007bd8f1dc3Sbluhm assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2008bd8f1dc3Sbluhm assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2009bd8f1dc3Sbluhm assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2010bd8f1dc3Sbluhm assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2011bd8f1dc3Sbluhm assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2012bd8f1dc3Sbluhm assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2013bd8f1dc3Sbluhm assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2014bd8f1dc3Sbluhm assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2015bd8f1dc3Sbluhm assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2016bd8f1dc3Sbluhm assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2017bd8f1dc3Sbluhm assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2018bd8f1dc3Sbluhm assert_true(storage.count == 19); 2019bd8f1dc3Sbluhm } 2020bd8f1dc3Sbluhm 2021bd8f1dc3Sbluhm /* This time, allow the entity through */ 2022bd8f1dc3Sbluhm set_subtest("allow entity"); 2023bd8f1dc3Sbluhm { 2024bd8f1dc3Sbluhm struct 
handler_record_list storage; 2025bd8f1dc3Sbluhm storage.count = 0; 2026bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2027bd8f1dc3Sbluhm XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2028bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 2029bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 2030bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2031bd8f1dc3Sbluhm XML_TRUE) 2032bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2033bd8f1dc3Sbluhm xml_failure(g_parser); 2034bd8f1dc3Sbluhm assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2035bd8f1dc3Sbluhm assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2036bd8f1dc3Sbluhm assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2037bd8f1dc3Sbluhm assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2038bd8f1dc3Sbluhm assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2039bd8f1dc3Sbluhm assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2040bd8f1dc3Sbluhm assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2041bd8f1dc3Sbluhm assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2042bd8f1dc3Sbluhm assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2043bd8f1dc3Sbluhm assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2044bd8f1dc3Sbluhm assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2045bd8f1dc3Sbluhm assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2046bd8f1dc3Sbluhm assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2047bd8f1dc3Sbluhm assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2048bd8f1dc3Sbluhm assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2049bd8f1dc3Sbluhm assert_record_handler_called(&storage, 15, 
"record_default_handler", 1); 2050bd8f1dc3Sbluhm assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2051bd8f1dc3Sbluhm assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2052bd8f1dc3Sbluhm assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2053bd8f1dc3Sbluhm assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2054bd8f1dc3Sbluhm assert_true(storage.count == 20); 2055bd8f1dc3Sbluhm } 2056bd8f1dc3Sbluhm 2057bd8f1dc3Sbluhm /* Finally, without passing the cdata to the default handler */ 2058bd8f1dc3Sbluhm set_subtest("not passing cdata"); 2059bd8f1dc3Sbluhm { 2060bd8f1dc3Sbluhm struct handler_record_list storage; 2061bd8f1dc3Sbluhm storage.count = 0; 2062bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2063bd8f1dc3Sbluhm XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2064bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2065bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 2066bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2067bd8f1dc3Sbluhm XML_TRUE) 2068bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2069bd8f1dc3Sbluhm xml_failure(g_parser); 2070bd8f1dc3Sbluhm assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2071bd8f1dc3Sbluhm assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2072bd8f1dc3Sbluhm assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2073bd8f1dc3Sbluhm assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2074bd8f1dc3Sbluhm assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2075bd8f1dc3Sbluhm assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2076bd8f1dc3Sbluhm assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2077bd8f1dc3Sbluhm assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2078bd8f1dc3Sbluhm 
assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2079bd8f1dc3Sbluhm assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2080bd8f1dc3Sbluhm assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2081bd8f1dc3Sbluhm assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2082bd8f1dc3Sbluhm assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2083bd8f1dc3Sbluhm assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2084bd8f1dc3Sbluhm assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2085bd8f1dc3Sbluhm assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2086bd8f1dc3Sbluhm assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2087bd8f1dc3Sbluhm assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2088bd8f1dc3Sbluhm 1); 2089bd8f1dc3Sbluhm assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2090bd8f1dc3Sbluhm assert_true(storage.count == 19); 2091bd8f1dc3Sbluhm } 2092bd8f1dc3Sbluhm } 2093bd8f1dc3Sbluhm END_TEST 2094bd8f1dc3Sbluhm 2095bd8f1dc3Sbluhm /* Test DTD element parsing code paths */ 2096bd8f1dc3Sbluhm START_TEST(test_dtd_elements) { 2097bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2098bd8f1dc3Sbluhm "<!ELEMENT doc (chapter)>\n" 2099bd8f1dc3Sbluhm "<!ELEMENT chapter (#PCDATA)>\n" 2100bd8f1dc3Sbluhm "]>\n" 2101bd8f1dc3Sbluhm "<doc><chapter>Wombats are go</chapter></doc>"; 2102bd8f1dc3Sbluhm 2103bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2104bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2105bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2106bd8f1dc3Sbluhm xml_failure(g_parser); 2107bd8f1dc3Sbluhm } 2108bd8f1dc3Sbluhm END_TEST 2109bd8f1dc3Sbluhm 2110bd8f1dc3Sbluhm static void XMLCALL 2111bd8f1dc3Sbluhm element_decl_check_model(void *userData, const XML_Char *name, 
2112bd8f1dc3Sbluhm XML_Content *model) { 2113bd8f1dc3Sbluhm UNUSED_P(userData); 2114bd8f1dc3Sbluhm uint32_t errorFlags = 0; 2115bd8f1dc3Sbluhm 2116bd8f1dc3Sbluhm /* Expected model array structure is this: 2117bd8f1dc3Sbluhm * [0] (type 6, quant 0) 2118bd8f1dc3Sbluhm * [1] (type 5, quant 0) 2119bd8f1dc3Sbluhm * [3] (type 4, quant 0, name "bar") 2120bd8f1dc3Sbluhm * [4] (type 4, quant 0, name "foo") 2121bd8f1dc3Sbluhm * [5] (type 4, quant 3, name "xyz") 2122bd8f1dc3Sbluhm * [2] (type 4, quant 2, name "zebra") 2123bd8f1dc3Sbluhm */ 2124bd8f1dc3Sbluhm errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2125bd8f1dc3Sbluhm errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2126bd8f1dc3Sbluhm 2127bd8f1dc3Sbluhm if (model != NULL) { 2128bd8f1dc3Sbluhm errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2129bd8f1dc3Sbluhm errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2130bd8f1dc3Sbluhm errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2131bd8f1dc3Sbluhm errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2132bd8f1dc3Sbluhm errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2133bd8f1dc3Sbluhm 2134bd8f1dc3Sbluhm errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2135bd8f1dc3Sbluhm errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2136bd8f1dc3Sbluhm errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2137bd8f1dc3Sbluhm errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2138bd8f1dc3Sbluhm errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2139bd8f1dc3Sbluhm 2140bd8f1dc3Sbluhm errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2141bd8f1dc3Sbluhm errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2142bd8f1dc3Sbluhm errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); 2143bd8f1dc3Sbluhm errorFlags |= ((model[2].children == NULL) ? 
0 : (1u << 15)); 2144bd8f1dc3Sbluhm errorFlags 2145bd8f1dc3Sbluhm |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2146bd8f1dc3Sbluhm 2147bd8f1dc3Sbluhm errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2148bd8f1dc3Sbluhm errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2149bd8f1dc3Sbluhm errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2150bd8f1dc3Sbluhm errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2151bd8f1dc3Sbluhm errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2152bd8f1dc3Sbluhm 2153bd8f1dc3Sbluhm errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2154bd8f1dc3Sbluhm errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2155bd8f1dc3Sbluhm errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2156bd8f1dc3Sbluhm errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2157bd8f1dc3Sbluhm errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2158bd8f1dc3Sbluhm 2159bd8f1dc3Sbluhm errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2160bd8f1dc3Sbluhm errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2161bd8f1dc3Sbluhm errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2162bd8f1dc3Sbluhm errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2163bd8f1dc3Sbluhm errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 
0 : (1u << 31)); 2164bd8f1dc3Sbluhm } 2165bd8f1dc3Sbluhm 2166bd8f1dc3Sbluhm XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2167bd8f1dc3Sbluhm XML_FreeContentModel(g_parser, model); 2168bd8f1dc3Sbluhm } 2169bd8f1dc3Sbluhm 2170bd8f1dc3Sbluhm START_TEST(test_dtd_elements_nesting) { 2171bd8f1dc3Sbluhm // Payload inspired by a test in Perl's XML::Parser 2172bd8f1dc3Sbluhm const char *text = "<!DOCTYPE foo [\n" 2173bd8f1dc3Sbluhm "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2174bd8f1dc3Sbluhm "]>\n" 2175bd8f1dc3Sbluhm "<foo/>"; 2176bd8f1dc3Sbluhm 2177bd8f1dc3Sbluhm XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2178bd8f1dc3Sbluhm 2179bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2180bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2181bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2182bd8f1dc3Sbluhm xml_failure(g_parser); 2183bd8f1dc3Sbluhm 2184bd8f1dc3Sbluhm if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2185bd8f1dc3Sbluhm fail("Element declaration model regression detected"); 2186bd8f1dc3Sbluhm } 2187bd8f1dc3Sbluhm END_TEST 2188bd8f1dc3Sbluhm 2189bd8f1dc3Sbluhm /* Test foreign DTD handling */ 2190bd8f1dc3Sbluhm START_TEST(test_set_foreign_dtd) { 2191bd8f1dc3Sbluhm const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2192bd8f1dc3Sbluhm const char *text2 = "<doc>&entity;</doc>"; 2193bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2194bd8f1dc3Sbluhm 2195bd8f1dc3Sbluhm /* Check hash salt is passed through too */ 2196bd8f1dc3Sbluhm XML_SetHashSalt(g_parser, 0x12345678); 2197bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2198bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 2199bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2200bd8f1dc3Sbluhm /* Add a default handler to exercise more code paths */ 2201bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 
2202bd8f1dc3Sbluhm if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2203bd8f1dc3Sbluhm fail("Could not set foreign DTD"); 2204bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2205bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2206bd8f1dc3Sbluhm xml_failure(g_parser); 2207bd8f1dc3Sbluhm 2208bd8f1dc3Sbluhm /* Ensure that trying to set the DTD after parsing has started 2209bd8f1dc3Sbluhm * is faulted, even if it's the same setting. 2210bd8f1dc3Sbluhm */ 2211bd8f1dc3Sbluhm if (XML_UseForeignDTD(g_parser, XML_TRUE) 2212bd8f1dc3Sbluhm != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2213bd8f1dc3Sbluhm fail("Failed to reject late foreign DTD setting"); 2214bd8f1dc3Sbluhm /* Ditto for the hash salt */ 2215bd8f1dc3Sbluhm if (XML_SetHashSalt(g_parser, 0x23456789)) 2216bd8f1dc3Sbluhm fail("Failed to reject late hash salt change"); 2217bd8f1dc3Sbluhm 2218bd8f1dc3Sbluhm /* Now finish the parse */ 2219bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2220bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2221bd8f1dc3Sbluhm xml_failure(g_parser); 2222bd8f1dc3Sbluhm } 2223bd8f1dc3Sbluhm END_TEST 2224bd8f1dc3Sbluhm 2225bd8f1dc3Sbluhm /* Test foreign DTD handling with a failing NotStandalone handler */ 2226bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_not_standalone) { 2227bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2228bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2229bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2230bd8f1dc3Sbluhm 2231bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2232bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 2233bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2234bd8f1dc3Sbluhm XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2235bd8f1dc3Sbluhm if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2236bd8f1dc3Sbluhm fail("Could not 
set foreign DTD"); 2237bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_NOT_STANDALONE, 2238bd8f1dc3Sbluhm "NotStandalonehandler failed to reject"); 2239bd8f1dc3Sbluhm } 2240bd8f1dc3Sbluhm END_TEST 2241bd8f1dc3Sbluhm 2242bd8f1dc3Sbluhm /* Test invalid character in a foreign DTD is faulted */ 2243bd8f1dc3Sbluhm START_TEST(test_invalid_foreign_dtd) { 2244bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2245bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2246bd8f1dc3Sbluhm ExtFaults test_data 2247bd8f1dc3Sbluhm = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2248bd8f1dc3Sbluhm 2249bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2250bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 2251bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2252bd8f1dc3Sbluhm XML_UseForeignDTD(g_parser, XML_TRUE); 2253bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2254bd8f1dc3Sbluhm "Bad DTD should not have been accepted"); 2255bd8f1dc3Sbluhm } 2256bd8f1dc3Sbluhm END_TEST 2257bd8f1dc3Sbluhm 2258bd8f1dc3Sbluhm /* Test foreign DTD use with a doctype */ 2259bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_with_doctype) { 2260bd8f1dc3Sbluhm const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2261bd8f1dc3Sbluhm "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2262bd8f1dc3Sbluhm const char *text2 = "<doc>&entity;</doc>"; 2263bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2264bd8f1dc3Sbluhm 2265bd8f1dc3Sbluhm /* Check hash salt is passed through too */ 2266bd8f1dc3Sbluhm XML_SetHashSalt(g_parser, 0x12345678); 2267bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2268bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 2269bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2270bd8f1dc3Sbluhm /* Add a default handler to exercise more code paths */ 
2271bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, dummy_default_handler); 2272bd8f1dc3Sbluhm if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2273bd8f1dc3Sbluhm fail("Could not set foreign DTD"); 2274bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2275bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2276bd8f1dc3Sbluhm xml_failure(g_parser); 2277bd8f1dc3Sbluhm 2278bd8f1dc3Sbluhm /* Ensure that trying to set the DTD after parsing has started 2279bd8f1dc3Sbluhm * is faulted, even if it's the same setting. 2280bd8f1dc3Sbluhm */ 2281bd8f1dc3Sbluhm if (XML_UseForeignDTD(g_parser, XML_TRUE) 2282bd8f1dc3Sbluhm != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2283bd8f1dc3Sbluhm fail("Failed to reject late foreign DTD setting"); 2284bd8f1dc3Sbluhm /* Ditto for the hash salt */ 2285bd8f1dc3Sbluhm if (XML_SetHashSalt(g_parser, 0x23456789)) 2286bd8f1dc3Sbluhm fail("Failed to reject late hash salt change"); 2287bd8f1dc3Sbluhm 2288bd8f1dc3Sbluhm /* Now finish the parse */ 2289bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2290bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2291bd8f1dc3Sbluhm xml_failure(g_parser); 2292bd8f1dc3Sbluhm } 2293bd8f1dc3Sbluhm END_TEST 2294bd8f1dc3Sbluhm 2295bd8f1dc3Sbluhm /* Test XML_UseForeignDTD with no external subset present */ 2296bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_without_external_subset) { 2297bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2298bd8f1dc3Sbluhm "<doc>&foo;</doc>"; 2299bd8f1dc3Sbluhm 2300bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2301bd8f1dc3Sbluhm XML_SetUserData(g_parser, NULL); 2302bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2303bd8f1dc3Sbluhm XML_UseForeignDTD(g_parser, XML_TRUE); 2304bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2305bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2306bd8f1dc3Sbluhm 
xml_failure(g_parser); 2307bd8f1dc3Sbluhm } 2308bd8f1dc3Sbluhm END_TEST 2309bd8f1dc3Sbluhm 2310bd8f1dc3Sbluhm START_TEST(test_empty_foreign_dtd) { 2311bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2312bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2313bd8f1dc3Sbluhm 2314bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2315bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2316bd8f1dc3Sbluhm XML_UseForeignDTD(g_parser, XML_TRUE); 2317bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2318bd8f1dc3Sbluhm "Undefined entity not faulted"); 2319bd8f1dc3Sbluhm } 2320bd8f1dc3Sbluhm END_TEST 2321bd8f1dc3Sbluhm 2322bd8f1dc3Sbluhm /* Test XML Base is set and unset appropriately */ 2323bd8f1dc3Sbluhm START_TEST(test_set_base) { 2324bd8f1dc3Sbluhm const XML_Char *old_base; 2325bd8f1dc3Sbluhm const XML_Char *new_base = XCS("/local/file/name.xml"); 2326bd8f1dc3Sbluhm 2327bd8f1dc3Sbluhm old_base = XML_GetBase(g_parser); 2328bd8f1dc3Sbluhm if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2329bd8f1dc3Sbluhm fail("Unable to set base"); 2330bd8f1dc3Sbluhm if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2331bd8f1dc3Sbluhm fail("Base setting not correct"); 2332bd8f1dc3Sbluhm if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2333bd8f1dc3Sbluhm fail("Unable to NULL base"); 2334bd8f1dc3Sbluhm if (XML_GetBase(g_parser) != NULL) 2335bd8f1dc3Sbluhm fail("Base setting not nulled"); 2336bd8f1dc3Sbluhm XML_SetBase(g_parser, old_base); 2337bd8f1dc3Sbluhm } 2338bd8f1dc3Sbluhm END_TEST 2339bd8f1dc3Sbluhm 2340bd8f1dc3Sbluhm /* Test attribute counts, indexing, etc */ 2341bd8f1dc3Sbluhm START_TEST(test_attributes) { 2342bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2343bd8f1dc3Sbluhm "<!ELEMENT doc (tag)>\n" 2344bd8f1dc3Sbluhm "<!ATTLIST doc id ID #REQUIRED>\n" 2345bd8f1dc3Sbluhm "]>" 2346bd8f1dc3Sbluhm "<doc a='1' id='one' b='2'>" 2347bd8f1dc3Sbluhm "<tag c='3'/>" 
2348bd8f1dc3Sbluhm "</doc>"; 2349bd8f1dc3Sbluhm AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2350bd8f1dc3Sbluhm {XCS("b"), XCS("2")}, 2351bd8f1dc3Sbluhm {XCS("id"), XCS("one")}, 2352bd8f1dc3Sbluhm {NULL, NULL}}; 2353bd8f1dc3Sbluhm AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2354bd8f1dc3Sbluhm ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, 2355bd8f1dc3Sbluhm {XCS("tag"), 1, NULL, NULL}, 2356bd8f1dc3Sbluhm {NULL, 0, NULL, NULL}}; 2357bd8f1dc3Sbluhm info[0].attributes = doc_info; 2358bd8f1dc3Sbluhm info[1].attributes = tag_info; 2359bd8f1dc3Sbluhm 2360*aa071e6eSbluhm XML_Parser parser = XML_ParserCreate(NULL); 2361*aa071e6eSbluhm assert_true(parser != NULL); 2362*aa071e6eSbluhm ParserAndElementInfo parserAndElementInfos = { 2363*aa071e6eSbluhm parser, 2364*aa071e6eSbluhm info, 2365*aa071e6eSbluhm }; 2366*aa071e6eSbluhm 2367*aa071e6eSbluhm XML_SetStartElementHandler(parser, counting_start_element_handler); 2368*aa071e6eSbluhm XML_SetUserData(parser, &parserAndElementInfos); 2369*aa071e6eSbluhm if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 2370bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2371*aa071e6eSbluhm xml_failure(parser); 2372*aa071e6eSbluhm 2373*aa071e6eSbluhm XML_ParserFree(parser); 2374bd8f1dc3Sbluhm } 2375bd8f1dc3Sbluhm END_TEST 2376bd8f1dc3Sbluhm 2377bd8f1dc3Sbluhm /* Test reset works correctly in the middle of processing an internal 2378bd8f1dc3Sbluhm * entity. Exercises some obscure code in XML_ParserReset(). 
2379bd8f1dc3Sbluhm */ 2380bd8f1dc3Sbluhm START_TEST(test_reset_in_entity) { 2381bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2382bd8f1dc3Sbluhm "<!ENTITY wombat 'wom'>\n" 2383bd8f1dc3Sbluhm "<!ENTITY entity 'hi &wom; there'>\n" 2384bd8f1dc3Sbluhm "]>\n" 2385bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2386bd8f1dc3Sbluhm XML_ParsingStatus status; 2387bd8f1dc3Sbluhm 2388bd8f1dc3Sbluhm g_resumable = XML_TRUE; 2389bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2390bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2391bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2392bd8f1dc3Sbluhm xml_failure(g_parser); 2393bd8f1dc3Sbluhm XML_GetParsingStatus(g_parser, &status); 2394bd8f1dc3Sbluhm if (status.parsing != XML_SUSPENDED) 2395bd8f1dc3Sbluhm fail("Parsing status not SUSPENDED"); 2396bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2397bd8f1dc3Sbluhm XML_GetParsingStatus(g_parser, &status); 2398bd8f1dc3Sbluhm if (status.parsing != XML_INITIALIZED) 2399bd8f1dc3Sbluhm fail("Parsing status doesn't reset to INITIALIZED"); 2400bd8f1dc3Sbluhm } 2401bd8f1dc3Sbluhm END_TEST 2402bd8f1dc3Sbluhm 2403bd8f1dc3Sbluhm /* Test that resume correctly passes through parse errors */ 2404bd8f1dc3Sbluhm START_TEST(test_resume_invalid_parse) { 2405bd8f1dc3Sbluhm const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2406bd8f1dc3Sbluhm 2407bd8f1dc3Sbluhm g_resumable = XML_TRUE; 2408bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2409bd8f1dc3Sbluhm if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2410bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2411bd8f1dc3Sbluhm xml_failure(g_parser); 2412bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2413bd8f1dc3Sbluhm fail("Resumed invalid parse not faulted"); 2414bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2415bd8f1dc3Sbluhm fail("Invalid parse not correctly faulted"); 2416bd8f1dc3Sbluhm } 
2417bd8f1dc3Sbluhm END_TEST 2418bd8f1dc3Sbluhm 2419bd8f1dc3Sbluhm /* Test that re-suspended parses are correctly passed through */ 2420bd8f1dc3Sbluhm START_TEST(test_resume_resuspended) { 2421bd8f1dc3Sbluhm const char *text = "<doc>Hello<meep/>world</doc>"; 2422bd8f1dc3Sbluhm 2423bd8f1dc3Sbluhm g_resumable = XML_TRUE; 2424bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2425bd8f1dc3Sbluhm if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2426bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2427bd8f1dc3Sbluhm xml_failure(g_parser); 2428bd8f1dc3Sbluhm g_resumable = XML_TRUE; 2429bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2430bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2431bd8f1dc3Sbluhm fail("Resumption not suspended"); 2432bd8f1dc3Sbluhm /* This one should succeed and finish up */ 2433bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2434bd8f1dc3Sbluhm xml_failure(g_parser); 2435bd8f1dc3Sbluhm } 2436bd8f1dc3Sbluhm END_TEST 2437bd8f1dc3Sbluhm 2438bd8f1dc3Sbluhm /* Test that CDATA shows up correctly through a default handler */ 2439bd8f1dc3Sbluhm START_TEST(test_cdata_default) { 2440bd8f1dc3Sbluhm const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2441bd8f1dc3Sbluhm const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2442bd8f1dc3Sbluhm CharData storage; 2443bd8f1dc3Sbluhm 2444bd8f1dc3Sbluhm CharData_Init(&storage); 2445bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 2446bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 2447bd8f1dc3Sbluhm 2448bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2449bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2450bd8f1dc3Sbluhm xml_failure(g_parser); 2451bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 2452bd8f1dc3Sbluhm } 2453bd8f1dc3Sbluhm END_TEST 2454bd8f1dc3Sbluhm 2455bd8f1dc3Sbluhm /* Test resetting a subordinate parser 
does exactly nothing */ 2456bd8f1dc3Sbluhm START_TEST(test_subordinate_reset) { 2457bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2458bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 2459bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2460bd8f1dc3Sbluhm 2461bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2462bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2463bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2464bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2465bd8f1dc3Sbluhm xml_failure(g_parser); 2466bd8f1dc3Sbluhm } 2467bd8f1dc3Sbluhm END_TEST 2468bd8f1dc3Sbluhm 2469bd8f1dc3Sbluhm /* Test suspending a subordinate parser */ 2470bd8f1dc3Sbluhm START_TEST(test_subordinate_suspend) { 2471bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2472bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 2473bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2474bd8f1dc3Sbluhm 2475bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2476bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2477bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2478bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2479bd8f1dc3Sbluhm xml_failure(g_parser); 2480bd8f1dc3Sbluhm } 2481bd8f1dc3Sbluhm END_TEST 2482bd8f1dc3Sbluhm 2483bd8f1dc3Sbluhm /* Test suspending a subordinate parser from an XML declaration */ 2484bd8f1dc3Sbluhm /* Increases code coverage of the tests */ 2485bd8f1dc3Sbluhm 2486bd8f1dc3Sbluhm START_TEST(test_subordinate_xdecl_suspend) { 2487bd8f1dc3Sbluhm const char *text 2488bd8f1dc3Sbluhm = "<!DOCTYPE doc [\n" 2489bd8f1dc3Sbluhm " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2490bd8f1dc3Sbluhm "]>\n" 2491bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2492bd8f1dc3Sbluhm 2493bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 
2494bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2495bd8f1dc3Sbluhm g_resumable = XML_TRUE; 2496bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2497bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2498bd8f1dc3Sbluhm xml_failure(g_parser); 2499bd8f1dc3Sbluhm } 2500bd8f1dc3Sbluhm END_TEST 2501bd8f1dc3Sbluhm 2502bd8f1dc3Sbluhm START_TEST(test_subordinate_xdecl_abort) { 2503bd8f1dc3Sbluhm const char *text 2504bd8f1dc3Sbluhm = "<!DOCTYPE doc [\n" 2505bd8f1dc3Sbluhm " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2506bd8f1dc3Sbluhm "]>\n" 2507bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2508bd8f1dc3Sbluhm 2509bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2510bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2511bd8f1dc3Sbluhm g_resumable = XML_FALSE; 2512bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2513bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2514bd8f1dc3Sbluhm xml_failure(g_parser); 2515bd8f1dc3Sbluhm } 2516bd8f1dc3Sbluhm END_TEST 2517bd8f1dc3Sbluhm 2518bd8f1dc3Sbluhm /* Test external entity fault handling with suspension */ 2519bd8f1dc3Sbluhm START_TEST(test_ext_entity_invalid_suspended_parse) { 2520bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2521bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2522bd8f1dc3Sbluhm "]>\n" 2523bd8f1dc3Sbluhm "<doc>&en;</doc>"; 2524bd8f1dc3Sbluhm ExtFaults faults[] 2525bd8f1dc3Sbluhm = {{"<?xml version='1.0' encoding='us-ascii'?><", 2526bd8f1dc3Sbluhm "Incomplete element declaration not faulted", NULL, 2527bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_TOKEN}, 2528bd8f1dc3Sbluhm {/* First two bytes of a three-byte char */ 2529bd8f1dc3Sbluhm "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2530bd8f1dc3Sbluhm "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2531bd8f1dc3Sbluhm {NULL, NULL, NULL, XML_ERROR_NONE}}; 
2532bd8f1dc3Sbluhm ExtFaults *fault; 2533bd8f1dc3Sbluhm 2534bd8f1dc3Sbluhm for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2535bd8f1dc3Sbluhm set_subtest("%s", fault->parse_text); 2536bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2537bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, 2538bd8f1dc3Sbluhm external_entity_suspending_faulter); 2539bd8f1dc3Sbluhm XML_SetUserData(g_parser, fault); 2540bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2541bd8f1dc3Sbluhm "Parser did not report external entity error"); 2542bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2543bd8f1dc3Sbluhm } 2544bd8f1dc3Sbluhm } 2545bd8f1dc3Sbluhm END_TEST 2546bd8f1dc3Sbluhm 2547bd8f1dc3Sbluhm /* Test setting an explicit encoding */ 2548bd8f1dc3Sbluhm START_TEST(test_explicit_encoding) { 2549bd8f1dc3Sbluhm const char *text1 = "<doc>Hello "; 2550bd8f1dc3Sbluhm const char *text2 = " World</doc>"; 2551bd8f1dc3Sbluhm 2552bd8f1dc3Sbluhm /* Just check that we can set the encoding to NULL before starting */ 2553bd8f1dc3Sbluhm if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2554bd8f1dc3Sbluhm fail("Failed to initialise encoding to NULL"); 2555bd8f1dc3Sbluhm /* Say we are UTF-8 */ 2556bd8f1dc3Sbluhm if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2557bd8f1dc3Sbluhm fail("Failed to set explicit encoding"); 2558bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2559bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2560bd8f1dc3Sbluhm xml_failure(g_parser); 2561bd8f1dc3Sbluhm /* Try to switch encodings mid-parse */ 2562bd8f1dc3Sbluhm if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2563bd8f1dc3Sbluhm fail("Allowed encoding change"); 2564bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2565bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2566bd8f1dc3Sbluhm xml_failure(g_parser); 2567bd8f1dc3Sbluhm /* Try now the parse is over */ 
2568bd8f1dc3Sbluhm if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2569bd8f1dc3Sbluhm fail("Failed to unset encoding"); 2570bd8f1dc3Sbluhm } 2571bd8f1dc3Sbluhm END_TEST 2572bd8f1dc3Sbluhm 2573bd8f1dc3Sbluhm /* Test handling of trailing CR (rather than newline) */ 2574bd8f1dc3Sbluhm START_TEST(test_trailing_cr) { 2575bd8f1dc3Sbluhm const char *text = "<doc>\r"; 2576bd8f1dc3Sbluhm int found_cr; 2577bd8f1dc3Sbluhm 2578bd8f1dc3Sbluhm /* Try with a character handler, for code coverage */ 2579bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2580bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_cr); 2581bd8f1dc3Sbluhm found_cr = 0; 2582bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2583bd8f1dc3Sbluhm == XML_STATUS_OK) 2584bd8f1dc3Sbluhm fail("Failed to fault unclosed doc"); 2585bd8f1dc3Sbluhm if (found_cr == 0) 2586bd8f1dc3Sbluhm fail("Did not catch the carriage return"); 2587bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2588bd8f1dc3Sbluhm 2589bd8f1dc3Sbluhm /* Now with a default handler instead */ 2590bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, cr_cdata_handler); 2591bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_cr); 2592bd8f1dc3Sbluhm found_cr = 0; 2593bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2594bd8f1dc3Sbluhm == XML_STATUS_OK) 2595bd8f1dc3Sbluhm fail("Failed to fault unclosed doc"); 2596bd8f1dc3Sbluhm if (found_cr == 0) 2597bd8f1dc3Sbluhm fail("Did not catch default carriage return"); 2598bd8f1dc3Sbluhm } 2599bd8f1dc3Sbluhm END_TEST 2600bd8f1dc3Sbluhm 2601bd8f1dc3Sbluhm /* Test trailing CR in an external entity parse */ 2602bd8f1dc3Sbluhm START_TEST(test_ext_entity_trailing_cr) { 2603bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2604bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2605bd8f1dc3Sbluhm "]>\n" 2606bd8f1dc3Sbluhm "<doc>&en;</doc>"; 2607bd8f1dc3Sbluhm int found_cr; 2608bd8f1dc3Sbluhm 2609bd8f1dc3Sbluhm 
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2610bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 2611bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_cr); 2612bd8f1dc3Sbluhm found_cr = 0; 2613bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2614bd8f1dc3Sbluhm != XML_STATUS_OK) 2615bd8f1dc3Sbluhm xml_failure(g_parser); 2616bd8f1dc3Sbluhm if (found_cr == 0) 2617bd8f1dc3Sbluhm fail("No carriage return found"); 2618bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2619bd8f1dc3Sbluhm 2620bd8f1dc3Sbluhm /* Try again with a different trailing CR */ 2621bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2622bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 2623bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_cr); 2624bd8f1dc3Sbluhm found_cr = 0; 2625bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2626bd8f1dc3Sbluhm != XML_STATUS_OK) 2627bd8f1dc3Sbluhm xml_failure(g_parser); 2628bd8f1dc3Sbluhm if (found_cr == 0) 2629bd8f1dc3Sbluhm fail("No carriage return found"); 2630bd8f1dc3Sbluhm } 2631bd8f1dc3Sbluhm END_TEST 2632bd8f1dc3Sbluhm 2633bd8f1dc3Sbluhm /* Test handling of trailing square bracket */ 2634bd8f1dc3Sbluhm START_TEST(test_trailing_rsqb) { 2635bd8f1dc3Sbluhm const char *text8 = "<doc>]"; 2636bd8f1dc3Sbluhm const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 2637bd8f1dc3Sbluhm int found_rsqb; 2638bd8f1dc3Sbluhm int text8_len = (int)strlen(text8); 2639bd8f1dc3Sbluhm 2640bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2641bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_rsqb); 2642bd8f1dc3Sbluhm found_rsqb = 0; 2643bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 2644bd8f1dc3Sbluhm == XML_STATUS_OK) 2645bd8f1dc3Sbluhm fail("Failed to fault unclosed doc"); 2646bd8f1dc3Sbluhm if (found_rsqb == 0) 
2647bd8f1dc3Sbluhm fail("Did not catch the right square bracket"); 2648bd8f1dc3Sbluhm 2649bd8f1dc3Sbluhm /* Try again with a different encoding */ 2650bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2651bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2652bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_rsqb); 2653bd8f1dc3Sbluhm found_rsqb = 0; 2654bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2655bd8f1dc3Sbluhm XML_TRUE) 2656bd8f1dc3Sbluhm == XML_STATUS_OK) 2657bd8f1dc3Sbluhm fail("Failed to fault unclosed doc"); 2658bd8f1dc3Sbluhm if (found_rsqb == 0) 2659bd8f1dc3Sbluhm fail("Did not catch the right square bracket"); 2660bd8f1dc3Sbluhm 2661bd8f1dc3Sbluhm /* And finally with a default handler */ 2662bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2663bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, rsqb_handler); 2664bd8f1dc3Sbluhm XML_SetUserData(g_parser, &found_rsqb); 2665bd8f1dc3Sbluhm found_rsqb = 0; 2666bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2667bd8f1dc3Sbluhm XML_TRUE) 2668bd8f1dc3Sbluhm == XML_STATUS_OK) 2669bd8f1dc3Sbluhm fail("Failed to fault unclosed doc"); 2670bd8f1dc3Sbluhm if (found_rsqb == 0) 2671bd8f1dc3Sbluhm fail("Did not catch the right square bracket"); 2672bd8f1dc3Sbluhm } 2673bd8f1dc3Sbluhm END_TEST 2674bd8f1dc3Sbluhm 2675bd8f1dc3Sbluhm /* Test trailing right square bracket in an external entity parse */ 2676bd8f1dc3Sbluhm START_TEST(test_ext_entity_trailing_rsqb) { 2677bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2678bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2679bd8f1dc3Sbluhm "]>\n" 2680bd8f1dc3Sbluhm "<doc>&en;</doc>"; 2681bd8f1dc3Sbluhm int found_rsqb; 2682bd8f1dc3Sbluhm 2683bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2684bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 2685bd8f1dc3Sbluhm XML_SetUserData(g_parser, 
&found_rsqb); 2686bd8f1dc3Sbluhm found_rsqb = 0; 2687bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2688bd8f1dc3Sbluhm != XML_STATUS_OK) 2689bd8f1dc3Sbluhm xml_failure(g_parser); 2690bd8f1dc3Sbluhm if (found_rsqb == 0) 2691bd8f1dc3Sbluhm fail("No right square bracket found"); 2692bd8f1dc3Sbluhm } 2693bd8f1dc3Sbluhm END_TEST 2694bd8f1dc3Sbluhm 2695bd8f1dc3Sbluhm /* Test CDATA handling in an external entity */ 2696bd8f1dc3Sbluhm START_TEST(test_ext_entity_good_cdata) { 2697bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 2698bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2699bd8f1dc3Sbluhm "]>\n" 2700bd8f1dc3Sbluhm "<doc>&en;</doc>"; 2701bd8f1dc3Sbluhm 2702bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2703bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 2704bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2705bd8f1dc3Sbluhm != XML_STATUS_OK) 2706bd8f1dc3Sbluhm xml_failure(g_parser); 2707bd8f1dc3Sbluhm } 2708bd8f1dc3Sbluhm END_TEST 2709bd8f1dc3Sbluhm 2710bd8f1dc3Sbluhm /* Test user parameter settings */ 2711bd8f1dc3Sbluhm START_TEST(test_user_parameters) { 2712bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2713bd8f1dc3Sbluhm "<!-- Primary parse -->\n" 2714bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 2715bd8f1dc3Sbluhm "<doc>&entity;"; 2716bd8f1dc3Sbluhm const char *epilog = "<!-- Back to primary parser -->\n" 2717bd8f1dc3Sbluhm "</doc>"; 2718bd8f1dc3Sbluhm 2719bd8f1dc3Sbluhm g_comment_count = 0; 2720bd8f1dc3Sbluhm g_skip_count = 0; 2721bd8f1dc3Sbluhm g_xdecl_count = 0; 2722bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2723bd8f1dc3Sbluhm XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 2724bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 2725bd8f1dc3Sbluhm 
XML_SetCommentHandler(g_parser, data_check_comment_handler); 2726bd8f1dc3Sbluhm XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 2727bd8f1dc3Sbluhm XML_UseParserAsHandlerArg(g_parser); 2728bd8f1dc3Sbluhm XML_SetUserData(g_parser, (void *)1); 2729bd8f1dc3Sbluhm g_handler_data = g_parser; 2730bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2731bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2732bd8f1dc3Sbluhm xml_failure(g_parser); 2733bd8f1dc3Sbluhm /* Ensure we can't change policy mid-parse */ 2734bd8f1dc3Sbluhm if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 2735bd8f1dc3Sbluhm fail("Changed param entity parsing policy while parsing"); 2736bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 2737bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2738bd8f1dc3Sbluhm xml_failure(g_parser); 2739bd8f1dc3Sbluhm if (g_comment_count != 3) 2740bd8f1dc3Sbluhm fail("Comment handler not invoked enough times"); 2741bd8f1dc3Sbluhm if (g_skip_count != 1) 2742bd8f1dc3Sbluhm fail("Skip handler not invoked enough times"); 2743bd8f1dc3Sbluhm if (g_xdecl_count != 1) 2744bd8f1dc3Sbluhm fail("XML declaration handler not invoked"); 2745bd8f1dc3Sbluhm } 2746bd8f1dc3Sbluhm END_TEST 2747bd8f1dc3Sbluhm 2748bd8f1dc3Sbluhm /* Test that an explicit external entity handler argument replaces 2749bd8f1dc3Sbluhm * the parser as the first argument. 2750bd8f1dc3Sbluhm * 2751bd8f1dc3Sbluhm * We do not call the first parameter to the external entity handler 2752bd8f1dc3Sbluhm * 'parser' for once, since the first time the handler is called it 2753bd8f1dc3Sbluhm * will actually be a text string. We need to be able to access the 2754bd8f1dc3Sbluhm * global 'parser' variable to create our external entity parser from, 2755bd8f1dc3Sbluhm * since there are code paths we need to ensure get executed. 
2756bd8f1dc3Sbluhm */ 2757bd8f1dc3Sbluhm START_TEST(test_ext_entity_ref_parameter) { 2758bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2759bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>\n" 2760bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 2761bd8f1dc3Sbluhm 2762bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2763bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2764bd8f1dc3Sbluhm /* Set a handler arg that is not NULL and not parser (which is 2765bd8f1dc3Sbluhm * what NULL would cause to be passed. 2766bd8f1dc3Sbluhm */ 2767bd8f1dc3Sbluhm XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 2768bd8f1dc3Sbluhm g_handler_data = text; 2769bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2770bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2771bd8f1dc3Sbluhm xml_failure(g_parser); 2772bd8f1dc3Sbluhm 2773bd8f1dc3Sbluhm /* Now try again with unset args */ 2774bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2775bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2776bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2777bd8f1dc3Sbluhm XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 2778bd8f1dc3Sbluhm g_handler_data = g_parser; 2779bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2780bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2781bd8f1dc3Sbluhm xml_failure(g_parser); 2782bd8f1dc3Sbluhm } 2783bd8f1dc3Sbluhm END_TEST 2784bd8f1dc3Sbluhm 2785bd8f1dc3Sbluhm /* Test the parsing of an empty string */ 2786bd8f1dc3Sbluhm START_TEST(test_empty_parse) { 2787bd8f1dc3Sbluhm const char *text = "<doc></doc>"; 2788bd8f1dc3Sbluhm const char *partial = "<doc>"; 2789bd8f1dc3Sbluhm 2790bd8f1dc3Sbluhm if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 2791bd8f1dc3Sbluhm fail("Parsing empty string faulted"); 2792bd8f1dc3Sbluhm if 
(XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2793bd8f1dc3Sbluhm fail("Parsing final empty string not faulted"); 2794bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 2795bd8f1dc3Sbluhm fail("Parsing final empty string faulted for wrong reason"); 2796bd8f1dc3Sbluhm 2797bd8f1dc3Sbluhm /* Now try with valid text before the empty end */ 2798bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2799bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2800bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2801bd8f1dc3Sbluhm xml_failure(g_parser); 2802bd8f1dc3Sbluhm if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 2803bd8f1dc3Sbluhm fail("Parsing final empty string faulted"); 2804bd8f1dc3Sbluhm 2805bd8f1dc3Sbluhm /* Now try with invalid text before the empty end */ 2806bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 2807bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 2808bd8f1dc3Sbluhm XML_FALSE) 2809bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2810bd8f1dc3Sbluhm xml_failure(g_parser); 2811bd8f1dc3Sbluhm if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2812bd8f1dc3Sbluhm fail("Parsing final incomplete empty string not faulted"); 2813bd8f1dc3Sbluhm } 2814bd8f1dc3Sbluhm END_TEST 2815bd8f1dc3Sbluhm 281661ad8a07Sbluhm /* Test XML_Parse for len < 0 */ 281761ad8a07Sbluhm START_TEST(test_negative_len_parse) { 281861ad8a07Sbluhm const char *const doc = "<root/>"; 281961ad8a07Sbluhm for (int isFinal = 0; isFinal < 2; isFinal++) { 282061ad8a07Sbluhm set_subtest("isFinal=%d", isFinal); 282161ad8a07Sbluhm 282261ad8a07Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 282361ad8a07Sbluhm 282461ad8a07Sbluhm if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 282561ad8a07Sbluhm fail("There was not supposed to be any initial parse error."); 282661ad8a07Sbluhm 282761ad8a07Sbluhm const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal); 282861ad8a07Sbluhm 282961ad8a07Sbluhm if 
(status != XML_STATUS_ERROR) 283061ad8a07Sbluhm fail("Negative len was expected to fail the parse but did not."); 283161ad8a07Sbluhm 283261ad8a07Sbluhm if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 283361ad8a07Sbluhm fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 283461ad8a07Sbluhm 283561ad8a07Sbluhm XML_ParserFree(parser); 283661ad8a07Sbluhm } 283761ad8a07Sbluhm } 283861ad8a07Sbluhm END_TEST 283961ad8a07Sbluhm 284061ad8a07Sbluhm /* Test XML_ParseBuffer for len < 0 */ 284161ad8a07Sbluhm START_TEST(test_negative_len_parse_buffer) { 284261ad8a07Sbluhm const char *const doc = "<root/>"; 284361ad8a07Sbluhm for (int isFinal = 0; isFinal < 2; isFinal++) { 284461ad8a07Sbluhm set_subtest("isFinal=%d", isFinal); 284561ad8a07Sbluhm 284661ad8a07Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 284761ad8a07Sbluhm 284861ad8a07Sbluhm if (XML_GetErrorCode(parser) != XML_ERROR_NONE) 284961ad8a07Sbluhm fail("There was not supposed to be any initial parse error."); 285061ad8a07Sbluhm 285161ad8a07Sbluhm void *const buffer = XML_GetBuffer(parser, (int)strlen(doc)); 285261ad8a07Sbluhm 285361ad8a07Sbluhm if (buffer == NULL) 285461ad8a07Sbluhm fail("XML_GetBuffer failed."); 285561ad8a07Sbluhm 285661ad8a07Sbluhm memcpy(buffer, doc, strlen(doc)); 285761ad8a07Sbluhm 285861ad8a07Sbluhm const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal); 285961ad8a07Sbluhm 286061ad8a07Sbluhm if (status != XML_STATUS_ERROR) 286161ad8a07Sbluhm fail("Negative len was expected to fail the parse but did not."); 286261ad8a07Sbluhm 286361ad8a07Sbluhm if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT) 286461ad8a07Sbluhm fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT."); 286561ad8a07Sbluhm 286661ad8a07Sbluhm XML_ParserFree(parser); 286761ad8a07Sbluhm } 286861ad8a07Sbluhm } 286961ad8a07Sbluhm END_TEST 287061ad8a07Sbluhm 2871bd8f1dc3Sbluhm /* Test odd corners of the XML_GetBuffer interface */ 2872bd8f1dc3Sbluhm static enum XML_Status 
2873bd8f1dc3Sbluhm get_feature(enum XML_FeatureEnum feature_id, long *presult) { 2874bd8f1dc3Sbluhm const XML_Feature *feature = XML_GetFeatureList(); 2875bd8f1dc3Sbluhm 2876bd8f1dc3Sbluhm if (feature == NULL) 2877bd8f1dc3Sbluhm return XML_STATUS_ERROR; 2878bd8f1dc3Sbluhm for (; feature->feature != XML_FEATURE_END; feature++) { 2879bd8f1dc3Sbluhm if (feature->feature == feature_id) { 2880bd8f1dc3Sbluhm *presult = feature->value; 2881bd8f1dc3Sbluhm return XML_STATUS_OK; 2882bd8f1dc3Sbluhm } 2883bd8f1dc3Sbluhm } 2884bd8f1dc3Sbluhm return XML_STATUS_ERROR; 2885bd8f1dc3Sbluhm } 2886bd8f1dc3Sbluhm 2887bd8f1dc3Sbluhm /* Test odd corners of the XML_GetBuffer interface */ 2888bd8f1dc3Sbluhm START_TEST(test_get_buffer_1) { 2889bd8f1dc3Sbluhm const char *text = get_buffer_test_text; 2890bd8f1dc3Sbluhm void *buffer; 2891bd8f1dc3Sbluhm long context_bytes; 2892bd8f1dc3Sbluhm 2893bd8f1dc3Sbluhm /* Attempt to allocate a negative length buffer */ 2894bd8f1dc3Sbluhm if (XML_GetBuffer(g_parser, -12) != NULL) 2895bd8f1dc3Sbluhm fail("Negative length buffer not failed"); 2896bd8f1dc3Sbluhm 2897bd8f1dc3Sbluhm /* Now get a small buffer and extend it past valid length */ 2898bd8f1dc3Sbluhm buffer = XML_GetBuffer(g_parser, 1536); 2899bd8f1dc3Sbluhm if (buffer == NULL) 2900bd8f1dc3Sbluhm fail("1.5K buffer failed"); 2901bd8f1dc3Sbluhm assert(buffer != NULL); 2902bd8f1dc3Sbluhm memcpy(buffer, text, strlen(text)); 2903bd8f1dc3Sbluhm if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2904bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2905bd8f1dc3Sbluhm xml_failure(g_parser); 2906bd8f1dc3Sbluhm if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 2907bd8f1dc3Sbluhm fail("INT_MAX buffer not failed"); 2908bd8f1dc3Sbluhm 2909bd8f1dc3Sbluhm /* Now try extending it a more reasonable but still too large 2910bd8f1dc3Sbluhm * amount. The allocator in XML_GetBuffer() doubles the buffer 2911bd8f1dc3Sbluhm * size until it exceeds the requested amount or INT_MAX. 
If it 2912bd8f1dc3Sbluhm * exceeds INT_MAX, it rejects the request, so we want a request 2913bd8f1dc3Sbluhm * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 2914bd8f1dc3Sbluhm * with an extra byte just to ensure that the request is off any 2915bd8f1dc3Sbluhm * boundary. The request will be inflated internally by 2916bd8f1dc3Sbluhm * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 2917bd8f1dc3Sbluhm * request. 2918bd8f1dc3Sbluhm */ 2919bd8f1dc3Sbluhm if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 2920bd8f1dc3Sbluhm context_bytes = 0; 2921bd8f1dc3Sbluhm if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 2922bd8f1dc3Sbluhm fail("INT_MAX- buffer not failed"); 2923bd8f1dc3Sbluhm 2924bd8f1dc3Sbluhm /* Now try extending it a carefully crafted amount */ 2925bd8f1dc3Sbluhm if (XML_GetBuffer(g_parser, 1000) == NULL) 2926bd8f1dc3Sbluhm fail("1000 buffer failed"); 2927bd8f1dc3Sbluhm } 2928bd8f1dc3Sbluhm END_TEST 2929bd8f1dc3Sbluhm 2930bd8f1dc3Sbluhm /* Test more corners of the XML_GetBuffer interface */ 2931bd8f1dc3Sbluhm START_TEST(test_get_buffer_2) { 2932bd8f1dc3Sbluhm const char *text = get_buffer_test_text; 2933bd8f1dc3Sbluhm void *buffer; 2934bd8f1dc3Sbluhm 2935bd8f1dc3Sbluhm /* Now get a decent buffer */ 2936bd8f1dc3Sbluhm buffer = XML_GetBuffer(g_parser, 1536); 2937bd8f1dc3Sbluhm if (buffer == NULL) 2938bd8f1dc3Sbluhm fail("1.5K buffer failed"); 2939bd8f1dc3Sbluhm assert(buffer != NULL); 2940bd8f1dc3Sbluhm memcpy(buffer, text, strlen(text)); 2941bd8f1dc3Sbluhm if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2942bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2943bd8f1dc3Sbluhm xml_failure(g_parser); 2944bd8f1dc3Sbluhm 2945bd8f1dc3Sbluhm /* Extend it, to catch a different code path */ 2946bd8f1dc3Sbluhm if (XML_GetBuffer(g_parser, 1024) == NULL) 2947bd8f1dc3Sbluhm fail("1024 buffer failed"); 2948bd8f1dc3Sbluhm } 2949bd8f1dc3Sbluhm END_TEST 2950bd8f1dc3Sbluhm 2951bd8f1dc3Sbluhm /* Test for signed 
integer overflow CVE-2022-23852 */ 2952bd8f1dc3Sbluhm #if XML_CONTEXT_BYTES > 0 2953bd8f1dc3Sbluhm START_TEST(test_get_buffer_3_overflow) { 2954bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 2955bd8f1dc3Sbluhm assert(parser != NULL); 2956bd8f1dc3Sbluhm 2957bd8f1dc3Sbluhm const char *const text = "\n"; 2958bd8f1dc3Sbluhm const int expectedKeepValue = (int)strlen(text); 2959bd8f1dc3Sbluhm 2960bd8f1dc3Sbluhm // After this call, variable "keep" in XML_GetBuffer will 2961bd8f1dc3Sbluhm // have value expectedKeepValue 2962bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 2963bd8f1dc3Sbluhm XML_FALSE /* isFinal */) 2964bd8f1dc3Sbluhm == XML_STATUS_ERROR) 2965bd8f1dc3Sbluhm xml_failure(parser); 2966bd8f1dc3Sbluhm 2967bd8f1dc3Sbluhm assert(expectedKeepValue > 0); 2968bd8f1dc3Sbluhm if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 2969bd8f1dc3Sbluhm fail("enlarging buffer not failed"); 2970bd8f1dc3Sbluhm 2971bd8f1dc3Sbluhm XML_ParserFree(parser); 2972bd8f1dc3Sbluhm } 2973bd8f1dc3Sbluhm END_TEST 2974bd8f1dc3Sbluhm #endif // XML_CONTEXT_BYTES > 0 2975bd8f1dc3Sbluhm 2976bd8f1dc3Sbluhm START_TEST(test_buffer_can_grow_to_max) { 2977bd8f1dc3Sbluhm const char *const prefixes[] = { 2978bd8f1dc3Sbluhm "", 2979bd8f1dc3Sbluhm "<", 2980bd8f1dc3Sbluhm "<x a='", 2981bd8f1dc3Sbluhm "<doc><x a='", 2982bd8f1dc3Sbluhm "<document><x a='", 2983bd8f1dc3Sbluhm "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 2984bd8f1dc3Sbluhm "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 2985bd8f1dc3Sbluhm "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 2986bd8f1dc3Sbluhm "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 2987bd8f1dc3Sbluhm "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 2988bd8f1dc3Sbluhm const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 2989bd8f1dc3Sbluhm int maxbuf = INT_MAX / 2 + 
(INT_MAX & 1); // round up without overflow 2990bd8f1dc3Sbluhm #if defined(__MINGW32__) && ! defined(__MINGW64__) 2991bd8f1dc3Sbluhm // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 2992bd8f1dc3Sbluhm // Can we make a big allocation? 2993bd8f1dc3Sbluhm void *big = malloc(maxbuf); 2994bd8f1dc3Sbluhm if (! big) { 2995bd8f1dc3Sbluhm // The big allocation failed. Let's be a little lenient. 2996bd8f1dc3Sbluhm maxbuf = maxbuf / 2; 2997bd8f1dc3Sbluhm } 2998bd8f1dc3Sbluhm free(big); 2999bd8f1dc3Sbluhm #endif 3000bd8f1dc3Sbluhm 3001bd8f1dc3Sbluhm for (int i = 0; i < num_prefixes; ++i) { 3002bd8f1dc3Sbluhm set_subtest("\"%s\"", prefixes[i]); 3003bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 3004bd8f1dc3Sbluhm const int prefix_len = (int)strlen(prefixes[i]); 3005bd8f1dc3Sbluhm const enum XML_Status s 3006bd8f1dc3Sbluhm = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 3007bd8f1dc3Sbluhm if (s != XML_STATUS_OK) 3008bd8f1dc3Sbluhm xml_failure(parser); 3009bd8f1dc3Sbluhm 3010bd8f1dc3Sbluhm // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 3011bd8f1dc3Sbluhm // subtracting the whole prefix is easiest, and close enough. 3012bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 3013bd8f1dc3Sbluhm // The limit should be consistent; no prefix should allow us to 3014bd8f1dc3Sbluhm // reach above the max buffer size. 
3015bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 3016bd8f1dc3Sbluhm XML_ParserFree(parser); 3017bd8f1dc3Sbluhm } 3018bd8f1dc3Sbluhm } 3019bd8f1dc3Sbluhm END_TEST 3020bd8f1dc3Sbluhm 3021bd8f1dc3Sbluhm START_TEST(test_getbuffer_allocates_on_zero_len) { 3022bd8f1dc3Sbluhm for (int first_len = 1; first_len >= 0; first_len--) { 3023bd8f1dc3Sbluhm set_subtest("with len=%d first", first_len); 3024bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 3025bd8f1dc3Sbluhm assert_true(parser != NULL); 3026bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, first_len) != NULL); 3027bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, 0) != NULL); 3028bd8f1dc3Sbluhm if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 3029bd8f1dc3Sbluhm xml_failure(parser); 3030bd8f1dc3Sbluhm XML_ParserFree(parser); 3031bd8f1dc3Sbluhm } 3032bd8f1dc3Sbluhm } 3033bd8f1dc3Sbluhm END_TEST 3034bd8f1dc3Sbluhm 3035bd8f1dc3Sbluhm /* Test position information macros */ 3036bd8f1dc3Sbluhm START_TEST(test_byte_info_at_end) { 3037bd8f1dc3Sbluhm const char *text = "<doc></doc>"; 3038bd8f1dc3Sbluhm 3039bd8f1dc3Sbluhm if (XML_GetCurrentByteIndex(g_parser) != -1 3040bd8f1dc3Sbluhm || XML_GetCurrentByteCount(g_parser) != 0) 3041bd8f1dc3Sbluhm fail("Byte index/count incorrect at start of parse"); 3042bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3043bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3044bd8f1dc3Sbluhm xml_failure(g_parser); 3045bd8f1dc3Sbluhm /* At end, the count will be zero and the index the end of string */ 3046bd8f1dc3Sbluhm if (XML_GetCurrentByteCount(g_parser) != 0) 3047bd8f1dc3Sbluhm fail("Terminal byte count incorrect"); 3048bd8f1dc3Sbluhm if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 3049bd8f1dc3Sbluhm fail("Terminal byte index incorrect"); 3050bd8f1dc3Sbluhm } 3051bd8f1dc3Sbluhm END_TEST 3052bd8f1dc3Sbluhm 3053bd8f1dc3Sbluhm /* Test position information from errors */ 3054bd8f1dc3Sbluhm #define PRE_ERROR_STR 
"<doc></" 3055bd8f1dc3Sbluhm #define POST_ERROR_STR "wombat></doc>" 3056bd8f1dc3Sbluhm START_TEST(test_byte_info_at_error) { 3057bd8f1dc3Sbluhm const char *text = PRE_ERROR_STR POST_ERROR_STR; 3058bd8f1dc3Sbluhm 3059bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3060bd8f1dc3Sbluhm == XML_STATUS_OK) 3061bd8f1dc3Sbluhm fail("Syntax error not faulted"); 3062bd8f1dc3Sbluhm if (XML_GetCurrentByteCount(g_parser) != 0) 3063bd8f1dc3Sbluhm fail("Error byte count incorrect"); 3064bd8f1dc3Sbluhm if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3065bd8f1dc3Sbluhm fail("Error byte index incorrect"); 3066bd8f1dc3Sbluhm } 3067bd8f1dc3Sbluhm END_TEST 3068bd8f1dc3Sbluhm #undef PRE_ERROR_STR 3069bd8f1dc3Sbluhm #undef POST_ERROR_STR 3070bd8f1dc3Sbluhm 3071bd8f1dc3Sbluhm /* Test position information in handler */ 3072bd8f1dc3Sbluhm #define START_ELEMENT "<e>" 3073bd8f1dc3Sbluhm #define CDATA_TEXT "Hello" 3074bd8f1dc3Sbluhm #define END_ELEMENT "</e>" 3075bd8f1dc3Sbluhm START_TEST(test_byte_info_at_cdata) { 3076bd8f1dc3Sbluhm const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3077bd8f1dc3Sbluhm int offset, size; 3078bd8f1dc3Sbluhm ByteTestData data; 3079bd8f1dc3Sbluhm 3080bd8f1dc3Sbluhm /* Check initial context is empty */ 3081bd8f1dc3Sbluhm if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3082bd8f1dc3Sbluhm fail("Unexpected context at start of parse"); 3083bd8f1dc3Sbluhm 3084bd8f1dc3Sbluhm data.start_element_len = (int)strlen(START_ELEMENT); 3085bd8f1dc3Sbluhm data.cdata_len = (int)strlen(CDATA_TEXT); 3086bd8f1dc3Sbluhm data.total_string_len = (int)strlen(text); 3087bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3088bd8f1dc3Sbluhm XML_SetUserData(g_parser, &data); 3089bd8f1dc3Sbluhm if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3090bd8f1dc3Sbluhm xml_failure(g_parser); 3091bd8f1dc3Sbluhm } 3092bd8f1dc3Sbluhm END_TEST 3093bd8f1dc3Sbluhm #undef 
START_ELEMENT 3094bd8f1dc3Sbluhm #undef CDATA_TEXT 3095bd8f1dc3Sbluhm #undef END_ELEMENT 3096bd8f1dc3Sbluhm 3097bd8f1dc3Sbluhm /* Test predefined entities are correctly recognised */ 3098bd8f1dc3Sbluhm START_TEST(test_predefined_entities) { 3099bd8f1dc3Sbluhm const char *text = "<doc><>&"'</doc>"; 3100bd8f1dc3Sbluhm const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3101bd8f1dc3Sbluhm const XML_Char *result = XCS("<>&\"'"); 3102bd8f1dc3Sbluhm CharData storage; 3103bd8f1dc3Sbluhm 3104bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 3105bd8f1dc3Sbluhm /* run_character_check uses XML_SetCharacterDataHandler(), which 3106bd8f1dc3Sbluhm * unfortunately heads off a code path that we need to exercise. 3107bd8f1dc3Sbluhm */ 3108bd8f1dc3Sbluhm CharData_Init(&storage); 3109bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3110bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3111bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3112bd8f1dc3Sbluhm xml_failure(g_parser); 3113bd8f1dc3Sbluhm /* The default handler doesn't translate the entities */ 3114bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 3115bd8f1dc3Sbluhm 3116bd8f1dc3Sbluhm /* Now try again and check the translation */ 3117bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 3118bd8f1dc3Sbluhm run_character_check(text, result); 3119bd8f1dc3Sbluhm } 3120bd8f1dc3Sbluhm END_TEST 3121bd8f1dc3Sbluhm 3122bd8f1dc3Sbluhm /* Regression test that an invalid tag in an external parameter 3123bd8f1dc3Sbluhm * reference in an external DTD is correctly faulted. 3124bd8f1dc3Sbluhm * 3125bd8f1dc3Sbluhm * Only a few specific tags are legal in DTDs ignoring comments and 3126bd8f1dc3Sbluhm * processing instructions, all of which begin with an exclamation 3127bd8f1dc3Sbluhm * mark. "<el/>" is not one of them, so the parser should raise an 3128bd8f1dc3Sbluhm * error on encountering it. 
3129bd8f1dc3Sbluhm */ 3130bd8f1dc3Sbluhm START_TEST(test_invalid_tag_in_dtd) { 3131bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3132bd8f1dc3Sbluhm "<doc></doc>\n"; 3133bd8f1dc3Sbluhm 3134bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3135bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3136bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3137bd8f1dc3Sbluhm "Invalid tag IN DTD external param not rejected"); 3138bd8f1dc3Sbluhm } 3139bd8f1dc3Sbluhm END_TEST 3140bd8f1dc3Sbluhm 3141bd8f1dc3Sbluhm /* Test entities not quite the predefined ones are not mis-recognised */ 3142bd8f1dc3Sbluhm START_TEST(test_not_predefined_entities) { 3143bd8f1dc3Sbluhm const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3144bd8f1dc3Sbluhm "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3145bd8f1dc3Sbluhm int i = 0; 3146bd8f1dc3Sbluhm 3147bd8f1dc3Sbluhm while (text[i] != NULL) { 3148bd8f1dc3Sbluhm expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3149bd8f1dc3Sbluhm "Undefined entity not rejected"); 3150bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 3151bd8f1dc3Sbluhm i++; 3152bd8f1dc3Sbluhm } 3153bd8f1dc3Sbluhm } 3154bd8f1dc3Sbluhm END_TEST 3155bd8f1dc3Sbluhm 3156bd8f1dc3Sbluhm /* Test conditional inclusion (IGNORE) */ 3157bd8f1dc3Sbluhm START_TEST(test_ignore_section) { 3158bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3159bd8f1dc3Sbluhm "<doc><e>&entity;</e></doc>"; 3160bd8f1dc3Sbluhm const XML_Char *expected 3161bd8f1dc3Sbluhm = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"); 3162bd8f1dc3Sbluhm CharData storage; 3163bd8f1dc3Sbluhm 3164bd8f1dc3Sbluhm CharData_Init(&storage); 3165bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3166bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3167bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3168bd8f1dc3Sbluhm 
XML_SetDefaultHandler(g_parser, accumulate_characters); 3169bd8f1dc3Sbluhm XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3170bd8f1dc3Sbluhm XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3171bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3172bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, dummy_start_element); 3173bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, dummy_end_element); 3174bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3175bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3176bd8f1dc3Sbluhm xml_failure(g_parser); 3177bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 3178bd8f1dc3Sbluhm } 3179bd8f1dc3Sbluhm END_TEST 3180bd8f1dc3Sbluhm 3181bd8f1dc3Sbluhm START_TEST(test_ignore_section_utf16) { 3182bd8f1dc3Sbluhm const char text[] = 3183bd8f1dc3Sbluhm /* <!DOCTYPE d SYSTEM 's'> */ 3184bd8f1dc3Sbluhm "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3185bd8f1dc3Sbluhm "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3186bd8f1dc3Sbluhm /* <d><e>&en;</e></d> */ 3187bd8f1dc3Sbluhm "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3188bd8f1dc3Sbluhm const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3189bd8f1dc3Sbluhm CharData storage; 3190bd8f1dc3Sbluhm 3191bd8f1dc3Sbluhm CharData_Init(&storage); 3192bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3193bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3194bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3195bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 3196bd8f1dc3Sbluhm XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3197bd8f1dc3Sbluhm XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3198bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3199bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, 
dummy_start_element); 3200bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, dummy_end_element); 3201bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3202bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3203bd8f1dc3Sbluhm xml_failure(g_parser); 3204bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 3205bd8f1dc3Sbluhm } 3206bd8f1dc3Sbluhm END_TEST 3207bd8f1dc3Sbluhm 3208bd8f1dc3Sbluhm START_TEST(test_ignore_section_utf16_be) { 3209bd8f1dc3Sbluhm const char text[] = 3210bd8f1dc3Sbluhm /* <!DOCTYPE d SYSTEM 's'> */ 3211bd8f1dc3Sbluhm "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3212bd8f1dc3Sbluhm "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3213bd8f1dc3Sbluhm /* <d><e>&en;</e></d> */ 3214bd8f1dc3Sbluhm "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3215bd8f1dc3Sbluhm const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3216bd8f1dc3Sbluhm CharData storage; 3217bd8f1dc3Sbluhm 3218bd8f1dc3Sbluhm CharData_Init(&storage); 3219bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3220bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3221bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, 3222bd8f1dc3Sbluhm external_entity_load_ignore_utf16_be); 3223bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 3224bd8f1dc3Sbluhm XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3225bd8f1dc3Sbluhm XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3226bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3227bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, dummy_start_element); 3228bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, dummy_end_element); 3229bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3230bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3231bd8f1dc3Sbluhm xml_failure(g_parser); 3232bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 
3233bd8f1dc3Sbluhm } 3234bd8f1dc3Sbluhm END_TEST 3235bd8f1dc3Sbluhm 3236bd8f1dc3Sbluhm /* Test mis-formatted conditional exclusion */ 3237bd8f1dc3Sbluhm START_TEST(test_bad_ignore_section) { 3238bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3239bd8f1dc3Sbluhm "<doc><e>&entity;</e></doc>"; 3240bd8f1dc3Sbluhm ExtFaults faults[] 3241bd8f1dc3Sbluhm = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3242bd8f1dc3Sbluhm XML_ERROR_SYNTAX}, 3243bd8f1dc3Sbluhm {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3244bd8f1dc3Sbluhm XML_ERROR_INVALID_TOKEN}, 3245bd8f1dc3Sbluhm {/* FIrst two bytes of a three-byte char */ 3246bd8f1dc3Sbluhm "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3247bd8f1dc3Sbluhm XML_ERROR_PARTIAL_CHAR}, 3248bd8f1dc3Sbluhm {NULL, NULL, NULL, XML_ERROR_NONE}}; 3249bd8f1dc3Sbluhm ExtFaults *fault; 3250bd8f1dc3Sbluhm 3251bd8f1dc3Sbluhm for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3252bd8f1dc3Sbluhm set_subtest("%s", fault->parse_text); 3253bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3254bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3255bd8f1dc3Sbluhm XML_SetUserData(g_parser, fault); 3256bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3257bd8f1dc3Sbluhm "Incomplete IGNORE section not failed"); 3258bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 3259bd8f1dc3Sbluhm } 3260bd8f1dc3Sbluhm } 3261bd8f1dc3Sbluhm END_TEST 3262bd8f1dc3Sbluhm 3263bd8f1dc3Sbluhm struct bom_testdata { 3264bd8f1dc3Sbluhm const char *external; 3265bd8f1dc3Sbluhm int split; 3266bd8f1dc3Sbluhm XML_Bool nested_callback_happened; 3267bd8f1dc3Sbluhm }; 3268bd8f1dc3Sbluhm 3269bd8f1dc3Sbluhm static int XMLCALL 3270bd8f1dc3Sbluhm external_bom_checker(XML_Parser parser, const XML_Char *context, 3271bd8f1dc3Sbluhm const XML_Char *base, const XML_Char *systemId, 3272bd8f1dc3Sbluhm const XML_Char *publicId) { 
3273bd8f1dc3Sbluhm const char *text; 3274bd8f1dc3Sbluhm UNUSED_P(base); 3275bd8f1dc3Sbluhm UNUSED_P(systemId); 3276bd8f1dc3Sbluhm UNUSED_P(publicId); 3277bd8f1dc3Sbluhm 3278bd8f1dc3Sbluhm XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3279bd8f1dc3Sbluhm if (ext_parser == NULL) 3280bd8f1dc3Sbluhm fail("Could not create external entity parser"); 3281bd8f1dc3Sbluhm 3282bd8f1dc3Sbluhm if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3283bd8f1dc3Sbluhm struct bom_testdata *const testdata 3284bd8f1dc3Sbluhm = (struct bom_testdata *)XML_GetUserData(parser); 3285bd8f1dc3Sbluhm const char *const external = testdata->external; 3286bd8f1dc3Sbluhm const int split = testdata->split; 3287bd8f1dc3Sbluhm testdata->nested_callback_happened = XML_TRUE; 3288bd8f1dc3Sbluhm 3289bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3290bd8f1dc3Sbluhm != XML_STATUS_OK) { 3291bd8f1dc3Sbluhm xml_failure(ext_parser); 3292bd8f1dc3Sbluhm } 3293bd8f1dc3Sbluhm text = external + split; // the parse below will continue where we left off. 3294bd8f1dc3Sbluhm } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) { 3295bd8f1dc3Sbluhm text = "<!ELEMENT doc EMPTY>\n" 3296bd8f1dc3Sbluhm "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3297bd8f1dc3Sbluhm "<!ENTITY % e2 '%e1;'>\n"; 3298bd8f1dc3Sbluhm } else { 3299bd8f1dc3Sbluhm fail("unknown systemId"); 3300bd8f1dc3Sbluhm } 3301bd8f1dc3Sbluhm 3302bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3303bd8f1dc3Sbluhm != XML_STATUS_OK) 3304bd8f1dc3Sbluhm xml_failure(ext_parser); 3305bd8f1dc3Sbluhm 3306bd8f1dc3Sbluhm XML_ParserFree(ext_parser); 3307bd8f1dc3Sbluhm return XML_STATUS_OK; 3308bd8f1dc3Sbluhm } 3309bd8f1dc3Sbluhm 3310bd8f1dc3Sbluhm /* regression test: BOM should be consumed when followed by a partial token. 
*/ 3311bd8f1dc3Sbluhm START_TEST(test_external_bom_consumed) { 3312bd8f1dc3Sbluhm const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3313bd8f1dc3Sbluhm "<doc></doc>\n"; 3314bd8f1dc3Sbluhm const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3315bd8f1dc3Sbluhm const int len = (int)strlen(external); 3316bd8f1dc3Sbluhm for (int split = 0; split <= len; ++split) { 3317bd8f1dc3Sbluhm set_subtest("split at byte %d", split); 3318bd8f1dc3Sbluhm 3319bd8f1dc3Sbluhm struct bom_testdata testdata; 3320bd8f1dc3Sbluhm testdata.external = external; 3321bd8f1dc3Sbluhm testdata.split = split; 3322bd8f1dc3Sbluhm testdata.nested_callback_happened = XML_FALSE; 3323bd8f1dc3Sbluhm 3324bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 3325bd8f1dc3Sbluhm if (parser == NULL) { 3326bd8f1dc3Sbluhm fail("Couldn't create parser"); 3327bd8f1dc3Sbluhm } 3328bd8f1dc3Sbluhm XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3329bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3330bd8f1dc3Sbluhm XML_SetUserData(parser, &testdata); 3331bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3332bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3333bd8f1dc3Sbluhm xml_failure(parser); 3334bd8f1dc3Sbluhm if (! 
testdata.nested_callback_happened) { 3335bd8f1dc3Sbluhm fail("ref handler not called"); 3336bd8f1dc3Sbluhm } 3337bd8f1dc3Sbluhm XML_ParserFree(parser); 3338bd8f1dc3Sbluhm } 3339bd8f1dc3Sbluhm } 3340bd8f1dc3Sbluhm END_TEST 3341bd8f1dc3Sbluhm 3342bd8f1dc3Sbluhm /* Test recursive parsing */ 3343bd8f1dc3Sbluhm START_TEST(test_external_entity_values) { 3344bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3345bd8f1dc3Sbluhm "<doc></doc>\n"; 3346bd8f1dc3Sbluhm ExtFaults data_004_2[] = { 3347bd8f1dc3Sbluhm {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3348bd8f1dc3Sbluhm {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3349bd8f1dc3Sbluhm XML_ERROR_INVALID_TOKEN}, 3350bd8f1dc3Sbluhm {"'wombat", "Unterminated string not faulted", NULL, 3351bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_TOKEN}, 3352bd8f1dc3Sbluhm {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3353bd8f1dc3Sbluhm XML_ERROR_PARTIAL_CHAR}, 3354bd8f1dc3Sbluhm {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3355bd8f1dc3Sbluhm {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3356bd8f1dc3Sbluhm XML_ERROR_XML_DECL}, 3357bd8f1dc3Sbluhm {/* UTF-8 BOM */ 3358bd8f1dc3Sbluhm "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3359bd8f1dc3Sbluhm XML_ERROR_NONE}, 3360bd8f1dc3Sbluhm {"<?xml version='1.0' encoding='utf-8'?>\n$", 3361bd8f1dc3Sbluhm "Invalid token after text declaration not faulted", NULL, 3362bd8f1dc3Sbluhm XML_ERROR_INVALID_TOKEN}, 3363bd8f1dc3Sbluhm {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3364bd8f1dc3Sbluhm "Unterminated string after text decl not faulted", NULL, 3365bd8f1dc3Sbluhm XML_ERROR_UNCLOSED_TOKEN}, 3366bd8f1dc3Sbluhm {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3367bd8f1dc3Sbluhm "Partial UTF-8 character after text decl not faulted", NULL, 3368bd8f1dc3Sbluhm XML_ERROR_PARTIAL_CHAR}, 3369bd8f1dc3Sbluhm {"%e1;", "Recursive parameter entity not faulted", NULL, 3370bd8f1dc3Sbluhm 
XML_ERROR_RECURSIVE_ENTITY_REF}, 3371bd8f1dc3Sbluhm {NULL, NULL, NULL, XML_ERROR_NONE}}; 3372bd8f1dc3Sbluhm int i; 3373bd8f1dc3Sbluhm 3374bd8f1dc3Sbluhm for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3375bd8f1dc3Sbluhm set_subtest("%s", data_004_2[i].parse_text); 3376bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3377bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3378bd8f1dc3Sbluhm XML_SetUserData(g_parser, &data_004_2[i]); 3379bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3380bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3381bd8f1dc3Sbluhm xml_failure(g_parser); 3382bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 3383bd8f1dc3Sbluhm } 3384bd8f1dc3Sbluhm } 3385bd8f1dc3Sbluhm END_TEST 3386bd8f1dc3Sbluhm 3387bd8f1dc3Sbluhm /* Test the recursive parse interacts with a not standalone handler */ 3388bd8f1dc3Sbluhm START_TEST(test_ext_entity_not_standalone) { 3389bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3390bd8f1dc3Sbluhm "<doc></doc>"; 3391bd8f1dc3Sbluhm 3392bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3393bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3394bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3395bd8f1dc3Sbluhm "Standalone rejection not caught"); 3396bd8f1dc3Sbluhm } 3397bd8f1dc3Sbluhm END_TEST 3398bd8f1dc3Sbluhm 3399bd8f1dc3Sbluhm START_TEST(test_ext_entity_value_abort) { 3400bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3401bd8f1dc3Sbluhm "<doc></doc>\n"; 3402bd8f1dc3Sbluhm 3403bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3404bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3405bd8f1dc3Sbluhm g_resumable = XML_FALSE; 3406bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 
3407bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3408bd8f1dc3Sbluhm xml_failure(g_parser); 3409bd8f1dc3Sbluhm } 3410bd8f1dc3Sbluhm END_TEST 3411bd8f1dc3Sbluhm 3412bd8f1dc3Sbluhm START_TEST(test_bad_public_doctype) { 3413bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3414bd8f1dc3Sbluhm "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3415bd8f1dc3Sbluhm "<doc></doc>"; 3416bd8f1dc3Sbluhm 3417bd8f1dc3Sbluhm /* Setting a handler provokes a particular code path */ 3418bd8f1dc3Sbluhm XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3419bd8f1dc3Sbluhm dummy_end_doctype_handler); 3420bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3421bd8f1dc3Sbluhm } 3422bd8f1dc3Sbluhm END_TEST 3423bd8f1dc3Sbluhm 3424bd8f1dc3Sbluhm /* Test based on ibm/valid/P32/ibm32v04.xml */ 3425bd8f1dc3Sbluhm START_TEST(test_attribute_enum_value) { 3426bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' standalone='no'?>\n" 3427bd8f1dc3Sbluhm "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3428bd8f1dc3Sbluhm "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3429bd8f1dc3Sbluhm ExtTest dtd_data 3430bd8f1dc3Sbluhm = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3431bd8f1dc3Sbluhm "<!ELEMENT a EMPTY>\n" 3432bd8f1dc3Sbluhm "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3433bd8f1dc3Sbluhm NULL, NULL}; 3434bd8f1dc3Sbluhm const XML_Char *expected = XCS("This is a \n \n\nyellow tiger"); 3435bd8f1dc3Sbluhm 3436bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3437bd8f1dc3Sbluhm XML_SetUserData(g_parser, &dtd_data); 3438bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3439bd8f1dc3Sbluhm /* An attribute list handler provokes a different code path */ 3440bd8f1dc3Sbluhm XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3441bd8f1dc3Sbluhm run_ext_character_check(text, &dtd_data, expected); 3442bd8f1dc3Sbluhm } 3443bd8f1dc3Sbluhm END_TEST 3444bd8f1dc3Sbluhm 
3445bd8f1dc3Sbluhm /* Slightly bizarrely, the library seems to silently ignore entity 3446bd8f1dc3Sbluhm * definitions for predefined entities, even when they are wrong. The 3447bd8f1dc3Sbluhm * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3448bd8f1dc3Sbluhm * to happen, so this is currently treated as acceptable. 3449bd8f1dc3Sbluhm */ 3450bd8f1dc3Sbluhm START_TEST(test_predefined_entity_redefinition) { 3451bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3452bd8f1dc3Sbluhm "<!ENTITY apos 'foo'>\n" 3453bd8f1dc3Sbluhm "]>\n" 3454bd8f1dc3Sbluhm "<doc>'</doc>"; 3455bd8f1dc3Sbluhm run_character_check(text, XCS("'")); 3456bd8f1dc3Sbluhm } 3457bd8f1dc3Sbluhm END_TEST 3458bd8f1dc3Sbluhm 3459bd8f1dc3Sbluhm /* Test that the parser stops processing the DTD after an unresolved 3460bd8f1dc3Sbluhm * parameter entity is encountered. 3461bd8f1dc3Sbluhm */ 3462bd8f1dc3Sbluhm START_TEST(test_dtd_stop_processing) { 3463bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3464bd8f1dc3Sbluhm "%foo;\n" 3465bd8f1dc3Sbluhm "<!ENTITY bar 'bas'>\n" 3466bd8f1dc3Sbluhm "]><doc/>"; 3467bd8f1dc3Sbluhm 3468bd8f1dc3Sbluhm XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3469bd8f1dc3Sbluhm init_dummy_handlers(); 3470bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3471bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3472bd8f1dc3Sbluhm xml_failure(g_parser); 3473bd8f1dc3Sbluhm if (get_dummy_handler_flags() != 0) 3474bd8f1dc3Sbluhm fail("DTD processing still going after undefined PE"); 3475bd8f1dc3Sbluhm } 3476bd8f1dc3Sbluhm END_TEST 3477bd8f1dc3Sbluhm 3478bd8f1dc3Sbluhm /* Test public notations with no system ID */ 3479bd8f1dc3Sbluhm START_TEST(test_public_notation_no_sysid) { 3480bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3481bd8f1dc3Sbluhm "<!NOTATION note PUBLIC 'foo'>\n" 3482bd8f1dc3Sbluhm "<!ELEMENT doc EMPTY>\n" 3483bd8f1dc3Sbluhm "]>\n<doc/>"; 3484bd8f1dc3Sbluhm 3485bd8f1dc3Sbluhm init_dummy_handlers(); 
3486bd8f1dc3Sbluhm XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3487bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3488bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3489bd8f1dc3Sbluhm xml_failure(g_parser); 3490bd8f1dc3Sbluhm if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3491bd8f1dc3Sbluhm fail("Notation declaration handler not called"); 3492bd8f1dc3Sbluhm } 3493bd8f1dc3Sbluhm END_TEST 3494bd8f1dc3Sbluhm 3495bd8f1dc3Sbluhm START_TEST(test_nested_groups) { 3496bd8f1dc3Sbluhm const char *text 3497bd8f1dc3Sbluhm = "<!DOCTYPE doc [\n" 3498bd8f1dc3Sbluhm "<!ELEMENT doc " 3499bd8f1dc3Sbluhm /* Sixteen elements per line */ 3500bd8f1dc3Sbluhm "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3501bd8f1dc3Sbluhm "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 3502bd8f1dc3Sbluhm "))))))))))))))))))))))))))))))))>\n" 3503bd8f1dc3Sbluhm "<!ELEMENT e EMPTY>" 3504bd8f1dc3Sbluhm "]>\n" 3505bd8f1dc3Sbluhm "<doc><e/></doc>"; 3506bd8f1dc3Sbluhm CharData storage; 3507bd8f1dc3Sbluhm 3508bd8f1dc3Sbluhm CharData_Init(&storage); 3509bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3510bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, record_element_start_handler); 3511bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3512bd8f1dc3Sbluhm init_dummy_handlers(); 3513bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3514bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3515bd8f1dc3Sbluhm xml_failure(g_parser); 3516bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("doce")); 3517bd8f1dc3Sbluhm if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3518bd8f1dc3Sbluhm fail("Element handler not fired"); 3519bd8f1dc3Sbluhm } 3520bd8f1dc3Sbluhm END_TEST 3521bd8f1dc3Sbluhm 3522bd8f1dc3Sbluhm START_TEST(test_group_choice) { 3523bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3524bd8f1dc3Sbluhm "<!ELEMENT doc 
(a|b|c)+>\n" 3525bd8f1dc3Sbluhm "<!ELEMENT a EMPTY>\n" 3526bd8f1dc3Sbluhm "<!ELEMENT b (#PCDATA)>\n" 3527bd8f1dc3Sbluhm "<!ELEMENT c ANY>\n" 3528bd8f1dc3Sbluhm "]>\n" 3529bd8f1dc3Sbluhm "<doc>\n" 3530bd8f1dc3Sbluhm "<a/>\n" 3531bd8f1dc3Sbluhm "<b attr='foo'>This is a foo</b>\n" 3532bd8f1dc3Sbluhm "<c></c>\n" 3533bd8f1dc3Sbluhm "</doc>\n"; 3534bd8f1dc3Sbluhm 3535bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3536bd8f1dc3Sbluhm init_dummy_handlers(); 3537bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3538bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3539bd8f1dc3Sbluhm xml_failure(g_parser); 3540bd8f1dc3Sbluhm if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3541bd8f1dc3Sbluhm fail("Element handler flag not raised"); 3542bd8f1dc3Sbluhm } 3543bd8f1dc3Sbluhm END_TEST 3544bd8f1dc3Sbluhm 3545bd8f1dc3Sbluhm START_TEST(test_standalone_parameter_entity) { 3546bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3547bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3548bd8f1dc3Sbluhm "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3549bd8f1dc3Sbluhm "%entity;\n" 3550bd8f1dc3Sbluhm "]>\n" 3551bd8f1dc3Sbluhm "<doc></doc>"; 3552bd8f1dc3Sbluhm char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3553bd8f1dc3Sbluhm 3554bd8f1dc3Sbluhm XML_SetUserData(g_parser, dtd_data); 3555bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3556bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3557bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3558bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3559bd8f1dc3Sbluhm xml_failure(g_parser); 3560bd8f1dc3Sbluhm } 3561bd8f1dc3Sbluhm END_TEST 3562bd8f1dc3Sbluhm 3563bd8f1dc3Sbluhm /* Test skipping of parameter entity in an external DTD */ 3564bd8f1dc3Sbluhm /* Derived from ibm/invalid/P69/ibm69i01.xml */ 3565bd8f1dc3Sbluhm 
START_TEST(test_skipped_parameter_entity) { 3566bd8f1dc3Sbluhm const char *text = "<?xml version='1.0'?>\n" 3567bd8f1dc3Sbluhm "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3568bd8f1dc3Sbluhm "<!ELEMENT root (#PCDATA|a)* >\n" 3569bd8f1dc3Sbluhm "]>\n" 3570bd8f1dc3Sbluhm "<root></root>"; 3571bd8f1dc3Sbluhm ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3572bd8f1dc3Sbluhm 3573bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3574bd8f1dc3Sbluhm XML_SetUserData(g_parser, &dtd_data); 3575bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3576bd8f1dc3Sbluhm XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3577bd8f1dc3Sbluhm init_dummy_handlers(); 3578bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3579bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3580bd8f1dc3Sbluhm xml_failure(g_parser); 3581bd8f1dc3Sbluhm if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 3582bd8f1dc3Sbluhm fail("Skip handler not executed"); 3583bd8f1dc3Sbluhm } 3584bd8f1dc3Sbluhm END_TEST 3585bd8f1dc3Sbluhm 3586bd8f1dc3Sbluhm /* Test recursive parameter entity definition rejected in external DTD */ 3587bd8f1dc3Sbluhm START_TEST(test_recursive_external_parameter_entity) { 3588bd8f1dc3Sbluhm const char *text = "<?xml version='1.0'?>\n" 3589bd8f1dc3Sbluhm "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3590bd8f1dc3Sbluhm "<!ELEMENT root (#PCDATA|a)* >\n" 3591bd8f1dc3Sbluhm "]>\n" 3592bd8f1dc3Sbluhm "<root></root>"; 3593bd8f1dc3Sbluhm ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 3594bd8f1dc3Sbluhm "Recursive external parameter entity not faulted", NULL, 3595bd8f1dc3Sbluhm XML_ERROR_RECURSIVE_ENTITY_REF}; 3596bd8f1dc3Sbluhm 3597bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3598bd8f1dc3Sbluhm XML_SetUserData(g_parser, &dtd_data); 3599bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3600bd8f1dc3Sbluhm 
expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3601bd8f1dc3Sbluhm "Recursive external parameter not spotted"); 3602bd8f1dc3Sbluhm } 3603bd8f1dc3Sbluhm END_TEST 3604bd8f1dc3Sbluhm 3605bd8f1dc3Sbluhm /* Test undefined parameter entity in external entity handler */ 3606bd8f1dc3Sbluhm START_TEST(test_undefined_ext_entity_in_external_dtd) { 3607bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3608bd8f1dc3Sbluhm "<doc></doc>\n"; 3609bd8f1dc3Sbluhm 3610bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3611bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3612bd8f1dc3Sbluhm XML_SetUserData(g_parser, NULL); 3613bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3614bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3615bd8f1dc3Sbluhm xml_failure(g_parser); 3616bd8f1dc3Sbluhm 3617bd8f1dc3Sbluhm /* Now repeat without the external entity ref handler invoking 3618bd8f1dc3Sbluhm * another copy of itself. 
3619bd8f1dc3Sbluhm */ 3620bd8f1dc3Sbluhm XML_ParserReset(g_parser, NULL); 3621bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3622bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3623bd8f1dc3Sbluhm XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 3624bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3625bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3626bd8f1dc3Sbluhm xml_failure(g_parser); 3627bd8f1dc3Sbluhm } 3628bd8f1dc3Sbluhm END_TEST 3629bd8f1dc3Sbluhm 3630bd8f1dc3Sbluhm /* Test suspending the parse on receiving an XML declaration works */ 3631bd8f1dc3Sbluhm START_TEST(test_suspend_xdecl) { 3632bd8f1dc3Sbluhm const char *text = long_character_data_text; 3633bd8f1dc3Sbluhm 3634bd8f1dc3Sbluhm XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 3635bd8f1dc3Sbluhm XML_SetUserData(g_parser, g_parser); 3636bd8f1dc3Sbluhm g_resumable = XML_TRUE; 3637bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3638bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3639bd8f1dc3Sbluhm xml_failure(g_parser); 3640bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 3641bd8f1dc3Sbluhm xml_failure(g_parser); 3642bd8f1dc3Sbluhm /* Attempt to start a new parse while suspended */ 3643bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3644bd8f1dc3Sbluhm != XML_STATUS_ERROR) 3645bd8f1dc3Sbluhm fail("Attempt to parse while suspended not faulted"); 3646bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 3647bd8f1dc3Sbluhm fail("Suspended parse not faulted with correct error"); 3648bd8f1dc3Sbluhm } 3649bd8f1dc3Sbluhm END_TEST 3650bd8f1dc3Sbluhm 3651bd8f1dc3Sbluhm /* Test aborting the parse in an epilog works */ 3652bd8f1dc3Sbluhm START_TEST(test_abort_epilog) { 3653bd8f1dc3Sbluhm const char *text = "<doc></doc>\n\r\n"; 3654bd8f1dc3Sbluhm XML_Char trigger_char = 
XCS('\r'); 3655bd8f1dc3Sbluhm 3656bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3657bd8f1dc3Sbluhm XML_SetUserData(g_parser, &trigger_char); 3658bd8f1dc3Sbluhm g_resumable = XML_FALSE; 3659bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3660bd8f1dc3Sbluhm != XML_STATUS_ERROR) 3661bd8f1dc3Sbluhm fail("Abort not triggered"); 3662bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 3663bd8f1dc3Sbluhm xml_failure(g_parser); 3664bd8f1dc3Sbluhm } 3665bd8f1dc3Sbluhm END_TEST 3666bd8f1dc3Sbluhm 3667bd8f1dc3Sbluhm /* Test a different code path for abort in the epilog */ 3668bd8f1dc3Sbluhm START_TEST(test_abort_epilog_2) { 3669bd8f1dc3Sbluhm const char *text = "<doc></doc>\n"; 3670bd8f1dc3Sbluhm XML_Char trigger_char = XCS('\n'); 3671bd8f1dc3Sbluhm 3672bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3673bd8f1dc3Sbluhm XML_SetUserData(g_parser, &trigger_char); 3674bd8f1dc3Sbluhm g_resumable = XML_FALSE; 3675bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 3676bd8f1dc3Sbluhm } 3677bd8f1dc3Sbluhm END_TEST 3678bd8f1dc3Sbluhm 3679bd8f1dc3Sbluhm /* Test suspension from the epilog */ 3680bd8f1dc3Sbluhm START_TEST(test_suspend_epilog) { 3681bd8f1dc3Sbluhm const char *text = "<doc></doc>\n"; 3682bd8f1dc3Sbluhm XML_Char trigger_char = XCS('\n'); 3683bd8f1dc3Sbluhm 3684bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3685bd8f1dc3Sbluhm XML_SetUserData(g_parser, &trigger_char); 3686bd8f1dc3Sbluhm g_resumable = XML_TRUE; 3687bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3688bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3689bd8f1dc3Sbluhm xml_failure(g_parser); 3690bd8f1dc3Sbluhm } 3691bd8f1dc3Sbluhm END_TEST 3692bd8f1dc3Sbluhm 3693bd8f1dc3Sbluhm START_TEST(test_suspend_in_sole_empty_tag) { 3694bd8f1dc3Sbluhm const char *text = "<doc/>"; 3695bd8f1dc3Sbluhm enum 
XML_Status rc; 3696bd8f1dc3Sbluhm 3697bd8f1dc3Sbluhm XML_SetEndElementHandler(g_parser, suspending_end_handler); 3698bd8f1dc3Sbluhm XML_SetUserData(g_parser, g_parser); 3699bd8f1dc3Sbluhm rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 3700bd8f1dc3Sbluhm if (rc == XML_STATUS_ERROR) 3701bd8f1dc3Sbluhm xml_failure(g_parser); 3702bd8f1dc3Sbluhm else if (rc != XML_STATUS_SUSPENDED) 3703bd8f1dc3Sbluhm fail("Suspend not triggered"); 3704bd8f1dc3Sbluhm rc = XML_ResumeParser(g_parser); 3705bd8f1dc3Sbluhm if (rc == XML_STATUS_ERROR) 3706bd8f1dc3Sbluhm xml_failure(g_parser); 3707bd8f1dc3Sbluhm else if (rc != XML_STATUS_OK) 3708bd8f1dc3Sbluhm fail("Resume failed"); 3709bd8f1dc3Sbluhm } 3710bd8f1dc3Sbluhm END_TEST 3711bd8f1dc3Sbluhm 3712bd8f1dc3Sbluhm START_TEST(test_unfinished_epilog) { 3713bd8f1dc3Sbluhm const char *text = "<doc></doc><"; 3714bd8f1dc3Sbluhm 3715bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 3716bd8f1dc3Sbluhm "Incomplete epilog entry not faulted"); 3717bd8f1dc3Sbluhm } 3718bd8f1dc3Sbluhm END_TEST 3719bd8f1dc3Sbluhm 3720bd8f1dc3Sbluhm START_TEST(test_partial_char_in_epilog) { 3721bd8f1dc3Sbluhm const char *text = "<doc></doc>\xe2\x82"; 3722bd8f1dc3Sbluhm 3723bd8f1dc3Sbluhm /* First check that no fault is raised if the parse is not finished */ 3724bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3725bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3726bd8f1dc3Sbluhm xml_failure(g_parser); 3727bd8f1dc3Sbluhm /* Now check that it is faulted once we finish */ 3728bd8f1dc3Sbluhm if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 3729bd8f1dc3Sbluhm fail("Partial character in epilog not faulted"); 3730bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 3731bd8f1dc3Sbluhm xml_failure(g_parser); 3732bd8f1dc3Sbluhm } 3733bd8f1dc3Sbluhm END_TEST 3734bd8f1dc3Sbluhm 3735bd8f1dc3Sbluhm /* Test resuming a parse suspended in entity substitution */ 3736bd8f1dc3Sbluhm 
START_TEST(test_suspend_resume_internal_entity) { 3737bd8f1dc3Sbluhm const char *text 3738bd8f1dc3Sbluhm = "<!DOCTYPE doc [\n" 3739bd8f1dc3Sbluhm "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 3740bd8f1dc3Sbluhm "]>\n" 3741bd8f1dc3Sbluhm "<doc>&foo;</doc>\n"; 3742bd8f1dc3Sbluhm const XML_Char *expected1 = XCS("Hi"); 3743bd8f1dc3Sbluhm const XML_Char *expected2 = XCS("HiHo"); 3744bd8f1dc3Sbluhm CharData storage; 3745bd8f1dc3Sbluhm 3746bd8f1dc3Sbluhm CharData_Init(&storage); 3747bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, start_element_suspender); 3748bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3749bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3750bd8f1dc3Sbluhm // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3751bd8f1dc3Sbluhm // we won't know exactly how much input we actually managed to give Expat. 3752bd8f1dc3Sbluhm if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3753bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3754bd8f1dc3Sbluhm xml_failure(g_parser); 3755bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("")); 3756bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 3757bd8f1dc3Sbluhm xml_failure(g_parser); 3758bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected1); 3759bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3760bd8f1dc3Sbluhm xml_failure(g_parser); 3761bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected2); 3762bd8f1dc3Sbluhm } 3763bd8f1dc3Sbluhm END_TEST 3764bd8f1dc3Sbluhm 3765bd8f1dc3Sbluhm START_TEST(test_suspend_resume_internal_entity_issue_629) { 3766bd8f1dc3Sbluhm const char *const text 3767bd8f1dc3Sbluhm = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 3768bd8f1dc3Sbluhm "<" 3769bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3770bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3771bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 
3772bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3773bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3774bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3775bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3776bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3777bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3778bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3779bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3780bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3781bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3782bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3783bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3784bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3785bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3786bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3787bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3788bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3789bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3790bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3791bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3792bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3793bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3794bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3795bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3796bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3797bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3798bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3799bd8f1dc3Sbluhm 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3800bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3801bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3802bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3803bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3804bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3805bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3806bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3807bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3808bd8f1dc3Sbluhm "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3809bd8f1dc3Sbluhm "/>" 3810bd8f1dc3Sbluhm "</b></a>"; 3811bd8f1dc3Sbluhm const size_t firstChunkSizeBytes = 54; 3812bd8f1dc3Sbluhm 3813bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 3814bd8f1dc3Sbluhm XML_SetUserData(parser, parser); 3815bd8f1dc3Sbluhm XML_SetCommentHandler(parser, suspending_comment_handler); 3816bd8f1dc3Sbluhm 3817bd8f1dc3Sbluhm if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 3818bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3819bd8f1dc3Sbluhm xml_failure(parser); 3820bd8f1dc3Sbluhm if (XML_ResumeParser(parser) != XML_STATUS_OK) 3821bd8f1dc3Sbluhm xml_failure(parser); 3822bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 3823bd8f1dc3Sbluhm (int)(strlen(text) - firstChunkSizeBytes), 3824bd8f1dc3Sbluhm XML_TRUE) 3825bd8f1dc3Sbluhm != XML_STATUS_OK) 3826bd8f1dc3Sbluhm xml_failure(parser); 3827bd8f1dc3Sbluhm XML_ParserFree(parser); 3828bd8f1dc3Sbluhm } 3829bd8f1dc3Sbluhm END_TEST 3830bd8f1dc3Sbluhm 3831bd8f1dc3Sbluhm /* Test syntax error is caught at parse resumption */ 3832bd8f1dc3Sbluhm START_TEST(test_resume_entity_with_syntax_error) { 3833bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3834bd8f1dc3Sbluhm "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 3835bd8f1dc3Sbluhm "]>\n" 3836bd8f1dc3Sbluhm 
"<doc>&foo;</doc>\n"; 3837bd8f1dc3Sbluhm 3838bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, start_element_suspender); 3839bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3840bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3841bd8f1dc3Sbluhm xml_failure(g_parser); 3842bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 3843bd8f1dc3Sbluhm fail("Syntax error in entity not faulted"); 3844bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 3845bd8f1dc3Sbluhm xml_failure(g_parser); 3846bd8f1dc3Sbluhm } 3847bd8f1dc3Sbluhm END_TEST 3848bd8f1dc3Sbluhm 3849bd8f1dc3Sbluhm /* Test suspending and resuming in a parameter entity substitution */ 3850bd8f1dc3Sbluhm START_TEST(test_suspend_resume_parameter_entity) { 3851bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 3852bd8f1dc3Sbluhm "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 3853bd8f1dc3Sbluhm "%foo;\n" 3854bd8f1dc3Sbluhm "]>\n" 3855bd8f1dc3Sbluhm "<doc>Hello, world</doc>"; 3856bd8f1dc3Sbluhm const XML_Char *expected = XCS("Hello, world"); 3857bd8f1dc3Sbluhm CharData storage; 3858bd8f1dc3Sbluhm 3859bd8f1dc3Sbluhm CharData_Init(&storage); 3860bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3861bd8f1dc3Sbluhm XML_SetElementDeclHandler(g_parser, element_decl_suspender); 3862bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3863bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 3864bd8f1dc3Sbluhm if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3865bd8f1dc3Sbluhm != XML_STATUS_SUSPENDED) 3866bd8f1dc3Sbluhm xml_failure(g_parser); 3867bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("")); 3868bd8f1dc3Sbluhm if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3869bd8f1dc3Sbluhm xml_failure(g_parser); 3870bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 3871bd8f1dc3Sbluhm } 3872bd8f1dc3Sbluhm END_TEST 3873bd8f1dc3Sbluhm 3874bd8f1dc3Sbluhm /* Test attempting to use parser 
after an error is faulted */ 3875bd8f1dc3Sbluhm START_TEST(test_restart_on_error) { 3876bd8f1dc3Sbluhm const char *text = "<$doc><doc></doc>"; 3877bd8f1dc3Sbluhm 3878bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3879bd8f1dc3Sbluhm != XML_STATUS_ERROR) 3880bd8f1dc3Sbluhm fail("Invalid tag name not faulted"); 3881bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3882bd8f1dc3Sbluhm xml_failure(g_parser); 3883bd8f1dc3Sbluhm if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3884bd8f1dc3Sbluhm fail("Restarting invalid parse not faulted"); 3885bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3886bd8f1dc3Sbluhm xml_failure(g_parser); 3887bd8f1dc3Sbluhm } 3888bd8f1dc3Sbluhm END_TEST 3889bd8f1dc3Sbluhm 3890bd8f1dc3Sbluhm /* Test that angle brackets in an attribute default value are faulted */ 3891bd8f1dc3Sbluhm START_TEST(test_reject_lt_in_attribute_value) { 3892bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 3893bd8f1dc3Sbluhm "<doc></doc>"; 3894bd8f1dc3Sbluhm 3895bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 3896bd8f1dc3Sbluhm "Bad attribute default not faulted"); 3897bd8f1dc3Sbluhm } 3898bd8f1dc3Sbluhm END_TEST 3899bd8f1dc3Sbluhm 3900bd8f1dc3Sbluhm START_TEST(test_reject_unfinished_param_in_att_value) { 3901bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 3902bd8f1dc3Sbluhm "<doc></doc>"; 3903bd8f1dc3Sbluhm 3904bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 3905bd8f1dc3Sbluhm "Bad attribute default not faulted"); 3906bd8f1dc3Sbluhm } 3907bd8f1dc3Sbluhm END_TEST 3908bd8f1dc3Sbluhm 3909bd8f1dc3Sbluhm START_TEST(test_trailing_cr_in_att_value) { 3910bd8f1dc3Sbluhm const char *text = "<doc a='value\r'/>"; 3911bd8f1dc3Sbluhm 3912bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3913bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3914bd8f1dc3Sbluhm 
xml_failure(g_parser); 3915bd8f1dc3Sbluhm } 3916bd8f1dc3Sbluhm END_TEST 3917bd8f1dc3Sbluhm 3918bd8f1dc3Sbluhm /* Try parsing a general entity within a parameter entity in a 3919bd8f1dc3Sbluhm * standalone internal DTD. Covers a corner case in the parser. 3920bd8f1dc3Sbluhm */ 3921bd8f1dc3Sbluhm START_TEST(test_standalone_internal_entity) { 3922bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 3923bd8f1dc3Sbluhm "<!DOCTYPE doc [\n" 3924bd8f1dc3Sbluhm " <!ELEMENT doc (#PCDATA)>\n" 3925bd8f1dc3Sbluhm " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 3926bd8f1dc3Sbluhm " <!ENTITY ge 'AttDefaultValue'>\n" 3927bd8f1dc3Sbluhm " %pe;\n" 3928bd8f1dc3Sbluhm "]>\n" 3929bd8f1dc3Sbluhm "<doc att2='any'/>"; 3930bd8f1dc3Sbluhm 3931bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3932bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3933bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3934bd8f1dc3Sbluhm xml_failure(g_parser); 3935bd8f1dc3Sbluhm } 3936bd8f1dc3Sbluhm END_TEST 3937bd8f1dc3Sbluhm 3938bd8f1dc3Sbluhm /* Test that a reference to an unknown external entity is skipped */ 3939bd8f1dc3Sbluhm START_TEST(test_skipped_external_entity) { 3940bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3941bd8f1dc3Sbluhm "<doc></doc>\n"; 3942bd8f1dc3Sbluhm ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 3943bd8f1dc3Sbluhm "<!ENTITY % e2 '%e1;'>\n", 3944bd8f1dc3Sbluhm NULL, NULL}; 3945bd8f1dc3Sbluhm 3946bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 3947bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3948bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3949bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3950bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3951bd8f1dc3Sbluhm xml_failure(g_parser); 3952bd8f1dc3Sbluhm } 3953bd8f1dc3Sbluhm END_TEST 3954bd8f1dc3Sbluhm 
3955bd8f1dc3Sbluhm /* Test a different form of unknown external entity */ 3956bd8f1dc3Sbluhm START_TEST(test_skipped_null_loaded_ext_entity) { 3957bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3958bd8f1dc3Sbluhm "<doc />"; 3959bd8f1dc3Sbluhm ExtHdlrData test_data 3960bd8f1dc3Sbluhm = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3961bd8f1dc3Sbluhm "<!ENTITY % pe2 '%pe1;'>\n" 3962bd8f1dc3Sbluhm "%pe2;\n", 3963bd8f1dc3Sbluhm external_entity_null_loader}; 3964bd8f1dc3Sbluhm 3965bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 3966bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3967bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3968bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3969bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3970bd8f1dc3Sbluhm xml_failure(g_parser); 3971bd8f1dc3Sbluhm } 3972bd8f1dc3Sbluhm END_TEST 3973bd8f1dc3Sbluhm 3974bd8f1dc3Sbluhm START_TEST(test_skipped_unloaded_ext_entity) { 3975bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3976bd8f1dc3Sbluhm "<doc />"; 3977bd8f1dc3Sbluhm ExtHdlrData test_data 3978bd8f1dc3Sbluhm = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3979bd8f1dc3Sbluhm "<!ENTITY % pe2 '%pe1;'>\n" 3980bd8f1dc3Sbluhm "%pe2;\n", 3981bd8f1dc3Sbluhm NULL}; 3982bd8f1dc3Sbluhm 3983bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 3984bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3985bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3986bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3987bd8f1dc3Sbluhm == XML_STATUS_ERROR) 3988bd8f1dc3Sbluhm xml_failure(g_parser); 3989bd8f1dc3Sbluhm } 3990bd8f1dc3Sbluhm END_TEST 3991bd8f1dc3Sbluhm 3992bd8f1dc3Sbluhm /* Test that a parameter entity value ending with a 
carriage return 3993bd8f1dc3Sbluhm * has it translated internally into a newline. 3994bd8f1dc3Sbluhm */ 3995bd8f1dc3Sbluhm START_TEST(test_param_entity_with_trailing_cr) { 3996bd8f1dc3Sbluhm #define PARAM_ENTITY_NAME "pe" 3997bd8f1dc3Sbluhm #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 3998bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3999bd8f1dc3Sbluhm "<doc/>"; 4000bd8f1dc3Sbluhm ExtTest test_data 4001bd8f1dc3Sbluhm = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 4002bd8f1dc3Sbluhm "%" PARAM_ENTITY_NAME ";\n", 4003bd8f1dc3Sbluhm NULL, NULL}; 4004bd8f1dc3Sbluhm 4005bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4006bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 4007bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 4008bd8f1dc3Sbluhm XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 4009bd8f1dc3Sbluhm param_entity_match_init(XCS(PARAM_ENTITY_NAME), 4010bd8f1dc3Sbluhm XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 4011bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4012bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4013bd8f1dc3Sbluhm xml_failure(g_parser); 4014bd8f1dc3Sbluhm int entity_match_flag = get_param_entity_match_flag(); 4015bd8f1dc3Sbluhm if (entity_match_flag == ENTITY_MATCH_FAIL) 4016bd8f1dc3Sbluhm fail("Parameter entity CR->NEWLINE conversion failed"); 4017bd8f1dc3Sbluhm else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 4018bd8f1dc3Sbluhm fail("Parameter entity not parsed"); 4019bd8f1dc3Sbluhm } 4020bd8f1dc3Sbluhm #undef PARAM_ENTITY_NAME 4021bd8f1dc3Sbluhm #undef PARAM_ENTITY_CORE_VALUE 4022bd8f1dc3Sbluhm END_TEST 4023bd8f1dc3Sbluhm 4024bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity) { 4025bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4026bd8f1dc3Sbluhm " <!ENTITY entity '�'>\n" 4027bd8f1dc3Sbluhm "]>\n" 4028bd8f1dc3Sbluhm 
"<doc>&entity;</doc>"; 4029bd8f1dc3Sbluhm 4030bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4031bd8f1dc3Sbluhm "Out of range character reference not faulted"); 4032bd8f1dc3Sbluhm } 4033bd8f1dc3Sbluhm END_TEST 4034bd8f1dc3Sbluhm 4035bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_2) { 4036bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4037bd8f1dc3Sbluhm " <!ENTITY entity '&#xg0;'>\n" 4038bd8f1dc3Sbluhm "]>\n" 4039bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 4040bd8f1dc3Sbluhm 4041bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4042bd8f1dc3Sbluhm "Out of range character reference not faulted"); 4043bd8f1dc3Sbluhm } 4044bd8f1dc3Sbluhm END_TEST 4045bd8f1dc3Sbluhm 4046bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_3) { 4047bd8f1dc3Sbluhm const char text[] = 4048bd8f1dc3Sbluhm /* <!DOCTYPE doc [\n */ 4049bd8f1dc3Sbluhm "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4050bd8f1dc3Sbluhm /* U+0E04 = KHO KHWAI 4051bd8f1dc3Sbluhm * U+0E08 = CHO CHAN */ 4052bd8f1dc3Sbluhm /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 4053bd8f1dc3Sbluhm "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 4054bd8f1dc3Sbluhm "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 4055bd8f1dc3Sbluhm /* ]>\n */ 4056bd8f1dc3Sbluhm "\0]\0>\0\n" 4057bd8f1dc3Sbluhm /* <doc>&entity;</doc> */ 4058bd8f1dc3Sbluhm "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 4059bd8f1dc3Sbluhm 4060bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4061bd8f1dc3Sbluhm != XML_STATUS_ERROR) 4062bd8f1dc3Sbluhm fail("Invalid start of entity name not faulted"); 4063bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4064bd8f1dc3Sbluhm xml_failure(g_parser); 4065bd8f1dc3Sbluhm } 4066bd8f1dc3Sbluhm END_TEST 4067bd8f1dc3Sbluhm 4068bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_4) { 4069bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4070bd8f1dc3Sbluhm " <!ENTITY entity '�'>\n" /* = � */ 4071bd8f1dc3Sbluhm "]>\n" 
4072bd8f1dc3Sbluhm "<doc>&entity;</doc>"; 4073bd8f1dc3Sbluhm 4074bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4075bd8f1dc3Sbluhm "Out of range character reference not faulted"); 4076bd8f1dc3Sbluhm } 4077bd8f1dc3Sbluhm END_TEST 4078bd8f1dc3Sbluhm 4079bd8f1dc3Sbluhm /* Test that processing instructions are picked up by a default handler */ 4080bd8f1dc3Sbluhm START_TEST(test_pi_handled_in_default) { 4081bd8f1dc3Sbluhm const char *text = "<?test processing instruction?>\n<doc/>"; 4082bd8f1dc3Sbluhm const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>"); 4083bd8f1dc3Sbluhm CharData storage; 4084bd8f1dc3Sbluhm 4085bd8f1dc3Sbluhm CharData_Init(&storage); 4086bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 4087bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4088bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4089bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4090bd8f1dc3Sbluhm xml_failure(g_parser); 4091bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4092bd8f1dc3Sbluhm } 4093bd8f1dc3Sbluhm END_TEST 4094bd8f1dc3Sbluhm 4095bd8f1dc3Sbluhm /* Test that comments are picked up by a default handler */ 4096bd8f1dc3Sbluhm START_TEST(test_comment_handled_in_default) { 4097bd8f1dc3Sbluhm const char *text = "<!-- This is a comment -->\n<doc/>"; 4098bd8f1dc3Sbluhm const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4099bd8f1dc3Sbluhm CharData storage; 4100bd8f1dc3Sbluhm 4101bd8f1dc3Sbluhm CharData_Init(&storage); 4102bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, accumulate_characters); 4103bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4104bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4105bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4106bd8f1dc3Sbluhm xml_failure(g_parser); 4107bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4108bd8f1dc3Sbluhm } 4109bd8f1dc3Sbluhm END_TEST 4110bd8f1dc3Sbluhm 4111bd8f1dc3Sbluhm /* Test 
PIs that look almost but not quite like XML declarations */ 4112bd8f1dc3Sbluhm START_TEST(test_pi_yml) { 4113bd8f1dc3Sbluhm const char *text = "<?yml something like data?><doc/>"; 4114bd8f1dc3Sbluhm const XML_Char *expected = XCS("yml: something like data\n"); 4115bd8f1dc3Sbluhm CharData storage; 4116bd8f1dc3Sbluhm 4117bd8f1dc3Sbluhm CharData_Init(&storage); 4118bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4119bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4120bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4121bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4122bd8f1dc3Sbluhm xml_failure(g_parser); 4123bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4124bd8f1dc3Sbluhm } 4125bd8f1dc3Sbluhm END_TEST 4126bd8f1dc3Sbluhm 4127bd8f1dc3Sbluhm START_TEST(test_pi_xnl) { 4128bd8f1dc3Sbluhm const char *text = "<?xnl nothing like data?><doc/>"; 4129bd8f1dc3Sbluhm const XML_Char *expected = XCS("xnl: nothing like data\n"); 4130bd8f1dc3Sbluhm CharData storage; 4131bd8f1dc3Sbluhm 4132bd8f1dc3Sbluhm CharData_Init(&storage); 4133bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4134bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4135bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4136bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4137bd8f1dc3Sbluhm xml_failure(g_parser); 4138bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4139bd8f1dc3Sbluhm } 4140bd8f1dc3Sbluhm END_TEST 4141bd8f1dc3Sbluhm 4142bd8f1dc3Sbluhm START_TEST(test_pi_xmm) { 4143bd8f1dc3Sbluhm const char *text = "<?xmm everything like data?><doc/>"; 4144bd8f1dc3Sbluhm const XML_Char *expected = XCS("xmm: everything like data\n"); 4145bd8f1dc3Sbluhm CharData storage; 4146bd8f1dc3Sbluhm 4147bd8f1dc3Sbluhm CharData_Init(&storage); 4148bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4149bd8f1dc3Sbluhm 
XML_SetUserData(g_parser, &storage); 4150bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4151bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4152bd8f1dc3Sbluhm xml_failure(g_parser); 4153bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4154bd8f1dc3Sbluhm } 4155bd8f1dc3Sbluhm END_TEST 4156bd8f1dc3Sbluhm 4157bd8f1dc3Sbluhm START_TEST(test_utf16_pi) { 4158bd8f1dc3Sbluhm const char text[] = 4159bd8f1dc3Sbluhm /* <?{KHO KHWAI}{CHO CHAN}?> 4160bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 4161bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 4162bd8f1dc3Sbluhm */ 4163bd8f1dc3Sbluhm "<\0?\0\x04\x0e\x08\x0e?\0>\0" 4164bd8f1dc3Sbluhm /* <q/> */ 4165bd8f1dc3Sbluhm "<\0q\0/\0>\0"; 4166bd8f1dc3Sbluhm #ifdef XML_UNICODE 4167bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 4168bd8f1dc3Sbluhm #else 4169bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4170bd8f1dc3Sbluhm #endif 4171bd8f1dc3Sbluhm CharData storage; 4172bd8f1dc3Sbluhm 4173bd8f1dc3Sbluhm CharData_Init(&storage); 4174bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4175bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4176bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4177bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4178bd8f1dc3Sbluhm xml_failure(g_parser); 4179bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4180bd8f1dc3Sbluhm } 4181bd8f1dc3Sbluhm END_TEST 4182bd8f1dc3Sbluhm 4183bd8f1dc3Sbluhm START_TEST(test_utf16_be_pi) { 4184bd8f1dc3Sbluhm const char text[] = 4185bd8f1dc3Sbluhm /* <?{KHO KHWAI}{CHO CHAN}?> 4186bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 4187bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 4188bd8f1dc3Sbluhm */ 4189bd8f1dc3Sbluhm "\0<\0?\x0e\x04\x0e\x08\0?\0>" 4190bd8f1dc3Sbluhm /* <q/> */ 4191bd8f1dc3Sbluhm "\0<\0q\0/\0>"; 4192bd8f1dc3Sbluhm #ifdef XML_UNICODE 4193bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x0e04\x0e08: \n"); 
4194bd8f1dc3Sbluhm #else 4195bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); 4196bd8f1dc3Sbluhm #endif 4197bd8f1dc3Sbluhm CharData storage; 4198bd8f1dc3Sbluhm 4199bd8f1dc3Sbluhm CharData_Init(&storage); 4200bd8f1dc3Sbluhm XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4201bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4202bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4203bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4204bd8f1dc3Sbluhm xml_failure(g_parser); 4205bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4206bd8f1dc3Sbluhm } 4207bd8f1dc3Sbluhm END_TEST 4208bd8f1dc3Sbluhm 4209bd8f1dc3Sbluhm /* Test that comments can be picked up and translated */ 4210bd8f1dc3Sbluhm START_TEST(test_utf16_be_comment) { 4211bd8f1dc3Sbluhm const char text[] = 4212bd8f1dc3Sbluhm /* <!-- Comment A --> */ 4213bd8f1dc3Sbluhm "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" 4214bd8f1dc3Sbluhm /* <doc/> */ 4215bd8f1dc3Sbluhm "\0<\0d\0o\0c\0/\0>"; 4216bd8f1dc3Sbluhm const XML_Char *expected = XCS(" Comment A "); 4217bd8f1dc3Sbluhm CharData storage; 4218bd8f1dc3Sbluhm 4219bd8f1dc3Sbluhm CharData_Init(&storage); 4220bd8f1dc3Sbluhm XML_SetCommentHandler(g_parser, accumulate_comment); 4221bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4222bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4223bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4224bd8f1dc3Sbluhm xml_failure(g_parser); 4225bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4226bd8f1dc3Sbluhm } 4227bd8f1dc3Sbluhm END_TEST 4228bd8f1dc3Sbluhm 4229bd8f1dc3Sbluhm START_TEST(test_utf16_le_comment) { 4230bd8f1dc3Sbluhm const char text[] = 4231bd8f1dc3Sbluhm /* <!-- Comment B --> */ 4232bd8f1dc3Sbluhm "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" 4233bd8f1dc3Sbluhm /* <doc/> */ 4234bd8f1dc3Sbluhm "<\0d\0o\0c\0/\0>\0"; 4235bd8f1dc3Sbluhm const XML_Char *expected = 
XCS(" Comment B "); 4236bd8f1dc3Sbluhm CharData storage; 4237bd8f1dc3Sbluhm 4238bd8f1dc3Sbluhm CharData_Init(&storage); 4239bd8f1dc3Sbluhm XML_SetCommentHandler(g_parser, accumulate_comment); 4240bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4241bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4242bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4243bd8f1dc3Sbluhm xml_failure(g_parser); 4244bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4245bd8f1dc3Sbluhm } 4246bd8f1dc3Sbluhm END_TEST 4247bd8f1dc3Sbluhm 4248bd8f1dc3Sbluhm /* Test that the unknown encoding handler with map entries that expect 4249bd8f1dc3Sbluhm * conversion but no conversion function is faulted 4250bd8f1dc3Sbluhm */ 4251bd8f1dc3Sbluhm START_TEST(test_missing_encoding_conversion_fn) { 4252bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='no-conv'?>\n" 4253bd8f1dc3Sbluhm "<doc>\x81</doc>"; 4254bd8f1dc3Sbluhm 4255bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4256bd8f1dc3Sbluhm /* MiscEncodingHandler sets up an encoding with every top-bit-set 4257bd8f1dc3Sbluhm * character introducing a two-byte sequence. For this, it 4258bd8f1dc3Sbluhm * requires a convert function. The above function call doesn't 4259bd8f1dc3Sbluhm * pass one through, so when BadEncodingHandler actually gets 4260bd8f1dc3Sbluhm * called it should supply an invalid encoding. 
4261bd8f1dc3Sbluhm */ 4262bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4263bd8f1dc3Sbluhm "Encoding with missing convert() not faulted"); 4264bd8f1dc3Sbluhm } 4265bd8f1dc3Sbluhm END_TEST 4266bd8f1dc3Sbluhm 4267bd8f1dc3Sbluhm START_TEST(test_failing_encoding_conversion_fn) { 4268bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n" 4269bd8f1dc3Sbluhm "<doc>\x81</doc>"; 4270bd8f1dc3Sbluhm 4271bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4272bd8f1dc3Sbluhm /* BadEncodingHandler sets up an encoding with every top-bit-set 4273bd8f1dc3Sbluhm * character introducing a two-byte sequence. For this, it 4274bd8f1dc3Sbluhm * requires a convert function. The above function call passes 4275bd8f1dc3Sbluhm * one that insists all possible sequences are invalid anyway. 4276bd8f1dc3Sbluhm */ 4277bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4278bd8f1dc3Sbluhm "Encoding with failing convert() not faulted"); 4279bd8f1dc3Sbluhm } 4280bd8f1dc3Sbluhm END_TEST 4281bd8f1dc3Sbluhm 4282bd8f1dc3Sbluhm /* Test unknown encoding conversions */ 4283bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_success) { 4284bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4285bd8f1dc3Sbluhm /* Equivalent to <eoc>Hello, world</eoc> */ 4286bd8f1dc3Sbluhm "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; 4287bd8f1dc3Sbluhm 4288bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4289bd8f1dc3Sbluhm run_character_check(text, XCS("Hello, world")); 4290bd8f1dc3Sbluhm } 4291bd8f1dc3Sbluhm END_TEST 4292bd8f1dc3Sbluhm 4293bd8f1dc3Sbluhm /* Test bad name character in unknown encoding */ 4294bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_name) { 4295bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4296bd8f1dc3Sbluhm "<\xff\x64oc>Hello, world</\xff\x64oc>"; 4297bd8f1dc3Sbluhm 4298bd8f1dc3Sbluhm 
XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4299bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4300bd8f1dc3Sbluhm "Bad name start in unknown encoding not faulted"); 4301bd8f1dc3Sbluhm } 4302bd8f1dc3Sbluhm END_TEST 4303bd8f1dc3Sbluhm 4304bd8f1dc3Sbluhm /* Test bad mid-name character in unknown encoding */ 4305bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_name_2) { 4306bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4307bd8f1dc3Sbluhm "<d\xffoc>Hello, world</d\xffoc>"; 4308bd8f1dc3Sbluhm 4309bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4310bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4311bd8f1dc3Sbluhm "Bad name in unknown encoding not faulted"); 4312bd8f1dc3Sbluhm } 4313bd8f1dc3Sbluhm END_TEST 4314bd8f1dc3Sbluhm 4315bd8f1dc3Sbluhm /* Test element name that is long enough to fill the conversion buffer 4316bd8f1dc3Sbluhm * in an unknown encoding, finishing with an encoded character. 
4317bd8f1dc3Sbluhm */ 4318bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_long_name_1) { 4319bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4320bd8f1dc3Sbluhm "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" 4321bd8f1dc3Sbluhm "Hi" 4322bd8f1dc3Sbluhm "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; 4323bd8f1dc3Sbluhm const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4324bd8f1dc3Sbluhm CharData storage; 4325bd8f1dc3Sbluhm 4326bd8f1dc3Sbluhm CharData_Init(&storage); 4327bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4328bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, record_element_start_handler); 4329bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4330bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4331bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4332bd8f1dc3Sbluhm xml_failure(g_parser); 4333bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4334bd8f1dc3Sbluhm } 4335bd8f1dc3Sbluhm END_TEST 4336bd8f1dc3Sbluhm 4337bd8f1dc3Sbluhm /* Test element name that is long enough to fill the conversion buffer 4338bd8f1dc3Sbluhm * in an unknown encoding, finishing with an simple character. 
4339bd8f1dc3Sbluhm */ 4340bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_long_name_2) { 4341bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4342bd8f1dc3Sbluhm "<abcdefghabcdefghabcdefghijklmnop>" 4343bd8f1dc3Sbluhm "Hi" 4344bd8f1dc3Sbluhm "</abcdefghabcdefghabcdefghijklmnop>"; 4345bd8f1dc3Sbluhm const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); 4346bd8f1dc3Sbluhm CharData storage; 4347bd8f1dc3Sbluhm 4348bd8f1dc3Sbluhm CharData_Init(&storage); 4349bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4350bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, record_element_start_handler); 4351bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4352bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4353bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4354bd8f1dc3Sbluhm xml_failure(g_parser); 4355bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4356bd8f1dc3Sbluhm } 4357bd8f1dc3Sbluhm END_TEST 4358bd8f1dc3Sbluhm 4359bd8f1dc3Sbluhm START_TEST(test_invalid_unknown_encoding) { 4360bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n" 4361bd8f1dc3Sbluhm "<doc>Hello world</doc>"; 4362bd8f1dc3Sbluhm 4363bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4364bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4365bd8f1dc3Sbluhm "Invalid unknown encoding not faulted"); 4366bd8f1dc3Sbluhm } 4367bd8f1dc3Sbluhm END_TEST 4368bd8f1dc3Sbluhm 4369bd8f1dc3Sbluhm START_TEST(test_unknown_ascii_encoding_ok) { 4370bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4371bd8f1dc3Sbluhm "<doc>Hello, world</doc>"; 4372bd8f1dc3Sbluhm 4373bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4374bd8f1dc3Sbluhm run_character_check(text, XCS("Hello, world")); 4375bd8f1dc3Sbluhm } 4376bd8f1dc3Sbluhm END_TEST 4377bd8f1dc3Sbluhm 4378bd8f1dc3Sbluhm 
START_TEST(test_unknown_ascii_encoding_fail) { 4379bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n" 4380bd8f1dc3Sbluhm "<doc>Hello, \x80 world</doc>"; 4381bd8f1dc3Sbluhm 4382bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4383bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4384bd8f1dc3Sbluhm "Invalid character not faulted"); 4385bd8f1dc3Sbluhm } 4386bd8f1dc3Sbluhm END_TEST 4387bd8f1dc3Sbluhm 4388bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_length) { 4389bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n" 4390bd8f1dc3Sbluhm "<doc>Hello, world</doc>"; 4391bd8f1dc3Sbluhm 4392bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4393bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4394bd8f1dc3Sbluhm "Invalid unknown encoding not faulted"); 4395bd8f1dc3Sbluhm } 4396bd8f1dc3Sbluhm END_TEST 4397bd8f1dc3Sbluhm 4398bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_topbit) { 4399bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n" 4400bd8f1dc3Sbluhm "<doc>Hello, world</doc>"; 4401bd8f1dc3Sbluhm 4402bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4403bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4404bd8f1dc3Sbluhm "Invalid unknown encoding not faulted"); 4405bd8f1dc3Sbluhm } 4406bd8f1dc3Sbluhm END_TEST 4407bd8f1dc3Sbluhm 4408bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_surrogate) { 4409bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n" 4410bd8f1dc3Sbluhm "<doc>Hello, \x82 world</doc>"; 4411bd8f1dc3Sbluhm 4412bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4413bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4414bd8f1dc3Sbluhm "Invalid unknown encoding not faulted"); 4415bd8f1dc3Sbluhm } 4416bd8f1dc3Sbluhm END_TEST 4417bd8f1dc3Sbluhm 
4418bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_high) { 4419bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n" 4420bd8f1dc3Sbluhm "<doc>Hello, world</doc>"; 4421bd8f1dc3Sbluhm 4422bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4423bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 4424bd8f1dc3Sbluhm "Invalid unknown encoding not faulted"); 4425bd8f1dc3Sbluhm } 4426bd8f1dc3Sbluhm END_TEST 4427bd8f1dc3Sbluhm 4428bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_attr_value) { 4429bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4430bd8f1dc3Sbluhm "<doc attr='\xff\x30'/>"; 4431bd8f1dc3Sbluhm 4432bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4433bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4434bd8f1dc3Sbluhm "Invalid attribute valid not faulted"); 4435bd8f1dc3Sbluhm } 4436bd8f1dc3Sbluhm END_TEST 4437bd8f1dc3Sbluhm 4438bd8f1dc3Sbluhm /* Test an external entity parser set to use latin-1 detects UTF-16 4439bd8f1dc3Sbluhm * BOMs correctly. 
4440bd8f1dc3Sbluhm */ 4441bd8f1dc3Sbluhm /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4442bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16le_bom) { 4443bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4444bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4445bd8f1dc3Sbluhm "]>\n" 4446bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4447bd8f1dc3Sbluhm ExtTest2 test_data 4448bd8f1dc3Sbluhm = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4449bd8f1dc3Sbluhm /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4450bd8f1dc3Sbluhm * 0x4c = L and 0x20 is a space 4451bd8f1dc3Sbluhm */ 4452bd8f1dc3Sbluhm "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4453bd8f1dc3Sbluhm #ifdef XML_UNICODE 4454bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00ff\x00feL "); 4455bd8f1dc3Sbluhm #else 4456bd8f1dc3Sbluhm /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4457bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4458bd8f1dc3Sbluhm #endif 4459bd8f1dc3Sbluhm CharData storage; 4460bd8f1dc3Sbluhm 4461bd8f1dc3Sbluhm CharData_Init(&storage); 4462bd8f1dc3Sbluhm test_data.storage = &storage; 4463bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4464bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4465bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4466bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4467bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4468bd8f1dc3Sbluhm xml_failure(g_parser); 4469bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4470bd8f1dc3Sbluhm } 4471bd8f1dc3Sbluhm END_TEST 4472bd8f1dc3Sbluhm 4473bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16be_bom) { 4474bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4475bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4476bd8f1dc3Sbluhm "]>\n" 4477bd8f1dc3Sbluhm "<doc>&en;</doc>"; 
4478bd8f1dc3Sbluhm ExtTest2 test_data 4479bd8f1dc3Sbluhm = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4480bd8f1dc3Sbluhm /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4481bd8f1dc3Sbluhm * 0x4c = L and 0x20 is a space 4482bd8f1dc3Sbluhm */ 4483bd8f1dc3Sbluhm "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4484bd8f1dc3Sbluhm #ifdef XML_UNICODE 4485bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00fe\x00ff L"); 4486bd8f1dc3Sbluhm #else 4487bd8f1dc3Sbluhm /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4488bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4489bd8f1dc3Sbluhm #endif 4490bd8f1dc3Sbluhm CharData storage; 4491bd8f1dc3Sbluhm 4492bd8f1dc3Sbluhm CharData_Init(&storage); 4493bd8f1dc3Sbluhm test_data.storage = &storage; 4494bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4495bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4496bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4497bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4498bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4499bd8f1dc3Sbluhm xml_failure(g_parser); 4500bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4501bd8f1dc3Sbluhm } 4502bd8f1dc3Sbluhm END_TEST 4503bd8f1dc3Sbluhm 4504bd8f1dc3Sbluhm /* Parsing the full buffer rather than a byte at a time makes a 4505bd8f1dc3Sbluhm * difference to the encoding scanning code, so repeat the above tests 4506bd8f1dc3Sbluhm * without breaking them down by byte. 
4507bd8f1dc3Sbluhm */ 4508bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4509bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4510bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4511bd8f1dc3Sbluhm "]>\n" 4512bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4513bd8f1dc3Sbluhm ExtTest2 test_data 4514bd8f1dc3Sbluhm = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4515bd8f1dc3Sbluhm /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4516bd8f1dc3Sbluhm * 0x4c = L and 0x20 is a space 4517bd8f1dc3Sbluhm */ 4518bd8f1dc3Sbluhm "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4519bd8f1dc3Sbluhm #ifdef XML_UNICODE 4520bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00ff\x00feL "); 4521bd8f1dc3Sbluhm #else 4522bd8f1dc3Sbluhm /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4523bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4524bd8f1dc3Sbluhm #endif 4525bd8f1dc3Sbluhm CharData storage; 4526bd8f1dc3Sbluhm 4527bd8f1dc3Sbluhm CharData_Init(&storage); 4528bd8f1dc3Sbluhm test_data.storage = &storage; 4529bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4530bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4531bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4532bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4533bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4534bd8f1dc3Sbluhm xml_failure(g_parser); 4535bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4536bd8f1dc3Sbluhm } 4537bd8f1dc3Sbluhm END_TEST 4538bd8f1dc3Sbluhm 4539bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16be_bom2) { 4540bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4541bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4542bd8f1dc3Sbluhm "]>\n" 4543bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4544bd8f1dc3Sbluhm ExtTest2 test_data 4545bd8f1dc3Sbluhm = {/* If UTF-16, 0xfeff is the BOM 
and 0x204c is black left bullet */ 4546bd8f1dc3Sbluhm /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4547bd8f1dc3Sbluhm * 0x4c = L and 0x20 is a space 4548bd8f1dc3Sbluhm */ 4549bd8f1dc3Sbluhm "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4550bd8f1dc3Sbluhm #ifdef XML_UNICODE 4551bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00fe\x00ff L"); 4552bd8f1dc3Sbluhm #else 4553bd8f1dc3Sbluhm /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4554bd8f1dc3Sbluhm const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 4555bd8f1dc3Sbluhm #endif 4556bd8f1dc3Sbluhm CharData storage; 4557bd8f1dc3Sbluhm 4558bd8f1dc3Sbluhm CharData_Init(&storage); 4559bd8f1dc3Sbluhm test_data.storage = &storage; 4560bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4561bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4562bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4563bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4564bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4565bd8f1dc3Sbluhm xml_failure(g_parser); 4566bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4567bd8f1dc3Sbluhm } 4568bd8f1dc3Sbluhm END_TEST 4569bd8f1dc3Sbluhm 4570bd8f1dc3Sbluhm /* Test little-endian UTF-16 given an explicit big-endian encoding */ 4571bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_be) { 4572bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4573bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4574bd8f1dc3Sbluhm "]>\n" 4575bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4576bd8f1dc3Sbluhm ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 4577bd8f1dc3Sbluhm #ifdef XML_UNICODE 4578bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4579bd8f1dc3Sbluhm #else 4580bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4581bd8f1dc3Sbluhm "\xe6\x94\x80" /* U+6500 */ 4582bd8f1dc3Sbluhm "\xe2\xbc\x80" /* U+2F00 */ 
4583bd8f1dc3Sbluhm "\xe3\xb8\x80"); /* U+3E00 */ 4584bd8f1dc3Sbluhm #endif 4585bd8f1dc3Sbluhm CharData storage; 4586bd8f1dc3Sbluhm 4587bd8f1dc3Sbluhm CharData_Init(&storage); 4588bd8f1dc3Sbluhm test_data.storage = &storage; 4589bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4590bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4591bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4592bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4593bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4594bd8f1dc3Sbluhm xml_failure(g_parser); 4595bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4596bd8f1dc3Sbluhm } 4597bd8f1dc3Sbluhm END_TEST 4598bd8f1dc3Sbluhm 4599bd8f1dc3Sbluhm /* Test big-endian UTF-16 given an explicit little-endian encoding */ 4600bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_le) { 4601bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4602bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4603bd8f1dc3Sbluhm "]>\n" 4604bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4605bd8f1dc3Sbluhm ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 4606bd8f1dc3Sbluhm #ifdef XML_UNICODE 4607bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4608bd8f1dc3Sbluhm #else 4609bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4610bd8f1dc3Sbluhm "\xe6\x94\x80" /* U+6500 */ 4611bd8f1dc3Sbluhm "\xe2\xbc\x80" /* U+2F00 */ 4612bd8f1dc3Sbluhm "\xe3\xb8\x80"); /* U+3E00 */ 4613bd8f1dc3Sbluhm #endif 4614bd8f1dc3Sbluhm CharData storage; 4615bd8f1dc3Sbluhm 4616bd8f1dc3Sbluhm CharData_Init(&storage); 4617bd8f1dc3Sbluhm test_data.storage = &storage; 4618bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4619bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4620bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4621bd8f1dc3Sbluhm if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4622bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4623bd8f1dc3Sbluhm xml_failure(g_parser); 4624bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4625bd8f1dc3Sbluhm } 4626bd8f1dc3Sbluhm END_TEST 4627bd8f1dc3Sbluhm 4628bd8f1dc3Sbluhm /* Test little-endian UTF-16 given no explicit encoding. 4629bd8f1dc3Sbluhm * The existing default encoding (UTF-8) is assumed to hold without a 4630bd8f1dc3Sbluhm * BOM to contradict it, so the entity value will in fact provoke an 4631bd8f1dc3Sbluhm * error because 0x00 is not a valid XML character. We parse the 4632bd8f1dc3Sbluhm * whole buffer in one go rather than feeding it in byte by byte to 4633bd8f1dc3Sbluhm * exercise different code paths in the initial scanning routines. 4634bd8f1dc3Sbluhm */ 4635bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_unknown) { 4636bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4637bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4638bd8f1dc3Sbluhm "]>\n" 4639bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4640bd8f1dc3Sbluhm ExtFaults2 test_data 4641bd8f1dc3Sbluhm = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, 4642bd8f1dc3Sbluhm XML_ERROR_INVALID_TOKEN}; 4643bd8f1dc3Sbluhm 4644bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); 4645bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4646bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 4647bd8f1dc3Sbluhm "Invalid character should not have been accepted"); 4648bd8f1dc3Sbluhm } 4649bd8f1dc3Sbluhm END_TEST 4650bd8f1dc3Sbluhm 4651bd8f1dc3Sbluhm /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ 4652bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf8_non_bom) { 4653bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4654bd8f1dc3Sbluhm " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4655bd8f1dc3Sbluhm "]>\n" 4656bd8f1dc3Sbluhm "<doc>&en;</doc>"; 4657bd8f1dc3Sbluhm ExtTest2 test_data 
4658bd8f1dc3Sbluhm = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 4659bd8f1dc3Sbluhm 3, NULL, NULL}; 4660bd8f1dc3Sbluhm #ifdef XML_UNICODE 4661bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xfec0"); 4662bd8f1dc3Sbluhm #else 4663bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xef\xbb\x80"); 4664bd8f1dc3Sbluhm #endif 4665bd8f1dc3Sbluhm CharData storage; 4666bd8f1dc3Sbluhm 4667bd8f1dc3Sbluhm CharData_Init(&storage); 4668bd8f1dc3Sbluhm test_data.storage = &storage; 4669bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4670bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 4671bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4672bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4673bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4674bd8f1dc3Sbluhm xml_failure(g_parser); 4675bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4676bd8f1dc3Sbluhm } 4677bd8f1dc3Sbluhm END_TEST 4678bd8f1dc3Sbluhm 4679bd8f1dc3Sbluhm /* Test that UTF-8 in a CDATA section is correctly passed through */ 4680bd8f1dc3Sbluhm START_TEST(test_utf8_in_cdata_section) { 4681bd8f1dc3Sbluhm const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; 4682bd8f1dc3Sbluhm #ifdef XML_UNICODE 4683bd8f1dc3Sbluhm const XML_Char *expected = XCS("one \x00e9 two"); 4684bd8f1dc3Sbluhm #else 4685bd8f1dc3Sbluhm const XML_Char *expected = XCS("one \xc3\xa9 two"); 4686bd8f1dc3Sbluhm #endif 4687bd8f1dc3Sbluhm 4688bd8f1dc3Sbluhm run_character_check(text, expected); 4689bd8f1dc3Sbluhm } 4690bd8f1dc3Sbluhm END_TEST 4691bd8f1dc3Sbluhm 4692bd8f1dc3Sbluhm /* Test that little-endian UTF-16 in a CDATA section is handled */ 4693bd8f1dc3Sbluhm START_TEST(test_utf8_in_cdata_section_2) { 4694bd8f1dc3Sbluhm const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 4695bd8f1dc3Sbluhm #ifdef XML_UNICODE 4696bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e9]\x00e9two"); 4697bd8f1dc3Sbluhm #else 
4698bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 4699bd8f1dc3Sbluhm #endif 4700bd8f1dc3Sbluhm 4701bd8f1dc3Sbluhm run_character_check(text, expected); 4702bd8f1dc3Sbluhm } 4703bd8f1dc3Sbluhm END_TEST 4704bd8f1dc3Sbluhm 4705bd8f1dc3Sbluhm START_TEST(test_utf8_in_start_tags) { 4706bd8f1dc3Sbluhm struct test_case { 4707bd8f1dc3Sbluhm bool goodName; 4708bd8f1dc3Sbluhm bool goodNameStart; 4709bd8f1dc3Sbluhm const char *tagName; 4710bd8f1dc3Sbluhm }; 4711bd8f1dc3Sbluhm 4712bd8f1dc3Sbluhm // The idea with the tests below is this: 4713bd8f1dc3Sbluhm // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 4714bd8f1dc3Sbluhm // go to isNever and are hence not a concern. 4715bd8f1dc3Sbluhm // 4716bd8f1dc3Sbluhm // We start with a character that is a valid name character 4717bd8f1dc3Sbluhm // (or even name-start character, see XML 1.0r4 spec) and then we flip 4718bd8f1dc3Sbluhm // single bits at places where (1) the result leaves the UTF-8 encoding space 4719bd8f1dc3Sbluhm // and (2) we stay in the same n-byte sequence family. 4720bd8f1dc3Sbluhm // 4721bd8f1dc3Sbluhm // The flipped bits are highlighted in angle brackets in comments, 4722bd8f1dc3Sbluhm // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 4723bd8f1dc3Sbluhm // the most significant bit to 1 to leave UTF-8 encoding space. 
4724bd8f1dc3Sbluhm struct test_case cases[] = { 4725bd8f1dc3Sbluhm // 1-byte UTF-8: [0xxx xxxx] 4726bd8f1dc3Sbluhm {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 4727bd8f1dc3Sbluhm {false, false, "\xBA"}, // [<1>011 1010] 4728bd8f1dc3Sbluhm {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 4729bd8f1dc3Sbluhm {false, false, "\xB9"}, // [<1>011 1001] 4730bd8f1dc3Sbluhm 4731bd8f1dc3Sbluhm // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 4732bd8f1dc3Sbluhm {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 4733bd8f1dc3Sbluhm // Arabic small waw U+06E5 4734bd8f1dc3Sbluhm {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 4735bd8f1dc3Sbluhm {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 4736bd8f1dc3Sbluhm {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 4737bd8f1dc3Sbluhm {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 4738bd8f1dc3Sbluhm // combining char U+0301 4739bd8f1dc3Sbluhm {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 4740bd8f1dc3Sbluhm {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 4741bd8f1dc3Sbluhm {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 4742bd8f1dc3Sbluhm 4743bd8f1dc3Sbluhm // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 4744bd8f1dc3Sbluhm {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 4745bd8f1dc3Sbluhm // Devanagari Letter A U+0905 4746bd8f1dc3Sbluhm {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 4747bd8f1dc3Sbluhm {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 4748bd8f1dc3Sbluhm {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 4749bd8f1dc3Sbluhm {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 4750bd8f1dc3Sbluhm {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 4751bd8f1dc3Sbluhm {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 4752bd8f1dc3Sbluhm // combining char U+0901 4753bd8f1dc3Sbluhm {false, false, 
"\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 4754bd8f1dc3Sbluhm {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 4755bd8f1dc3Sbluhm {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 4756bd8f1dc3Sbluhm {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 4757bd8f1dc3Sbluhm {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 4758bd8f1dc3Sbluhm }; 4759bd8f1dc3Sbluhm const bool atNameStart[] = {true, false}; 4760bd8f1dc3Sbluhm 4761bd8f1dc3Sbluhm size_t i = 0; 4762bd8f1dc3Sbluhm char doc[1024]; 4763bd8f1dc3Sbluhm size_t failCount = 0; 4764bd8f1dc3Sbluhm 4765bd8f1dc3Sbluhm // we need all the bytes to be parsed, but we don't want the errors that can 4766bd8f1dc3Sbluhm // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 4767bd8f1dc3Sbluhm if (g_reparseDeferralEnabledDefault) { 4768bd8f1dc3Sbluhm return; 4769bd8f1dc3Sbluhm } 4770bd8f1dc3Sbluhm 4771bd8f1dc3Sbluhm for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 4772bd8f1dc3Sbluhm size_t j = 0; 4773bd8f1dc3Sbluhm for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 4774bd8f1dc3Sbluhm const bool expectedSuccess 4775bd8f1dc3Sbluhm = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 4776bd8f1dc3Sbluhm snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? 
"" : "a", 4777bd8f1dc3Sbluhm cases[i].tagName); 4778bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 4779bd8f1dc3Sbluhm 4780bd8f1dc3Sbluhm const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 4781bd8f1dc3Sbluhm parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 4782bd8f1dc3Sbluhm 4783bd8f1dc3Sbluhm bool success = true; 4784bd8f1dc3Sbluhm if ((status == XML_STATUS_OK) != expectedSuccess) { 4785bd8f1dc3Sbluhm success = false; 4786bd8f1dc3Sbluhm } 4787bd8f1dc3Sbluhm if ((status == XML_STATUS_ERROR) 4788bd8f1dc3Sbluhm && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 4789bd8f1dc3Sbluhm success = false; 4790bd8f1dc3Sbluhm } 4791bd8f1dc3Sbluhm 4792bd8f1dc3Sbluhm if (! success) { 4793bd8f1dc3Sbluhm fprintf( 4794bd8f1dc3Sbluhm stderr, 4795bd8f1dc3Sbluhm "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 4796bd8f1dc3Sbluhm (unsigned)i + 1u, atNameStart[j] ? " " : "not ", 4797bd8f1dc3Sbluhm (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 4798bd8f1dc3Sbluhm failCount++; 4799bd8f1dc3Sbluhm } 4800bd8f1dc3Sbluhm 4801bd8f1dc3Sbluhm XML_ParserFree(parser); 4802bd8f1dc3Sbluhm } 4803bd8f1dc3Sbluhm } 4804bd8f1dc3Sbluhm 4805bd8f1dc3Sbluhm if (failCount > 0) { 4806bd8f1dc3Sbluhm fail("UTF-8 regression detected"); 4807bd8f1dc3Sbluhm } 4808bd8f1dc3Sbluhm } 4809bd8f1dc3Sbluhm END_TEST 4810bd8f1dc3Sbluhm 4811bd8f1dc3Sbluhm /* Test trailing spaces in elements are accepted */ 4812bd8f1dc3Sbluhm START_TEST(test_trailing_spaces_in_elements) { 4813bd8f1dc3Sbluhm const char *text = "<doc >Hi</doc >"; 4814bd8f1dc3Sbluhm const XML_Char *expected = XCS("doc/doc"); 4815bd8f1dc3Sbluhm CharData storage; 4816bd8f1dc3Sbluhm 4817bd8f1dc3Sbluhm CharData_Init(&storage); 4818bd8f1dc3Sbluhm XML_SetElementHandler(g_parser, record_element_start_handler, 4819bd8f1dc3Sbluhm record_element_end_handler); 4820bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4821bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 
4822bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4823bd8f1dc3Sbluhm xml_failure(g_parser); 4824bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4825bd8f1dc3Sbluhm } 4826bd8f1dc3Sbluhm END_TEST 4827bd8f1dc3Sbluhm 4828bd8f1dc3Sbluhm START_TEST(test_utf16_attribute) { 4829bd8f1dc3Sbluhm const char text[] = 4830bd8f1dc3Sbluhm /* <d {KHO KHWAI}{CHO CHAN}='a'/> 4831bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4832bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4833bd8f1dc3Sbluhm */ 4834bd8f1dc3Sbluhm "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 4835bd8f1dc3Sbluhm const XML_Char *expected = XCS("a"); 4836bd8f1dc3Sbluhm CharData storage; 4837bd8f1dc3Sbluhm 4838bd8f1dc3Sbluhm CharData_Init(&storage); 4839bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, accumulate_attribute); 4840bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4841bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4842bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4843bd8f1dc3Sbluhm xml_failure(g_parser); 4844bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4845bd8f1dc3Sbluhm } 4846bd8f1dc3Sbluhm END_TEST 4847bd8f1dc3Sbluhm 4848bd8f1dc3Sbluhm START_TEST(test_utf16_second_attr) { 4849bd8f1dc3Sbluhm /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 4850bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4851bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4852bd8f1dc3Sbluhm */ 4853bd8f1dc3Sbluhm const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 4854bd8f1dc3Sbluhm "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 4855bd8f1dc3Sbluhm const XML_Char *expected = XCS("1"); 4856bd8f1dc3Sbluhm CharData storage; 4857bd8f1dc3Sbluhm 4858bd8f1dc3Sbluhm CharData_Init(&storage); 4859bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, accumulate_attribute); 4860bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4861bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 
4862bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4863bd8f1dc3Sbluhm xml_failure(g_parser); 4864bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4865bd8f1dc3Sbluhm } 4866bd8f1dc3Sbluhm END_TEST 4867bd8f1dc3Sbluhm 4868bd8f1dc3Sbluhm START_TEST(test_attr_after_solidus) { 4869bd8f1dc3Sbluhm const char *text = "<doc attr1='a' / attr2='b'>"; 4870bd8f1dc3Sbluhm 4871bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 4872bd8f1dc3Sbluhm } 4873bd8f1dc3Sbluhm END_TEST 4874bd8f1dc3Sbluhm 4875bd8f1dc3Sbluhm START_TEST(test_utf16_pe) { 4876bd8f1dc3Sbluhm /* <!DOCTYPE doc [ 4877bd8f1dc3Sbluhm * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 4878bd8f1dc3Sbluhm * %{KHO KHWAI}{CHO CHAN}; 4879bd8f1dc3Sbluhm * ]> 4880bd8f1dc3Sbluhm * <doc></doc> 4881bd8f1dc3Sbluhm * 4882bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4883bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4884bd8f1dc3Sbluhm */ 4885bd8f1dc3Sbluhm const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4886bd8f1dc3Sbluhm "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 4887bd8f1dc3Sbluhm "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 4888bd8f1dc3Sbluhm "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 4889bd8f1dc3Sbluhm "\0%\x0e\x04\x0e\x08\0;\0\n" 4890bd8f1dc3Sbluhm "\0]\0>\0\n" 4891bd8f1dc3Sbluhm "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 4892bd8f1dc3Sbluhm #ifdef XML_UNICODE 4893bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 4894bd8f1dc3Sbluhm #else 4895bd8f1dc3Sbluhm const XML_Char *expected 4896bd8f1dc3Sbluhm = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 4897bd8f1dc3Sbluhm #endif 4898bd8f1dc3Sbluhm CharData storage; 4899bd8f1dc3Sbluhm 4900bd8f1dc3Sbluhm CharData_Init(&storage); 4901bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 4902bd8f1dc3Sbluhm XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 4903bd8f1dc3Sbluhm if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4904bd8f1dc3Sbluhm == XML_STATUS_ERROR) 4905bd8f1dc3Sbluhm xml_failure(g_parser); 4906bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 4907bd8f1dc3Sbluhm } 4908bd8f1dc3Sbluhm END_TEST 4909bd8f1dc3Sbluhm 4910bd8f1dc3Sbluhm /* Test that duff attribute description keywords are rejected */ 4911bd8f1dc3Sbluhm START_TEST(test_bad_attr_desc_keyword) { 4912bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 4913bd8f1dc3Sbluhm " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 4914bd8f1dc3Sbluhm "]>\n" 4915bd8f1dc3Sbluhm "<doc />"; 4916bd8f1dc3Sbluhm 4917bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4918bd8f1dc3Sbluhm "Bad keyword !IMPLIED not faulted"); 4919bd8f1dc3Sbluhm } 4920bd8f1dc3Sbluhm END_TEST 4921bd8f1dc3Sbluhm 4922bd8f1dc3Sbluhm /* Test that an invalid attribute description keyword consisting of 4923bd8f1dc3Sbluhm * UTF-16 characters with their top bytes non-zero are correctly 4924bd8f1dc3Sbluhm * faulted 4925bd8f1dc3Sbluhm */ 4926bd8f1dc3Sbluhm START_TEST(test_bad_attr_desc_keyword_utf16) { 4927bd8f1dc3Sbluhm /* <!DOCTYPE d [ 4928bd8f1dc3Sbluhm * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 4929bd8f1dc3Sbluhm * ]><d/> 4930bd8f1dc3Sbluhm * 4931bd8f1dc3Sbluhm * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4932bd8f1dc3Sbluhm * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4933bd8f1dc3Sbluhm */ 4934bd8f1dc3Sbluhm const char text[] 4935bd8f1dc3Sbluhm = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 4936bd8f1dc3Sbluhm "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 4937bd8f1dc3Sbluhm "\0#\x0e\x04\x0e\x08\0>\0\n" 4938bd8f1dc3Sbluhm "\0]\0>\0<\0d\0/\0>"; 4939bd8f1dc3Sbluhm 4940bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4941bd8f1dc3Sbluhm != XML_STATUS_ERROR) 4942bd8f1dc3Sbluhm fail("Invalid UTF16 attribute keyword not faulted"); 4943bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 
4944bd8f1dc3Sbluhm xml_failure(g_parser); 4945bd8f1dc3Sbluhm } 4946bd8f1dc3Sbluhm END_TEST 4947bd8f1dc3Sbluhm 4948bd8f1dc3Sbluhm /* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this 4949bd8f1dc3Sbluhm * using prefix-encoding (see above) to trigger specific code paths 4950bd8f1dc3Sbluhm */ 4951bd8f1dc3Sbluhm START_TEST(test_bad_doctype) { 4952bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n" 4953bd8f1dc3Sbluhm "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; 4954bd8f1dc3Sbluhm 4955bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 4956bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 4957bd8f1dc3Sbluhm "Invalid bytes in DOCTYPE not faulted"); 4958bd8f1dc3Sbluhm } 4959bd8f1dc3Sbluhm END_TEST 4960bd8f1dc3Sbluhm 4961bd8f1dc3Sbluhm START_TEST(test_bad_doctype_utf8) { 4962bd8f1dc3Sbluhm const char *text = "<!DOCTYPE \xDB\x25" 4963bd8f1dc3Sbluhm "doc><doc/>"; // [1101 1011] [<0>010 0101] 4964bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4965bd8f1dc3Sbluhm "Invalid UTF-8 in DOCTYPE not faulted"); 4966bd8f1dc3Sbluhm } 4967bd8f1dc3Sbluhm END_TEST 4968bd8f1dc3Sbluhm 4969bd8f1dc3Sbluhm START_TEST(test_bad_doctype_utf16) { 4970bd8f1dc3Sbluhm const char text[] = 4971bd8f1dc3Sbluhm /* <!DOCTYPE doc [ \x06f2 ]><doc/> 4972bd8f1dc3Sbluhm * 4973bd8f1dc3Sbluhm * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number 4974bd8f1dc3Sbluhm * (name character) but not a valid letter (name start character) 4975bd8f1dc3Sbluhm */ 4976bd8f1dc3Sbluhm "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " 4977bd8f1dc3Sbluhm "\x06\xf2" 4978bd8f1dc3Sbluhm "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; 4979bd8f1dc3Sbluhm 4980bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4981bd8f1dc3Sbluhm != XML_STATUS_ERROR) 4982bd8f1dc3Sbluhm fail("Invalid bytes in DOCTYPE not faulted"); 4983bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 4984bd8f1dc3Sbluhm xml_failure(g_parser); 
4985bd8f1dc3Sbluhm } 4986bd8f1dc3Sbluhm END_TEST 4987bd8f1dc3Sbluhm 4988bd8f1dc3Sbluhm START_TEST(test_bad_doctype_plus) { 4989bd8f1dc3Sbluhm const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" 4990bd8f1dc3Sbluhm "<1+>&foo;</1+>"; 4991bd8f1dc3Sbluhm 4992bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 4993bd8f1dc3Sbluhm "'+' in document name not faulted"); 4994bd8f1dc3Sbluhm } 4995bd8f1dc3Sbluhm END_TEST 4996bd8f1dc3Sbluhm 4997bd8f1dc3Sbluhm START_TEST(test_bad_doctype_star) { 4998bd8f1dc3Sbluhm const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" 4999bd8f1dc3Sbluhm "<1*>&foo;</1*>"; 5000bd8f1dc3Sbluhm 5001bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 5002bd8f1dc3Sbluhm "'*' in document name not faulted"); 5003bd8f1dc3Sbluhm } 5004bd8f1dc3Sbluhm END_TEST 5005bd8f1dc3Sbluhm 5006bd8f1dc3Sbluhm START_TEST(test_bad_doctype_query) { 5007bd8f1dc3Sbluhm const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n" 5008bd8f1dc3Sbluhm "<1?>&foo;</1?>"; 5009bd8f1dc3Sbluhm 5010bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 5011bd8f1dc3Sbluhm "'?' 
in document name not faulted"); 5012bd8f1dc3Sbluhm } 5013bd8f1dc3Sbluhm END_TEST 5014bd8f1dc3Sbluhm 5015bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_ignore) { 5016bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='prefix-conv'?>" 5017bd8f1dc3Sbluhm "<!DOCTYPE doc SYSTEM 'foo'>" 5018bd8f1dc3Sbluhm "<doc><e>&entity;</e></doc>"; 5019bd8f1dc3Sbluhm ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", 5020bd8f1dc3Sbluhm "Invalid character not faulted", XCS("prefix-conv"), 5021bd8f1dc3Sbluhm XML_ERROR_INVALID_TOKEN}; 5022bd8f1dc3Sbluhm 5023bd8f1dc3Sbluhm XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); 5024bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5025bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 5026bd8f1dc3Sbluhm XML_SetUserData(g_parser, &fault); 5027bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 5028bd8f1dc3Sbluhm "Bad IGNORE section with unknown encoding not failed"); 5029bd8f1dc3Sbluhm } 5030bd8f1dc3Sbluhm END_TEST 5031bd8f1dc3Sbluhm 5032bd8f1dc3Sbluhm START_TEST(test_entity_in_utf16_be_attr) { 5033bd8f1dc3Sbluhm const char text[] = 5034bd8f1dc3Sbluhm /* <e a='ä ä'></e> */ 5035bd8f1dc3Sbluhm "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " 5036bd8f1dc3Sbluhm "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; 5037bd8f1dc3Sbluhm #ifdef XML_UNICODE 5038bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e4 \x00e4"); 5039bd8f1dc3Sbluhm #else 5040bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5041bd8f1dc3Sbluhm #endif 5042bd8f1dc3Sbluhm CharData storage; 5043bd8f1dc3Sbluhm 5044bd8f1dc3Sbluhm CharData_Init(&storage); 5045bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 5046bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, accumulate_attribute); 5047bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5048bd8f1dc3Sbluhm == XML_STATUS_ERROR) 
5049bd8f1dc3Sbluhm xml_failure(g_parser); 5050bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5051bd8f1dc3Sbluhm } 5052bd8f1dc3Sbluhm END_TEST 5053bd8f1dc3Sbluhm 5054bd8f1dc3Sbluhm START_TEST(test_entity_in_utf16_le_attr) { 5055bd8f1dc3Sbluhm const char text[] = 5056bd8f1dc3Sbluhm /* <e a='ä ä'></e> */ 5057bd8f1dc3Sbluhm "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" 5058bd8f1dc3Sbluhm "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; 5059bd8f1dc3Sbluhm #ifdef XML_UNICODE 5060bd8f1dc3Sbluhm const XML_Char *expected = XCS("\x00e4 \x00e4"); 5061bd8f1dc3Sbluhm #else 5062bd8f1dc3Sbluhm const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4"); 5063bd8f1dc3Sbluhm #endif 5064bd8f1dc3Sbluhm CharData storage; 5065bd8f1dc3Sbluhm 5066bd8f1dc3Sbluhm CharData_Init(&storage); 5067bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 5068bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, accumulate_attribute); 5069bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5070bd8f1dc3Sbluhm == XML_STATUS_ERROR) 5071bd8f1dc3Sbluhm xml_failure(g_parser); 5072bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5073bd8f1dc3Sbluhm } 5074bd8f1dc3Sbluhm END_TEST 5075bd8f1dc3Sbluhm 5076bd8f1dc3Sbluhm START_TEST(test_entity_public_utf16_be) { 5077bd8f1dc3Sbluhm const char text[] = 5078bd8f1dc3Sbluhm /* <!DOCTYPE d [ */ 5079bd8f1dc3Sbluhm "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 5080bd8f1dc3Sbluhm /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5081bd8f1dc3Sbluhm "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " 5082bd8f1dc3Sbluhm "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" 5083bd8f1dc3Sbluhm /* %e; */ 5084bd8f1dc3Sbluhm "\0%\0e\0;\0\n" 5085bd8f1dc3Sbluhm /* ]> */ 5086bd8f1dc3Sbluhm "\0]\0>\0\n" 5087bd8f1dc3Sbluhm /* <d>&j;</d> */ 5088bd8f1dc3Sbluhm "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; 5089bd8f1dc3Sbluhm ExtTest2 test_data 5090bd8f1dc3Sbluhm = {/* <!ENTITY j 'baz'> */ 5091bd8f1dc3Sbluhm "\0<\0!\0E\0N\0T\0I\0T\0Y\0 
\0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL}; 5092bd8f1dc3Sbluhm const XML_Char *expected = XCS("baz"); 5093bd8f1dc3Sbluhm CharData storage; 5094bd8f1dc3Sbluhm 5095bd8f1dc3Sbluhm CharData_Init(&storage); 5096bd8f1dc3Sbluhm test_data.storage = &storage; 5097bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5098bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5099bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 5100bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5101bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5102bd8f1dc3Sbluhm == XML_STATUS_ERROR) 5103bd8f1dc3Sbluhm xml_failure(g_parser); 5104bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5105bd8f1dc3Sbluhm } 5106bd8f1dc3Sbluhm END_TEST 5107bd8f1dc3Sbluhm 5108bd8f1dc3Sbluhm START_TEST(test_entity_public_utf16_le) { 5109bd8f1dc3Sbluhm const char text[] = 5110bd8f1dc3Sbluhm /* <!DOCTYPE d [ */ 5111bd8f1dc3Sbluhm "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" 5112bd8f1dc3Sbluhm /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ 5113bd8f1dc3Sbluhm "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" 5114bd8f1dc3Sbluhm "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" 5115bd8f1dc3Sbluhm /* %e; */ 5116bd8f1dc3Sbluhm "%\0e\0;\0\n\0" 5117bd8f1dc3Sbluhm /* ]> */ 5118bd8f1dc3Sbluhm "]\0>\0\n\0" 5119bd8f1dc3Sbluhm /* <d>&j;</d> */ 5120bd8f1dc3Sbluhm "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; 5121bd8f1dc3Sbluhm ExtTest2 test_data 5122bd8f1dc3Sbluhm = {/* <!ENTITY j 'baz'> */ 5123bd8f1dc3Sbluhm "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL}; 5124bd8f1dc3Sbluhm const XML_Char *expected = XCS("baz"); 5125bd8f1dc3Sbluhm CharData storage; 5126bd8f1dc3Sbluhm 5127bd8f1dc3Sbluhm CharData_Init(&storage); 5128bd8f1dc3Sbluhm test_data.storage = &storage; 5129bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 
5130bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 5131bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 5132bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 5133bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 5134bd8f1dc3Sbluhm == XML_STATUS_ERROR) 5135bd8f1dc3Sbluhm xml_failure(g_parser); 5136bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5137bd8f1dc3Sbluhm } 5138bd8f1dc3Sbluhm END_TEST 5139bd8f1dc3Sbluhm 5140bd8f1dc3Sbluhm /* Test that a doctype with neither an internal nor external subset is 5141bd8f1dc3Sbluhm * faulted 5142bd8f1dc3Sbluhm */ 5143bd8f1dc3Sbluhm START_TEST(test_short_doctype) { 5144bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc></doc>"; 5145bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_INVALID_TOKEN, 5146bd8f1dc3Sbluhm "DOCTYPE without subset not rejected"); 5147bd8f1dc3Sbluhm } 5148bd8f1dc3Sbluhm END_TEST 5149bd8f1dc3Sbluhm 5150bd8f1dc3Sbluhm START_TEST(test_short_doctype_2) { 5151bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc PUBLIC></doc>"; 5152bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5153bd8f1dc3Sbluhm "DOCTYPE without Public ID not rejected"); 5154bd8f1dc3Sbluhm } 5155bd8f1dc3Sbluhm END_TEST 5156bd8f1dc3Sbluhm 5157bd8f1dc3Sbluhm START_TEST(test_short_doctype_3) { 5158bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc SYSTEM></doc>"; 5159bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5160bd8f1dc3Sbluhm "DOCTYPE without System ID not rejected"); 5161bd8f1dc3Sbluhm } 5162bd8f1dc3Sbluhm END_TEST 5163bd8f1dc3Sbluhm 5164bd8f1dc3Sbluhm START_TEST(test_long_doctype) { 5165bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; 5166bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected"); 5167bd8f1dc3Sbluhm } 5168bd8f1dc3Sbluhm END_TEST 5169bd8f1dc3Sbluhm 5170bd8f1dc3Sbluhm START_TEST(test_bad_entity) { 5171bd8f1dc3Sbluhm const char *text = 
"<!DOCTYPE doc [\n" 5172bd8f1dc3Sbluhm " <!ENTITY foo PUBLIC>\n" 5173bd8f1dc3Sbluhm "]>\n" 5174bd8f1dc3Sbluhm "<doc/>"; 5175bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5176bd8f1dc3Sbluhm "ENTITY without Public ID is not rejected"); 5177bd8f1dc3Sbluhm } 5178bd8f1dc3Sbluhm END_TEST 5179bd8f1dc3Sbluhm 5180bd8f1dc3Sbluhm /* Test unquoted value is faulted */ 5181bd8f1dc3Sbluhm START_TEST(test_bad_entity_2) { 5182bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 5183bd8f1dc3Sbluhm " <!ENTITY % foo bar>\n" 5184bd8f1dc3Sbluhm "]>\n" 5185bd8f1dc3Sbluhm "<doc/>"; 5186bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5187bd8f1dc3Sbluhm "ENTITY without Public ID is not rejected"); 5188bd8f1dc3Sbluhm } 5189bd8f1dc3Sbluhm END_TEST 5190bd8f1dc3Sbluhm 5191bd8f1dc3Sbluhm START_TEST(test_bad_entity_3) { 5192bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 5193bd8f1dc3Sbluhm " <!ENTITY % foo PUBLIC>\n" 5194bd8f1dc3Sbluhm "]>\n" 5195bd8f1dc3Sbluhm "<doc/>"; 5196bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5197bd8f1dc3Sbluhm "Parameter ENTITY without Public ID is not rejected"); 5198bd8f1dc3Sbluhm } 5199bd8f1dc3Sbluhm END_TEST 5200bd8f1dc3Sbluhm 5201bd8f1dc3Sbluhm START_TEST(test_bad_entity_4) { 5202bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 5203bd8f1dc3Sbluhm " <!ENTITY % foo SYSTEM>\n" 5204bd8f1dc3Sbluhm "]>\n" 5205bd8f1dc3Sbluhm "<doc/>"; 5206bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5207bd8f1dc3Sbluhm "Parameter ENTITY without Public ID is not rejected"); 5208bd8f1dc3Sbluhm } 5209bd8f1dc3Sbluhm END_TEST 5210bd8f1dc3Sbluhm 5211bd8f1dc3Sbluhm START_TEST(test_bad_notation) { 5212bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc [\n" 5213bd8f1dc3Sbluhm " <!NOTATION n SYSTEM>\n" 5214bd8f1dc3Sbluhm "]>\n" 5215bd8f1dc3Sbluhm "<doc/>"; 5216bd8f1dc3Sbluhm expect_failure(text, XML_ERROR_SYNTAX, 5217bd8f1dc3Sbluhm "Notation without System ID is not rejected"); 5218bd8f1dc3Sbluhm } 5219bd8f1dc3Sbluhm END_TEST 5220bd8f1dc3Sbluhm 
5221bd8f1dc3Sbluhm /* Test for issue #11, wrongly suppressed default handler */ 5222bd8f1dc3Sbluhm START_TEST(test_default_doctype_handler) { 5223bd8f1dc3Sbluhm const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5224bd8f1dc3Sbluhm " <!ENTITY foo 'bar'>\n" 5225bd8f1dc3Sbluhm "]>\n" 5226bd8f1dc3Sbluhm "<doc>&foo;</doc>"; 5227bd8f1dc3Sbluhm DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5228bd8f1dc3Sbluhm {XCS("'test.dtd'"), 10, XML_FALSE}, 5229bd8f1dc3Sbluhm {NULL, 0, XML_FALSE}}; 5230bd8f1dc3Sbluhm int i; 5231bd8f1dc3Sbluhm 5232bd8f1dc3Sbluhm XML_SetUserData(g_parser, &test_data); 5233bd8f1dc3Sbluhm XML_SetDefaultHandler(g_parser, checking_default_handler); 5234bd8f1dc3Sbluhm XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5235bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5236bd8f1dc3Sbluhm == XML_STATUS_ERROR) 5237bd8f1dc3Sbluhm xml_failure(g_parser); 5238bd8f1dc3Sbluhm for (i = 0; test_data[i].expected != NULL; i++) 5239bd8f1dc3Sbluhm if (! test_data[i].seen) 5240bd8f1dc3Sbluhm fail("Default handler not run for public !DOCTYPE"); 5241bd8f1dc3Sbluhm } 5242bd8f1dc3Sbluhm END_TEST 5243bd8f1dc3Sbluhm 5244bd8f1dc3Sbluhm START_TEST(test_empty_element_abort) { 5245bd8f1dc3Sbluhm const char *text = "<abort/>"; 5246bd8f1dc3Sbluhm 5247bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, start_element_suspender); 5248bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5249bd8f1dc3Sbluhm != XML_STATUS_ERROR) 5250bd8f1dc3Sbluhm fail("Expected to error on abort"); 5251bd8f1dc3Sbluhm } 5252bd8f1dc3Sbluhm END_TEST 5253bd8f1dc3Sbluhm 5254bd8f1dc3Sbluhm /* Regression test for GH issue #612: unfinished m_declAttributeType 5255bd8f1dc3Sbluhm * allocation in ->m_tempPool can corrupt following allocation. 
5256bd8f1dc3Sbluhm */ 5257bd8f1dc3Sbluhm START_TEST(test_pool_integrity_with_unfinished_attr) { 5258bd8f1dc3Sbluhm const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5259bd8f1dc3Sbluhm "<!DOCTYPE foo [\n" 5260bd8f1dc3Sbluhm "<!ELEMENT foo ANY>\n" 5261bd8f1dc3Sbluhm "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5262bd8f1dc3Sbluhm "%entp;\n" 5263bd8f1dc3Sbluhm "]>\n" 5264bd8f1dc3Sbluhm "<a></a>\n"; 5265bd8f1dc3Sbluhm const XML_Char *expected = XCS("COMMENT"); 5266bd8f1dc3Sbluhm CharData storage; 5267bd8f1dc3Sbluhm 5268bd8f1dc3Sbluhm CharData_Init(&storage); 5269bd8f1dc3Sbluhm XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5270bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5271bd8f1dc3Sbluhm XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5272bd8f1dc3Sbluhm XML_SetCommentHandler(g_parser, accumulate_comment); 5273bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 5274bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5275bd8f1dc3Sbluhm == XML_STATUS_ERROR) 5276bd8f1dc3Sbluhm xml_failure(g_parser); 5277bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5278bd8f1dc3Sbluhm } 5279bd8f1dc3Sbluhm END_TEST 5280bd8f1dc3Sbluhm 5281bd8f1dc3Sbluhm START_TEST(test_nested_entity_suspend) { 5282bd8f1dc3Sbluhm const char *const text = "<!DOCTYPE a [\n" 5283bd8f1dc3Sbluhm " <!ENTITY e1 '<!--e1-->'>\n" 5284bd8f1dc3Sbluhm " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5285bd8f1dc3Sbluhm " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5286bd8f1dc3Sbluhm "]>\n" 5287bd8f1dc3Sbluhm "<a><!--start-->&e3;<!--end--></a>"; 5288bd8f1dc3Sbluhm const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5289bd8f1dc3Sbluhm XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5290bd8f1dc3Sbluhm CharData storage; 5291bd8f1dc3Sbluhm CharData_Init(&storage); 5292bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 
5293bd8f1dc3Sbluhm ParserPlusStorage parserPlusStorage = {parser, &storage}; 5294bd8f1dc3Sbluhm 5295bd8f1dc3Sbluhm XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5296bd8f1dc3Sbluhm XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5297bd8f1dc3Sbluhm XML_SetUserData(parser, &parserPlusStorage); 5298bd8f1dc3Sbluhm 5299bd8f1dc3Sbluhm enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5300bd8f1dc3Sbluhm while (status == XML_STATUS_SUSPENDED) { 5301bd8f1dc3Sbluhm status = XML_ResumeParser(parser); 5302bd8f1dc3Sbluhm } 5303bd8f1dc3Sbluhm if (status != XML_STATUS_OK) 5304bd8f1dc3Sbluhm xml_failure(parser); 5305bd8f1dc3Sbluhm 5306bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 5307bd8f1dc3Sbluhm XML_ParserFree(parser); 5308bd8f1dc3Sbluhm } 5309bd8f1dc3Sbluhm END_TEST 5310bd8f1dc3Sbluhm 5311f558d286Sbluhm #if defined(XML_TESTING) 5312bd8f1dc3Sbluhm /* Regression test for quadratic parsing on large tokens */ 5313c033f770Sbluhm START_TEST(test_big_tokens_scale_linearly) { 5314bd8f1dc3Sbluhm const struct { 5315bd8f1dc3Sbluhm const char *pre; 5316bd8f1dc3Sbluhm const char *post; 5317bd8f1dc3Sbluhm } text[] = { 5318bd8f1dc3Sbluhm {"<a>", "</a>"}, // assumed good, used as baseline 5319bd8f1dc3Sbluhm {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5320bd8f1dc3Sbluhm {"<c attr='", "'></c>"}, // big attribute, used to be O(N²) 5321bd8f1dc3Sbluhm {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²) 5322bd8f1dc3Sbluhm {"<e><", "/></e>"}, // big elem name, used to be O(N²) 5323bd8f1dc3Sbluhm }; 5324bd8f1dc3Sbluhm const int num_cases = sizeof(text) / sizeof(text[0]); 5325bd8f1dc3Sbluhm char aaaaaa[4096]; 5326bd8f1dc3Sbluhm const int fillsize = (int)sizeof(aaaaaa); 5327bd8f1dc3Sbluhm const int fillcount = 100; 5328c033f770Sbluhm const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. 
5329c033f770Sbluhm const unsigned max_factor = 4; 5330c033f770Sbluhm const unsigned max_scanned = max_factor * approx_bytes; 5331bd8f1dc3Sbluhm 5332bd8f1dc3Sbluhm memset(aaaaaa, 'a', fillsize); 5333bd8f1dc3Sbluhm 5334bd8f1dc3Sbluhm if (! g_reparseDeferralEnabledDefault) { 5335bd8f1dc3Sbluhm return; // heuristic is disabled; we would get O(n^2) and fail. 5336bd8f1dc3Sbluhm } 5337bd8f1dc3Sbluhm 5338bd8f1dc3Sbluhm for (int i = 0; i < num_cases; ++i) { 5339bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 5340bd8f1dc3Sbluhm assert_true(parser != NULL); 5341bd8f1dc3Sbluhm enum XML_Status status; 5342c033f770Sbluhm set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); 5343bd8f1dc3Sbluhm 5344bd8f1dc3Sbluhm // parse the start text 5345c033f770Sbluhm g_bytesScanned = 0; 5346bd8f1dc3Sbluhm status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5347bd8f1dc3Sbluhm (int)strlen(text[i].pre), XML_FALSE); 5348bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5349bd8f1dc3Sbluhm xml_failure(parser); 5350bd8f1dc3Sbluhm } 5351c033f770Sbluhm 5352bd8f1dc3Sbluhm // parse lots of 'a', failing the test early if it takes too long 5353c033f770Sbluhm unsigned past_max_count = 0; 5354bd8f1dc3Sbluhm for (int f = 0; f < fillcount; ++f) { 5355bd8f1dc3Sbluhm status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 5356bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5357bd8f1dc3Sbluhm xml_failure(parser); 5358bd8f1dc3Sbluhm } 5359c033f770Sbluhm if (g_bytesScanned > max_scanned) { 5360c033f770Sbluhm // We're not done, and have already passed the limit -- the test will 5361c033f770Sbluhm // definitely fail. This block allows us to save time by failing early. 
5362c033f770Sbluhm const unsigned pushed 5363c033f770Sbluhm = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; 5364bd8f1dc3Sbluhm fprintf( 5365bd8f1dc3Sbluhm stderr, 5366c033f770Sbluhm "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5367c033f770Sbluhm f + 1, fillcount, pushed, g_bytesScanned, 5368c033f770Sbluhm g_bytesScanned / (double)pushed, max_scanned, max_factor); 5369c033f770Sbluhm past_max_count++; 5370c033f770Sbluhm // We are failing, but allow a few log prints first. If we don't reach 5371c033f770Sbluhm // a count of five, the test will fail after the loop instead. 5372c033f770Sbluhm assert_true(past_max_count < 5); 5373bd8f1dc3Sbluhm } 5374bd8f1dc3Sbluhm } 5375c033f770Sbluhm 5376bd8f1dc3Sbluhm // parse the end text 5377bd8f1dc3Sbluhm status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 5378bd8f1dc3Sbluhm (int)strlen(text[i].post), XML_TRUE); 5379bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5380bd8f1dc3Sbluhm xml_failure(parser); 5381bd8f1dc3Sbluhm } 5382bd8f1dc3Sbluhm 5383c033f770Sbluhm assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working 5384c033f770Sbluhm if (g_bytesScanned > max_scanned) { 5385c033f770Sbluhm fprintf( 5386c033f770Sbluhm stderr, 5387c033f770Sbluhm "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5388c033f770Sbluhm g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, 5389c033f770Sbluhm max_factor); 5390c033f770Sbluhm fail("scanned too many bytes"); 5391bd8f1dc3Sbluhm } 5392bd8f1dc3Sbluhm 5393bd8f1dc3Sbluhm XML_ParserFree(parser); 5394bd8f1dc3Sbluhm } 5395bd8f1dc3Sbluhm } 5396bd8f1dc3Sbluhm END_TEST 5397f558d286Sbluhm #endif 5398bd8f1dc3Sbluhm 5399bd8f1dc3Sbluhm START_TEST(test_set_reparse_deferral) { 5400bd8f1dc3Sbluhm const char *const pre = "<d>"; 5401bd8f1dc3Sbluhm const char *const start = "<x attr='"; 5402bd8f1dc3Sbluhm const char *const end = "'></x>"; 5403bd8f1dc3Sbluhm char eeeeee[100]; 5404bd8f1dc3Sbluhm const int 
fillsize = (int)sizeof(eeeeee); 5405bd8f1dc3Sbluhm memset(eeeeee, 'e', fillsize); 5406bd8f1dc3Sbluhm 5407bd8f1dc3Sbluhm for (int enabled = 0; enabled <= 1; enabled += 1) { 5408bd8f1dc3Sbluhm set_subtest("deferral=%d", enabled); 5409bd8f1dc3Sbluhm 5410bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 5411bd8f1dc3Sbluhm assert_true(parser != NULL); 5412bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5413bd8f1dc3Sbluhm // pre-grow the buffer to avoid reparsing due to almost-fullness 5414bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5415bd8f1dc3Sbluhm 5416bd8f1dc3Sbluhm CharData storage; 5417bd8f1dc3Sbluhm CharData_Init(&storage); 5418bd8f1dc3Sbluhm XML_SetUserData(parser, &storage); 5419bd8f1dc3Sbluhm XML_SetStartElementHandler(parser, start_element_event_handler); 5420bd8f1dc3Sbluhm 5421bd8f1dc3Sbluhm enum XML_Status status; 5422bd8f1dc3Sbluhm // parse the start text 5423bd8f1dc3Sbluhm status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5424bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5425bd8f1dc3Sbluhm xml_failure(parser); 5426bd8f1dc3Sbluhm } 5427bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5428bd8f1dc3Sbluhm 5429bd8f1dc3Sbluhm // ..and the start of the token 5430bd8f1dc3Sbluhm status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5431bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5432bd8f1dc3Sbluhm xml_failure(parser); 5433bd8f1dc3Sbluhm } 5434bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 5435bd8f1dc3Sbluhm 5436bd8f1dc3Sbluhm // try to parse lots of 'e', but the token isn't finished 5437bd8f1dc3Sbluhm for (int c = 0; c < 100; ++c) { 5438bd8f1dc3Sbluhm status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5439bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5440bd8f1dc3Sbluhm xml_failure(parser); 5441bd8f1dc3Sbluhm } 5442bd8f1dc3Sbluhm } 5443bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, 
XCS("d")); // *still* just the first one 5444bd8f1dc3Sbluhm 5445bd8f1dc3Sbluhm // end the <x> token. 5446bd8f1dc3Sbluhm status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5447bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5448bd8f1dc3Sbluhm xml_failure(parser); 5449bd8f1dc3Sbluhm } 5450bd8f1dc3Sbluhm 5451bd8f1dc3Sbluhm if (enabled) { 5452bd8f1dc3Sbluhm // In general, we may need to push more data to trigger a reparse attempt, 5453bd8f1dc3Sbluhm // but in this test, the data is constructed to always require it. 5454bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 5455bd8f1dc3Sbluhm // 2x the token length should suffice; the +1 covers the start and end. 5456bd8f1dc3Sbluhm for (int c = 0; c < 101; ++c) { 5457bd8f1dc3Sbluhm status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5458bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5459bd8f1dc3Sbluhm xml_failure(parser); 5460bd8f1dc3Sbluhm } 5461bd8f1dc3Sbluhm } 5462bd8f1dc3Sbluhm } 5463bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 5464bd8f1dc3Sbluhm 5465bd8f1dc3Sbluhm XML_ParserFree(parser); 5466bd8f1dc3Sbluhm } 5467bd8f1dc3Sbluhm } 5468bd8f1dc3Sbluhm END_TEST 5469bd8f1dc3Sbluhm 5470bd8f1dc3Sbluhm struct element_decl_data { 5471bd8f1dc3Sbluhm XML_Parser parser; 5472bd8f1dc3Sbluhm int count; 5473bd8f1dc3Sbluhm }; 5474bd8f1dc3Sbluhm 5475bd8f1dc3Sbluhm static void 5476bd8f1dc3Sbluhm element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 5477bd8f1dc3Sbluhm UNUSED_P(name); 5478bd8f1dc3Sbluhm struct element_decl_data *testdata = (struct element_decl_data *)userData; 5479bd8f1dc3Sbluhm testdata->count += 1; 5480bd8f1dc3Sbluhm XML_FreeContentModel(testdata->parser, model); 5481bd8f1dc3Sbluhm } 5482bd8f1dc3Sbluhm 5483bd8f1dc3Sbluhm static int 5484bd8f1dc3Sbluhm external_inherited_parser(XML_Parser p, const XML_Char *context, 5485bd8f1dc3Sbluhm const XML_Char *base, const XML_Char *systemId, 5486bd8f1dc3Sbluhm const 
XML_Char *publicId) { 5487bd8f1dc3Sbluhm UNUSED_P(base); 5488bd8f1dc3Sbluhm UNUSED_P(systemId); 5489bd8f1dc3Sbluhm UNUSED_P(publicId); 5490bd8f1dc3Sbluhm const char *const pre = "<!ELEMENT document ANY>\n"; 5491bd8f1dc3Sbluhm const char *const start = "<!ELEMENT "; 5492bd8f1dc3Sbluhm const char *const end = " ANY>\n"; 5493bd8f1dc3Sbluhm const char *const post = "<!ELEMENT xyz ANY>\n"; 5494bd8f1dc3Sbluhm const int enabled = *(int *)XML_GetUserData(p); 5495bd8f1dc3Sbluhm char eeeeee[100]; 5496bd8f1dc3Sbluhm char spaces[100]; 5497bd8f1dc3Sbluhm const int fillsize = (int)sizeof(eeeeee); 5498bd8f1dc3Sbluhm assert_true(fillsize == (int)sizeof(spaces)); 5499bd8f1dc3Sbluhm memset(eeeeee, 'e', fillsize); 5500bd8f1dc3Sbluhm memset(spaces, ' ', fillsize); 5501bd8f1dc3Sbluhm 5502bd8f1dc3Sbluhm XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 5503bd8f1dc3Sbluhm assert_true(parser != NULL); 5504bd8f1dc3Sbluhm // pre-grow the buffer to avoid reparsing due to almost-fullness 5505bd8f1dc3Sbluhm assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5506bd8f1dc3Sbluhm 5507bd8f1dc3Sbluhm struct element_decl_data testdata; 5508bd8f1dc3Sbluhm testdata.parser = parser; 5509bd8f1dc3Sbluhm testdata.count = 0; 5510bd8f1dc3Sbluhm XML_SetUserData(parser, &testdata); 5511bd8f1dc3Sbluhm XML_SetElementDeclHandler(parser, element_decl_counter); 5512bd8f1dc3Sbluhm 5513bd8f1dc3Sbluhm enum XML_Status status; 5514bd8f1dc3Sbluhm // parse the initial text 5515bd8f1dc3Sbluhm status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5516bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5517bd8f1dc3Sbluhm xml_failure(parser); 5518bd8f1dc3Sbluhm } 5519bd8f1dc3Sbluhm assert_true(testdata.count == 1); // first element should be done 5520bd8f1dc3Sbluhm 5521bd8f1dc3Sbluhm // ..and the start of the big token 5522bd8f1dc3Sbluhm status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5523bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5524bd8f1dc3Sbluhm xml_failure(parser); 
5525bd8f1dc3Sbluhm } 5526bd8f1dc3Sbluhm assert_true(testdata.count == 1); // still just the first one 5527bd8f1dc3Sbluhm 5528bd8f1dc3Sbluhm // try to parse lots of 'e', but the token isn't finished 5529bd8f1dc3Sbluhm for (int c = 0; c < 100; ++c) { 5530bd8f1dc3Sbluhm status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5531bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5532bd8f1dc3Sbluhm xml_failure(parser); 5533bd8f1dc3Sbluhm } 5534bd8f1dc3Sbluhm } 5535bd8f1dc3Sbluhm assert_true(testdata.count == 1); // *still* just the first one 5536bd8f1dc3Sbluhm 5537bd8f1dc3Sbluhm // end the big token. 5538bd8f1dc3Sbluhm status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5539bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5540bd8f1dc3Sbluhm xml_failure(parser); 5541bd8f1dc3Sbluhm } 5542bd8f1dc3Sbluhm 5543bd8f1dc3Sbluhm if (enabled) { 5544bd8f1dc3Sbluhm // In general, we may need to push more data to trigger a reparse attempt, 5545bd8f1dc3Sbluhm // but in this test, the data is constructed to always require it. 5546bd8f1dc3Sbluhm assert_true(testdata.count == 1); // or the test is incorrect 5547bd8f1dc3Sbluhm // 2x the token length should suffice; the +1 covers the start and end. 
5548bd8f1dc3Sbluhm for (int c = 0; c < 101; ++c) { 5549bd8f1dc3Sbluhm status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 5550bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5551bd8f1dc3Sbluhm xml_failure(parser); 5552bd8f1dc3Sbluhm } 5553bd8f1dc3Sbluhm } 5554bd8f1dc3Sbluhm } 5555bd8f1dc3Sbluhm assert_true(testdata.count == 2); // the big token should be done 5556bd8f1dc3Sbluhm 5557bd8f1dc3Sbluhm // parse the final text 5558bd8f1dc3Sbluhm status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 5559bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5560bd8f1dc3Sbluhm xml_failure(parser); 5561bd8f1dc3Sbluhm } 5562bd8f1dc3Sbluhm assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 5563bd8f1dc3Sbluhm 5564bd8f1dc3Sbluhm XML_ParserFree(parser); 5565bd8f1dc3Sbluhm return XML_STATUS_OK; 5566bd8f1dc3Sbluhm } 5567bd8f1dc3Sbluhm 5568bd8f1dc3Sbluhm START_TEST(test_reparse_deferral_is_inherited) { 5569bd8f1dc3Sbluhm const char *const text 5570bd8f1dc3Sbluhm = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 5571bd8f1dc3Sbluhm for (int enabled = 0; enabled <= 1; ++enabled) { 5572bd8f1dc3Sbluhm set_subtest("deferral=%d", enabled); 5573bd8f1dc3Sbluhm 5574bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 5575bd8f1dc3Sbluhm assert_true(parser != NULL); 5576bd8f1dc3Sbluhm XML_SetUserData(parser, (void *)&enabled); 5577bd8f1dc3Sbluhm XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5578bd8f1dc3Sbluhm // this handler creates a sub-parser and checks that its deferral behavior 5579bd8f1dc3Sbluhm // is what we expected, based on the value of `enabled` (in userdata). 
5580bd8f1dc3Sbluhm XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 5581bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5582bd8f1dc3Sbluhm if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 5583bd8f1dc3Sbluhm xml_failure(parser); 5584bd8f1dc3Sbluhm 5585bd8f1dc3Sbluhm XML_ParserFree(parser); 5586bd8f1dc3Sbluhm } 5587bd8f1dc3Sbluhm } 5588bd8f1dc3Sbluhm END_TEST 5589bd8f1dc3Sbluhm 5590bd8f1dc3Sbluhm START_TEST(test_set_reparse_deferral_on_null_parser) { 5591bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 5592bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 5593bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 5594bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 5595bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 5596bd8f1dc3Sbluhm == XML_FALSE); 5597bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 5598bd8f1dc3Sbluhm == XML_FALSE); 5599bd8f1dc3Sbluhm } 5600bd8f1dc3Sbluhm END_TEST 5601bd8f1dc3Sbluhm 5602bd8f1dc3Sbluhm START_TEST(test_set_reparse_deferral_on_the_fly) { 5603bd8f1dc3Sbluhm const char *const pre = "<d><x attr='"; 5604bd8f1dc3Sbluhm const char *const end = "'></x>"; 5605bd8f1dc3Sbluhm char iiiiii[100]; 5606bd8f1dc3Sbluhm const int fillsize = (int)sizeof(iiiiii); 5607bd8f1dc3Sbluhm memset(iiiiii, 'i', fillsize); 5608bd8f1dc3Sbluhm 5609bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 5610bd8f1dc3Sbluhm assert_true(parser != NULL); 5611bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 5612bd8f1dc3Sbluhm 5613bd8f1dc3Sbluhm CharData storage; 5614bd8f1dc3Sbluhm CharData_Init(&storage); 5615bd8f1dc3Sbluhm XML_SetUserData(parser, &storage); 5616bd8f1dc3Sbluhm XML_SetStartElementHandler(parser, start_element_event_handler); 5617bd8f1dc3Sbluhm 5618bd8f1dc3Sbluhm enum 
XML_Status status; 5619bd8f1dc3Sbluhm // parse the start text 5620bd8f1dc3Sbluhm status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5621bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5622bd8f1dc3Sbluhm xml_failure(parser); 5623bd8f1dc3Sbluhm } 5624bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5625bd8f1dc3Sbluhm 5626bd8f1dc3Sbluhm // try to parse some 'i', but the token isn't finished 5627bd8f1dc3Sbluhm status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 5628bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5629bd8f1dc3Sbluhm xml_failure(parser); 5630bd8f1dc3Sbluhm } 5631bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5632bd8f1dc3Sbluhm 5633bd8f1dc3Sbluhm // end the <x> token. 5634bd8f1dc3Sbluhm status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5635bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5636bd8f1dc3Sbluhm xml_failure(parser); 5637bd8f1dc3Sbluhm } 5638bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 5639bd8f1dc3Sbluhm 5640bd8f1dc3Sbluhm // now change the heuristic setting and add *no* data 5641bd8f1dc3Sbluhm assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 5642bd8f1dc3Sbluhm // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 
5643bd8f1dc3Sbluhm status = XML_Parse(parser, "", 0, XML_FALSE); 5644bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5645bd8f1dc3Sbluhm xml_failure(parser); 5646bd8f1dc3Sbluhm } 5647bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, XCS("dx")); 5648bd8f1dc3Sbluhm 5649bd8f1dc3Sbluhm XML_ParserFree(parser); 5650bd8f1dc3Sbluhm } 5651bd8f1dc3Sbluhm END_TEST 5652bd8f1dc3Sbluhm 5653bd8f1dc3Sbluhm START_TEST(test_set_bad_reparse_option) { 5654bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 5655bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 5656bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 5657bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 5658bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 5659bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 5660bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 5661bd8f1dc3Sbluhm assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 5662bd8f1dc3Sbluhm assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 5663bd8f1dc3Sbluhm assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 5664bd8f1dc3Sbluhm XML_ParserFree(parser); 5665bd8f1dc3Sbluhm } 5666bd8f1dc3Sbluhm END_TEST 5667bd8f1dc3Sbluhm 5668bd8f1dc3Sbluhm static size_t g_totalAlloc = 0; 5669bd8f1dc3Sbluhm static size_t g_biggestAlloc = 0; 5670bd8f1dc3Sbluhm 5671bd8f1dc3Sbluhm static void * 5672bd8f1dc3Sbluhm counting_realloc(void *ptr, size_t size) { 5673bd8f1dc3Sbluhm g_totalAlloc += size; 5674bd8f1dc3Sbluhm if (size > g_biggestAlloc) { 5675bd8f1dc3Sbluhm g_biggestAlloc = size; 5676bd8f1dc3Sbluhm } 5677bd8f1dc3Sbluhm return realloc(ptr, size); 5678bd8f1dc3Sbluhm } 5679bd8f1dc3Sbluhm 5680bd8f1dc3Sbluhm static void * 5681bd8f1dc3Sbluhm counting_malloc(size_t size) { 5682bd8f1dc3Sbluhm return counting_realloc(NULL, size); 
5683bd8f1dc3Sbluhm } 5684bd8f1dc3Sbluhm 5685bd8f1dc3Sbluhm START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 5686bd8f1dc3Sbluhm if (g_chunkSize != 0) { 5687bd8f1dc3Sbluhm // this test does not use SINGLE_BYTES, because it depends on very precise 5688bd8f1dc3Sbluhm // buffer fills. 5689bd8f1dc3Sbluhm return; 5690bd8f1dc3Sbluhm } 5691bd8f1dc3Sbluhm if (! g_reparseDeferralEnabledDefault) { 5692bd8f1dc3Sbluhm return; // this test is irrelevant when the deferral heuristic is disabled. 5693bd8f1dc3Sbluhm } 5694bd8f1dc3Sbluhm 5695bd8f1dc3Sbluhm const int document_length = 65536; 5696bd8f1dc3Sbluhm char *const document = (char *)malloc(document_length); 5697bd8f1dc3Sbluhm 5698bd8f1dc3Sbluhm const XML_Memory_Handling_Suite memfuncs = { 5699bd8f1dc3Sbluhm counting_malloc, 5700bd8f1dc3Sbluhm counting_realloc, 5701bd8f1dc3Sbluhm free, 5702bd8f1dc3Sbluhm }; 5703bd8f1dc3Sbluhm 5704bd8f1dc3Sbluhm const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 5705bd8f1dc3Sbluhm const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 5706bd8f1dc3Sbluhm const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 5707bd8f1dc3Sbluhm 5708bd8f1dc3Sbluhm for (const int *leading = leading_list; *leading >= 0; leading++) { 5709bd8f1dc3Sbluhm for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 5710bd8f1dc3Sbluhm for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 5711bd8f1dc3Sbluhm set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 5712bd8f1dc3Sbluhm *fillsize); 5713bd8f1dc3Sbluhm // start by checking that the test looks reasonably valid 5714bd8f1dc3Sbluhm assert_true(*leading + *bigtoken <= document_length); 5715bd8f1dc3Sbluhm 5716bd8f1dc3Sbluhm // put 'x' everywhere; some will be overwritten by elements. 
5717bd8f1dc3Sbluhm memset(document, 'x', document_length); 5718bd8f1dc3Sbluhm // maybe add an initial tag 5719bd8f1dc3Sbluhm if (*leading) { 5720bd8f1dc3Sbluhm assert_true(*leading >= 3); // or the test case is invalid 5721bd8f1dc3Sbluhm memcpy(document, "<a>", 3); 5722bd8f1dc3Sbluhm } 5723bd8f1dc3Sbluhm // add the large token 5724bd8f1dc3Sbluhm document[*leading + 0] = '<'; 5725bd8f1dc3Sbluhm document[*leading + 1] = 'b'; 5726bd8f1dc3Sbluhm memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 5727bd8f1dc3Sbluhm document[*leading + *bigtoken - 1] = '>'; 5728bd8f1dc3Sbluhm 5729bd8f1dc3Sbluhm // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 5730bd8f1dc3Sbluhm const int expected_elem_total = 1 + (*leading ? 1 : 0); 5731bd8f1dc3Sbluhm 5732bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 5733bd8f1dc3Sbluhm assert_true(parser != NULL); 5734bd8f1dc3Sbluhm 5735bd8f1dc3Sbluhm CharData storage; 5736bd8f1dc3Sbluhm CharData_Init(&storage); 5737bd8f1dc3Sbluhm XML_SetUserData(parser, &storage); 5738bd8f1dc3Sbluhm XML_SetStartElementHandler(parser, start_element_event_handler); 5739bd8f1dc3Sbluhm 5740bd8f1dc3Sbluhm g_biggestAlloc = 0; 5741bd8f1dc3Sbluhm g_totalAlloc = 0; 5742bd8f1dc3Sbluhm int offset = 0; 5743bd8f1dc3Sbluhm // fill data until the big token is covered (but not necessarily parsed) 5744bd8f1dc3Sbluhm while (offset < *leading + *bigtoken) { 5745bd8f1dc3Sbluhm assert_true(offset + *fillsize <= document_length); 5746bd8f1dc3Sbluhm const enum XML_Status status 5747bd8f1dc3Sbluhm = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5748bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5749bd8f1dc3Sbluhm xml_failure(parser); 5750bd8f1dc3Sbluhm } 5751bd8f1dc3Sbluhm offset += *fillsize; 5752bd8f1dc3Sbluhm } 5753bd8f1dc3Sbluhm // Now, check that we've had a buffer allocation that could fit the 5754bd8f1dc3Sbluhm // context bytes and our big token. 
In order to detect a special case, 5755bd8f1dc3Sbluhm // we need to know how many bytes of our big token were included in the 5756bd8f1dc3Sbluhm // first push that contained _any_ bytes of the big token: 5757bd8f1dc3Sbluhm const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 5758bd8f1dc3Sbluhm if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 5759bd8f1dc3Sbluhm // Special case: we aren't saving any context, and the whole big token 5760bd8f1dc3Sbluhm // was covered by a single fill, so Expat may have parsed directly 5761bd8f1dc3Sbluhm // from our input pointer, without allocating an internal buffer. 5762bd8f1dc3Sbluhm } else if (*leading < XML_CONTEXT_BYTES) { 5763bd8f1dc3Sbluhm assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 5764bd8f1dc3Sbluhm } else { 5765bd8f1dc3Sbluhm assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 5766bd8f1dc3Sbluhm } 5767bd8f1dc3Sbluhm // fill data until the big token is actually parsed 5768bd8f1dc3Sbluhm while (storage.count < expected_elem_total) { 5769bd8f1dc3Sbluhm const size_t alloc_before = g_totalAlloc; 5770bd8f1dc3Sbluhm assert_true(offset + *fillsize <= document_length); 5771bd8f1dc3Sbluhm const enum XML_Status status 5772bd8f1dc3Sbluhm = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5773bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5774bd8f1dc3Sbluhm xml_failure(parser); 5775bd8f1dc3Sbluhm } 5776bd8f1dc3Sbluhm offset += *fillsize; 5777bd8f1dc3Sbluhm // since all the bytes of the big token are already in the buffer, 5778bd8f1dc3Sbluhm // the bufsize ceiling should make us finish its parsing without any 5779bd8f1dc3Sbluhm // further buffer allocations. We assume that there will be no other 5780bd8f1dc3Sbluhm // large allocations in this test. 5781bd8f1dc3Sbluhm assert_true(g_totalAlloc - alloc_before < 4096); 5782bd8f1dc3Sbluhm } 5783bd8f1dc3Sbluhm // test-the-test: was our alloc even called? 
#if defined(XML_TESTING)
/* Push one huge start tag ("<t" + spaces + ">", 7654321 bytes, followed by
   'x' filler) into a fresh parser using the sequence of fill sizes given by
   each row of the table below, then check how many bytes were scanned
   (g_bytesScanned) while doing so:
     - the tag must have been reported exactly once,
     - the counter must be non-zero,
     - with reparse deferral enabled by default, the counter must not exceed
       the per-row limit stored (negated) as the row terminator,
     - in all cases the counter must not exceed the sum of the cumulative
       input sizes seen at each XML_Parse call.
   Skipped when g_chunkSize is non-zero. */
START_TEST(test_varying_buffer_fills) {
  const int KiB = 1024;
  const int MiB = 1024 * KiB;
  const int document_length = 16 * MiB;
  const int token_len = 7654321; // arbitrarily chosen between 4 and 8 MiB

  if (g_chunkSize != 0) {
    return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
  }

  char *const document = (char *)malloc(document_length);
  assert_true(document != NULL);
  memset(document, 'x', document_length);
  document[0] = '<';
  document[1] = 't';
  // a very spacy token; the closing '>' has to be stored after this memset
  // because the memset also covers the token's last byte
  memset(&document[2], ' ', token_len - 2);
  document[token_len - 1] = '>';

  // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
  // When reparse deferral is enabled, the final (negated) value is the expected
  // maximum number of bytes scanned in parse attempts.
  const int testcases[][30] = {
      {8 * MiB, -8 * MiB},
      {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
      // zero-size fills shouldn't trigger the bypass
      {4 * MiB, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
      // try to hit the buffer ceiling only once (at the end)
      {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
      // try to hit the same buffer ceiling multiple times
      {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},

      // try to hit every ceiling, by always landing 1K shy of the buffer size
      {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
       128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},

      // try to avoid every ceiling, by always landing 1B past the buffer size
      // the normal 2x heuristic threshold still forces parse attempts.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
       -(10 * MiB + 682 * KiB + 7)},
      // try to avoid every ceiling again, except on our last fill.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
       -(10 * MiB + 682 * KiB + 6)},

      // try to hit ceilings on the way multiple times
      {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
       512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
       1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
       2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
       // we'll make a parse attempt at every parse call
       -(45 * MiB + 12)},
  };
  const int case_total = sizeof(testcases) / sizeof(testcases[0]);

  for (int case_idx = 0; case_idx < case_total; case_idx++) {
    // cursor into the current row; ends up on the negative terminator
    const int *fillsize = testcases[case_idx];
    set_subtest("#%d {%d %d %d %d ...}", case_idx, fillsize[0], fillsize[1],
                fillsize[2], fillsize[3]);

    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    g_bytesScanned = 0;
    int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
    int offset = 0;
    for (; *fillsize >= 0; fillsize++) {
      assert_true(offset + *fillsize <= document_length); // or test is invalid
      if (XML_Parse(parser, &document[offset], *fillsize, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      offset += *fillsize;
      assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
      worstcase_bytes += offset; // we might've tried to parse all pending bytes
    }

    assert_true(storage.count == 1); // the big token should've been parsed
    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
    if (g_reparseDeferralEnabledDefault) {
      // heuristic is enabled; some XML_Parse calls may have deferred reparsing.
      // The row terminator holds the negated scan limit.
      const unsigned scan_limit = -*fillsize;
      if (g_bytesScanned > scan_limit) {
        fprintf(stderr,
                "bytes scanned in parse attempts: actual=%u limit=%u \n",
                g_bytesScanned, scan_limit);
        fail("too many bytes scanned in parse attempts");
      }
    }
    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);

    XML_ParserFree(parser);
  }
  free(document);
}
END_TEST
#endif
(buffered bytes at each XML_Parse call) 5877bd8f1dc3Sbluhm int offset = 0; 5878bd8f1dc3Sbluhm while (*fillsize >= 0) { 5879bd8f1dc3Sbluhm assert_true(offset + *fillsize <= document_length); // or test is invalid 5880bd8f1dc3Sbluhm const enum XML_Status status 5881bd8f1dc3Sbluhm = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5882bd8f1dc3Sbluhm if (status != XML_STATUS_OK) { 5883bd8f1dc3Sbluhm xml_failure(parser); 5884bd8f1dc3Sbluhm } 5885bd8f1dc3Sbluhm offset += *fillsize; 5886bd8f1dc3Sbluhm fillsize++; 5887bd8f1dc3Sbluhm assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow 5888bd8f1dc3Sbluhm worstcase_bytes += offset; // we might've tried to parse all pending bytes 5889bd8f1dc3Sbluhm } 5890bd8f1dc3Sbluhm assert_true(storage.count == 1); // the big token should've been parsed 5891c033f770Sbluhm assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 5892bd8f1dc3Sbluhm if (g_reparseDeferralEnabledDefault) { 5893bd8f1dc3Sbluhm // heuristic is enabled; some XML_Parse calls may have deferred reparsing 5894c033f770Sbluhm const unsigned max_bytes_scanned = -*fillsize; 5895c033f770Sbluhm if (g_bytesScanned > max_bytes_scanned) { 5896bd8f1dc3Sbluhm fprintf(stderr, 5897c033f770Sbluhm "bytes scanned in parse attempts: actual=%u limit=%u \n", 5898c033f770Sbluhm g_bytesScanned, max_bytes_scanned); 5899bd8f1dc3Sbluhm fail("too many bytes scanned in parse attempts"); 5900bd8f1dc3Sbluhm } 5901bd8f1dc3Sbluhm } 5902c033f770Sbluhm assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 5903bd8f1dc3Sbluhm 5904bd8f1dc3Sbluhm XML_ParserFree(parser); 5905bd8f1dc3Sbluhm } 5906bd8f1dc3Sbluhm free(document); 5907bd8f1dc3Sbluhm } 5908bd8f1dc3Sbluhm END_TEST 5909f558d286Sbluhm #endif 5910bd8f1dc3Sbluhm 5911bd8f1dc3Sbluhm void 5912bd8f1dc3Sbluhm make_basic_test_case(Suite *s) { 5913bd8f1dc3Sbluhm TCase *tc_basic = tcase_create("basic tests"); 5914bd8f1dc3Sbluhm 5915bd8f1dc3Sbluhm suite_add_tcase(s, tc_basic); 5916bd8f1dc3Sbluhm 
tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 5917bd8f1dc3Sbluhm 5918bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_nul_byte); 5919bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_u0000_char); 5920bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_siphash_self); 5921bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_siphash_spec); 5922bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bom_utf8); 5923bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bom_utf16_be); 5924bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bom_utf16_le); 5925bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_nobom_utf16_le); 5926bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_hash_collision); 5927bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_illegal_utf8); 5928bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf8_auto_align); 5929bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16); 5930bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 5931bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_not_utf16); 5932bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_encoding); 5933bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_latin1_umlauts); 5934bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_long_utf8_character); 5935bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_long_latin1_attribute); 5936bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_long_ascii_attribute); 5937bd8f1dc3Sbluhm /* Regression test for SF bug #491986. */ 5938bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_danish_latin1); 5939bd8f1dc3Sbluhm /* Regression test for SF bug #514281. 
*/ 5940bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_french_charref_hexidecimal); 5941bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_french_charref_decimal); 5942bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_french_latin1); 5943bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_french_utf8); 5944bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf8_false_rejection); 5945bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_line_number_after_parse); 5946bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_column_number_after_parse); 5947bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 5948bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_line_number_after_error); 5949bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_column_number_after_error); 5950bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_really_long_lines); 5951bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_really_long_encoded_lines); 5952bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_end_element_events); 5953bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 5954bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_attr_whitespace_normalization); 5955bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_xmldecl_misplaced); 5956bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_xmldecl_invalid); 5957bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_xmldecl_missing_attr); 5958bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_xmldecl_missing_value); 5959bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 5960bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 5961bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 5962bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 5963bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 5964bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 5965bd8f1dc3Sbluhm 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 5966bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 5967bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); 5968bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 5969bd8f1dc3Sbluhm tcase_add_test(tc_basic, 5970bd8f1dc3Sbluhm test_wfc_undeclared_entity_with_external_subset_standalone); 5971bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 5972bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 5973bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_not_standalone_handler_reject); 5974bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_not_standalone_handler_accept); 5975bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 5976bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 5977bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 5978bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_dtd_attr_handling); 5979bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 5980bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 5981bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 5982bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 5983bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 5984bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_good_cdata_ascii); 5985bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_good_cdata_utf16); 5986bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_good_cdata_utf16_le); 5987bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_long_cdata_utf16); 5988bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_multichar_cdata_utf16); 5989bd8f1dc3Sbluhm tcase_add_test(tc_basic, 
test_utf16_bad_surrogate_pair); 5990bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_cdata); 5991bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_cdata_utf16); 5992bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 5993bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 5994bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_memory_allocation); 5995bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_default_current); 5996bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_dtd_elements); 5997bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_dtd_elements_nesting); 5998bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 5999bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 6000bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 6001bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 6002bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6003bd8f1dc3Sbluhm test_foreign_dtd_without_external_subset); 6004bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 6005bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_set_base); 6006bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_attributes); 6007bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 6008bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_resume_invalid_parse); 6009bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_resume_resuspended); 6010bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_cdata_default); 6011bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_subordinate_reset); 6012bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_subordinate_suspend); 6013bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 6014bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 6015bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6016bd8f1dc3Sbluhm 
test_ext_entity_invalid_suspended_parse); 6017bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_explicit_encoding); 6018bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_trailing_cr); 6019bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 6020bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_trailing_rsqb); 6021bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 6022bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 6023bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 6024bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 6025bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_empty_parse); 602661ad8a07Sbluhm tcase_add_test(tc_basic, test_negative_len_parse); 602761ad8a07Sbluhm tcase_add_test(tc_basic, test_negative_len_parse_buffer); 6028bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_get_buffer_1); 6029bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_get_buffer_2); 6030bd8f1dc3Sbluhm #if XML_CONTEXT_BYTES > 0 6031bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_get_buffer_3_overflow); 6032bd8f1dc3Sbluhm #endif 6033bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 6034bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); 6035bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_byte_info_at_end); 6036bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_byte_info_at_error); 6037bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_byte_info_at_cdata); 6038bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_predefined_entities); 6039bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 6040bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_not_predefined_entities); 6041bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 6042bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 6043bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 
test_ignore_section_utf16_be); 6044bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 6045bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 6046bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 6047bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 6048bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 6049bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_public_doctype); 6050bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_attribute_enum_value); 6051bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_predefined_entity_redefinition); 6052bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 6053bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_public_notation_no_sysid); 6054bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_nested_groups); 6055bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_group_choice); 6056bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_standalone_parameter_entity); 6057bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 6058bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6059bd8f1dc3Sbluhm test_recursive_external_parameter_entity); 6060c033f770Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6061c033f770Sbluhm test_recursive_external_parameter_entity_2); 6062bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 6063bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_suspend_xdecl); 6064bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_abort_epilog); 6065bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_abort_epilog_2); 6066bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_suspend_epilog); 6067bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6068bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unfinished_epilog); 6069bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_partial_char_in_epilog); 6070bd8f1dc3Sbluhm 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6071bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6072bd8f1dc3Sbluhm test_suspend_resume_internal_entity_issue_629); 6073bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6074bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 6075bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_restart_on_error); 6076bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6077bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6078bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6079bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_standalone_internal_entity); 6080bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_skipped_external_entity); 6081bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6082bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6083bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); 6084bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6085bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6086bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6087bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6088bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_pi_handled_in_default); 6089bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_comment_handled_in_default); 6090bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_pi_yml); 6091bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_pi_xnl); 6092bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_pi_xmm); 6093bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_pi); 6094bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_be_pi); 6095bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_be_comment); 
6096bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_le_comment); 6097bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6098bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); 6099bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_success); 6100bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6101bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6102bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6103bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6104bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6105bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6106bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6107bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6108bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6109bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6110bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6111bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6112bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); 6113bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6114bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6115bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6116bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6117bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6118bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6119bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 
6120bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6121bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6122bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf8_in_start_tags); 6123bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6124bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_attribute); 6125bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_utf16_second_attr); 6126bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_attr_after_solidus); 6127bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6128bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_attr_desc_keyword); 6129bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6130bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype); 6131bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype_utf8); 6132bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype_utf16); 6133bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype_plus); 6134bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype_star); 6135bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_doctype_query); 6136bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6137bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6138bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6139bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6140bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6141bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_short_doctype); 6142bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_short_doctype_2); 6143bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_short_doctype_3); 6144bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_long_doctype); 6145bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_entity); 6146bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_entity_2); 6147bd8f1dc3Sbluhm tcase_add_test(tc_basic, 
test_bad_entity_3); 6148bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_entity_4); 6149bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bad_notation); 6150bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_default_doctype_handler); 6151bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_empty_element_abort); 6152bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(tc_basic, 6153bd8f1dc3Sbluhm test_pool_integrity_with_unfinished_attr); 6154bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6155f558d286Sbluhm #if defined(XML_TESTING) 6156c033f770Sbluhm tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6157f558d286Sbluhm #endif 6158bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_set_reparse_deferral); 6159bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6160bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6161bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6162bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_set_bad_reparse_option); 6163bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); 6164f558d286Sbluhm #if defined(XML_TESTING) 6165bd8f1dc3Sbluhm tcase_add_test(tc_basic, test_varying_buffer_fills); 6166f558d286Sbluhm #endif 6167bd8f1dc3Sbluhm } 6168