xref: /openbsd-src/lib/libexpat/tests/basic_tests.c (revision aa071e6ed2e21e8e72a6aac46533908f2defbdef)
1bd8f1dc3Sbluhm /* Tests in the "basic" test case for the Expat test suite
2bd8f1dc3Sbluhm                             __  __            _
3bd8f1dc3Sbluhm                          ___\ \/ /_ __   __ _| |_
4bd8f1dc3Sbluhm                         / _ \\  /| '_ \ / _` | __|
5bd8f1dc3Sbluhm                        |  __//  \| |_) | (_| | |_
6bd8f1dc3Sbluhm                         \___/_/\_\ .__/ \__,_|\__|
7bd8f1dc3Sbluhm                                  |_| XML parser
8bd8f1dc3Sbluhm 
9bd8f1dc3Sbluhm    Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10bd8f1dc3Sbluhm    Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11bd8f1dc3Sbluhm    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12bd8f1dc3Sbluhm    Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13bd8f1dc3Sbluhm    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14bd8f1dc3Sbluhm    Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15bd8f1dc3Sbluhm    Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16bd8f1dc3Sbluhm    Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17bd8f1dc3Sbluhm    Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18bd8f1dc3Sbluhm    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19bd8f1dc3Sbluhm    Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20bd8f1dc3Sbluhm    Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21bd8f1dc3Sbluhm    Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22bd8f1dc3Sbluhm    Licensed under the MIT license:
23bd8f1dc3Sbluhm 
24bd8f1dc3Sbluhm    Permission is  hereby granted,  free of charge,  to any  person obtaining
25bd8f1dc3Sbluhm    a  copy  of  this  software   and  associated  documentation  files  (the
26bd8f1dc3Sbluhm    "Software"),  to  deal in  the  Software  without restriction,  including
27bd8f1dc3Sbluhm    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28bd8f1dc3Sbluhm    distribute, sublicense, and/or sell copies of the Software, and to permit
29bd8f1dc3Sbluhm    persons  to whom  the Software  is  furnished to  do so,  subject to  the
30bd8f1dc3Sbluhm    following conditions:
31bd8f1dc3Sbluhm 
32bd8f1dc3Sbluhm    The above copyright  notice and this permission notice  shall be included
33bd8f1dc3Sbluhm    in all copies or substantial portions of the Software.
34bd8f1dc3Sbluhm 
35bd8f1dc3Sbluhm    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36bd8f1dc3Sbluhm    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37bd8f1dc3Sbluhm    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38bd8f1dc3Sbluhm    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39bd8f1dc3Sbluhm    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40bd8f1dc3Sbluhm    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41bd8f1dc3Sbluhm    USE OR OTHER DEALINGS IN THE SOFTWARE.
42bd8f1dc3Sbluhm */
43bd8f1dc3Sbluhm 
44bd8f1dc3Sbluhm #if defined(NDEBUG)
45bd8f1dc3Sbluhm #  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46bd8f1dc3Sbluhm #endif
47bd8f1dc3Sbluhm 
48bd8f1dc3Sbluhm #include <assert.h>
49bd8f1dc3Sbluhm 
50bd8f1dc3Sbluhm #include <stdio.h>
51bd8f1dc3Sbluhm #include <string.h>
52bd8f1dc3Sbluhm #include <time.h>
53bd8f1dc3Sbluhm 
54bd8f1dc3Sbluhm #if ! defined(__cplusplus)
55bd8f1dc3Sbluhm #  include <stdbool.h>
56bd8f1dc3Sbluhm #endif
57bd8f1dc3Sbluhm 
58bd8f1dc3Sbluhm #include "expat_config.h"
59bd8f1dc3Sbluhm 
60bd8f1dc3Sbluhm #include "expat.h"
61bd8f1dc3Sbluhm #include "internal.h"
62bd8f1dc3Sbluhm #include "minicheck.h"
63bd8f1dc3Sbluhm #include "structdata.h"
64bd8f1dc3Sbluhm #include "common.h"
65bd8f1dc3Sbluhm #include "dummy.h"
66bd8f1dc3Sbluhm #include "handlers.h"
67bd8f1dc3Sbluhm #include "siphash.h"
68bd8f1dc3Sbluhm #include "basic_tests.h"
69bd8f1dc3Sbluhm 
70bd8f1dc3Sbluhm static void
71bd8f1dc3Sbluhm basic_setup(void) {
72bd8f1dc3Sbluhm   g_parser = XML_ParserCreate(NULL);
73bd8f1dc3Sbluhm   if (g_parser == NULL)
74bd8f1dc3Sbluhm     fail("Parser not created.");
75bd8f1dc3Sbluhm }
76bd8f1dc3Sbluhm 
77bd8f1dc3Sbluhm /*
78bd8f1dc3Sbluhm  * Character & encoding tests.
79bd8f1dc3Sbluhm  */
80bd8f1dc3Sbluhm 
81bd8f1dc3Sbluhm START_TEST(test_nul_byte) {
82bd8f1dc3Sbluhm   char text[] = "<doc>\0</doc>";
83bd8f1dc3Sbluhm 
84bd8f1dc3Sbluhm   /* test that a NUL byte (in US-ASCII data) is an error */
85bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86bd8f1dc3Sbluhm       == XML_STATUS_OK)
87bd8f1dc3Sbluhm     fail("Parser did not report error on NUL-byte.");
88bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89bd8f1dc3Sbluhm     xml_failure(g_parser);
90bd8f1dc3Sbluhm }
91bd8f1dc3Sbluhm END_TEST
92bd8f1dc3Sbluhm 
93bd8f1dc3Sbluhm START_TEST(test_u0000_char) {
94bd8f1dc3Sbluhm   /* test that a NUL byte (in US-ASCII data) is an error */
95bd8f1dc3Sbluhm   expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96bd8f1dc3Sbluhm                  "Parser did not report error on NUL-byte.");
97bd8f1dc3Sbluhm }
98bd8f1dc3Sbluhm END_TEST
99bd8f1dc3Sbluhm 
100bd8f1dc3Sbluhm START_TEST(test_siphash_self) {
101bd8f1dc3Sbluhm   if (! sip24_valid())
102bd8f1dc3Sbluhm     fail("SipHash self-test failed");
103bd8f1dc3Sbluhm }
104bd8f1dc3Sbluhm END_TEST
105bd8f1dc3Sbluhm 
106bd8f1dc3Sbluhm START_TEST(test_siphash_spec) {
107bd8f1dc3Sbluhm   /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108bd8f1dc3Sbluhm   const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109bd8f1dc3Sbluhm                          "\x0a\x0b\x0c\x0d\x0e";
110bd8f1dc3Sbluhm   const size_t len = sizeof(message) - 1;
111bd8f1dc3Sbluhm   const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112bd8f1dc3Sbluhm   struct siphash state;
113bd8f1dc3Sbluhm   struct sipkey key;
114bd8f1dc3Sbluhm 
115bd8f1dc3Sbluhm   sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116bd8f1dc3Sbluhm                   "\x0a\x0b\x0c\x0d\x0e\x0f");
117bd8f1dc3Sbluhm   sip24_init(&state, &key);
118bd8f1dc3Sbluhm 
119bd8f1dc3Sbluhm   /* Cover spread across calls */
120bd8f1dc3Sbluhm   sip24_update(&state, message, 4);
121bd8f1dc3Sbluhm   sip24_update(&state, message + 4, len - 4);
122bd8f1dc3Sbluhm 
123bd8f1dc3Sbluhm   /* Cover null length */
124bd8f1dc3Sbluhm   sip24_update(&state, message, 0);
125bd8f1dc3Sbluhm 
126bd8f1dc3Sbluhm   if (sip24_final(&state) != expected)
127bd8f1dc3Sbluhm     fail("sip24_final failed spec test\n");
128bd8f1dc3Sbluhm 
129bd8f1dc3Sbluhm   /* Cover wrapper */
130bd8f1dc3Sbluhm   if (siphash24(message, len, &key) != expected)
131bd8f1dc3Sbluhm     fail("siphash24 failed spec test\n");
132bd8f1dc3Sbluhm }
133bd8f1dc3Sbluhm END_TEST
134bd8f1dc3Sbluhm 
135bd8f1dc3Sbluhm START_TEST(test_bom_utf8) {
136bd8f1dc3Sbluhm   /* This test is really just making sure we don't core on a UTF-8 BOM. */
137bd8f1dc3Sbluhm   const char *text = "\357\273\277<e/>";
138bd8f1dc3Sbluhm 
139bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
141bd8f1dc3Sbluhm     xml_failure(g_parser);
142bd8f1dc3Sbluhm }
143bd8f1dc3Sbluhm END_TEST
144bd8f1dc3Sbluhm 
145bd8f1dc3Sbluhm START_TEST(test_bom_utf16_be) {
146bd8f1dc3Sbluhm   char text[] = "\376\377\0<\0e\0/\0>";
147bd8f1dc3Sbluhm 
148bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
150bd8f1dc3Sbluhm     xml_failure(g_parser);
151bd8f1dc3Sbluhm }
152bd8f1dc3Sbluhm END_TEST
153bd8f1dc3Sbluhm 
154bd8f1dc3Sbluhm START_TEST(test_bom_utf16_le) {
155bd8f1dc3Sbluhm   char text[] = "\377\376<\0e\0/\0>\0";
156bd8f1dc3Sbluhm 
157bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
159bd8f1dc3Sbluhm     xml_failure(g_parser);
160bd8f1dc3Sbluhm }
161bd8f1dc3Sbluhm END_TEST
162bd8f1dc3Sbluhm 
163bd8f1dc3Sbluhm START_TEST(test_nobom_utf16_le) {
164bd8f1dc3Sbluhm   char text[] = " \0<\0e\0/\0>\0";
165bd8f1dc3Sbluhm 
166bd8f1dc3Sbluhm   if (g_chunkSize == 1) {
167bd8f1dc3Sbluhm     // TODO: with just the first byte, we can't tell the difference between
168bd8f1dc3Sbluhm     // UTF-16-LE and UTF-8. Avoid the failure for now.
169bd8f1dc3Sbluhm     return;
170bd8f1dc3Sbluhm   }
171bd8f1dc3Sbluhm 
172bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
174bd8f1dc3Sbluhm     xml_failure(g_parser);
175bd8f1dc3Sbluhm }
176bd8f1dc3Sbluhm END_TEST
177bd8f1dc3Sbluhm 
178bd8f1dc3Sbluhm START_TEST(test_hash_collision) {
179bd8f1dc3Sbluhm   /* For full coverage of the lookup routine, we need to ensure a
180bd8f1dc3Sbluhm    * hash collision even though we can only tell that we have one
181bd8f1dc3Sbluhm    * through breakpoint debugging or coverage statistics.  The
182bd8f1dc3Sbluhm    * following will cause a hash collision on machines with a 64-bit
183bd8f1dc3Sbluhm    * long type; others will have to experiment.  The full coverage
184bd8f1dc3Sbluhm    * tests invoked from qa.sh usually provide a hash collision, but
185bd8f1dc3Sbluhm    * not always.  This is an attempt to provide insurance.
186bd8f1dc3Sbluhm    */
187bd8f1dc3Sbluhm #define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188bd8f1dc3Sbluhm   const char *text
189bd8f1dc3Sbluhm       = "<doc>\n"
190bd8f1dc3Sbluhm         "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191bd8f1dc3Sbluhm         "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192bd8f1dc3Sbluhm         "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193bd8f1dc3Sbluhm         "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194bd8f1dc3Sbluhm         "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195bd8f1dc3Sbluhm         "<d8>This triggers the table growth and collides with b2</d8>\n"
196bd8f1dc3Sbluhm         "</doc>\n";
197bd8f1dc3Sbluhm 
198bd8f1dc3Sbluhm   XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
201bd8f1dc3Sbluhm     xml_failure(g_parser);
202bd8f1dc3Sbluhm }
203bd8f1dc3Sbluhm END_TEST
204bd8f1dc3Sbluhm #undef COLLIDING_HASH_SALT
205bd8f1dc3Sbluhm 
206bd8f1dc3Sbluhm /* Regression test for SF bug #491986. */
207bd8f1dc3Sbluhm START_TEST(test_danish_latin1) {
208bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209bd8f1dc3Sbluhm                      "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210bd8f1dc3Sbluhm #ifdef XML_UNICODE
211bd8f1dc3Sbluhm   const XML_Char *expected
212bd8f1dc3Sbluhm       = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213bd8f1dc3Sbluhm #else
214bd8f1dc3Sbluhm   const XML_Char *expected
215bd8f1dc3Sbluhm       = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216bd8f1dc3Sbluhm #endif
217bd8f1dc3Sbluhm   run_character_check(text, expected);
218bd8f1dc3Sbluhm }
219bd8f1dc3Sbluhm END_TEST
220bd8f1dc3Sbluhm 
221bd8f1dc3Sbluhm /* Regression test for SF bug #514281. */
222bd8f1dc3Sbluhm START_TEST(test_french_charref_hexidecimal) {
223bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224bd8f1dc3Sbluhm                      "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225bd8f1dc3Sbluhm #ifdef XML_UNICODE
226bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227bd8f1dc3Sbluhm #else
228bd8f1dc3Sbluhm   const XML_Char *expected
229bd8f1dc3Sbluhm       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230bd8f1dc3Sbluhm #endif
231bd8f1dc3Sbluhm   run_character_check(text, expected);
232bd8f1dc3Sbluhm }
233bd8f1dc3Sbluhm END_TEST
234bd8f1dc3Sbluhm 
235bd8f1dc3Sbluhm START_TEST(test_french_charref_decimal) {
236bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237bd8f1dc3Sbluhm                      "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238bd8f1dc3Sbluhm #ifdef XML_UNICODE
239bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240bd8f1dc3Sbluhm #else
241bd8f1dc3Sbluhm   const XML_Char *expected
242bd8f1dc3Sbluhm       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243bd8f1dc3Sbluhm #endif
244bd8f1dc3Sbluhm   run_character_check(text, expected);
245bd8f1dc3Sbluhm }
246bd8f1dc3Sbluhm END_TEST
247bd8f1dc3Sbluhm 
248bd8f1dc3Sbluhm START_TEST(test_french_latin1) {
249bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250bd8f1dc3Sbluhm                      "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251bd8f1dc3Sbluhm #ifdef XML_UNICODE
252bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253bd8f1dc3Sbluhm #else
254bd8f1dc3Sbluhm   const XML_Char *expected
255bd8f1dc3Sbluhm       = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256bd8f1dc3Sbluhm #endif
257bd8f1dc3Sbluhm   run_character_check(text, expected);
258bd8f1dc3Sbluhm }
259bd8f1dc3Sbluhm END_TEST
260bd8f1dc3Sbluhm 
261bd8f1dc3Sbluhm START_TEST(test_french_utf8) {
262bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263bd8f1dc3Sbluhm                      "<doc>\xC3\xA9</doc>";
264bd8f1dc3Sbluhm #ifdef XML_UNICODE
265bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9");
266bd8f1dc3Sbluhm #else
267bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xC3\xA9");
268bd8f1dc3Sbluhm #endif
269bd8f1dc3Sbluhm   run_character_check(text, expected);
270bd8f1dc3Sbluhm }
271bd8f1dc3Sbluhm END_TEST
272bd8f1dc3Sbluhm 
273bd8f1dc3Sbluhm /* Regression test for SF bug #600479.
274bd8f1dc3Sbluhm    XXX There should be a test that exercises all legal XML Unicode
275bd8f1dc3Sbluhm    characters as PCDATA and attribute value content, and XML Name
276bd8f1dc3Sbluhm    characters as part of element and attribute names.
277bd8f1dc3Sbluhm */
278bd8f1dc3Sbluhm START_TEST(test_utf8_false_rejection) {
279bd8f1dc3Sbluhm   const char *text = "<doc>\xEF\xBA\xBF</doc>";
280bd8f1dc3Sbluhm #ifdef XML_UNICODE
281bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xfebf");
282bd8f1dc3Sbluhm #else
283bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xEF\xBA\xBF");
284bd8f1dc3Sbluhm #endif
285bd8f1dc3Sbluhm   run_character_check(text, expected);
286bd8f1dc3Sbluhm }
287bd8f1dc3Sbluhm END_TEST
288bd8f1dc3Sbluhm 
289bd8f1dc3Sbluhm /* Regression test for SF bug #477667.
290bd8f1dc3Sbluhm    This test assures that any 8-bit character followed by a 7-bit
291bd8f1dc3Sbluhm    character will not be mistakenly interpreted as a valid UTF-8
292bd8f1dc3Sbluhm    sequence.
293bd8f1dc3Sbluhm */
294bd8f1dc3Sbluhm START_TEST(test_illegal_utf8) {
295bd8f1dc3Sbluhm   char text[100];
296bd8f1dc3Sbluhm   int i;
297bd8f1dc3Sbluhm 
298bd8f1dc3Sbluhm   for (i = 128; i <= 255; ++i) {
299bd8f1dc3Sbluhm     snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301bd8f1dc3Sbluhm         == XML_STATUS_OK) {
302bd8f1dc3Sbluhm       snprintf(text, sizeof(text),
303bd8f1dc3Sbluhm                "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304bd8f1dc3Sbluhm                i);
305bd8f1dc3Sbluhm       fail(text);
306bd8f1dc3Sbluhm     } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307bd8f1dc3Sbluhm       xml_failure(g_parser);
308bd8f1dc3Sbluhm     /* Reset the parser since we use the same parser repeatedly. */
309bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
310bd8f1dc3Sbluhm   }
311bd8f1dc3Sbluhm }
312bd8f1dc3Sbluhm END_TEST
313bd8f1dc3Sbluhm 
314bd8f1dc3Sbluhm /* Examples, not masks: */
315bd8f1dc3Sbluhm #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316bd8f1dc3Sbluhm #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317bd8f1dc3Sbluhm #define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318bd8f1dc3Sbluhm #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319bd8f1dc3Sbluhm #define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320bd8f1dc3Sbluhm 
321bd8f1dc3Sbluhm START_TEST(test_utf8_auto_align) {
322bd8f1dc3Sbluhm   struct TestCase {
323bd8f1dc3Sbluhm     ptrdiff_t expectedMovementInChars;
324bd8f1dc3Sbluhm     const char *input;
325bd8f1dc3Sbluhm   };
326bd8f1dc3Sbluhm 
327bd8f1dc3Sbluhm   struct TestCase cases[] = {
328bd8f1dc3Sbluhm       {00, ""},
329bd8f1dc3Sbluhm 
330bd8f1dc3Sbluhm       {00, UTF8_LEAD_1},
331bd8f1dc3Sbluhm 
332bd8f1dc3Sbluhm       {-1, UTF8_LEAD_2},
333bd8f1dc3Sbluhm       {00, UTF8_LEAD_2 UTF8_FOLLOW},
334bd8f1dc3Sbluhm 
335bd8f1dc3Sbluhm       {-1, UTF8_LEAD_3},
336bd8f1dc3Sbluhm       {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337bd8f1dc3Sbluhm       {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338bd8f1dc3Sbluhm 
339bd8f1dc3Sbluhm       {-1, UTF8_LEAD_4},
340bd8f1dc3Sbluhm       {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341bd8f1dc3Sbluhm       {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342bd8f1dc3Sbluhm       {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343bd8f1dc3Sbluhm   };
344bd8f1dc3Sbluhm 
345bd8f1dc3Sbluhm   size_t i = 0;
346bd8f1dc3Sbluhm   bool success = true;
347bd8f1dc3Sbluhm   for (; i < sizeof(cases) / sizeof(*cases); i++) {
348bd8f1dc3Sbluhm     const char *fromLim = cases[i].input + strlen(cases[i].input);
349bd8f1dc3Sbluhm     const char *const fromLimInitially = fromLim;
350bd8f1dc3Sbluhm     ptrdiff_t actualMovementInChars;
351bd8f1dc3Sbluhm 
352bd8f1dc3Sbluhm     _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353bd8f1dc3Sbluhm 
354bd8f1dc3Sbluhm     actualMovementInChars = (fromLim - fromLimInitially);
355bd8f1dc3Sbluhm     if (actualMovementInChars != cases[i].expectedMovementInChars) {
356bd8f1dc3Sbluhm       size_t j = 0;
357bd8f1dc3Sbluhm       success = false;
358bd8f1dc3Sbluhm       printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359bd8f1dc3Sbluhm              ", actually moved by %2d chars: \"",
360bd8f1dc3Sbluhm              (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361bd8f1dc3Sbluhm              (int)actualMovementInChars);
362bd8f1dc3Sbluhm       for (; j < strlen(cases[i].input); j++) {
363bd8f1dc3Sbluhm         printf("\\x%02x", (unsigned char)cases[i].input[j]);
364bd8f1dc3Sbluhm       }
365bd8f1dc3Sbluhm       printf("\"\n");
366bd8f1dc3Sbluhm     }
367bd8f1dc3Sbluhm   }
368bd8f1dc3Sbluhm 
369bd8f1dc3Sbluhm   if (! success) {
370bd8f1dc3Sbluhm     fail("UTF-8 auto-alignment is not bullet-proof\n");
371bd8f1dc3Sbluhm   }
372bd8f1dc3Sbluhm }
373bd8f1dc3Sbluhm END_TEST
374bd8f1dc3Sbluhm 
375bd8f1dc3Sbluhm START_TEST(test_utf16) {
376bd8f1dc3Sbluhm   /* <?xml version="1.0" encoding="UTF-16"?>
377bd8f1dc3Sbluhm    *  <doc a='123'>some {A} text</doc>
378bd8f1dc3Sbluhm    *
379bd8f1dc3Sbluhm    * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380bd8f1dc3Sbluhm    */
381bd8f1dc3Sbluhm   char text[]
382bd8f1dc3Sbluhm       = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383bd8f1dc3Sbluhm         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384bd8f1dc3Sbluhm         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385bd8f1dc3Sbluhm         "\000'\000?\000>\000\n"
386bd8f1dc3Sbluhm         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387bd8f1dc3Sbluhm         "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388bd8f1dc3Sbluhm         "<\000/\000d\000o\000c\000>";
389bd8f1dc3Sbluhm #ifdef XML_UNICODE
390bd8f1dc3Sbluhm   const XML_Char *expected = XCS("some \xff21 text");
391bd8f1dc3Sbluhm #else
392bd8f1dc3Sbluhm   const XML_Char *expected = XCS("some \357\274\241 text");
393bd8f1dc3Sbluhm #endif
394bd8f1dc3Sbluhm   CharData storage;
395bd8f1dc3Sbluhm 
396bd8f1dc3Sbluhm   CharData_Init(&storage);
397bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
398bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
401bd8f1dc3Sbluhm     xml_failure(g_parser);
402bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
403bd8f1dc3Sbluhm }
404bd8f1dc3Sbluhm END_TEST
405bd8f1dc3Sbluhm 
406bd8f1dc3Sbluhm START_TEST(test_utf16_le_epilog_newline) {
407bd8f1dc3Sbluhm   unsigned int first_chunk_bytes = 17;
408bd8f1dc3Sbluhm   char text[] = "\xFF\xFE"                  /* BOM */
409bd8f1dc3Sbluhm                 "<\000e\000/\000>\000"      /* document element */
410bd8f1dc3Sbluhm                 "\r\000\n\000\r\000\n\000"; /* epilog */
411bd8f1dc3Sbluhm 
412bd8f1dc3Sbluhm   if (first_chunk_bytes >= sizeof(text) - 1)
413bd8f1dc3Sbluhm     fail("bad value of first_chunk_bytes");
414bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
416bd8f1dc3Sbluhm     xml_failure(g_parser);
417bd8f1dc3Sbluhm   else {
418bd8f1dc3Sbluhm     enum XML_Status rc;
419bd8f1dc3Sbluhm     rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420bd8f1dc3Sbluhm                                  sizeof(text) - first_chunk_bytes - 1,
421bd8f1dc3Sbluhm                                  XML_TRUE);
422bd8f1dc3Sbluhm     if (rc == XML_STATUS_ERROR)
423bd8f1dc3Sbluhm       xml_failure(g_parser);
424bd8f1dc3Sbluhm   }
425bd8f1dc3Sbluhm }
426bd8f1dc3Sbluhm END_TEST
427bd8f1dc3Sbluhm 
428bd8f1dc3Sbluhm /* Test that an outright lie in the encoding is faulted */
429bd8f1dc3Sbluhm START_TEST(test_not_utf16) {
430bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431bd8f1dc3Sbluhm                      "<doc>Hi</doc>";
432bd8f1dc3Sbluhm 
433bd8f1dc3Sbluhm   /* Use a handler to provoke the appropriate code paths */
434bd8f1dc3Sbluhm   XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436bd8f1dc3Sbluhm                  "UTF-16 declared in UTF-8 not faulted");
437bd8f1dc3Sbluhm }
438bd8f1dc3Sbluhm END_TEST
439bd8f1dc3Sbluhm 
440bd8f1dc3Sbluhm /* Test that an unknown encoding is rejected */
441bd8f1dc3Sbluhm START_TEST(test_bad_encoding) {
442bd8f1dc3Sbluhm   const char *text = "<doc>Hi</doc>";
443bd8f1dc3Sbluhm 
444bd8f1dc3Sbluhm   if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445bd8f1dc3Sbluhm     fail("XML_SetEncoding failed");
446bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447bd8f1dc3Sbluhm                  "Unknown encoding not faulted");
448bd8f1dc3Sbluhm }
449bd8f1dc3Sbluhm END_TEST
450bd8f1dc3Sbluhm 
451bd8f1dc3Sbluhm /* Regression test for SF bug #481609, #774028. */
452bd8f1dc3Sbluhm START_TEST(test_latin1_umlauts) {
453bd8f1dc3Sbluhm   const char *text
454bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455bd8f1dc3Sbluhm         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456bd8f1dc3Sbluhm         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457bd8f1dc3Sbluhm #ifdef XML_UNICODE
458bd8f1dc3Sbluhm   /* Expected results in UTF-16 */
459bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460bd8f1dc3Sbluhm       XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461bd8f1dc3Sbluhm #else
462bd8f1dc3Sbluhm   /* Expected results in UTF-8 */
463bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464bd8f1dc3Sbluhm       XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465bd8f1dc3Sbluhm #endif
466bd8f1dc3Sbluhm 
467bd8f1dc3Sbluhm   run_character_check(text, expected);
468bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
469bd8f1dc3Sbluhm   run_attribute_check(text, expected);
470bd8f1dc3Sbluhm   /* Repeat with a default handler */
471bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
472bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
473bd8f1dc3Sbluhm   run_character_check(text, expected);
474bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
475bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
476bd8f1dc3Sbluhm   run_attribute_check(text, expected);
477bd8f1dc3Sbluhm }
478bd8f1dc3Sbluhm END_TEST
479bd8f1dc3Sbluhm 
480bd8f1dc3Sbluhm /* Test that an element name with a 4-byte UTF-8 character is rejected */
481bd8f1dc3Sbluhm START_TEST(test_long_utf8_character) {
482bd8f1dc3Sbluhm   const char *text
483bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='utf-8'?>\n"
484bd8f1dc3Sbluhm         /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485bd8f1dc3Sbluhm         "<do\xf0\x90\x80\x80/>";
486bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
487bd8f1dc3Sbluhm                  "4-byte UTF-8 character in element name not faulted");
488bd8f1dc3Sbluhm }
489bd8f1dc3Sbluhm END_TEST
490bd8f1dc3Sbluhm 
491bd8f1dc3Sbluhm /* Test that a long latin-1 attribute (too long to convert in one go)
492bd8f1dc3Sbluhm  * is correctly converted
493bd8f1dc3Sbluhm  */
494bd8f1dc3Sbluhm START_TEST(test_long_latin1_attribute) {
495bd8f1dc3Sbluhm   const char *text
496bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497bd8f1dc3Sbluhm         "<doc att='"
498bd8f1dc3Sbluhm         /* 64 characters per line */
499bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515bd8f1dc3Sbluhm         /* Last character splits across a buffer boundary */
516bd8f1dc3Sbluhm         "\xe4'>\n</doc>";
517bd8f1dc3Sbluhm 
518bd8f1dc3Sbluhm   const XML_Char *expected =
519bd8f1dc3Sbluhm       /* 64 characters per line */
520bd8f1dc3Sbluhm       /* clang-format off */
521bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537bd8f1dc3Sbluhm   /* clang-format on */
538bd8f1dc3Sbluhm #ifdef XML_UNICODE
539bd8f1dc3Sbluhm                                                   XCS("\x00e4");
540bd8f1dc3Sbluhm #else
541bd8f1dc3Sbluhm                                                   XCS("\xc3\xa4");
542bd8f1dc3Sbluhm #endif
543bd8f1dc3Sbluhm 
544bd8f1dc3Sbluhm   run_attribute_check(text, expected);
545bd8f1dc3Sbluhm }
546bd8f1dc3Sbluhm END_TEST
547bd8f1dc3Sbluhm 
548bd8f1dc3Sbluhm /* Test that a long ASCII attribute (too long to convert in one go)
549bd8f1dc3Sbluhm  * is correctly converted
550bd8f1dc3Sbluhm  */
551bd8f1dc3Sbluhm START_TEST(test_long_ascii_attribute) {
552bd8f1dc3Sbluhm   const char *text
553bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='us-ascii'?>\n"
554bd8f1dc3Sbluhm         "<doc att='"
555bd8f1dc3Sbluhm         /* 64 characters per line */
556bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572bd8f1dc3Sbluhm         "01234'>\n</doc>";
573bd8f1dc3Sbluhm   const XML_Char *expected =
574bd8f1dc3Sbluhm       /* 64 characters per line */
575bd8f1dc3Sbluhm       /* clang-format off */
576bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592bd8f1dc3Sbluhm         XCS("01234");
593bd8f1dc3Sbluhm   /* clang-format on */
594bd8f1dc3Sbluhm 
595bd8f1dc3Sbluhm   run_attribute_check(text, expected);
596bd8f1dc3Sbluhm }
597bd8f1dc3Sbluhm END_TEST
598bd8f1dc3Sbluhm 
599bd8f1dc3Sbluhm /* Regression test #1 for SF bug #653180. */
600bd8f1dc3Sbluhm START_TEST(test_line_number_after_parse) {
601bd8f1dc3Sbluhm   const char *text = "<tag>\n"
602bd8f1dc3Sbluhm                      "\n"
603bd8f1dc3Sbluhm                      "\n</tag>";
604bd8f1dc3Sbluhm   XML_Size lineno;
605bd8f1dc3Sbluhm 
606bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
608bd8f1dc3Sbluhm     xml_failure(g_parser);
609bd8f1dc3Sbluhm   lineno = XML_GetCurrentLineNumber(g_parser);
610bd8f1dc3Sbluhm   if (lineno != 4) {
611bd8f1dc3Sbluhm     char buffer[100];
612bd8f1dc3Sbluhm     snprintf(buffer, sizeof(buffer),
613bd8f1dc3Sbluhm              "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614bd8f1dc3Sbluhm     fail(buffer);
615bd8f1dc3Sbluhm   }
616bd8f1dc3Sbluhm }
617bd8f1dc3Sbluhm END_TEST
618bd8f1dc3Sbluhm 
619bd8f1dc3Sbluhm /* Regression test #2 for SF bug #653180. */
620bd8f1dc3Sbluhm START_TEST(test_column_number_after_parse) {
621bd8f1dc3Sbluhm   const char *text = "<tag></tag>";
622bd8f1dc3Sbluhm   XML_Size colno;
623bd8f1dc3Sbluhm 
624bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
626bd8f1dc3Sbluhm     xml_failure(g_parser);
627bd8f1dc3Sbluhm   colno = XML_GetCurrentColumnNumber(g_parser);
628bd8f1dc3Sbluhm   if (colno != 11) {
629bd8f1dc3Sbluhm     char buffer[100];
630bd8f1dc3Sbluhm     snprintf(buffer, sizeof(buffer),
631bd8f1dc3Sbluhm              "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632bd8f1dc3Sbluhm     fail(buffer);
633bd8f1dc3Sbluhm   }
634bd8f1dc3Sbluhm }
635bd8f1dc3Sbluhm END_TEST
636bd8f1dc3Sbluhm 
637bd8f1dc3Sbluhm /* Regression test #3 for SF bug #653180. */
638bd8f1dc3Sbluhm START_TEST(test_line_and_column_numbers_inside_handlers) {
639bd8f1dc3Sbluhm   const char *text = "<a>\n"      /* Unix end-of-line */
640bd8f1dc3Sbluhm                      "  <b>\r\n"  /* Windows end-of-line */
641bd8f1dc3Sbluhm                      "    <c/>\r" /* Mac OS end-of-line */
642bd8f1dc3Sbluhm                      "  </b>\n"
643bd8f1dc3Sbluhm                      "  <d>\n"
644bd8f1dc3Sbluhm                      "    <f/>\n"
645bd8f1dc3Sbluhm                      "  </d>\n"
646bd8f1dc3Sbluhm                      "</a>";
647bd8f1dc3Sbluhm   const StructDataEntry expected[]
648bd8f1dc3Sbluhm       = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649bd8f1dc3Sbluhm          {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650bd8f1dc3Sbluhm          {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651bd8f1dc3Sbluhm          {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652bd8f1dc3Sbluhm          {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653bd8f1dc3Sbluhm   const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654bd8f1dc3Sbluhm   StructData storage;
655bd8f1dc3Sbluhm 
656bd8f1dc3Sbluhm   StructData_Init(&storage);
657bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
658bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
662bd8f1dc3Sbluhm     xml_failure(g_parser);
663bd8f1dc3Sbluhm 
664bd8f1dc3Sbluhm   StructData_CheckItems(&storage, expected, expected_count);
665bd8f1dc3Sbluhm   StructData_Dispose(&storage);
666bd8f1dc3Sbluhm }
667bd8f1dc3Sbluhm END_TEST
668bd8f1dc3Sbluhm 
669bd8f1dc3Sbluhm /* Regression test #4 for SF bug #653180. */
670bd8f1dc3Sbluhm START_TEST(test_line_number_after_error) {
671bd8f1dc3Sbluhm   const char *text = "<a>\n"
672bd8f1dc3Sbluhm                      "  <b>\n"
673bd8f1dc3Sbluhm                      "  </a>"; /* missing </b> */
674bd8f1dc3Sbluhm   XML_Size lineno;
675bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
677bd8f1dc3Sbluhm     fail("Expected a parse error");
678bd8f1dc3Sbluhm 
679bd8f1dc3Sbluhm   lineno = XML_GetCurrentLineNumber(g_parser);
680bd8f1dc3Sbluhm   if (lineno != 3) {
681bd8f1dc3Sbluhm     char buffer[100];
682bd8f1dc3Sbluhm     snprintf(buffer, sizeof(buffer),
683bd8f1dc3Sbluhm              "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684bd8f1dc3Sbluhm     fail(buffer);
685bd8f1dc3Sbluhm   }
686bd8f1dc3Sbluhm }
687bd8f1dc3Sbluhm END_TEST
688bd8f1dc3Sbluhm 
689bd8f1dc3Sbluhm /* Regression test #5 for SF bug #653180. */
690bd8f1dc3Sbluhm START_TEST(test_column_number_after_error) {
691bd8f1dc3Sbluhm   const char *text = "<a>\n"
692bd8f1dc3Sbluhm                      "  <b>\n"
693bd8f1dc3Sbluhm                      "  </a>"; /* missing </b> */
694bd8f1dc3Sbluhm   XML_Size colno;
695bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
697bd8f1dc3Sbluhm     fail("Expected a parse error");
698bd8f1dc3Sbluhm 
699bd8f1dc3Sbluhm   colno = XML_GetCurrentColumnNumber(g_parser);
700bd8f1dc3Sbluhm   if (colno != 4) {
701bd8f1dc3Sbluhm     char buffer[100];
702bd8f1dc3Sbluhm     snprintf(buffer, sizeof(buffer),
703bd8f1dc3Sbluhm              "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704bd8f1dc3Sbluhm     fail(buffer);
705bd8f1dc3Sbluhm   }
706bd8f1dc3Sbluhm }
707bd8f1dc3Sbluhm END_TEST
708bd8f1dc3Sbluhm 
709bd8f1dc3Sbluhm /* Regression test for SF bug #478332. */
710bd8f1dc3Sbluhm START_TEST(test_really_long_lines) {
711bd8f1dc3Sbluhm   /* This parses an input line longer than INIT_DATA_BUF_SIZE
712bd8f1dc3Sbluhm      characters long (defined to be 1024 in xmlparse.c).  We take a
713bd8f1dc3Sbluhm      really cheesy approach to building the input buffer, because
714bd8f1dc3Sbluhm      this avoids writing bugs in buffer-filling code.
715bd8f1dc3Sbluhm   */
716bd8f1dc3Sbluhm   const char *text
717bd8f1dc3Sbluhm       = "<e>"
718bd8f1dc3Sbluhm         /* 64 chars */
719bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720bd8f1dc3Sbluhm         /* until we have at least 1024 characters on the line: */
721bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737bd8f1dc3Sbluhm         "</e>";
738bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
740bd8f1dc3Sbluhm     xml_failure(g_parser);
741bd8f1dc3Sbluhm }
742bd8f1dc3Sbluhm END_TEST
743bd8f1dc3Sbluhm 
744bd8f1dc3Sbluhm /* Test cdata processing across a buffer boundary */
745bd8f1dc3Sbluhm START_TEST(test_really_long_encoded_lines) {
746bd8f1dc3Sbluhm   /* As above, except that we want to provoke an output buffer
747bd8f1dc3Sbluhm    * overflow with a non-trivial encoding.  For this we need to pass
748bd8f1dc3Sbluhm    * the whole cdata in one go, not byte-by-byte.
749bd8f1dc3Sbluhm    */
750bd8f1dc3Sbluhm   void *buffer;
751bd8f1dc3Sbluhm   const char *text
752bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='iso-8859-1'?>"
753bd8f1dc3Sbluhm         "<e>"
754bd8f1dc3Sbluhm         /* 64 chars */
755bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756bd8f1dc3Sbluhm         /* until we have at least 1024 characters on the line: */
757bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772bd8f1dc3Sbluhm         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773bd8f1dc3Sbluhm         "</e>";
774bd8f1dc3Sbluhm   int parse_len = (int)strlen(text);
775bd8f1dc3Sbluhm 
776bd8f1dc3Sbluhm   /* Need a cdata handler to provoke the code path we want to test */
777bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778bd8f1dc3Sbluhm   buffer = XML_GetBuffer(g_parser, parse_len);
779bd8f1dc3Sbluhm   if (buffer == NULL)
780bd8f1dc3Sbluhm     fail("Could not allocate parse buffer");
781bd8f1dc3Sbluhm   assert(buffer != NULL);
782bd8f1dc3Sbluhm   memcpy(buffer, text, parse_len);
783bd8f1dc3Sbluhm   if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784bd8f1dc3Sbluhm     xml_failure(g_parser);
785bd8f1dc3Sbluhm }
786bd8f1dc3Sbluhm END_TEST
787bd8f1dc3Sbluhm 
788bd8f1dc3Sbluhm /*
789bd8f1dc3Sbluhm  * Element event tests.
790bd8f1dc3Sbluhm  */
791bd8f1dc3Sbluhm 
792bd8f1dc3Sbluhm START_TEST(test_end_element_events) {
793bd8f1dc3Sbluhm   const char *text = "<a><b><c/></b><d><f/></d></a>";
794bd8f1dc3Sbluhm   const XML_Char *expected = XCS("/c/b/f/d/a");
795bd8f1dc3Sbluhm   CharData storage;
796bd8f1dc3Sbluhm 
797bd8f1dc3Sbluhm   CharData_Init(&storage);
798bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
799bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, end_element_event_handler);
800bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
802bd8f1dc3Sbluhm     xml_failure(g_parser);
803bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
804bd8f1dc3Sbluhm }
805bd8f1dc3Sbluhm END_TEST
806bd8f1dc3Sbluhm 
807bd8f1dc3Sbluhm /*
808bd8f1dc3Sbluhm  * Attribute tests.
809bd8f1dc3Sbluhm  */
810bd8f1dc3Sbluhm 
811bd8f1dc3Sbluhm /* Helper used by the following tests; this checks any "attr" and "refs"
812bd8f1dc3Sbluhm    attributes to make sure whitespace has been normalized.
813bd8f1dc3Sbluhm 
814bd8f1dc3Sbluhm    Return true if whitespace has been normalized in a string, using
815bd8f1dc3Sbluhm    the rules for attribute value normalization.  The 'is_cdata' flag
816bd8f1dc3Sbluhm    is needed since CDATA attributes don't need to have multiple
817bd8f1dc3Sbluhm    whitespace characters collapsed to a single space, while other
818bd8f1dc3Sbluhm    attribute data types do.  (Section 3.3.3 of the recommendation.)
819bd8f1dc3Sbluhm */
820bd8f1dc3Sbluhm static int
821bd8f1dc3Sbluhm is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822bd8f1dc3Sbluhm   int blanks = 0;
823bd8f1dc3Sbluhm   int at_start = 1;
824bd8f1dc3Sbluhm   while (*s) {
825bd8f1dc3Sbluhm     if (*s == XCS(' '))
826bd8f1dc3Sbluhm       ++blanks;
827bd8f1dc3Sbluhm     else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828bd8f1dc3Sbluhm       return 0;
829bd8f1dc3Sbluhm     else {
830bd8f1dc3Sbluhm       if (at_start) {
831bd8f1dc3Sbluhm         at_start = 0;
832bd8f1dc3Sbluhm         if (blanks && ! is_cdata)
833bd8f1dc3Sbluhm           /* illegal leading blanks */
834bd8f1dc3Sbluhm           return 0;
835bd8f1dc3Sbluhm       } else if (blanks > 1 && ! is_cdata)
836bd8f1dc3Sbluhm         return 0;
837bd8f1dc3Sbluhm       blanks = 0;
838bd8f1dc3Sbluhm     }
839bd8f1dc3Sbluhm     ++s;
840bd8f1dc3Sbluhm   }
841bd8f1dc3Sbluhm   if (blanks && ! is_cdata)
842bd8f1dc3Sbluhm     return 0;
843bd8f1dc3Sbluhm   return 1;
844bd8f1dc3Sbluhm }
845bd8f1dc3Sbluhm 
846bd8f1dc3Sbluhm /* Check the attribute whitespace checker: */
847bd8f1dc3Sbluhm START_TEST(test_helper_is_whitespace_normalized) {
848bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc"), 0));
849bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc"), 1));
850bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS(" "), 0));
859bd8f1dc3Sbluhm   assert(is_whitespace_normalized(XCS(" "), 1));
860bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\t"), 0));
861bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\t"), 1));
862bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\n"), 0));
863bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\n"), 1));
864bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\r"), 0));
865bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("\r"), 1));
866bd8f1dc3Sbluhm   assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867bd8f1dc3Sbluhm }
868bd8f1dc3Sbluhm END_TEST
869bd8f1dc3Sbluhm 
870bd8f1dc3Sbluhm static void XMLCALL
871bd8f1dc3Sbluhm check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872bd8f1dc3Sbluhm                                           const XML_Char **atts) {
873bd8f1dc3Sbluhm   int i;
874bd8f1dc3Sbluhm   UNUSED_P(userData);
875bd8f1dc3Sbluhm   UNUSED_P(name);
876bd8f1dc3Sbluhm   for (i = 0; atts[i] != NULL; i += 2) {
877bd8f1dc3Sbluhm     const XML_Char *attrname = atts[i];
878bd8f1dc3Sbluhm     const XML_Char *value = atts[i + 1];
879bd8f1dc3Sbluhm     if (xcstrcmp(XCS("attr"), attrname) == 0
880bd8f1dc3Sbluhm         || xcstrcmp(XCS("ents"), attrname) == 0
881bd8f1dc3Sbluhm         || xcstrcmp(XCS("refs"), attrname) == 0) {
882bd8f1dc3Sbluhm       if (! is_whitespace_normalized(value, 0)) {
883bd8f1dc3Sbluhm         char buffer[256];
884bd8f1dc3Sbluhm         snprintf(buffer, sizeof(buffer),
885bd8f1dc3Sbluhm                  "attribute value not normalized: %" XML_FMT_STR
886bd8f1dc3Sbluhm                  "='%" XML_FMT_STR "'",
887bd8f1dc3Sbluhm                  attrname, value);
888bd8f1dc3Sbluhm         fail(buffer);
889bd8f1dc3Sbluhm       }
890bd8f1dc3Sbluhm     }
891bd8f1dc3Sbluhm   }
892bd8f1dc3Sbluhm }
893bd8f1dc3Sbluhm 
894bd8f1dc3Sbluhm START_TEST(test_attr_whitespace_normalization) {
895bd8f1dc3Sbluhm   const char *text
896bd8f1dc3Sbluhm       = "<!DOCTYPE doc [\n"
897bd8f1dc3Sbluhm         "  <!ATTLIST doc\n"
898bd8f1dc3Sbluhm         "            attr NMTOKENS #REQUIRED\n"
899bd8f1dc3Sbluhm         "            ents ENTITIES #REQUIRED\n"
900bd8f1dc3Sbluhm         "            refs IDREFS   #REQUIRED>\n"
901bd8f1dc3Sbluhm         "]>\n"
902bd8f1dc3Sbluhm         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903bd8f1dc3Sbluhm         "     ents=' ent-1   \t\r\n"
904bd8f1dc3Sbluhm         "            ent-2  ' >\n"
905bd8f1dc3Sbluhm         "  <e id='id-1'/>\n"
906bd8f1dc3Sbluhm         "  <e id='id-2'/>\n"
907bd8f1dc3Sbluhm         "</doc>";
908bd8f1dc3Sbluhm 
909bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser,
910bd8f1dc3Sbluhm                              check_attr_contains_normalized_whitespace);
911bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
913bd8f1dc3Sbluhm     xml_failure(g_parser);
914bd8f1dc3Sbluhm }
915bd8f1dc3Sbluhm END_TEST
916bd8f1dc3Sbluhm 
917bd8f1dc3Sbluhm /*
918bd8f1dc3Sbluhm  * XML declaration tests.
919bd8f1dc3Sbluhm  */
920bd8f1dc3Sbluhm 
921bd8f1dc3Sbluhm START_TEST(test_xmldecl_misplaced) {
922bd8f1dc3Sbluhm   expect_failure("\n"
923bd8f1dc3Sbluhm                  "<?xml version='1.0'?>\n"
924bd8f1dc3Sbluhm                  "<a/>",
925bd8f1dc3Sbluhm                  XML_ERROR_MISPLACED_XML_PI,
926bd8f1dc3Sbluhm                  "failed to report misplaced XML declaration");
927bd8f1dc3Sbluhm }
928bd8f1dc3Sbluhm END_TEST
929bd8f1dc3Sbluhm 
930bd8f1dc3Sbluhm START_TEST(test_xmldecl_invalid) {
931bd8f1dc3Sbluhm   expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932bd8f1dc3Sbluhm                  "Failed to report invalid XML declaration");
933bd8f1dc3Sbluhm }
934bd8f1dc3Sbluhm END_TEST
935bd8f1dc3Sbluhm 
936bd8f1dc3Sbluhm START_TEST(test_xmldecl_missing_attr) {
937bd8f1dc3Sbluhm   expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938bd8f1dc3Sbluhm                  "Failed to report missing XML declaration attribute");
939bd8f1dc3Sbluhm }
940bd8f1dc3Sbluhm END_TEST
941bd8f1dc3Sbluhm 
942bd8f1dc3Sbluhm START_TEST(test_xmldecl_missing_value) {
943bd8f1dc3Sbluhm   expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944bd8f1dc3Sbluhm                  "<doc/>",
945bd8f1dc3Sbluhm                  XML_ERROR_XML_DECL,
946bd8f1dc3Sbluhm                  "Failed to report missing attribute value");
947bd8f1dc3Sbluhm }
948bd8f1dc3Sbluhm END_TEST
949bd8f1dc3Sbluhm 
950bd8f1dc3Sbluhm /* Regression test for SF bug #584832. */
951bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_internal_entity) {
952bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953bd8f1dc3Sbluhm                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954bd8f1dc3Sbluhm                      "<test a='&foo;'/>";
955bd8f1dc3Sbluhm 
956bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
959bd8f1dc3Sbluhm     xml_failure(g_parser);
960bd8f1dc3Sbluhm }
961bd8f1dc3Sbluhm END_TEST
962bd8f1dc3Sbluhm 
963bd8f1dc3Sbluhm /* Test unrecognised encoding handler */
964bd8f1dc3Sbluhm START_TEST(test_unrecognised_encoding_internal_entity) {
965bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966bd8f1dc3Sbluhm                      "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967bd8f1dc3Sbluhm                      "<test a='&foo;'/>";
968bd8f1dc3Sbluhm 
969bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
972bd8f1dc3Sbluhm     fail("Unrecognised encoding not rejected");
973bd8f1dc3Sbluhm }
974bd8f1dc3Sbluhm END_TEST
975bd8f1dc3Sbluhm 
976bd8f1dc3Sbluhm /* Regression test for SF bug #620106. */
977bd8f1dc3Sbluhm START_TEST(test_ext_entity_set_encoding) {
978bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
979bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980bd8f1dc3Sbluhm                      "]>\n"
981bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
982bd8f1dc3Sbluhm   ExtTest test_data
983bd8f1dc3Sbluhm       = {/* This text says it's an unsupported encoding, but it's really
984bd8f1dc3Sbluhm             UTF-8, which we tell Expat using XML_SetEncoding().
985bd8f1dc3Sbluhm          */
986bd8f1dc3Sbluhm          "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987bd8f1dc3Sbluhm #ifdef XML_UNICODE
988bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9");
989bd8f1dc3Sbluhm #else
990bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xa9");
991bd8f1dc3Sbluhm #endif
992bd8f1dc3Sbluhm 
993bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994bd8f1dc3Sbluhm   run_ext_character_check(text, &test_data, expected);
995bd8f1dc3Sbluhm }
996bd8f1dc3Sbluhm END_TEST
997bd8f1dc3Sbluhm 
998bd8f1dc3Sbluhm /* Test external entities with no handler */
999bd8f1dc3Sbluhm START_TEST(test_ext_entity_no_handler) {
1000bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1001bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002bd8f1dc3Sbluhm                      "]>\n"
1003bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
1004bd8f1dc3Sbluhm 
1005bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006bd8f1dc3Sbluhm   run_character_check(text, XCS(""));
1007bd8f1dc3Sbluhm }
1008bd8f1dc3Sbluhm END_TEST
1009bd8f1dc3Sbluhm 
1010bd8f1dc3Sbluhm /* Test UTF-8 BOM is accepted */
1011bd8f1dc3Sbluhm START_TEST(test_ext_entity_set_bom) {
1012bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1013bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014bd8f1dc3Sbluhm                      "]>\n"
1015bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
1016bd8f1dc3Sbluhm   ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017bd8f1dc3Sbluhm                        "<?xml encoding='iso-8859-3'?>"
1018bd8f1dc3Sbluhm                        "\xC3\xA9",
1019bd8f1dc3Sbluhm                        XCS("utf-8"), NULL};
1020bd8f1dc3Sbluhm #ifdef XML_UNICODE
1021bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9");
1022bd8f1dc3Sbluhm #else
1023bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xa9");
1024bd8f1dc3Sbluhm #endif
1025bd8f1dc3Sbluhm 
1026bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027bd8f1dc3Sbluhm   run_ext_character_check(text, &test_data, expected);
1028bd8f1dc3Sbluhm }
1029bd8f1dc3Sbluhm END_TEST
1030bd8f1dc3Sbluhm 
1031bd8f1dc3Sbluhm /* Test that bad encodings are faulted */
1032bd8f1dc3Sbluhm START_TEST(test_ext_entity_bad_encoding) {
1033bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1034bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035bd8f1dc3Sbluhm                      "]>\n"
1036bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
1037bd8f1dc3Sbluhm   ExtFaults fault
1038bd8f1dc3Sbluhm       = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039bd8f1dc3Sbluhm          XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040bd8f1dc3Sbluhm 
1041bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &fault);
1043bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044bd8f1dc3Sbluhm                  "Bad encoding should not have been accepted");
1045bd8f1dc3Sbluhm }
1046bd8f1dc3Sbluhm END_TEST
1047bd8f1dc3Sbluhm 
1048bd8f1dc3Sbluhm /* Try handing an invalid encoding to an external entity parser */
1049bd8f1dc3Sbluhm START_TEST(test_ext_entity_bad_encoding_2) {
1050bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1053bd8f1dc3Sbluhm   ExtFaults fault
1054bd8f1dc3Sbluhm       = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055bd8f1dc3Sbluhm          XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056bd8f1dc3Sbluhm 
1057bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &fault);
1060bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061bd8f1dc3Sbluhm                  "Bad encoding not faulted in external entity handler");
1062bd8f1dc3Sbluhm }
1063bd8f1dc3Sbluhm END_TEST
1064bd8f1dc3Sbluhm 
1065bd8f1dc3Sbluhm /* Test that no error is reported for unknown entities if we don't
1066bd8f1dc3Sbluhm    read an external subset.  This was fixed in Expat 1.95.5.
1067bd8f1dc3Sbluhm */
1068bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1071bd8f1dc3Sbluhm 
1072bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1074bd8f1dc3Sbluhm     xml_failure(g_parser);
1075bd8f1dc3Sbluhm }
1076bd8f1dc3Sbluhm END_TEST
1077bd8f1dc3Sbluhm 
1078bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we don't
1079bd8f1dc3Sbluhm    have an external subset.
1080bd8f1dc3Sbluhm */
1081bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082bd8f1dc3Sbluhm   expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083bd8f1dc3Sbluhm                  "Parser did not report undefined entity w/out a DTD.");
1084bd8f1dc3Sbluhm }
1085bd8f1dc3Sbluhm END_TEST
1086bd8f1dc3Sbluhm 
1087bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we don't
1088bd8f1dc3Sbluhm    read an external subset, but have been declared standalone.
1089bd8f1dc3Sbluhm */
1090bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_standalone) {
1091bd8f1dc3Sbluhm   const char *text
1092bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093bd8f1dc3Sbluhm         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094bd8f1dc3Sbluhm         "<doc>&entity;</doc>";
1095bd8f1dc3Sbluhm 
1096bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097bd8f1dc3Sbluhm                  "Parser did not report undefined entity (standalone).");
1098bd8f1dc3Sbluhm }
1099bd8f1dc3Sbluhm END_TEST
1100bd8f1dc3Sbluhm 
1101bd8f1dc3Sbluhm /* Test that an error is reported for unknown entities if we have read
1102bd8f1dc3Sbluhm    an external subset, and standalone is true.
1103bd8f1dc3Sbluhm */
1104bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105bd8f1dc3Sbluhm   const char *text
1106bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107bd8f1dc3Sbluhm         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108bd8f1dc3Sbluhm         "<doc>&entity;</doc>";
1109bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110bd8f1dc3Sbluhm 
1111bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
1113bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115bd8f1dc3Sbluhm                  "Parser did not report undefined entity (external DTD).");
1116bd8f1dc3Sbluhm }
1117bd8f1dc3Sbluhm END_TEST
1118bd8f1dc3Sbluhm 
1119bd8f1dc3Sbluhm /* Test that external entity handling is not done if the parsing flag
1120bd8f1dc3Sbluhm  * is set to UNLESS_STANDALONE
1121bd8f1dc3Sbluhm  */
1122bd8f1dc3Sbluhm START_TEST(test_entity_with_external_subset_unless_standalone) {
1123bd8f1dc3Sbluhm   const char *text
1124bd8f1dc3Sbluhm       = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125bd8f1dc3Sbluhm         "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126bd8f1dc3Sbluhm         "<doc>&entity;</doc>";
1127bd8f1dc3Sbluhm   ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128bd8f1dc3Sbluhm 
1129bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser,
1130bd8f1dc3Sbluhm                             XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
1132bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134bd8f1dc3Sbluhm                  "Parser did not report undefined entity");
1135bd8f1dc3Sbluhm }
1136bd8f1dc3Sbluhm END_TEST
1137bd8f1dc3Sbluhm 
1138bd8f1dc3Sbluhm /* Test that no error is reported for unknown entities if we have read
1139bd8f1dc3Sbluhm    an external subset, and standalone is false.
1140bd8f1dc3Sbluhm */
1141bd8f1dc3Sbluhm START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1145bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146bd8f1dc3Sbluhm 
1147bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149bd8f1dc3Sbluhm   run_ext_character_check(text, &test_data, XCS(""));
1150bd8f1dc3Sbluhm }
1151bd8f1dc3Sbluhm END_TEST
1152bd8f1dc3Sbluhm 
1153bd8f1dc3Sbluhm /* Test that an error is reported if our NotStandalone handler fails */
1154bd8f1dc3Sbluhm START_TEST(test_not_standalone_handler_reject) {
1155bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1158bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159bd8f1dc3Sbluhm 
1160bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
1162bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163bd8f1dc3Sbluhm   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165bd8f1dc3Sbluhm                  "NotStandalone handler failed to reject");
1166bd8f1dc3Sbluhm 
1167bd8f1dc3Sbluhm   /* Try again but without external entity handling */
1168bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
1169bd8f1dc3Sbluhm   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171bd8f1dc3Sbluhm                  "NotStandalone handler failed to reject");
1172bd8f1dc3Sbluhm }
1173bd8f1dc3Sbluhm END_TEST
1174bd8f1dc3Sbluhm 
1175bd8f1dc3Sbluhm /* Test that no error is reported if our NotStandalone handler succeeds */
1176bd8f1dc3Sbluhm START_TEST(test_not_standalone_handler_accept) {
1177bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1180bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181bd8f1dc3Sbluhm 
1182bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184bd8f1dc3Sbluhm   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185bd8f1dc3Sbluhm   run_ext_character_check(text, &test_data, XCS(""));
1186bd8f1dc3Sbluhm 
1187bd8f1dc3Sbluhm   /* Repeat without the external entity handler */
1188bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
1189bd8f1dc3Sbluhm   XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190bd8f1dc3Sbluhm   run_character_check(text, XCS(""));
1191bd8f1dc3Sbluhm }
1192bd8f1dc3Sbluhm END_TEST
1193bd8f1dc3Sbluhm 
1194bd8f1dc3Sbluhm START_TEST(test_wfc_no_recursive_entity_refs) {
1195bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1196bd8f1dc3Sbluhm                      "  <!ENTITY entity '&#38;entity;'>\n"
1197bd8f1dc3Sbluhm                      "]>\n"
1198bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
1199bd8f1dc3Sbluhm 
1200bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201bd8f1dc3Sbluhm                  "Parser did not report recursive entity reference.");
1202bd8f1dc3Sbluhm }
1203bd8f1dc3Sbluhm END_TEST
1204bd8f1dc3Sbluhm 
1205c033f770Sbluhm START_TEST(test_recursive_external_parameter_entity_2) {
1206c033f770Sbluhm   struct TestCase {
1207c033f770Sbluhm     const char *doc;
1208c033f770Sbluhm     enum XML_Status expectedStatus;
1209c033f770Sbluhm   };
1210c033f770Sbluhm 
1211c033f770Sbluhm   struct TestCase cases[] = {
1212c033f770Sbluhm       {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213c033f770Sbluhm       {"<!ENTITY % p1 '%p1;'>"
1214c033f770Sbluhm        "<!ENTITY % p1 'first declaration wins'>",
1215c033f770Sbluhm        XML_STATUS_ERROR},
1216c033f770Sbluhm       {"<!ENTITY % p1 'first declaration wins'>"
1217c033f770Sbluhm        "<!ENTITY % p1 '%p1;'>",
1218c033f770Sbluhm        XML_STATUS_OK},
1219c033f770Sbluhm       {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220c033f770Sbluhm   };
1221c033f770Sbluhm 
1222c033f770Sbluhm   for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223c033f770Sbluhm     const char *const doc = cases[i].doc;
1224c033f770Sbluhm     const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225c033f770Sbluhm     set_subtest("%s", doc);
1226c033f770Sbluhm 
1227c033f770Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
1228c033f770Sbluhm     assert_true(parser != NULL);
1229c033f770Sbluhm 
1230c033f770Sbluhm     XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231c033f770Sbluhm     assert_true(ext_parser != NULL);
1232c033f770Sbluhm 
1233c033f770Sbluhm     const enum XML_Status actualStatus
1234c033f770Sbluhm         = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235c033f770Sbluhm 
1236c033f770Sbluhm     assert_true(actualStatus == expectedStatus);
1237c033f770Sbluhm     if (actualStatus != XML_STATUS_OK) {
1238c033f770Sbluhm       assert_true(XML_GetErrorCode(ext_parser)
1239c033f770Sbluhm                   == XML_ERROR_RECURSIVE_ENTITY_REF);
1240c033f770Sbluhm     }
1241c033f770Sbluhm 
1242c033f770Sbluhm     XML_ParserFree(ext_parser);
1243c033f770Sbluhm     XML_ParserFree(parser);
1244c033f770Sbluhm   }
1245c033f770Sbluhm }
1246c033f770Sbluhm END_TEST
1247c033f770Sbluhm 
1248bd8f1dc3Sbluhm /* Test incomplete external entities are faulted */
1249bd8f1dc3Sbluhm START_TEST(test_ext_entity_invalid_parse) {
1250bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1251bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252bd8f1dc3Sbluhm                      "]>\n"
1253bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
1254bd8f1dc3Sbluhm   const ExtFaults faults[]
1255bd8f1dc3Sbluhm       = {{"<", "Incomplete element declaration not faulted", NULL,
1256bd8f1dc3Sbluhm           XML_ERROR_UNCLOSED_TOKEN},
1257bd8f1dc3Sbluhm          {"<\xe2\x82", /* First two bytes of a three-byte char */
1258bd8f1dc3Sbluhm           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259bd8f1dc3Sbluhm          {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260bd8f1dc3Sbluhm           XML_ERROR_PARTIAL_CHAR},
1261bd8f1dc3Sbluhm          {NULL, NULL, NULL, XML_ERROR_NONE}};
1262bd8f1dc3Sbluhm   const ExtFaults *fault = faults;
1263bd8f1dc3Sbluhm 
1264bd8f1dc3Sbluhm   for (; fault->parse_text != NULL; fault++) {
1265bd8f1dc3Sbluhm     set_subtest("\"%s\"", fault->parse_text);
1266bd8f1dc3Sbluhm     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268bd8f1dc3Sbluhm     XML_SetUserData(g_parser, (void *)fault);
1269bd8f1dc3Sbluhm     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270bd8f1dc3Sbluhm                    "Parser did not report external entity error");
1271bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1272bd8f1dc3Sbluhm   }
1273bd8f1dc3Sbluhm }
1274bd8f1dc3Sbluhm END_TEST
1275bd8f1dc3Sbluhm 
1276bd8f1dc3Sbluhm /* Regression test for SF bug #483514. */
1277bd8f1dc3Sbluhm START_TEST(test_dtd_default_handling) {
1278bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
1279bd8f1dc3Sbluhm                      "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280bd8f1dc3Sbluhm                      "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281bd8f1dc3Sbluhm                      "<!ELEMENT doc EMPTY>\n"
1282bd8f1dc3Sbluhm                      "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283bd8f1dc3Sbluhm                      "<?pi in dtd?>\n"
1284bd8f1dc3Sbluhm                      "<!--comment in dtd-->\n"
1285bd8f1dc3Sbluhm                      "]><doc/>";
1286bd8f1dc3Sbluhm 
1287bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
1288bd8f1dc3Sbluhm   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289bd8f1dc3Sbluhm   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290bd8f1dc3Sbluhm   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291bd8f1dc3Sbluhm   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293bd8f1dc3Sbluhm   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295bd8f1dc3Sbluhm   XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296bd8f1dc3Sbluhm   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297bd8f1dc3Sbluhm   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298bd8f1dc3Sbluhm   run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299bd8f1dc3Sbluhm }
1300bd8f1dc3Sbluhm END_TEST
1301bd8f1dc3Sbluhm 
1302bd8f1dc3Sbluhm /* Test handling of attribute declarations */
1303bd8f1dc3Sbluhm START_TEST(test_dtd_attr_handling) {
1304bd8f1dc3Sbluhm   const char *prolog = "<!DOCTYPE doc [\n"
1305bd8f1dc3Sbluhm                        "<!ELEMENT doc EMPTY>\n";
1306bd8f1dc3Sbluhm   AttTest attr_data[]
1307bd8f1dc3Sbluhm       = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308bd8f1dc3Sbluhm           "]>"
1309bd8f1dc3Sbluhm           "<doc a='two'/>",
1310bd8f1dc3Sbluhm           XCS("doc"), XCS("a"),
1311bd8f1dc3Sbluhm           XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312bd8f1dc3Sbluhm           NULL, XML_TRUE},
1313bd8f1dc3Sbluhm          {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314bd8f1dc3Sbluhm           "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315bd8f1dc3Sbluhm           "]>"
1316bd8f1dc3Sbluhm           "<doc/>",
1317bd8f1dc3Sbluhm           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318bd8f1dc3Sbluhm          {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319bd8f1dc3Sbluhm           "]>"
1320bd8f1dc3Sbluhm           "<doc/>",
1321bd8f1dc3Sbluhm           XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322bd8f1dc3Sbluhm          {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323bd8f1dc3Sbluhm           "]>"
1324bd8f1dc3Sbluhm           "<doc/>",
1325bd8f1dc3Sbluhm           XCS("doc"), XCS("a"), XCS("CDATA"),
1326bd8f1dc3Sbluhm #ifdef XML_UNICODE
1327bd8f1dc3Sbluhm           XCS("\x06f2"),
1328bd8f1dc3Sbluhm #else
1329bd8f1dc3Sbluhm           XCS("\xdb\xb2"),
1330bd8f1dc3Sbluhm #endif
1331bd8f1dc3Sbluhm           XML_FALSE},
1332bd8f1dc3Sbluhm          {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333bd8f1dc3Sbluhm   AttTest *test;
1334bd8f1dc3Sbluhm 
1335bd8f1dc3Sbluhm   for (test = attr_data; test->definition != NULL; test++) {
1336bd8f1dc3Sbluhm     set_subtest("%s", test->definition);
1337bd8f1dc3Sbluhm     XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338bd8f1dc3Sbluhm     XML_SetUserData(g_parser, test);
1339bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340bd8f1dc3Sbluhm                                 XML_FALSE)
1341bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1342bd8f1dc3Sbluhm       xml_failure(g_parser);
1343bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344bd8f1dc3Sbluhm                                 (int)strlen(test->definition), XML_TRUE)
1345bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1346bd8f1dc3Sbluhm       xml_failure(g_parser);
1347bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1348bd8f1dc3Sbluhm   }
1349bd8f1dc3Sbluhm }
1350bd8f1dc3Sbluhm END_TEST
1351bd8f1dc3Sbluhm 
1352bd8f1dc3Sbluhm /* See related SF bug #673791.
1353bd8f1dc3Sbluhm    When namespace processing is enabled, setting the namespace URI for
1354bd8f1dc3Sbluhm    a prefix is not allowed; this test ensures that it *is* allowed
1355bd8f1dc3Sbluhm    when namespace processing is not enabled.
1356bd8f1dc3Sbluhm    (See Namespaces in XML, section 2.)
1357bd8f1dc3Sbluhm */
1358bd8f1dc3Sbluhm START_TEST(test_empty_ns_without_namespaces) {
1359bd8f1dc3Sbluhm   const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360bd8f1dc3Sbluhm                      "  <e xmlns:prefix=''/>\n"
1361bd8f1dc3Sbluhm                      "</doc>";
1362bd8f1dc3Sbluhm 
1363bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1365bd8f1dc3Sbluhm     xml_failure(g_parser);
1366bd8f1dc3Sbluhm }
1367bd8f1dc3Sbluhm END_TEST
1368bd8f1dc3Sbluhm 
1369bd8f1dc3Sbluhm /* Regression test for SF bug #824420.
1370bd8f1dc3Sbluhm    Checks that an xmlns:prefix attribute set in an attribute's default
1371bd8f1dc3Sbluhm    value isn't misinterpreted.
1372bd8f1dc3Sbluhm */
1373bd8f1dc3Sbluhm START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE e:element [\n"
1375bd8f1dc3Sbluhm                      "  <!ATTLIST e:element\n"
1376bd8f1dc3Sbluhm                      "    xmlns:e CDATA 'http://example.org/'>\n"
1377bd8f1dc3Sbluhm                      "      ]>\n"
1378bd8f1dc3Sbluhm                      "<e:element/>";
1379bd8f1dc3Sbluhm 
1380bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1382bd8f1dc3Sbluhm     xml_failure(g_parser);
1383bd8f1dc3Sbluhm }
1384bd8f1dc3Sbluhm END_TEST
1385bd8f1dc3Sbluhm 
1386bd8f1dc3Sbluhm /* Regression test for SF bug #1515266: missing check of stopped
1387bd8f1dc3Sbluhm    parser in doContext() 'for' loop. */
1388bd8f1dc3Sbluhm START_TEST(test_stop_parser_between_char_data_calls) {
1389bd8f1dc3Sbluhm   /* The sample data must be big enough that there are two calls to
1390bd8f1dc3Sbluhm      the character data handler from within the inner "for" loop of
1391bd8f1dc3Sbluhm      the XML_TOK_DATA_CHARS case in doContent(), and the character
1392bd8f1dc3Sbluhm      handler must stop the parser and clear the character data
1393bd8f1dc3Sbluhm      handler.
1394bd8f1dc3Sbluhm   */
1395bd8f1dc3Sbluhm   const char *text = long_character_data_text;
1396bd8f1dc3Sbluhm 
1397bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
1399bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
1401bd8f1dc3Sbluhm     xml_failure(g_parser);
1402bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403bd8f1dc3Sbluhm     xml_failure(g_parser);
1404bd8f1dc3Sbluhm }
1405bd8f1dc3Sbluhm END_TEST
1406bd8f1dc3Sbluhm 
1407bd8f1dc3Sbluhm /* Regression test for SF bug #1515266: missing check of stopped
1408bd8f1dc3Sbluhm    parser in doContext() 'for' loop. */
1409bd8f1dc3Sbluhm START_TEST(test_suspend_parser_between_char_data_calls) {
1410bd8f1dc3Sbluhm   /* The sample data must be big enough that there are two calls to
1411bd8f1dc3Sbluhm      the character data handler from within the inner "for" loop of
1412bd8f1dc3Sbluhm      the XML_TOK_DATA_CHARS case in doContent(), and the character
1413bd8f1dc3Sbluhm      handler must stop the parser and clear the character data
1414bd8f1dc3Sbluhm      handler.
1415bd8f1dc3Sbluhm   */
1416bd8f1dc3Sbluhm   const char *text = long_character_data_text;
1417bd8f1dc3Sbluhm 
1418bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
1420bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
1422bd8f1dc3Sbluhm     xml_failure(g_parser);
1423bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424bd8f1dc3Sbluhm     xml_failure(g_parser);
1425bd8f1dc3Sbluhm   /* Try parsing directly */
1426bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
1428bd8f1dc3Sbluhm     fail("Attempt to continue parse while suspended not faulted");
1429bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430bd8f1dc3Sbluhm     fail("Suspended parse not faulted with correct error");
1431bd8f1dc3Sbluhm }
1432bd8f1dc3Sbluhm END_TEST
1433bd8f1dc3Sbluhm 
1434bd8f1dc3Sbluhm /* Test repeated calls to XML_StopParser are handled correctly */
1435bd8f1dc3Sbluhm START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436bd8f1dc3Sbluhm   const char *text = long_character_data_text;
1437bd8f1dc3Sbluhm 
1438bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
1440bd8f1dc3Sbluhm   g_abortable = XML_FALSE;
1441bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
1443bd8f1dc3Sbluhm     fail("Failed to double-stop parser");
1444bd8f1dc3Sbluhm 
1445bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
1446bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
1448bd8f1dc3Sbluhm   g_abortable = XML_FALSE;
1449bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
1451bd8f1dc3Sbluhm     fail("Failed to double-suspend parser");
1452bd8f1dc3Sbluhm 
1453bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
1454bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
1456bd8f1dc3Sbluhm   g_abortable = XML_TRUE;
1457bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
1459bd8f1dc3Sbluhm     fail("Failed to suspend-abort parser");
1460bd8f1dc3Sbluhm }
1461bd8f1dc3Sbluhm END_TEST
1462bd8f1dc3Sbluhm 
1463bd8f1dc3Sbluhm START_TEST(test_good_cdata_ascii) {
1464bd8f1dc3Sbluhm   const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466bd8f1dc3Sbluhm 
1467bd8f1dc3Sbluhm   CharData storage;
1468bd8f1dc3Sbluhm   CharData_Init(&storage);
1469bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1470bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471bd8f1dc3Sbluhm   /* Add start and end handlers for coverage */
1472bd8f1dc3Sbluhm   XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473bd8f1dc3Sbluhm   XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474bd8f1dc3Sbluhm 
1475bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1477bd8f1dc3Sbluhm     xml_failure(g_parser);
1478bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1479bd8f1dc3Sbluhm 
1480bd8f1dc3Sbluhm   /* Try again, this time with a default handler */
1481bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
1482bd8f1dc3Sbluhm   CharData_Init(&storage);
1483bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1484bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486bd8f1dc3Sbluhm 
1487bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1489bd8f1dc3Sbluhm     xml_failure(g_parser);
1490bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1491bd8f1dc3Sbluhm }
1492bd8f1dc3Sbluhm END_TEST
1493bd8f1dc3Sbluhm 
1494bd8f1dc3Sbluhm START_TEST(test_good_cdata_utf16) {
1495bd8f1dc3Sbluhm   /* Test data is:
1496bd8f1dc3Sbluhm    *   <?xml version='1.0' encoding='utf-16'?>
1497bd8f1dc3Sbluhm    *   <a><![CDATA[hello]]></a>
1498bd8f1dc3Sbluhm    */
1499bd8f1dc3Sbluhm   const char text[]
1500bd8f1dc3Sbluhm       = "\0<\0?\0x\0m\0l\0"
1501bd8f1dc3Sbluhm         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502bd8f1dc3Sbluhm         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503bd8f1dc3Sbluhm         "1\0"
1504bd8f1dc3Sbluhm         "6\0'"
1505bd8f1dc3Sbluhm         "\0?\0>\0\n"
1506bd8f1dc3Sbluhm         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507bd8f1dc3Sbluhm   const XML_Char *expected = XCS("hello");
1508bd8f1dc3Sbluhm 
1509bd8f1dc3Sbluhm   CharData storage;
1510bd8f1dc3Sbluhm   CharData_Init(&storage);
1511bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1512bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513bd8f1dc3Sbluhm 
1514bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1516bd8f1dc3Sbluhm     xml_failure(g_parser);
1517bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1518bd8f1dc3Sbluhm }
1519bd8f1dc3Sbluhm END_TEST
1520bd8f1dc3Sbluhm 
1521bd8f1dc3Sbluhm START_TEST(test_good_cdata_utf16_le) {
1522bd8f1dc3Sbluhm   /* Test data is:
1523bd8f1dc3Sbluhm    *   <?xml version='1.0' encoding='utf-16'?>
1524bd8f1dc3Sbluhm    *   <a><![CDATA[hello]]></a>
1525bd8f1dc3Sbluhm    */
1526bd8f1dc3Sbluhm   const char text[]
1527bd8f1dc3Sbluhm       = "<\0?\0x\0m\0l\0"
1528bd8f1dc3Sbluhm         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529bd8f1dc3Sbluhm         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530bd8f1dc3Sbluhm         "1\0"
1531bd8f1dc3Sbluhm         "6\0'"
1532bd8f1dc3Sbluhm         "\0?\0>\0\n"
1533bd8f1dc3Sbluhm         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534bd8f1dc3Sbluhm   const XML_Char *expected = XCS("hello");
1535bd8f1dc3Sbluhm 
1536bd8f1dc3Sbluhm   CharData storage;
1537bd8f1dc3Sbluhm   CharData_Init(&storage);
1538bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1539bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540bd8f1dc3Sbluhm 
1541bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1543bd8f1dc3Sbluhm     xml_failure(g_parser);
1544bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1545bd8f1dc3Sbluhm }
1546bd8f1dc3Sbluhm END_TEST
1547bd8f1dc3Sbluhm 
1548bd8f1dc3Sbluhm /* Test UTF16 conversion of a long cdata string */
1549bd8f1dc3Sbluhm 
1550bd8f1dc3Sbluhm /* 16 characters: handy macro to reduce visual clutter */
1551bd8f1dc3Sbluhm #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552bd8f1dc3Sbluhm 
1553bd8f1dc3Sbluhm START_TEST(test_long_cdata_utf16) {
1554bd8f1dc3Sbluhm   /* Test data is:
1555bd8f1dc3Sbluhm    * <?xlm version='1.0' encoding='utf-16'?>
1556bd8f1dc3Sbluhm    * <a><![CDATA[
1557bd8f1dc3Sbluhm    * ABCDEFGHIJKLMNOP
1558bd8f1dc3Sbluhm    * ]]></a>
1559bd8f1dc3Sbluhm    */
1560bd8f1dc3Sbluhm   const char text[]
1561bd8f1dc3Sbluhm       = "\0<\0?\0x\0m\0l\0 "
1562bd8f1dc3Sbluhm         "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563bd8f1dc3Sbluhm         "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564bd8f1dc3Sbluhm         "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565bd8f1dc3Sbluhm       /* 64 characters per line */
1566bd8f1dc3Sbluhm       /* clang-format off */
1567bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1568bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582bd8f1dc3Sbluhm         A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583bd8f1dc3Sbluhm         A_TO_P_IN_UTF16
1584bd8f1dc3Sbluhm         /* clang-format on */
1585bd8f1dc3Sbluhm         "\0]\0]\0>\0<\0/\0a\0>";
1586bd8f1dc3Sbluhm   const XML_Char *expected =
1587bd8f1dc3Sbluhm       /* clang-format off */
1588bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604bd8f1dc3Sbluhm         XCS("ABCDEFGHIJKLMNOP");
1605bd8f1dc3Sbluhm   /* clang-format on */
1606bd8f1dc3Sbluhm   CharData storage;
1607bd8f1dc3Sbluhm   void *buffer;
1608bd8f1dc3Sbluhm 
1609bd8f1dc3Sbluhm   CharData_Init(&storage);
1610bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1611bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612bd8f1dc3Sbluhm   buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613bd8f1dc3Sbluhm   if (buffer == NULL)
1614bd8f1dc3Sbluhm     fail("Could not allocate parse buffer");
1615bd8f1dc3Sbluhm   assert(buffer != NULL);
1616bd8f1dc3Sbluhm   memcpy(buffer, text, sizeof(text) - 1);
1617bd8f1dc3Sbluhm   if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618bd8f1dc3Sbluhm     xml_failure(g_parser);
1619bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1620bd8f1dc3Sbluhm }
1621bd8f1dc3Sbluhm END_TEST
1622bd8f1dc3Sbluhm 
1623bd8f1dc3Sbluhm /* Test handling of multiple unit UTF-16 characters */
1624bd8f1dc3Sbluhm START_TEST(test_multichar_cdata_utf16) {
1625bd8f1dc3Sbluhm   /* Test data is:
1626bd8f1dc3Sbluhm    *   <?xml version='1.0' encoding='utf-16'?>
1627bd8f1dc3Sbluhm    *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628bd8f1dc3Sbluhm    *
1629bd8f1dc3Sbluhm    * where {MINIM} is U+1d15e (a minim or half-note)
1630bd8f1dc3Sbluhm    *   UTF-16: 0xd834 0xdd5e
1631bd8f1dc3Sbluhm    *   UTF-8:  0xf0 0x9d 0x85 0x9e
1632bd8f1dc3Sbluhm    * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633bd8f1dc3Sbluhm    *   UTF-16: 0xd834 0xdd5f
1634bd8f1dc3Sbluhm    *   UTF-8:  0xf0 0x9d 0x85 0x9f
1635bd8f1dc3Sbluhm    */
1636bd8f1dc3Sbluhm   const char text[] = "\0<\0?\0x\0m\0l\0"
1637bd8f1dc3Sbluhm                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638bd8f1dc3Sbluhm                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639bd8f1dc3Sbluhm                       "1\0"
1640bd8f1dc3Sbluhm                       "6\0'"
1641bd8f1dc3Sbluhm                       "\0?\0>\0\n"
1642bd8f1dc3Sbluhm                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643bd8f1dc3Sbluhm                       "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644bd8f1dc3Sbluhm                       "\0]\0]\0>\0<\0/\0a\0>";
1645bd8f1dc3Sbluhm #ifdef XML_UNICODE
1646bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647bd8f1dc3Sbluhm #else
1648bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649bd8f1dc3Sbluhm #endif
1650bd8f1dc3Sbluhm   CharData storage;
1651bd8f1dc3Sbluhm 
1652bd8f1dc3Sbluhm   CharData_Init(&storage);
1653bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
1654bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655bd8f1dc3Sbluhm 
1656bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
1658bd8f1dc3Sbluhm     xml_failure(g_parser);
1659bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
1660bd8f1dc3Sbluhm }
1661bd8f1dc3Sbluhm END_TEST
1662bd8f1dc3Sbluhm 
1663bd8f1dc3Sbluhm /* Test that an element name with a UTF-16 surrogate pair is rejected */
1664bd8f1dc3Sbluhm START_TEST(test_utf16_bad_surrogate_pair) {
1665bd8f1dc3Sbluhm   /* Test data is:
1666bd8f1dc3Sbluhm    *   <?xml version='1.0' encoding='utf-16'?>
1667bd8f1dc3Sbluhm    *   <a><![CDATA[{BADLINB}]]></a>
1668bd8f1dc3Sbluhm    *
1669bd8f1dc3Sbluhm    * where {BADLINB} is U+10000 (the first Linear B character)
1670bd8f1dc3Sbluhm    * with the UTF-16 surrogate pair in the wrong order, i.e.
1671bd8f1dc3Sbluhm    *   0xdc00 0xd800
1672bd8f1dc3Sbluhm    */
1673bd8f1dc3Sbluhm   const char text[] = "\0<\0?\0x\0m\0l\0"
1674bd8f1dc3Sbluhm                       " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675bd8f1dc3Sbluhm                       " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676bd8f1dc3Sbluhm                       "1\0"
1677bd8f1dc3Sbluhm                       "6\0'"
1678bd8f1dc3Sbluhm                       "\0?\0>\0\n"
1679bd8f1dc3Sbluhm                       "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680bd8f1dc3Sbluhm                       "\xdc\x00\xd8\x00"
1681bd8f1dc3Sbluhm                       "\0]\0]\0>\0<\0/\0a\0>";
1682bd8f1dc3Sbluhm 
1683bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
1685bd8f1dc3Sbluhm     fail("Reversed UTF-16 surrogate pair not faulted");
1686bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687bd8f1dc3Sbluhm     xml_failure(g_parser);
1688bd8f1dc3Sbluhm }
1689bd8f1dc3Sbluhm END_TEST
1690bd8f1dc3Sbluhm 
1691bd8f1dc3Sbluhm START_TEST(test_bad_cdata) {
1692bd8f1dc3Sbluhm   struct CaseData {
1693bd8f1dc3Sbluhm     const char *text;
1694bd8f1dc3Sbluhm     enum XML_Error expectedError;
1695bd8f1dc3Sbluhm   };
1696bd8f1dc3Sbluhm 
1697bd8f1dc3Sbluhm   struct CaseData cases[]
1698bd8f1dc3Sbluhm       = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699bd8f1dc3Sbluhm          {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700bd8f1dc3Sbluhm          {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701bd8f1dc3Sbluhm          {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702bd8f1dc3Sbluhm          {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703bd8f1dc3Sbluhm          {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704bd8f1dc3Sbluhm          {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705bd8f1dc3Sbluhm          {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706bd8f1dc3Sbluhm 
1707bd8f1dc3Sbluhm          {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708bd8f1dc3Sbluhm          {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709bd8f1dc3Sbluhm          {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710bd8f1dc3Sbluhm 
1711bd8f1dc3Sbluhm          {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712bd8f1dc3Sbluhm          {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1713bd8f1dc3Sbluhm          {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714bd8f1dc3Sbluhm          {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715bd8f1dc3Sbluhm          {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716bd8f1dc3Sbluhm          {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717bd8f1dc3Sbluhm          {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718bd8f1dc3Sbluhm 
1719bd8f1dc3Sbluhm          {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720bd8f1dc3Sbluhm          {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721bd8f1dc3Sbluhm          {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722bd8f1dc3Sbluhm 
1723bd8f1dc3Sbluhm   size_t i = 0;
1724bd8f1dc3Sbluhm   for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725bd8f1dc3Sbluhm     set_subtest("%s", cases[i].text);
1726bd8f1dc3Sbluhm     const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727bd8f1dc3Sbluhm         g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728bd8f1dc3Sbluhm     const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729bd8f1dc3Sbluhm 
1730bd8f1dc3Sbluhm     assert(actualStatus == XML_STATUS_ERROR);
1731bd8f1dc3Sbluhm 
1732bd8f1dc3Sbluhm     if (actualError != cases[i].expectedError) {
1733bd8f1dc3Sbluhm       char message[100];
1734bd8f1dc3Sbluhm       snprintf(message, sizeof(message),
1735bd8f1dc3Sbluhm                "Expected error %d but got error %d for case %u: \"%s\"\n",
1736bd8f1dc3Sbluhm                cases[i].expectedError, actualError, (unsigned int)i + 1,
1737bd8f1dc3Sbluhm                cases[i].text);
1738bd8f1dc3Sbluhm       fail(message);
1739bd8f1dc3Sbluhm     }
1740bd8f1dc3Sbluhm 
1741bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1742bd8f1dc3Sbluhm   }
1743bd8f1dc3Sbluhm }
1744bd8f1dc3Sbluhm END_TEST
1745bd8f1dc3Sbluhm 
1746bd8f1dc3Sbluhm /* Test failures in UTF-16 CDATA */
1747bd8f1dc3Sbluhm START_TEST(test_bad_cdata_utf16) {
1748bd8f1dc3Sbluhm   struct CaseData {
1749bd8f1dc3Sbluhm     size_t text_bytes;
1750bd8f1dc3Sbluhm     const char *text;
1751bd8f1dc3Sbluhm     enum XML_Error expected_error;
1752bd8f1dc3Sbluhm   };
1753bd8f1dc3Sbluhm 
1754bd8f1dc3Sbluhm   const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755bd8f1dc3Sbluhm                         " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756bd8f1dc3Sbluhm                         " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757bd8f1dc3Sbluhm                         "1\0"
1758bd8f1dc3Sbluhm                         "6\0'"
1759bd8f1dc3Sbluhm                         "\0?\0>\0\n"
1760bd8f1dc3Sbluhm                         "\0<\0a\0>";
1761bd8f1dc3Sbluhm   struct CaseData cases[] = {
1762bd8f1dc3Sbluhm       {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763bd8f1dc3Sbluhm       {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764bd8f1dc3Sbluhm       {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765bd8f1dc3Sbluhm       {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766bd8f1dc3Sbluhm       {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767bd8f1dc3Sbluhm       {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768bd8f1dc3Sbluhm       {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769bd8f1dc3Sbluhm       {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770bd8f1dc3Sbluhm       {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771bd8f1dc3Sbluhm       {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772bd8f1dc3Sbluhm       {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773bd8f1dc3Sbluhm       {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774bd8f1dc3Sbluhm       {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775bd8f1dc3Sbluhm       {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776bd8f1dc3Sbluhm       {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777bd8f1dc3Sbluhm       {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778bd8f1dc3Sbluhm       {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779bd8f1dc3Sbluhm       {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780bd8f1dc3Sbluhm       {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781bd8f1dc3Sbluhm       {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782bd8f1dc3Sbluhm       /* Now add a four-byte UTF-16 character */
1783bd8f1dc3Sbluhm       {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784bd8f1dc3Sbluhm        XML_ERROR_UNCLOSED_CDATA_SECTION},
1785bd8f1dc3Sbluhm       {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786bd8f1dc3Sbluhm       {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787bd8f1dc3Sbluhm        XML_ERROR_PARTIAL_CHAR},
1788bd8f1dc3Sbluhm       {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789bd8f1dc3Sbluhm        XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790bd8f1dc3Sbluhm   size_t i;
1791bd8f1dc3Sbluhm 
1792bd8f1dc3Sbluhm   for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793bd8f1dc3Sbluhm     set_subtest("case %lu", (long unsigned)(i + 1));
1794bd8f1dc3Sbluhm     enum XML_Status actual_status;
1795bd8f1dc3Sbluhm     enum XML_Error actual_error;
1796bd8f1dc3Sbluhm 
1797bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798bd8f1dc3Sbluhm                                 XML_FALSE)
1799bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1800bd8f1dc3Sbluhm       xml_failure(g_parser);
1801bd8f1dc3Sbluhm     actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802bd8f1dc3Sbluhm                                             (int)cases[i].text_bytes, XML_TRUE);
1803bd8f1dc3Sbluhm     assert(actual_status == XML_STATUS_ERROR);
1804bd8f1dc3Sbluhm     actual_error = XML_GetErrorCode(g_parser);
1805bd8f1dc3Sbluhm     if (actual_error != cases[i].expected_error) {
1806bd8f1dc3Sbluhm       char message[1024];
1807bd8f1dc3Sbluhm 
1808bd8f1dc3Sbluhm       snprintf(message, sizeof(message),
1809bd8f1dc3Sbluhm                "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810bd8f1dc3Sbluhm                ") for case %lu\n",
1811bd8f1dc3Sbluhm                cases[i].expected_error,
1812bd8f1dc3Sbluhm                XML_ErrorString(cases[i].expected_error), actual_error,
1813bd8f1dc3Sbluhm                XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814bd8f1dc3Sbluhm       fail(message);
1815bd8f1dc3Sbluhm     }
1816bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1817bd8f1dc3Sbluhm   }
1818bd8f1dc3Sbluhm }
1819bd8f1dc3Sbluhm END_TEST
1820bd8f1dc3Sbluhm 
1821bd8f1dc3Sbluhm /* Test stopping the parser in cdata handler */
1822bd8f1dc3Sbluhm START_TEST(test_stop_parser_between_cdata_calls) {
1823bd8f1dc3Sbluhm   const char *text = long_cdata_text;
1824bd8f1dc3Sbluhm 
1825bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
1827bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828bd8f1dc3Sbluhm }
1829bd8f1dc3Sbluhm END_TEST
1830bd8f1dc3Sbluhm 
1831bd8f1dc3Sbluhm /* Test suspending the parser in cdata handler */
1832bd8f1dc3Sbluhm START_TEST(test_suspend_parser_between_cdata_calls) {
1833bd8f1dc3Sbluhm   const char *text = long_cdata_text;
1834bd8f1dc3Sbluhm   enum XML_Status result;
1835bd8f1dc3Sbluhm 
1836bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
1838bd8f1dc3Sbluhm   result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839bd8f1dc3Sbluhm   if (result != XML_STATUS_SUSPENDED) {
1840bd8f1dc3Sbluhm     if (result == XML_STATUS_ERROR)
1841bd8f1dc3Sbluhm       xml_failure(g_parser);
1842bd8f1dc3Sbluhm     fail("Parse not suspended in CDATA handler");
1843bd8f1dc3Sbluhm   }
1844bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845bd8f1dc3Sbluhm     xml_failure(g_parser);
1846bd8f1dc3Sbluhm }
1847bd8f1dc3Sbluhm END_TEST
1848bd8f1dc3Sbluhm 
1849bd8f1dc3Sbluhm /* Test memory allocation functions */
1850bd8f1dc3Sbluhm START_TEST(test_memory_allocation) {
1851bd8f1dc3Sbluhm   char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852bd8f1dc3Sbluhm   char *p;
1853bd8f1dc3Sbluhm 
1854bd8f1dc3Sbluhm   if (buffer == NULL) {
1855bd8f1dc3Sbluhm     fail("Allocation failed");
1856bd8f1dc3Sbluhm   } else {
1857bd8f1dc3Sbluhm     /* Try writing to memory; some OSes try to cheat! */
1858bd8f1dc3Sbluhm     buffer[0] = 'T';
1859bd8f1dc3Sbluhm     buffer[1] = 'E';
1860bd8f1dc3Sbluhm     buffer[2] = 'S';
1861bd8f1dc3Sbluhm     buffer[3] = 'T';
1862bd8f1dc3Sbluhm     buffer[4] = '\0';
1863bd8f1dc3Sbluhm     if (strcmp(buffer, "TEST") != 0) {
1864bd8f1dc3Sbluhm       fail("Memory not writable");
1865bd8f1dc3Sbluhm     } else {
1866bd8f1dc3Sbluhm       p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867bd8f1dc3Sbluhm       if (p == NULL) {
1868bd8f1dc3Sbluhm         fail("Reallocation failed");
1869bd8f1dc3Sbluhm       } else {
1870bd8f1dc3Sbluhm         /* Write again, just to be sure */
1871bd8f1dc3Sbluhm         buffer = p;
1872bd8f1dc3Sbluhm         buffer[0] = 'V';
1873bd8f1dc3Sbluhm         if (strcmp(buffer, "VEST") != 0) {
1874bd8f1dc3Sbluhm           fail("Reallocated memory not writable");
1875bd8f1dc3Sbluhm         }
1876bd8f1dc3Sbluhm       }
1877bd8f1dc3Sbluhm     }
1878bd8f1dc3Sbluhm     XML_MemFree(g_parser, buffer);
1879bd8f1dc3Sbluhm   }
1880bd8f1dc3Sbluhm }
1881bd8f1dc3Sbluhm END_TEST
1882bd8f1dc3Sbluhm 
1883bd8f1dc3Sbluhm /* Test XML_DefaultCurrent() passes handling on correctly */
1884bd8f1dc3Sbluhm START_TEST(test_default_current) {
1885bd8f1dc3Sbluhm   const char *text = "<doc>hell]</doc>";
1886bd8f1dc3Sbluhm   const char *entity_text = "<!DOCTYPE doc [\n"
1887bd8f1dc3Sbluhm                             "<!ENTITY entity '&#37;'>\n"
1888bd8f1dc3Sbluhm                             "]>\n"
1889bd8f1dc3Sbluhm                             "<doc>&entity;</doc>";
1890bd8f1dc3Sbluhm 
1891bd8f1dc3Sbluhm   set_subtest("with defaulting");
1892bd8f1dc3Sbluhm   {
1893bd8f1dc3Sbluhm     struct handler_record_list storage;
1894bd8f1dc3Sbluhm     storage.count = 0;
1895bd8f1dc3Sbluhm     XML_SetDefaultHandler(g_parser, record_default_handler);
1896bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
1898bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1900bd8f1dc3Sbluhm       xml_failure(g_parser);
1901bd8f1dc3Sbluhm     int i = 0;
1902bd8f1dc3Sbluhm     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903bd8f1dc3Sbluhm     // we should have gotten one or more cdata callbacks, totaling 5 chars
1904bd8f1dc3Sbluhm     int cdata_len_remaining = 5;
1905bd8f1dc3Sbluhm     while (cdata_len_remaining > 0) {
1906bd8f1dc3Sbluhm       const struct handler_record_entry *c_entry
1907bd8f1dc3Sbluhm           = handler_record_get(&storage, i++);
1908bd8f1dc3Sbluhm       assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909bd8f1dc3Sbluhm       assert_true(c_entry->arg > 0);
1910bd8f1dc3Sbluhm       assert_true(c_entry->arg <= cdata_len_remaining);
1911bd8f1dc3Sbluhm       cdata_len_remaining -= c_entry->arg;
1912bd8f1dc3Sbluhm       // default handler must follow, with the exact same len argument.
1913bd8f1dc3Sbluhm       assert_record_handler_called(&storage, i++, "record_default_handler",
1914bd8f1dc3Sbluhm                                    c_entry->arg);
1915bd8f1dc3Sbluhm     }
1916bd8f1dc3Sbluhm     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917bd8f1dc3Sbluhm     assert_true(storage.count == i);
1918bd8f1dc3Sbluhm   }
1919bd8f1dc3Sbluhm 
1920bd8f1dc3Sbluhm   /* Again, without the defaulting */
1921bd8f1dc3Sbluhm   set_subtest("no defaulting");
1922bd8f1dc3Sbluhm   {
1923bd8f1dc3Sbluhm     struct handler_record_list storage;
1924bd8f1dc3Sbluhm     storage.count = 0;
1925bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1926bd8f1dc3Sbluhm     XML_SetDefaultHandler(g_parser, record_default_handler);
1927bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
1929bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1931bd8f1dc3Sbluhm       xml_failure(g_parser);
1932bd8f1dc3Sbluhm     int i = 0;
1933bd8f1dc3Sbluhm     assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934bd8f1dc3Sbluhm     // we should have gotten one or more cdata callbacks, totaling 5 chars
1935bd8f1dc3Sbluhm     int cdata_len_remaining = 5;
1936bd8f1dc3Sbluhm     while (cdata_len_remaining > 0) {
1937bd8f1dc3Sbluhm       const struct handler_record_entry *c_entry
1938bd8f1dc3Sbluhm           = handler_record_get(&storage, i++);
1939bd8f1dc3Sbluhm       assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940bd8f1dc3Sbluhm       assert_true(c_entry->arg > 0);
1941bd8f1dc3Sbluhm       assert_true(c_entry->arg <= cdata_len_remaining);
1942bd8f1dc3Sbluhm       cdata_len_remaining -= c_entry->arg;
1943bd8f1dc3Sbluhm     }
1944bd8f1dc3Sbluhm     assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945bd8f1dc3Sbluhm     assert_true(storage.count == i);
1946bd8f1dc3Sbluhm   }
1947bd8f1dc3Sbluhm 
1948bd8f1dc3Sbluhm   /* Now with an internal entity to complicate matters */
1949bd8f1dc3Sbluhm   set_subtest("with internal entity");
1950bd8f1dc3Sbluhm   {
1951bd8f1dc3Sbluhm     struct handler_record_list storage;
1952bd8f1dc3Sbluhm     storage.count = 0;
1953bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1954bd8f1dc3Sbluhm     XML_SetDefaultHandler(g_parser, record_default_handler);
1955bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
1957bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958bd8f1dc3Sbluhm                                 XML_TRUE)
1959bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1960bd8f1dc3Sbluhm       xml_failure(g_parser);
1961bd8f1dc3Sbluhm     /* The default handler suppresses the entity */
1962bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981bd8f1dc3Sbluhm     assert_true(storage.count == 19);
1982bd8f1dc3Sbluhm   }
1983bd8f1dc3Sbluhm 
1984bd8f1dc3Sbluhm   /* Again, with a skip handler */
1985bd8f1dc3Sbluhm   set_subtest("with skip handler");
1986bd8f1dc3Sbluhm   {
1987bd8f1dc3Sbluhm     struct handler_record_list storage;
1988bd8f1dc3Sbluhm     storage.count = 0;
1989bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
1990bd8f1dc3Sbluhm     XML_SetDefaultHandler(g_parser, record_default_handler);
1991bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992bd8f1dc3Sbluhm     XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
1994bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995bd8f1dc3Sbluhm                                 XML_TRUE)
1996bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
1997bd8f1dc3Sbluhm       xml_failure(g_parser);
1998bd8f1dc3Sbluhm     /* The default handler suppresses the entity */
1999bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018bd8f1dc3Sbluhm     assert_true(storage.count == 19);
2019bd8f1dc3Sbluhm   }
2020bd8f1dc3Sbluhm 
2021bd8f1dc3Sbluhm   /* This time, allow the entity through */
2022bd8f1dc3Sbluhm   set_subtest("allow entity");
2023bd8f1dc3Sbluhm   {
2024bd8f1dc3Sbluhm     struct handler_record_list storage;
2025bd8f1dc3Sbluhm     storage.count = 0;
2026bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
2027bd8f1dc3Sbluhm     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
2030bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031bd8f1dc3Sbluhm                                 XML_TRUE)
2032bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
2033bd8f1dc3Sbluhm       xml_failure(g_parser);
2034bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054bd8f1dc3Sbluhm     assert_true(storage.count == 20);
2055bd8f1dc3Sbluhm   }
2056bd8f1dc3Sbluhm 
2057bd8f1dc3Sbluhm   /* Finally, without passing the cdata to the default handler */
2058bd8f1dc3Sbluhm   set_subtest("not passing cdata");
2059bd8f1dc3Sbluhm   {
2060bd8f1dc3Sbluhm     struct handler_record_list storage;
2061bd8f1dc3Sbluhm     storage.count = 0;
2062bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
2063bd8f1dc3Sbluhm     XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064bd8f1dc3Sbluhm     XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &storage);
2066bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067bd8f1dc3Sbluhm                                 XML_TRUE)
2068bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
2069bd8f1dc3Sbluhm       xml_failure(g_parser);
2070bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088bd8f1dc3Sbluhm                                  1);
2089bd8f1dc3Sbluhm     assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090bd8f1dc3Sbluhm     assert_true(storage.count == 19);
2091bd8f1dc3Sbluhm   }
2092bd8f1dc3Sbluhm }
2093bd8f1dc3Sbluhm END_TEST
2094bd8f1dc3Sbluhm 
2095bd8f1dc3Sbluhm /* Test DTD element parsing code paths */
2096bd8f1dc3Sbluhm START_TEST(test_dtd_elements) {
2097bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2098bd8f1dc3Sbluhm                      "<!ELEMENT doc (chapter)>\n"
2099bd8f1dc3Sbluhm                      "<!ELEMENT chapter (#PCDATA)>\n"
2100bd8f1dc3Sbluhm                      "]>\n"
2101bd8f1dc3Sbluhm                      "<doc><chapter>Wombats are go</chapter></doc>";
2102bd8f1dc3Sbluhm 
2103bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2106bd8f1dc3Sbluhm     xml_failure(g_parser);
2107bd8f1dc3Sbluhm }
2108bd8f1dc3Sbluhm END_TEST
2109bd8f1dc3Sbluhm 
2110bd8f1dc3Sbluhm static void XMLCALL
2111bd8f1dc3Sbluhm element_decl_check_model(void *userData, const XML_Char *name,
2112bd8f1dc3Sbluhm                          XML_Content *model) {
2113bd8f1dc3Sbluhm   UNUSED_P(userData);
2114bd8f1dc3Sbluhm   uint32_t errorFlags = 0;
2115bd8f1dc3Sbluhm 
2116bd8f1dc3Sbluhm   /* Expected model array structure is this:
2117bd8f1dc3Sbluhm    * [0] (type 6, quant 0)
2118bd8f1dc3Sbluhm    *   [1] (type 5, quant 0)
2119bd8f1dc3Sbluhm    *     [3] (type 4, quant 0, name "bar")
2120bd8f1dc3Sbluhm    *     [4] (type 4, quant 0, name "foo")
2121bd8f1dc3Sbluhm    *     [5] (type 4, quant 3, name "xyz")
2122bd8f1dc3Sbluhm    *   [2] (type 4, quant 2, name "zebra")
2123bd8f1dc3Sbluhm    */
2124bd8f1dc3Sbluhm   errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125bd8f1dc3Sbluhm   errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126bd8f1dc3Sbluhm 
2127bd8f1dc3Sbluhm   if (model != NULL) {
2128bd8f1dc3Sbluhm     errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129bd8f1dc3Sbluhm     errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130bd8f1dc3Sbluhm     errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131bd8f1dc3Sbluhm     errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132bd8f1dc3Sbluhm     errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133bd8f1dc3Sbluhm 
2134bd8f1dc3Sbluhm     errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135bd8f1dc3Sbluhm     errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136bd8f1dc3Sbluhm     errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137bd8f1dc3Sbluhm     errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138bd8f1dc3Sbluhm     errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139bd8f1dc3Sbluhm 
2140bd8f1dc3Sbluhm     errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141bd8f1dc3Sbluhm     errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142bd8f1dc3Sbluhm     errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143bd8f1dc3Sbluhm     errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144bd8f1dc3Sbluhm     errorFlags
2145bd8f1dc3Sbluhm         |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146bd8f1dc3Sbluhm 
2147bd8f1dc3Sbluhm     errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148bd8f1dc3Sbluhm     errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149bd8f1dc3Sbluhm     errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150bd8f1dc3Sbluhm     errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151bd8f1dc3Sbluhm     errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152bd8f1dc3Sbluhm 
2153bd8f1dc3Sbluhm     errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154bd8f1dc3Sbluhm     errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155bd8f1dc3Sbluhm     errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156bd8f1dc3Sbluhm     errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157bd8f1dc3Sbluhm     errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158bd8f1dc3Sbluhm 
2159bd8f1dc3Sbluhm     errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160bd8f1dc3Sbluhm     errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161bd8f1dc3Sbluhm     errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162bd8f1dc3Sbluhm     errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163bd8f1dc3Sbluhm     errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164bd8f1dc3Sbluhm   }
2165bd8f1dc3Sbluhm 
2166bd8f1dc3Sbluhm   XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167bd8f1dc3Sbluhm   XML_FreeContentModel(g_parser, model);
2168bd8f1dc3Sbluhm }
2169bd8f1dc3Sbluhm 
2170bd8f1dc3Sbluhm START_TEST(test_dtd_elements_nesting) {
2171bd8f1dc3Sbluhm   // Payload inspired by a test in Perl's XML::Parser
2172bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE foo [\n"
2173bd8f1dc3Sbluhm                      "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174bd8f1dc3Sbluhm                      "]>\n"
2175bd8f1dc3Sbluhm                      "<foo/>";
2176bd8f1dc3Sbluhm 
2177bd8f1dc3Sbluhm   XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178bd8f1dc3Sbluhm 
2179bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2182bd8f1dc3Sbluhm     xml_failure(g_parser);
2183bd8f1dc3Sbluhm 
2184bd8f1dc3Sbluhm   if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185bd8f1dc3Sbluhm     fail("Element declaration model regression detected");
2186bd8f1dc3Sbluhm }
2187bd8f1dc3Sbluhm END_TEST
2188bd8f1dc3Sbluhm 
2189bd8f1dc3Sbluhm /* Test foreign DTD handling */
2190bd8f1dc3Sbluhm START_TEST(test_set_foreign_dtd) {
2191bd8f1dc3Sbluhm   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192bd8f1dc3Sbluhm   const char *text2 = "<doc>&entity;</doc>";
2193bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194bd8f1dc3Sbluhm 
2195bd8f1dc3Sbluhm   /* Check hash salt is passed through too */
2196bd8f1dc3Sbluhm   XML_SetHashSalt(g_parser, 0x12345678);
2197bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
2199bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200bd8f1dc3Sbluhm   /* Add a default handler to exercise more code paths */
2201bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202bd8f1dc3Sbluhm   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203bd8f1dc3Sbluhm     fail("Could not set foreign DTD");
2204bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2206bd8f1dc3Sbluhm     xml_failure(g_parser);
2207bd8f1dc3Sbluhm 
2208bd8f1dc3Sbluhm   /* Ensure that trying to set the DTD after parsing has started
2209bd8f1dc3Sbluhm    * is faulted, even if it's the same setting.
2210bd8f1dc3Sbluhm    */
2211bd8f1dc3Sbluhm   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212bd8f1dc3Sbluhm       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213bd8f1dc3Sbluhm     fail("Failed to reject late foreign DTD setting");
2214bd8f1dc3Sbluhm   /* Ditto for the hash salt */
2215bd8f1dc3Sbluhm   if (XML_SetHashSalt(g_parser, 0x23456789))
2216bd8f1dc3Sbluhm     fail("Failed to reject late hash salt change");
2217bd8f1dc3Sbluhm 
2218bd8f1dc3Sbluhm   /* Now finish the parse */
2219bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2221bd8f1dc3Sbluhm     xml_failure(g_parser);
2222bd8f1dc3Sbluhm }
2223bd8f1dc3Sbluhm END_TEST
2224bd8f1dc3Sbluhm 
2225bd8f1dc3Sbluhm /* Test foreign DTD handling with a failing NotStandalone handler */
2226bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_not_standalone) {
2227bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2229bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230bd8f1dc3Sbluhm 
2231bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
2233bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234bd8f1dc3Sbluhm   XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235bd8f1dc3Sbluhm   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236bd8f1dc3Sbluhm     fail("Could not set foreign DTD");
2237bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238bd8f1dc3Sbluhm                  "NotStandalonehandler failed to reject");
2239bd8f1dc3Sbluhm }
2240bd8f1dc3Sbluhm END_TEST
2241bd8f1dc3Sbluhm 
2242bd8f1dc3Sbluhm /* Test invalid character in a foreign DTD is faulted */
2243bd8f1dc3Sbluhm START_TEST(test_invalid_foreign_dtd) {
2244bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2246bd8f1dc3Sbluhm   ExtFaults test_data
2247bd8f1dc3Sbluhm       = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248bd8f1dc3Sbluhm 
2249bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
2251bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252bd8f1dc3Sbluhm   XML_UseForeignDTD(g_parser, XML_TRUE);
2253bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254bd8f1dc3Sbluhm                  "Bad DTD should not have been accepted");
2255bd8f1dc3Sbluhm }
2256bd8f1dc3Sbluhm END_TEST
2257bd8f1dc3Sbluhm 
2258bd8f1dc3Sbluhm /* Test foreign DTD use with a doctype */
2259bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_with_doctype) {
2260bd8f1dc3Sbluhm   const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261bd8f1dc3Sbluhm                       "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262bd8f1dc3Sbluhm   const char *text2 = "<doc>&entity;</doc>";
2263bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264bd8f1dc3Sbluhm 
2265bd8f1dc3Sbluhm   /* Check hash salt is passed through too */
2266bd8f1dc3Sbluhm   XML_SetHashSalt(g_parser, 0x12345678);
2267bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
2269bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270bd8f1dc3Sbluhm   /* Add a default handler to exercise more code paths */
2271bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272bd8f1dc3Sbluhm   if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273bd8f1dc3Sbluhm     fail("Could not set foreign DTD");
2274bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2276bd8f1dc3Sbluhm     xml_failure(g_parser);
2277bd8f1dc3Sbluhm 
2278bd8f1dc3Sbluhm   /* Ensure that trying to set the DTD after parsing has started
2279bd8f1dc3Sbluhm    * is faulted, even if it's the same setting.
2280bd8f1dc3Sbluhm    */
2281bd8f1dc3Sbluhm   if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282bd8f1dc3Sbluhm       != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283bd8f1dc3Sbluhm     fail("Failed to reject late foreign DTD setting");
2284bd8f1dc3Sbluhm   /* Ditto for the hash salt */
2285bd8f1dc3Sbluhm   if (XML_SetHashSalt(g_parser, 0x23456789))
2286bd8f1dc3Sbluhm     fail("Failed to reject late hash salt change");
2287bd8f1dc3Sbluhm 
2288bd8f1dc3Sbluhm   /* Now finish the parse */
2289bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2291bd8f1dc3Sbluhm     xml_failure(g_parser);
2292bd8f1dc3Sbluhm }
2293bd8f1dc3Sbluhm END_TEST
2294bd8f1dc3Sbluhm 
2295bd8f1dc3Sbluhm /* Test XML_UseForeignDTD with no external subset present */
2296bd8f1dc3Sbluhm START_TEST(test_foreign_dtd_without_external_subset) {
2297bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298bd8f1dc3Sbluhm                      "<doc>&foo;</doc>";
2299bd8f1dc3Sbluhm 
2300bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301bd8f1dc3Sbluhm   XML_SetUserData(g_parser, NULL);
2302bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303bd8f1dc3Sbluhm   XML_UseForeignDTD(g_parser, XML_TRUE);
2304bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2306bd8f1dc3Sbluhm     xml_failure(g_parser);
2307bd8f1dc3Sbluhm }
2308bd8f1dc3Sbluhm END_TEST
2309bd8f1dc3Sbluhm 
2310bd8f1dc3Sbluhm START_TEST(test_empty_foreign_dtd) {
2311bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2313bd8f1dc3Sbluhm 
2314bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316bd8f1dc3Sbluhm   XML_UseForeignDTD(g_parser, XML_TRUE);
2317bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318bd8f1dc3Sbluhm                  "Undefined entity not faulted");
2319bd8f1dc3Sbluhm }
2320bd8f1dc3Sbluhm END_TEST
2321bd8f1dc3Sbluhm 
2322bd8f1dc3Sbluhm /* Test XML Base is set and unset appropriately */
2323bd8f1dc3Sbluhm START_TEST(test_set_base) {
2324bd8f1dc3Sbluhm   const XML_Char *old_base;
2325bd8f1dc3Sbluhm   const XML_Char *new_base = XCS("/local/file/name.xml");
2326bd8f1dc3Sbluhm 
2327bd8f1dc3Sbluhm   old_base = XML_GetBase(g_parser);
2328bd8f1dc3Sbluhm   if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329bd8f1dc3Sbluhm     fail("Unable to set base");
2330bd8f1dc3Sbluhm   if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331bd8f1dc3Sbluhm     fail("Base setting not correct");
2332bd8f1dc3Sbluhm   if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333bd8f1dc3Sbluhm     fail("Unable to NULL base");
2334bd8f1dc3Sbluhm   if (XML_GetBase(g_parser) != NULL)
2335bd8f1dc3Sbluhm     fail("Base setting not nulled");
2336bd8f1dc3Sbluhm   XML_SetBase(g_parser, old_base);
2337bd8f1dc3Sbluhm }
2338bd8f1dc3Sbluhm END_TEST
2339bd8f1dc3Sbluhm 
2340bd8f1dc3Sbluhm /* Test attribute counts, indexing, etc */
2341bd8f1dc3Sbluhm START_TEST(test_attributes) {
2342bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2343bd8f1dc3Sbluhm                      "<!ELEMENT doc (tag)>\n"
2344bd8f1dc3Sbluhm                      "<!ATTLIST doc id ID #REQUIRED>\n"
2345bd8f1dc3Sbluhm                      "]>"
2346bd8f1dc3Sbluhm                      "<doc a='1' id='one' b='2'>"
2347bd8f1dc3Sbluhm                      "<tag c='3'/>"
2348bd8f1dc3Sbluhm                      "</doc>";
2349bd8f1dc3Sbluhm   AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350bd8f1dc3Sbluhm                          {XCS("b"), XCS("2")},
2351bd8f1dc3Sbluhm                          {XCS("id"), XCS("one")},
2352bd8f1dc3Sbluhm                          {NULL, NULL}};
2353bd8f1dc3Sbluhm   AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354bd8f1dc3Sbluhm   ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355bd8f1dc3Sbluhm                         {XCS("tag"), 1, NULL, NULL},
2356bd8f1dc3Sbluhm                         {NULL, 0, NULL, NULL}};
2357bd8f1dc3Sbluhm   info[0].attributes = doc_info;
2358bd8f1dc3Sbluhm   info[1].attributes = tag_info;
2359bd8f1dc3Sbluhm 
2360*aa071e6eSbluhm   XML_Parser parser = XML_ParserCreate(NULL);
2361*aa071e6eSbluhm   assert_true(parser != NULL);
2362*aa071e6eSbluhm   ParserAndElementInfo parserAndElementInfos = {
2363*aa071e6eSbluhm       parser,
2364*aa071e6eSbluhm       info,
2365*aa071e6eSbluhm   };
2366*aa071e6eSbluhm 
2367*aa071e6eSbluhm   XML_SetStartElementHandler(parser, counting_start_element_handler);
2368*aa071e6eSbluhm   XML_SetUserData(parser, &parserAndElementInfos);
2369*aa071e6eSbluhm   if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2370bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2371*aa071e6eSbluhm     xml_failure(parser);
2372*aa071e6eSbluhm 
2373*aa071e6eSbluhm   XML_ParserFree(parser);
2374bd8f1dc3Sbluhm }
2375bd8f1dc3Sbluhm END_TEST
2376bd8f1dc3Sbluhm 
2377bd8f1dc3Sbluhm /* Test reset works correctly in the middle of processing an internal
2378bd8f1dc3Sbluhm  * entity.  Exercises some obscure code in XML_ParserReset().
2379bd8f1dc3Sbluhm  */
2380bd8f1dc3Sbluhm START_TEST(test_reset_in_entity) {
2381bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2382bd8f1dc3Sbluhm                      "<!ENTITY wombat 'wom'>\n"
2383bd8f1dc3Sbluhm                      "<!ENTITY entity 'hi &wom; there'>\n"
2384bd8f1dc3Sbluhm                      "]>\n"
2385bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2386bd8f1dc3Sbluhm   XML_ParsingStatus status;
2387bd8f1dc3Sbluhm 
2388bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
2389bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2390bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2391bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2392bd8f1dc3Sbluhm     xml_failure(g_parser);
2393bd8f1dc3Sbluhm   XML_GetParsingStatus(g_parser, &status);
2394bd8f1dc3Sbluhm   if (status.parsing != XML_SUSPENDED)
2395bd8f1dc3Sbluhm     fail("Parsing status not SUSPENDED");
2396bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2397bd8f1dc3Sbluhm   XML_GetParsingStatus(g_parser, &status);
2398bd8f1dc3Sbluhm   if (status.parsing != XML_INITIALIZED)
2399bd8f1dc3Sbluhm     fail("Parsing status doesn't reset to INITIALIZED");
2400bd8f1dc3Sbluhm }
2401bd8f1dc3Sbluhm END_TEST
2402bd8f1dc3Sbluhm 
2403bd8f1dc3Sbluhm /* Test that resume correctly passes through parse errors */
2404bd8f1dc3Sbluhm START_TEST(test_resume_invalid_parse) {
2405bd8f1dc3Sbluhm   const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2406bd8f1dc3Sbluhm 
2407bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
2408bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2409bd8f1dc3Sbluhm   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2410bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2411bd8f1dc3Sbluhm     xml_failure(g_parser);
2412bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2413bd8f1dc3Sbluhm     fail("Resumed invalid parse not faulted");
2414bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2415bd8f1dc3Sbluhm     fail("Invalid parse not correctly faulted");
2416bd8f1dc3Sbluhm }
2417bd8f1dc3Sbluhm END_TEST
2418bd8f1dc3Sbluhm 
2419bd8f1dc3Sbluhm /* Test that re-suspended parses are correctly passed through */
2420bd8f1dc3Sbluhm START_TEST(test_resume_resuspended) {
2421bd8f1dc3Sbluhm   const char *text = "<doc>Hello<meep/>world</doc>";
2422bd8f1dc3Sbluhm 
2423bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
2424bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2425bd8f1dc3Sbluhm   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2426bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2427bd8f1dc3Sbluhm     xml_failure(g_parser);
2428bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
2429bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2430bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2431bd8f1dc3Sbluhm     fail("Resumption not suspended");
2432bd8f1dc3Sbluhm   /* This one should succeed and finish up */
2433bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2434bd8f1dc3Sbluhm     xml_failure(g_parser);
2435bd8f1dc3Sbluhm }
2436bd8f1dc3Sbluhm END_TEST
2437bd8f1dc3Sbluhm 
2438bd8f1dc3Sbluhm /* Test that CDATA shows up correctly through a default handler */
2439bd8f1dc3Sbluhm START_TEST(test_cdata_default) {
2440bd8f1dc3Sbluhm   const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2441bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2442bd8f1dc3Sbluhm   CharData storage;
2443bd8f1dc3Sbluhm 
2444bd8f1dc3Sbluhm   CharData_Init(&storage);
2445bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
2446bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
2447bd8f1dc3Sbluhm 
2448bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2449bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2450bd8f1dc3Sbluhm     xml_failure(g_parser);
2451bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
2452bd8f1dc3Sbluhm }
2453bd8f1dc3Sbluhm END_TEST
2454bd8f1dc3Sbluhm 
2455bd8f1dc3Sbluhm /* Test resetting a subordinate parser does exactly nothing */
2456bd8f1dc3Sbluhm START_TEST(test_subordinate_reset) {
2457bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2458bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2459bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2460bd8f1dc3Sbluhm 
2461bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2462bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2463bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2464bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2465bd8f1dc3Sbluhm     xml_failure(g_parser);
2466bd8f1dc3Sbluhm }
2467bd8f1dc3Sbluhm END_TEST
2468bd8f1dc3Sbluhm 
2469bd8f1dc3Sbluhm /* Test suspending a subordinate parser */
2470bd8f1dc3Sbluhm START_TEST(test_subordinate_suspend) {
2471bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2472bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2473bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2474bd8f1dc3Sbluhm 
2475bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2476bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2477bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2478bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2479bd8f1dc3Sbluhm     xml_failure(g_parser);
2480bd8f1dc3Sbluhm }
2481bd8f1dc3Sbluhm END_TEST
2482bd8f1dc3Sbluhm 
2483bd8f1dc3Sbluhm /* Test suspending a subordinate parser from an XML declaration */
2484bd8f1dc3Sbluhm /* Increases code coverage of the tests */
2485bd8f1dc3Sbluhm 
2486bd8f1dc3Sbluhm START_TEST(test_subordinate_xdecl_suspend) {
2487bd8f1dc3Sbluhm   const char *text
2488bd8f1dc3Sbluhm       = "<!DOCTYPE doc [\n"
2489bd8f1dc3Sbluhm         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2490bd8f1dc3Sbluhm         "]>\n"
2491bd8f1dc3Sbluhm         "<doc>&entity;</doc>";
2492bd8f1dc3Sbluhm 
2493bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2494bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2495bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
2496bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2497bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2498bd8f1dc3Sbluhm     xml_failure(g_parser);
2499bd8f1dc3Sbluhm }
2500bd8f1dc3Sbluhm END_TEST
2501bd8f1dc3Sbluhm 
2502bd8f1dc3Sbluhm START_TEST(test_subordinate_xdecl_abort) {
2503bd8f1dc3Sbluhm   const char *text
2504bd8f1dc3Sbluhm       = "<!DOCTYPE doc [\n"
2505bd8f1dc3Sbluhm         "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2506bd8f1dc3Sbluhm         "]>\n"
2507bd8f1dc3Sbluhm         "<doc>&entity;</doc>";
2508bd8f1dc3Sbluhm 
2509bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2510bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2511bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
2512bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2513bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2514bd8f1dc3Sbluhm     xml_failure(g_parser);
2515bd8f1dc3Sbluhm }
2516bd8f1dc3Sbluhm END_TEST
2517bd8f1dc3Sbluhm 
2518bd8f1dc3Sbluhm /* Test external entity fault handling with suspension */
2519bd8f1dc3Sbluhm START_TEST(test_ext_entity_invalid_suspended_parse) {
2520bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2521bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2522bd8f1dc3Sbluhm                      "]>\n"
2523bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
2524bd8f1dc3Sbluhm   ExtFaults faults[]
2525bd8f1dc3Sbluhm       = {{"<?xml version='1.0' encoding='us-ascii'?><",
2526bd8f1dc3Sbluhm           "Incomplete element declaration not faulted", NULL,
2527bd8f1dc3Sbluhm           XML_ERROR_UNCLOSED_TOKEN},
2528bd8f1dc3Sbluhm          {/* First two bytes of a three-byte char */
2529bd8f1dc3Sbluhm           "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2530bd8f1dc3Sbluhm           "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2531bd8f1dc3Sbluhm          {NULL, NULL, NULL, XML_ERROR_NONE}};
2532bd8f1dc3Sbluhm   ExtFaults *fault;
2533bd8f1dc3Sbluhm 
2534bd8f1dc3Sbluhm   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2535bd8f1dc3Sbluhm     set_subtest("%s", fault->parse_text);
2536bd8f1dc3Sbluhm     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2537bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(g_parser,
2538bd8f1dc3Sbluhm                                     external_entity_suspending_faulter);
2539bd8f1dc3Sbluhm     XML_SetUserData(g_parser, fault);
2540bd8f1dc3Sbluhm     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2541bd8f1dc3Sbluhm                    "Parser did not report external entity error");
2542bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
2543bd8f1dc3Sbluhm   }
2544bd8f1dc3Sbluhm }
2545bd8f1dc3Sbluhm END_TEST
2546bd8f1dc3Sbluhm 
2547bd8f1dc3Sbluhm /* Test setting an explicit encoding */
2548bd8f1dc3Sbluhm START_TEST(test_explicit_encoding) {
2549bd8f1dc3Sbluhm   const char *text1 = "<doc>Hello ";
2550bd8f1dc3Sbluhm   const char *text2 = " World</doc>";
2551bd8f1dc3Sbluhm 
2552bd8f1dc3Sbluhm   /* Just check that we can set the encoding to NULL before starting */
2553bd8f1dc3Sbluhm   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2554bd8f1dc3Sbluhm     fail("Failed to initialise encoding to NULL");
2555bd8f1dc3Sbluhm   /* Say we are UTF-8 */
2556bd8f1dc3Sbluhm   if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2557bd8f1dc3Sbluhm     fail("Failed to set explicit encoding");
2558bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2559bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2560bd8f1dc3Sbluhm     xml_failure(g_parser);
2561bd8f1dc3Sbluhm   /* Try to switch encodings mid-parse */
2562bd8f1dc3Sbluhm   if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2563bd8f1dc3Sbluhm     fail("Allowed encoding change");
2564bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2565bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2566bd8f1dc3Sbluhm     xml_failure(g_parser);
2567bd8f1dc3Sbluhm   /* Try now the parse is over */
2568bd8f1dc3Sbluhm   if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2569bd8f1dc3Sbluhm     fail("Failed to unset encoding");
2570bd8f1dc3Sbluhm }
2571bd8f1dc3Sbluhm END_TEST
2572bd8f1dc3Sbluhm 
2573bd8f1dc3Sbluhm /* Test handling of trailing CR (rather than newline) */
2574bd8f1dc3Sbluhm START_TEST(test_trailing_cr) {
2575bd8f1dc3Sbluhm   const char *text = "<doc>\r";
2576bd8f1dc3Sbluhm   int found_cr;
2577bd8f1dc3Sbluhm 
2578bd8f1dc3Sbluhm   /* Try with a character handler, for code coverage */
2579bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2580bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_cr);
2581bd8f1dc3Sbluhm   found_cr = 0;
2582bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2583bd8f1dc3Sbluhm       == XML_STATUS_OK)
2584bd8f1dc3Sbluhm     fail("Failed to fault unclosed doc");
2585bd8f1dc3Sbluhm   if (found_cr == 0)
2586bd8f1dc3Sbluhm     fail("Did not catch the carriage return");
2587bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2588bd8f1dc3Sbluhm 
2589bd8f1dc3Sbluhm   /* Now with a default handler instead */
2590bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2591bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_cr);
2592bd8f1dc3Sbluhm   found_cr = 0;
2593bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2594bd8f1dc3Sbluhm       == XML_STATUS_OK)
2595bd8f1dc3Sbluhm     fail("Failed to fault unclosed doc");
2596bd8f1dc3Sbluhm   if (found_cr == 0)
2597bd8f1dc3Sbluhm     fail("Did not catch default carriage return");
2598bd8f1dc3Sbluhm }
2599bd8f1dc3Sbluhm END_TEST
2600bd8f1dc3Sbluhm 
2601bd8f1dc3Sbluhm /* Test trailing CR in an external entity parse */
2602bd8f1dc3Sbluhm START_TEST(test_ext_entity_trailing_cr) {
2603bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2604bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2605bd8f1dc3Sbluhm                      "]>\n"
2606bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
2607bd8f1dc3Sbluhm   int found_cr;
2608bd8f1dc3Sbluhm 
2609bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2610bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2611bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_cr);
2612bd8f1dc3Sbluhm   found_cr = 0;
2613bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2614bd8f1dc3Sbluhm       != XML_STATUS_OK)
2615bd8f1dc3Sbluhm     xml_failure(g_parser);
2616bd8f1dc3Sbluhm   if (found_cr == 0)
2617bd8f1dc3Sbluhm     fail("No carriage return found");
2618bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2619bd8f1dc3Sbluhm 
2620bd8f1dc3Sbluhm   /* Try again with a different trailing CR */
2621bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2622bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2623bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_cr);
2624bd8f1dc3Sbluhm   found_cr = 0;
2625bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2626bd8f1dc3Sbluhm       != XML_STATUS_OK)
2627bd8f1dc3Sbluhm     xml_failure(g_parser);
2628bd8f1dc3Sbluhm   if (found_cr == 0)
2629bd8f1dc3Sbluhm     fail("No carriage return found");
2630bd8f1dc3Sbluhm }
2631bd8f1dc3Sbluhm END_TEST
2632bd8f1dc3Sbluhm 
2633bd8f1dc3Sbluhm /* Test handling of trailing square bracket */
2634bd8f1dc3Sbluhm START_TEST(test_trailing_rsqb) {
2635bd8f1dc3Sbluhm   const char *text8 = "<doc>]";
2636bd8f1dc3Sbluhm   const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2637bd8f1dc3Sbluhm   int found_rsqb;
2638bd8f1dc3Sbluhm   int text8_len = (int)strlen(text8);
2639bd8f1dc3Sbluhm 
2640bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2641bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_rsqb);
2642bd8f1dc3Sbluhm   found_rsqb = 0;
2643bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2644bd8f1dc3Sbluhm       == XML_STATUS_OK)
2645bd8f1dc3Sbluhm     fail("Failed to fault unclosed doc");
2646bd8f1dc3Sbluhm   if (found_rsqb == 0)
2647bd8f1dc3Sbluhm     fail("Did not catch the right square bracket");
2648bd8f1dc3Sbluhm 
2649bd8f1dc3Sbluhm   /* Try again with a different encoding */
2650bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2651bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2652bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_rsqb);
2653bd8f1dc3Sbluhm   found_rsqb = 0;
2654bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2655bd8f1dc3Sbluhm                               XML_TRUE)
2656bd8f1dc3Sbluhm       == XML_STATUS_OK)
2657bd8f1dc3Sbluhm     fail("Failed to fault unclosed doc");
2658bd8f1dc3Sbluhm   if (found_rsqb == 0)
2659bd8f1dc3Sbluhm     fail("Did not catch the right square bracket");
2660bd8f1dc3Sbluhm 
2661bd8f1dc3Sbluhm   /* And finally with a default handler */
2662bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2663bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, rsqb_handler);
2664bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_rsqb);
2665bd8f1dc3Sbluhm   found_rsqb = 0;
2666bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2667bd8f1dc3Sbluhm                               XML_TRUE)
2668bd8f1dc3Sbluhm       == XML_STATUS_OK)
2669bd8f1dc3Sbluhm     fail("Failed to fault unclosed doc");
2670bd8f1dc3Sbluhm   if (found_rsqb == 0)
2671bd8f1dc3Sbluhm     fail("Did not catch the right square bracket");
2672bd8f1dc3Sbluhm }
2673bd8f1dc3Sbluhm END_TEST
2674bd8f1dc3Sbluhm 
2675bd8f1dc3Sbluhm /* Test trailing right square bracket in an external entity parse */
2676bd8f1dc3Sbluhm START_TEST(test_ext_entity_trailing_rsqb) {
2677bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2678bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2679bd8f1dc3Sbluhm                      "]>\n"
2680bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
2681bd8f1dc3Sbluhm   int found_rsqb;
2682bd8f1dc3Sbluhm 
2683bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2684bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2685bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &found_rsqb);
2686bd8f1dc3Sbluhm   found_rsqb = 0;
2687bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2688bd8f1dc3Sbluhm       != XML_STATUS_OK)
2689bd8f1dc3Sbluhm     xml_failure(g_parser);
2690bd8f1dc3Sbluhm   if (found_rsqb == 0)
2691bd8f1dc3Sbluhm     fail("No right square bracket found");
2692bd8f1dc3Sbluhm }
2693bd8f1dc3Sbluhm END_TEST
2694bd8f1dc3Sbluhm 
2695bd8f1dc3Sbluhm /* Test CDATA handling in an external entity */
2696bd8f1dc3Sbluhm START_TEST(test_ext_entity_good_cdata) {
2697bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
2698bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2699bd8f1dc3Sbluhm                      "]>\n"
2700bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
2701bd8f1dc3Sbluhm 
2702bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2703bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2704bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2705bd8f1dc3Sbluhm       != XML_STATUS_OK)
2706bd8f1dc3Sbluhm     xml_failure(g_parser);
2707bd8f1dc3Sbluhm }
2708bd8f1dc3Sbluhm END_TEST
2709bd8f1dc3Sbluhm 
2710bd8f1dc3Sbluhm /* Test user parameter settings */
2711bd8f1dc3Sbluhm START_TEST(test_user_parameters) {
2712bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2713bd8f1dc3Sbluhm                      "<!-- Primary parse -->\n"
2714bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2715bd8f1dc3Sbluhm                      "<doc>&entity;";
2716bd8f1dc3Sbluhm   const char *epilog = "<!-- Back to primary parser -->\n"
2717bd8f1dc3Sbluhm                        "</doc>";
2718bd8f1dc3Sbluhm 
2719bd8f1dc3Sbluhm   g_comment_count = 0;
2720bd8f1dc3Sbluhm   g_skip_count = 0;
2721bd8f1dc3Sbluhm   g_xdecl_count = 0;
2722bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2723bd8f1dc3Sbluhm   XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2724bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2725bd8f1dc3Sbluhm   XML_SetCommentHandler(g_parser, data_check_comment_handler);
2726bd8f1dc3Sbluhm   XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2727bd8f1dc3Sbluhm   XML_UseParserAsHandlerArg(g_parser);
2728bd8f1dc3Sbluhm   XML_SetUserData(g_parser, (void *)1);
2729bd8f1dc3Sbluhm   g_handler_data = g_parser;
2730bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2731bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2732bd8f1dc3Sbluhm     xml_failure(g_parser);
2733bd8f1dc3Sbluhm   /* Ensure we can't change policy mid-parse */
2734bd8f1dc3Sbluhm   if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2735bd8f1dc3Sbluhm     fail("Changed param entity parsing policy while parsing");
2736bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2737bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2738bd8f1dc3Sbluhm     xml_failure(g_parser);
2739bd8f1dc3Sbluhm   if (g_comment_count != 3)
2740bd8f1dc3Sbluhm     fail("Comment handler not invoked enough times");
2741bd8f1dc3Sbluhm   if (g_skip_count != 1)
2742bd8f1dc3Sbluhm     fail("Skip handler not invoked enough times");
2743bd8f1dc3Sbluhm   if (g_xdecl_count != 1)
2744bd8f1dc3Sbluhm     fail("XML declaration handler not invoked");
2745bd8f1dc3Sbluhm }
2746bd8f1dc3Sbluhm END_TEST
2747bd8f1dc3Sbluhm 
2748bd8f1dc3Sbluhm /* Test that an explicit external entity handler argument replaces
2749bd8f1dc3Sbluhm  * the parser as the first argument.
2750bd8f1dc3Sbluhm  *
2751bd8f1dc3Sbluhm  * We do not call the first parameter to the external entity handler
2752bd8f1dc3Sbluhm  * 'parser' for once, since the first time the handler is called it
2753bd8f1dc3Sbluhm  * will actually be a text string.  We need to be able to access the
2754bd8f1dc3Sbluhm  * global 'parser' variable to create our external entity parser from,
2755bd8f1dc3Sbluhm  * since there are code paths we need to ensure get executed.
2756bd8f1dc3Sbluhm  */
2757bd8f1dc3Sbluhm START_TEST(test_ext_entity_ref_parameter) {
2758bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2759bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>\n"
2760bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
2761bd8f1dc3Sbluhm 
2762bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2763bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2764bd8f1dc3Sbluhm   /* Set a handler arg that is not NULL and not parser (which is
2765bd8f1dc3Sbluhm    * what NULL would cause to be passed.
2766bd8f1dc3Sbluhm    */
2767bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2768bd8f1dc3Sbluhm   g_handler_data = text;
2769bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2770bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2771bd8f1dc3Sbluhm     xml_failure(g_parser);
2772bd8f1dc3Sbluhm 
2773bd8f1dc3Sbluhm   /* Now try again with unset args */
2774bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2775bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2776bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2777bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2778bd8f1dc3Sbluhm   g_handler_data = g_parser;
2779bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2780bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2781bd8f1dc3Sbluhm     xml_failure(g_parser);
2782bd8f1dc3Sbluhm }
2783bd8f1dc3Sbluhm END_TEST
2784bd8f1dc3Sbluhm 
2785bd8f1dc3Sbluhm /* Test the parsing of an empty string */
2786bd8f1dc3Sbluhm START_TEST(test_empty_parse) {
2787bd8f1dc3Sbluhm   const char *text = "<doc></doc>";
2788bd8f1dc3Sbluhm   const char *partial = "<doc>";
2789bd8f1dc3Sbluhm 
2790bd8f1dc3Sbluhm   if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2791bd8f1dc3Sbluhm     fail("Parsing empty string faulted");
2792bd8f1dc3Sbluhm   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2793bd8f1dc3Sbluhm     fail("Parsing final empty string not faulted");
2794bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2795bd8f1dc3Sbluhm     fail("Parsing final empty string faulted for wrong reason");
2796bd8f1dc3Sbluhm 
2797bd8f1dc3Sbluhm   /* Now try with valid text before the empty end */
2798bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2799bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2800bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2801bd8f1dc3Sbluhm     xml_failure(g_parser);
2802bd8f1dc3Sbluhm   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2803bd8f1dc3Sbluhm     fail("Parsing final empty string faulted");
2804bd8f1dc3Sbluhm 
2805bd8f1dc3Sbluhm   /* Now try with invalid text before the empty end */
2806bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
2807bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2808bd8f1dc3Sbluhm                               XML_FALSE)
2809bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2810bd8f1dc3Sbluhm     xml_failure(g_parser);
2811bd8f1dc3Sbluhm   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2812bd8f1dc3Sbluhm     fail("Parsing final incomplete empty string not faulted");
2813bd8f1dc3Sbluhm }
2814bd8f1dc3Sbluhm END_TEST
2815bd8f1dc3Sbluhm 
281661ad8a07Sbluhm /* Test XML_Parse for len < 0 */
281761ad8a07Sbluhm START_TEST(test_negative_len_parse) {
281861ad8a07Sbluhm   const char *const doc = "<root/>";
281961ad8a07Sbluhm   for (int isFinal = 0; isFinal < 2; isFinal++) {
282061ad8a07Sbluhm     set_subtest("isFinal=%d", isFinal);
282161ad8a07Sbluhm 
282261ad8a07Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
282361ad8a07Sbluhm 
282461ad8a07Sbluhm     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
282561ad8a07Sbluhm       fail("There was not supposed to be any initial parse error.");
282661ad8a07Sbluhm 
282761ad8a07Sbluhm     const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
282861ad8a07Sbluhm 
282961ad8a07Sbluhm     if (status != XML_STATUS_ERROR)
283061ad8a07Sbluhm       fail("Negative len was expected to fail the parse but did not.");
283161ad8a07Sbluhm 
283261ad8a07Sbluhm     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
283361ad8a07Sbluhm       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
283461ad8a07Sbluhm 
283561ad8a07Sbluhm     XML_ParserFree(parser);
283661ad8a07Sbluhm   }
283761ad8a07Sbluhm }
283861ad8a07Sbluhm END_TEST
283961ad8a07Sbluhm 
284061ad8a07Sbluhm /* Test XML_ParseBuffer for len < 0 */
284161ad8a07Sbluhm START_TEST(test_negative_len_parse_buffer) {
284261ad8a07Sbluhm   const char *const doc = "<root/>";
284361ad8a07Sbluhm   for (int isFinal = 0; isFinal < 2; isFinal++) {
284461ad8a07Sbluhm     set_subtest("isFinal=%d", isFinal);
284561ad8a07Sbluhm 
284661ad8a07Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
284761ad8a07Sbluhm 
284861ad8a07Sbluhm     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
284961ad8a07Sbluhm       fail("There was not supposed to be any initial parse error.");
285061ad8a07Sbluhm 
285161ad8a07Sbluhm     void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
285261ad8a07Sbluhm 
285361ad8a07Sbluhm     if (buffer == NULL)
285461ad8a07Sbluhm       fail("XML_GetBuffer failed.");
285561ad8a07Sbluhm 
285661ad8a07Sbluhm     memcpy(buffer, doc, strlen(doc));
285761ad8a07Sbluhm 
285861ad8a07Sbluhm     const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
285961ad8a07Sbluhm 
286061ad8a07Sbluhm     if (status != XML_STATUS_ERROR)
286161ad8a07Sbluhm       fail("Negative len was expected to fail the parse but did not.");
286261ad8a07Sbluhm 
286361ad8a07Sbluhm     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
286461ad8a07Sbluhm       fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
286561ad8a07Sbluhm 
286661ad8a07Sbluhm     XML_ParserFree(parser);
286761ad8a07Sbluhm   }
286861ad8a07Sbluhm }
286961ad8a07Sbluhm END_TEST
287061ad8a07Sbluhm 
2871bd8f1dc3Sbluhm /* Test odd corners of the XML_GetBuffer interface */
2872bd8f1dc3Sbluhm static enum XML_Status
2873bd8f1dc3Sbluhm get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2874bd8f1dc3Sbluhm   const XML_Feature *feature = XML_GetFeatureList();
2875bd8f1dc3Sbluhm 
2876bd8f1dc3Sbluhm   if (feature == NULL)
2877bd8f1dc3Sbluhm     return XML_STATUS_ERROR;
2878bd8f1dc3Sbluhm   for (; feature->feature != XML_FEATURE_END; feature++) {
2879bd8f1dc3Sbluhm     if (feature->feature == feature_id) {
2880bd8f1dc3Sbluhm       *presult = feature->value;
2881bd8f1dc3Sbluhm       return XML_STATUS_OK;
2882bd8f1dc3Sbluhm     }
2883bd8f1dc3Sbluhm   }
2884bd8f1dc3Sbluhm   return XML_STATUS_ERROR;
2885bd8f1dc3Sbluhm }
2886bd8f1dc3Sbluhm 
2887bd8f1dc3Sbluhm /* Test odd corners of the XML_GetBuffer interface */
2888bd8f1dc3Sbluhm START_TEST(test_get_buffer_1) {
2889bd8f1dc3Sbluhm   const char *text = get_buffer_test_text;
2890bd8f1dc3Sbluhm   void *buffer;
2891bd8f1dc3Sbluhm   long context_bytes;
2892bd8f1dc3Sbluhm 
2893bd8f1dc3Sbluhm   /* Attempt to allocate a negative length buffer */
2894bd8f1dc3Sbluhm   if (XML_GetBuffer(g_parser, -12) != NULL)
2895bd8f1dc3Sbluhm     fail("Negative length buffer not failed");
2896bd8f1dc3Sbluhm 
2897bd8f1dc3Sbluhm   /* Now get a small buffer and extend it past valid length */
2898bd8f1dc3Sbluhm   buffer = XML_GetBuffer(g_parser, 1536);
2899bd8f1dc3Sbluhm   if (buffer == NULL)
2900bd8f1dc3Sbluhm     fail("1.5K buffer failed");
2901bd8f1dc3Sbluhm   assert(buffer != NULL);
2902bd8f1dc3Sbluhm   memcpy(buffer, text, strlen(text));
2903bd8f1dc3Sbluhm   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2904bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2905bd8f1dc3Sbluhm     xml_failure(g_parser);
2906bd8f1dc3Sbluhm   if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2907bd8f1dc3Sbluhm     fail("INT_MAX buffer not failed");
2908bd8f1dc3Sbluhm 
2909bd8f1dc3Sbluhm   /* Now try extending it a more reasonable but still too large
2910bd8f1dc3Sbluhm    * amount.  The allocator in XML_GetBuffer() doubles the buffer
2911bd8f1dc3Sbluhm    * size until it exceeds the requested amount or INT_MAX.  If it
2912bd8f1dc3Sbluhm    * exceeds INT_MAX, it rejects the request, so we want a request
2913bd8f1dc3Sbluhm    * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2914bd8f1dc3Sbluhm    * with an extra byte just to ensure that the request is off any
2915bd8f1dc3Sbluhm    * boundary.  The request will be inflated internally by
2916bd8f1dc3Sbluhm    * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2917bd8f1dc3Sbluhm    * request.
2918bd8f1dc3Sbluhm    */
2919bd8f1dc3Sbluhm   if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2920bd8f1dc3Sbluhm     context_bytes = 0;
2921bd8f1dc3Sbluhm   if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2922bd8f1dc3Sbluhm     fail("INT_MAX- buffer not failed");
2923bd8f1dc3Sbluhm 
2924bd8f1dc3Sbluhm   /* Now try extending it a carefully crafted amount */
2925bd8f1dc3Sbluhm   if (XML_GetBuffer(g_parser, 1000) == NULL)
2926bd8f1dc3Sbluhm     fail("1000 buffer failed");
2927bd8f1dc3Sbluhm }
2928bd8f1dc3Sbluhm END_TEST
2929bd8f1dc3Sbluhm 
2930bd8f1dc3Sbluhm /* Test more corners of the XML_GetBuffer interface */
2931bd8f1dc3Sbluhm START_TEST(test_get_buffer_2) {
2932bd8f1dc3Sbluhm   const char *text = get_buffer_test_text;
2933bd8f1dc3Sbluhm   void *buffer;
2934bd8f1dc3Sbluhm 
2935bd8f1dc3Sbluhm   /* Now get a decent buffer */
2936bd8f1dc3Sbluhm   buffer = XML_GetBuffer(g_parser, 1536);
2937bd8f1dc3Sbluhm   if (buffer == NULL)
2938bd8f1dc3Sbluhm     fail("1.5K buffer failed");
2939bd8f1dc3Sbluhm   assert(buffer != NULL);
2940bd8f1dc3Sbluhm   memcpy(buffer, text, strlen(text));
2941bd8f1dc3Sbluhm   if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2942bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
2943bd8f1dc3Sbluhm     xml_failure(g_parser);
2944bd8f1dc3Sbluhm 
2945bd8f1dc3Sbluhm   /* Extend it, to catch a different code path */
2946bd8f1dc3Sbluhm   if (XML_GetBuffer(g_parser, 1024) == NULL)
2947bd8f1dc3Sbluhm     fail("1024 buffer failed");
2948bd8f1dc3Sbluhm }
2949bd8f1dc3Sbluhm END_TEST
2950bd8f1dc3Sbluhm 
/* Test for signed integer overflow CVE-2022-23852.
 *
 * Only built when XML_CONTEXT_BYTES > 0.  After a short non-final
 * parse, a buffer request of INT_MAX - keep + 1 bytes must be refused.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  const char *const newline_doc = "\n";
  const int expectedKeepValue = (int)strlen(newline_doc);

  // Once this parse has run, variable "keep" in XML_GetBuffer will
  // have value expectedKeepValue
  const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
      parser, newline_doc, expectedKeepValue, XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  assert(expectedKeepValue > 0);
  void *const enlarged
      = XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1);
  if (enlarged != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2975bd8f1dc3Sbluhm 
2976bd8f1dc3Sbluhm START_TEST(test_buffer_can_grow_to_max) {
2977bd8f1dc3Sbluhm   const char *const prefixes[] = {
2978bd8f1dc3Sbluhm       "",
2979bd8f1dc3Sbluhm       "<",
2980bd8f1dc3Sbluhm       "<x a='",
2981bd8f1dc3Sbluhm       "<doc><x a='",
2982bd8f1dc3Sbluhm       "<document><x a='",
2983bd8f1dc3Sbluhm       "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2984bd8f1dc3Sbluhm       "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2985bd8f1dc3Sbluhm       "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2986bd8f1dc3Sbluhm       "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2987bd8f1dc3Sbluhm       "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2988bd8f1dc3Sbluhm   const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2989bd8f1dc3Sbluhm   int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2990bd8f1dc3Sbluhm #if defined(__MINGW32__) && ! defined(__MINGW64__)
2991bd8f1dc3Sbluhm   // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2992bd8f1dc3Sbluhm   // Can we make a big allocation?
2993bd8f1dc3Sbluhm   void *big = malloc(maxbuf);
2994bd8f1dc3Sbluhm   if (! big) {
2995bd8f1dc3Sbluhm     // The big allocation failed. Let's be a little lenient.
2996bd8f1dc3Sbluhm     maxbuf = maxbuf / 2;
2997bd8f1dc3Sbluhm   }
2998bd8f1dc3Sbluhm   free(big);
2999bd8f1dc3Sbluhm #endif
3000bd8f1dc3Sbluhm 
3001bd8f1dc3Sbluhm   for (int i = 0; i < num_prefixes; ++i) {
3002bd8f1dc3Sbluhm     set_subtest("\"%s\"", prefixes[i]);
3003bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
3004bd8f1dc3Sbluhm     const int prefix_len = (int)strlen(prefixes[i]);
3005bd8f1dc3Sbluhm     const enum XML_Status s
3006bd8f1dc3Sbluhm         = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
3007bd8f1dc3Sbluhm     if (s != XML_STATUS_OK)
3008bd8f1dc3Sbluhm       xml_failure(parser);
3009bd8f1dc3Sbluhm 
3010bd8f1dc3Sbluhm     // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3011bd8f1dc3Sbluhm     // subtracting the whole prefix is easiest, and close enough.
3012bd8f1dc3Sbluhm     assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
3013bd8f1dc3Sbluhm     // The limit should be consistent; no prefix should allow us to
3014bd8f1dc3Sbluhm     // reach above the max buffer size.
3015bd8f1dc3Sbluhm     assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
3016bd8f1dc3Sbluhm     XML_ParserFree(parser);
3017bd8f1dc3Sbluhm   }
3018bd8f1dc3Sbluhm }
3019bd8f1dc3Sbluhm END_TEST
3020bd8f1dc3Sbluhm 
3021bd8f1dc3Sbluhm START_TEST(test_getbuffer_allocates_on_zero_len) {
3022bd8f1dc3Sbluhm   for (int first_len = 1; first_len >= 0; first_len--) {
3023bd8f1dc3Sbluhm     set_subtest("with len=%d first", first_len);
3024bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
3025bd8f1dc3Sbluhm     assert_true(parser != NULL);
3026bd8f1dc3Sbluhm     assert_true(XML_GetBuffer(parser, first_len) != NULL);
3027bd8f1dc3Sbluhm     assert_true(XML_GetBuffer(parser, 0) != NULL);
3028bd8f1dc3Sbluhm     if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
3029bd8f1dc3Sbluhm       xml_failure(parser);
3030bd8f1dc3Sbluhm     XML_ParserFree(parser);
3031bd8f1dc3Sbluhm   }
3032bd8f1dc3Sbluhm }
3033bd8f1dc3Sbluhm END_TEST
3034bd8f1dc3Sbluhm 
3035bd8f1dc3Sbluhm /* Test position information macros */
3036bd8f1dc3Sbluhm START_TEST(test_byte_info_at_end) {
3037bd8f1dc3Sbluhm   const char *text = "<doc></doc>";
3038bd8f1dc3Sbluhm 
3039bd8f1dc3Sbluhm   if (XML_GetCurrentByteIndex(g_parser) != -1
3040bd8f1dc3Sbluhm       || XML_GetCurrentByteCount(g_parser) != 0)
3041bd8f1dc3Sbluhm     fail("Byte index/count incorrect at start of parse");
3042bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3043bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3044bd8f1dc3Sbluhm     xml_failure(g_parser);
3045bd8f1dc3Sbluhm   /* At end, the count will be zero and the index the end of string */
3046bd8f1dc3Sbluhm   if (XML_GetCurrentByteCount(g_parser) != 0)
3047bd8f1dc3Sbluhm     fail("Terminal byte count incorrect");
3048bd8f1dc3Sbluhm   if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
3049bd8f1dc3Sbluhm     fail("Terminal byte index incorrect");
3050bd8f1dc3Sbluhm }
3051bd8f1dc3Sbluhm END_TEST
3052bd8f1dc3Sbluhm 
3053bd8f1dc3Sbluhm /* Test position information from errors */
3054bd8f1dc3Sbluhm #define PRE_ERROR_STR "<doc></"
3055bd8f1dc3Sbluhm #define POST_ERROR_STR "wombat></doc>"
3056bd8f1dc3Sbluhm START_TEST(test_byte_info_at_error) {
3057bd8f1dc3Sbluhm   const char *text = PRE_ERROR_STR POST_ERROR_STR;
3058bd8f1dc3Sbluhm 
3059bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3060bd8f1dc3Sbluhm       == XML_STATUS_OK)
3061bd8f1dc3Sbluhm     fail("Syntax error not faulted");
3062bd8f1dc3Sbluhm   if (XML_GetCurrentByteCount(g_parser) != 0)
3063bd8f1dc3Sbluhm     fail("Error byte count incorrect");
3064bd8f1dc3Sbluhm   if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3065bd8f1dc3Sbluhm     fail("Error byte index incorrect");
3066bd8f1dc3Sbluhm }
3067bd8f1dc3Sbluhm END_TEST
3068bd8f1dc3Sbluhm #undef PRE_ERROR_STR
3069bd8f1dc3Sbluhm #undef POST_ERROR_STR
3070bd8f1dc3Sbluhm 
3071bd8f1dc3Sbluhm /* Test position information in handler */
3072bd8f1dc3Sbluhm #define START_ELEMENT "<e>"
3073bd8f1dc3Sbluhm #define CDATA_TEXT "Hello"
3074bd8f1dc3Sbluhm #define END_ELEMENT "</e>"
3075bd8f1dc3Sbluhm START_TEST(test_byte_info_at_cdata) {
3076bd8f1dc3Sbluhm   const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3077bd8f1dc3Sbluhm   int offset, size;
3078bd8f1dc3Sbluhm   ByteTestData data;
3079bd8f1dc3Sbluhm 
3080bd8f1dc3Sbluhm   /* Check initial context is empty */
3081bd8f1dc3Sbluhm   if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3082bd8f1dc3Sbluhm     fail("Unexpected context at start of parse");
3083bd8f1dc3Sbluhm 
3084bd8f1dc3Sbluhm   data.start_element_len = (int)strlen(START_ELEMENT);
3085bd8f1dc3Sbluhm   data.cdata_len = (int)strlen(CDATA_TEXT);
3086bd8f1dc3Sbluhm   data.total_string_len = (int)strlen(text);
3087bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3088bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &data);
3089bd8f1dc3Sbluhm   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3090bd8f1dc3Sbluhm     xml_failure(g_parser);
3091bd8f1dc3Sbluhm }
3092bd8f1dc3Sbluhm END_TEST
3093bd8f1dc3Sbluhm #undef START_ELEMENT
3094bd8f1dc3Sbluhm #undef CDATA_TEXT
3095bd8f1dc3Sbluhm #undef END_ELEMENT
3096bd8f1dc3Sbluhm 
3097bd8f1dc3Sbluhm /* Test predefined entities are correctly recognised */
3098bd8f1dc3Sbluhm START_TEST(test_predefined_entities) {
3099bd8f1dc3Sbluhm   const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3100bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3101bd8f1dc3Sbluhm   const XML_Char *result = XCS("<>&\"'");
3102bd8f1dc3Sbluhm   CharData storage;
3103bd8f1dc3Sbluhm 
3104bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
3105bd8f1dc3Sbluhm   /* run_character_check uses XML_SetCharacterDataHandler(), which
3106bd8f1dc3Sbluhm    * unfortunately heads off a code path that we need to exercise.
3107bd8f1dc3Sbluhm    */
3108bd8f1dc3Sbluhm   CharData_Init(&storage);
3109bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3110bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3111bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3112bd8f1dc3Sbluhm     xml_failure(g_parser);
3113bd8f1dc3Sbluhm   /* The default handler doesn't translate the entities */
3114bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
3115bd8f1dc3Sbluhm 
3116bd8f1dc3Sbluhm   /* Now try again and check the translation */
3117bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
3118bd8f1dc3Sbluhm   run_character_check(text, result);
3119bd8f1dc3Sbluhm }
3120bd8f1dc3Sbluhm END_TEST
3121bd8f1dc3Sbluhm 
3122bd8f1dc3Sbluhm /* Regression test that an invalid tag in an external parameter
3123bd8f1dc3Sbluhm  * reference in an external DTD is correctly faulted.
3124bd8f1dc3Sbluhm  *
3125bd8f1dc3Sbluhm  * Only a few specific tags are legal in DTDs ignoring comments and
3126bd8f1dc3Sbluhm  * processing instructions, all of which begin with an exclamation
3127bd8f1dc3Sbluhm  * mark.  "<el/>" is not one of them, so the parser should raise an
3128bd8f1dc3Sbluhm  * error on encountering it.
3129bd8f1dc3Sbluhm  */
3130bd8f1dc3Sbluhm START_TEST(test_invalid_tag_in_dtd) {
3131bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3132bd8f1dc3Sbluhm                      "<doc></doc>\n";
3133bd8f1dc3Sbluhm 
3134bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3135bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3136bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3137bd8f1dc3Sbluhm                  "Invalid tag IN DTD external param not rejected");
3138bd8f1dc3Sbluhm }
3139bd8f1dc3Sbluhm END_TEST
3140bd8f1dc3Sbluhm 
3141bd8f1dc3Sbluhm /* Test entities not quite the predefined ones are not mis-recognised */
3142bd8f1dc3Sbluhm START_TEST(test_not_predefined_entities) {
3143bd8f1dc3Sbluhm   const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3144bd8f1dc3Sbluhm                         "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3145bd8f1dc3Sbluhm   int i = 0;
3146bd8f1dc3Sbluhm 
3147bd8f1dc3Sbluhm   while (text[i] != NULL) {
3148bd8f1dc3Sbluhm     expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3149bd8f1dc3Sbluhm                    "Undefined entity not rejected");
3150bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
3151bd8f1dc3Sbluhm     i++;
3152bd8f1dc3Sbluhm   }
3153bd8f1dc3Sbluhm }
3154bd8f1dc3Sbluhm END_TEST
3155bd8f1dc3Sbluhm 
3156bd8f1dc3Sbluhm /* Test conditional inclusion (IGNORE) */
3157bd8f1dc3Sbluhm START_TEST(test_ignore_section) {
3158bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3159bd8f1dc3Sbluhm                      "<doc><e>&entity;</e></doc>";
3160bd8f1dc3Sbluhm   const XML_Char *expected
3161bd8f1dc3Sbluhm       = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3162bd8f1dc3Sbluhm   CharData storage;
3163bd8f1dc3Sbluhm 
3164bd8f1dc3Sbluhm   CharData_Init(&storage);
3165bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3166bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3167bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3168bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
3169bd8f1dc3Sbluhm   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3170bd8f1dc3Sbluhm   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3171bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3172bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, dummy_start_element);
3173bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, dummy_end_element);
3174bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3175bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3176bd8f1dc3Sbluhm     xml_failure(g_parser);
3177bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
3178bd8f1dc3Sbluhm }
3179bd8f1dc3Sbluhm END_TEST
3180bd8f1dc3Sbluhm 
3181bd8f1dc3Sbluhm START_TEST(test_ignore_section_utf16) {
3182bd8f1dc3Sbluhm   const char text[] =
3183bd8f1dc3Sbluhm       /* <!DOCTYPE d SYSTEM 's'> */
3184bd8f1dc3Sbluhm       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3185bd8f1dc3Sbluhm       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3186bd8f1dc3Sbluhm       /* <d><e>&en;</e></d> */
3187bd8f1dc3Sbluhm       "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3188bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3189bd8f1dc3Sbluhm   CharData storage;
3190bd8f1dc3Sbluhm 
3191bd8f1dc3Sbluhm   CharData_Init(&storage);
3192bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3193bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3194bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3195bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
3196bd8f1dc3Sbluhm   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3197bd8f1dc3Sbluhm   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3198bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3199bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, dummy_start_element);
3200bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, dummy_end_element);
3201bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3202bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3203bd8f1dc3Sbluhm     xml_failure(g_parser);
3204bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
3205bd8f1dc3Sbluhm }
3206bd8f1dc3Sbluhm END_TEST
3207bd8f1dc3Sbluhm 
3208bd8f1dc3Sbluhm START_TEST(test_ignore_section_utf16_be) {
3209bd8f1dc3Sbluhm   const char text[] =
3210bd8f1dc3Sbluhm       /* <!DOCTYPE d SYSTEM 's'> */
3211bd8f1dc3Sbluhm       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3212bd8f1dc3Sbluhm       "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3213bd8f1dc3Sbluhm       /* <d><e>&en;</e></d> */
3214bd8f1dc3Sbluhm       "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3215bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3216bd8f1dc3Sbluhm   CharData storage;
3217bd8f1dc3Sbluhm 
3218bd8f1dc3Sbluhm   CharData_Init(&storage);
3219bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3220bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3221bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser,
3222bd8f1dc3Sbluhm                                   external_entity_load_ignore_utf16_be);
3223bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
3224bd8f1dc3Sbluhm   XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3225bd8f1dc3Sbluhm   XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3226bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3227bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, dummy_start_element);
3228bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, dummy_end_element);
3229bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3230bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3231bd8f1dc3Sbluhm     xml_failure(g_parser);
3232bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
3233bd8f1dc3Sbluhm }
3234bd8f1dc3Sbluhm END_TEST
3235bd8f1dc3Sbluhm 
3236bd8f1dc3Sbluhm /* Test mis-formatted conditional exclusion */
3237bd8f1dc3Sbluhm START_TEST(test_bad_ignore_section) {
3238bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3239bd8f1dc3Sbluhm                      "<doc><e>&entity;</e></doc>";
3240bd8f1dc3Sbluhm   ExtFaults faults[]
3241bd8f1dc3Sbluhm       = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3242bd8f1dc3Sbluhm           XML_ERROR_SYNTAX},
3243bd8f1dc3Sbluhm          {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3244bd8f1dc3Sbluhm           XML_ERROR_INVALID_TOKEN},
3245bd8f1dc3Sbluhm          {/* FIrst two bytes of a three-byte char */
3246bd8f1dc3Sbluhm           "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3247bd8f1dc3Sbluhm           XML_ERROR_PARTIAL_CHAR},
3248bd8f1dc3Sbluhm          {NULL, NULL, NULL, XML_ERROR_NONE}};
3249bd8f1dc3Sbluhm   ExtFaults *fault;
3250bd8f1dc3Sbluhm 
3251bd8f1dc3Sbluhm   for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3252bd8f1dc3Sbluhm     set_subtest("%s", fault->parse_text);
3253bd8f1dc3Sbluhm     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3254bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3255bd8f1dc3Sbluhm     XML_SetUserData(g_parser, fault);
3256bd8f1dc3Sbluhm     expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3257bd8f1dc3Sbluhm                    "Incomplete IGNORE section not failed");
3258bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
3259bd8f1dc3Sbluhm   }
3260bd8f1dc3Sbluhm }
3261bd8f1dc3Sbluhm END_TEST
3262bd8f1dc3Sbluhm 
/* User data shared between test_external_bom_consumed and its external
 * entity handler, external_bom_checker.
 */
struct bom_testdata {
  /* Text the handler feeds to the entity parser for '004-2.ent' */
  const char *external;
  /* Number of leading bytes of 'external' fed as the first, non-final
   * chunk; the remainder is fed as the final chunk.
   */
  int split;
  /* Set to XML_TRUE by the handler once it has been called for
   * '004-2.ent'.
   */
  XML_Bool nested_callback_happened;
};
3268bd8f1dc3Sbluhm 
3269bd8f1dc3Sbluhm static int XMLCALL
3270bd8f1dc3Sbluhm external_bom_checker(XML_Parser parser, const XML_Char *context,
3271bd8f1dc3Sbluhm                      const XML_Char *base, const XML_Char *systemId,
3272bd8f1dc3Sbluhm                      const XML_Char *publicId) {
3273bd8f1dc3Sbluhm   const char *text;
3274bd8f1dc3Sbluhm   UNUSED_P(base);
3275bd8f1dc3Sbluhm   UNUSED_P(systemId);
3276bd8f1dc3Sbluhm   UNUSED_P(publicId);
3277bd8f1dc3Sbluhm 
3278bd8f1dc3Sbluhm   XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3279bd8f1dc3Sbluhm   if (ext_parser == NULL)
3280bd8f1dc3Sbluhm     fail("Could not create external entity parser");
3281bd8f1dc3Sbluhm 
3282bd8f1dc3Sbluhm   if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3283bd8f1dc3Sbluhm     struct bom_testdata *const testdata
3284bd8f1dc3Sbluhm         = (struct bom_testdata *)XML_GetUserData(parser);
3285bd8f1dc3Sbluhm     const char *const external = testdata->external;
3286bd8f1dc3Sbluhm     const int split = testdata->split;
3287bd8f1dc3Sbluhm     testdata->nested_callback_happened = XML_TRUE;
3288bd8f1dc3Sbluhm 
3289bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3290bd8f1dc3Sbluhm         != XML_STATUS_OK) {
3291bd8f1dc3Sbluhm       xml_failure(ext_parser);
3292bd8f1dc3Sbluhm     }
3293bd8f1dc3Sbluhm     text = external + split; // the parse below will continue where we left off.
3294bd8f1dc3Sbluhm   } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3295bd8f1dc3Sbluhm     text = "<!ELEMENT doc EMPTY>\n"
3296bd8f1dc3Sbluhm            "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3297bd8f1dc3Sbluhm            "<!ENTITY % e2 '%e1;'>\n";
3298bd8f1dc3Sbluhm   } else {
3299bd8f1dc3Sbluhm     fail("unknown systemId");
3300bd8f1dc3Sbluhm   }
3301bd8f1dc3Sbluhm 
3302bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3303bd8f1dc3Sbluhm       != XML_STATUS_OK)
3304bd8f1dc3Sbluhm     xml_failure(ext_parser);
3305bd8f1dc3Sbluhm 
3306bd8f1dc3Sbluhm   XML_ParserFree(ext_parser);
3307bd8f1dc3Sbluhm   return XML_STATUS_OK;
3308bd8f1dc3Sbluhm }
3309bd8f1dc3Sbluhm 
3310bd8f1dc3Sbluhm /* regression test: BOM should be consumed when followed by a partial token. */
3311bd8f1dc3Sbluhm START_TEST(test_external_bom_consumed) {
3312bd8f1dc3Sbluhm   const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3313bd8f1dc3Sbluhm                            "<doc></doc>\n";
3314bd8f1dc3Sbluhm   const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3315bd8f1dc3Sbluhm   const int len = (int)strlen(external);
3316bd8f1dc3Sbluhm   for (int split = 0; split <= len; ++split) {
3317bd8f1dc3Sbluhm     set_subtest("split at byte %d", split);
3318bd8f1dc3Sbluhm 
3319bd8f1dc3Sbluhm     struct bom_testdata testdata;
3320bd8f1dc3Sbluhm     testdata.external = external;
3321bd8f1dc3Sbluhm     testdata.split = split;
3322bd8f1dc3Sbluhm     testdata.nested_callback_happened = XML_FALSE;
3323bd8f1dc3Sbluhm 
3324bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
3325bd8f1dc3Sbluhm     if (parser == NULL) {
3326bd8f1dc3Sbluhm       fail("Couldn't create parser");
3327bd8f1dc3Sbluhm     }
3328bd8f1dc3Sbluhm     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3329bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3330bd8f1dc3Sbluhm     XML_SetUserData(parser, &testdata);
3331bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3332bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
3333bd8f1dc3Sbluhm       xml_failure(parser);
3334bd8f1dc3Sbluhm     if (! testdata.nested_callback_happened) {
3335bd8f1dc3Sbluhm       fail("ref handler not called");
3336bd8f1dc3Sbluhm     }
3337bd8f1dc3Sbluhm     XML_ParserFree(parser);
3338bd8f1dc3Sbluhm   }
3339bd8f1dc3Sbluhm }
3340bd8f1dc3Sbluhm END_TEST
3341bd8f1dc3Sbluhm 
3342bd8f1dc3Sbluhm /* Test recursive parsing */
3343bd8f1dc3Sbluhm START_TEST(test_external_entity_values) {
3344bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3345bd8f1dc3Sbluhm                      "<doc></doc>\n";
3346bd8f1dc3Sbluhm   ExtFaults data_004_2[] = {
3347bd8f1dc3Sbluhm       {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3348bd8f1dc3Sbluhm       {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3349bd8f1dc3Sbluhm        XML_ERROR_INVALID_TOKEN},
3350bd8f1dc3Sbluhm       {"'wombat", "Unterminated string not faulted", NULL,
3351bd8f1dc3Sbluhm        XML_ERROR_UNCLOSED_TOKEN},
3352bd8f1dc3Sbluhm       {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3353bd8f1dc3Sbluhm        XML_ERROR_PARTIAL_CHAR},
3354bd8f1dc3Sbluhm       {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3355bd8f1dc3Sbluhm       {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3356bd8f1dc3Sbluhm        XML_ERROR_XML_DECL},
3357bd8f1dc3Sbluhm       {/* UTF-8 BOM */
3358bd8f1dc3Sbluhm        "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3359bd8f1dc3Sbluhm        XML_ERROR_NONE},
3360bd8f1dc3Sbluhm       {"<?xml version='1.0' encoding='utf-8'?>\n$",
3361bd8f1dc3Sbluhm        "Invalid token after text declaration not faulted", NULL,
3362bd8f1dc3Sbluhm        XML_ERROR_INVALID_TOKEN},
3363bd8f1dc3Sbluhm       {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3364bd8f1dc3Sbluhm        "Unterminated string after text decl not faulted", NULL,
3365bd8f1dc3Sbluhm        XML_ERROR_UNCLOSED_TOKEN},
3366bd8f1dc3Sbluhm       {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3367bd8f1dc3Sbluhm        "Partial UTF-8 character after text decl not faulted", NULL,
3368bd8f1dc3Sbluhm        XML_ERROR_PARTIAL_CHAR},
3369bd8f1dc3Sbluhm       {"%e1;", "Recursive parameter entity not faulted", NULL,
3370bd8f1dc3Sbluhm        XML_ERROR_RECURSIVE_ENTITY_REF},
3371bd8f1dc3Sbluhm       {NULL, NULL, NULL, XML_ERROR_NONE}};
3372bd8f1dc3Sbluhm   int i;
3373bd8f1dc3Sbluhm 
3374bd8f1dc3Sbluhm   for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3375bd8f1dc3Sbluhm     set_subtest("%s", data_004_2[i].parse_text);
3376bd8f1dc3Sbluhm     XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3377bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3378bd8f1dc3Sbluhm     XML_SetUserData(g_parser, &data_004_2[i]);
3379bd8f1dc3Sbluhm     if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3380bd8f1dc3Sbluhm         == XML_STATUS_ERROR)
3381bd8f1dc3Sbluhm       xml_failure(g_parser);
3382bd8f1dc3Sbluhm     XML_ParserReset(g_parser, NULL);
3383bd8f1dc3Sbluhm   }
3384bd8f1dc3Sbluhm }
3385bd8f1dc3Sbluhm END_TEST
3386bd8f1dc3Sbluhm 
3387bd8f1dc3Sbluhm /* Test the recursive parse interacts with a not standalone handler */
3388bd8f1dc3Sbluhm START_TEST(test_ext_entity_not_standalone) {
3389bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3390bd8f1dc3Sbluhm                      "<doc></doc>";
3391bd8f1dc3Sbluhm 
3392bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3393bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3394bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3395bd8f1dc3Sbluhm                  "Standalone rejection not caught");
3396bd8f1dc3Sbluhm }
3397bd8f1dc3Sbluhm END_TEST
3398bd8f1dc3Sbluhm 
3399bd8f1dc3Sbluhm START_TEST(test_ext_entity_value_abort) {
3400bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3401bd8f1dc3Sbluhm                      "<doc></doc>\n";
3402bd8f1dc3Sbluhm 
3403bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3404bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3405bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
3406bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3408bd8f1dc3Sbluhm     xml_failure(g_parser);
3409bd8f1dc3Sbluhm }
3410bd8f1dc3Sbluhm END_TEST
3411bd8f1dc3Sbluhm 
3412bd8f1dc3Sbluhm START_TEST(test_bad_public_doctype) {
3413bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3414bd8f1dc3Sbluhm                      "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3415bd8f1dc3Sbluhm                      "<doc></doc>";
3416bd8f1dc3Sbluhm 
3417bd8f1dc3Sbluhm   /* Setting a handler provokes a particular code path */
3418bd8f1dc3Sbluhm   XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3419bd8f1dc3Sbluhm                             dummy_end_doctype_handler);
3420bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3421bd8f1dc3Sbluhm }
3422bd8f1dc3Sbluhm END_TEST
3423bd8f1dc3Sbluhm 
3424bd8f1dc3Sbluhm /* Test based on ibm/valid/P32/ibm32v04.xml */
3425bd8f1dc3Sbluhm START_TEST(test_attribute_enum_value) {
3426bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' standalone='no'?>\n"
3427bd8f1dc3Sbluhm                      "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3428bd8f1dc3Sbluhm                      "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3429bd8f1dc3Sbluhm   ExtTest dtd_data
3430bd8f1dc3Sbluhm       = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3431bd8f1dc3Sbluhm          "<!ELEMENT a EMPTY>\n"
3432bd8f1dc3Sbluhm          "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3433bd8f1dc3Sbluhm          NULL, NULL};
3434bd8f1dc3Sbluhm   const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3435bd8f1dc3Sbluhm 
3436bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3437bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &dtd_data);
3438bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3439bd8f1dc3Sbluhm   /* An attribute list handler provokes a different code path */
3440bd8f1dc3Sbluhm   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3441bd8f1dc3Sbluhm   run_ext_character_check(text, &dtd_data, expected);
3442bd8f1dc3Sbluhm }
3443bd8f1dc3Sbluhm END_TEST
3444bd8f1dc3Sbluhm 
3445bd8f1dc3Sbluhm /* Slightly bizarrely, the library seems to silently ignore entity
3446bd8f1dc3Sbluhm  * definitions for predefined entities, even when they are wrong.  The
3447bd8f1dc3Sbluhm  * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3448bd8f1dc3Sbluhm  * to happen, so this is currently treated as acceptable.
3449bd8f1dc3Sbluhm  */
3450bd8f1dc3Sbluhm START_TEST(test_predefined_entity_redefinition) {
3451bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3452bd8f1dc3Sbluhm                      "<!ENTITY apos 'foo'>\n"
3453bd8f1dc3Sbluhm                      "]>\n"
3454bd8f1dc3Sbluhm                      "<doc>&apos;</doc>";
3455bd8f1dc3Sbluhm   run_character_check(text, XCS("'"));
3456bd8f1dc3Sbluhm }
3457bd8f1dc3Sbluhm END_TEST
3458bd8f1dc3Sbluhm 
3459bd8f1dc3Sbluhm /* Test that the parser stops processing the DTD after an unresolved
3460bd8f1dc3Sbluhm  * parameter entity is encountered.
3461bd8f1dc3Sbluhm  */
3462bd8f1dc3Sbluhm START_TEST(test_dtd_stop_processing) {
3463bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3464bd8f1dc3Sbluhm                      "%foo;\n"
3465bd8f1dc3Sbluhm                      "<!ENTITY bar 'bas'>\n"
3466bd8f1dc3Sbluhm                      "]><doc/>";
3467bd8f1dc3Sbluhm 
3468bd8f1dc3Sbluhm   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3469bd8f1dc3Sbluhm   init_dummy_handlers();
3470bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3471bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3472bd8f1dc3Sbluhm     xml_failure(g_parser);
3473bd8f1dc3Sbluhm   if (get_dummy_handler_flags() != 0)
3474bd8f1dc3Sbluhm     fail("DTD processing still going after undefined PE");
3475bd8f1dc3Sbluhm }
3476bd8f1dc3Sbluhm END_TEST
3477bd8f1dc3Sbluhm 
3478bd8f1dc3Sbluhm /* Test public notations with no system ID */
3479bd8f1dc3Sbluhm START_TEST(test_public_notation_no_sysid) {
3480bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3481bd8f1dc3Sbluhm                      "<!NOTATION note PUBLIC 'foo'>\n"
3482bd8f1dc3Sbluhm                      "<!ELEMENT doc EMPTY>\n"
3483bd8f1dc3Sbluhm                      "]>\n<doc/>";
3484bd8f1dc3Sbluhm 
3485bd8f1dc3Sbluhm   init_dummy_handlers();
3486bd8f1dc3Sbluhm   XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3487bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3488bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3489bd8f1dc3Sbluhm     xml_failure(g_parser);
3490bd8f1dc3Sbluhm   if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3491bd8f1dc3Sbluhm     fail("Notation declaration handler not called");
3492bd8f1dc3Sbluhm }
3493bd8f1dc3Sbluhm END_TEST
3494bd8f1dc3Sbluhm 
3495bd8f1dc3Sbluhm START_TEST(test_nested_groups) {
3496bd8f1dc3Sbluhm   const char *text
3497bd8f1dc3Sbluhm       = "<!DOCTYPE doc [\n"
3498bd8f1dc3Sbluhm         "<!ELEMENT doc "
3499bd8f1dc3Sbluhm         /* Sixteen elements per line */
3500bd8f1dc3Sbluhm         "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3501bd8f1dc3Sbluhm         "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3502bd8f1dc3Sbluhm         "))))))))))))))))))))))))))))))))>\n"
3503bd8f1dc3Sbluhm         "<!ELEMENT e EMPTY>"
3504bd8f1dc3Sbluhm         "]>\n"
3505bd8f1dc3Sbluhm         "<doc><e/></doc>";
3506bd8f1dc3Sbluhm   CharData storage;
3507bd8f1dc3Sbluhm 
3508bd8f1dc3Sbluhm   CharData_Init(&storage);
3509bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3510bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, record_element_start_handler);
3511bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3512bd8f1dc3Sbluhm   init_dummy_handlers();
3513bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3514bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3515bd8f1dc3Sbluhm     xml_failure(g_parser);
3516bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS("doce"));
3517bd8f1dc3Sbluhm   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3518bd8f1dc3Sbluhm     fail("Element handler not fired");
3519bd8f1dc3Sbluhm }
3520bd8f1dc3Sbluhm END_TEST
3521bd8f1dc3Sbluhm 
3522bd8f1dc3Sbluhm START_TEST(test_group_choice) {
3523bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3524bd8f1dc3Sbluhm                      "<!ELEMENT doc (a|b|c)+>\n"
3525bd8f1dc3Sbluhm                      "<!ELEMENT a EMPTY>\n"
3526bd8f1dc3Sbluhm                      "<!ELEMENT b (#PCDATA)>\n"
3527bd8f1dc3Sbluhm                      "<!ELEMENT c ANY>\n"
3528bd8f1dc3Sbluhm                      "]>\n"
3529bd8f1dc3Sbluhm                      "<doc>\n"
3530bd8f1dc3Sbluhm                      "<a/>\n"
3531bd8f1dc3Sbluhm                      "<b attr='foo'>This is a foo</b>\n"
3532bd8f1dc3Sbluhm                      "<c></c>\n"
3533bd8f1dc3Sbluhm                      "</doc>\n";
3534bd8f1dc3Sbluhm 
3535bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3536bd8f1dc3Sbluhm   init_dummy_handlers();
3537bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3538bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3539bd8f1dc3Sbluhm     xml_failure(g_parser);
3540bd8f1dc3Sbluhm   if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3541bd8f1dc3Sbluhm     fail("Element handler flag not raised");
3542bd8f1dc3Sbluhm }
3543bd8f1dc3Sbluhm END_TEST
3544bd8f1dc3Sbluhm 
3545bd8f1dc3Sbluhm START_TEST(test_standalone_parameter_entity) {
3546bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3547bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3548bd8f1dc3Sbluhm                      "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3549bd8f1dc3Sbluhm                      "%entity;\n"
3550bd8f1dc3Sbluhm                      "]>\n"
3551bd8f1dc3Sbluhm                      "<doc></doc>";
3552bd8f1dc3Sbluhm   char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3553bd8f1dc3Sbluhm 
3554bd8f1dc3Sbluhm   XML_SetUserData(g_parser, dtd_data);
3555bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3556bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3557bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3558bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3559bd8f1dc3Sbluhm     xml_failure(g_parser);
3560bd8f1dc3Sbluhm }
3561bd8f1dc3Sbluhm END_TEST
3562bd8f1dc3Sbluhm 
3563bd8f1dc3Sbluhm /* Test skipping of parameter entity in an external DTD */
3564bd8f1dc3Sbluhm /* Derived from ibm/invalid/P69/ibm69i01.xml */
3565bd8f1dc3Sbluhm START_TEST(test_skipped_parameter_entity) {
3566bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0'?>\n"
3567bd8f1dc3Sbluhm                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3568bd8f1dc3Sbluhm                      "<!ELEMENT root (#PCDATA|a)* >\n"
3569bd8f1dc3Sbluhm                      "]>\n"
3570bd8f1dc3Sbluhm                      "<root></root>";
3571bd8f1dc3Sbluhm   ExtTest dtd_data = {"%pe2;", NULL, NULL};
3572bd8f1dc3Sbluhm 
3573bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3574bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &dtd_data);
3575bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3576bd8f1dc3Sbluhm   XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3577bd8f1dc3Sbluhm   init_dummy_handlers();
3578bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3579bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3580bd8f1dc3Sbluhm     xml_failure(g_parser);
3581bd8f1dc3Sbluhm   if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3582bd8f1dc3Sbluhm     fail("Skip handler not executed");
3583bd8f1dc3Sbluhm }
3584bd8f1dc3Sbluhm END_TEST
3585bd8f1dc3Sbluhm 
3586bd8f1dc3Sbluhm /* Test recursive parameter entity definition rejected in external DTD */
3587bd8f1dc3Sbluhm START_TEST(test_recursive_external_parameter_entity) {
3588bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0'?>\n"
3589bd8f1dc3Sbluhm                      "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3590bd8f1dc3Sbluhm                      "<!ELEMENT root (#PCDATA|a)* >\n"
3591bd8f1dc3Sbluhm                      "]>\n"
3592bd8f1dc3Sbluhm                      "<root></root>";
3593bd8f1dc3Sbluhm   ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3594bd8f1dc3Sbluhm                         "Recursive external parameter entity not faulted", NULL,
3595bd8f1dc3Sbluhm                         XML_ERROR_RECURSIVE_ENTITY_REF};
3596bd8f1dc3Sbluhm 
3597bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3598bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &dtd_data);
3599bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3600bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3601bd8f1dc3Sbluhm                  "Recursive external parameter not spotted");
3602bd8f1dc3Sbluhm }
3603bd8f1dc3Sbluhm END_TEST
3604bd8f1dc3Sbluhm 
3605bd8f1dc3Sbluhm /* Test undefined parameter entity in external entity handler */
3606bd8f1dc3Sbluhm START_TEST(test_undefined_ext_entity_in_external_dtd) {
3607bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3608bd8f1dc3Sbluhm                      "<doc></doc>\n";
3609bd8f1dc3Sbluhm 
3610bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3611bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3612bd8f1dc3Sbluhm   XML_SetUserData(g_parser, NULL);
3613bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3614bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3615bd8f1dc3Sbluhm     xml_failure(g_parser);
3616bd8f1dc3Sbluhm 
3617bd8f1dc3Sbluhm   /* Now repeat without the external entity ref handler invoking
3618bd8f1dc3Sbluhm    * another copy of itself.
3619bd8f1dc3Sbluhm    */
3620bd8f1dc3Sbluhm   XML_ParserReset(g_parser, NULL);
3621bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3622bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3623bd8f1dc3Sbluhm   XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3624bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3625bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3626bd8f1dc3Sbluhm     xml_failure(g_parser);
3627bd8f1dc3Sbluhm }
3628bd8f1dc3Sbluhm END_TEST
3629bd8f1dc3Sbluhm 
3630bd8f1dc3Sbluhm /* Test suspending the parse on receiving an XML declaration works */
3631bd8f1dc3Sbluhm START_TEST(test_suspend_xdecl) {
3632bd8f1dc3Sbluhm   const char *text = long_character_data_text;
3633bd8f1dc3Sbluhm 
3634bd8f1dc3Sbluhm   XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3635bd8f1dc3Sbluhm   XML_SetUserData(g_parser, g_parser);
3636bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
3637bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3638bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3639bd8f1dc3Sbluhm     xml_failure(g_parser);
3640bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3641bd8f1dc3Sbluhm     xml_failure(g_parser);
3642bd8f1dc3Sbluhm   /* Attempt to start a new parse while suspended */
3643bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3644bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
3645bd8f1dc3Sbluhm     fail("Attempt to parse while suspended not faulted");
3646bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3647bd8f1dc3Sbluhm     fail("Suspended parse not faulted with correct error");
3648bd8f1dc3Sbluhm }
3649bd8f1dc3Sbluhm END_TEST
3650bd8f1dc3Sbluhm 
3651bd8f1dc3Sbluhm /* Test aborting the parse in an epilog works */
3652bd8f1dc3Sbluhm START_TEST(test_abort_epilog) {
3653bd8f1dc3Sbluhm   const char *text = "<doc></doc>\n\r\n";
3654bd8f1dc3Sbluhm   XML_Char trigger_char = XCS('\r');
3655bd8f1dc3Sbluhm 
3656bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3657bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &trigger_char);
3658bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
3659bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3660bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
3661bd8f1dc3Sbluhm     fail("Abort not triggered");
3662bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3663bd8f1dc3Sbluhm     xml_failure(g_parser);
3664bd8f1dc3Sbluhm }
3665bd8f1dc3Sbluhm END_TEST
3666bd8f1dc3Sbluhm 
3667bd8f1dc3Sbluhm /* Test a different code path for abort in the epilog */
3668bd8f1dc3Sbluhm START_TEST(test_abort_epilog_2) {
3669bd8f1dc3Sbluhm   const char *text = "<doc></doc>\n";
3670bd8f1dc3Sbluhm   XML_Char trigger_char = XCS('\n');
3671bd8f1dc3Sbluhm 
3672bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3673bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &trigger_char);
3674bd8f1dc3Sbluhm   g_resumable = XML_FALSE;
3675bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3676bd8f1dc3Sbluhm }
3677bd8f1dc3Sbluhm END_TEST
3678bd8f1dc3Sbluhm 
3679bd8f1dc3Sbluhm /* Test suspension from the epilog */
3680bd8f1dc3Sbluhm START_TEST(test_suspend_epilog) {
3681bd8f1dc3Sbluhm   const char *text = "<doc></doc>\n";
3682bd8f1dc3Sbluhm   XML_Char trigger_char = XCS('\n');
3683bd8f1dc3Sbluhm 
3684bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3685bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &trigger_char);
3686bd8f1dc3Sbluhm   g_resumable = XML_TRUE;
3687bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3688bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3689bd8f1dc3Sbluhm     xml_failure(g_parser);
3690bd8f1dc3Sbluhm }
3691bd8f1dc3Sbluhm END_TEST
3692bd8f1dc3Sbluhm 
3693bd8f1dc3Sbluhm START_TEST(test_suspend_in_sole_empty_tag) {
3694bd8f1dc3Sbluhm   const char *text = "<doc/>";
3695bd8f1dc3Sbluhm   enum XML_Status rc;
3696bd8f1dc3Sbluhm 
3697bd8f1dc3Sbluhm   XML_SetEndElementHandler(g_parser, suspending_end_handler);
3698bd8f1dc3Sbluhm   XML_SetUserData(g_parser, g_parser);
3699bd8f1dc3Sbluhm   rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3700bd8f1dc3Sbluhm   if (rc == XML_STATUS_ERROR)
3701bd8f1dc3Sbluhm     xml_failure(g_parser);
3702bd8f1dc3Sbluhm   else if (rc != XML_STATUS_SUSPENDED)
3703bd8f1dc3Sbluhm     fail("Suspend not triggered");
3704bd8f1dc3Sbluhm   rc = XML_ResumeParser(g_parser);
3705bd8f1dc3Sbluhm   if (rc == XML_STATUS_ERROR)
3706bd8f1dc3Sbluhm     xml_failure(g_parser);
3707bd8f1dc3Sbluhm   else if (rc != XML_STATUS_OK)
3708bd8f1dc3Sbluhm     fail("Resume failed");
3709bd8f1dc3Sbluhm }
3710bd8f1dc3Sbluhm END_TEST
3711bd8f1dc3Sbluhm 
3712bd8f1dc3Sbluhm START_TEST(test_unfinished_epilog) {
3713bd8f1dc3Sbluhm   const char *text = "<doc></doc><";
3714bd8f1dc3Sbluhm 
3715bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3716bd8f1dc3Sbluhm                  "Incomplete epilog entry not faulted");
3717bd8f1dc3Sbluhm }
3718bd8f1dc3Sbluhm END_TEST
3719bd8f1dc3Sbluhm 
3720bd8f1dc3Sbluhm START_TEST(test_partial_char_in_epilog) {
3721bd8f1dc3Sbluhm   const char *text = "<doc></doc>\xe2\x82";
3722bd8f1dc3Sbluhm 
3723bd8f1dc3Sbluhm   /* First check that no fault is raised if the parse is not finished */
3724bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3725bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3726bd8f1dc3Sbluhm     xml_failure(g_parser);
3727bd8f1dc3Sbluhm   /* Now check that it is faulted once we finish */
3728bd8f1dc3Sbluhm   if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3729bd8f1dc3Sbluhm     fail("Partial character in epilog not faulted");
3730bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3731bd8f1dc3Sbluhm     xml_failure(g_parser);
3732bd8f1dc3Sbluhm }
3733bd8f1dc3Sbluhm END_TEST
3734bd8f1dc3Sbluhm 
3735bd8f1dc3Sbluhm /* Test resuming a parse suspended in entity substitution */
3736bd8f1dc3Sbluhm START_TEST(test_suspend_resume_internal_entity) {
3737bd8f1dc3Sbluhm   const char *text
3738bd8f1dc3Sbluhm       = "<!DOCTYPE doc [\n"
3739bd8f1dc3Sbluhm         "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3740bd8f1dc3Sbluhm         "]>\n"
3741bd8f1dc3Sbluhm         "<doc>&foo;</doc>\n";
3742bd8f1dc3Sbluhm   const XML_Char *expected1 = XCS("Hi");
3743bd8f1dc3Sbluhm   const XML_Char *expected2 = XCS("HiHo");
3744bd8f1dc3Sbluhm   CharData storage;
3745bd8f1dc3Sbluhm 
3746bd8f1dc3Sbluhm   CharData_Init(&storage);
3747bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, start_element_suspender);
3748bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3749bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3750bd8f1dc3Sbluhm   // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3751bd8f1dc3Sbluhm   // we won't know exactly how much input we actually managed to give Expat.
3752bd8f1dc3Sbluhm   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3753bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3754bd8f1dc3Sbluhm     xml_failure(g_parser);
3755bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS(""));
3756bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3757bd8f1dc3Sbluhm     xml_failure(g_parser);
3758bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected1);
3759bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3760bd8f1dc3Sbluhm     xml_failure(g_parser);
3761bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected2);
3762bd8f1dc3Sbluhm }
3763bd8f1dc3Sbluhm END_TEST
3764bd8f1dc3Sbluhm 
3765bd8f1dc3Sbluhm START_TEST(test_suspend_resume_internal_entity_issue_629) {
3766bd8f1dc3Sbluhm   const char *const text
3767bd8f1dc3Sbluhm       = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3768bd8f1dc3Sbluhm         "<"
3769bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3802bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3803bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3804bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3805bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3806bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3807bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3808bd8f1dc3Sbluhm         "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3809bd8f1dc3Sbluhm         "/>"
3810bd8f1dc3Sbluhm         "</b></a>";
3811bd8f1dc3Sbluhm   const size_t firstChunkSizeBytes = 54;
3812bd8f1dc3Sbluhm 
3813bd8f1dc3Sbluhm   XML_Parser parser = XML_ParserCreate(NULL);
3814bd8f1dc3Sbluhm   XML_SetUserData(parser, parser);
3815bd8f1dc3Sbluhm   XML_SetCommentHandler(parser, suspending_comment_handler);
3816bd8f1dc3Sbluhm 
3817bd8f1dc3Sbluhm   if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3818bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3819bd8f1dc3Sbluhm     xml_failure(parser);
3820bd8f1dc3Sbluhm   if (XML_ResumeParser(parser) != XML_STATUS_OK)
3821bd8f1dc3Sbluhm     xml_failure(parser);
3822bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3823bd8f1dc3Sbluhm                               (int)(strlen(text) - firstChunkSizeBytes),
3824bd8f1dc3Sbluhm                               XML_TRUE)
3825bd8f1dc3Sbluhm       != XML_STATUS_OK)
3826bd8f1dc3Sbluhm     xml_failure(parser);
3827bd8f1dc3Sbluhm   XML_ParserFree(parser);
3828bd8f1dc3Sbluhm }
3829bd8f1dc3Sbluhm END_TEST
3830bd8f1dc3Sbluhm 
3831bd8f1dc3Sbluhm /* Test syntax error is caught at parse resumption */
3832bd8f1dc3Sbluhm START_TEST(test_resume_entity_with_syntax_error) {
3833bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3834bd8f1dc3Sbluhm                      "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3835bd8f1dc3Sbluhm                      "]>\n"
3836bd8f1dc3Sbluhm                      "<doc>&foo;</doc>\n";
3837bd8f1dc3Sbluhm 
3838bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, start_element_suspender);
3839bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3840bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3841bd8f1dc3Sbluhm     xml_failure(g_parser);
3842bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3843bd8f1dc3Sbluhm     fail("Syntax error in entity not faulted");
3844bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3845bd8f1dc3Sbluhm     xml_failure(g_parser);
3846bd8f1dc3Sbluhm }
3847bd8f1dc3Sbluhm END_TEST
3848bd8f1dc3Sbluhm 
3849bd8f1dc3Sbluhm /* Test suspending and resuming in a parameter entity substitution */
3850bd8f1dc3Sbluhm START_TEST(test_suspend_resume_parameter_entity) {
3851bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
3852bd8f1dc3Sbluhm                      "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3853bd8f1dc3Sbluhm                      "%foo;\n"
3854bd8f1dc3Sbluhm                      "]>\n"
3855bd8f1dc3Sbluhm                      "<doc>Hello, world</doc>";
3856bd8f1dc3Sbluhm   const XML_Char *expected = XCS("Hello, world");
3857bd8f1dc3Sbluhm   CharData storage;
3858bd8f1dc3Sbluhm 
3859bd8f1dc3Sbluhm   CharData_Init(&storage);
3860bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3861bd8f1dc3Sbluhm   XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3862bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3863bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
3864bd8f1dc3Sbluhm   if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3865bd8f1dc3Sbluhm       != XML_STATUS_SUSPENDED)
3866bd8f1dc3Sbluhm     xml_failure(g_parser);
3867bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS(""));
3868bd8f1dc3Sbluhm   if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3869bd8f1dc3Sbluhm     xml_failure(g_parser);
3870bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
3871bd8f1dc3Sbluhm }
3872bd8f1dc3Sbluhm END_TEST
3873bd8f1dc3Sbluhm 
3874bd8f1dc3Sbluhm /* Test attempting to use parser after an error is faulted */
3875bd8f1dc3Sbluhm START_TEST(test_restart_on_error) {
3876bd8f1dc3Sbluhm   const char *text = "<$doc><doc></doc>";
3877bd8f1dc3Sbluhm 
3878bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3879bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
3880bd8f1dc3Sbluhm     fail("Invalid tag name not faulted");
3881bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3882bd8f1dc3Sbluhm     xml_failure(g_parser);
3883bd8f1dc3Sbluhm   if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3884bd8f1dc3Sbluhm     fail("Restarting invalid parse not faulted");
3885bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3886bd8f1dc3Sbluhm     xml_failure(g_parser);
3887bd8f1dc3Sbluhm }
3888bd8f1dc3Sbluhm END_TEST
3889bd8f1dc3Sbluhm 
3890bd8f1dc3Sbluhm /* Test that angle brackets in an attribute default value are faulted */
3891bd8f1dc3Sbluhm START_TEST(test_reject_lt_in_attribute_value) {
3892bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3893bd8f1dc3Sbluhm                      "<doc></doc>";
3894bd8f1dc3Sbluhm 
3895bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3896bd8f1dc3Sbluhm                  "Bad attribute default not faulted");
3897bd8f1dc3Sbluhm }
3898bd8f1dc3Sbluhm END_TEST
3899bd8f1dc3Sbluhm 
3900bd8f1dc3Sbluhm START_TEST(test_reject_unfinished_param_in_att_value) {
3901bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3902bd8f1dc3Sbluhm                      "<doc></doc>";
3903bd8f1dc3Sbluhm 
3904bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
3905bd8f1dc3Sbluhm                  "Bad attribute default not faulted");
3906bd8f1dc3Sbluhm }
3907bd8f1dc3Sbluhm END_TEST
3908bd8f1dc3Sbluhm 
3909bd8f1dc3Sbluhm START_TEST(test_trailing_cr_in_att_value) {
3910bd8f1dc3Sbluhm   const char *text = "<doc a='value\r'/>";
3911bd8f1dc3Sbluhm 
3912bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3913bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3914bd8f1dc3Sbluhm     xml_failure(g_parser);
3915bd8f1dc3Sbluhm }
3916bd8f1dc3Sbluhm END_TEST
3917bd8f1dc3Sbluhm 
3918bd8f1dc3Sbluhm /* Try parsing a general entity within a parameter entity in a
3919bd8f1dc3Sbluhm  * standalone internal DTD.  Covers a corner case in the parser.
3920bd8f1dc3Sbluhm  */
3921bd8f1dc3Sbluhm START_TEST(test_standalone_internal_entity) {
3922bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3923bd8f1dc3Sbluhm                      "<!DOCTYPE doc [\n"
3924bd8f1dc3Sbluhm                      "  <!ELEMENT doc (#PCDATA)>\n"
3925bd8f1dc3Sbluhm                      "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3926bd8f1dc3Sbluhm                      "  <!ENTITY ge 'AttDefaultValue'>\n"
3927bd8f1dc3Sbluhm                      "  %pe;\n"
3928bd8f1dc3Sbluhm                      "]>\n"
3929bd8f1dc3Sbluhm                      "<doc att2='any'/>";
3930bd8f1dc3Sbluhm 
3931bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3932bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3933bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3934bd8f1dc3Sbluhm     xml_failure(g_parser);
3935bd8f1dc3Sbluhm }
3936bd8f1dc3Sbluhm END_TEST
3937bd8f1dc3Sbluhm 
3938bd8f1dc3Sbluhm /* Test that a reference to an unknown external entity is skipped */
3939bd8f1dc3Sbluhm START_TEST(test_skipped_external_entity) {
3940bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3941bd8f1dc3Sbluhm                      "<doc></doc>\n";
3942bd8f1dc3Sbluhm   ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3943bd8f1dc3Sbluhm                        "<!ENTITY % e2 '%e1;'>\n",
3944bd8f1dc3Sbluhm                        NULL, NULL};
3945bd8f1dc3Sbluhm 
3946bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
3947bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3948bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3949bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3950bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3951bd8f1dc3Sbluhm     xml_failure(g_parser);
3952bd8f1dc3Sbluhm }
3953bd8f1dc3Sbluhm END_TEST
3954bd8f1dc3Sbluhm 
3955bd8f1dc3Sbluhm /* Test a different form of unknown external entity */
3956bd8f1dc3Sbluhm START_TEST(test_skipped_null_loaded_ext_entity) {
3957bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3958bd8f1dc3Sbluhm                      "<doc />";
3959bd8f1dc3Sbluhm   ExtHdlrData test_data
3960bd8f1dc3Sbluhm       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3961bd8f1dc3Sbluhm          "<!ENTITY % pe2 '%pe1;'>\n"
3962bd8f1dc3Sbluhm          "%pe2;\n",
3963bd8f1dc3Sbluhm          external_entity_null_loader};
3964bd8f1dc3Sbluhm 
3965bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
3966bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3967bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3968bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3969bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3970bd8f1dc3Sbluhm     xml_failure(g_parser);
3971bd8f1dc3Sbluhm }
3972bd8f1dc3Sbluhm END_TEST
3973bd8f1dc3Sbluhm 
3974bd8f1dc3Sbluhm START_TEST(test_skipped_unloaded_ext_entity) {
3975bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3976bd8f1dc3Sbluhm                      "<doc />";
3977bd8f1dc3Sbluhm   ExtHdlrData test_data
3978bd8f1dc3Sbluhm       = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3979bd8f1dc3Sbluhm          "<!ENTITY % pe2 '%pe1;'>\n"
3980bd8f1dc3Sbluhm          "%pe2;\n",
3981bd8f1dc3Sbluhm          NULL};
3982bd8f1dc3Sbluhm 
3983bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
3984bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3985bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3986bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3987bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
3988bd8f1dc3Sbluhm     xml_failure(g_parser);
3989bd8f1dc3Sbluhm }
3990bd8f1dc3Sbluhm END_TEST
3991bd8f1dc3Sbluhm 
3992bd8f1dc3Sbluhm /* Test that a parameter entity value ending with a carriage return
3993bd8f1dc3Sbluhm  * has it translated internally into a newline.
3994bd8f1dc3Sbluhm  */
3995bd8f1dc3Sbluhm START_TEST(test_param_entity_with_trailing_cr) {
3996bd8f1dc3Sbluhm #define PARAM_ENTITY_NAME "pe"
3997bd8f1dc3Sbluhm #define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3998bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3999bd8f1dc3Sbluhm                      "<doc/>";
4000bd8f1dc3Sbluhm   ExtTest test_data
4001bd8f1dc3Sbluhm       = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4002bd8f1dc3Sbluhm          "%" PARAM_ENTITY_NAME ";\n",
4003bd8f1dc3Sbluhm          NULL, NULL};
4004bd8f1dc3Sbluhm 
4005bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4006bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4007bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
4008bd8f1dc3Sbluhm   XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
4009bd8f1dc3Sbluhm   param_entity_match_init(XCS(PARAM_ENTITY_NAME),
4010bd8f1dc3Sbluhm                           XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
4011bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4012bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4013bd8f1dc3Sbluhm     xml_failure(g_parser);
4014bd8f1dc3Sbluhm   int entity_match_flag = get_param_entity_match_flag();
4015bd8f1dc3Sbluhm   if (entity_match_flag == ENTITY_MATCH_FAIL)
4016bd8f1dc3Sbluhm     fail("Parameter entity CR->NEWLINE conversion failed");
4017bd8f1dc3Sbluhm   else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
4018bd8f1dc3Sbluhm     fail("Parameter entity not parsed");
4019bd8f1dc3Sbluhm }
4020bd8f1dc3Sbluhm #undef PARAM_ENTITY_NAME
4021bd8f1dc3Sbluhm #undef PARAM_ENTITY_CORE_VALUE
4022bd8f1dc3Sbluhm END_TEST
4023bd8f1dc3Sbluhm 
4024bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity) {
4025bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4026bd8f1dc3Sbluhm                      "  <!ENTITY entity '&#x110000;'>\n"
4027bd8f1dc3Sbluhm                      "]>\n"
4028bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
4029bd8f1dc3Sbluhm 
4030bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4031bd8f1dc3Sbluhm                  "Out of range character reference not faulted");
4032bd8f1dc3Sbluhm }
4033bd8f1dc3Sbluhm END_TEST
4034bd8f1dc3Sbluhm 
4035bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_2) {
4036bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4037bd8f1dc3Sbluhm                      "  <!ENTITY entity '&#xg0;'>\n"
4038bd8f1dc3Sbluhm                      "]>\n"
4039bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
4040bd8f1dc3Sbluhm 
4041bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4042bd8f1dc3Sbluhm                  "Out of range character reference not faulted");
4043bd8f1dc3Sbluhm }
4044bd8f1dc3Sbluhm END_TEST
4045bd8f1dc3Sbluhm 
4046bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_3) {
4047bd8f1dc3Sbluhm   const char text[] =
4048bd8f1dc3Sbluhm       /* <!DOCTYPE doc [\n */
4049bd8f1dc3Sbluhm       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4050bd8f1dc3Sbluhm       /* U+0E04 = KHO KHWAI
4051bd8f1dc3Sbluhm        * U+0E08 = CHO CHAN */
4052bd8f1dc3Sbluhm       /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4053bd8f1dc3Sbluhm       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4054bd8f1dc3Sbluhm       "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4055bd8f1dc3Sbluhm       /* ]>\n */
4056bd8f1dc3Sbluhm       "\0]\0>\0\n"
4057bd8f1dc3Sbluhm       /* <doc>&entity;</doc> */
4058bd8f1dc3Sbluhm       "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4059bd8f1dc3Sbluhm 
4060bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4061bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
4062bd8f1dc3Sbluhm     fail("Invalid start of entity name not faulted");
4063bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4064bd8f1dc3Sbluhm     xml_failure(g_parser);
4065bd8f1dc3Sbluhm }
4066bd8f1dc3Sbluhm END_TEST
4067bd8f1dc3Sbluhm 
4068bd8f1dc3Sbluhm START_TEST(test_invalid_character_entity_4) {
4069bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4070bd8f1dc3Sbluhm                      "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4071bd8f1dc3Sbluhm                      "]>\n"
4072bd8f1dc3Sbluhm                      "<doc>&entity;</doc>";
4073bd8f1dc3Sbluhm 
4074bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4075bd8f1dc3Sbluhm                  "Out of range character reference not faulted");
4076bd8f1dc3Sbluhm }
4077bd8f1dc3Sbluhm END_TEST
4078bd8f1dc3Sbluhm 
4079bd8f1dc3Sbluhm /* Test that processing instructions are picked up by a default handler */
4080bd8f1dc3Sbluhm START_TEST(test_pi_handled_in_default) {
4081bd8f1dc3Sbluhm   const char *text = "<?test processing instruction?>\n<doc/>";
4082bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4083bd8f1dc3Sbluhm   CharData storage;
4084bd8f1dc3Sbluhm 
4085bd8f1dc3Sbluhm   CharData_Init(&storage);
4086bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
4087bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4088bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4089bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4090bd8f1dc3Sbluhm     xml_failure(g_parser);
4091bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4092bd8f1dc3Sbluhm }
4093bd8f1dc3Sbluhm END_TEST
4094bd8f1dc3Sbluhm 
4095bd8f1dc3Sbluhm /* Test that comments are picked up by a default handler */
4096bd8f1dc3Sbluhm START_TEST(test_comment_handled_in_default) {
4097bd8f1dc3Sbluhm   const char *text = "<!-- This is a comment -->\n<doc/>";
4098bd8f1dc3Sbluhm   const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4099bd8f1dc3Sbluhm   CharData storage;
4100bd8f1dc3Sbluhm 
4101bd8f1dc3Sbluhm   CharData_Init(&storage);
4102bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, accumulate_characters);
4103bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4104bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4105bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4106bd8f1dc3Sbluhm     xml_failure(g_parser);
4107bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4108bd8f1dc3Sbluhm }
4109bd8f1dc3Sbluhm END_TEST
4110bd8f1dc3Sbluhm 
4111bd8f1dc3Sbluhm /* Test PIs that look almost but not quite like XML declarations */
4112bd8f1dc3Sbluhm START_TEST(test_pi_yml) {
4113bd8f1dc3Sbluhm   const char *text = "<?yml something like data?><doc/>";
4114bd8f1dc3Sbluhm   const XML_Char *expected = XCS("yml: something like data\n");
4115bd8f1dc3Sbluhm   CharData storage;
4116bd8f1dc3Sbluhm 
4117bd8f1dc3Sbluhm   CharData_Init(&storage);
4118bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4119bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4120bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4121bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4122bd8f1dc3Sbluhm     xml_failure(g_parser);
4123bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4124bd8f1dc3Sbluhm }
4125bd8f1dc3Sbluhm END_TEST
4126bd8f1dc3Sbluhm 
4127bd8f1dc3Sbluhm START_TEST(test_pi_xnl) {
4128bd8f1dc3Sbluhm   const char *text = "<?xnl nothing like data?><doc/>";
4129bd8f1dc3Sbluhm   const XML_Char *expected = XCS("xnl: nothing like data\n");
4130bd8f1dc3Sbluhm   CharData storage;
4131bd8f1dc3Sbluhm 
4132bd8f1dc3Sbluhm   CharData_Init(&storage);
4133bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4134bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4135bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4136bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4137bd8f1dc3Sbluhm     xml_failure(g_parser);
4138bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4139bd8f1dc3Sbluhm }
4140bd8f1dc3Sbluhm END_TEST
4141bd8f1dc3Sbluhm 
4142bd8f1dc3Sbluhm START_TEST(test_pi_xmm) {
4143bd8f1dc3Sbluhm   const char *text = "<?xmm everything like data?><doc/>";
4144bd8f1dc3Sbluhm   const XML_Char *expected = XCS("xmm: everything like data\n");
4145bd8f1dc3Sbluhm   CharData storage;
4146bd8f1dc3Sbluhm 
4147bd8f1dc3Sbluhm   CharData_Init(&storage);
4148bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4149bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4150bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4151bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4152bd8f1dc3Sbluhm     xml_failure(g_parser);
4153bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4154bd8f1dc3Sbluhm }
4155bd8f1dc3Sbluhm END_TEST
4156bd8f1dc3Sbluhm 
4157bd8f1dc3Sbluhm START_TEST(test_utf16_pi) {
4158bd8f1dc3Sbluhm   const char text[] =
4159bd8f1dc3Sbluhm       /* <?{KHO KHWAI}{CHO CHAN}?>
4160bd8f1dc3Sbluhm        * where {KHO KHWAI} = U+0E04
4161bd8f1dc3Sbluhm        * and   {CHO CHAN}  = U+0E08
4162bd8f1dc3Sbluhm        */
4163bd8f1dc3Sbluhm       "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4164bd8f1dc3Sbluhm       /* <q/> */
4165bd8f1dc3Sbluhm       "<\0q\0/\0>\0";
4166bd8f1dc3Sbluhm #ifdef XML_UNICODE
4167bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4168bd8f1dc3Sbluhm #else
4169bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4170bd8f1dc3Sbluhm #endif
4171bd8f1dc3Sbluhm   CharData storage;
4172bd8f1dc3Sbluhm 
4173bd8f1dc3Sbluhm   CharData_Init(&storage);
4174bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4175bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4176bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4177bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4178bd8f1dc3Sbluhm     xml_failure(g_parser);
4179bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4180bd8f1dc3Sbluhm }
4181bd8f1dc3Sbluhm END_TEST
4182bd8f1dc3Sbluhm 
4183bd8f1dc3Sbluhm START_TEST(test_utf16_be_pi) {
4184bd8f1dc3Sbluhm   const char text[] =
4185bd8f1dc3Sbluhm       /* <?{KHO KHWAI}{CHO CHAN}?>
4186bd8f1dc3Sbluhm        * where {KHO KHWAI} = U+0E04
4187bd8f1dc3Sbluhm        * and   {CHO CHAN}  = U+0E08
4188bd8f1dc3Sbluhm        */
4189bd8f1dc3Sbluhm       "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4190bd8f1dc3Sbluhm       /* <q/> */
4191bd8f1dc3Sbluhm       "\0<\0q\0/\0>";
4192bd8f1dc3Sbluhm #ifdef XML_UNICODE
4193bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4194bd8f1dc3Sbluhm #else
4195bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4196bd8f1dc3Sbluhm #endif
4197bd8f1dc3Sbluhm   CharData storage;
4198bd8f1dc3Sbluhm 
4199bd8f1dc3Sbluhm   CharData_Init(&storage);
4200bd8f1dc3Sbluhm   XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4201bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4202bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4203bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4204bd8f1dc3Sbluhm     xml_failure(g_parser);
4205bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4206bd8f1dc3Sbluhm }
4207bd8f1dc3Sbluhm END_TEST
4208bd8f1dc3Sbluhm 
4209bd8f1dc3Sbluhm /* Test that comments can be picked up and translated */
4210bd8f1dc3Sbluhm START_TEST(test_utf16_be_comment) {
4211bd8f1dc3Sbluhm   const char text[] =
4212bd8f1dc3Sbluhm       /* <!-- Comment A --> */
4213bd8f1dc3Sbluhm       "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4214bd8f1dc3Sbluhm       /* <doc/> */
4215bd8f1dc3Sbluhm       "\0<\0d\0o\0c\0/\0>";
4216bd8f1dc3Sbluhm   const XML_Char *expected = XCS(" Comment A ");
4217bd8f1dc3Sbluhm   CharData storage;
4218bd8f1dc3Sbluhm 
4219bd8f1dc3Sbluhm   CharData_Init(&storage);
4220bd8f1dc3Sbluhm   XML_SetCommentHandler(g_parser, accumulate_comment);
4221bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4222bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4223bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4224bd8f1dc3Sbluhm     xml_failure(g_parser);
4225bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4226bd8f1dc3Sbluhm }
4227bd8f1dc3Sbluhm END_TEST
4228bd8f1dc3Sbluhm 
4229bd8f1dc3Sbluhm START_TEST(test_utf16_le_comment) {
4230bd8f1dc3Sbluhm   const char text[] =
4231bd8f1dc3Sbluhm       /* <!-- Comment B --> */
4232bd8f1dc3Sbluhm       "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4233bd8f1dc3Sbluhm       /* <doc/> */
4234bd8f1dc3Sbluhm       "<\0d\0o\0c\0/\0>\0";
4235bd8f1dc3Sbluhm   const XML_Char *expected = XCS(" Comment B ");
4236bd8f1dc3Sbluhm   CharData storage;
4237bd8f1dc3Sbluhm 
4238bd8f1dc3Sbluhm   CharData_Init(&storage);
4239bd8f1dc3Sbluhm   XML_SetCommentHandler(g_parser, accumulate_comment);
4240bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4241bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4242bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4243bd8f1dc3Sbluhm     xml_failure(g_parser);
4244bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4245bd8f1dc3Sbluhm }
4246bd8f1dc3Sbluhm END_TEST
4247bd8f1dc3Sbluhm 
4248bd8f1dc3Sbluhm /* Test that the unknown encoding handler with map entries that expect
4249bd8f1dc3Sbluhm  * conversion but no conversion function is faulted
4250bd8f1dc3Sbluhm  */
4251bd8f1dc3Sbluhm START_TEST(test_missing_encoding_conversion_fn) {
4252bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4253bd8f1dc3Sbluhm                      "<doc>\x81</doc>";
4254bd8f1dc3Sbluhm 
4255bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4256bd8f1dc3Sbluhm   /* MiscEncodingHandler sets up an encoding with every top-bit-set
4257bd8f1dc3Sbluhm    * character introducing a two-byte sequence.  For this, it
4258bd8f1dc3Sbluhm    * requires a convert function.  The above function call doesn't
4259bd8f1dc3Sbluhm    * pass one through, so when BadEncodingHandler actually gets
4260bd8f1dc3Sbluhm    * called it should supply an invalid encoding.
4261bd8f1dc3Sbluhm    */
4262bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4263bd8f1dc3Sbluhm                  "Encoding with missing convert() not faulted");
4264bd8f1dc3Sbluhm }
4265bd8f1dc3Sbluhm END_TEST
4266bd8f1dc3Sbluhm 
4267bd8f1dc3Sbluhm START_TEST(test_failing_encoding_conversion_fn) {
4268bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4269bd8f1dc3Sbluhm                      "<doc>\x81</doc>";
4270bd8f1dc3Sbluhm 
4271bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4272bd8f1dc3Sbluhm   /* BadEncodingHandler sets up an encoding with every top-bit-set
4273bd8f1dc3Sbluhm    * character introducing a two-byte sequence.  For this, it
4274bd8f1dc3Sbluhm    * requires a convert function.  The above function call passes
4275bd8f1dc3Sbluhm    * one that insists all possible sequences are invalid anyway.
4276bd8f1dc3Sbluhm    */
4277bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4278bd8f1dc3Sbluhm                  "Encoding with failing convert() not faulted");
4279bd8f1dc3Sbluhm }
4280bd8f1dc3Sbluhm END_TEST
4281bd8f1dc3Sbluhm 
4282bd8f1dc3Sbluhm /* Test unknown encoding conversions */
4283bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_success) {
4284bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4285bd8f1dc3Sbluhm                      /* Equivalent to <eoc>Hello, world</eoc> */
4286bd8f1dc3Sbluhm                      "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4287bd8f1dc3Sbluhm 
4288bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4289bd8f1dc3Sbluhm   run_character_check(text, XCS("Hello, world"));
4290bd8f1dc3Sbluhm }
4291bd8f1dc3Sbluhm END_TEST
4292bd8f1dc3Sbluhm 
4293bd8f1dc3Sbluhm /* Test bad name character in unknown encoding */
4294bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_name) {
4295bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4296bd8f1dc3Sbluhm                      "<\xff\x64oc>Hello, world</\xff\x64oc>";
4297bd8f1dc3Sbluhm 
4298bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4299bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4300bd8f1dc3Sbluhm                  "Bad name start in unknown encoding not faulted");
4301bd8f1dc3Sbluhm }
4302bd8f1dc3Sbluhm END_TEST
4303bd8f1dc3Sbluhm 
4304bd8f1dc3Sbluhm /* Test bad mid-name character in unknown encoding */
4305bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_name_2) {
4306bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4307bd8f1dc3Sbluhm                      "<d\xffoc>Hello, world</d\xffoc>";
4308bd8f1dc3Sbluhm 
4309bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4310bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4311bd8f1dc3Sbluhm                  "Bad name in unknown encoding not faulted");
4312bd8f1dc3Sbluhm }
4313bd8f1dc3Sbluhm END_TEST
4314bd8f1dc3Sbluhm 
4315bd8f1dc3Sbluhm /* Test element name that is long enough to fill the conversion buffer
4316bd8f1dc3Sbluhm  * in an unknown encoding, finishing with an encoded character.
4317bd8f1dc3Sbluhm  */
4318bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_long_name_1) {
4319bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4320bd8f1dc3Sbluhm                      "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4321bd8f1dc3Sbluhm                      "Hi"
4322bd8f1dc3Sbluhm                      "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4323bd8f1dc3Sbluhm   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4324bd8f1dc3Sbluhm   CharData storage;
4325bd8f1dc3Sbluhm 
4326bd8f1dc3Sbluhm   CharData_Init(&storage);
4327bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4328bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4329bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4330bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4331bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4332bd8f1dc3Sbluhm     xml_failure(g_parser);
4333bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4334bd8f1dc3Sbluhm }
4335bd8f1dc3Sbluhm END_TEST
4336bd8f1dc3Sbluhm 
4337bd8f1dc3Sbluhm /* Test element name that is long enough to fill the conversion buffer
4338bd8f1dc3Sbluhm  * in an unknown encoding, finishing with an simple character.
4339bd8f1dc3Sbluhm  */
4340bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_long_name_2) {
4341bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4342bd8f1dc3Sbluhm                      "<abcdefghabcdefghabcdefghijklmnop>"
4343bd8f1dc3Sbluhm                      "Hi"
4344bd8f1dc3Sbluhm                      "</abcdefghabcdefghabcdefghijklmnop>";
4345bd8f1dc3Sbluhm   const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4346bd8f1dc3Sbluhm   CharData storage;
4347bd8f1dc3Sbluhm 
4348bd8f1dc3Sbluhm   CharData_Init(&storage);
4349bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4350bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, record_element_start_handler);
4351bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4352bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4353bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4354bd8f1dc3Sbluhm     xml_failure(g_parser);
4355bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4356bd8f1dc3Sbluhm }
4357bd8f1dc3Sbluhm END_TEST
4358bd8f1dc3Sbluhm 
4359bd8f1dc3Sbluhm START_TEST(test_invalid_unknown_encoding) {
4360bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4361bd8f1dc3Sbluhm                      "<doc>Hello world</doc>";
4362bd8f1dc3Sbluhm 
4363bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4364bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4365bd8f1dc3Sbluhm                  "Invalid unknown encoding not faulted");
4366bd8f1dc3Sbluhm }
4367bd8f1dc3Sbluhm END_TEST
4368bd8f1dc3Sbluhm 
4369bd8f1dc3Sbluhm START_TEST(test_unknown_ascii_encoding_ok) {
4370bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371bd8f1dc3Sbluhm                      "<doc>Hello, world</doc>";
4372bd8f1dc3Sbluhm 
4373bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4374bd8f1dc3Sbluhm   run_character_check(text, XCS("Hello, world"));
4375bd8f1dc3Sbluhm }
4376bd8f1dc3Sbluhm END_TEST
4377bd8f1dc3Sbluhm 
4378bd8f1dc3Sbluhm START_TEST(test_unknown_ascii_encoding_fail) {
4379bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4380bd8f1dc3Sbluhm                      "<doc>Hello, \x80 world</doc>";
4381bd8f1dc3Sbluhm 
4382bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4383bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4384bd8f1dc3Sbluhm                  "Invalid character not faulted");
4385bd8f1dc3Sbluhm }
4386bd8f1dc3Sbluhm END_TEST
4387bd8f1dc3Sbluhm 
4388bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_length) {
4389bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4390bd8f1dc3Sbluhm                      "<doc>Hello, world</doc>";
4391bd8f1dc3Sbluhm 
4392bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4393bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4394bd8f1dc3Sbluhm                  "Invalid unknown encoding not faulted");
4395bd8f1dc3Sbluhm }
4396bd8f1dc3Sbluhm END_TEST
4397bd8f1dc3Sbluhm 
4398bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_topbit) {
4399bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4400bd8f1dc3Sbluhm                      "<doc>Hello, world</doc>";
4401bd8f1dc3Sbluhm 
4402bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4403bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4404bd8f1dc3Sbluhm                  "Invalid unknown encoding not faulted");
4405bd8f1dc3Sbluhm }
4406bd8f1dc3Sbluhm END_TEST
4407bd8f1dc3Sbluhm 
4408bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_surrogate) {
4409bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4410bd8f1dc3Sbluhm                      "<doc>Hello, \x82 world</doc>";
4411bd8f1dc3Sbluhm 
4412bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4413bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4414bd8f1dc3Sbluhm                  "Invalid unknown encoding not faulted");
4415bd8f1dc3Sbluhm }
4416bd8f1dc3Sbluhm END_TEST
4417bd8f1dc3Sbluhm 
4418bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_high) {
4419bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4420bd8f1dc3Sbluhm                      "<doc>Hello, world</doc>";
4421bd8f1dc3Sbluhm 
4422bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4423bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4424bd8f1dc3Sbluhm                  "Invalid unknown encoding not faulted");
4425bd8f1dc3Sbluhm }
4426bd8f1dc3Sbluhm END_TEST
4427bd8f1dc3Sbluhm 
4428bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_invalid_attr_value) {
4429bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4430bd8f1dc3Sbluhm                      "<doc attr='\xff\x30'/>";
4431bd8f1dc3Sbluhm 
4432bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4433bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4434bd8f1dc3Sbluhm                  "Invalid attribute valid not faulted");
4435bd8f1dc3Sbluhm }
4436bd8f1dc3Sbluhm END_TEST
4437bd8f1dc3Sbluhm 
4438bd8f1dc3Sbluhm /* Test an external entity parser set to use latin-1 detects UTF-16
4439bd8f1dc3Sbluhm  * BOMs correctly.
4440bd8f1dc3Sbluhm  */
4441bd8f1dc3Sbluhm /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4442bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16le_bom) {
4443bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4444bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4445bd8f1dc3Sbluhm                      "]>\n"
4446bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4447bd8f1dc3Sbluhm   ExtTest2 test_data
4448bd8f1dc3Sbluhm       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4449bd8f1dc3Sbluhm          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4450bd8f1dc3Sbluhm           *   0x4c = L and 0x20 is a space
4451bd8f1dc3Sbluhm           */
4452bd8f1dc3Sbluhm          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4453bd8f1dc3Sbluhm #ifdef XML_UNICODE
4454bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00ff\x00feL ");
4455bd8f1dc3Sbluhm #else
4456bd8f1dc3Sbluhm   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4457bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4458bd8f1dc3Sbluhm #endif
4459bd8f1dc3Sbluhm   CharData storage;
4460bd8f1dc3Sbluhm 
4461bd8f1dc3Sbluhm   CharData_Init(&storage);
4462bd8f1dc3Sbluhm   test_data.storage = &storage;
4463bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4464bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4465bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4466bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4467bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4468bd8f1dc3Sbluhm     xml_failure(g_parser);
4469bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4470bd8f1dc3Sbluhm }
4471bd8f1dc3Sbluhm END_TEST
4472bd8f1dc3Sbluhm 
4473bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16be_bom) {
4474bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4475bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4476bd8f1dc3Sbluhm                      "]>\n"
4477bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4478bd8f1dc3Sbluhm   ExtTest2 test_data
4479bd8f1dc3Sbluhm       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4480bd8f1dc3Sbluhm          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4481bd8f1dc3Sbluhm           *   0x4c = L and 0x20 is a space
4482bd8f1dc3Sbluhm           */
4483bd8f1dc3Sbluhm          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4484bd8f1dc3Sbluhm #ifdef XML_UNICODE
4485bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00fe\x00ff L");
4486bd8f1dc3Sbluhm #else
4487bd8f1dc3Sbluhm   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4488bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4489bd8f1dc3Sbluhm #endif
4490bd8f1dc3Sbluhm   CharData storage;
4491bd8f1dc3Sbluhm 
4492bd8f1dc3Sbluhm   CharData_Init(&storage);
4493bd8f1dc3Sbluhm   test_data.storage = &storage;
4494bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4495bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4496bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4497bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4498bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4499bd8f1dc3Sbluhm     xml_failure(g_parser);
4500bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4501bd8f1dc3Sbluhm }
4502bd8f1dc3Sbluhm END_TEST
4503bd8f1dc3Sbluhm 
4504bd8f1dc3Sbluhm /* Parsing the full buffer rather than a byte at a time makes a
4505bd8f1dc3Sbluhm  * difference to the encoding scanning code, so repeat the above tests
4506bd8f1dc3Sbluhm  * without breaking them down by byte.
4507bd8f1dc3Sbluhm  */
4508bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4509bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4510bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4511bd8f1dc3Sbluhm                      "]>\n"
4512bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4513bd8f1dc3Sbluhm   ExtTest2 test_data
4514bd8f1dc3Sbluhm       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4515bd8f1dc3Sbluhm          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4516bd8f1dc3Sbluhm           *   0x4c = L and 0x20 is a space
4517bd8f1dc3Sbluhm           */
4518bd8f1dc3Sbluhm          "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4519bd8f1dc3Sbluhm #ifdef XML_UNICODE
4520bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00ff\x00feL ");
4521bd8f1dc3Sbluhm #else
4522bd8f1dc3Sbluhm   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4523bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4524bd8f1dc3Sbluhm #endif
4525bd8f1dc3Sbluhm   CharData storage;
4526bd8f1dc3Sbluhm 
4527bd8f1dc3Sbluhm   CharData_Init(&storage);
4528bd8f1dc3Sbluhm   test_data.storage = &storage;
4529bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4530bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4531bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4532bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4533bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4534bd8f1dc3Sbluhm     xml_failure(g_parser);
4535bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4536bd8f1dc3Sbluhm }
4537bd8f1dc3Sbluhm END_TEST
4538bd8f1dc3Sbluhm 
4539bd8f1dc3Sbluhm START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4540bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4541bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4542bd8f1dc3Sbluhm                      "]>\n"
4543bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4544bd8f1dc3Sbluhm   ExtTest2 test_data
4545bd8f1dc3Sbluhm       = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4546bd8f1dc3Sbluhm          /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4547bd8f1dc3Sbluhm           *   0x4c = L and 0x20 is a space
4548bd8f1dc3Sbluhm           */
4549bd8f1dc3Sbluhm          "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4550bd8f1dc3Sbluhm #ifdef XML_UNICODE
4551bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00fe\x00ff L");
4552bd8f1dc3Sbluhm #else
4553bd8f1dc3Sbluhm   /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4554bd8f1dc3Sbluhm   const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4555bd8f1dc3Sbluhm #endif
4556bd8f1dc3Sbluhm   CharData storage;
4557bd8f1dc3Sbluhm 
4558bd8f1dc3Sbluhm   CharData_Init(&storage);
4559bd8f1dc3Sbluhm   test_data.storage = &storage;
4560bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4561bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4562bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4563bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4564bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4565bd8f1dc3Sbluhm     xml_failure(g_parser);
4566bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4567bd8f1dc3Sbluhm }
4568bd8f1dc3Sbluhm END_TEST
4569bd8f1dc3Sbluhm 
4570bd8f1dc3Sbluhm /* Test little-endian UTF-16 given an explicit big-endian encoding */
4571bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_be) {
4572bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4573bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574bd8f1dc3Sbluhm                      "]>\n"
4575bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4576bd8f1dc3Sbluhm   ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4577bd8f1dc3Sbluhm #ifdef XML_UNICODE
4578bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4579bd8f1dc3Sbluhm #else
4580bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4581bd8f1dc3Sbluhm                                  "\xe6\x94\x80"   /* U+6500 */
4582bd8f1dc3Sbluhm                                  "\xe2\xbc\x80"   /* U+2F00 */
4583bd8f1dc3Sbluhm                                  "\xe3\xb8\x80"); /* U+3E00 */
4584bd8f1dc3Sbluhm #endif
4585bd8f1dc3Sbluhm   CharData storage;
4586bd8f1dc3Sbluhm 
4587bd8f1dc3Sbluhm   CharData_Init(&storage);
4588bd8f1dc3Sbluhm   test_data.storage = &storage;
4589bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4590bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4591bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4592bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4593bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4594bd8f1dc3Sbluhm     xml_failure(g_parser);
4595bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4596bd8f1dc3Sbluhm }
4597bd8f1dc3Sbluhm END_TEST
4598bd8f1dc3Sbluhm 
4599bd8f1dc3Sbluhm /* Test big-endian UTF-16 given an explicit little-endian encoding */
4600bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_le) {
4601bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4602bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4603bd8f1dc3Sbluhm                      "]>\n"
4604bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4605bd8f1dc3Sbluhm   ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4606bd8f1dc3Sbluhm #ifdef XML_UNICODE
4607bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4608bd8f1dc3Sbluhm #else
4609bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4610bd8f1dc3Sbluhm                                  "\xe6\x94\x80"   /* U+6500 */
4611bd8f1dc3Sbluhm                                  "\xe2\xbc\x80"   /* U+2F00 */
4612bd8f1dc3Sbluhm                                  "\xe3\xb8\x80"); /* U+3E00 */
4613bd8f1dc3Sbluhm #endif
4614bd8f1dc3Sbluhm   CharData storage;
4615bd8f1dc3Sbluhm 
4616bd8f1dc3Sbluhm   CharData_Init(&storage);
4617bd8f1dc3Sbluhm   test_data.storage = &storage;
4618bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4619bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4620bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4621bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4622bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4623bd8f1dc3Sbluhm     xml_failure(g_parser);
4624bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4625bd8f1dc3Sbluhm }
4626bd8f1dc3Sbluhm END_TEST
4627bd8f1dc3Sbluhm 
4628bd8f1dc3Sbluhm /* Test little-endian UTF-16 given no explicit encoding.
4629bd8f1dc3Sbluhm  * The existing default encoding (UTF-8) is assumed to hold without a
4630bd8f1dc3Sbluhm  * BOM to contradict it, so the entity value will in fact provoke an
4631bd8f1dc3Sbluhm  * error because 0x00 is not a valid XML character.  We parse the
4632bd8f1dc3Sbluhm  * whole buffer in one go rather than feeding it in byte by byte to
4633bd8f1dc3Sbluhm  * exercise different code paths in the initial scanning routines.
4634bd8f1dc3Sbluhm  */
4635bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf16_unknown) {
4636bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4637bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4638bd8f1dc3Sbluhm                      "]>\n"
4639bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4640bd8f1dc3Sbluhm   ExtFaults2 test_data
4641bd8f1dc3Sbluhm       = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4642bd8f1dc3Sbluhm          XML_ERROR_INVALID_TOKEN};
4643bd8f1dc3Sbluhm 
4644bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4645bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4646bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4647bd8f1dc3Sbluhm                  "Invalid character should not have been accepted");
4648bd8f1dc3Sbluhm }
4649bd8f1dc3Sbluhm END_TEST
4650bd8f1dc3Sbluhm 
4651bd8f1dc3Sbluhm /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
4652bd8f1dc3Sbluhm START_TEST(test_ext_entity_utf8_non_bom) {
4653bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4654bd8f1dc3Sbluhm                      "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4655bd8f1dc3Sbluhm                      "]>\n"
4656bd8f1dc3Sbluhm                      "<doc>&en;</doc>";
4657bd8f1dc3Sbluhm   ExtTest2 test_data
4658bd8f1dc3Sbluhm       = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4659bd8f1dc3Sbluhm          3, NULL, NULL};
4660bd8f1dc3Sbluhm #ifdef XML_UNICODE
4661bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xfec0");
4662bd8f1dc3Sbluhm #else
4663bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xef\xbb\x80");
4664bd8f1dc3Sbluhm #endif
4665bd8f1dc3Sbluhm   CharData storage;
4666bd8f1dc3Sbluhm 
4667bd8f1dc3Sbluhm   CharData_Init(&storage);
4668bd8f1dc3Sbluhm   test_data.storage = &storage;
4669bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4670bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
4671bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4672bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4673bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4674bd8f1dc3Sbluhm     xml_failure(g_parser);
4675bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4676bd8f1dc3Sbluhm }
4677bd8f1dc3Sbluhm END_TEST
4678bd8f1dc3Sbluhm 
4679bd8f1dc3Sbluhm /* Test that UTF-8 in a CDATA section is correctly passed through */
4680bd8f1dc3Sbluhm START_TEST(test_utf8_in_cdata_section) {
4681bd8f1dc3Sbluhm   const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4682bd8f1dc3Sbluhm #ifdef XML_UNICODE
4683bd8f1dc3Sbluhm   const XML_Char *expected = XCS("one \x00e9 two");
4684bd8f1dc3Sbluhm #else
4685bd8f1dc3Sbluhm   const XML_Char *expected = XCS("one \xc3\xa9 two");
4686bd8f1dc3Sbluhm #endif
4687bd8f1dc3Sbluhm 
4688bd8f1dc3Sbluhm   run_character_check(text, expected);
4689bd8f1dc3Sbluhm }
4690bd8f1dc3Sbluhm END_TEST
4691bd8f1dc3Sbluhm 
4692bd8f1dc3Sbluhm /* Test that little-endian UTF-16 in a CDATA section is handled */
4693bd8f1dc3Sbluhm START_TEST(test_utf8_in_cdata_section_2) {
4694bd8f1dc3Sbluhm   const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4695bd8f1dc3Sbluhm #ifdef XML_UNICODE
4696bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e9]\x00e9two");
4697bd8f1dc3Sbluhm #else
4698bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4699bd8f1dc3Sbluhm #endif
4700bd8f1dc3Sbluhm 
4701bd8f1dc3Sbluhm   run_character_check(text, expected);
4702bd8f1dc3Sbluhm }
4703bd8f1dc3Sbluhm END_TEST
4704bd8f1dc3Sbluhm 
4705bd8f1dc3Sbluhm START_TEST(test_utf8_in_start_tags) {
4706bd8f1dc3Sbluhm   struct test_case {
4707bd8f1dc3Sbluhm     bool goodName;
4708bd8f1dc3Sbluhm     bool goodNameStart;
4709bd8f1dc3Sbluhm     const char *tagName;
4710bd8f1dc3Sbluhm   };
4711bd8f1dc3Sbluhm 
4712bd8f1dc3Sbluhm   // The idea with the tests below is this:
4713bd8f1dc3Sbluhm   // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4714bd8f1dc3Sbluhm   // go to isNever and are hence not a concern.
4715bd8f1dc3Sbluhm   //
4716bd8f1dc3Sbluhm   // We start with a character that is a valid name character
4717bd8f1dc3Sbluhm   // (or even name-start character, see XML 1.0r4 spec) and then we flip
4718bd8f1dc3Sbluhm   // single bits at places where (1) the result leaves the UTF-8 encoding space
4719bd8f1dc3Sbluhm   // and (2) we stay in the same n-byte sequence family.
4720bd8f1dc3Sbluhm   //
4721bd8f1dc3Sbluhm   // The flipped bits are highlighted in angle brackets in comments,
4722bd8f1dc3Sbluhm   // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4723bd8f1dc3Sbluhm   // the most significant bit to 1 to leave UTF-8 encoding space.
4724bd8f1dc3Sbluhm   struct test_case cases[] = {
4725bd8f1dc3Sbluhm       // 1-byte UTF-8: [0xxx xxxx]
4726bd8f1dc3Sbluhm       {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4727bd8f1dc3Sbluhm       {false, false, "\xBA"}, // [<1>011 1010]
4728bd8f1dc3Sbluhm       {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4729bd8f1dc3Sbluhm       {false, false, "\xB9"}, // [<1>011 1001]
4730bd8f1dc3Sbluhm 
4731bd8f1dc3Sbluhm       // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4732bd8f1dc3Sbluhm       {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4733bd8f1dc3Sbluhm                                   // Arabic small waw U+06E5
4734bd8f1dc3Sbluhm       {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4735bd8f1dc3Sbluhm       {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4736bd8f1dc3Sbluhm       {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4737bd8f1dc3Sbluhm       {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4738bd8f1dc3Sbluhm                                   // combining char U+0301
4739bd8f1dc3Sbluhm       {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4740bd8f1dc3Sbluhm       {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4741bd8f1dc3Sbluhm       {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4742bd8f1dc3Sbluhm 
4743bd8f1dc3Sbluhm       // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4744bd8f1dc3Sbluhm       {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4745bd8f1dc3Sbluhm                                       // Devanagari Letter A U+0905
4746bd8f1dc3Sbluhm       {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4747bd8f1dc3Sbluhm       {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4748bd8f1dc3Sbluhm       {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4749bd8f1dc3Sbluhm       {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4750bd8f1dc3Sbluhm       {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4751bd8f1dc3Sbluhm       {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4752bd8f1dc3Sbluhm                                       // combining char U+0901
4753bd8f1dc3Sbluhm       {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4754bd8f1dc3Sbluhm       {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4755bd8f1dc3Sbluhm       {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4756bd8f1dc3Sbluhm       {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4757bd8f1dc3Sbluhm       {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4758bd8f1dc3Sbluhm   };
4759bd8f1dc3Sbluhm   const bool atNameStart[] = {true, false};
4760bd8f1dc3Sbluhm 
4761bd8f1dc3Sbluhm   size_t i = 0;
4762bd8f1dc3Sbluhm   char doc[1024];
4763bd8f1dc3Sbluhm   size_t failCount = 0;
4764bd8f1dc3Sbluhm 
4765bd8f1dc3Sbluhm   // we need all the bytes to be parsed, but we don't want the errors that can
4766bd8f1dc3Sbluhm   // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4767bd8f1dc3Sbluhm   if (g_reparseDeferralEnabledDefault) {
4768bd8f1dc3Sbluhm     return;
4769bd8f1dc3Sbluhm   }
4770bd8f1dc3Sbluhm 
4771bd8f1dc3Sbluhm   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4772bd8f1dc3Sbluhm     size_t j = 0;
4773bd8f1dc3Sbluhm     for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4774bd8f1dc3Sbluhm       const bool expectedSuccess
4775bd8f1dc3Sbluhm           = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4776bd8f1dc3Sbluhm       snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4777bd8f1dc3Sbluhm                cases[i].tagName);
4778bd8f1dc3Sbluhm       XML_Parser parser = XML_ParserCreate(NULL);
4779bd8f1dc3Sbluhm 
4780bd8f1dc3Sbluhm       const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4781bd8f1dc3Sbluhm           parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4782bd8f1dc3Sbluhm 
4783bd8f1dc3Sbluhm       bool success = true;
4784bd8f1dc3Sbluhm       if ((status == XML_STATUS_OK) != expectedSuccess) {
4785bd8f1dc3Sbluhm         success = false;
4786bd8f1dc3Sbluhm       }
4787bd8f1dc3Sbluhm       if ((status == XML_STATUS_ERROR)
4788bd8f1dc3Sbluhm           && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4789bd8f1dc3Sbluhm         success = false;
4790bd8f1dc3Sbluhm       }
4791bd8f1dc3Sbluhm 
4792bd8f1dc3Sbluhm       if (! success) {
4793bd8f1dc3Sbluhm         fprintf(
4794bd8f1dc3Sbluhm             stderr,
4795bd8f1dc3Sbluhm             "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4796bd8f1dc3Sbluhm             (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4797bd8f1dc3Sbluhm             (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4798bd8f1dc3Sbluhm         failCount++;
4799bd8f1dc3Sbluhm       }
4800bd8f1dc3Sbluhm 
4801bd8f1dc3Sbluhm       XML_ParserFree(parser);
4802bd8f1dc3Sbluhm     }
4803bd8f1dc3Sbluhm   }
4804bd8f1dc3Sbluhm 
4805bd8f1dc3Sbluhm   if (failCount > 0) {
4806bd8f1dc3Sbluhm     fail("UTF-8 regression detected");
4807bd8f1dc3Sbluhm   }
4808bd8f1dc3Sbluhm }
4809bd8f1dc3Sbluhm END_TEST
4810bd8f1dc3Sbluhm 
4811bd8f1dc3Sbluhm /* Test trailing spaces in elements are accepted */
4812bd8f1dc3Sbluhm START_TEST(test_trailing_spaces_in_elements) {
4813bd8f1dc3Sbluhm   const char *text = "<doc   >Hi</doc >";
4814bd8f1dc3Sbluhm   const XML_Char *expected = XCS("doc/doc");
4815bd8f1dc3Sbluhm   CharData storage;
4816bd8f1dc3Sbluhm 
4817bd8f1dc3Sbluhm   CharData_Init(&storage);
4818bd8f1dc3Sbluhm   XML_SetElementHandler(g_parser, record_element_start_handler,
4819bd8f1dc3Sbluhm                         record_element_end_handler);
4820bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4821bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4822bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4823bd8f1dc3Sbluhm     xml_failure(g_parser);
4824bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4825bd8f1dc3Sbluhm }
4826bd8f1dc3Sbluhm END_TEST
4827bd8f1dc3Sbluhm 
4828bd8f1dc3Sbluhm START_TEST(test_utf16_attribute) {
4829bd8f1dc3Sbluhm   const char text[] =
4830bd8f1dc3Sbluhm       /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4831bd8f1dc3Sbluhm        * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4832bd8f1dc3Sbluhm        * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4833bd8f1dc3Sbluhm        */
4834bd8f1dc3Sbluhm       "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4835bd8f1dc3Sbluhm   const XML_Char *expected = XCS("a");
4836bd8f1dc3Sbluhm   CharData storage;
4837bd8f1dc3Sbluhm 
4838bd8f1dc3Sbluhm   CharData_Init(&storage);
4839bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4840bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4841bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4842bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4843bd8f1dc3Sbluhm     xml_failure(g_parser);
4844bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4845bd8f1dc3Sbluhm }
4846bd8f1dc3Sbluhm END_TEST
4847bd8f1dc3Sbluhm 
4848bd8f1dc3Sbluhm START_TEST(test_utf16_second_attr) {
4849bd8f1dc3Sbluhm   /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4850bd8f1dc3Sbluhm    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4851bd8f1dc3Sbluhm    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4852bd8f1dc3Sbluhm    */
4853bd8f1dc3Sbluhm   const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4854bd8f1dc3Sbluhm                       "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4855bd8f1dc3Sbluhm   const XML_Char *expected = XCS("1");
4856bd8f1dc3Sbluhm   CharData storage;
4857bd8f1dc3Sbluhm 
4858bd8f1dc3Sbluhm   CharData_Init(&storage);
4859bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, accumulate_attribute);
4860bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4861bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4862bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4863bd8f1dc3Sbluhm     xml_failure(g_parser);
4864bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4865bd8f1dc3Sbluhm }
4866bd8f1dc3Sbluhm END_TEST
4867bd8f1dc3Sbluhm 
4868bd8f1dc3Sbluhm START_TEST(test_attr_after_solidus) {
4869bd8f1dc3Sbluhm   const char *text = "<doc attr1='a' / attr2='b'>";
4870bd8f1dc3Sbluhm 
4871bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4872bd8f1dc3Sbluhm }
4873bd8f1dc3Sbluhm END_TEST
4874bd8f1dc3Sbluhm 
4875bd8f1dc3Sbluhm START_TEST(test_utf16_pe) {
4876bd8f1dc3Sbluhm   /* <!DOCTYPE doc [
4877bd8f1dc3Sbluhm    * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4878bd8f1dc3Sbluhm    * %{KHO KHWAI}{CHO CHAN};
4879bd8f1dc3Sbluhm    * ]>
4880bd8f1dc3Sbluhm    * <doc></doc>
4881bd8f1dc3Sbluhm    *
4882bd8f1dc3Sbluhm    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4883bd8f1dc3Sbluhm    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4884bd8f1dc3Sbluhm    */
4885bd8f1dc3Sbluhm   const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4886bd8f1dc3Sbluhm                       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4887bd8f1dc3Sbluhm                       "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4888bd8f1dc3Sbluhm                       "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4889bd8f1dc3Sbluhm                       "\0%\x0e\x04\x0e\x08\0;\0\n"
4890bd8f1dc3Sbluhm                       "\0]\0>\0\n"
4891bd8f1dc3Sbluhm                       "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4892bd8f1dc3Sbluhm #ifdef XML_UNICODE
4893bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4894bd8f1dc3Sbluhm #else
4895bd8f1dc3Sbluhm   const XML_Char *expected
4896bd8f1dc3Sbluhm       = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4897bd8f1dc3Sbluhm #endif
4898bd8f1dc3Sbluhm   CharData storage;
4899bd8f1dc3Sbluhm 
4900bd8f1dc3Sbluhm   CharData_Init(&storage);
4901bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
4902bd8f1dc3Sbluhm   XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4903bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4904bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
4905bd8f1dc3Sbluhm     xml_failure(g_parser);
4906bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
4907bd8f1dc3Sbluhm }
4908bd8f1dc3Sbluhm END_TEST
4909bd8f1dc3Sbluhm 
4910bd8f1dc3Sbluhm /* Test that duff attribute description keywords are rejected */
4911bd8f1dc3Sbluhm START_TEST(test_bad_attr_desc_keyword) {
4912bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
4913bd8f1dc3Sbluhm                      "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4914bd8f1dc3Sbluhm                      "]>\n"
4915bd8f1dc3Sbluhm                      "<doc />";
4916bd8f1dc3Sbluhm 
4917bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4918bd8f1dc3Sbluhm                  "Bad keyword !IMPLIED not faulted");
4919bd8f1dc3Sbluhm }
4920bd8f1dc3Sbluhm END_TEST
4921bd8f1dc3Sbluhm 
4922bd8f1dc3Sbluhm /* Test that an invalid attribute description keyword consisting of
4923bd8f1dc3Sbluhm  * UTF-16 characters with their top bytes non-zero are correctly
4924bd8f1dc3Sbluhm  * faulted
4925bd8f1dc3Sbluhm  */
4926bd8f1dc3Sbluhm START_TEST(test_bad_attr_desc_keyword_utf16) {
4927bd8f1dc3Sbluhm   /* <!DOCTYPE d [
4928bd8f1dc3Sbluhm    * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4929bd8f1dc3Sbluhm    * ]><d/>
4930bd8f1dc3Sbluhm    *
4931bd8f1dc3Sbluhm    * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4932bd8f1dc3Sbluhm    * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4933bd8f1dc3Sbluhm    */
4934bd8f1dc3Sbluhm   const char text[]
4935bd8f1dc3Sbluhm       = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4936bd8f1dc3Sbluhm         "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4937bd8f1dc3Sbluhm         "\0#\x0e\x04\x0e\x08\0>\0\n"
4938bd8f1dc3Sbluhm         "\0]\0>\0<\0d\0/\0>";
4939bd8f1dc3Sbluhm 
4940bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
4942bd8f1dc3Sbluhm     fail("Invalid UTF16 attribute keyword not faulted");
4943bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4944bd8f1dc3Sbluhm     xml_failure(g_parser);
4945bd8f1dc3Sbluhm }
4946bd8f1dc3Sbluhm END_TEST
4947bd8f1dc3Sbluhm 
4948bd8f1dc3Sbluhm /* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4949bd8f1dc3Sbluhm  * using prefix-encoding (see above) to trigger specific code paths
4950bd8f1dc3Sbluhm  */
4951bd8f1dc3Sbluhm START_TEST(test_bad_doctype) {
4952bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4953bd8f1dc3Sbluhm                      "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4954bd8f1dc3Sbluhm 
4955bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4956bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
4957bd8f1dc3Sbluhm                  "Invalid bytes in DOCTYPE not faulted");
4958bd8f1dc3Sbluhm }
4959bd8f1dc3Sbluhm END_TEST
4960bd8f1dc3Sbluhm 
4961bd8f1dc3Sbluhm START_TEST(test_bad_doctype_utf8) {
4962bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE \xDB\x25"
4963bd8f1dc3Sbluhm                      "doc><doc/>"; // [1101 1011] [<0>010 0101]
4964bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4965bd8f1dc3Sbluhm                  "Invalid UTF-8 in DOCTYPE not faulted");
4966bd8f1dc3Sbluhm }
4967bd8f1dc3Sbluhm END_TEST
4968bd8f1dc3Sbluhm 
4969bd8f1dc3Sbluhm START_TEST(test_bad_doctype_utf16) {
4970bd8f1dc3Sbluhm   const char text[] =
4971bd8f1dc3Sbluhm       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4972bd8f1dc3Sbluhm        *
4973bd8f1dc3Sbluhm        * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4974bd8f1dc3Sbluhm        * (name character) but not a valid letter (name start character)
4975bd8f1dc3Sbluhm        */
4976bd8f1dc3Sbluhm       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4977bd8f1dc3Sbluhm       "\x06\xf2"
4978bd8f1dc3Sbluhm       "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4979bd8f1dc3Sbluhm 
4980bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4981bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
4982bd8f1dc3Sbluhm     fail("Invalid bytes in DOCTYPE not faulted");
4983bd8f1dc3Sbluhm   if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4984bd8f1dc3Sbluhm     xml_failure(g_parser);
4985bd8f1dc3Sbluhm }
4986bd8f1dc3Sbluhm END_TEST
4987bd8f1dc3Sbluhm 
4988bd8f1dc3Sbluhm START_TEST(test_bad_doctype_plus) {
4989bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4990bd8f1dc3Sbluhm                      "<1+>&foo;</1+>";
4991bd8f1dc3Sbluhm 
4992bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
4993bd8f1dc3Sbluhm                  "'+' in document name not faulted");
4994bd8f1dc3Sbluhm }
4995bd8f1dc3Sbluhm END_TEST
4996bd8f1dc3Sbluhm 
4997bd8f1dc3Sbluhm START_TEST(test_bad_doctype_star) {
4998bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4999bd8f1dc3Sbluhm                      "<1*>&foo;</1*>";
5000bd8f1dc3Sbluhm 
5001bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5002bd8f1dc3Sbluhm                  "'*' in document name not faulted");
5003bd8f1dc3Sbluhm }
5004bd8f1dc3Sbluhm END_TEST
5005bd8f1dc3Sbluhm 
5006bd8f1dc3Sbluhm START_TEST(test_bad_doctype_query) {
5007bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5008bd8f1dc3Sbluhm                      "<1?>&foo;</1?>";
5009bd8f1dc3Sbluhm 
5010bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5011bd8f1dc3Sbluhm                  "'?' in document name not faulted");
5012bd8f1dc3Sbluhm }
5013bd8f1dc3Sbluhm END_TEST
5014bd8f1dc3Sbluhm 
5015bd8f1dc3Sbluhm START_TEST(test_unknown_encoding_bad_ignore) {
5016bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5017bd8f1dc3Sbluhm                      "<!DOCTYPE doc SYSTEM 'foo'>"
5018bd8f1dc3Sbluhm                      "<doc><e>&entity;</e></doc>";
5019bd8f1dc3Sbluhm   ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5020bd8f1dc3Sbluhm                      "Invalid character not faulted", XCS("prefix-conv"),
5021bd8f1dc3Sbluhm                      XML_ERROR_INVALID_TOKEN};
5022bd8f1dc3Sbluhm 
5023bd8f1dc3Sbluhm   XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
5024bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5025bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
5026bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &fault);
5027bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
5028bd8f1dc3Sbluhm                  "Bad IGNORE section with unknown encoding not failed");
5029bd8f1dc3Sbluhm }
5030bd8f1dc3Sbluhm END_TEST
5031bd8f1dc3Sbluhm 
5032bd8f1dc3Sbluhm START_TEST(test_entity_in_utf16_be_attr) {
5033bd8f1dc3Sbluhm   const char text[] =
5034bd8f1dc3Sbluhm       /* <e a='&#228; &#x00E4;'></e> */
5035bd8f1dc3Sbluhm       "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5036bd8f1dc3Sbluhm       "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5037bd8f1dc3Sbluhm #ifdef XML_UNICODE
5038bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e4 \x00e4");
5039bd8f1dc3Sbluhm #else
5040bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5041bd8f1dc3Sbluhm #endif
5042bd8f1dc3Sbluhm   CharData storage;
5043bd8f1dc3Sbluhm 
5044bd8f1dc3Sbluhm   CharData_Init(&storage);
5045bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
5046bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5047bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5048bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5049bd8f1dc3Sbluhm     xml_failure(g_parser);
5050bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5051bd8f1dc3Sbluhm }
5052bd8f1dc3Sbluhm END_TEST
5053bd8f1dc3Sbluhm 
5054bd8f1dc3Sbluhm START_TEST(test_entity_in_utf16_le_attr) {
5055bd8f1dc3Sbluhm   const char text[] =
5056bd8f1dc3Sbluhm       /* <e a='&#228; &#x00E4;'></e> */
5057bd8f1dc3Sbluhm       "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5058bd8f1dc3Sbluhm       "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5059bd8f1dc3Sbluhm #ifdef XML_UNICODE
5060bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\x00e4 \x00e4");
5061bd8f1dc3Sbluhm #else
5062bd8f1dc3Sbluhm   const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5063bd8f1dc3Sbluhm #endif
5064bd8f1dc3Sbluhm   CharData storage;
5065bd8f1dc3Sbluhm 
5066bd8f1dc3Sbluhm   CharData_Init(&storage);
5067bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
5068bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, accumulate_attribute);
5069bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5070bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5071bd8f1dc3Sbluhm     xml_failure(g_parser);
5072bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5073bd8f1dc3Sbluhm }
5074bd8f1dc3Sbluhm END_TEST
5075bd8f1dc3Sbluhm 
5076bd8f1dc3Sbluhm START_TEST(test_entity_public_utf16_be) {
5077bd8f1dc3Sbluhm   const char text[] =
5078bd8f1dc3Sbluhm       /* <!DOCTYPE d [ */
5079bd8f1dc3Sbluhm       "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5080bd8f1dc3Sbluhm       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5081bd8f1dc3Sbluhm       "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5082bd8f1dc3Sbluhm       "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5083bd8f1dc3Sbluhm       /* %e; */
5084bd8f1dc3Sbluhm       "\0%\0e\0;\0\n"
5085bd8f1dc3Sbluhm       /* ]> */
5086bd8f1dc3Sbluhm       "\0]\0>\0\n"
5087bd8f1dc3Sbluhm       /* <d>&j;</d> */
5088bd8f1dc3Sbluhm       "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5089bd8f1dc3Sbluhm   ExtTest2 test_data
5090bd8f1dc3Sbluhm       = {/* <!ENTITY j 'baz'> */
5091bd8f1dc3Sbluhm          "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5092bd8f1dc3Sbluhm   const XML_Char *expected = XCS("baz");
5093bd8f1dc3Sbluhm   CharData storage;
5094bd8f1dc3Sbluhm 
5095bd8f1dc3Sbluhm   CharData_Init(&storage);
5096bd8f1dc3Sbluhm   test_data.storage = &storage;
5097bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5098bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5099bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
5100bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5101bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5102bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5103bd8f1dc3Sbluhm     xml_failure(g_parser);
5104bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5105bd8f1dc3Sbluhm }
5106bd8f1dc3Sbluhm END_TEST
5107bd8f1dc3Sbluhm 
5108bd8f1dc3Sbluhm START_TEST(test_entity_public_utf16_le) {
5109bd8f1dc3Sbluhm   const char text[] =
5110bd8f1dc3Sbluhm       /* <!DOCTYPE d [ */
5111bd8f1dc3Sbluhm       "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5112bd8f1dc3Sbluhm       /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5113bd8f1dc3Sbluhm       "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5114bd8f1dc3Sbluhm       "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5115bd8f1dc3Sbluhm       /* %e; */
5116bd8f1dc3Sbluhm       "%\0e\0;\0\n\0"
5117bd8f1dc3Sbluhm       /* ]> */
5118bd8f1dc3Sbluhm       "]\0>\0\n\0"
5119bd8f1dc3Sbluhm       /* <d>&j;</d> */
5120bd8f1dc3Sbluhm       "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5121bd8f1dc3Sbluhm   ExtTest2 test_data
5122bd8f1dc3Sbluhm       = {/* <!ENTITY j 'baz'> */
5123bd8f1dc3Sbluhm          "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5124bd8f1dc3Sbluhm   const XML_Char *expected = XCS("baz");
5125bd8f1dc3Sbluhm   CharData storage;
5126bd8f1dc3Sbluhm 
5127bd8f1dc3Sbluhm   CharData_Init(&storage);
5128bd8f1dc3Sbluhm   test_data.storage = &storage;
5129bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5130bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5131bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
5132bd8f1dc3Sbluhm   XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5133bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5134bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5135bd8f1dc3Sbluhm     xml_failure(g_parser);
5136bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5137bd8f1dc3Sbluhm }
5138bd8f1dc3Sbluhm END_TEST
5139bd8f1dc3Sbluhm 
5140bd8f1dc3Sbluhm /* Test that a doctype with neither an internal nor external subset is
5141bd8f1dc3Sbluhm  * faulted
5142bd8f1dc3Sbluhm  */
5143bd8f1dc3Sbluhm START_TEST(test_short_doctype) {
5144bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc></doc>";
5145bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_INVALID_TOKEN,
5146bd8f1dc3Sbluhm                  "DOCTYPE without subset not rejected");
5147bd8f1dc3Sbluhm }
5148bd8f1dc3Sbluhm END_TEST
5149bd8f1dc3Sbluhm 
5150bd8f1dc3Sbluhm START_TEST(test_short_doctype_2) {
5151bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5152bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5153bd8f1dc3Sbluhm                  "DOCTYPE without Public ID not rejected");
5154bd8f1dc3Sbluhm }
5155bd8f1dc3Sbluhm END_TEST
5156bd8f1dc3Sbluhm 
5157bd8f1dc3Sbluhm START_TEST(test_short_doctype_3) {
5158bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5159bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5160bd8f1dc3Sbluhm                  "DOCTYPE without System ID not rejected");
5161bd8f1dc3Sbluhm }
5162bd8f1dc3Sbluhm END_TEST
5163bd8f1dc3Sbluhm 
5164bd8f1dc3Sbluhm START_TEST(test_long_doctype) {
5165bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5166bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5167bd8f1dc3Sbluhm }
5168bd8f1dc3Sbluhm END_TEST
5169bd8f1dc3Sbluhm 
5170bd8f1dc3Sbluhm START_TEST(test_bad_entity) {
5171bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
5172bd8f1dc3Sbluhm                      "  <!ENTITY foo PUBLIC>\n"
5173bd8f1dc3Sbluhm                      "]>\n"
5174bd8f1dc3Sbluhm                      "<doc/>";
5175bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5176bd8f1dc3Sbluhm                  "ENTITY without Public ID is not rejected");
5177bd8f1dc3Sbluhm }
5178bd8f1dc3Sbluhm END_TEST
5179bd8f1dc3Sbluhm 
5180bd8f1dc3Sbluhm /* Test unquoted value is faulted */
5181bd8f1dc3Sbluhm START_TEST(test_bad_entity_2) {
5182bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
5183bd8f1dc3Sbluhm                      "  <!ENTITY % foo bar>\n"
5184bd8f1dc3Sbluhm                      "]>\n"
5185bd8f1dc3Sbluhm                      "<doc/>";
5186bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5187bd8f1dc3Sbluhm                  "ENTITY without Public ID is not rejected");
5188bd8f1dc3Sbluhm }
5189bd8f1dc3Sbluhm END_TEST
5190bd8f1dc3Sbluhm 
5191bd8f1dc3Sbluhm START_TEST(test_bad_entity_3) {
5192bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
5193bd8f1dc3Sbluhm                      "  <!ENTITY % foo PUBLIC>\n"
5194bd8f1dc3Sbluhm                      "]>\n"
5195bd8f1dc3Sbluhm                      "<doc/>";
5196bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5197bd8f1dc3Sbluhm                  "Parameter ENTITY without Public ID is not rejected");
5198bd8f1dc3Sbluhm }
5199bd8f1dc3Sbluhm END_TEST
5200bd8f1dc3Sbluhm 
5201bd8f1dc3Sbluhm START_TEST(test_bad_entity_4) {
5202bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
5203bd8f1dc3Sbluhm                      "  <!ENTITY % foo SYSTEM>\n"
5204bd8f1dc3Sbluhm                      "]>\n"
5205bd8f1dc3Sbluhm                      "<doc/>";
5206bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5207bd8f1dc3Sbluhm                  "Parameter ENTITY without Public ID is not rejected");
5208bd8f1dc3Sbluhm }
5209bd8f1dc3Sbluhm END_TEST
5210bd8f1dc3Sbluhm 
5211bd8f1dc3Sbluhm START_TEST(test_bad_notation) {
5212bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc [\n"
5213bd8f1dc3Sbluhm                      "  <!NOTATION n SYSTEM>\n"
5214bd8f1dc3Sbluhm                      "]>\n"
5215bd8f1dc3Sbluhm                      "<doc/>";
5216bd8f1dc3Sbluhm   expect_failure(text, XML_ERROR_SYNTAX,
5217bd8f1dc3Sbluhm                  "Notation without System ID is not rejected");
5218bd8f1dc3Sbluhm }
5219bd8f1dc3Sbluhm END_TEST
5220bd8f1dc3Sbluhm 
5221bd8f1dc3Sbluhm /* Test for issue #11, wrongly suppressed default handler */
5222bd8f1dc3Sbluhm START_TEST(test_default_doctype_handler) {
5223bd8f1dc3Sbluhm   const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5224bd8f1dc3Sbluhm                      "  <!ENTITY foo 'bar'>\n"
5225bd8f1dc3Sbluhm                      "]>\n"
5226bd8f1dc3Sbluhm                      "<doc>&foo;</doc>";
5227bd8f1dc3Sbluhm   DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5228bd8f1dc3Sbluhm                               {XCS("'test.dtd'"), 10, XML_FALSE},
5229bd8f1dc3Sbluhm                               {NULL, 0, XML_FALSE}};
5230bd8f1dc3Sbluhm   int i;
5231bd8f1dc3Sbluhm 
5232bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &test_data);
5233bd8f1dc3Sbluhm   XML_SetDefaultHandler(g_parser, checking_default_handler);
5234bd8f1dc3Sbluhm   XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5235bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5236bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5237bd8f1dc3Sbluhm     xml_failure(g_parser);
5238bd8f1dc3Sbluhm   for (i = 0; test_data[i].expected != NULL; i++)
5239bd8f1dc3Sbluhm     if (! test_data[i].seen)
5240bd8f1dc3Sbluhm       fail("Default handler not run for public !DOCTYPE");
5241bd8f1dc3Sbluhm }
5242bd8f1dc3Sbluhm END_TEST
5243bd8f1dc3Sbluhm 
5244bd8f1dc3Sbluhm START_TEST(test_empty_element_abort) {
5245bd8f1dc3Sbluhm   const char *text = "<abort/>";
5246bd8f1dc3Sbluhm 
5247bd8f1dc3Sbluhm   XML_SetStartElementHandler(g_parser, start_element_suspender);
5248bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5249bd8f1dc3Sbluhm       != XML_STATUS_ERROR)
5250bd8f1dc3Sbluhm     fail("Expected to error on abort");
5251bd8f1dc3Sbluhm }
5252bd8f1dc3Sbluhm END_TEST
5253bd8f1dc3Sbluhm 
5254bd8f1dc3Sbluhm /* Regression test for GH issue #612: unfinished m_declAttributeType
5255bd8f1dc3Sbluhm  * allocation in ->m_tempPool can corrupt following allocation.
5256bd8f1dc3Sbluhm  */
5257bd8f1dc3Sbluhm START_TEST(test_pool_integrity_with_unfinished_attr) {
5258bd8f1dc3Sbluhm   const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5259bd8f1dc3Sbluhm                      "<!DOCTYPE foo [\n"
5260bd8f1dc3Sbluhm                      "<!ELEMENT foo ANY>\n"
5261bd8f1dc3Sbluhm                      "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5262bd8f1dc3Sbluhm                      "%entp;\n"
5263bd8f1dc3Sbluhm                      "]>\n"
5264bd8f1dc3Sbluhm                      "<a></a>\n";
5265bd8f1dc3Sbluhm   const XML_Char *expected = XCS("COMMENT");
5266bd8f1dc3Sbluhm   CharData storage;
5267bd8f1dc3Sbluhm 
5268bd8f1dc3Sbluhm   CharData_Init(&storage);
5269bd8f1dc3Sbluhm   XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5270bd8f1dc3Sbluhm   XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5271bd8f1dc3Sbluhm   XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5272bd8f1dc3Sbluhm   XML_SetCommentHandler(g_parser, accumulate_comment);
5273bd8f1dc3Sbluhm   XML_SetUserData(g_parser, &storage);
5274bd8f1dc3Sbluhm   if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5275bd8f1dc3Sbluhm       == XML_STATUS_ERROR)
5276bd8f1dc3Sbluhm     xml_failure(g_parser);
5277bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5278bd8f1dc3Sbluhm }
5279bd8f1dc3Sbluhm END_TEST
5280bd8f1dc3Sbluhm 
5281bd8f1dc3Sbluhm START_TEST(test_nested_entity_suspend) {
5282bd8f1dc3Sbluhm   const char *const text = "<!DOCTYPE a [\n"
5283bd8f1dc3Sbluhm                            "  <!ENTITY e1 '<!--e1-->'>\n"
5284bd8f1dc3Sbluhm                            "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5285bd8f1dc3Sbluhm                            "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5286bd8f1dc3Sbluhm                            "]>\n"
5287bd8f1dc3Sbluhm                            "<a><!--start-->&e3;<!--end--></a>";
5288bd8f1dc3Sbluhm   const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5289bd8f1dc3Sbluhm       XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5290bd8f1dc3Sbluhm   CharData storage;
5291bd8f1dc3Sbluhm   CharData_Init(&storage);
5292bd8f1dc3Sbluhm   XML_Parser parser = XML_ParserCreate(NULL);
5293bd8f1dc3Sbluhm   ParserPlusStorage parserPlusStorage = {parser, &storage};
5294bd8f1dc3Sbluhm 
5295bd8f1dc3Sbluhm   XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5296bd8f1dc3Sbluhm   XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5297bd8f1dc3Sbluhm   XML_SetUserData(parser, &parserPlusStorage);
5298bd8f1dc3Sbluhm 
5299bd8f1dc3Sbluhm   enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5300bd8f1dc3Sbluhm   while (status == XML_STATUS_SUSPENDED) {
5301bd8f1dc3Sbluhm     status = XML_ResumeParser(parser);
5302bd8f1dc3Sbluhm   }
5303bd8f1dc3Sbluhm   if (status != XML_STATUS_OK)
5304bd8f1dc3Sbluhm     xml_failure(parser);
5305bd8f1dc3Sbluhm 
5306bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, expected);
5307bd8f1dc3Sbluhm   XML_ParserFree(parser);
5308bd8f1dc3Sbluhm }
5309bd8f1dc3Sbluhm END_TEST
5310bd8f1dc3Sbluhm 
#if defined(XML_TESTING)
/* Regression test for quadratic parsing on large tokens.
 *
 * For every case a fresh parser receives the case's opening text, then 100
 * chunks of 4096 'a' bytes, then the closing text.  The g_bytesScanned
 * counter is reset before each case and must end up above the filler volume
 * but no higher than max_factor times that volume.
 */
START_TEST(test_big_tokens_scale_linearly) {
  const struct {
    const char *pre;
    const char *post;
  } cases[] = {
      {"<a>", "</a>"},                      // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"},              // big attribute, used to be O(N²)
      {"<d><!-- ", " --></d>"},             // long comment, used to be O(N²)
      {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
  };
  const int num_cases = sizeof(cases) / sizeof(cases[0]);
  char filler[4096];
  const int fillsize = (int)sizeof(filler);
  const int fillcount = 100;
  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
  const unsigned max_factor = 4;
  const unsigned max_scanned = max_factor * approx_bytes;

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }

  memset(filler, 'a', fillsize);

  for (int i = 0; i < num_cases; ++i) {
    const char *const pre = cases[i].pre;
    const char *const post = cases[i].post;
    const size_t pre_len = strlen(pre);

    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    set_subtest("text=\"%saaaaaa%s\"", pre, post);

    // opening text; the scan counter starts from zero for every case
    g_bytesScanned = 0;
    if (_XML_Parse_SINGLE_BYTES(parser, pre, (int)pre_len, XML_FALSE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // filler chunks; `chunks` is the number pushed once the call returns
    unsigned past_max_count = 0;
    for (int chunks = 1; chunks <= fillcount; ++chunks) {
      if (_XML_Parse_SINGLE_BYTES(parser, filler, fillsize, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      if (g_bytesScanned <= max_scanned) {
        continue; // still within budget
      }
      // We're not done, and have already passed the limit -- the test will
      // definitely fail. Logging here lets us save time by failing early.
      const unsigned pushed = (unsigned)pre_len + chunks * fillsize;
      fprintf(
          stderr,
          "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          chunks, fillcount, pushed, g_bytesScanned,
          g_bytesScanned / (double)pushed, max_scanned, max_factor);
      past_max_count++;
      // Allow a few log prints before giving up. If the count never reaches
      // five, the check after the closing text fails the test instead.
      assert_true(past_max_count < 5);
    }

    // closing text, flagged as final input
    if (_XML_Parse_SINGLE_BYTES(parser, post, (int)strlen(post), XML_TRUE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
    if (g_bytesScanned > max_scanned) {
      fprintf(
          stderr,
          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
          max_factor);
      fail("scanned too many bytes");
    }

    XML_ParserFree(parser);
  }
}
END_TEST
#endif
5398bd8f1dc3Sbluhm 
5399bd8f1dc3Sbluhm START_TEST(test_set_reparse_deferral) {
5400bd8f1dc3Sbluhm   const char *const pre = "<d>";
5401bd8f1dc3Sbluhm   const char *const start = "<x attr='";
5402bd8f1dc3Sbluhm   const char *const end = "'></x>";
5403bd8f1dc3Sbluhm   char eeeeee[100];
5404bd8f1dc3Sbluhm   const int fillsize = (int)sizeof(eeeeee);
5405bd8f1dc3Sbluhm   memset(eeeeee, 'e', fillsize);
5406bd8f1dc3Sbluhm 
5407bd8f1dc3Sbluhm   for (int enabled = 0; enabled <= 1; enabled += 1) {
5408bd8f1dc3Sbluhm     set_subtest("deferral=%d", enabled);
5409bd8f1dc3Sbluhm 
5410bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
5411bd8f1dc3Sbluhm     assert_true(parser != NULL);
5412bd8f1dc3Sbluhm     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5413bd8f1dc3Sbluhm     // pre-grow the buffer to avoid reparsing due to almost-fullness
5414bd8f1dc3Sbluhm     assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415bd8f1dc3Sbluhm 
5416bd8f1dc3Sbluhm     CharData storage;
5417bd8f1dc3Sbluhm     CharData_Init(&storage);
5418bd8f1dc3Sbluhm     XML_SetUserData(parser, &storage);
5419bd8f1dc3Sbluhm     XML_SetStartElementHandler(parser, start_element_event_handler);
5420bd8f1dc3Sbluhm 
5421bd8f1dc3Sbluhm     enum XML_Status status;
5422bd8f1dc3Sbluhm     // parse the start text
5423bd8f1dc3Sbluhm     status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5424bd8f1dc3Sbluhm     if (status != XML_STATUS_OK) {
5425bd8f1dc3Sbluhm       xml_failure(parser);
5426bd8f1dc3Sbluhm     }
5427bd8f1dc3Sbluhm     CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5428bd8f1dc3Sbluhm 
5429bd8f1dc3Sbluhm     // ..and the start of the token
5430bd8f1dc3Sbluhm     status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5431bd8f1dc3Sbluhm     if (status != XML_STATUS_OK) {
5432bd8f1dc3Sbluhm       xml_failure(parser);
5433bd8f1dc3Sbluhm     }
5434bd8f1dc3Sbluhm     CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5435bd8f1dc3Sbluhm 
5436bd8f1dc3Sbluhm     // try to parse lots of 'e', but the token isn't finished
5437bd8f1dc3Sbluhm     for (int c = 0; c < 100; ++c) {
5438bd8f1dc3Sbluhm       status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5439bd8f1dc3Sbluhm       if (status != XML_STATUS_OK) {
5440bd8f1dc3Sbluhm         xml_failure(parser);
5441bd8f1dc3Sbluhm       }
5442bd8f1dc3Sbluhm     }
5443bd8f1dc3Sbluhm     CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5444bd8f1dc3Sbluhm 
5445bd8f1dc3Sbluhm     // end the <x> token.
5446bd8f1dc3Sbluhm     status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5447bd8f1dc3Sbluhm     if (status != XML_STATUS_OK) {
5448bd8f1dc3Sbluhm       xml_failure(parser);
5449bd8f1dc3Sbluhm     }
5450bd8f1dc3Sbluhm 
5451bd8f1dc3Sbluhm     if (enabled) {
5452bd8f1dc3Sbluhm       // In general, we may need to push more data to trigger a reparse attempt,
5453bd8f1dc3Sbluhm       // but in this test, the data is constructed to always require it.
5454bd8f1dc3Sbluhm       CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5455bd8f1dc3Sbluhm       // 2x the token length should suffice; the +1 covers the start and end.
5456bd8f1dc3Sbluhm       for (int c = 0; c < 101; ++c) {
5457bd8f1dc3Sbluhm         status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5458bd8f1dc3Sbluhm         if (status != XML_STATUS_OK) {
5459bd8f1dc3Sbluhm           xml_failure(parser);
5460bd8f1dc3Sbluhm         }
5461bd8f1dc3Sbluhm       }
5462bd8f1dc3Sbluhm     }
5463bd8f1dc3Sbluhm     CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5464bd8f1dc3Sbluhm 
5465bd8f1dc3Sbluhm     XML_ParserFree(parser);
5466bd8f1dc3Sbluhm   }
5467bd8f1dc3Sbluhm }
5468bd8f1dc3Sbluhm END_TEST
5469bd8f1dc3Sbluhm 
5470bd8f1dc3Sbluhm struct element_decl_data {
5471bd8f1dc3Sbluhm   XML_Parser parser;
5472bd8f1dc3Sbluhm   int count;
5473bd8f1dc3Sbluhm };
5474bd8f1dc3Sbluhm 
5475bd8f1dc3Sbluhm static void
5476bd8f1dc3Sbluhm element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5477bd8f1dc3Sbluhm   UNUSED_P(name);
5478bd8f1dc3Sbluhm   struct element_decl_data *testdata = (struct element_decl_data *)userData;
5479bd8f1dc3Sbluhm   testdata->count += 1;
5480bd8f1dc3Sbluhm   XML_FreeContentModel(testdata->parser, model);
5481bd8f1dc3Sbluhm }
5482bd8f1dc3Sbluhm 
5483bd8f1dc3Sbluhm static int
5484bd8f1dc3Sbluhm external_inherited_parser(XML_Parser p, const XML_Char *context,
5485bd8f1dc3Sbluhm                           const XML_Char *base, const XML_Char *systemId,
5486bd8f1dc3Sbluhm                           const XML_Char *publicId) {
5487bd8f1dc3Sbluhm   UNUSED_P(base);
5488bd8f1dc3Sbluhm   UNUSED_P(systemId);
5489bd8f1dc3Sbluhm   UNUSED_P(publicId);
5490bd8f1dc3Sbluhm   const char *const pre = "<!ELEMENT document ANY>\n";
5491bd8f1dc3Sbluhm   const char *const start = "<!ELEMENT ";
5492bd8f1dc3Sbluhm   const char *const end = " ANY>\n";
5493bd8f1dc3Sbluhm   const char *const post = "<!ELEMENT xyz ANY>\n";
5494bd8f1dc3Sbluhm   const int enabled = *(int *)XML_GetUserData(p);
5495bd8f1dc3Sbluhm   char eeeeee[100];
5496bd8f1dc3Sbluhm   char spaces[100];
5497bd8f1dc3Sbluhm   const int fillsize = (int)sizeof(eeeeee);
5498bd8f1dc3Sbluhm   assert_true(fillsize == (int)sizeof(spaces));
5499bd8f1dc3Sbluhm   memset(eeeeee, 'e', fillsize);
5500bd8f1dc3Sbluhm   memset(spaces, ' ', fillsize);
5501bd8f1dc3Sbluhm 
5502bd8f1dc3Sbluhm   XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5503bd8f1dc3Sbluhm   assert_true(parser != NULL);
5504bd8f1dc3Sbluhm   // pre-grow the buffer to avoid reparsing due to almost-fullness
5505bd8f1dc3Sbluhm   assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5506bd8f1dc3Sbluhm 
5507bd8f1dc3Sbluhm   struct element_decl_data testdata;
5508bd8f1dc3Sbluhm   testdata.parser = parser;
5509bd8f1dc3Sbluhm   testdata.count = 0;
5510bd8f1dc3Sbluhm   XML_SetUserData(parser, &testdata);
5511bd8f1dc3Sbluhm   XML_SetElementDeclHandler(parser, element_decl_counter);
5512bd8f1dc3Sbluhm 
5513bd8f1dc3Sbluhm   enum XML_Status status;
5514bd8f1dc3Sbluhm   // parse the initial text
5515bd8f1dc3Sbluhm   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5516bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5517bd8f1dc3Sbluhm     xml_failure(parser);
5518bd8f1dc3Sbluhm   }
5519bd8f1dc3Sbluhm   assert_true(testdata.count == 1); // first element should be done
5520bd8f1dc3Sbluhm 
5521bd8f1dc3Sbluhm   // ..and the start of the big token
5522bd8f1dc3Sbluhm   status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5523bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5524bd8f1dc3Sbluhm     xml_failure(parser);
5525bd8f1dc3Sbluhm   }
5526bd8f1dc3Sbluhm   assert_true(testdata.count == 1); // still just the first one
5527bd8f1dc3Sbluhm 
5528bd8f1dc3Sbluhm   // try to parse lots of 'e', but the token isn't finished
5529bd8f1dc3Sbluhm   for (int c = 0; c < 100; ++c) {
5530bd8f1dc3Sbluhm     status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5531bd8f1dc3Sbluhm     if (status != XML_STATUS_OK) {
5532bd8f1dc3Sbluhm       xml_failure(parser);
5533bd8f1dc3Sbluhm     }
5534bd8f1dc3Sbluhm   }
5535bd8f1dc3Sbluhm   assert_true(testdata.count == 1); // *still* just the first one
5536bd8f1dc3Sbluhm 
5537bd8f1dc3Sbluhm   // end the big token.
5538bd8f1dc3Sbluhm   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5539bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5540bd8f1dc3Sbluhm     xml_failure(parser);
5541bd8f1dc3Sbluhm   }
5542bd8f1dc3Sbluhm 
5543bd8f1dc3Sbluhm   if (enabled) {
5544bd8f1dc3Sbluhm     // In general, we may need to push more data to trigger a reparse attempt,
5545bd8f1dc3Sbluhm     // but in this test, the data is constructed to always require it.
5546bd8f1dc3Sbluhm     assert_true(testdata.count == 1); // or the test is incorrect
5547bd8f1dc3Sbluhm     // 2x the token length should suffice; the +1 covers the start and end.
5548bd8f1dc3Sbluhm     for (int c = 0; c < 101; ++c) {
5549bd8f1dc3Sbluhm       status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5550bd8f1dc3Sbluhm       if (status != XML_STATUS_OK) {
5551bd8f1dc3Sbluhm         xml_failure(parser);
5552bd8f1dc3Sbluhm       }
5553bd8f1dc3Sbluhm     }
5554bd8f1dc3Sbluhm   }
5555bd8f1dc3Sbluhm   assert_true(testdata.count == 2); // the big token should be done
5556bd8f1dc3Sbluhm 
5557bd8f1dc3Sbluhm   // parse the final text
5558bd8f1dc3Sbluhm   status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5559bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5560bd8f1dc3Sbluhm     xml_failure(parser);
5561bd8f1dc3Sbluhm   }
5562bd8f1dc3Sbluhm   assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5563bd8f1dc3Sbluhm 
5564bd8f1dc3Sbluhm   XML_ParserFree(parser);
5565bd8f1dc3Sbluhm   return XML_STATUS_OK;
5566bd8f1dc3Sbluhm }
5567bd8f1dc3Sbluhm 
5568bd8f1dc3Sbluhm START_TEST(test_reparse_deferral_is_inherited) {
5569bd8f1dc3Sbluhm   const char *const text
5570bd8f1dc3Sbluhm       = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5571bd8f1dc3Sbluhm   for (int enabled = 0; enabled <= 1; ++enabled) {
5572bd8f1dc3Sbluhm     set_subtest("deferral=%d", enabled);
5573bd8f1dc3Sbluhm 
5574bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
5575bd8f1dc3Sbluhm     assert_true(parser != NULL);
5576bd8f1dc3Sbluhm     XML_SetUserData(parser, (void *)&enabled);
5577bd8f1dc3Sbluhm     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5578bd8f1dc3Sbluhm     // this handler creates a sub-parser and checks that its deferral behavior
5579bd8f1dc3Sbluhm     // is what we expected, based on the value of `enabled` (in userdata).
5580bd8f1dc3Sbluhm     XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5581bd8f1dc3Sbluhm     assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5582bd8f1dc3Sbluhm     if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5583bd8f1dc3Sbluhm       xml_failure(parser);
5584bd8f1dc3Sbluhm 
5585bd8f1dc3Sbluhm     XML_ParserFree(parser);
5586bd8f1dc3Sbluhm   }
5587bd8f1dc3Sbluhm }
5588bd8f1dc3Sbluhm END_TEST
5589bd8f1dc3Sbluhm 
/* XML_SetReparseDeferralEnabled must return XML_FALSE when the parser
 * argument is NULL, whatever value is passed as the setting.
 */
START_TEST(test_set_reparse_deferral_on_null_parser) {
  assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
  /* extreme int values, converted to XML_Bool by an explicit cast */
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
              == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
              == XML_FALSE);
}
END_TEST
5601bd8f1dc3Sbluhm 
5602bd8f1dc3Sbluhm START_TEST(test_set_reparse_deferral_on_the_fly) {
5603bd8f1dc3Sbluhm   const char *const pre = "<d><x attr='";
5604bd8f1dc3Sbluhm   const char *const end = "'></x>";
5605bd8f1dc3Sbluhm   char iiiiii[100];
5606bd8f1dc3Sbluhm   const int fillsize = (int)sizeof(iiiiii);
5607bd8f1dc3Sbluhm   memset(iiiiii, 'i', fillsize);
5608bd8f1dc3Sbluhm 
5609bd8f1dc3Sbluhm   XML_Parser parser = XML_ParserCreate(NULL);
5610bd8f1dc3Sbluhm   assert_true(parser != NULL);
5611bd8f1dc3Sbluhm   assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5612bd8f1dc3Sbluhm 
5613bd8f1dc3Sbluhm   CharData storage;
5614bd8f1dc3Sbluhm   CharData_Init(&storage);
5615bd8f1dc3Sbluhm   XML_SetUserData(parser, &storage);
5616bd8f1dc3Sbluhm   XML_SetStartElementHandler(parser, start_element_event_handler);
5617bd8f1dc3Sbluhm 
5618bd8f1dc3Sbluhm   enum XML_Status status;
5619bd8f1dc3Sbluhm   // parse the start text
5620bd8f1dc3Sbluhm   status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5621bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5622bd8f1dc3Sbluhm     xml_failure(parser);
5623bd8f1dc3Sbluhm   }
5624bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5625bd8f1dc3Sbluhm 
5626bd8f1dc3Sbluhm   // try to parse some 'i', but the token isn't finished
5627bd8f1dc3Sbluhm   status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5628bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5629bd8f1dc3Sbluhm     xml_failure(parser);
5630bd8f1dc3Sbluhm   }
5631bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5632bd8f1dc3Sbluhm 
5633bd8f1dc3Sbluhm   // end the <x> token.
5634bd8f1dc3Sbluhm   status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5635bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5636bd8f1dc3Sbluhm     xml_failure(parser);
5637bd8f1dc3Sbluhm   }
5638bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5639bd8f1dc3Sbluhm 
5640bd8f1dc3Sbluhm   // now change the heuristic setting and add *no* data
5641bd8f1dc3Sbluhm   assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5642bd8f1dc3Sbluhm   // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5643bd8f1dc3Sbluhm   status = XML_Parse(parser, "", 0, XML_FALSE);
5644bd8f1dc3Sbluhm   if (status != XML_STATUS_OK) {
5645bd8f1dc3Sbluhm     xml_failure(parser);
5646bd8f1dc3Sbluhm   }
5647bd8f1dc3Sbluhm   CharData_CheckXMLChars(&storage, XCS("dx"));
5648bd8f1dc3Sbluhm 
5649bd8f1dc3Sbluhm   XML_ParserFree(parser);
5650bd8f1dc3Sbluhm }
5651bd8f1dc3Sbluhm END_TEST
5652bd8f1dc3Sbluhm 
5653bd8f1dc3Sbluhm START_TEST(test_set_bad_reparse_option) {
5654bd8f1dc3Sbluhm   XML_Parser parser = XML_ParserCreate(NULL);
5655bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5656bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5657bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5658bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5659bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5660bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5661bd8f1dc3Sbluhm   assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5662bd8f1dc3Sbluhm   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5663bd8f1dc3Sbluhm   assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5664bd8f1dc3Sbluhm   XML_ParserFree(parser);
5665bd8f1dc3Sbluhm }
5666bd8f1dc3Sbluhm END_TEST
5667bd8f1dc3Sbluhm 
/* Statistics collected by the counting allocator below. Tests reset these to
 * zero before the phase they want to measure.
 */
static size_t g_totalAlloc = 0;   // sum of all sizes requested so far
static size_t g_biggestAlloc = 0; // largest single size requested so far

/* realloc() wrapper that records the requested size before delegating.
 *
 * The size is counted whether or not the underlying realloc() succeeds.
 * Returns whatever realloc(ptr, size) returns.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc()-shaped entry point: a fresh allocation is just a counted
 * realloc() of a NULL pointer, so both paths share the same bookkeeping.
 */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
5684bd8f1dc3Sbluhm 
5685bd8f1dc3Sbluhm START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5686bd8f1dc3Sbluhm   if (g_chunkSize != 0) {
5687bd8f1dc3Sbluhm     // this test does not use SINGLE_BYTES, because it depends on very precise
5688bd8f1dc3Sbluhm     // buffer fills.
5689bd8f1dc3Sbluhm     return;
5690bd8f1dc3Sbluhm   }
5691bd8f1dc3Sbluhm   if (! g_reparseDeferralEnabledDefault) {
5692bd8f1dc3Sbluhm     return; // this test is irrelevant when the deferral heuristic is disabled.
5693bd8f1dc3Sbluhm   }
5694bd8f1dc3Sbluhm 
5695bd8f1dc3Sbluhm   const int document_length = 65536;
5696bd8f1dc3Sbluhm   char *const document = (char *)malloc(document_length);
5697bd8f1dc3Sbluhm 
5698bd8f1dc3Sbluhm   const XML_Memory_Handling_Suite memfuncs = {
5699bd8f1dc3Sbluhm       counting_malloc,
5700bd8f1dc3Sbluhm       counting_realloc,
5701bd8f1dc3Sbluhm       free,
5702bd8f1dc3Sbluhm   };
5703bd8f1dc3Sbluhm 
5704bd8f1dc3Sbluhm   const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5705bd8f1dc3Sbluhm   const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5706bd8f1dc3Sbluhm   const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5707bd8f1dc3Sbluhm 
5708bd8f1dc3Sbluhm   for (const int *leading = leading_list; *leading >= 0; leading++) {
5709bd8f1dc3Sbluhm     for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5710bd8f1dc3Sbluhm       for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5711bd8f1dc3Sbluhm         set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5712bd8f1dc3Sbluhm                     *fillsize);
5713bd8f1dc3Sbluhm         // start by checking that the test looks reasonably valid
5714bd8f1dc3Sbluhm         assert_true(*leading + *bigtoken <= document_length);
5715bd8f1dc3Sbluhm 
5716bd8f1dc3Sbluhm         // put 'x' everywhere; some will be overwritten by elements.
5717bd8f1dc3Sbluhm         memset(document, 'x', document_length);
5718bd8f1dc3Sbluhm         // maybe add an initial tag
5719bd8f1dc3Sbluhm         if (*leading) {
5720bd8f1dc3Sbluhm           assert_true(*leading >= 3); // or the test case is invalid
5721bd8f1dc3Sbluhm           memcpy(document, "<a>", 3);
5722bd8f1dc3Sbluhm         }
5723bd8f1dc3Sbluhm         // add the large token
5724bd8f1dc3Sbluhm         document[*leading + 0] = '<';
5725bd8f1dc3Sbluhm         document[*leading + 1] = 'b';
5726bd8f1dc3Sbluhm         memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5727bd8f1dc3Sbluhm         document[*leading + *bigtoken - 1] = '>';
5728bd8f1dc3Sbluhm 
5729bd8f1dc3Sbluhm         // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5730bd8f1dc3Sbluhm         const int expected_elem_total = 1 + (*leading ? 1 : 0);
5731bd8f1dc3Sbluhm 
5732bd8f1dc3Sbluhm         XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5733bd8f1dc3Sbluhm         assert_true(parser != NULL);
5734bd8f1dc3Sbluhm 
5735bd8f1dc3Sbluhm         CharData storage;
5736bd8f1dc3Sbluhm         CharData_Init(&storage);
5737bd8f1dc3Sbluhm         XML_SetUserData(parser, &storage);
5738bd8f1dc3Sbluhm         XML_SetStartElementHandler(parser, start_element_event_handler);
5739bd8f1dc3Sbluhm 
5740bd8f1dc3Sbluhm         g_biggestAlloc = 0;
5741bd8f1dc3Sbluhm         g_totalAlloc = 0;
5742bd8f1dc3Sbluhm         int offset = 0;
5743bd8f1dc3Sbluhm         // fill data until the big token is covered (but not necessarily parsed)
5744bd8f1dc3Sbluhm         while (offset < *leading + *bigtoken) {
5745bd8f1dc3Sbluhm           assert_true(offset + *fillsize <= document_length);
5746bd8f1dc3Sbluhm           const enum XML_Status status
5747bd8f1dc3Sbluhm               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5748bd8f1dc3Sbluhm           if (status != XML_STATUS_OK) {
5749bd8f1dc3Sbluhm             xml_failure(parser);
5750bd8f1dc3Sbluhm           }
5751bd8f1dc3Sbluhm           offset += *fillsize;
5752bd8f1dc3Sbluhm         }
5753bd8f1dc3Sbluhm         // Now, check that we've had a buffer allocation that could fit the
5754bd8f1dc3Sbluhm         // context bytes and our big token. In order to detect a special case,
5755bd8f1dc3Sbluhm         // we need to know how many bytes of our big token were included in the
5756bd8f1dc3Sbluhm         // first push that contained _any_ bytes of the big token:
5757bd8f1dc3Sbluhm         const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5758bd8f1dc3Sbluhm         if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5759bd8f1dc3Sbluhm           // Special case: we aren't saving any context, and the whole big token
5760bd8f1dc3Sbluhm           // was covered by a single fill, so Expat may have parsed directly
5761bd8f1dc3Sbluhm           // from our input pointer, without allocating an internal buffer.
5762bd8f1dc3Sbluhm         } else if (*leading < XML_CONTEXT_BYTES) {
5763bd8f1dc3Sbluhm           assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5764bd8f1dc3Sbluhm         } else {
5765bd8f1dc3Sbluhm           assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5766bd8f1dc3Sbluhm         }
5767bd8f1dc3Sbluhm         // fill data until the big token is actually parsed
5768bd8f1dc3Sbluhm         while (storage.count < expected_elem_total) {
5769bd8f1dc3Sbluhm           const size_t alloc_before = g_totalAlloc;
5770bd8f1dc3Sbluhm           assert_true(offset + *fillsize <= document_length);
5771bd8f1dc3Sbluhm           const enum XML_Status status
5772bd8f1dc3Sbluhm               = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5773bd8f1dc3Sbluhm           if (status != XML_STATUS_OK) {
5774bd8f1dc3Sbluhm             xml_failure(parser);
5775bd8f1dc3Sbluhm           }
5776bd8f1dc3Sbluhm           offset += *fillsize;
5777bd8f1dc3Sbluhm           // since all the bytes of the big token are already in the buffer,
5778bd8f1dc3Sbluhm           // the bufsize ceiling should make us finish its parsing without any
5779bd8f1dc3Sbluhm           // further buffer allocations. We assume that there will be no other
5780bd8f1dc3Sbluhm           // large allocations in this test.
5781bd8f1dc3Sbluhm           assert_true(g_totalAlloc - alloc_before < 4096);
5782bd8f1dc3Sbluhm         }
5783bd8f1dc3Sbluhm         // test-the-test: was our alloc even called?
5784bd8f1dc3Sbluhm         assert_true(g_totalAlloc > 0);
5785bd8f1dc3Sbluhm         // test-the-test: there shouldn't be any extra start elements
5786bd8f1dc3Sbluhm         assert_true(storage.count == expected_elem_total);
5787bd8f1dc3Sbluhm 
5788bd8f1dc3Sbluhm         XML_ParserFree(parser);
5789bd8f1dc3Sbluhm       }
5790bd8f1dc3Sbluhm     }
5791bd8f1dc3Sbluhm   }
5792bd8f1dc3Sbluhm   free(document);
5793bd8f1dc3Sbluhm }
5794bd8f1dc3Sbluhm END_TEST
5795bd8f1dc3Sbluhm 
5796f558d286Sbluhm #if defined(XML_TESTING)
5797bd8f1dc3Sbluhm START_TEST(test_varying_buffer_fills) {
5798bd8f1dc3Sbluhm   const int KiB = 1024;
5799bd8f1dc3Sbluhm   const int MiB = 1024 * KiB;
5800bd8f1dc3Sbluhm   const int document_length = 16 * MiB;
5801bd8f1dc3Sbluhm   const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5802bd8f1dc3Sbluhm 
5803bd8f1dc3Sbluhm   if (g_chunkSize != 0) {
5804bd8f1dc3Sbluhm     return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5805bd8f1dc3Sbluhm   }
5806bd8f1dc3Sbluhm 
5807bd8f1dc3Sbluhm   char *const document = (char *)malloc(document_length);
5808bd8f1dc3Sbluhm   assert_true(document != NULL);
5809bd8f1dc3Sbluhm   memset(document, 'x', document_length);
5810bd8f1dc3Sbluhm   document[0] = '<';
5811bd8f1dc3Sbluhm   document[1] = 't';
5812bd8f1dc3Sbluhm   memset(&document[2], ' ', big - 2); // a very spacy token
5813bd8f1dc3Sbluhm   document[big - 1] = '>';
5814bd8f1dc3Sbluhm 
5815bd8f1dc3Sbluhm   // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5816bd8f1dc3Sbluhm   // When reparse deferral is enabled, the final (negated) value is the expected
5817bd8f1dc3Sbluhm   // maximum number of bytes scanned in parse attempts.
5818bd8f1dc3Sbluhm   const int testcases[][30] = {
5819bd8f1dc3Sbluhm       {8 * MiB, -8 * MiB},
5820bd8f1dc3Sbluhm       {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5821bd8f1dc3Sbluhm       // zero-size fills shouldn't trigger the bypass
5822bd8f1dc3Sbluhm       {4 * MiB, 0, 4 * MiB, -12 * MiB},
5823bd8f1dc3Sbluhm       {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5824bd8f1dc3Sbluhm       {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5825bd8f1dc3Sbluhm       // try to hit the buffer ceiling only once (at the end)
5826bd8f1dc3Sbluhm       {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5827bd8f1dc3Sbluhm       // try to hit the same buffer ceiling multiple times
5828bd8f1dc3Sbluhm       {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5829bd8f1dc3Sbluhm 
5830bd8f1dc3Sbluhm       // try to hit every ceiling, by always landing 1K shy of the buffer size
5831bd8f1dc3Sbluhm       {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5832bd8f1dc3Sbluhm        128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5833bd8f1dc3Sbluhm 
5834bd8f1dc3Sbluhm       // try to avoid every ceiling, by always landing 1B past the buffer size
5835bd8f1dc3Sbluhm       // the normal 2x heuristic threshold still forces parse attempts.
5836bd8f1dc3Sbluhm       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5837bd8f1dc3Sbluhm        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5838bd8f1dc3Sbluhm        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5839bd8f1dc3Sbluhm        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5840bd8f1dc3Sbluhm        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5841bd8f1dc3Sbluhm        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5842bd8f1dc3Sbluhm        2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5843bd8f1dc3Sbluhm        -(10 * MiB + 682 * KiB + 7)},
5844bd8f1dc3Sbluhm       // try to avoid every ceiling again, except on our last fill.
5845bd8f1dc3Sbluhm       {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5846bd8f1dc3Sbluhm        2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5847bd8f1dc3Sbluhm        8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5848bd8f1dc3Sbluhm        32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5849bd8f1dc3Sbluhm        128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5850bd8f1dc3Sbluhm        512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5851bd8f1dc3Sbluhm        2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5852bd8f1dc3Sbluhm        -(10 * MiB + 682 * KiB + 6)},
5853bd8f1dc3Sbluhm 
5854bd8f1dc3Sbluhm       // try to hit ceilings on the way multiple times
5855bd8f1dc3Sbluhm       {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5856bd8f1dc3Sbluhm        512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5857bd8f1dc3Sbluhm        1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5858bd8f1dc3Sbluhm        2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5859bd8f1dc3Sbluhm        // we'll make a parse attempt at every parse call
5860bd8f1dc3Sbluhm        -(45 * MiB + 12)},
5861bd8f1dc3Sbluhm   };
5862bd8f1dc3Sbluhm   const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5863bd8f1dc3Sbluhm   for (int test_i = 0; test_i < testcount; test_i++) {
5864bd8f1dc3Sbluhm     const int *fillsize = testcases[test_i];
5865bd8f1dc3Sbluhm     set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5866bd8f1dc3Sbluhm                 fillsize[2], fillsize[3]);
5867bd8f1dc3Sbluhm     XML_Parser parser = XML_ParserCreate(NULL);
5868bd8f1dc3Sbluhm     assert_true(parser != NULL);
5869bd8f1dc3Sbluhm 
5870bd8f1dc3Sbluhm     CharData storage;
5871bd8f1dc3Sbluhm     CharData_Init(&storage);
5872bd8f1dc3Sbluhm     XML_SetUserData(parser, &storage);
5873bd8f1dc3Sbluhm     XML_SetStartElementHandler(parser, start_element_event_handler);
5874bd8f1dc3Sbluhm 
5875c033f770Sbluhm     g_bytesScanned = 0;
5876bd8f1dc3Sbluhm     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5877bd8f1dc3Sbluhm     int offset = 0;
5878bd8f1dc3Sbluhm     while (*fillsize >= 0) {
5879bd8f1dc3Sbluhm       assert_true(offset + *fillsize <= document_length); // or test is invalid
5880bd8f1dc3Sbluhm       const enum XML_Status status
5881bd8f1dc3Sbluhm           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5882bd8f1dc3Sbluhm       if (status != XML_STATUS_OK) {
5883bd8f1dc3Sbluhm         xml_failure(parser);
5884bd8f1dc3Sbluhm       }
5885bd8f1dc3Sbluhm       offset += *fillsize;
5886bd8f1dc3Sbluhm       fillsize++;
5887bd8f1dc3Sbluhm       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5888bd8f1dc3Sbluhm       worstcase_bytes += offset; // we might've tried to parse all pending bytes
5889bd8f1dc3Sbluhm     }
5890bd8f1dc3Sbluhm     assert_true(storage.count == 1); // the big token should've been parsed
5891c033f770Sbluhm     assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5892bd8f1dc3Sbluhm     if (g_reparseDeferralEnabledDefault) {
5893bd8f1dc3Sbluhm       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5894c033f770Sbluhm       const unsigned max_bytes_scanned = -*fillsize;
5895c033f770Sbluhm       if (g_bytesScanned > max_bytes_scanned) {
5896bd8f1dc3Sbluhm         fprintf(stderr,
5897c033f770Sbluhm                 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5898c033f770Sbluhm                 g_bytesScanned, max_bytes_scanned);
5899bd8f1dc3Sbluhm         fail("too many bytes scanned in parse attempts");
5900bd8f1dc3Sbluhm       }
5901bd8f1dc3Sbluhm     }
5902c033f770Sbluhm     assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
5903bd8f1dc3Sbluhm 
5904bd8f1dc3Sbluhm     XML_ParserFree(parser);
5905bd8f1dc3Sbluhm   }
5906bd8f1dc3Sbluhm   free(document);
5907bd8f1dc3Sbluhm }
5908bd8f1dc3Sbluhm END_TEST
5909f558d286Sbluhm #endif
5910bd8f1dc3Sbluhm 
5911bd8f1dc3Sbluhm void
5912bd8f1dc3Sbluhm make_basic_test_case(Suite *s) {
5913bd8f1dc3Sbluhm   TCase *tc_basic = tcase_create("basic tests");
5914bd8f1dc3Sbluhm 
5915bd8f1dc3Sbluhm   suite_add_tcase(s, tc_basic);
5916bd8f1dc3Sbluhm   tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5917bd8f1dc3Sbluhm 
5918bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_nul_byte);
5919bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_u0000_char);
5920bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_siphash_self);
5921bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_siphash_spec);
5922bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bom_utf8);
5923bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bom_utf16_be);
5924bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bom_utf16_le);
5925bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_nobom_utf16_le);
5926bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_hash_collision);
5927bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_illegal_utf8);
5928bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf8_auto_align);
5929bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16);
5930bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5931bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_not_utf16);
5932bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_encoding);
5933bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_latin1_umlauts);
5934bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_long_utf8_character);
5935bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_long_latin1_attribute);
5936bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_long_ascii_attribute);
5937bd8f1dc3Sbluhm   /* Regression test for SF bug #491986. */
5938bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_danish_latin1);
5939bd8f1dc3Sbluhm   /* Regression test for SF bug #514281. */
5940bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5941bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_french_charref_decimal);
5942bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_french_latin1);
5943bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_french_utf8);
5944bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf8_false_rejection);
5945bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_line_number_after_parse);
5946bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_column_number_after_parse);
5947bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5948bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_line_number_after_error);
5949bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_column_number_after_error);
5950bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_really_long_lines);
5951bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_really_long_encoded_lines);
5952bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_end_element_events);
5953bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5954bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5955bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_xmldecl_misplaced);
5956bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_xmldecl_invalid);
5957bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5958bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_xmldecl_missing_value);
5959bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5960bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5961bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5962bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5963bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5964bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5965bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5966bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5967bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5968bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5969bd8f1dc3Sbluhm   tcase_add_test(tc_basic,
5970bd8f1dc3Sbluhm                  test_wfc_undeclared_entity_with_external_subset_standalone);
5971bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5972bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5973bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5974bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5975bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5976bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5977bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5978bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_dtd_attr_handling);
5979bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5980bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5981bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5982bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5983bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5984bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_good_cdata_ascii);
5985bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_good_cdata_utf16);
5986bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5987bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_long_cdata_utf16);
5988bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5989bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5990bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_cdata);
5991bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_cdata_utf16);
5992bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5993bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5994bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_memory_allocation);
5995bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5996bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_dtd_elements);
5997bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_dtd_elements_nesting);
5998bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5999bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
6000bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
6001bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
6002bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6003bd8f1dc3Sbluhm                                 test_foreign_dtd_without_external_subset);
6004bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
6005bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_set_base);
6006bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_attributes);
6007bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
6008bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_resume_invalid_parse);
6009bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_resume_resuspended);
6010bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_cdata_default);
6011bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_subordinate_reset);
6012bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_subordinate_suspend);
6013bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
6014bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
6015bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6016bd8f1dc3Sbluhm                                 test_ext_entity_invalid_suspended_parse);
6017bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_explicit_encoding);
6018bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_trailing_cr);
6019bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
6020bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_trailing_rsqb);
6021bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
6022bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
6023bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
6024bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
6025bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_empty_parse);
602661ad8a07Sbluhm   tcase_add_test(tc_basic, test_negative_len_parse);
602761ad8a07Sbluhm   tcase_add_test(tc_basic, test_negative_len_parse_buffer);
6028bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_get_buffer_1);
6029bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_get_buffer_2);
6030bd8f1dc3Sbluhm #if XML_CONTEXT_BYTES > 0
6031bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
6032bd8f1dc3Sbluhm #endif
6033bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
6034bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
6035bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_byte_info_at_end);
6036bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_byte_info_at_error);
6037bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_byte_info_at_cdata);
6038bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_predefined_entities);
6039bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
6040bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_not_predefined_entities);
6041bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
6042bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
6043bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
6044bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
6045bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
6046bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
6047bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
6048bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
6049bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_public_doctype);
6050bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_attribute_enum_value);
6051bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_predefined_entity_redefinition);
6052bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
6053bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_public_notation_no_sysid);
6054bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_nested_groups);
6055bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_group_choice);
6056bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_standalone_parameter_entity);
6057bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
6058bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6059bd8f1dc3Sbluhm                                 test_recursive_external_parameter_entity);
6060c033f770Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6061c033f770Sbluhm                                 test_recursive_external_parameter_entity_2);
6062bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
6063bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_suspend_xdecl);
6064bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_abort_epilog);
6065bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_abort_epilog_2);
6066bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_suspend_epilog);
6067bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6068bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unfinished_epilog);
6069bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_partial_char_in_epilog);
6070bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6071bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6072bd8f1dc3Sbluhm                                 test_suspend_resume_internal_entity_issue_629);
6073bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6074bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6075bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_restart_on_error);
6076bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6077bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6078bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6079bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_standalone_internal_entity);
6080bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_skipped_external_entity);
6081bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6082bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6083bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6084bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6085bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6086bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6087bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6088bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_pi_handled_in_default);
6089bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_comment_handled_in_default);
6090bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_pi_yml);
6091bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_pi_xnl);
6092bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_pi_xmm);
6093bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_pi);
6094bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_be_pi);
6095bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_be_comment);
6096bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_le_comment);
6097bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6098bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6099bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_success);
6100bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6101bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6102bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6103bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6104bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6105bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6106bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6107bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6108bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6109bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6110bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6111bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6112bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6113bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6114bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6115bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6116bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6117bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6118bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6119bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6120bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6121bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6122bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf8_in_start_tags);
6123bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6124bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_attribute);
6125bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_utf16_second_attr);
6126bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_attr_after_solidus);
6127bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6128bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6129bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6130bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype);
6131bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype_utf8);
6132bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype_utf16);
6133bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype_plus);
6134bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype_star);
6135bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_doctype_query);
6136bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6137bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6138bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6139bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6140bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6141bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_short_doctype);
6142bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_short_doctype_2);
6143bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_short_doctype_3);
6144bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_long_doctype);
6145bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_entity);
6146bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_entity_2);
6147bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_entity_3);
6148bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_entity_4);
6149bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bad_notation);
6150bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_default_doctype_handler);
6151bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_empty_element_abort);
6152bd8f1dc3Sbluhm   tcase_add_test__ifdef_xml_dtd(tc_basic,
6153bd8f1dc3Sbluhm                                 test_pool_integrity_with_unfinished_attr);
6154bd8f1dc3Sbluhm   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6155f558d286Sbluhm #if defined(XML_TESTING)
6156c033f770Sbluhm   tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6157f558d286Sbluhm #endif
6158bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_set_reparse_deferral);
6159bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6160bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6161bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6162bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_set_bad_reparse_option);
6163bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6164f558d286Sbluhm #if defined(XML_TESTING)
6165bd8f1dc3Sbluhm   tcase_add_test(tc_basic, test_varying_buffer_fills);
6166f558d286Sbluhm #endif
6167bd8f1dc3Sbluhm }
6168