14543ef51SXin LI /* Commonly used functions for the Expat test suite 24543ef51SXin LI __ __ _ 34543ef51SXin LI ___\ \/ /_ __ __ _| |_ 44543ef51SXin LI / _ \\ /| '_ \ / _` | __| 54543ef51SXin LI | __// \| |_) | (_| | |_ 64543ef51SXin LI \___/_/\_\ .__/ \__,_|\__| 74543ef51SXin LI |_| XML parser 84543ef51SXin LI 94543ef51SXin LI Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 104543ef51SXin LI Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 114543ef51SXin LI Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 124543ef51SXin LI Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13*908f215eSXin LI Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 144543ef51SXin LI Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 154543ef51SXin LI Copyright (c) 2017 Joe Orton <jorton@redhat.com> 164543ef51SXin LI Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 174543ef51SXin LI Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 184543ef51SXin LI Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 194543ef51SXin LI Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 204543ef51SXin LI Copyright (c) 2021 Donghee Na <donghee.na@python.org> 214543ef51SXin LI Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 224543ef51SXin LI Licensed under the MIT license: 234543ef51SXin LI 244543ef51SXin LI Permission is hereby granted, free of charge, to any person obtaining 254543ef51SXin LI a copy of this software and associated documentation files (the 264543ef51SXin LI "Software"), to deal in the Software without restriction, including 274543ef51SXin LI without limitation the rights to use, copy, modify, merge, publish, 284543ef51SXin LI distribute, sublicense, and/or sell copies of the Software, and to permit 294543ef51SXin LI persons to whom the Software is furnished to do so, subject to the 304543ef51SXin LI following conditions: 314543ef51SXin LI 324543ef51SXin LI The above copyright notice and this permission notice shall be included 334543ef51SXin LI in all copies or substantial portions of the Software. 344543ef51SXin LI 354543ef51SXin LI THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 364543ef51SXin LI EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 374543ef51SXin LI MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 384543ef51SXin LI NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 394543ef51SXin LI DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 404543ef51SXin LI OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 414543ef51SXin LI USE OR OTHER DEALINGS IN THE SOFTWARE. 424543ef51SXin LI */ 434543ef51SXin LI 444543ef51SXin LI #include <assert.h> 454543ef51SXin LI #include <stdio.h> 464543ef51SXin LI #include <string.h> 474543ef51SXin LI 484543ef51SXin LI #include "expat_config.h" 494543ef51SXin LI #include "expat.h" 504543ef51SXin LI #include "internal.h" 514543ef51SXin LI #include "chardata.h" 524543ef51SXin LI #include "minicheck.h" 534543ef51SXin LI #include "common.h" 54*908f215eSXin LI #include "handlers.h" 554543ef51SXin LI 564543ef51SXin LI /* Common test data */ 574543ef51SXin LI 584543ef51SXin LI const char *long_character_data_text 594543ef51SXin LI = "<?xml version='1.0' encoding='iso-8859-1'?><s>" 604543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 614543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 624543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 634543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 644543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 654543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 664543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 674543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 684543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 694543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 704543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 714543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 724543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 734543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 744543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 754543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 764543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 774543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 784543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 794543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 804543ef51SXin LI "</s>"; 814543ef51SXin LI 824543ef51SXin LI const char *long_cdata_text 834543ef51SXin LI = "<s><![CDATA[" 844543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 854543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 864543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 874543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 884543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 894543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 904543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 914543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 924543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 934543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 944543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 954543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 964543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 974543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 984543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 994543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 1004543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 1014543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 1024543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 1034543ef51SXin LI "012345678901234567890123456789012345678901234567890123456789" 1044543ef51SXin LI "]]></s>"; 1054543ef51SXin LI 1064543ef51SXin LI /* Having an element name longer than 1024 characters exercises some 1074543ef51SXin LI * of the pool allocation code in the parser that otherwise does not 1084543ef51SXin LI * get executed. The count at the end of the line is the number of 1094543ef51SXin LI * characters (bytes) in the element name by that point.x 1104543ef51SXin LI */ 1114543ef51SXin LI const char *get_buffer_test_text 1124543ef51SXin LI = "<documentwitharidiculouslylongelementnametotease" /* 0x030 */ 1134543ef51SXin LI "aparticularcorneroftheallocationinXML_GetBuffers" /* 0x060 */ 1144543ef51SXin LI "othatwecanimprovethecoverageyetagain012345678901" /* 0x090 */ 1154543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0c0 */ 1164543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0f0 */ 1174543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x120 */ 1184543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x150 */ 1194543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x180 */ 1204543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1b0 */ 1214543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1e0 */ 1224543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x210 */ 1234543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x240 */ 1244543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x270 */ 1254543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2a0 */ 1264543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2d0 */ 1274543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x300 */ 1284543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x330 */ 1294543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x360 */ 1304543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x390 */ 1314543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3c0 */ 1324543ef51SXin LI "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3f0 */ 1334543ef51SXin LI "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */ 1344543ef51SXin LI 1354543ef51SXin LI /* Test control globals */ 1364543ef51SXin LI 1374543ef51SXin LI /* Used as the "resumable" parameter to XML_StopParser by some tests */ 1384543ef51SXin LI XML_Bool g_resumable = XML_FALSE; 1394543ef51SXin LI 1404543ef51SXin LI /* Used to control abort checks in some tests */ 1414543ef51SXin LI XML_Bool g_abortable = XML_FALSE; 1424543ef51SXin LI 1434543ef51SXin LI /* Used to control _XML_Parse_SINGLE_BYTES() chunk size */ 1444543ef51SXin LI int g_chunkSize = 1; 1454543ef51SXin LI 1464543ef51SXin LI /* Common test functions */ 1474543ef51SXin LI 1484543ef51SXin LI void 1494543ef51SXin LI tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) { 1504543ef51SXin LI #ifdef XML_DTD 1514543ef51SXin LI tcase_add_test(tc, test); 1524543ef51SXin LI #else 1534543ef51SXin LI UNUSED_P(tc); 1544543ef51SXin LI UNUSED_P(test); 1554543ef51SXin LI #endif 1564543ef51SXin LI } 1574543ef51SXin LI 1584543ef51SXin LI void 1594543ef51SXin LI tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test) { 1604543ef51SXin LI #if XML_GE == 1 1614543ef51SXin LI tcase_add_test(tc, test); 1624543ef51SXin LI #else 1634543ef51SXin LI UNUSED_P(tc); 1644543ef51SXin LI UNUSED_P(test); 1654543ef51SXin LI #endif 1664543ef51SXin LI } 1674543ef51SXin LI 1684543ef51SXin LI void 1694543ef51SXin LI basic_teardown(void) { 1704543ef51SXin LI if (g_parser != NULL) { 1714543ef51SXin LI XML_ParserFree(g_parser); 1724543ef51SXin LI g_parser = NULL; 1734543ef51SXin LI } 1744543ef51SXin LI } 1754543ef51SXin LI 1764543ef51SXin LI /* Generate a failure using the parser state to create an error message; 1774543ef51SXin LI this should be used when the parser reports an error we weren't 1784543ef51SXin LI expecting. 1794543ef51SXin LI */ 1804543ef51SXin LI void 1814543ef51SXin LI _xml_failure(XML_Parser parser, const char *file, int line) { 1824543ef51SXin LI char buffer[1024]; 1834543ef51SXin LI enum XML_Error err = XML_GetErrorCode(parser); 1844543ef51SXin LI snprintf(buffer, sizeof(buffer), 1854543ef51SXin LI " %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD 1864543ef51SXin LI "u, offset %" XML_FMT_INT_MOD "u)\n reported from %s, line %d\n", 1874543ef51SXin LI err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser), 1884543ef51SXin LI XML_GetCurrentColumnNumber(parser), file, line); 1894543ef51SXin LI _fail(file, line, buffer); 1904543ef51SXin LI } 1914543ef51SXin LI 1924543ef51SXin LI enum XML_Status 1934543ef51SXin LI _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, 1944543ef51SXin LI int isFinal) { 1954543ef51SXin LI // This ensures that tests have to run pathological parse cases 1964543ef51SXin LI // (e.g. when `s` is NULL) against plain XML_Parse rather than 1974543ef51SXin LI // chunking _XML_Parse_SINGLE_BYTES. 1984543ef51SXin LI assert((parser != NULL) && (s != NULL) && (len >= 0)); 1994543ef51SXin LI const int chunksize = g_chunkSize; 2004543ef51SXin LI if (chunksize > 0) { 2014543ef51SXin LI // parse in chunks of `chunksize` bytes as long as not exhausting 2024543ef51SXin LI for (; len > chunksize; len -= chunksize, s += chunksize) { 2034543ef51SXin LI enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE); 2044543ef51SXin LI if (res != XML_STATUS_OK) { 2054543ef51SXin LI return res; 2064543ef51SXin LI } 2074543ef51SXin LI } 2084543ef51SXin LI } 2094543ef51SXin LI // parse the final chunk, the size of which will be <= chunksize 2104543ef51SXin LI return XML_Parse(parser, s, len, isFinal); 2114543ef51SXin LI } 2124543ef51SXin LI 2134543ef51SXin LI void 2144543ef51SXin LI _expect_failure(const char *text, enum XML_Error errorCode, 2154543ef51SXin LI const char *errorMessage, const char *file, int lineno) { 2164543ef51SXin LI if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2174543ef51SXin LI == XML_STATUS_OK) 2184543ef51SXin LI /* Hackish use of _fail() macro, but lets us report 2194543ef51SXin LI the right filename and line number. */ 2204543ef51SXin LI _fail(file, lineno, errorMessage); 2214543ef51SXin LI if (XML_GetErrorCode(g_parser) != errorCode) 2224543ef51SXin LI _xml_failure(g_parser, file, lineno); 2234543ef51SXin LI } 2244543ef51SXin LI 2254543ef51SXin LI void 2264543ef51SXin LI _run_character_check(const char *text, const XML_Char *expected, 2274543ef51SXin LI const char *file, int line) { 2284543ef51SXin LI CharData storage; 2294543ef51SXin LI 2304543ef51SXin LI CharData_Init(&storage); 2314543ef51SXin LI XML_SetUserData(g_parser, &storage); 2324543ef51SXin LI XML_SetCharacterDataHandler(g_parser, accumulate_characters); 2334543ef51SXin LI if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2344543ef51SXin LI == XML_STATUS_ERROR) 2354543ef51SXin LI _xml_failure(g_parser, file, line); 2364543ef51SXin LI CharData_CheckXMLChars(&storage, expected); 2374543ef51SXin LI } 2384543ef51SXin LI 2394543ef51SXin LI void 2404543ef51SXin LI _run_attribute_check(const char *text, const XML_Char *expected, 2414543ef51SXin LI const char *file, int line) { 2424543ef51SXin LI CharData storage; 2434543ef51SXin LI 2444543ef51SXin LI CharData_Init(&storage); 2454543ef51SXin LI XML_SetUserData(g_parser, &storage); 2464543ef51SXin LI XML_SetStartElementHandler(g_parser, accumulate_attribute); 2474543ef51SXin LI if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2484543ef51SXin LI == XML_STATUS_ERROR) 2494543ef51SXin LI _xml_failure(g_parser, file, line); 2504543ef51SXin LI CharData_CheckXMLChars(&storage, expected); 2514543ef51SXin LI } 2524543ef51SXin LI 2534543ef51SXin LI void 2544543ef51SXin LI _run_ext_character_check(const char *text, ExtTest *test_data, 2554543ef51SXin LI const XML_Char *expected, const char *file, int line) { 2564543ef51SXin LI CharData *const storage = (CharData *)malloc(sizeof(CharData)); 2574543ef51SXin LI 2584543ef51SXin LI CharData_Init(storage); 2594543ef51SXin LI test_data->storage = storage; 2604543ef51SXin LI XML_SetUserData(g_parser, test_data); 2614543ef51SXin LI XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters); 2624543ef51SXin LI if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2634543ef51SXin LI == XML_STATUS_ERROR) 2644543ef51SXin LI _xml_failure(g_parser, file, line); 2654543ef51SXin LI CharData_CheckXMLChars(storage, expected); 2664543ef51SXin LI 2674543ef51SXin LI free(storage); 2684543ef51SXin LI } 2694543ef51SXin LI 2704543ef51SXin LI /* Control variable; the number of times duff_allocator() will successfully 2714543ef51SXin LI * allocate */ 2724543ef51SXin LI #define ALLOC_ALWAYS_SUCCEED (-1) 2734543ef51SXin LI #define REALLOC_ALWAYS_SUCCEED (-1) 2744543ef51SXin LI 2754543ef51SXin LI int g_allocation_count = ALLOC_ALWAYS_SUCCEED; 2764543ef51SXin LI int g_reallocation_count = REALLOC_ALWAYS_SUCCEED; 2774543ef51SXin LI 2784543ef51SXin LI /* Crocked allocator for allocation failure tests */ 2794543ef51SXin LI void * 2804543ef51SXin LI duff_allocator(size_t size) { 2814543ef51SXin LI if (g_allocation_count == 0) 2824543ef51SXin LI return NULL; 2834543ef51SXin LI if (g_allocation_count != ALLOC_ALWAYS_SUCCEED) 2844543ef51SXin LI g_allocation_count--; 2854543ef51SXin LI return malloc(size); 2864543ef51SXin LI } 2874543ef51SXin LI 2884543ef51SXin LI /* Crocked reallocator for allocation failure tests */ 2894543ef51SXin LI void * 2904543ef51SXin LI duff_reallocator(void *ptr, size_t size) { 2914543ef51SXin LI if (g_reallocation_count == 0) 2924543ef51SXin LI return NULL; 2934543ef51SXin LI if (g_reallocation_count != REALLOC_ALWAYS_SUCCEED) 2944543ef51SXin LI g_reallocation_count--; 2954543ef51SXin LI return realloc(ptr, size); 2964543ef51SXin LI } 297