1bd8f1dc3Sbluhm /* Commonly used functions for the Expat test suite 2bd8f1dc3Sbluhm __ __ _ 3bd8f1dc3Sbluhm ___\ \/ /_ __ __ _| |_ 4bd8f1dc3Sbluhm / _ \\ /| '_ \ / _` | __| 5bd8f1dc3Sbluhm | __// \| |_) | (_| | |_ 6bd8f1dc3Sbluhm \___/_/\_\ .__/ \__,_|\__| 7bd8f1dc3Sbluhm |_| XML parser 8bd8f1dc3Sbluhm 9bd8f1dc3Sbluhm Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 10bd8f1dc3Sbluhm Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> 11bd8f1dc3Sbluhm Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 12bd8f1dc3Sbluhm Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> 13*aa071e6eSbluhm Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 14bd8f1dc3Sbluhm Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> 15bd8f1dc3Sbluhm Copyright (c) 2017 Joe Orton <jorton@redhat.com> 16bd8f1dc3Sbluhm Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> 17bd8f1dc3Sbluhm Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it> 18bd8f1dc3Sbluhm Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 19bd8f1dc3Sbluhm Copyright (c) 2020 Tim Gates <tim.gates@iress.com> 20bd8f1dc3Sbluhm Copyright (c) 2021 Donghee Na <donghee.na@python.org> 21bd8f1dc3Sbluhm Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> 22bd8f1dc3Sbluhm Licensed under the MIT license: 23bd8f1dc3Sbluhm 24bd8f1dc3Sbluhm Permission is hereby granted, free of charge, to any person obtaining 25bd8f1dc3Sbluhm a copy of this software and associated documentation files (the 26bd8f1dc3Sbluhm "Software"), to deal in the Software without restriction, including 27bd8f1dc3Sbluhm without limitation the rights to use, copy, modify, merge, publish, 28bd8f1dc3Sbluhm distribute, sublicense, and/or sell copies of the Software, and to permit 29bd8f1dc3Sbluhm persons to whom the Software is furnished to do so, subject to the 30bd8f1dc3Sbluhm following conditions: 31bd8f1dc3Sbluhm 32bd8f1dc3Sbluhm The above copyright notice and this permission notice shall be included 33bd8f1dc3Sbluhm in all copies or substantial portions of the Software. 34bd8f1dc3Sbluhm 35bd8f1dc3Sbluhm THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36bd8f1dc3Sbluhm EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37bd8f1dc3Sbluhm MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38bd8f1dc3Sbluhm NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39bd8f1dc3Sbluhm DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40bd8f1dc3Sbluhm OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41bd8f1dc3Sbluhm USE OR OTHER DEALINGS IN THE SOFTWARE. 42bd8f1dc3Sbluhm */ 43bd8f1dc3Sbluhm 44bd8f1dc3Sbluhm #include <assert.h> 45bd8f1dc3Sbluhm #include <stdio.h> 46bd8f1dc3Sbluhm #include <string.h> 47bd8f1dc3Sbluhm 48bd8f1dc3Sbluhm #include "expat_config.h" 49bd8f1dc3Sbluhm #include "expat.h" 50bd8f1dc3Sbluhm #include "internal.h" 51bd8f1dc3Sbluhm #include "chardata.h" 52bd8f1dc3Sbluhm #include "minicheck.h" 53bd8f1dc3Sbluhm #include "common.h" 54*aa071e6eSbluhm #include "handlers.h" 55bd8f1dc3Sbluhm 56bd8f1dc3Sbluhm /* Common test data */ 57bd8f1dc3Sbluhm 58bd8f1dc3Sbluhm const char *long_character_data_text 59bd8f1dc3Sbluhm = "<?xml version='1.0' encoding='iso-8859-1'?><s>" 60bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 61bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 62bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 63bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 64bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 65bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 66bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 67bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 68bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 69bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 70bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 71bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 72bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 73bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 74bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 75bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 76bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 77bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 78bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 79bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 80bd8f1dc3Sbluhm "</s>"; 81bd8f1dc3Sbluhm 82bd8f1dc3Sbluhm const char *long_cdata_text 83bd8f1dc3Sbluhm = "<s><![CDATA[" 84bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 85bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 86bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 87bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 88bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 89bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 90bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 91bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 92bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 93bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 94bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 95bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 96bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 97bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 98bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 99bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 100bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 101bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 102bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 103bd8f1dc3Sbluhm "012345678901234567890123456789012345678901234567890123456789" 104bd8f1dc3Sbluhm "]]></s>"; 105bd8f1dc3Sbluhm 106bd8f1dc3Sbluhm /* Having an element name longer than 1024 characters exercises some 107bd8f1dc3Sbluhm * of the pool allocation code in the parser that otherwise does not 108bd8f1dc3Sbluhm * get executed. The count at the end of the line is the number of 109bd8f1dc3Sbluhm * characters (bytes) in the element name by that point.x 110bd8f1dc3Sbluhm */ 111bd8f1dc3Sbluhm const char *get_buffer_test_text 112bd8f1dc3Sbluhm = "<documentwitharidiculouslylongelementnametotease" /* 0x030 */ 113bd8f1dc3Sbluhm "aparticularcorneroftheallocationinXML_GetBuffers" /* 0x060 */ 114bd8f1dc3Sbluhm "othatwecanimprovethecoverageyetagain012345678901" /* 0x090 */ 115bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0c0 */ 116bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0f0 */ 117bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x120 */ 118bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x150 */ 119bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x180 */ 120bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1b0 */ 121bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1e0 */ 122bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x210 */ 123bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x240 */ 124bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x270 */ 125bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2a0 */ 126bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2d0 */ 127bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x300 */ 128bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x330 */ 129bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x360 */ 130bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x390 */ 131bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3c0 */ 132bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3f0 */ 133bd8f1dc3Sbluhm "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */ 134bd8f1dc3Sbluhm 135bd8f1dc3Sbluhm /* Test control globals */ 136bd8f1dc3Sbluhm 137bd8f1dc3Sbluhm /* Used as the "resumable" parameter to XML_StopParser by some tests */ 138bd8f1dc3Sbluhm XML_Bool g_resumable = XML_FALSE; 139bd8f1dc3Sbluhm 140bd8f1dc3Sbluhm /* Used to control abort checks in some tests */ 141bd8f1dc3Sbluhm XML_Bool g_abortable = XML_FALSE; 142bd8f1dc3Sbluhm 143bd8f1dc3Sbluhm /* Used to control _XML_Parse_SINGLE_BYTES() chunk size */ 144bd8f1dc3Sbluhm int g_chunkSize = 1; 145bd8f1dc3Sbluhm 146bd8f1dc3Sbluhm /* Common test functions */ 147bd8f1dc3Sbluhm 148bd8f1dc3Sbluhm void 149bd8f1dc3Sbluhm tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) { 150bd8f1dc3Sbluhm #ifdef XML_DTD 151bd8f1dc3Sbluhm tcase_add_test(tc, test); 152bd8f1dc3Sbluhm #else 153bd8f1dc3Sbluhm UNUSED_P(tc); 154bd8f1dc3Sbluhm UNUSED_P(test); 155bd8f1dc3Sbluhm #endif 156bd8f1dc3Sbluhm } 157bd8f1dc3Sbluhm 158bd8f1dc3Sbluhm void 159bd8f1dc3Sbluhm tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test) { 160bd8f1dc3Sbluhm #if XML_GE == 1 161bd8f1dc3Sbluhm tcase_add_test(tc, test); 162bd8f1dc3Sbluhm #else 163bd8f1dc3Sbluhm UNUSED_P(tc); 164bd8f1dc3Sbluhm UNUSED_P(test); 165bd8f1dc3Sbluhm #endif 166bd8f1dc3Sbluhm } 167bd8f1dc3Sbluhm 168bd8f1dc3Sbluhm void 169bd8f1dc3Sbluhm basic_teardown(void) { 170bd8f1dc3Sbluhm if (g_parser != NULL) { 171bd8f1dc3Sbluhm XML_ParserFree(g_parser); 172bd8f1dc3Sbluhm g_parser = NULL; 173bd8f1dc3Sbluhm } 174bd8f1dc3Sbluhm } 175bd8f1dc3Sbluhm 176bd8f1dc3Sbluhm /* Generate a failure using the parser state to create an error message; 177bd8f1dc3Sbluhm this should be used when the parser reports an error we weren't 178bd8f1dc3Sbluhm expecting. 179bd8f1dc3Sbluhm */ 180bd8f1dc3Sbluhm void 181bd8f1dc3Sbluhm _xml_failure(XML_Parser parser, const char *file, int line) { 182bd8f1dc3Sbluhm char buffer[1024]; 183bd8f1dc3Sbluhm enum XML_Error err = XML_GetErrorCode(parser); 184bd8f1dc3Sbluhm snprintf(buffer, sizeof(buffer), 185bd8f1dc3Sbluhm " %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD 186bd8f1dc3Sbluhm "u, offset %" XML_FMT_INT_MOD "u)\n reported from %s, line %d\n", 187bd8f1dc3Sbluhm err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser), 188bd8f1dc3Sbluhm XML_GetCurrentColumnNumber(parser), file, line); 189bd8f1dc3Sbluhm _fail(file, line, buffer); 190bd8f1dc3Sbluhm } 191bd8f1dc3Sbluhm 192bd8f1dc3Sbluhm enum XML_Status 193bd8f1dc3Sbluhm _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, 194bd8f1dc3Sbluhm int isFinal) { 195bd8f1dc3Sbluhm // This ensures that tests have to run pathological parse cases 196bd8f1dc3Sbluhm // (e.g. when `s` is NULL) against plain XML_Parse rather than 197bd8f1dc3Sbluhm // chunking _XML_Parse_SINGLE_BYTES. 198bd8f1dc3Sbluhm assert((parser != NULL) && (s != NULL) && (len >= 0)); 199bd8f1dc3Sbluhm const int chunksize = g_chunkSize; 200bd8f1dc3Sbluhm if (chunksize > 0) { 201bd8f1dc3Sbluhm // parse in chunks of `chunksize` bytes as long as not exhausting 202bd8f1dc3Sbluhm for (; len > chunksize; len -= chunksize, s += chunksize) { 203bd8f1dc3Sbluhm enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE); 204bd8f1dc3Sbluhm if (res != XML_STATUS_OK) { 205bd8f1dc3Sbluhm return res; 206bd8f1dc3Sbluhm } 207bd8f1dc3Sbluhm } 208bd8f1dc3Sbluhm } 209bd8f1dc3Sbluhm // parse the final chunk, the size of which will be <= chunksize 210bd8f1dc3Sbluhm return XML_Parse(parser, s, len, isFinal); 211bd8f1dc3Sbluhm } 212bd8f1dc3Sbluhm 213bd8f1dc3Sbluhm void 214bd8f1dc3Sbluhm _expect_failure(const char *text, enum XML_Error errorCode, 215bd8f1dc3Sbluhm const char *errorMessage, const char *file, int lineno) { 216bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 217bd8f1dc3Sbluhm == XML_STATUS_OK) 218bd8f1dc3Sbluhm /* Hackish use of _fail() macro, but lets us report 219bd8f1dc3Sbluhm the right filename and line number. */ 220bd8f1dc3Sbluhm _fail(file, lineno, errorMessage); 221bd8f1dc3Sbluhm if (XML_GetErrorCode(g_parser) != errorCode) 222bd8f1dc3Sbluhm _xml_failure(g_parser, file, lineno); 223bd8f1dc3Sbluhm } 224bd8f1dc3Sbluhm 225bd8f1dc3Sbluhm void 226bd8f1dc3Sbluhm _run_character_check(const char *text, const XML_Char *expected, 227bd8f1dc3Sbluhm const char *file, int line) { 228bd8f1dc3Sbluhm CharData storage; 229bd8f1dc3Sbluhm 230bd8f1dc3Sbluhm CharData_Init(&storage); 231bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 232bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, accumulate_characters); 233bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 234bd8f1dc3Sbluhm == XML_STATUS_ERROR) 235bd8f1dc3Sbluhm _xml_failure(g_parser, file, line); 236bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 237bd8f1dc3Sbluhm } 238bd8f1dc3Sbluhm 239bd8f1dc3Sbluhm void 240bd8f1dc3Sbluhm _run_attribute_check(const char *text, const XML_Char *expected, 241bd8f1dc3Sbluhm const char *file, int line) { 242bd8f1dc3Sbluhm CharData storage; 243bd8f1dc3Sbluhm 244bd8f1dc3Sbluhm CharData_Init(&storage); 245bd8f1dc3Sbluhm XML_SetUserData(g_parser, &storage); 246bd8f1dc3Sbluhm XML_SetStartElementHandler(g_parser, accumulate_attribute); 247bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 248bd8f1dc3Sbluhm == XML_STATUS_ERROR) 249bd8f1dc3Sbluhm _xml_failure(g_parser, file, line); 250bd8f1dc3Sbluhm CharData_CheckXMLChars(&storage, expected); 251bd8f1dc3Sbluhm } 252bd8f1dc3Sbluhm 253bd8f1dc3Sbluhm void 254bd8f1dc3Sbluhm _run_ext_character_check(const char *text, ExtTest *test_data, 255bd8f1dc3Sbluhm const XML_Char *expected, const char *file, int line) { 256bd8f1dc3Sbluhm CharData *const storage = (CharData *)malloc(sizeof(CharData)); 257bd8f1dc3Sbluhm 258bd8f1dc3Sbluhm CharData_Init(storage); 259bd8f1dc3Sbluhm test_data->storage = storage; 260bd8f1dc3Sbluhm XML_SetUserData(g_parser, test_data); 261bd8f1dc3Sbluhm XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters); 262bd8f1dc3Sbluhm if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 263bd8f1dc3Sbluhm == XML_STATUS_ERROR) 264bd8f1dc3Sbluhm _xml_failure(g_parser, file, line); 265bd8f1dc3Sbluhm CharData_CheckXMLChars(storage, expected); 266bd8f1dc3Sbluhm 267bd8f1dc3Sbluhm free(storage); 268bd8f1dc3Sbluhm } 269bd8f1dc3Sbluhm 270bd8f1dc3Sbluhm /* Control variable; the number of times duff_allocator() will successfully 271bd8f1dc3Sbluhm * allocate */ 272bd8f1dc3Sbluhm #define ALLOC_ALWAYS_SUCCEED (-1) 273bd8f1dc3Sbluhm #define REALLOC_ALWAYS_SUCCEED (-1) 274bd8f1dc3Sbluhm 275bd8f1dc3Sbluhm int g_allocation_count = ALLOC_ALWAYS_SUCCEED; 276bd8f1dc3Sbluhm int g_reallocation_count = REALLOC_ALWAYS_SUCCEED; 277bd8f1dc3Sbluhm 278bd8f1dc3Sbluhm /* Crocked allocator for allocation failure tests */ 279bd8f1dc3Sbluhm void * 280bd8f1dc3Sbluhm duff_allocator(size_t size) { 281bd8f1dc3Sbluhm if (g_allocation_count == 0) 282bd8f1dc3Sbluhm return NULL; 283bd8f1dc3Sbluhm if (g_allocation_count != ALLOC_ALWAYS_SUCCEED) 284bd8f1dc3Sbluhm g_allocation_count--; 285bd8f1dc3Sbluhm return malloc(size); 286bd8f1dc3Sbluhm } 287bd8f1dc3Sbluhm 288bd8f1dc3Sbluhm /* Crocked reallocator for allocation failure tests */ 289bd8f1dc3Sbluhm void * 290bd8f1dc3Sbluhm duff_reallocator(void *ptr, size_t size) { 291bd8f1dc3Sbluhm if (g_reallocation_count == 0) 292bd8f1dc3Sbluhm return NULL; 293bd8f1dc3Sbluhm if (g_reallocation_count != REALLOC_ALWAYS_SUCCEED) 294bd8f1dc3Sbluhm g_reallocation_count--; 295bd8f1dc3Sbluhm return realloc(ptr, size); 296bd8f1dc3Sbluhm } 297