1bd8f1dc3Sbluhm /* Read an XML document from standard input and print 2bd8f1dc3Sbluhm element declarations (if any) to standard output. 3bd8f1dc3Sbluhm It must be used with Expat compiled for UTF-8 output. 4bd8f1dc3Sbluhm __ __ _ 5bd8f1dc3Sbluhm ___\ \/ /_ __ __ _| |_ 6bd8f1dc3Sbluhm / _ \\ /| '_ \ / _` | __| 7bd8f1dc3Sbluhm | __// \| |_) | (_| | |_ 8bd8f1dc3Sbluhm \___/_/\_\ .__/ \__,_|\__| 9bd8f1dc3Sbluhm |_| XML parser 10bd8f1dc3Sbluhm 11bd8f1dc3Sbluhm Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 12bd8f1dc3Sbluhm Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 13bd8f1dc3Sbluhm Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 14bd8f1dc3Sbluhm Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 15bd8f1dc3Sbluhm Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 16bd8f1dc3Sbluhm Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 17bd8f1dc3Sbluhm Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com> 18*aa071e6eSbluhm Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 19bd8f1dc3Sbluhm Licensed under the MIT license: 20bd8f1dc3Sbluhm 21bd8f1dc3Sbluhm Permission is hereby granted, free of charge, to any person obtaining 22bd8f1dc3Sbluhm a copy of this software and associated documentation files (the 23bd8f1dc3Sbluhm "Software"), to deal in the Software without restriction, including 24bd8f1dc3Sbluhm without limitation the rights to use, copy, modify, merge, publish, 25bd8f1dc3Sbluhm distribute, sublicense, and/or sell copies of the Software, and to permit 26bd8f1dc3Sbluhm persons to whom the Software is furnished to do so, subject to the 27bd8f1dc3Sbluhm following conditions: 28bd8f1dc3Sbluhm 29bd8f1dc3Sbluhm The above copyright notice and this permission notice shall be included 30bd8f1dc3Sbluhm in all copies or substantial portions of the Software. 31bd8f1dc3Sbluhm 32bd8f1dc3Sbluhm THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 33bd8f1dc3Sbluhm EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 34bd8f1dc3Sbluhm MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 35bd8f1dc3Sbluhm NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 36bd8f1dc3Sbluhm DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 37bd8f1dc3Sbluhm OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 38bd8f1dc3Sbluhm USE OR OTHER DEALINGS IN THE SOFTWARE. 39bd8f1dc3Sbluhm */ 40bd8f1dc3Sbluhm 41bd8f1dc3Sbluhm #include <stdbool.h> 42bd8f1dc3Sbluhm #include <stdio.h> 43bd8f1dc3Sbluhm #include <stdlib.h> 44bd8f1dc3Sbluhm #include <expat.h> 45bd8f1dc3Sbluhm 46bd8f1dc3Sbluhm #ifdef XML_LARGE_SIZE 47bd8f1dc3Sbluhm # define XML_FMT_INT_MOD "ll" 48bd8f1dc3Sbluhm #else 49bd8f1dc3Sbluhm # define XML_FMT_INT_MOD "l" 50bd8f1dc3Sbluhm #endif 51bd8f1dc3Sbluhm 52bd8f1dc3Sbluhm #ifdef XML_UNICODE_WCHAR_T 53bd8f1dc3Sbluhm # define XML_FMT_STR "ls" 54bd8f1dc3Sbluhm #else 55bd8f1dc3Sbluhm # define XML_FMT_STR "s" 56bd8f1dc3Sbluhm #endif 57bd8f1dc3Sbluhm 58bd8f1dc3Sbluhm // While traversing the XML_Content tree, we avoid recursion 59bd8f1dc3Sbluhm // to not be vulnerable to a denial of service attack. 60bd8f1dc3Sbluhm typedef struct StackStruct { 61bd8f1dc3Sbluhm const XML_Content *model; 62bd8f1dc3Sbluhm unsigned level; 63bd8f1dc3Sbluhm struct StackStruct *prev; 64bd8f1dc3Sbluhm } Stack; 65bd8f1dc3Sbluhm 66bd8f1dc3Sbluhm static Stack * 67bd8f1dc3Sbluhm stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) { 68bd8f1dc3Sbluhm Stack *const newStackTop = malloc(sizeof(Stack)); 69bd8f1dc3Sbluhm if (! newStackTop) { 70bd8f1dc3Sbluhm return NULL; 71bd8f1dc3Sbluhm } 72bd8f1dc3Sbluhm newStackTop->model = model; 73bd8f1dc3Sbluhm newStackTop->level = level; 74bd8f1dc3Sbluhm newStackTop->prev = stackTop; 75bd8f1dc3Sbluhm return newStackTop; 76bd8f1dc3Sbluhm } 77bd8f1dc3Sbluhm 78bd8f1dc3Sbluhm static Stack * 79bd8f1dc3Sbluhm stackPopFree(Stack *stackTop) { 80bd8f1dc3Sbluhm Stack *const newStackTop = stackTop->prev; 81bd8f1dc3Sbluhm free(stackTop); 82bd8f1dc3Sbluhm return newStackTop; 83bd8f1dc3Sbluhm } 84bd8f1dc3Sbluhm 85bd8f1dc3Sbluhm static char * 86bd8f1dc3Sbluhm contentTypeName(enum XML_Content_Type contentType) { 87bd8f1dc3Sbluhm switch (contentType) { 88bd8f1dc3Sbluhm case XML_CTYPE_EMPTY: 89bd8f1dc3Sbluhm return "EMPTY"; 90bd8f1dc3Sbluhm case XML_CTYPE_ANY: 91bd8f1dc3Sbluhm return "ANY"; 92bd8f1dc3Sbluhm case XML_CTYPE_MIXED: 93bd8f1dc3Sbluhm return "MIXED"; 94bd8f1dc3Sbluhm case XML_CTYPE_NAME: 95bd8f1dc3Sbluhm return "NAME"; 96bd8f1dc3Sbluhm case XML_CTYPE_CHOICE: 97bd8f1dc3Sbluhm return "CHOICE"; 98bd8f1dc3Sbluhm case XML_CTYPE_SEQ: 99bd8f1dc3Sbluhm return "SEQ"; 100bd8f1dc3Sbluhm default: 101bd8f1dc3Sbluhm return "???"; 102bd8f1dc3Sbluhm } 103bd8f1dc3Sbluhm } 104bd8f1dc3Sbluhm 105bd8f1dc3Sbluhm static char * 106bd8f1dc3Sbluhm contentQuantName(enum XML_Content_Quant contentQuant) { 107bd8f1dc3Sbluhm switch (contentQuant) { 108bd8f1dc3Sbluhm case XML_CQUANT_NONE: 109bd8f1dc3Sbluhm return "NONE"; 110bd8f1dc3Sbluhm case XML_CQUANT_OPT: 111bd8f1dc3Sbluhm return "OPT"; 112bd8f1dc3Sbluhm case XML_CQUANT_REP: 113bd8f1dc3Sbluhm return "REP"; 114bd8f1dc3Sbluhm case XML_CQUANT_PLUS: 115bd8f1dc3Sbluhm return "PLUS"; 116bd8f1dc3Sbluhm default: 117bd8f1dc3Sbluhm return "???"; 118bd8f1dc3Sbluhm } 119bd8f1dc3Sbluhm } 120bd8f1dc3Sbluhm 121bd8f1dc3Sbluhm static void 122bd8f1dc3Sbluhm dumpContentModelElement(const XML_Content *model, unsigned level, 123bd8f1dc3Sbluhm const XML_Content *root) { 124bd8f1dc3Sbluhm // Indent 125bd8f1dc3Sbluhm unsigned u = 0; 126bd8f1dc3Sbluhm for (; u < level; u++) { 127bd8f1dc3Sbluhm printf(" "); 128bd8f1dc3Sbluhm } 129bd8f1dc3Sbluhm 130bd8f1dc3Sbluhm // Node 131*aa071e6eSbluhm printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root), 132*aa071e6eSbluhm contentTypeName(model->type), (unsigned int)model->type, 133*aa071e6eSbluhm contentQuantName(model->quant), (unsigned int)model->quant); 134bd8f1dc3Sbluhm if (model->name) { 135bd8f1dc3Sbluhm printf(", name=\"%" XML_FMT_STR "\"", model->name); 136bd8f1dc3Sbluhm } else { 137bd8f1dc3Sbluhm printf(", name=NULL"); 138bd8f1dc3Sbluhm } 139*aa071e6eSbluhm printf(", numchildren=%u", model->numchildren); 140bd8f1dc3Sbluhm printf("\n"); 141bd8f1dc3Sbluhm } 142bd8f1dc3Sbluhm 143bd8f1dc3Sbluhm static bool 144bd8f1dc3Sbluhm dumpContentModel(const XML_Char *name, const XML_Content *root) { 145bd8f1dc3Sbluhm printf("Element \"%" XML_FMT_STR "\":\n", name); 146bd8f1dc3Sbluhm Stack *stackTop = stackPushMalloc(NULL, root, 1); 147bd8f1dc3Sbluhm if (! stackTop) { 148bd8f1dc3Sbluhm return false; 149bd8f1dc3Sbluhm } 150bd8f1dc3Sbluhm 151bd8f1dc3Sbluhm while (stackTop) { 152bd8f1dc3Sbluhm const XML_Content *const model = stackTop->model; 153bd8f1dc3Sbluhm const unsigned level = stackTop->level; 154bd8f1dc3Sbluhm 155bd8f1dc3Sbluhm dumpContentModelElement(model, level, root); 156bd8f1dc3Sbluhm 157bd8f1dc3Sbluhm stackTop = stackPopFree(stackTop); 158bd8f1dc3Sbluhm 159bd8f1dc3Sbluhm for (size_t u = model->numchildren; u >= 1; u--) { 160bd8f1dc3Sbluhm Stack *const newStackTop 161bd8f1dc3Sbluhm = stackPushMalloc(stackTop, model->children + (u - 1), level + 1); 162bd8f1dc3Sbluhm if (! newStackTop) { 163bd8f1dc3Sbluhm // We ran out of memory, so let's free all memory allocated 164bd8f1dc3Sbluhm // earlier in this function, to be leak-clean: 165bd8f1dc3Sbluhm while (stackTop != NULL) { 166bd8f1dc3Sbluhm stackTop = stackPopFree(stackTop); 167bd8f1dc3Sbluhm } 168bd8f1dc3Sbluhm return false; 169bd8f1dc3Sbluhm } 170bd8f1dc3Sbluhm stackTop = newStackTop; 171bd8f1dc3Sbluhm } 172bd8f1dc3Sbluhm } 173bd8f1dc3Sbluhm 174bd8f1dc3Sbluhm printf("\n"); 175bd8f1dc3Sbluhm return true; 176bd8f1dc3Sbluhm } 177bd8f1dc3Sbluhm 178bd8f1dc3Sbluhm static void XMLCALL 179bd8f1dc3Sbluhm handleElementDeclaration(void *userData, const XML_Char *name, 180bd8f1dc3Sbluhm XML_Content *model) { 181bd8f1dc3Sbluhm XML_Parser parser = (XML_Parser)userData; 182bd8f1dc3Sbluhm const bool success = dumpContentModel(name, model); 183bd8f1dc3Sbluhm XML_FreeContentModel(parser, model); 184bd8f1dc3Sbluhm if (! success) { 185bd8f1dc3Sbluhm XML_StopParser(parser, /* resumable= */ XML_FALSE); 186bd8f1dc3Sbluhm } 187bd8f1dc3Sbluhm } 188bd8f1dc3Sbluhm 189bd8f1dc3Sbluhm int 190bd8f1dc3Sbluhm main(void) { 191bd8f1dc3Sbluhm XML_Parser parser = XML_ParserCreate(NULL); 192bd8f1dc3Sbluhm int done; 193bd8f1dc3Sbluhm 194bd8f1dc3Sbluhm if (! parser) { 195bd8f1dc3Sbluhm fprintf(stderr, "Couldn't allocate memory for parser\n"); 196bd8f1dc3Sbluhm return 1; 197bd8f1dc3Sbluhm } 198bd8f1dc3Sbluhm 199bd8f1dc3Sbluhm XML_SetUserData(parser, parser); 200bd8f1dc3Sbluhm XML_SetElementDeclHandler(parser, handleElementDeclaration); 201bd8f1dc3Sbluhm 202bd8f1dc3Sbluhm do { 203bd8f1dc3Sbluhm void *const buf = XML_GetBuffer(parser, BUFSIZ); 204bd8f1dc3Sbluhm if (! buf) { 205bd8f1dc3Sbluhm fprintf(stderr, "Couldn't allocate memory for buffer\n"); 206bd8f1dc3Sbluhm XML_ParserFree(parser); 207bd8f1dc3Sbluhm return 1; 208bd8f1dc3Sbluhm } 209bd8f1dc3Sbluhm 210bd8f1dc3Sbluhm const size_t len = fread(buf, 1, BUFSIZ, stdin); 211bd8f1dc3Sbluhm 212bd8f1dc3Sbluhm if (ferror(stdin)) { 213bd8f1dc3Sbluhm fprintf(stderr, "Read error\n"); 214bd8f1dc3Sbluhm XML_ParserFree(parser); 215bd8f1dc3Sbluhm return 1; 216bd8f1dc3Sbluhm } 217bd8f1dc3Sbluhm 218bd8f1dc3Sbluhm done = feof(stdin); 219bd8f1dc3Sbluhm 220bd8f1dc3Sbluhm if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) { 221bd8f1dc3Sbluhm enum XML_Error errorCode = XML_GetErrorCode(parser); 222bd8f1dc3Sbluhm if (errorCode == XML_ERROR_ABORTED) { 223bd8f1dc3Sbluhm errorCode = XML_ERROR_NO_MEMORY; 224bd8f1dc3Sbluhm } 225bd8f1dc3Sbluhm fprintf(stderr, 226bd8f1dc3Sbluhm "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n", 227bd8f1dc3Sbluhm XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode)); 228bd8f1dc3Sbluhm XML_ParserFree(parser); 229bd8f1dc3Sbluhm return 1; 230bd8f1dc3Sbluhm } 231bd8f1dc3Sbluhm } while (! done); 232bd8f1dc3Sbluhm 233bd8f1dc3Sbluhm XML_ParserFree(parser); 234bd8f1dc3Sbluhm return 0; 235bd8f1dc3Sbluhm } 236