xref: /openbsd-src/lib/libexpat/examples/element_declarations.c (revision aa071e6ed2e21e8e72a6aac46533908f2defbdef)
1bd8f1dc3Sbluhm /* Read an XML document from standard input and print
2bd8f1dc3Sbluhm    element declarations (if any) to standard output.
3bd8f1dc3Sbluhm    It must be used with Expat compiled for UTF-8 output.
4bd8f1dc3Sbluhm                             __  __            _
5bd8f1dc3Sbluhm                          ___\ \/ /_ __   __ _| |_
6bd8f1dc3Sbluhm                         / _ \\  /| '_ \ / _` | __|
7bd8f1dc3Sbluhm                        |  __//  \| |_) | (_| | |_
8bd8f1dc3Sbluhm                         \___/_/\_\ .__/ \__,_|\__|
9bd8f1dc3Sbluhm                                  |_| XML parser
10bd8f1dc3Sbluhm 
11bd8f1dc3Sbluhm    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12bd8f1dc3Sbluhm    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13bd8f1dc3Sbluhm    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14bd8f1dc3Sbluhm    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15bd8f1dc3Sbluhm    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16bd8f1dc3Sbluhm    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
17bd8f1dc3Sbluhm    Copyright (c) 2019      Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18*aa071e6eSbluhm    Copyright (c) 2024      Hanno Böck <hanno@gentoo.org>
19bd8f1dc3Sbluhm    Licensed under the MIT license:
20bd8f1dc3Sbluhm 
21bd8f1dc3Sbluhm    Permission is  hereby granted,  free of charge,  to any  person obtaining
22bd8f1dc3Sbluhm    a  copy  of  this  software   and  associated  documentation  files  (the
23bd8f1dc3Sbluhm    "Software"),  to  deal in  the  Software  without restriction,  including
24bd8f1dc3Sbluhm    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
25bd8f1dc3Sbluhm    distribute, sublicense, and/or sell copies of the Software, and to permit
26bd8f1dc3Sbluhm    persons  to whom  the Software  is  furnished to  do so,  subject to  the
27bd8f1dc3Sbluhm    following conditions:
28bd8f1dc3Sbluhm 
29bd8f1dc3Sbluhm    The above copyright  notice and this permission notice  shall be included
30bd8f1dc3Sbluhm    in all copies or substantial portions of the Software.
31bd8f1dc3Sbluhm 
32bd8f1dc3Sbluhm    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
33bd8f1dc3Sbluhm    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
34bd8f1dc3Sbluhm    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
35bd8f1dc3Sbluhm    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
36bd8f1dc3Sbluhm    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
37bd8f1dc3Sbluhm    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
38bd8f1dc3Sbluhm    USE OR OTHER DEALINGS IN THE SOFTWARE.
39bd8f1dc3Sbluhm */
40bd8f1dc3Sbluhm 
41bd8f1dc3Sbluhm #include <stdbool.h>
42bd8f1dc3Sbluhm #include <stdio.h>
43bd8f1dc3Sbluhm #include <stdlib.h>
44bd8f1dc3Sbluhm #include <expat.h>
45bd8f1dc3Sbluhm 
/* printf length modifier that is combined with "u" when this program prints
   the value returned by XML_GetCurrentLineNumber().  Builds that define
   XML_LARGE_SIZE get "ll"; all others get "l" (presumably tracking the width
   of Expat's line-number type -- confirm against expat.h). */
#ifndef XML_LARGE_SIZE
#  define XML_FMT_INT_MOD "l"
#else
#  define XML_FMT_INT_MOD "ll"
#endif
51bd8f1dc3Sbluhm 
/* printf conversion used for every XML_Char string this program prints:
   "ls" when XML_UNICODE_WCHAR_T is defined, plain "s" otherwise. */
#ifndef XML_UNICODE_WCHAR_T
#  define XML_FMT_STR "s"
#else
#  define XML_FMT_STR "ls"
#endif
57bd8f1dc3Sbluhm 
58bd8f1dc3Sbluhm // While traversing the XML_Content tree, we avoid recursion
59bd8f1dc3Sbluhm // to not be vulnerable to a denial of service attack.
60bd8f1dc3Sbluhm typedef struct StackStruct {
61bd8f1dc3Sbluhm   const XML_Content *model;
62bd8f1dc3Sbluhm   unsigned level;
63bd8f1dc3Sbluhm   struct StackStruct *prev;
64bd8f1dc3Sbluhm } Stack;
65bd8f1dc3Sbluhm 
66bd8f1dc3Sbluhm static Stack *
67bd8f1dc3Sbluhm stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
68bd8f1dc3Sbluhm   Stack *const newStackTop = malloc(sizeof(Stack));
69bd8f1dc3Sbluhm   if (! newStackTop) {
70bd8f1dc3Sbluhm     return NULL;
71bd8f1dc3Sbluhm   }
72bd8f1dc3Sbluhm   newStackTop->model = model;
73bd8f1dc3Sbluhm   newStackTop->level = level;
74bd8f1dc3Sbluhm   newStackTop->prev = stackTop;
75bd8f1dc3Sbluhm   return newStackTop;
76bd8f1dc3Sbluhm }
77bd8f1dc3Sbluhm 
78bd8f1dc3Sbluhm static Stack *
79bd8f1dc3Sbluhm stackPopFree(Stack *stackTop) {
80bd8f1dc3Sbluhm   Stack *const newStackTop = stackTop->prev;
81bd8f1dc3Sbluhm   free(stackTop);
82bd8f1dc3Sbluhm   return newStackTop;
83bd8f1dc3Sbluhm }
84bd8f1dc3Sbluhm 
85bd8f1dc3Sbluhm static char *
86bd8f1dc3Sbluhm contentTypeName(enum XML_Content_Type contentType) {
87bd8f1dc3Sbluhm   switch (contentType) {
88bd8f1dc3Sbluhm   case XML_CTYPE_EMPTY:
89bd8f1dc3Sbluhm     return "EMPTY";
90bd8f1dc3Sbluhm   case XML_CTYPE_ANY:
91bd8f1dc3Sbluhm     return "ANY";
92bd8f1dc3Sbluhm   case XML_CTYPE_MIXED:
93bd8f1dc3Sbluhm     return "MIXED";
94bd8f1dc3Sbluhm   case XML_CTYPE_NAME:
95bd8f1dc3Sbluhm     return "NAME";
96bd8f1dc3Sbluhm   case XML_CTYPE_CHOICE:
97bd8f1dc3Sbluhm     return "CHOICE";
98bd8f1dc3Sbluhm   case XML_CTYPE_SEQ:
99bd8f1dc3Sbluhm     return "SEQ";
100bd8f1dc3Sbluhm   default:
101bd8f1dc3Sbluhm     return "???";
102bd8f1dc3Sbluhm   }
103bd8f1dc3Sbluhm }
104bd8f1dc3Sbluhm 
105bd8f1dc3Sbluhm static char *
106bd8f1dc3Sbluhm contentQuantName(enum XML_Content_Quant contentQuant) {
107bd8f1dc3Sbluhm   switch (contentQuant) {
108bd8f1dc3Sbluhm   case XML_CQUANT_NONE:
109bd8f1dc3Sbluhm     return "NONE";
110bd8f1dc3Sbluhm   case XML_CQUANT_OPT:
111bd8f1dc3Sbluhm     return "OPT";
112bd8f1dc3Sbluhm   case XML_CQUANT_REP:
113bd8f1dc3Sbluhm     return "REP";
114bd8f1dc3Sbluhm   case XML_CQUANT_PLUS:
115bd8f1dc3Sbluhm     return "PLUS";
116bd8f1dc3Sbluhm   default:
117bd8f1dc3Sbluhm     return "???";
118bd8f1dc3Sbluhm   }
119bd8f1dc3Sbluhm }
120bd8f1dc3Sbluhm 
121bd8f1dc3Sbluhm static void
122bd8f1dc3Sbluhm dumpContentModelElement(const XML_Content *model, unsigned level,
123bd8f1dc3Sbluhm                         const XML_Content *root) {
124bd8f1dc3Sbluhm   // Indent
125bd8f1dc3Sbluhm   unsigned u = 0;
126bd8f1dc3Sbluhm   for (; u < level; u++) {
127bd8f1dc3Sbluhm     printf("  ");
128bd8f1dc3Sbluhm   }
129bd8f1dc3Sbluhm 
130bd8f1dc3Sbluhm   // Node
131*aa071e6eSbluhm   printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root),
132*aa071e6eSbluhm          contentTypeName(model->type), (unsigned int)model->type,
133*aa071e6eSbluhm          contentQuantName(model->quant), (unsigned int)model->quant);
134bd8f1dc3Sbluhm   if (model->name) {
135bd8f1dc3Sbluhm     printf(", name=\"%" XML_FMT_STR "\"", model->name);
136bd8f1dc3Sbluhm   } else {
137bd8f1dc3Sbluhm     printf(", name=NULL");
138bd8f1dc3Sbluhm   }
139*aa071e6eSbluhm   printf(", numchildren=%u", model->numchildren);
140bd8f1dc3Sbluhm   printf("\n");
141bd8f1dc3Sbluhm }
142bd8f1dc3Sbluhm 
143bd8f1dc3Sbluhm static bool
144bd8f1dc3Sbluhm dumpContentModel(const XML_Char *name, const XML_Content *root) {
145bd8f1dc3Sbluhm   printf("Element \"%" XML_FMT_STR "\":\n", name);
146bd8f1dc3Sbluhm   Stack *stackTop = stackPushMalloc(NULL, root, 1);
147bd8f1dc3Sbluhm   if (! stackTop) {
148bd8f1dc3Sbluhm     return false;
149bd8f1dc3Sbluhm   }
150bd8f1dc3Sbluhm 
151bd8f1dc3Sbluhm   while (stackTop) {
152bd8f1dc3Sbluhm     const XML_Content *const model = stackTop->model;
153bd8f1dc3Sbluhm     const unsigned level = stackTop->level;
154bd8f1dc3Sbluhm 
155bd8f1dc3Sbluhm     dumpContentModelElement(model, level, root);
156bd8f1dc3Sbluhm 
157bd8f1dc3Sbluhm     stackTop = stackPopFree(stackTop);
158bd8f1dc3Sbluhm 
159bd8f1dc3Sbluhm     for (size_t u = model->numchildren; u >= 1; u--) {
160bd8f1dc3Sbluhm       Stack *const newStackTop
161bd8f1dc3Sbluhm           = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
162bd8f1dc3Sbluhm       if (! newStackTop) {
163bd8f1dc3Sbluhm         // We ran out of memory, so let's free all memory allocated
164bd8f1dc3Sbluhm         // earlier in this function, to be leak-clean:
165bd8f1dc3Sbluhm         while (stackTop != NULL) {
166bd8f1dc3Sbluhm           stackTop = stackPopFree(stackTop);
167bd8f1dc3Sbluhm         }
168bd8f1dc3Sbluhm         return false;
169bd8f1dc3Sbluhm       }
170bd8f1dc3Sbluhm       stackTop = newStackTop;
171bd8f1dc3Sbluhm     }
172bd8f1dc3Sbluhm   }
173bd8f1dc3Sbluhm 
174bd8f1dc3Sbluhm   printf("\n");
175bd8f1dc3Sbluhm   return true;
176bd8f1dc3Sbluhm }
177bd8f1dc3Sbluhm 
178bd8f1dc3Sbluhm static void XMLCALL
179bd8f1dc3Sbluhm handleElementDeclaration(void *userData, const XML_Char *name,
180bd8f1dc3Sbluhm                          XML_Content *model) {
181bd8f1dc3Sbluhm   XML_Parser parser = (XML_Parser)userData;
182bd8f1dc3Sbluhm   const bool success = dumpContentModel(name, model);
183bd8f1dc3Sbluhm   XML_FreeContentModel(parser, model);
184bd8f1dc3Sbluhm   if (! success) {
185bd8f1dc3Sbluhm     XML_StopParser(parser, /* resumable= */ XML_FALSE);
186bd8f1dc3Sbluhm   }
187bd8f1dc3Sbluhm }
188bd8f1dc3Sbluhm 
189bd8f1dc3Sbluhm int
190bd8f1dc3Sbluhm main(void) {
191bd8f1dc3Sbluhm   XML_Parser parser = XML_ParserCreate(NULL);
192bd8f1dc3Sbluhm   int done;
193bd8f1dc3Sbluhm 
194bd8f1dc3Sbluhm   if (! parser) {
195bd8f1dc3Sbluhm     fprintf(stderr, "Couldn't allocate memory for parser\n");
196bd8f1dc3Sbluhm     return 1;
197bd8f1dc3Sbluhm   }
198bd8f1dc3Sbluhm 
199bd8f1dc3Sbluhm   XML_SetUserData(parser, parser);
200bd8f1dc3Sbluhm   XML_SetElementDeclHandler(parser, handleElementDeclaration);
201bd8f1dc3Sbluhm 
202bd8f1dc3Sbluhm   do {
203bd8f1dc3Sbluhm     void *const buf = XML_GetBuffer(parser, BUFSIZ);
204bd8f1dc3Sbluhm     if (! buf) {
205bd8f1dc3Sbluhm       fprintf(stderr, "Couldn't allocate memory for buffer\n");
206bd8f1dc3Sbluhm       XML_ParserFree(parser);
207bd8f1dc3Sbluhm       return 1;
208bd8f1dc3Sbluhm     }
209bd8f1dc3Sbluhm 
210bd8f1dc3Sbluhm     const size_t len = fread(buf, 1, BUFSIZ, stdin);
211bd8f1dc3Sbluhm 
212bd8f1dc3Sbluhm     if (ferror(stdin)) {
213bd8f1dc3Sbluhm       fprintf(stderr, "Read error\n");
214bd8f1dc3Sbluhm       XML_ParserFree(parser);
215bd8f1dc3Sbluhm       return 1;
216bd8f1dc3Sbluhm     }
217bd8f1dc3Sbluhm 
218bd8f1dc3Sbluhm     done = feof(stdin);
219bd8f1dc3Sbluhm 
220bd8f1dc3Sbluhm     if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
221bd8f1dc3Sbluhm       enum XML_Error errorCode = XML_GetErrorCode(parser);
222bd8f1dc3Sbluhm       if (errorCode == XML_ERROR_ABORTED) {
223bd8f1dc3Sbluhm         errorCode = XML_ERROR_NO_MEMORY;
224bd8f1dc3Sbluhm       }
225bd8f1dc3Sbluhm       fprintf(stderr,
226bd8f1dc3Sbluhm               "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
227bd8f1dc3Sbluhm               XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
228bd8f1dc3Sbluhm       XML_ParserFree(parser);
229bd8f1dc3Sbluhm       return 1;
230bd8f1dc3Sbluhm     }
231bd8f1dc3Sbluhm   } while (! done);
232bd8f1dc3Sbluhm 
233bd8f1dc3Sbluhm   XML_ParserFree(parser);
234bd8f1dc3Sbluhm   return 0;
235bd8f1dc3Sbluhm }
236