1 /* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
16 Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed@nuxi.nl>
22 Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav@slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit@vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park@samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike@sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans@chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo@vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
35 Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
37 Licensed under the MIT license:
38
39 Permission is hereby granted, free of charge, to any person obtaining
40 a copy of this software and associated documentation files (the
41 "Software"), to deal in the Software without restriction, including
42 without limitation the rights to use, copy, modify, merge, publish,
43 distribute, sublicense, and/or sell copies of the Software, and to permit
44 persons to whom the Software is furnished to do so, subject to the
45 following conditions:
46
47 The above copyright notice and this permission notice shall be included
48 in all copies or substantial portions of the Software.
49
50 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
51 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
52 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
53 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
54 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
55 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
56 USE OR OTHER DEALINGS IN THE SOFTWARE.
57 */
58
59 #define XML_BUILDING_EXPAT 1
60
61 #include <expat_config.h>
62
63 #if ! defined(_GNU_SOURCE)
64 # define _GNU_SOURCE 1 /* syscall prototype */
65 #endif
66
67 #ifdef _WIN32
68 /* force stdlib to define rand_s() */
69 # if ! defined(_CRT_RAND_S)
70 # define _CRT_RAND_S
71 # endif
72 #endif
73
74 #include <stddef.h>
75 #include <string.h> /* memset(), memcpy() */
76 #include <assert.h>
77 #include <limits.h> /* UINT_MAX */
78 #include <stdio.h> /* fprintf */
79 #include <stdlib.h> /* getenv, rand_s */
80 #include <stdint.h> /* uintptr_t */
81 #include <math.h> /* isnan */
82
83 #ifdef _WIN32
84 # define getpid GetCurrentProcessId
85 #else
86 # include <sys/time.h> /* gettimeofday() */
87 # include <sys/types.h> /* getpid() */
88 # include <unistd.h> /* getpid() */
89 # include <fcntl.h> /* O_RDONLY */
90 # include <errno.h>
91 #endif
92
93 #ifdef _WIN32
94 # include "winconfig.h"
95 #endif
96
97 #include "ascii.h"
98 #include "expat.h"
99 #include "siphash.h"
100
101 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
102 # if defined(HAVE_GETRANDOM)
103 # include <sys/random.h> /* getrandom */
104 # else
105 # include <unistd.h> /* syscall */
106 # include <sys/syscall.h> /* SYS_getrandom */
107 # endif
108 # if ! defined(GRND_NONBLOCK)
109 # define GRND_NONBLOCK 0x0001
110 # endif /* ! defined(GRND_NONBLOCK) */
111 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
112
113 #if defined(HAVE_LIBBSD) \
114 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
115 # include <bsd/stdlib.h>
116 #endif
117
118 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
119 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
120 #endif
121
122 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
123 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
124 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
125 && ! defined(XML_POOR_ENTROPY)
126 # error You do not have support for any sources of high quality entropy \
127 enabled. For end user security, that is probably not what you want. \
128 \
129 Your options include: \
130 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
131 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
132 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
133 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
134 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
135 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
136 * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
137 * Windows >=Vista (rand_s): _WIN32. \
138 \
139 If insist on not using any of these, bypass this error by defining \
140 XML_POOR_ENTROPY; you have been warned. \
141 \
142 If you have reasons to patch this detection code away or need changes \
143 to the build system, please open a bug. Thank you!
144 #endif
145
146 #ifdef XML_UNICODE
147 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
148 # define XmlConvert XmlUtf16Convert
149 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
150 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
151 # define XmlEncode XmlUtf16Encode
152 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
153 typedef unsigned short ICHAR;
154 #else
155 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
156 # define XmlConvert XmlUtf8Convert
157 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
158 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
159 # define XmlEncode XmlUtf8Encode
160 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
161 typedef char ICHAR;
162 #endif
163
164 #ifndef XML_NS
165
166 # define XmlInitEncodingNS XmlInitEncoding
167 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
168 # undef XmlGetInternalEncodingNS
169 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
170 # define XmlParseXmlDeclNS XmlParseXmlDecl
171
172 #endif
173
174 #ifdef XML_UNICODE
175
176 # ifdef XML_UNICODE_WCHAR_T
177 # define XML_T(x) (const wchar_t) x
178 # define XML_L(x) L##x
179 # else
180 # define XML_T(x) (const unsigned short)x
181 # define XML_L(x) x
182 # endif
183
184 #else
185
186 # define XML_T(x) x
187 # define XML_L(x) x
188
189 #endif
190
191 /* Round up n to be a multiple of sz, where sz is a power of 2. */
192 #define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
193
194 /* Do safe (NULL-aware) pointer arithmetic */
195 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
196
197 #include "internal.h"
198 #include "xmltok.h"
199 #include "xmlrole.h"
200
201 typedef const XML_Char *KEY;
202
203 typedef struct {
204 KEY name;
205 } NAMED;
206
207 typedef struct {
208 NAMED **v;
209 unsigned char power;
210 size_t size;
211 size_t used;
212 const XML_Memory_Handling_Suite *mem;
213 } HASH_TABLE;
214
215 static size_t keylen(KEY s);
216
217 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
218
219 /* For probing (after a collision) we need a step size relatively prime
220 to the hash table size, which is a power of 2. We use double-hashing,
221 since we can calculate a second hash value cheaply by taking those bits
222 of the first hash value that were discarded (masked out) when the table
223 index was calculated: index = hash & mask, where mask = table->size - 1.
224 We limit the maximum step size to table->size / 4 (mask >> 2) and make
225 it odd, since odd numbers are always relatively prime to a power of 2.
226 */
227 #define SECOND_HASH(hash, mask, power) \
228 ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
229 #define PROBE_STEP(hash, mask, power) \
230 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
231
232 typedef struct {
233 NAMED **p;
234 NAMED **end;
235 } HASH_TABLE_ITER;
236
237 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
238 #define INIT_DATA_BUF_SIZE 1024
239 #define INIT_ATTS_SIZE 16
240 #define INIT_ATTS_VERSION 0xFFFFFFFF
241 #define INIT_BLOCK_SIZE 1024
242 #define INIT_BUFFER_SIZE 1024
243
244 #define EXPAND_SPARE 24
245
246 typedef struct binding {
247 struct prefix *prefix;
248 struct binding *nextTagBinding;
249 struct binding *prevPrefixBinding;
250 const struct attribute_id *attId;
251 XML_Char *uri;
252 int uriLen;
253 int uriAlloc;
254 } BINDING;
255
256 typedef struct prefix {
257 const XML_Char *name;
258 BINDING *binding;
259 } PREFIX;
260
261 typedef struct {
262 const XML_Char *str;
263 const XML_Char *localPart;
264 const XML_Char *prefix;
265 int strLen;
266 int uriLen;
267 int prefixLen;
268 } TAG_NAME;
269
270 /* TAG represents an open element.
271 The name of the element is stored in both the document and API
272 encodings. The memory buffer 'buf' is a separately-allocated
273 memory area which stores the name. During the XML_Parse()/
274 XML_ParseBuffer() when the element is open, the memory for the 'raw'
275 version of the name (in the document encoding) is shared with the
276 document buffer. If the element is open across calls to
277 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
278 contain the 'raw' name as well.
279
280 A parser re-uses these structures, maintaining a list of allocated
281 TAG objects in a free list.
282 */
283 typedef struct tag {
284 struct tag *parent; /* parent of this element */
285 const char *rawName; /* tagName in the original encoding */
286 int rawNameLength;
287 TAG_NAME name; /* tagName in the API encoding */
288 char *buf; /* buffer for name components */
289 char *bufEnd; /* end of the buffer */
290 BINDING *bindings;
291 } TAG;
292
293 typedef struct {
294 const XML_Char *name;
295 const XML_Char *textPtr;
296 int textLen; /* length in XML_Chars */
297 int processed; /* # of processed bytes - when suspended */
298 const XML_Char *systemId;
299 const XML_Char *base;
300 const XML_Char *publicId;
301 const XML_Char *notation;
302 XML_Bool open;
303 XML_Bool is_param;
304 XML_Bool is_internal; /* true if declared in internal subset outside PE */
305 } ENTITY;
306
307 typedef struct {
308 enum XML_Content_Type type;
309 enum XML_Content_Quant quant;
310 const XML_Char *name;
311 int firstchild;
312 int lastchild;
313 int childcnt;
314 int nextsib;
315 } CONTENT_SCAFFOLD;
316
317 #define INIT_SCAFFOLD_ELEMENTS 32
318
319 typedef struct block {
320 struct block *next;
321 int size;
322 XML_Char s[1];
323 } BLOCK;
324
325 typedef struct {
326 BLOCK *blocks;
327 BLOCK *freeBlocks;
328 const XML_Char *end;
329 XML_Char *ptr;
330 XML_Char *start;
331 const XML_Memory_Handling_Suite *mem;
332 } STRING_POOL;
333
334 /* The XML_Char before the name is used to determine whether
335 an attribute has been specified. */
336 typedef struct attribute_id {
337 XML_Char *name;
338 PREFIX *prefix;
339 XML_Bool maybeTokenized;
340 XML_Bool xmlns;
341 } ATTRIBUTE_ID;
342
343 typedef struct {
344 const ATTRIBUTE_ID *id;
345 XML_Bool isCdata;
346 const XML_Char *value;
347 } DEFAULT_ATTRIBUTE;
348
349 typedef struct {
350 unsigned long version;
351 unsigned long hash;
352 const XML_Char *uriName;
353 } NS_ATT;
354
355 typedef struct {
356 const XML_Char *name;
357 PREFIX *prefix;
358 const ATTRIBUTE_ID *idAtt;
359 int nDefaultAtts;
360 int allocDefaultAtts;
361 DEFAULT_ATTRIBUTE *defaultAtts;
362 } ELEMENT_TYPE;
363
364 typedef struct {
365 HASH_TABLE generalEntities;
366 HASH_TABLE elementTypes;
367 HASH_TABLE attributeIds;
368 HASH_TABLE prefixes;
369 STRING_POOL pool;
370 STRING_POOL entityValuePool;
371 /* false once a parameter entity reference has been skipped */
372 XML_Bool keepProcessing;
373 /* true once an internal or external PE reference has been encountered;
374 this includes the reference to an external subset */
375 XML_Bool hasParamEntityRefs;
376 XML_Bool standalone;
377 #ifdef XML_DTD
378 /* indicates if external PE has been read */
379 XML_Bool paramEntityRead;
380 HASH_TABLE paramEntities;
381 #endif /* XML_DTD */
382 PREFIX defaultPrefix;
383 /* === scaffolding for building content model === */
384 XML_Bool in_eldecl;
385 CONTENT_SCAFFOLD *scaffold;
386 unsigned contentStringLen;
387 unsigned scaffSize;
388 unsigned scaffCount;
389 int scaffLevel;
390 int *scaffIndex;
391 } DTD;
392
393 typedef struct open_internal_entity {
394 const char *internalEventPtr;
395 const char *internalEventEndPtr;
396 struct open_internal_entity *next;
397 ENTITY *entity;
398 int startTagLevel;
399 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
400 } OPEN_INTERNAL_ENTITY;
401
402 enum XML_Account {
403 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
404 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
405 expansion */
406 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
407 };
408
409 #ifdef XML_DTD
410 typedef unsigned long long XmlBigCount;
411 typedef struct accounting {
412 XmlBigCount countBytesDirect;
413 XmlBigCount countBytesIndirect;
414 int debugLevel;
415 float maximumAmplificationFactor; // >=1.0
416 unsigned long long activationThresholdBytes;
417 } ACCOUNTING;
418
419 typedef struct entity_stats {
420 unsigned int countEverOpened;
421 unsigned int currentDepth;
422 unsigned int maximumDepthSeen;
423 int debugLevel;
424 } ENTITY_STATS;
425 #endif /* XML_DTD */
426
427 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
428 const char *end, const char **endPtr);
429
430 static Processor prologProcessor;
431 static Processor prologInitProcessor;
432 static Processor contentProcessor;
433 static Processor cdataSectionProcessor;
434 #ifdef XML_DTD
435 static Processor ignoreSectionProcessor;
436 static Processor externalParEntProcessor;
437 static Processor externalParEntInitProcessor;
438 static Processor entityValueProcessor;
439 static Processor entityValueInitProcessor;
440 #endif /* XML_DTD */
441 static Processor epilogProcessor;
442 static Processor errorProcessor;
443 static Processor externalEntityInitProcessor;
444 static Processor externalEntityInitProcessor2;
445 static Processor externalEntityInitProcessor3;
446 static Processor externalEntityContentProcessor;
447 static Processor internalEntityProcessor;
448
449 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
450 const XML_Char *encodingName);
451 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
452 const char *s, const char *next);
453 static enum XML_Error initializeEncoding(XML_Parser parser);
454 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
455 const char *s, const char *end, int tok,
456 const char *next, const char **nextPtr,
457 XML_Bool haveMore, XML_Bool allowClosingDoctype,
458 enum XML_Account account);
459 static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
460 XML_Bool betweenDecl);
461 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
462 const ENCODING *enc, const char *start,
463 const char *end, const char **endPtr,
464 XML_Bool haveMore, enum XML_Account account);
465 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
466 const char **startPtr, const char *end,
467 const char **nextPtr, XML_Bool haveMore,
468 enum XML_Account account);
469 #ifdef XML_DTD
470 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
471 const char **startPtr, const char *end,
472 const char **nextPtr, XML_Bool haveMore);
473 #endif /* XML_DTD */
474
475 static void freeBindings(XML_Parser parser, BINDING *bindings);
476 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
477 const char *s, TAG_NAME *tagNamePtr,
478 BINDING **bindingsPtr,
479 enum XML_Account account);
480 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
481 const ATTRIBUTE_ID *attId, const XML_Char *uri,
482 BINDING **bindingsPtr);
483 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
484 XML_Bool isId, const XML_Char *dfltValue,
485 XML_Parser parser);
486 static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
487 XML_Bool isCdata, const char *,
488 const char *, STRING_POOL *,
489 enum XML_Account account);
490 static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
491 XML_Bool isCdata, const char *,
492 const char *, STRING_POOL *,
493 enum XML_Account account);
494 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
495 const char *start, const char *end);
496 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
497 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
498 const char *start, const char *end,
499 enum XML_Account account);
500 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
501 const char *start, const char *end);
502 static int reportComment(XML_Parser parser, const ENCODING *enc,
503 const char *start, const char *end);
504 static void reportDefault(XML_Parser parser, const ENCODING *enc,
505 const char *start, const char *end);
506
507 static const XML_Char *getContext(XML_Parser parser);
508 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
509
510 static void FASTCALL normalizePublicId(XML_Char *s);
511
512 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
513 /* do not call if m_parentParser != NULL */
514 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
515 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
516 const XML_Memory_Handling_Suite *ms);
517 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
518 const XML_Memory_Handling_Suite *ms);
519 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
520 const HASH_TABLE *);
521 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
522 size_t createSize);
523 static void FASTCALL hashTableInit(HASH_TABLE *,
524 const XML_Memory_Handling_Suite *ms);
525 static void FASTCALL hashTableClear(HASH_TABLE *);
526 static void FASTCALL hashTableDestroy(HASH_TABLE *);
527 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
528 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
529
530 static void FASTCALL poolInit(STRING_POOL *,
531 const XML_Memory_Handling_Suite *ms);
532 static void FASTCALL poolClear(STRING_POOL *);
533 static void FASTCALL poolDestroy(STRING_POOL *);
534 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
535 const char *ptr, const char *end);
536 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
537 const char *ptr, const char *end);
538 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
539 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
540 const XML_Char *s);
541 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
542 int n);
543 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
544 const XML_Char *s);
545
546 static int FASTCALL nextScaffoldPart(XML_Parser parser);
547 static XML_Content *build_model(XML_Parser parser);
548 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
549 const char *ptr, const char *end);
550
551 static XML_Char *copyString(const XML_Char *s,
552 const XML_Memory_Handling_Suite *memsuite);
553
554 static unsigned long generate_hash_secret_salt(XML_Parser parser);
555 static XML_Bool startParsing(XML_Parser parser);
556
557 static XML_Parser parserCreate(const XML_Char *encodingName,
558 const XML_Memory_Handling_Suite *memsuite,
559 const XML_Char *nameSep, DTD *dtd);
560
561 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
562
563 #ifdef XML_DTD
564 static float accountingGetCurrentAmplification(XML_Parser rootParser);
565 static void accountingReportStats(XML_Parser originParser, const char *epilog);
566 static void accountingOnAbort(XML_Parser originParser);
567 static void accountingReportDiff(XML_Parser rootParser,
568 unsigned int levelsAwayFromRootParser,
569 const char *before, const char *after,
570 ptrdiff_t bytesMore, int source_line,
571 enum XML_Account account);
572 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
573 const char *before, const char *after,
574 int source_line,
575 enum XML_Account account);
576
577 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
578 const char *action, int sourceLine);
579 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
580 int sourceLine);
581 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
582 int sourceLine);
583
584 static XML_Parser getRootParserOf(XML_Parser parser,
585 unsigned int *outLevelDiff);
586 #endif /* XML_DTD */
587
588 static unsigned long getDebugLevel(const char *variableName,
589 unsigned long defaultDebugLevel);
590
/* Accessors and small helpers for a STRING_POOL.  'pool' is a pointer to a
   STRING_POOL; it is fully parenthesized in every expansion so that any
   pointer expression (e.g. &parser->m_tempPool) can be passed.  Note that
   several of these macros evaluate 'pool' more than once, so the argument
   must not have side effects.

   poolStart(pool)         start of the string currently being built
   poolEnd(pool)           one past the last XML_Char written so far
   poolLength(pool)        number of XML_Chars between start and ptr
   poolChop(pool)          drop the most recently written XML_Char
   poolLastChar(pool)      the most recently written XML_Char (an lvalue)
   poolDiscard(pool)       abandon the current string (ptr = start)
   poolFinish(pool)        close the current string (start = ptr)
   poolAppendChar(pool, c) store c at ptr and advance; when ptr has reached
                           end, poolGrow() is called first.  Evaluates to 1
                           on success and to 0 if poolGrow() reported failure.
*/
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
602
603 struct XML_ParserStruct {
604 /* The first member must be m_userData so that the XML_GetUserData
605 macro works. */
606 void *m_userData;
607 void *m_handlerArg;
608 char *m_buffer;
609 const XML_Memory_Handling_Suite m_mem;
610 /* first character to be parsed */
611 const char *m_bufferPtr;
612 /* past last character to be parsed */
613 char *m_bufferEnd;
614 /* allocated end of m_buffer */
615 const char *m_bufferLim;
616 XML_Index m_parseEndByteIndex;
617 const char *m_parseEndPtr;
618 XML_Char *m_dataBuf;
619 XML_Char *m_dataBufEnd;
620 XML_StartElementHandler m_startElementHandler;
621 XML_EndElementHandler m_endElementHandler;
622 XML_CharacterDataHandler m_characterDataHandler;
623 XML_ProcessingInstructionHandler m_processingInstructionHandler;
624 XML_CommentHandler m_commentHandler;
625 XML_StartCdataSectionHandler m_startCdataSectionHandler;
626 XML_EndCdataSectionHandler m_endCdataSectionHandler;
627 XML_DefaultHandler m_defaultHandler;
628 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
629 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
630 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
631 XML_NotationDeclHandler m_notationDeclHandler;
632 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
633 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
634 XML_NotStandaloneHandler m_notStandaloneHandler;
635 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
636 XML_Parser m_externalEntityRefHandlerArg;
637 XML_SkippedEntityHandler m_skippedEntityHandler;
638 XML_UnknownEncodingHandler m_unknownEncodingHandler;
639 XML_ElementDeclHandler m_elementDeclHandler;
640 XML_AttlistDeclHandler m_attlistDeclHandler;
641 XML_EntityDeclHandler m_entityDeclHandler;
642 XML_XmlDeclHandler m_xmlDeclHandler;
643 const ENCODING *m_encoding;
644 INIT_ENCODING m_initEncoding;
645 const ENCODING *m_internalEncoding;
646 const XML_Char *m_protocolEncodingName;
647 XML_Bool m_ns;
648 XML_Bool m_ns_triplets;
649 void *m_unknownEncodingMem;
650 void *m_unknownEncodingData;
651 void *m_unknownEncodingHandlerData;
652 void(XMLCALL *m_unknownEncodingRelease)(void *);
653 PROLOG_STATE m_prologState;
654 Processor *m_processor;
655 enum XML_Error m_errorCode;
656 const char *m_eventPtr;
657 const char *m_eventEndPtr;
658 const char *m_positionPtr;
659 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
660 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
661 XML_Bool m_defaultExpandInternalEntities;
662 int m_tagLevel;
663 ENTITY *m_declEntity;
664 const XML_Char *m_doctypeName;
665 const XML_Char *m_doctypeSysid;
666 const XML_Char *m_doctypePubid;
667 const XML_Char *m_declAttributeType;
668 const XML_Char *m_declNotationName;
669 const XML_Char *m_declNotationPublicId;
670 ELEMENT_TYPE *m_declElementType;
671 ATTRIBUTE_ID *m_declAttributeId;
672 XML_Bool m_declAttributeIsCdata;
673 XML_Bool m_declAttributeIsId;
674 DTD *m_dtd;
675 const XML_Char *m_curBase;
676 TAG *m_tagStack;
677 TAG *m_freeTagList;
678 BINDING *m_inheritedBindings;
679 BINDING *m_freeBindingList;
680 int m_attsSize;
681 int m_nSpecifiedAtts;
682 int m_idAttIndex;
683 ATTRIBUTE *m_atts;
684 NS_ATT *m_nsAtts;
685 unsigned long m_nsAttsVersion;
686 unsigned char m_nsAttsPower;
687 #ifdef XML_ATTR_INFO
688 XML_AttrInfo *m_attInfo;
689 #endif
690 POSITION m_position;
691 STRING_POOL m_tempPool;
692 STRING_POOL m_temp2Pool;
693 char *m_groupConnector;
694 unsigned int m_groupSize;
695 XML_Char m_namespaceSeparator;
696 XML_Parser m_parentParser;
697 XML_ParsingStatus m_parsingStatus;
698 #ifdef XML_DTD
699 XML_Bool m_isParamEntity;
700 XML_Bool m_useForeignDTD;
701 enum XML_ParamEntityParsing m_paramEntityParsing;
702 #endif
703 unsigned long m_hash_secret_salt;
704 #ifdef XML_DTD
705 ACCOUNTING m_accounting;
706 ENTITY_STATS m_entity_stats;
707 #endif
708 };
709
/* Allocation shorthands that go through the memory handling suite stored
   in the parser (parser->m_mem) rather than calling the C library directly.
   'parser' is parenthesized in the expansion so that any pointer expression
   may be passed, not just a plain identifier. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
713
714 XML_Parser XMLCALL
XML_ParserCreate(const XML_Char * encodingName)715 XML_ParserCreate(const XML_Char *encodingName) {
716 return XML_ParserCreate_MM(encodingName, NULL, NULL);
717 }
718
719 XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char * encodingName,XML_Char nsSep)720 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
721 XML_Char tmp[2] = {nsSep, 0};
722 return XML_ParserCreate_MM(encodingName, NULL, tmp);
723 }
724
725 static const XML_Char implicitContext[]
726 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
727 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
728 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
729 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
730 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
731 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
732 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
733 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
734 '\0'};
735
736 /* To avoid warnings about unused functions: */
737 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
738
739 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
740
/* Obtain entropy on Linux 3.17+ through getrandom in non-blocking mode.

   Writes count random bytes to target.  Returns 1 only when all count
   bytes were written and 0 otherwise.

   The call is repeated after a short write (to fetch the remainder) and
   after a failure with errno == EINTR.  errno is consulted only when the
   call actually failed: it is not reset by a successful call, so a stale
   value must not influence the retry decision.  A call that makes no
   progress without failing ends the loop instead of spinning.
*/
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  const unsigned int getrandomFlags = GRND_NONBLOCK;
  size_t bytesWrittenTotal = 0;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds the result of either entry point without narrowing */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* full count bytes written */
      continue;   /* short write: ask for the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before anything was written: retry */

    return 0; /* hard failure, or no progress */
  }
}
768
769 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
770
771 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
772
/* Extract entropy from /dev/urandom.

   Reads count bytes from the device into target.  Returns 1 only when all
   count bytes were read; returns 0 if the device cannot be opened, a read
   fails for a reason other than EINTR, or a read makes no progress.

   errno is consulted only after read() has actually failed: it is not
   reset by a successful call, so a stale EINTR must neither keep the loop
   spinning on a zero-byte read nor may any other stale value abort the
   loop after a short read.
*/
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
      continue; /* short read: fetch the remainder */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before any data arrived: retry */

    break; /* hard failure, or no progress */
  }

  close(fd);
  return success;
}
800
801 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
802
803 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
804
805 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
806
/* Fill target with count bytes taken from successive arc4random() results.
   Each 32-bit result is consumed least-significant byte first; for a
   trailing partial word only as many bytes as are still needed are used. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t offset = 0;

  while (offset < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - offset;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[offset + k] = (uint8_t)(word >> (k * 8));
    }
    offset += chunk;
  }
}
822
823 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
824
825 #ifdef _WIN32
826
827 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
828 as it didn't declare it in its header prior to version 5.3.0 of its
829 runtime package (mingwrt, containing stdlib.h). The upstream fix
830 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
831 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
832 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
833 __declspec(dllimport) int rand_s(unsigned int *);
834 # endif
835
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers. Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target with count bytes.  Every rand_s() result is consumed
 * least-significant byte first; a trailing partial word uses only the
 * bytes still needed.  Returns 1 on success, or 0 as soon as rand_s()
 * reports an error (target may then be partially written).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t offset = 0;

  while (offset < count) {
    unsigned int word = 0;
    size_t remaining;
    size_t chunk;
    size_t k;

    if (rand_s(&word) != 0)
      return 0; /* failure */

    remaining = count - offset;
    chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    for (k = 0; k < chunk; k++) {
      out[offset + k] = (uint8_t)(word >> (k * 8));
    }
    offset += chunk;
  }

  return 1; /* success */
}
859
860 #endif /* _WIN32 */
861
862 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
863
/* Low-quality entropy derived from the current time.
   On Windows: the two halves of the system FILETIME XORed together.
   Elsewhere: the microseconds field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;

  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* assert() compiles away under NDEBUG; the cast keeps rc "used" then */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
886
887 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
888
/* Pass-through helper: always returns `entropy` unchanged.  When
   getDebugLevel() reports a level of at least 1 for "EXPAT_ENTROPY_DEBUG",
   the value and the `label` naming its source are first printed to
   stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  /* Two hex digits per byte, so the value is printed zero-padded to
     its full width */
  const int hexDigits = (int)sizeof(entropy) * 2;
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u)
    return entropy;

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          hexDigits, entropy, byteCount);
  return entropy;
}
897
/* Produce a value for use as the parser's hash salt (startParsing() stores
   the result in m_hash_secret_salt).

   Which source is used is decided at compile time, in this order:
     1. arc4random_buf(), if HAVE_ARC4RANDOM_BUF is defined;
     2. arc4random() via writeRandomBytes_arc4random(), if HAVE_ARC4RANDOM
        is defined;
     3. otherwise rand_s() on Windows, or getrandom() where available,
        then /dev/urandom if XML_DEV_URANDOM is defined (non-Windows only);
        each of these is skipped at run time if its helper returns 0;
     4. as a last resort, time and process ID mixed together and
        multiplied by a Mersenne prime.

   Every return goes through ENTROPY_DEBUG() with a label naming the
   source that was used.  The `parser` argument is not used. */
static unsigned long
generate_hash_secret_salt(XML_Parser parser) {
  unsigned long entropy;
  (void)parser;

  /* "Failproof" high quality providers: */
#if defined(HAVE_ARC4RANDOM_BUF)
  arc4random_buf(&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random_buf", entropy);
#elif defined(HAVE_ARC4RANDOM)
  writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
  return ENTROPY_DEBUG("arc4random", entropy);
#else
  /* Try high quality providers first .. */
#  ifdef _WIN32
  if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("rand_s", entropy);
  }
#  elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
  if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("getrandom", entropy);
  }
#  endif
#  if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
  if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
    return ENTROPY_DEBUG("/dev/urandom", entropy);
  }
#  endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
  /* .. and self-made low quality for backup: */

  /* Process ID is 0 bits entropy if attacker has local access */
  entropy = gather_time_entropy() ^ getpid();

  /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61);
     the factor is picked to match the width of unsigned long */
  if (sizeof(unsigned long) == 4) {
    return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
  } else {
    return ENTROPY_DEBUG("fallback(8)",
                         entropy * (unsigned long)2305843009213693951ULL);
  }
#endif
}
940
941 static unsigned long
get_hash_secret_salt(XML_Parser parser)942 get_hash_secret_salt(XML_Parser parser) {
943 if (parser->m_parentParser != NULL)
944 return get_hash_secret_salt(parser->m_parentParser);
945 return parser->m_hash_secret_salt;
946 }
947
948 static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)949 startParsing(XML_Parser parser) {
950 /* hash functions must be initialized before setContext() is called */
951 if (parser->m_hash_secret_salt == 0)
952 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
953 if (parser->m_ns) {
954 /* implicit context only set for root parser, since child
955 parsers (i.e. external entity parsers) will inherit it
956 */
957 return setContext(parser, implicitContext);
958 }
959 return XML_TRUE;
960 }
961
962 XML_Parser XMLCALL
XML_ParserCreate_MM(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep)963 XML_ParserCreate_MM(const XML_Char *encodingName,
964 const XML_Memory_Handling_Suite *memsuite,
965 const XML_Char *nameSep) {
966 return parserCreate(encodingName, memsuite, nameSep, NULL);
967 }
968
969 static XML_Parser
parserCreate(const XML_Char * encodingName,const XML_Memory_Handling_Suite * memsuite,const XML_Char * nameSep,DTD * dtd)970 parserCreate(const XML_Char *encodingName,
971 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
972 DTD *dtd) {
973 XML_Parser parser;
974
975 if (memsuite) {
976 XML_Memory_Handling_Suite *mtemp;
977 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
978 if (parser != NULL) {
979 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
980 mtemp->malloc_fcn = memsuite->malloc_fcn;
981 mtemp->realloc_fcn = memsuite->realloc_fcn;
982 mtemp->free_fcn = memsuite->free_fcn;
983 }
984 } else {
985 XML_Memory_Handling_Suite *mtemp;
986 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
987 if (parser != NULL) {
988 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
989 mtemp->malloc_fcn = malloc;
990 mtemp->realloc_fcn = realloc;
991 mtemp->free_fcn = free;
992 }
993 }
994
995 if (! parser)
996 return parser;
997
998 parser->m_buffer = NULL;
999 parser->m_bufferLim = NULL;
1000
1001 parser->m_attsSize = INIT_ATTS_SIZE;
1002 parser->m_atts
1003 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1004 if (parser->m_atts == NULL) {
1005 FREE(parser, parser);
1006 return NULL;
1007 }
1008 #ifdef XML_ATTR_INFO
1009 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1010 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1011 if (parser->m_attInfo == NULL) {
1012 FREE(parser, parser->m_atts);
1013 FREE(parser, parser);
1014 return NULL;
1015 }
1016 #endif
1017 parser->m_dataBuf
1018 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1019 if (parser->m_dataBuf == NULL) {
1020 FREE(parser, parser->m_atts);
1021 #ifdef XML_ATTR_INFO
1022 FREE(parser, parser->m_attInfo);
1023 #endif
1024 FREE(parser, parser);
1025 return NULL;
1026 }
1027 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1028
1029 if (dtd)
1030 parser->m_dtd = dtd;
1031 else {
1032 parser->m_dtd = dtdCreate(&parser->m_mem);
1033 if (parser->m_dtd == NULL) {
1034 FREE(parser, parser->m_dataBuf);
1035 FREE(parser, parser->m_atts);
1036 #ifdef XML_ATTR_INFO
1037 FREE(parser, parser->m_attInfo);
1038 #endif
1039 FREE(parser, parser);
1040 return NULL;
1041 }
1042 }
1043
1044 parser->m_freeBindingList = NULL;
1045 parser->m_freeTagList = NULL;
1046 parser->m_freeInternalEntities = NULL;
1047
1048 parser->m_groupSize = 0;
1049 parser->m_groupConnector = NULL;
1050
1051 parser->m_unknownEncodingHandler = NULL;
1052 parser->m_unknownEncodingHandlerData = NULL;
1053
1054 parser->m_namespaceSeparator = ASCII_EXCL;
1055 parser->m_ns = XML_FALSE;
1056 parser->m_ns_triplets = XML_FALSE;
1057
1058 parser->m_nsAtts = NULL;
1059 parser->m_nsAttsVersion = 0;
1060 parser->m_nsAttsPower = 0;
1061
1062 parser->m_protocolEncodingName = NULL;
1063
1064 poolInit(&parser->m_tempPool, &(parser->m_mem));
1065 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1066 parserInit(parser, encodingName);
1067
1068 if (encodingName && ! parser->m_protocolEncodingName) {
1069 XML_ParserFree(parser);
1070 return NULL;
1071 }
1072
1073 if (nameSep) {
1074 parser->m_ns = XML_TRUE;
1075 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1076 parser->m_namespaceSeparator = *nameSep;
1077 } else {
1078 parser->m_internalEncoding = XmlGetInternalEncoding();
1079 }
1080
1081 return parser;
1082 }
1083
/* Put the per-document state of `parser` into its initial condition.
   Called from parserCreate() for a new parser and from XML_ParserReset()
   for a recycled one.

   If `encodingName` is non-NULL, the result of copyString() is stored in
   m_protocolEncodingName; that result is not checked here (parserCreate()
   treats a NULL field after this call as a failure).

   Fields that parserCreate() sets up before calling this function, such
   as m_buffer, m_dtd and m_ns, are not modified here. */
static void
parserInit(XML_Parser parser, const XML_Char *encodingName) {
  parser->m_processor = prologInitProcessor;
  XmlPrologStateInit(&parser->m_prologState);
  if (encodingName != NULL) {
    parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
  }
  parser->m_curBase = NULL;
  XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);

  /* No user data and no handlers installed */
  parser->m_userData = NULL;
  parser->m_handlerArg = NULL;
  parser->m_startElementHandler = NULL;
  parser->m_endElementHandler = NULL;
  parser->m_characterDataHandler = NULL;
  parser->m_processingInstructionHandler = NULL;
  parser->m_commentHandler = NULL;
  parser->m_startCdataSectionHandler = NULL;
  parser->m_endCdataSectionHandler = NULL;
  parser->m_defaultHandler = NULL;
  parser->m_startDoctypeDeclHandler = NULL;
  parser->m_endDoctypeDeclHandler = NULL;
  parser->m_unparsedEntityDeclHandler = NULL;
  parser->m_notationDeclHandler = NULL;
  parser->m_startNamespaceDeclHandler = NULL;
  parser->m_endNamespaceDeclHandler = NULL;
  parser->m_notStandaloneHandler = NULL;
  parser->m_externalEntityRefHandler = NULL;
  /* By default the external entity ref handler receives the parser itself */
  parser->m_externalEntityRefHandlerArg = parser;
  parser->m_skippedEntityHandler = NULL;
  parser->m_elementDeclHandler = NULL;
  parser->m_attlistDeclHandler = NULL;
  parser->m_entityDeclHandler = NULL;
  parser->m_xmlDeclHandler = NULL;

  /* Input buffer is kept, but marked as containing no data */
  parser->m_bufferPtr = parser->m_buffer;
  parser->m_bufferEnd = parser->m_buffer;
  parser->m_parseEndByteIndex = 0;
  parser->m_parseEndPtr = NULL;

  /* Declaration parsing state */
  parser->m_declElementType = NULL;
  parser->m_declAttributeId = NULL;
  parser->m_declEntity = NULL;
  parser->m_doctypeName = NULL;
  parser->m_doctypeSysid = NULL;
  parser->m_doctypePubid = NULL;
  parser->m_declAttributeType = NULL;
  parser->m_declNotationName = NULL;
  parser->m_declNotationPublicId = NULL;
  parser->m_declAttributeIsCdata = XML_FALSE;
  parser->m_declAttributeIsId = XML_FALSE;

  /* Position, error and event tracking */
  memset(&parser->m_position, 0, sizeof(POSITION));
  parser->m_errorCode = XML_ERROR_NONE;
  parser->m_eventPtr = NULL;
  parser->m_eventEndPtr = NULL;
  parser->m_positionPtr = NULL;

  /* Entity, tag and binding bookkeeping */
  parser->m_openInternalEntities = NULL;
  parser->m_defaultExpandInternalEntities = XML_TRUE;
  parser->m_tagLevel = 0;
  parser->m_tagStack = NULL;
  parser->m_inheritedBindings = NULL;
  parser->m_nSpecifiedAtts = 0;
  parser->m_unknownEncodingMem = NULL;
  parser->m_unknownEncodingRelease = NULL;
  parser->m_unknownEncodingData = NULL;
  parser->m_parentParser = NULL;
  parser->m_parsingStatus.parsing = XML_INITIALIZED;
#ifdef XML_DTD
  parser->m_isParamEntity = XML_FALSE;
  parser->m_useForeignDTD = XML_FALSE;
  parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
  /* A salt of 0 makes startParsing() generate a fresh one */
  parser->m_hash_secret_salt = 0;

#ifdef XML_DTD
  /* Accounting and entity statistics start from zero, with the debug
     levels obtained from getDebugLevel() and the default limits */
  memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
  parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
  parser->m_accounting.maximumAmplificationFactor
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
  parser->m_accounting.activationThresholdBytes
      = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;

  memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
  parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
#endif
}
1167
1168 /* moves list of bindings to m_freeBindingList */
1169 static void FASTCALL
moveToFreeBindingList(XML_Parser parser,BINDING * bindings)1170 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1171 while (bindings) {
1172 BINDING *b = bindings;
1173 bindings = bindings->nextTagBinding;
1174 b->nextTagBinding = parser->m_freeBindingList;
1175 parser->m_freeBindingList = b;
1176 }
1177 }
1178
1179 XML_Bool XMLCALL
XML_ParserReset(XML_Parser parser,const XML_Char * encodingName)1180 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1181 TAG *tStk;
1182 OPEN_INTERNAL_ENTITY *openEntityList;
1183
1184 if (parser == NULL)
1185 return XML_FALSE;
1186
1187 if (parser->m_parentParser)
1188 return XML_FALSE;
1189 /* move m_tagStack to m_freeTagList */
1190 tStk = parser->m_tagStack;
1191 while (tStk) {
1192 TAG *tag = tStk;
1193 tStk = tStk->parent;
1194 tag->parent = parser->m_freeTagList;
1195 moveToFreeBindingList(parser, tag->bindings);
1196 tag->bindings = NULL;
1197 parser->m_freeTagList = tag;
1198 }
1199 /* move m_openInternalEntities to m_freeInternalEntities */
1200 openEntityList = parser->m_openInternalEntities;
1201 while (openEntityList) {
1202 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1203 openEntityList = openEntity->next;
1204 openEntity->next = parser->m_freeInternalEntities;
1205 parser->m_freeInternalEntities = openEntity;
1206 }
1207 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1208 FREE(parser, parser->m_unknownEncodingMem);
1209 if (parser->m_unknownEncodingRelease)
1210 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1211 poolClear(&parser->m_tempPool);
1212 poolClear(&parser->m_temp2Pool);
1213 FREE(parser, (void *)parser->m_protocolEncodingName);
1214 parser->m_protocolEncodingName = NULL;
1215 parserInit(parser, encodingName);
1216 dtdReset(parser->m_dtd, &parser->m_mem);
1217 return XML_TRUE;
1218 }
1219
1220 enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser,const XML_Char * encodingName)1221 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1222 if (parser == NULL)
1223 return XML_STATUS_ERROR;
1224 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1225 XXX There's no way for the caller to determine which of the
1226 XXX possible error cases caused the XML_STATUS_ERROR return.
1227 */
1228 if (parser->m_parsingStatus.parsing == XML_PARSING
1229 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1230 return XML_STATUS_ERROR;
1231
1232 /* Get rid of any previous encoding name */
1233 FREE(parser, (void *)parser->m_protocolEncodingName);
1234
1235 if (encodingName == NULL)
1236 /* No new encoding name */
1237 parser->m_protocolEncodingName = NULL;
1238 else {
1239 /* Copy the new encoding name into allocated memory */
1240 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1241 if (! parser->m_protocolEncodingName)
1242 return XML_STATUS_ERROR;
1243 }
1244 return XML_STATUS_OK;
1245 }
1246
/* Create a child parser of `oldParser` for parsing an external entity.

   The child is created by parserCreate() with oldParser's memory
   functions and the same namespace setting.  It inherits oldParser's
   handlers, user data, namespace-triplet setting and hash salt, and
   records oldParser as its parent.

   When XML_DTD is defined and `context` is NULL, the child is an external
   parameter entity parser: it shares oldParser's DTD object and starts
   with externalParEntInitProcessor.  Otherwise the child gets its own DTD
   filled in by dtdCopy(), has `context` applied via setContext(), and
   starts with externalEntityInitProcessor.

   Returns the child parser, or NULL if `oldParser` is NULL, if
   parserCreate() fails, or if dtdCopy()/setContext() fails (the partly
   built child is freed in that case). */
XML_Parser XMLCALL
XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
                               const XML_Char *encodingName) {
  XML_Parser parser = oldParser;
  DTD *newDtd = NULL;
  DTD *oldDtd;
  XML_StartElementHandler oldStartElementHandler;
  XML_EndElementHandler oldEndElementHandler;
  XML_CharacterDataHandler oldCharacterDataHandler;
  XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
  XML_CommentHandler oldCommentHandler;
  XML_StartCdataSectionHandler oldStartCdataSectionHandler;
  XML_EndCdataSectionHandler oldEndCdataSectionHandler;
  XML_DefaultHandler oldDefaultHandler;
  XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
  XML_NotationDeclHandler oldNotationDeclHandler;
  XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
  XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
  XML_NotStandaloneHandler oldNotStandaloneHandler;
  XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
  XML_SkippedEntityHandler oldSkippedEntityHandler;
  XML_UnknownEncodingHandler oldUnknownEncodingHandler;
  XML_ElementDeclHandler oldElementDeclHandler;
  XML_AttlistDeclHandler oldAttlistDeclHandler;
  XML_EntityDeclHandler oldEntityDeclHandler;
  XML_XmlDeclHandler oldXmlDeclHandler;
  ELEMENT_TYPE *oldDeclElementType;

  void *oldUserData;
  void *oldHandlerArg;
  XML_Bool oldDefaultExpandInternalEntities;
  XML_Parser oldExternalEntityRefHandlerArg;
#ifdef XML_DTD
  enum XML_ParamEntityParsing oldParamEntityParsing;
  int oldInEntityValue;
#endif
  XML_Bool oldns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
   */
  unsigned long oldhash_secret_salt;

  /* Validate the oldParser parameter before we pull everything out of it */
  if (oldParser == NULL)
    return NULL;

  /* Stash the original parser contents on the stack */
  oldDtd = parser->m_dtd;
  oldStartElementHandler = parser->m_startElementHandler;
  oldEndElementHandler = parser->m_endElementHandler;
  oldCharacterDataHandler = parser->m_characterDataHandler;
  oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
  oldCommentHandler = parser->m_commentHandler;
  oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
  oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
  oldDefaultHandler = parser->m_defaultHandler;
  oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
  oldNotationDeclHandler = parser->m_notationDeclHandler;
  oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
  oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
  oldNotStandaloneHandler = parser->m_notStandaloneHandler;
  oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
  oldSkippedEntityHandler = parser->m_skippedEntityHandler;
  oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
  oldElementDeclHandler = parser->m_elementDeclHandler;
  oldAttlistDeclHandler = parser->m_attlistDeclHandler;
  oldEntityDeclHandler = parser->m_entityDeclHandler;
  oldXmlDeclHandler = parser->m_xmlDeclHandler;
  oldDeclElementType = parser->m_declElementType;

  oldUserData = parser->m_userData;
  oldHandlerArg = parser->m_handlerArg;
  oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
  oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
#ifdef XML_DTD
  oldParamEntityParsing = parser->m_paramEntityParsing;
  oldInEntityValue = parser->m_prologState.inEntityValue;
#endif
  oldns_triplets = parser->m_ns_triplets;
  /* Note that the new parser shares the same hash secret as the old
     parser, so that dtdCopy and copyEntityTable can lookup values
     from hash tables associated with either parser without us having
     to worry which hash secrets each table has.
   */
  oldhash_secret_salt = parser->m_hash_secret_salt;

#ifdef XML_DTD
  /* No context means an external parameter entity: reuse the old DTD
     object instead of letting parserCreate() allocate a new one */
  if (! context)
    newDtd = oldDtd;
#endif /* XML_DTD */

  /* Note that the magical uses of the pre-processor to make field
     access look more like C++ require that `parser' be overwritten
     here.  This makes this function more painful to follow than it
     would be otherwise.
  */
  if (parser->m_ns) {
    /* parserCreate() takes the separator as a string, so build a
       one-character, NUL-terminated one */
    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
    parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
  } else {
    parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
  }

  if (! parser)
    return NULL;

  /* From here on `parser' is the new child; copy the stashed state in */
  parser->m_startElementHandler = oldStartElementHandler;
  parser->m_endElementHandler = oldEndElementHandler;
  parser->m_characterDataHandler = oldCharacterDataHandler;
  parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
  parser->m_commentHandler = oldCommentHandler;
  parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
  parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
  parser->m_defaultHandler = oldDefaultHandler;
  parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
  parser->m_notationDeclHandler = oldNotationDeclHandler;
  parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
  parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
  parser->m_notStandaloneHandler = oldNotStandaloneHandler;
  parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
  parser->m_skippedEntityHandler = oldSkippedEntityHandler;
  parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
  parser->m_elementDeclHandler = oldElementDeclHandler;
  parser->m_attlistDeclHandler = oldAttlistDeclHandler;
  parser->m_entityDeclHandler = oldEntityDeclHandler;
  parser->m_xmlDeclHandler = oldXmlDeclHandler;
  parser->m_declElementType = oldDeclElementType;
  parser->m_userData = oldUserData;
  /* If the old parser passed its user data to handlers, the child does
     the same; otherwise handlers of the child receive the child itself */
  if (oldUserData == oldHandlerArg)
    parser->m_handlerArg = parser->m_userData;
  else
    parser->m_handlerArg = parser;
  /* An explicitly set handler argument is inherited; if the old argument
     was the old parser itself, the child keeps the value parserInit()
     gave it, i.e. the child parser */
  if (oldExternalEntityRefHandlerArg != oldParser)
    parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
  parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
  parser->m_ns_triplets = oldns_triplets;
  parser->m_hash_secret_salt = oldhash_secret_salt;
  parser->m_parentParser = oldParser;
#ifdef XML_DTD
  parser->m_paramEntityParsing = oldParamEntityParsing;
  parser->m_prologState.inEntityValue = oldInEntityValue;
  if (context) {
#endif /* XML_DTD */
    /* Without XML_DTD this part runs unconditionally */
    if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
        || ! setContext(parser, context)) {
      XML_ParserFree(parser);
      return NULL;
    }
    parser->m_processor = externalEntityInitProcessor;
#ifdef XML_DTD
  } else {
    /* The DTD instance referenced by parser->m_dtd is shared between the
       document's root parser and external PE parsers, therefore one does not
       need to call setContext. In addition, one also *must* not call
       setContext, because this would overwrite existing prefix->binding
       pointers in parser->m_dtd with ones that get destroyed with the external
       PE parser. This would leave those prefixes with dangling pointers.
    */
    parser->m_isParamEntity = XML_TRUE;
    XmlPrologStateInitExternalEntity(&parser->m_prologState);
    parser->m_processor = externalParEntInitProcessor;
  }
#endif /* XML_DTD */
  return parser;
}
1414
1415 static void FASTCALL
destroyBindings(BINDING * bindings,XML_Parser parser)1416 destroyBindings(BINDING *bindings, XML_Parser parser) {
1417 for (;;) {
1418 BINDING *b = bindings;
1419 if (! b)
1420 break;
1421 bindings = b->nextTagBinding;
1422 FREE(parser, b->uri);
1423 FREE(parser, b);
1424 }
1425 }
1426
1427 void XMLCALL
XML_ParserFree(XML_Parser parser)1428 XML_ParserFree(XML_Parser parser) {
1429 TAG *tagList;
1430 OPEN_INTERNAL_ENTITY *entityList;
1431 if (parser == NULL)
1432 return;
1433 /* free m_tagStack and m_freeTagList */
1434 tagList = parser->m_tagStack;
1435 for (;;) {
1436 TAG *p;
1437 if (tagList == NULL) {
1438 if (parser->m_freeTagList == NULL)
1439 break;
1440 tagList = parser->m_freeTagList;
1441 parser->m_freeTagList = NULL;
1442 }
1443 p = tagList;
1444 tagList = tagList->parent;
1445 FREE(parser, p->buf);
1446 destroyBindings(p->bindings, parser);
1447 FREE(parser, p);
1448 }
1449 /* free m_openInternalEntities and m_freeInternalEntities */
1450 entityList = parser->m_openInternalEntities;
1451 for (;;) {
1452 OPEN_INTERNAL_ENTITY *openEntity;
1453 if (entityList == NULL) {
1454 if (parser->m_freeInternalEntities == NULL)
1455 break;
1456 entityList = parser->m_freeInternalEntities;
1457 parser->m_freeInternalEntities = NULL;
1458 }
1459 openEntity = entityList;
1460 entityList = entityList->next;
1461 FREE(parser, openEntity);
1462 }
1463
1464 destroyBindings(parser->m_freeBindingList, parser);
1465 destroyBindings(parser->m_inheritedBindings, parser);
1466 poolDestroy(&parser->m_tempPool);
1467 poolDestroy(&parser->m_temp2Pool);
1468 FREE(parser, (void *)parser->m_protocolEncodingName);
1469 #ifdef XML_DTD
1470 /* external parameter entity parsers share the DTD structure
1471 parser->m_dtd with the root parser, so we must not destroy it
1472 */
1473 if (! parser->m_isParamEntity && parser->m_dtd)
1474 #else
1475 if (parser->m_dtd)
1476 #endif /* XML_DTD */
1477 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1478 &parser->m_mem);
1479 FREE(parser, (void *)parser->m_atts);
1480 #ifdef XML_ATTR_INFO
1481 FREE(parser, (void *)parser->m_attInfo);
1482 #endif
1483 FREE(parser, parser->m_groupConnector);
1484 FREE(parser, parser->m_buffer);
1485 FREE(parser, parser->m_dataBuf);
1486 FREE(parser, parser->m_nsAtts);
1487 FREE(parser, parser->m_unknownEncodingMem);
1488 if (parser->m_unknownEncodingRelease)
1489 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1490 FREE(parser, parser);
1491 }
1492
1493 void XMLCALL
XML_UseParserAsHandlerArg(XML_Parser parser)1494 XML_UseParserAsHandlerArg(XML_Parser parser) {
1495 if (parser != NULL)
1496 parser->m_handlerArg = parser;
1497 }
1498
1499 enum XML_Error XMLCALL
XML_UseForeignDTD(XML_Parser parser,XML_Bool useDTD)1500 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1501 if (parser == NULL)
1502 return XML_ERROR_INVALID_ARGUMENT;
1503 #ifdef XML_DTD
1504 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1505 if (parser->m_parsingStatus.parsing == XML_PARSING
1506 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1507 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1508 parser->m_useForeignDTD = useDTD;
1509 return XML_ERROR_NONE;
1510 #else
1511 UNUSED_P(useDTD);
1512 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1513 #endif
1514 }
1515
1516 void XMLCALL
XML_SetReturnNSTriplet(XML_Parser parser,int do_nst)1517 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1518 if (parser == NULL)
1519 return;
1520 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1521 if (parser->m_parsingStatus.parsing == XML_PARSING
1522 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1523 return;
1524 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1525 }
1526
1527 void XMLCALL
XML_SetUserData(XML_Parser parser,void * p)1528 XML_SetUserData(XML_Parser parser, void *p) {
1529 if (parser == NULL)
1530 return;
1531 if (parser->m_handlerArg == parser->m_userData)
1532 parser->m_handlerArg = parser->m_userData = p;
1533 else
1534 parser->m_userData = p;
1535 }
1536
1537 enum XML_Status XMLCALL
XML_SetBase(XML_Parser parser,const XML_Char * p)1538 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1539 if (parser == NULL)
1540 return XML_STATUS_ERROR;
1541 if (p) {
1542 p = poolCopyString(&parser->m_dtd->pool, p);
1543 if (! p)
1544 return XML_STATUS_ERROR;
1545 parser->m_curBase = p;
1546 } else
1547 parser->m_curBase = NULL;
1548 return XML_STATUS_OK;
1549 }
1550
1551 const XML_Char *XMLCALL
XML_GetBase(XML_Parser parser)1552 XML_GetBase(XML_Parser parser) {
1553 if (parser == NULL)
1554 return NULL;
1555 return parser->m_curBase;
1556 }
1557
1558 int XMLCALL
XML_GetSpecifiedAttributeCount(XML_Parser parser)1559 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1560 if (parser == NULL)
1561 return -1;
1562 return parser->m_nSpecifiedAtts;
1563 }
1564
1565 int XMLCALL
XML_GetIdAttributeIndex(XML_Parser parser)1566 XML_GetIdAttributeIndex(XML_Parser parser) {
1567 if (parser == NULL)
1568 return -1;
1569 return parser->m_idAttIndex;
1570 }
1571
1572 #ifdef XML_ATTR_INFO
1573 const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser)1574 XML_GetAttributeInfo(XML_Parser parser) {
1575 if (parser == NULL)
1576 return NULL;
1577 return parser->m_attInfo;
1578 }
1579 #endif
1580
1581 void XMLCALL
XML_SetElementHandler(XML_Parser parser,XML_StartElementHandler start,XML_EndElementHandler end)1582 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1583 XML_EndElementHandler end) {
1584 if (parser == NULL)
1585 return;
1586 parser->m_startElementHandler = start;
1587 parser->m_endElementHandler = end;
1588 }
1589
1590 void XMLCALL
XML_SetStartElementHandler(XML_Parser parser,XML_StartElementHandler start)1591 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1592 if (parser != NULL)
1593 parser->m_startElementHandler = start;
1594 }
1595
1596 void XMLCALL
XML_SetEndElementHandler(XML_Parser parser,XML_EndElementHandler end)1597 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1598 if (parser != NULL)
1599 parser->m_endElementHandler = end;
1600 }
1601
1602 void XMLCALL
XML_SetCharacterDataHandler(XML_Parser parser,XML_CharacterDataHandler handler)1603 XML_SetCharacterDataHandler(XML_Parser parser,
1604 XML_CharacterDataHandler handler) {
1605 if (parser != NULL)
1606 parser->m_characterDataHandler = handler;
1607 }
1608
1609 void XMLCALL
XML_SetProcessingInstructionHandler(XML_Parser parser,XML_ProcessingInstructionHandler handler)1610 XML_SetProcessingInstructionHandler(XML_Parser parser,
1611 XML_ProcessingInstructionHandler handler) {
1612 if (parser != NULL)
1613 parser->m_processingInstructionHandler = handler;
1614 }
1615
1616 void XMLCALL
XML_SetCommentHandler(XML_Parser parser,XML_CommentHandler handler)1617 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1618 if (parser != NULL)
1619 parser->m_commentHandler = handler;
1620 }
1621
1622 void XMLCALL
XML_SetCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start,XML_EndCdataSectionHandler end)1623 XML_SetCdataSectionHandler(XML_Parser parser,
1624 XML_StartCdataSectionHandler start,
1625 XML_EndCdataSectionHandler end) {
1626 if (parser == NULL)
1627 return;
1628 parser->m_startCdataSectionHandler = start;
1629 parser->m_endCdataSectionHandler = end;
1630 }
1631
1632 void XMLCALL
XML_SetStartCdataSectionHandler(XML_Parser parser,XML_StartCdataSectionHandler start)1633 XML_SetStartCdataSectionHandler(XML_Parser parser,
1634 XML_StartCdataSectionHandler start) {
1635 if (parser != NULL)
1636 parser->m_startCdataSectionHandler = start;
1637 }
1638
1639 void XMLCALL
XML_SetEndCdataSectionHandler(XML_Parser parser,XML_EndCdataSectionHandler end)1640 XML_SetEndCdataSectionHandler(XML_Parser parser,
1641 XML_EndCdataSectionHandler end) {
1642 if (parser != NULL)
1643 parser->m_endCdataSectionHandler = end;
1644 }
1645
1646 void XMLCALL
XML_SetDefaultHandler(XML_Parser parser,XML_DefaultHandler handler)1647 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1648 if (parser == NULL)
1649 return;
1650 parser->m_defaultHandler = handler;
1651 parser->m_defaultExpandInternalEntities = XML_FALSE;
1652 }
1653
1654 void XMLCALL
XML_SetDefaultHandlerExpand(XML_Parser parser,XML_DefaultHandler handler)1655 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1656 if (parser == NULL)
1657 return;
1658 parser->m_defaultHandler = handler;
1659 parser->m_defaultExpandInternalEntities = XML_TRUE;
1660 }
1661
1662 void XMLCALL
XML_SetDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start,XML_EndDoctypeDeclHandler end)1663 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1664 XML_EndDoctypeDeclHandler end) {
1665 if (parser == NULL)
1666 return;
1667 parser->m_startDoctypeDeclHandler = start;
1668 parser->m_endDoctypeDeclHandler = end;
1669 }
1670
1671 void XMLCALL
XML_SetStartDoctypeDeclHandler(XML_Parser parser,XML_StartDoctypeDeclHandler start)1672 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1673 XML_StartDoctypeDeclHandler start) {
1674 if (parser != NULL)
1675 parser->m_startDoctypeDeclHandler = start;
1676 }
1677
1678 void XMLCALL
XML_SetEndDoctypeDeclHandler(XML_Parser parser,XML_EndDoctypeDeclHandler end)1679 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1680 if (parser != NULL)
1681 parser->m_endDoctypeDeclHandler = end;
1682 }
1683
1684 void XMLCALL
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,XML_UnparsedEntityDeclHandler handler)1685 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1686 XML_UnparsedEntityDeclHandler handler) {
1687 if (parser != NULL)
1688 parser->m_unparsedEntityDeclHandler = handler;
1689 }
1690
1691 void XMLCALL
XML_SetNotationDeclHandler(XML_Parser parser,XML_NotationDeclHandler handler)1692 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1693 if (parser != NULL)
1694 parser->m_notationDeclHandler = handler;
1695 }
1696
1697 void XMLCALL
XML_SetNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start,XML_EndNamespaceDeclHandler end)1698 XML_SetNamespaceDeclHandler(XML_Parser parser,
1699 XML_StartNamespaceDeclHandler start,
1700 XML_EndNamespaceDeclHandler end) {
1701 if (parser == NULL)
1702 return;
1703 parser->m_startNamespaceDeclHandler = start;
1704 parser->m_endNamespaceDeclHandler = end;
1705 }
1706
1707 void XMLCALL
XML_SetStartNamespaceDeclHandler(XML_Parser parser,XML_StartNamespaceDeclHandler start)1708 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1709 XML_StartNamespaceDeclHandler start) {
1710 if (parser != NULL)
1711 parser->m_startNamespaceDeclHandler = start;
1712 }
1713
1714 void XMLCALL
XML_SetEndNamespaceDeclHandler(XML_Parser parser,XML_EndNamespaceDeclHandler end)1715 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1716 XML_EndNamespaceDeclHandler end) {
1717 if (parser != NULL)
1718 parser->m_endNamespaceDeclHandler = end;
1719 }
1720
1721 void XMLCALL
XML_SetNotStandaloneHandler(XML_Parser parser,XML_NotStandaloneHandler handler)1722 XML_SetNotStandaloneHandler(XML_Parser parser,
1723 XML_NotStandaloneHandler handler) {
1724 if (parser != NULL)
1725 parser->m_notStandaloneHandler = handler;
1726 }
1727
1728 void XMLCALL
XML_SetExternalEntityRefHandler(XML_Parser parser,XML_ExternalEntityRefHandler handler)1729 XML_SetExternalEntityRefHandler(XML_Parser parser,
1730 XML_ExternalEntityRefHandler handler) {
1731 if (parser != NULL)
1732 parser->m_externalEntityRefHandler = handler;
1733 }
1734
1735 void XMLCALL
XML_SetExternalEntityRefHandlerArg(XML_Parser parser,void * arg)1736 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1737 if (parser == NULL)
1738 return;
1739 if (arg)
1740 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1741 else
1742 parser->m_externalEntityRefHandlerArg = parser;
1743 }
1744
1745 void XMLCALL
XML_SetSkippedEntityHandler(XML_Parser parser,XML_SkippedEntityHandler handler)1746 XML_SetSkippedEntityHandler(XML_Parser parser,
1747 XML_SkippedEntityHandler handler) {
1748 if (parser != NULL)
1749 parser->m_skippedEntityHandler = handler;
1750 }
1751
1752 void XMLCALL
XML_SetUnknownEncodingHandler(XML_Parser parser,XML_UnknownEncodingHandler handler,void * data)1753 XML_SetUnknownEncodingHandler(XML_Parser parser,
1754 XML_UnknownEncodingHandler handler, void *data) {
1755 if (parser == NULL)
1756 return;
1757 parser->m_unknownEncodingHandler = handler;
1758 parser->m_unknownEncodingHandlerData = data;
1759 }
1760
1761 void XMLCALL
XML_SetElementDeclHandler(XML_Parser parser,XML_ElementDeclHandler eldecl)1762 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1763 if (parser != NULL)
1764 parser->m_elementDeclHandler = eldecl;
1765 }
1766
1767 void XMLCALL
XML_SetAttlistDeclHandler(XML_Parser parser,XML_AttlistDeclHandler attdecl)1768 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1769 if (parser != NULL)
1770 parser->m_attlistDeclHandler = attdecl;
1771 }
1772
1773 void XMLCALL
XML_SetEntityDeclHandler(XML_Parser parser,XML_EntityDeclHandler handler)1774 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1775 if (parser != NULL)
1776 parser->m_entityDeclHandler = handler;
1777 }
1778
1779 void XMLCALL
XML_SetXmlDeclHandler(XML_Parser parser,XML_XmlDeclHandler handler)1780 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1781 if (parser != NULL)
1782 parser->m_xmlDeclHandler = handler;
1783 }
1784
1785 int XMLCALL
XML_SetParamEntityParsing(XML_Parser parser,enum XML_ParamEntityParsing peParsing)1786 XML_SetParamEntityParsing(XML_Parser parser,
1787 enum XML_ParamEntityParsing peParsing) {
1788 if (parser == NULL)
1789 return 0;
1790 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1791 if (parser->m_parsingStatus.parsing == XML_PARSING
1792 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1793 return 0;
1794 #ifdef XML_DTD
1795 parser->m_paramEntityParsing = peParsing;
1796 return 1;
1797 #else
1798 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
1799 #endif
1800 }
1801
1802 int XMLCALL
XML_SetHashSalt(XML_Parser parser,unsigned long hash_salt)1803 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
1804 if (parser == NULL)
1805 return 0;
1806 if (parser->m_parentParser)
1807 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
1808 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1809 if (parser->m_parsingStatus.parsing == XML_PARSING
1810 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
1811 return 0;
1812 parser->m_hash_secret_salt = hash_salt;
1813 return 1;
1814 }
1815
1816 enum XML_Status XMLCALL
XML_Parse(XML_Parser parser,const char * s,int len,int isFinal)1817 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
1818 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1819 if (parser != NULL)
1820 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
1821 return XML_STATUS_ERROR;
1822 }
1823 switch (parser->m_parsingStatus.parsing) {
1824 case XML_SUSPENDED:
1825 parser->m_errorCode = XML_ERROR_SUSPENDED;
1826 return XML_STATUS_ERROR;
1827 case XML_FINISHED:
1828 parser->m_errorCode = XML_ERROR_FINISHED;
1829 return XML_STATUS_ERROR;
1830 case XML_INITIALIZED:
1831 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1832 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1833 return XML_STATUS_ERROR;
1834 }
1835 /* fall through */
1836 default:
1837 parser->m_parsingStatus.parsing = XML_PARSING;
1838 }
1839
1840 if (len == 0) {
1841 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1842 if (! isFinal)
1843 return XML_STATUS_OK;
1844 parser->m_positionPtr = parser->m_bufferPtr;
1845 parser->m_parseEndPtr = parser->m_bufferEnd;
1846
1847 /* If data are left over from last buffer, and we now know that these
1848 data are the final chunk of input, then we have to check them again
1849 to detect errors based on that fact.
1850 */
1851 parser->m_errorCode
1852 = parser->m_processor(parser, parser->m_bufferPtr,
1853 parser->m_parseEndPtr, &parser->m_bufferPtr);
1854
1855 if (parser->m_errorCode == XML_ERROR_NONE) {
1856 switch (parser->m_parsingStatus.parsing) {
1857 case XML_SUSPENDED:
1858 /* It is hard to be certain, but it seems that this case
1859 * cannot occur. This code is cleaning up a previous parse
1860 * with no new data (since len == 0). Changing the parsing
1861 * state requires getting to execute a handler function, and
1862 * there doesn't seem to be an opportunity for that while in
1863 * this circumstance.
1864 *
1865 * Given the uncertainty, we retain the code but exclude it
1866 * from coverage tests.
1867 *
1868 * LCOV_EXCL_START
1869 */
1870 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1871 parser->m_bufferPtr, &parser->m_position);
1872 parser->m_positionPtr = parser->m_bufferPtr;
1873 return XML_STATUS_SUSPENDED;
1874 /* LCOV_EXCL_STOP */
1875 case XML_INITIALIZED:
1876 case XML_PARSING:
1877 parser->m_parsingStatus.parsing = XML_FINISHED;
1878 /* fall through */
1879 default:
1880 return XML_STATUS_OK;
1881 }
1882 }
1883 parser->m_eventEndPtr = parser->m_eventPtr;
1884 parser->m_processor = errorProcessor;
1885 return XML_STATUS_ERROR;
1886 }
1887 #ifndef XML_CONTEXT_BYTES
1888 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
1889 const char *end;
1890 int nLeftOver;
1891 enum XML_Status result;
1892 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1893 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1894 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1895 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1896 parser->m_processor = errorProcessor;
1897 return XML_STATUS_ERROR;
1898 }
1899 parser->m_parseEndByteIndex += len;
1900 parser->m_positionPtr = s;
1901 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
1902
1903 parser->m_errorCode
1904 = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
1905
1906 if (parser->m_errorCode != XML_ERROR_NONE) {
1907 parser->m_eventEndPtr = parser->m_eventPtr;
1908 parser->m_processor = errorProcessor;
1909 return XML_STATUS_ERROR;
1910 } else {
1911 switch (parser->m_parsingStatus.parsing) {
1912 case XML_SUSPENDED:
1913 result = XML_STATUS_SUSPENDED;
1914 break;
1915 case XML_INITIALIZED:
1916 case XML_PARSING:
1917 if (isFinal) {
1918 parser->m_parsingStatus.parsing = XML_FINISHED;
1919 return XML_STATUS_OK;
1920 }
1921 /* fall through */
1922 default:
1923 result = XML_STATUS_OK;
1924 }
1925 }
1926
1927 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1928 &parser->m_position);
1929 nLeftOver = s + len - end;
1930 if (nLeftOver) {
1931 if (parser->m_buffer == NULL
1932 || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
1933 /* avoid _signed_ integer overflow */
1934 char *temp = NULL;
1935 const int bytesToAllocate = (int)((unsigned)len * 2U);
1936 if (bytesToAllocate > 0) {
1937 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
1938 }
1939 if (temp == NULL) {
1940 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1941 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1942 parser->m_processor = errorProcessor;
1943 return XML_STATUS_ERROR;
1944 }
1945 parser->m_buffer = temp;
1946 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
1947 }
1948 memcpy(parser->m_buffer, end, nLeftOver);
1949 }
1950 parser->m_bufferPtr = parser->m_buffer;
1951 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1952 parser->m_positionPtr = parser->m_bufferPtr;
1953 parser->m_parseEndPtr = parser->m_bufferEnd;
1954 parser->m_eventPtr = parser->m_bufferPtr;
1955 parser->m_eventEndPtr = parser->m_bufferPtr;
1956 return result;
1957 }
1958 #endif /* not defined XML_CONTEXT_BYTES */
1959 else {
1960 void *buff = XML_GetBuffer(parser, len);
1961 if (buff == NULL)
1962 return XML_STATUS_ERROR;
1963 else {
1964 memcpy(buff, s, len);
1965 return XML_ParseBuffer(parser, len, isFinal);
1966 }
1967 }
1968 }
1969
1970 enum XML_Status XMLCALL
XML_ParseBuffer(XML_Parser parser,int len,int isFinal)1971 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
1972 const char *start;
1973 enum XML_Status result = XML_STATUS_OK;
1974
1975 if (parser == NULL)
1976 return XML_STATUS_ERROR;
1977 switch (parser->m_parsingStatus.parsing) {
1978 case XML_SUSPENDED:
1979 parser->m_errorCode = XML_ERROR_SUSPENDED;
1980 return XML_STATUS_ERROR;
1981 case XML_FINISHED:
1982 parser->m_errorCode = XML_ERROR_FINISHED;
1983 return XML_STATUS_ERROR;
1984 case XML_INITIALIZED:
1985 /* Has someone called XML_GetBuffer successfully before? */
1986 if (! parser->m_bufferPtr) {
1987 parser->m_errorCode = XML_ERROR_NO_BUFFER;
1988 return XML_STATUS_ERROR;
1989 }
1990
1991 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
1992 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1993 return XML_STATUS_ERROR;
1994 }
1995 /* fall through */
1996 default:
1997 parser->m_parsingStatus.parsing = XML_PARSING;
1998 }
1999
2000 start = parser->m_bufferPtr;
2001 parser->m_positionPtr = start;
2002 parser->m_bufferEnd += len;
2003 parser->m_parseEndPtr = parser->m_bufferEnd;
2004 parser->m_parseEndByteIndex += len;
2005 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2006
2007 parser->m_errorCode = parser->m_processor(
2008 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
2009
2010 if (parser->m_errorCode != XML_ERROR_NONE) {
2011 parser->m_eventEndPtr = parser->m_eventPtr;
2012 parser->m_processor = errorProcessor;
2013 return XML_STATUS_ERROR;
2014 } else {
2015 switch (parser->m_parsingStatus.parsing) {
2016 case XML_SUSPENDED:
2017 result = XML_STATUS_SUSPENDED;
2018 break;
2019 case XML_INITIALIZED:
2020 case XML_PARSING:
2021 if (isFinal) {
2022 parser->m_parsingStatus.parsing = XML_FINISHED;
2023 return result;
2024 }
2025 default:; /* should not happen */
2026 }
2027 }
2028
2029 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2030 parser->m_bufferPtr, &parser->m_position);
2031 parser->m_positionPtr = parser->m_bufferPtr;
2032 return result;
2033 }
2034
2035 void *XMLCALL
XML_GetBuffer(XML_Parser parser,int len)2036 XML_GetBuffer(XML_Parser parser, int len) {
2037 if (parser == NULL)
2038 return NULL;
2039 if (len < 0) {
2040 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2041 return NULL;
2042 }
2043 switch (parser->m_parsingStatus.parsing) {
2044 case XML_SUSPENDED:
2045 parser->m_errorCode = XML_ERROR_SUSPENDED;
2046 return NULL;
2047 case XML_FINISHED:
2048 parser->m_errorCode = XML_ERROR_FINISHED;
2049 return NULL;
2050 default:;
2051 }
2052
2053 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
2054 #ifdef XML_CONTEXT_BYTES
2055 int keep;
2056 #endif /* defined XML_CONTEXT_BYTES */
2057 /* Do not invoke signed arithmetic overflow: */
2058 int neededSize = (int)((unsigned)len
2059 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2060 parser->m_bufferEnd, parser->m_bufferPtr));
2061 if (neededSize < 0) {
2062 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2063 return NULL;
2064 }
2065 #ifdef XML_CONTEXT_BYTES
2066 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2067 if (keep > XML_CONTEXT_BYTES)
2068 keep = XML_CONTEXT_BYTES;
2069 /* Detect and prevent integer overflow */
2070 if (keep > INT_MAX - neededSize) {
2071 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2072 return NULL;
2073 }
2074 neededSize += keep;
2075 #endif /* defined XML_CONTEXT_BYTES */
2076 if (neededSize
2077 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2078 #ifdef XML_CONTEXT_BYTES
2079 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2080 int offset
2081 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2082 - keep;
2083 /* The buffer pointers cannot be NULL here; we have at least some bytes
2084 * in the buffer */
2085 memmove(parser->m_buffer, &parser->m_buffer[offset],
2086 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2087 parser->m_bufferEnd -= offset;
2088 parser->m_bufferPtr -= offset;
2089 }
2090 #else
2091 if (parser->m_buffer && parser->m_bufferPtr) {
2092 memmove(parser->m_buffer, parser->m_bufferPtr,
2093 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2094 parser->m_bufferEnd
2095 = parser->m_buffer
2096 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2097 parser->m_bufferPtr = parser->m_buffer;
2098 }
2099 #endif /* not defined XML_CONTEXT_BYTES */
2100 } else {
2101 char *newBuf;
2102 int bufferSize
2103 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
2104 if (bufferSize == 0)
2105 bufferSize = INIT_BUFFER_SIZE;
2106 do {
2107 /* Do not invoke signed arithmetic overflow: */
2108 bufferSize = (int)(2U * (unsigned)bufferSize);
2109 } while (bufferSize < neededSize && bufferSize > 0);
2110 if (bufferSize <= 0) {
2111 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2112 return NULL;
2113 }
2114 newBuf = (char *)MALLOC(parser, bufferSize);
2115 if (newBuf == 0) {
2116 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2117 return NULL;
2118 }
2119 parser->m_bufferLim = newBuf + bufferSize;
2120 #ifdef XML_CONTEXT_BYTES
2121 if (parser->m_bufferPtr) {
2122 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2123 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2124 + keep);
2125 FREE(parser, parser->m_buffer);
2126 parser->m_buffer = newBuf;
2127 parser->m_bufferEnd
2128 = parser->m_buffer
2129 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2130 + keep;
2131 parser->m_bufferPtr = parser->m_buffer + keep;
2132 } else {
2133 /* This must be a brand new buffer with no data in it yet */
2134 parser->m_bufferEnd = newBuf;
2135 parser->m_bufferPtr = parser->m_buffer = newBuf;
2136 }
2137 #else
2138 if (parser->m_bufferPtr) {
2139 memcpy(newBuf, parser->m_bufferPtr,
2140 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2141 FREE(parser, parser->m_buffer);
2142 parser->m_bufferEnd
2143 = newBuf
2144 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2145 } else {
2146 /* This must be a brand new buffer with no data in it yet */
2147 parser->m_bufferEnd = newBuf;
2148 }
2149 parser->m_bufferPtr = parser->m_buffer = newBuf;
2150 #endif /* not defined XML_CONTEXT_BYTES */
2151 }
2152 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2153 parser->m_positionPtr = NULL;
2154 }
2155 return parser->m_bufferEnd;
2156 }
2157
2158 enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser,XML_Bool resumable)2159 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2160 if (parser == NULL)
2161 return XML_STATUS_ERROR;
2162 switch (parser->m_parsingStatus.parsing) {
2163 case XML_SUSPENDED:
2164 if (resumable) {
2165 parser->m_errorCode = XML_ERROR_SUSPENDED;
2166 return XML_STATUS_ERROR;
2167 }
2168 parser->m_parsingStatus.parsing = XML_FINISHED;
2169 break;
2170 case XML_FINISHED:
2171 parser->m_errorCode = XML_ERROR_FINISHED;
2172 return XML_STATUS_ERROR;
2173 default:
2174 if (resumable) {
2175 #ifdef XML_DTD
2176 if (parser->m_isParamEntity) {
2177 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2178 return XML_STATUS_ERROR;
2179 }
2180 #endif
2181 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2182 } else
2183 parser->m_parsingStatus.parsing = XML_FINISHED;
2184 }
2185 return XML_STATUS_OK;
2186 }
2187
2188 enum XML_Status XMLCALL
XML_ResumeParser(XML_Parser parser)2189 XML_ResumeParser(XML_Parser parser) {
2190 enum XML_Status result = XML_STATUS_OK;
2191
2192 if (parser == NULL)
2193 return XML_STATUS_ERROR;
2194 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2195 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2196 return XML_STATUS_ERROR;
2197 }
2198 parser->m_parsingStatus.parsing = XML_PARSING;
2199
2200 parser->m_errorCode = parser->m_processor(
2201 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2202
2203 if (parser->m_errorCode != XML_ERROR_NONE) {
2204 parser->m_eventEndPtr = parser->m_eventPtr;
2205 parser->m_processor = errorProcessor;
2206 return XML_STATUS_ERROR;
2207 } else {
2208 switch (parser->m_parsingStatus.parsing) {
2209 case XML_SUSPENDED:
2210 result = XML_STATUS_SUSPENDED;
2211 break;
2212 case XML_INITIALIZED:
2213 case XML_PARSING:
2214 if (parser->m_parsingStatus.finalBuffer) {
2215 parser->m_parsingStatus.parsing = XML_FINISHED;
2216 return result;
2217 }
2218 default:;
2219 }
2220 }
2221
2222 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2223 parser->m_bufferPtr, &parser->m_position);
2224 parser->m_positionPtr = parser->m_bufferPtr;
2225 return result;
2226 }
2227
2228 void XMLCALL
XML_GetParsingStatus(XML_Parser parser,XML_ParsingStatus * status)2229 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2230 if (parser == NULL)
2231 return;
2232 assert(status != NULL);
2233 *status = parser->m_parsingStatus;
2234 }
2235
2236 enum XML_Error XMLCALL
XML_GetErrorCode(XML_Parser parser)2237 XML_GetErrorCode(XML_Parser parser) {
2238 if (parser == NULL)
2239 return XML_ERROR_INVALID_ARGUMENT;
2240 return parser->m_errorCode;
2241 }
2242
2243 XML_Index XMLCALL
XML_GetCurrentByteIndex(XML_Parser parser)2244 XML_GetCurrentByteIndex(XML_Parser parser) {
2245 if (parser == NULL)
2246 return -1;
2247 if (parser->m_eventPtr)
2248 return (XML_Index)(parser->m_parseEndByteIndex
2249 - (parser->m_parseEndPtr - parser->m_eventPtr));
2250 return -1;
2251 }
2252
2253 int XMLCALL
XML_GetCurrentByteCount(XML_Parser parser)2254 XML_GetCurrentByteCount(XML_Parser parser) {
2255 if (parser == NULL)
2256 return 0;
2257 if (parser->m_eventEndPtr && parser->m_eventPtr)
2258 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2259 return 0;
2260 }
2261
2262 const char *XMLCALL
XML_GetInputContext(XML_Parser parser,int * offset,int * size)2263 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2264 #ifdef XML_CONTEXT_BYTES
2265 if (parser == NULL)
2266 return NULL;
2267 if (parser->m_eventPtr && parser->m_buffer) {
2268 if (offset != NULL)
2269 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2270 if (size != NULL)
2271 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2272 return parser->m_buffer;
2273 }
2274 #else
2275 (void)parser;
2276 (void)offset;
2277 (void)size;
2278 #endif /* defined XML_CONTEXT_BYTES */
2279 return (const char *)0;
2280 }
2281
2282 XML_Size XMLCALL
XML_GetCurrentLineNumber(XML_Parser parser)2283 XML_GetCurrentLineNumber(XML_Parser parser) {
2284 if (parser == NULL)
2285 return 0;
2286 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2287 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2288 parser->m_eventPtr, &parser->m_position);
2289 parser->m_positionPtr = parser->m_eventPtr;
2290 }
2291 return parser->m_position.lineNumber + 1;
2292 }
2293
2294 XML_Size XMLCALL
XML_GetCurrentColumnNumber(XML_Parser parser)2295 XML_GetCurrentColumnNumber(XML_Parser parser) {
2296 if (parser == NULL)
2297 return 0;
2298 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2299 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2300 parser->m_eventPtr, &parser->m_position);
2301 parser->m_positionPtr = parser->m_eventPtr;
2302 }
2303 return parser->m_position.columnNumber;
2304 }
2305
2306 void XMLCALL
XML_FreeContentModel(XML_Parser parser,XML_Content * model)2307 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2308 if (parser != NULL)
2309 FREE(parser, model);
2310 }
2311
2312 void *XMLCALL
XML_MemMalloc(XML_Parser parser,size_t size)2313 XML_MemMalloc(XML_Parser parser, size_t size) {
2314 if (parser == NULL)
2315 return NULL;
2316 return MALLOC(parser, size);
2317 }
2318
2319 void *XMLCALL
XML_MemRealloc(XML_Parser parser,void * ptr,size_t size)2320 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2321 if (parser == NULL)
2322 return NULL;
2323 return REALLOC(parser, ptr, size);
2324 }
2325
2326 void XMLCALL
XML_MemFree(XML_Parser parser,void * ptr)2327 XML_MemFree(XML_Parser parser, void *ptr) {
2328 if (parser != NULL)
2329 FREE(parser, ptr);
2330 }
2331
2332 void XMLCALL
XML_DefaultCurrent(XML_Parser parser)2333 XML_DefaultCurrent(XML_Parser parser) {
2334 if (parser == NULL)
2335 return;
2336 if (parser->m_defaultHandler) {
2337 if (parser->m_openInternalEntities)
2338 reportDefault(parser, parser->m_internalEncoding,
2339 parser->m_openInternalEntities->internalEventPtr,
2340 parser->m_openInternalEntities->internalEventEndPtr);
2341 else
2342 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2343 parser->m_eventEndPtr);
2344 }
2345 }
2346
2347 const XML_LChar *XMLCALL
XML_ErrorString(enum XML_Error code)2348 XML_ErrorString(enum XML_Error code) {
2349 switch (code) {
2350 case XML_ERROR_NONE:
2351 return NULL;
2352 case XML_ERROR_NO_MEMORY:
2353 return XML_L("out of memory");
2354 case XML_ERROR_SYNTAX:
2355 return XML_L("syntax error");
2356 case XML_ERROR_NO_ELEMENTS:
2357 return XML_L("no element found");
2358 case XML_ERROR_INVALID_TOKEN:
2359 return XML_L("not well-formed (invalid token)");
2360 case XML_ERROR_UNCLOSED_TOKEN:
2361 return XML_L("unclosed token");
2362 case XML_ERROR_PARTIAL_CHAR:
2363 return XML_L("partial character");
2364 case XML_ERROR_TAG_MISMATCH:
2365 return XML_L("mismatched tag");
2366 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2367 return XML_L("duplicate attribute");
2368 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2369 return XML_L("junk after document element");
2370 case XML_ERROR_PARAM_ENTITY_REF:
2371 return XML_L("illegal parameter entity reference");
2372 case XML_ERROR_UNDEFINED_ENTITY:
2373 return XML_L("undefined entity");
2374 case XML_ERROR_RECURSIVE_ENTITY_REF:
2375 return XML_L("recursive entity reference");
2376 case XML_ERROR_ASYNC_ENTITY:
2377 return XML_L("asynchronous entity");
2378 case XML_ERROR_BAD_CHAR_REF:
2379 return XML_L("reference to invalid character number");
2380 case XML_ERROR_BINARY_ENTITY_REF:
2381 return XML_L("reference to binary entity");
2382 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2383 return XML_L("reference to external entity in attribute");
2384 case XML_ERROR_MISPLACED_XML_PI:
2385 return XML_L("XML or text declaration not at start of entity");
2386 case XML_ERROR_UNKNOWN_ENCODING:
2387 return XML_L("unknown encoding");
2388 case XML_ERROR_INCORRECT_ENCODING:
2389 return XML_L("encoding specified in XML declaration is incorrect");
2390 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2391 return XML_L("unclosed CDATA section");
2392 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2393 return XML_L("error in processing external entity reference");
2394 case XML_ERROR_NOT_STANDALONE:
2395 return XML_L("document is not standalone");
2396 case XML_ERROR_UNEXPECTED_STATE:
2397 return XML_L("unexpected parser state - please send a bug report");
2398 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2399 return XML_L("entity declared in parameter entity");
2400 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2401 return XML_L("requested feature requires XML_DTD support in Expat");
2402 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2403 return XML_L("cannot change setting once parsing has begun");
2404 /* Added in 1.95.7. */
2405 case XML_ERROR_UNBOUND_PREFIX:
2406 return XML_L("unbound prefix");
2407 /* Added in 1.95.8. */
2408 case XML_ERROR_UNDECLARING_PREFIX:
2409 return XML_L("must not undeclare prefix");
2410 case XML_ERROR_INCOMPLETE_PE:
2411 return XML_L("incomplete markup in parameter entity");
2412 case XML_ERROR_XML_DECL:
2413 return XML_L("XML declaration not well-formed");
2414 case XML_ERROR_TEXT_DECL:
2415 return XML_L("text declaration not well-formed");
2416 case XML_ERROR_PUBLICID:
2417 return XML_L("illegal character(s) in public id");
2418 case XML_ERROR_SUSPENDED:
2419 return XML_L("parser suspended");
2420 case XML_ERROR_NOT_SUSPENDED:
2421 return XML_L("parser not suspended");
2422 case XML_ERROR_ABORTED:
2423 return XML_L("parsing aborted");
2424 case XML_ERROR_FINISHED:
2425 return XML_L("parsing finished");
2426 case XML_ERROR_SUSPEND_PE:
2427 return XML_L("cannot suspend in external parameter entity");
2428 /* Added in 2.0.0. */
2429 case XML_ERROR_RESERVED_PREFIX_XML:
2430 return XML_L(
2431 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2432 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2433 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2434 case XML_ERROR_RESERVED_NAMESPACE_URI:
2435 return XML_L(
2436 "prefix must not be bound to one of the reserved namespace names");
2437 /* Added in 2.2.5. */
2438 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2439 return XML_L("invalid argument");
2440 /* Added in 2.3.0. */
2441 case XML_ERROR_NO_BUFFER:
2442 return XML_L(
2443 "a successful prior call to function XML_GetBuffer is required");
2444 /* Added in 2.4.0. */
2445 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2446 return XML_L(
2447 "limit on input amplification factor (from DTD and entities) breached");
2448 }
2449 return NULL;
2450 }
2451
2452 const XML_LChar *XMLCALL
XML_ExpatVersion(void)2453 XML_ExpatVersion(void) {
2454 /* V1 is used to string-ize the version number. However, it would
2455 string-ize the actual version macro *names* unless we get them
2456 substituted before being passed to V1. CPP is defined to expand
2457 a macro, then rescan for more expansions. Thus, we use V2 to expand
2458 the version macros, then CPP will expand the resulting V1() macro
2459 with the correct numerals. */
2460 /* ### I'm assuming cpp is portable in this respect... */
2461
2462 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2463 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2464
2465 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2466
2467 #undef V1
2468 #undef V2
2469 }
2470
2471 XML_Expat_Version XMLCALL
XML_ExpatVersionInfo(void)2472 XML_ExpatVersionInfo(void) {
2473 XML_Expat_Version version;
2474
2475 version.major = XML_MAJOR_VERSION;
2476 version.minor = XML_MINOR_VERSION;
2477 version.micro = XML_MICRO_VERSION;
2478
2479 return version;
2480 }
2481
2482 const XML_Feature *XMLCALL
XML_GetFeatureList(void)2483 XML_GetFeatureList(void) {
2484 static const XML_Feature features[] = {
2485 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2486 sizeof(XML_Char)},
2487 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2488 sizeof(XML_LChar)},
2489 #ifdef XML_UNICODE
2490 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2491 #endif
2492 #ifdef XML_UNICODE_WCHAR_T
2493 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2494 #endif
2495 #ifdef XML_DTD
2496 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2497 #endif
2498 #ifdef XML_CONTEXT_BYTES
2499 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2500 XML_CONTEXT_BYTES},
2501 #endif
2502 #ifdef XML_MIN_SIZE
2503 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2504 #endif
2505 #ifdef XML_NS
2506 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2507 #endif
2508 #ifdef XML_LARGE_SIZE
2509 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2510 #endif
2511 #ifdef XML_ATTR_INFO
2512 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2513 #endif
2514 #ifdef XML_DTD
2515 /* Added in Expat 2.4.0. */
2516 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2517 XML_L("XML_BLAP_MAX_AMP"),
2518 (long int)
2519 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2520 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2521 XML_L("XML_BLAP_ACT_THRES"),
2522 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2523 #endif
2524 {XML_FEATURE_END, NULL, 0}};
2525
2526 return features;
2527 }
2528
2529 #ifdef XML_DTD
2530 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser parser,float maximumAmplificationFactor)2531 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2532 XML_Parser parser, float maximumAmplificationFactor) {
2533 if ((parser == NULL) || (parser->m_parentParser != NULL)
2534 || isnan(maximumAmplificationFactor)
2535 || (maximumAmplificationFactor < 1.0f)) {
2536 return XML_FALSE;
2537 }
2538 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2539 return XML_TRUE;
2540 }
2541
2542 XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser parser,unsigned long long activationThresholdBytes)2543 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2544 XML_Parser parser, unsigned long long activationThresholdBytes) {
2545 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2546 return XML_FALSE;
2547 }
2548 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2549 return XML_TRUE;
2550 }
2551 #endif /* XML_DTD */
2552
2553 /* Initially tag->rawName always points into the parse buffer;
2554 for those TAG instances opened while the current parse buffer was
2555 processed, and not yet closed, we need to store tag->rawName in a more
2556 permanent location, since the parse buffer is about to be discarded.
2557 */
2558 static XML_Bool
storeRawNames(XML_Parser parser)2559 storeRawNames(XML_Parser parser) {
2560 TAG *tag = parser->m_tagStack;
2561 while (tag) {
2562 int bufSize;
2563 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2564 size_t rawNameLen;
2565 char *rawNameBuf = tag->buf + nameLen;
2566 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2567 at the first entry that has already been copied; everything
2568 below it in the stack is already been accounted for in a
2569 previous call to this function.
2570 */
2571 if (tag->rawName == rawNameBuf)
2572 break;
2573 /* For re-use purposes we need to ensure that the
2574 size of tag->buf is a multiple of sizeof(XML_Char).
2575 */
2576 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2577 /* Detect and prevent integer overflow. */
2578 if (rawNameLen > (size_t)INT_MAX - nameLen)
2579 return XML_FALSE;
2580 bufSize = nameLen + (int)rawNameLen;
2581 if (bufSize > tag->bufEnd - tag->buf) {
2582 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2583 if (temp == NULL)
2584 return XML_FALSE;
2585 /* if tag->name.str points to tag->buf (only when namespace
2586 processing is off) then we have to update it
2587 */
2588 if (tag->name.str == (XML_Char *)tag->buf)
2589 tag->name.str = (XML_Char *)temp;
2590 /* if tag->name.localPart is set (when namespace processing is on)
2591 then update it as well, since it will always point into tag->buf
2592 */
2593 if (tag->name.localPart)
2594 tag->name.localPart
2595 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2596 tag->buf = temp;
2597 tag->bufEnd = temp + bufSize;
2598 rawNameBuf = temp + nameLen;
2599 }
2600 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2601 tag->rawName = rawNameBuf;
2602 tag = tag->parent;
2603 }
2604 return XML_TRUE;
2605 }
2606
2607 static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2608 contentProcessor(XML_Parser parser, const char *start, const char *end,
2609 const char **endPtr) {
2610 enum XML_Error result = doContent(
2611 parser, 0, parser->m_encoding, start, end, endPtr,
2612 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
2613 if (result == XML_ERROR_NONE) {
2614 if (! storeRawNames(parser))
2615 return XML_ERROR_NO_MEMORY;
2616 }
2617 return result;
2618 }
2619
2620 static enum XML_Error PTRCALL
externalEntityInitProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2621 externalEntityInitProcessor(XML_Parser parser, const char *start,
2622 const char *end, const char **endPtr) {
2623 enum XML_Error result = initializeEncoding(parser);
2624 if (result != XML_ERROR_NONE)
2625 return result;
2626 parser->m_processor = externalEntityInitProcessor2;
2627 return externalEntityInitProcessor2(parser, start, end, endPtr);
2628 }
2629
2630 static enum XML_Error PTRCALL
externalEntityInitProcessor2(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2631 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2632 const char *end, const char **endPtr) {
2633 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2634 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2635 switch (tok) {
2636 case XML_TOK_BOM:
2637 #ifdef XML_DTD
2638 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2639 XML_ACCOUNT_DIRECT)) {
2640 accountingOnAbort(parser);
2641 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2642 }
2643 #endif /* XML_DTD */
2644
2645 /* If we are at the end of the buffer, this would cause the next stage,
2646 i.e. externalEntityInitProcessor3, to pass control directly to
2647 doContent (by detecting XML_TOK_NONE) without processing any xml text
2648 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2649 */
2650 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2651 *endPtr = next;
2652 return XML_ERROR_NONE;
2653 }
2654 start = next;
2655 break;
2656 case XML_TOK_PARTIAL:
2657 if (! parser->m_parsingStatus.finalBuffer) {
2658 *endPtr = start;
2659 return XML_ERROR_NONE;
2660 }
2661 parser->m_eventPtr = start;
2662 return XML_ERROR_UNCLOSED_TOKEN;
2663 case XML_TOK_PARTIAL_CHAR:
2664 if (! parser->m_parsingStatus.finalBuffer) {
2665 *endPtr = start;
2666 return XML_ERROR_NONE;
2667 }
2668 parser->m_eventPtr = start;
2669 return XML_ERROR_PARTIAL_CHAR;
2670 }
2671 parser->m_processor = externalEntityInitProcessor3;
2672 return externalEntityInitProcessor3(parser, start, end, endPtr);
2673 }
2674
2675 static enum XML_Error PTRCALL
externalEntityInitProcessor3(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2676 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2677 const char *end, const char **endPtr) {
2678 int tok;
2679 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2680 parser->m_eventPtr = start;
2681 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2682 /* Note: These bytes are accounted later in:
2683 - processXmlDecl
2684 - externalEntityContentProcessor
2685 */
2686 parser->m_eventEndPtr = next;
2687
2688 switch (tok) {
2689 case XML_TOK_XML_DECL: {
2690 enum XML_Error result;
2691 result = processXmlDecl(parser, 1, start, next);
2692 if (result != XML_ERROR_NONE)
2693 return result;
2694 switch (parser->m_parsingStatus.parsing) {
2695 case XML_SUSPENDED:
2696 *endPtr = next;
2697 return XML_ERROR_NONE;
2698 case XML_FINISHED:
2699 return XML_ERROR_ABORTED;
2700 default:
2701 start = next;
2702 }
2703 } break;
2704 case XML_TOK_PARTIAL:
2705 if (! parser->m_parsingStatus.finalBuffer) {
2706 *endPtr = start;
2707 return XML_ERROR_NONE;
2708 }
2709 return XML_ERROR_UNCLOSED_TOKEN;
2710 case XML_TOK_PARTIAL_CHAR:
2711 if (! parser->m_parsingStatus.finalBuffer) {
2712 *endPtr = start;
2713 return XML_ERROR_NONE;
2714 }
2715 return XML_ERROR_PARTIAL_CHAR;
2716 }
2717 parser->m_processor = externalEntityContentProcessor;
2718 parser->m_tagLevel = 1;
2719 return externalEntityContentProcessor(parser, start, end, endPtr);
2720 }
2721
2722 static enum XML_Error PTRCALL
externalEntityContentProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)2723 externalEntityContentProcessor(XML_Parser parser, const char *start,
2724 const char *end, const char **endPtr) {
2725 enum XML_Error result
2726 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2727 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2728 XML_ACCOUNT_ENTITY_EXPANSION);
2729 if (result == XML_ERROR_NONE) {
2730 if (! storeRawNames(parser))
2731 return XML_ERROR_NO_MEMORY;
2732 }
2733 return result;
2734 }
2735
/* Main content loop: repeatedly tokenizes [s, end) with XmlContentTok() and
   dispatches on the token type, calling the registered handlers.

   Parameters:
   - parser:        the parser whose state (tag stack, handlers, pools, ...)
                    is used and updated.
   - startTagLevel: the tag level at which this content started.  The callers
                    visible in this file pass 0 (contentProcessor) or 1
                    (externalEntityContentProcessor).  With 0, running out of
                    input in the final buffer yields XML_ERROR_NO_ELEMENTS;
                    with a positive value, parser->m_tagLevel must be back at
                    startTagLevel at that point, else XML_ERROR_ASYNC_ENTITY.
   - enc:           encoding used for tokenizing.  When it differs from
                    parser->m_encoding, the event pointers of
                    parser->m_openInternalEntities are maintained instead of
                    the parser's own.
   - s, end:        the input range to process.
   - nextPtr:       on a non-error return that stops inside this function,
                    receives the position at which processing stopped.
   - haveMore:      when true, an incomplete token at the end of the input is
                    not an error: XML_ERROR_NONE is returned with *nextPtr set
                    to the start of that token.
   - account:       accounting category handed to accountingDiffTolerated()
                    and storeAtts() (only used when XML_DTD is defined, apart
                    from being forwarded).

   Returns XML_ERROR_NONE or the error that stopped processing.  When the tag
   level drops to 0, processing continues in epilogProcessor (called directly,
   or installed as parser->m_processor if parsing is suspended).
*/
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
          const char *s, const char *end, const char **nextPtr,
          XML_Bool haveMore, enum XML_Account account) {
  /* save one level of indirection */
  DTD *const dtd = parser->m_dtd;

  /* Select which pair of event position pointers to keep up to date:
     the parser's own when tokenizing with the parser's encoding, otherwise
     those of the currently open internal entity.
  */
  const char **eventPP;
  const char **eventEndPP;
  if (enc == parser->m_encoding) {
    eventPP = &parser->m_eventPtr;
    eventEndPP = &parser->m_eventEndPtr;
  } else {
    eventPP = &(parser->m_openInternalEntities->internalEventPtr);
    eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
  }
  *eventPP = s;

  for (;;) {
    const char *next = s; /* XmlContentTok doesn't always set the last arg */
    int tok = XmlContentTok(enc, s, end, &next);
#ifdef XML_DTD
    /* For the two "trailing" tokens, account for nothing when more input is
       expected (the function returns with *nextPtr = s below, so the bytes
       are presented again), and for everything up to end otherwise.
    */
    const char *accountAfter
        = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
              ? (haveMore ? s /* i.e. 0 bytes */ : end)
              : next;
    if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
                                  account)) {
      accountingOnAbort(parser);
      return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
    }
#endif
    *eventEndPP = next;
    switch (tok) {
    case XML_TOK_TRAILING_CR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* No more input will follow: the event extends to the end of the
         buffer and is reported to the character data handler as a single
         line feed (0xA).
      */
      *eventEndPP = end;
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0)
        return XML_ERROR_NO_ELEMENTS;
      if (parser->m_tagLevel != startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_NONE:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* End of the final input: fine only for content that started at a
         positive tag level and is back at exactly that level.
      */
      if (startTagLevel > 0) {
        if (parser->m_tagLevel != startTagLevel)
          return XML_ERROR_ASYNC_ENTITY;
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_NO_ELEMENTS;
    case XML_TOK_INVALID:
      /* report the error position as where the tokenizer stopped */
      *eventPP = next;
      return XML_ERROR_INVALID_TOKEN;
    case XML_TOK_PARTIAL:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_UNCLOSED_TOKEN;
    case XML_TOK_PARTIAL_CHAR:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      return XML_ERROR_PARTIAL_CHAR;
    case XML_TOK_ENTITY_REF: {
      const XML_Char *name;
      ENTITY *entity;
      /* The name is the token with one minimum-size character stripped from
         each end.  A non-zero result from XmlPredefinedEntityName() is the
         replacement character, which is reported directly.
      */
      XML_Char ch = (XML_Char)XmlPredefinedEntityName(
          enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
      if (ch) {
#ifdef XML_DTD
        /* NOTE: We are replacing 4-6 characters original input for 1 character
         *       so there is no amplification and hence recording without
         *       protection. */
        accountingDiffTolerated(parser, tok, (char *)&ch,
                                ((char *)&ch) + sizeof(XML_Char), __LINE__,
                                XML_ACCOUNT_ENTITY_EXPANSION);
#endif /* XML_DTD */
        if (parser->m_characterDataHandler)
          parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      /* Store the name in dtd->pool only for the duration of the lookup;
         it is discarded from the pool again right afterwards.
      */
      name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
                             next - enc->minBytesPerChar);
      if (! name)
        return XML_ERROR_NO_MEMORY;
      entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
      poolDiscard(&dtd->pool);
      /* First, determine if a check for an existing declaration is needed;
         if yes, check that the entity exists, and that it is internal,
         otherwise call the skipped entity or default handler.
      */
      if (! dtd->hasParamEntityRefs || dtd->standalone) {
        if (! entity)
          return XML_ERROR_UNDEFINED_ENTITY;
        else if (! entity->is_internal)
          return XML_ERROR_ENTITY_DECLARED_IN_PE;
      } else if (! entity) {
        if (parser->m_skippedEntityHandler)
          parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
        else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        break;
      }
      if (entity->open)
        return XML_ERROR_RECURSIVE_ENTITY_REF;
      if (entity->notation)
        return XML_ERROR_BINARY_ENTITY_REF;
      if (entity->textPtr) {
        /* entity with replacement text available: expand it, unless
           expansion of internal entities has been turned off, in which case
           it is reported as skipped
        */
        enum XML_Error result;
        if (! parser->m_defaultExpandInternalEntities) {
          if (parser->m_skippedEntityHandler)
            parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
                                           0);
          else if (parser->m_defaultHandler)
            reportDefault(parser, enc, s, next);
          break;
        }
        result = processInternalEntity(parser, entity, XML_FALSE);
        if (result != XML_ERROR_NONE)
          return result;
      } else if (parser->m_externalEntityRefHandler) {
        /* no replacement text: let the application handle the reference;
           the entity is flagged as open only while the context is built
        */
        const XML_Char *context;
        entity->open = XML_TRUE;
        context = getContext(parser);
        entity->open = XML_FALSE;
        if (! context)
          return XML_ERROR_NO_MEMORY;
        if (! parser->m_externalEntityRefHandler(
                parser->m_externalEntityRefHandlerArg, context, entity->base,
                entity->systemId, entity->publicId))
          return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        poolDiscard(&parser->m_tempPool);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    }
    case XML_TOK_START_TAG_NO_ATTS:
      /* fall through */
    case XML_TOK_START_TAG_WITH_ATTS: {
      TAG *tag;
      enum XML_Error result;
      XML_Char *toPtr;
      /* take a TAG from the free list if possible, else allocate a new one
         together with its initial name buffer
      */
      if (parser->m_freeTagList) {
        tag = parser->m_freeTagList;
        parser->m_freeTagList = parser->m_freeTagList->parent;
      } else {
        tag = (TAG *)MALLOC(parser, sizeof(TAG));
        if (! tag)
          return XML_ERROR_NO_MEMORY;
        tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
        if (! tag->buf) {
          FREE(parser, tag);
          return XML_ERROR_NO_MEMORY;
        }
        tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
      }
      /* push the tag onto the tag stack; rawName points into the input for
         now
      */
      tag->bindings = NULL;
      tag->parent = parser->m_tagStack;
      parser->m_tagStack = tag;
      tag->name.localPart = NULL;
      tag->name.prefix = NULL;
      tag->rawName = s + enc->minBytesPerChar;
      tag->rawNameLength = XmlNameLength(enc, tag->rawName);
      ++parser->m_tagLevel;
      {
        /* Convert the raw name into tag->buf, doubling the buffer until the
           whole name fits.  The conversion limit is one XML_Char short of
           bufEnd so that the terminator written below always has room.
        */
        const char *rawNameEnd = tag->rawName + tag->rawNameLength;
        const char *fromPtr = tag->rawName;
        toPtr = (XML_Char *)tag->buf;
        for (;;) {
          int bufSize;
          int convLen;
          const enum XML_Convert_Result convert_res
              = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
                           (ICHAR *)tag->bufEnd - 1);
          convLen = (int)(toPtr - (XML_Char *)tag->buf);
          if ((fromPtr >= rawNameEnd)
              || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
            tag->name.strLen = convLen;
            break;
          }
          bufSize = (int)(tag->bufEnd - tag->buf) << 1;
          {
            char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
            if (temp == NULL)
              return XML_ERROR_NO_MEMORY;
            tag->buf = temp;
            tag->bufEnd = temp + bufSize;
            /* continue writing where the conversion left off */
            toPtr = (XML_Char *)temp + convLen;
          }
        }
      }
      tag->name.str = (XML_Char *)tag->buf;
      *toPtr = XML_T('\0');
      result
          = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
      if (result)
        return result;
      if (parser->m_startElementHandler)
        parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
                                      (const XML_Char **)parser->m_atts);
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      break;
    }
    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
      /* fall through */
    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
      /* No TAG is pushed for an empty element: the name lives in
         m_tempPool and the bindings in a local list, both released at the
         end of this case.
      */
      const char *rawName = s + enc->minBytesPerChar;
      enum XML_Error result;
      BINDING *bindings = NULL;
      XML_Bool noElmHandlers = XML_TRUE;
      TAG_NAME name;
      name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
                                 rawName + XmlNameLength(enc, rawName));
      if (! name.str)
        return XML_ERROR_NO_MEMORY;
      poolFinish(&parser->m_tempPool);
      result = storeAtts(parser, enc, s, &name, &bindings,
                         XML_ACCOUNT_NONE /* token spans whole start tag */);
      if (result != XML_ERROR_NONE) {
        freeBindings(parser, bindings);
        return result;
      }
      poolFinish(&parser->m_tempPool);
      if (parser->m_startElementHandler) {
        parser->m_startElementHandler(parser->m_handlerArg, name.str,
                                      (const XML_Char **)parser->m_atts);
        noElmHandlers = XML_FALSE;
      }
      if (parser->m_endElementHandler) {
        /* if a start event was already reported for this token, move the
           event start to the token's end before reporting the end event
        */
        if (parser->m_startElementHandler)
          *eventPP = *eventEndPP;
        parser->m_endElementHandler(parser->m_handlerArg, name.str);
        noElmHandlers = XML_FALSE;
      }
      if (noElmHandlers && parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      poolClear(&parser->m_tempPool);
      freeBindings(parser, bindings);
    }
      /* back at tag level 0: continue in the epilog, unless parsing has
         been finished; when suspended only install the processor
      */
      if ((parser->m_tagLevel == 0)
          && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
        if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
          parser->m_processor = epilogProcessor;
        else
          return epilogProcessor(parser, next, end, nextPtr);
      }
      break;
    case XML_TOK_END_TAG:
      /* an end tag at the level this content started at has no matching
         start tag within this content
      */
      if (parser->m_tagLevel == startTagLevel)
        return XML_ERROR_ASYNC_ENTITY;
      else {
        int len;
        const char *rawName;
        /* pop the innermost tag and put it onto the free list; its fields
           remain readable for the rest of this case
        */
        TAG *tag = parser->m_tagStack;
        parser->m_tagStack = tag->parent;
        tag->parent = parser->m_freeTagList;
        parser->m_freeTagList = tag;
        /* the end tag's raw name must match the start tag's byte for byte */
        rawName = s + enc->minBytesPerChar * 2;
        len = XmlNameLength(enc, rawName);
        if (len != tag->rawNameLength
            || memcmp(tag->rawName, rawName, len) != 0) {
          *eventPP = rawName;
          return XML_ERROR_TAG_MISMATCH;
        }
        --parser->m_tagLevel;
        if (parser->m_endElementHandler) {
          const XML_Char *localPart;
          const XML_Char *prefix;
          XML_Char *uri;
          localPart = tag->name.localPart;
          if (parser->m_ns && localPart) {
            /* localPart and prefix may have been overwritten in
               tag->name.str, since this points to the binding->uri
               buffer which gets re-used; so we have to add them again
            */
            uri = (XML_Char *)tag->name.str + tag->name.uriLen;
            /* don't need to check for space - already done in storeAtts() */
            while (*localPart)
              *uri++ = *localPart++;
            prefix = (XML_Char *)tag->name.prefix;
            if (parser->m_ns_triplets && prefix) {
              *uri++ = parser->m_namespaceSeparator;
              while (*prefix)
                *uri++ = *prefix++;
            }
            *uri = XML_T('\0');
          }
          parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
        } else if (parser->m_defaultHandler)
          reportDefault(parser, enc, s, next);
        /* Release the bindings attached to this tag: report the end of each
           one, move it onto m_freeBindingList, and restore the binding that
           the prefix had before.
        */
        while (tag->bindings) {
          BINDING *b = tag->bindings;
          if (parser->m_endNamespaceDeclHandler)
            parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
                                              b->prefix->name);
          tag->bindings = tag->bindings->nextTagBinding;
          b->nextTagBinding = parser->m_freeBindingList;
          parser->m_freeBindingList = b;
          b->prefix->binding = b->prevPrefixBinding;
        }
        /* back at tag level 0: continue in the epilog (see the empty
           element case above for the same hand-over)
        */
        if ((parser->m_tagLevel == 0)
            && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
          if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
            parser->m_processor = epilogProcessor;
          else
            return epilogProcessor(parser, next, end, nextPtr);
        }
      }
      break;
    case XML_TOK_CHAR_REF: {
      /* a negative number from XmlCharRefNumber() means the reference is
         not acceptable
      */
      int n = XmlCharRefNumber(enc, s);
      if (n < 0)
        return XML_ERROR_BAD_CHAR_REF;
      if (parser->m_characterDataHandler) {
        /* XmlEncode() fills buf and its result is used as the length */
        XML_Char buf[XML_ENCODE_MAX];
        parser->m_characterDataHandler(parser->m_handlerArg, buf,
                                       XmlEncode(n, (ICHAR *)buf));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_XML_DECL:
      return XML_ERROR_MISPLACED_XML_PI;
    case XML_TOK_DATA_NEWLINE:
      /* reported to the character data handler as a single line feed */
      if (parser->m_characterDataHandler) {
        XML_Char c = 0xA;
        parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
    case XML_TOK_CDATA_SECT_OPEN: {
      enum XML_Error result;
      if (parser->m_startCdataSectionHandler)
        parser->m_startCdataSectionHandler(parser->m_handlerArg);
      /* BEGIN disabled code */
      /* Suppose you are doing a transformation on a document that involves
         changing only the character data.  You set up a defaultHandler
         and a characterDataHandler.  The defaultHandler simply copies
         characters through.  The characterDataHandler does the
         transformation and writes the characters out escaping them as
         necessary.  This case will fail to work if we leave out the
         following two lines (because & and < inside CDATA sections will
         be incorrectly escaped).

         However, now we have a start/endCdataSectionHandler, so it seems
         easier to let the user deal with this.
      */
      else if (0 && parser->m_characterDataHandler)
        parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                                       0);
      /* END disabled code */
      else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      /* doCdataSection() advances next; a NULL next on success makes this
         function return with cdataSectionProcessor installed (presumably
         because the section continues beyond the current input - confirm
         against doCdataSection)
      */
      result
          = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
      if (result != XML_ERROR_NONE)
        return result;
      else if (! next) {
        parser->m_processor = cdataSectionProcessor;
        return result;
      }
    } break;
    case XML_TOK_TRAILING_RSQB:
      if (haveMore) {
        *nextPtr = s;
        return XML_ERROR_NONE;
      }
      /* no more input will follow: report everything from s up to end as
         character data, converting it first if the encoding requires it
      */
      if (parser->m_characterDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
          XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
          parser->m_characterDataHandler(
              parser->m_handlerArg, parser->m_dataBuf,
              (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
        } else
          parser->m_characterDataHandler(
              parser->m_handlerArg, (XML_Char *)s,
              (int)((XML_Char *)end - (XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, end);
      /* We are at the end of the final buffer, should we check for
         XML_SUSPENDED, XML_FINISHED?
      */
      if (startTagLevel == 0) {
        *eventPP = end;
        return XML_ERROR_NO_ELEMENTS;
      }
      if (parser->m_tagLevel != startTagLevel) {
        *eventPP = end;
        return XML_ERROR_ASYNC_ENTITY;
      }
      *nextPtr = end;
      return XML_ERROR_NONE;
    case XML_TOK_DATA_CHARS: {
      /* the handler is read once and that value is used for all chunks */
      XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
      if (charDataHandler) {
        if (MUST_CONVERT(enc, s)) {
          /* Convert and report in chunks of at most the size of m_dataBuf,
             updating the event pointers so that each call to the handler
             covers the chunk being reported.
          */
          for (;;) {
            ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
            const enum XML_Convert_Result convert_res = XmlConvert(
                enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
            *eventEndPP = s;
            charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
                            (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
            if ((convert_res == XML_CONVERT_COMPLETED)
                || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
              break;
            *eventPP = s;
          }
        } else
          charDataHandler(parser->m_handlerArg, (XML_Char *)s,
                          (int)((XML_Char *)next - (XML_Char *)s));
      } else if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
    } break;
    case XML_TOK_PI:
      if (! reportProcessingInstruction(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_TOK_COMMENT:
      if (! reportComment(parser, enc, s, next))
        return XML_ERROR_NO_MEMORY;
      break;
    default:
      /* All of the tokens produced by XmlContentTok() have their own
       * explicit cases, so this default is not strictly necessary.
       * However it is a useful safety net, so we retain the code and
       * simply exclude it from the coverage tests.
       *
       * LCOV_EXCL_START
       */
      if (parser->m_defaultHandler)
        reportDefault(parser, enc, s, next);
      break;
      /* LCOV_EXCL_STOP */
    }
    /* Token done: advance to the next one, then honor a change of the
       parsing status that happened while handling the token.
    */
    *eventPP = s = next;
    switch (parser->m_parsingStatus.parsing) {
    case XML_SUSPENDED:
      *nextPtr = next;
      return XML_ERROR_NONE;
    case XML_FINISHED:
      return XML_ERROR_ABORTED;
    default:;
    }
  }
  /* not reached */
}
3205
3206 /* This function does not call free() on the allocated memory, merely
3207 * moving it to the parser's m_freeBindingList where it can be freed or
3208 * reused as appropriate.
3209 */
3210 static void
freeBindings(XML_Parser parser,BINDING * bindings)3211 freeBindings(XML_Parser parser, BINDING *bindings) {
3212 while (bindings) {
3213 BINDING *b = bindings;
3214
3215 /* m_startNamespaceDeclHandler will have been called for this
3216 * binding in addBindings(), so call the end handler now.
3217 */
3218 if (parser->m_endNamespaceDeclHandler)
3219 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3220
3221 bindings = bindings->nextTagBinding;
3222 b->nextTagBinding = parser->m_freeBindingList;
3223 parser->m_freeBindingList = b;
3224 b->prefix->binding = b->prevPrefixBinding;
3225 }
3226 }
3227
3228 /* Precondition: all arguments must be non-NULL;
3229 Purpose:
3230 - normalize attributes
3231 - check attributes for well-formedness
3232 - generate namespace aware attribute names (URI, prefix)
3233 - build list of attributes for startElementHandler
3234 - default attributes
3235 - process namespace declarations (check and report them)
3236 - generate namespace aware element name (URI, prefix)
3237 */
3238 static enum XML_Error
storeAtts(XML_Parser parser,const ENCODING * enc,const char * attStr,TAG_NAME * tagNamePtr,BINDING ** bindingsPtr,enum XML_Account account)3239 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3240 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3241 enum XML_Account account) {
3242 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3243 ELEMENT_TYPE *elementType;
3244 int nDefaultAtts;
3245 const XML_Char **appAtts; /* the attribute list for the application */
3246 int attIndex = 0;
3247 int prefixLen;
3248 int i;
3249 int n;
3250 XML_Char *uri;
3251 int nPrefixes = 0;
3252 BINDING *binding;
3253 const XML_Char *localPart;
3254
3255 /* lookup the element type name */
3256 elementType
3257 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3258 if (! elementType) {
3259 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3260 if (! name)
3261 return XML_ERROR_NO_MEMORY;
3262 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3263 sizeof(ELEMENT_TYPE));
3264 if (! elementType)
3265 return XML_ERROR_NO_MEMORY;
3266 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3267 return XML_ERROR_NO_MEMORY;
3268 }
3269 nDefaultAtts = elementType->nDefaultAtts;
3270
3271 /* get the attributes from the tokenizer */
3272 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3273
3274 /* Detect and prevent integer overflow */
3275 if (n > INT_MAX - nDefaultAtts) {
3276 return XML_ERROR_NO_MEMORY;
3277 }
3278
3279 if (n + nDefaultAtts > parser->m_attsSize) {
3280 int oldAttsSize = parser->m_attsSize;
3281 ATTRIBUTE *temp;
3282 #ifdef XML_ATTR_INFO
3283 XML_AttrInfo *temp2;
3284 #endif
3285
3286 /* Detect and prevent integer overflow */
3287 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3288 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3289 return XML_ERROR_NO_MEMORY;
3290 }
3291
3292 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3293
3294 /* Detect and prevent integer overflow.
3295 * The preprocessor guard addresses the "always false" warning
3296 * from -Wtype-limits on platforms where
3297 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3298 #if UINT_MAX >= SIZE_MAX
3299 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3300 parser->m_attsSize = oldAttsSize;
3301 return XML_ERROR_NO_MEMORY;
3302 }
3303 #endif
3304
3305 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3306 parser->m_attsSize * sizeof(ATTRIBUTE));
3307 if (temp == NULL) {
3308 parser->m_attsSize = oldAttsSize;
3309 return XML_ERROR_NO_MEMORY;
3310 }
3311 parser->m_atts = temp;
3312 #ifdef XML_ATTR_INFO
3313 /* Detect and prevent integer overflow.
3314 * The preprocessor guard addresses the "always false" warning
3315 * from -Wtype-limits on platforms where
3316 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3317 # if UINT_MAX >= SIZE_MAX
3318 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3319 parser->m_attsSize = oldAttsSize;
3320 return XML_ERROR_NO_MEMORY;
3321 }
3322 # endif
3323
3324 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3325 parser->m_attsSize * sizeof(XML_AttrInfo));
3326 if (temp2 == NULL) {
3327 parser->m_attsSize = oldAttsSize;
3328 return XML_ERROR_NO_MEMORY;
3329 }
3330 parser->m_attInfo = temp2;
3331 #endif
3332 if (n > oldAttsSize)
3333 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3334 }
3335
3336 appAtts = (const XML_Char **)parser->m_atts;
3337 for (i = 0; i < n; i++) {
3338 ATTRIBUTE *currAtt = &parser->m_atts[i];
3339 #ifdef XML_ATTR_INFO
3340 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3341 #endif
3342 /* add the name and value to the attribute list */
3343 ATTRIBUTE_ID *attId
3344 = getAttributeId(parser, enc, currAtt->name,
3345 currAtt->name + XmlNameLength(enc, currAtt->name));
3346 if (! attId)
3347 return XML_ERROR_NO_MEMORY;
3348 #ifdef XML_ATTR_INFO
3349 currAttInfo->nameStart
3350 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3351 currAttInfo->nameEnd
3352 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3353 currAttInfo->valueStart = parser->m_parseEndByteIndex
3354 - (parser->m_parseEndPtr - currAtt->valuePtr);
3355 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3356 - (parser->m_parseEndPtr - currAtt->valueEnd);
3357 #endif
3358 /* Detect duplicate attributes by their QNames. This does not work when
3359 namespace processing is turned on and different prefixes for the same
3360 namespace are used. For this case we have a check further down.
3361 */
3362 if ((attId->name)[-1]) {
3363 if (enc == parser->m_encoding)
3364 parser->m_eventPtr = parser->m_atts[i].name;
3365 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3366 }
3367 (attId->name)[-1] = 1;
3368 appAtts[attIndex++] = attId->name;
3369 if (! parser->m_atts[i].normalized) {
3370 enum XML_Error result;
3371 XML_Bool isCdata = XML_TRUE;
3372
3373 /* figure out whether declared as other than CDATA */
3374 if (attId->maybeTokenized) {
3375 int j;
3376 for (j = 0; j < nDefaultAtts; j++) {
3377 if (attId == elementType->defaultAtts[j].id) {
3378 isCdata = elementType->defaultAtts[j].isCdata;
3379 break;
3380 }
3381 }
3382 }
3383
3384 /* normalize the attribute value */
3385 result = storeAttributeValue(
3386 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3387 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3388 if (result)
3389 return result;
3390 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3391 poolFinish(&parser->m_tempPool);
3392 } else {
3393 /* the value did not need normalizing */
3394 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3395 parser->m_atts[i].valuePtr,
3396 parser->m_atts[i].valueEnd);
3397 if (appAtts[attIndex] == 0)
3398 return XML_ERROR_NO_MEMORY;
3399 poolFinish(&parser->m_tempPool);
3400 }
3401 /* handle prefixed attribute names */
3402 if (attId->prefix) {
3403 if (attId->xmlns) {
3404 /* deal with namespace declarations here */
3405 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3406 appAtts[attIndex], bindingsPtr);
3407 if (result)
3408 return result;
3409 --attIndex;
3410 } else {
3411 /* deal with other prefixed names later */
3412 attIndex++;
3413 nPrefixes++;
3414 (attId->name)[-1] = 2;
3415 }
3416 } else
3417 attIndex++;
3418 }
3419
3420 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3421 parser->m_nSpecifiedAtts = attIndex;
3422 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3423 for (i = 0; i < attIndex; i += 2)
3424 if (appAtts[i] == elementType->idAtt->name) {
3425 parser->m_idAttIndex = i;
3426 break;
3427 }
3428 } else
3429 parser->m_idAttIndex = -1;
3430
3431 /* do attribute defaulting */
3432 for (i = 0; i < nDefaultAtts; i++) {
3433 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3434 if (! (da->id->name)[-1] && da->value) {
3435 if (da->id->prefix) {
3436 if (da->id->xmlns) {
3437 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3438 da->value, bindingsPtr);
3439 if (result)
3440 return result;
3441 } else {
3442 (da->id->name)[-1] = 2;
3443 nPrefixes++;
3444 appAtts[attIndex++] = da->id->name;
3445 appAtts[attIndex++] = da->value;
3446 }
3447 } else {
3448 (da->id->name)[-1] = 1;
3449 appAtts[attIndex++] = da->id->name;
3450 appAtts[attIndex++] = da->value;
3451 }
3452 }
3453 }
3454 appAtts[attIndex] = 0;
3455
3456 /* expand prefixed attribute names, check for duplicates,
3457 and clear flags that say whether attributes were specified */
3458 i = 0;
3459 if (nPrefixes) {
3460 int j; /* hash table index */
3461 unsigned long version = parser->m_nsAttsVersion;
3462
3463 /* Detect and prevent invalid shift */
3464 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3465 return XML_ERROR_NO_MEMORY;
3466 }
3467
3468 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3469 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3470 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3471 if ((nPrefixes << 1)
3472 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3473 NS_ATT *temp;
3474 /* hash table size must also be a power of 2 and >= 8 */
3475 while (nPrefixes >> parser->m_nsAttsPower++)
3476 ;
3477 if (parser->m_nsAttsPower < 3)
3478 parser->m_nsAttsPower = 3;
3479
3480 /* Detect and prevent invalid shift */
3481 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3482 /* Restore actual size of memory in m_nsAtts */
3483 parser->m_nsAttsPower = oldNsAttsPower;
3484 return XML_ERROR_NO_MEMORY;
3485 }
3486
3487 nsAttsSize = 1u << parser->m_nsAttsPower;
3488
3489 /* Detect and prevent integer overflow.
3490 * The preprocessor guard addresses the "always false" warning
3491 * from -Wtype-limits on platforms where
3492 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3493 #if UINT_MAX >= SIZE_MAX
3494 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3495 /* Restore actual size of memory in m_nsAtts */
3496 parser->m_nsAttsPower = oldNsAttsPower;
3497 return XML_ERROR_NO_MEMORY;
3498 }
3499 #endif
3500
3501 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3502 nsAttsSize * sizeof(NS_ATT));
3503 if (! temp) {
3504 /* Restore actual size of memory in m_nsAtts */
3505 parser->m_nsAttsPower = oldNsAttsPower;
3506 return XML_ERROR_NO_MEMORY;
3507 }
3508 parser->m_nsAtts = temp;
3509 version = 0; /* force re-initialization of m_nsAtts hash table */
3510 }
3511 /* using a version flag saves us from initializing m_nsAtts every time */
3512 if (! version) { /* initialize version flags when version wraps around */
3513 version = INIT_ATTS_VERSION;
3514 for (j = nsAttsSize; j != 0;)
3515 parser->m_nsAtts[--j].version = version;
3516 }
3517 parser->m_nsAttsVersion = --version;
3518
3519 /* expand prefixed names and check for duplicates */
3520 for (; i < attIndex; i += 2) {
3521 const XML_Char *s = appAtts[i];
3522 if (s[-1] == 2) { /* prefixed */
3523 ATTRIBUTE_ID *id;
3524 const BINDING *b;
3525 unsigned long uriHash;
3526 struct siphash sip_state;
3527 struct sipkey sip_key;
3528
3529 copy_salt_to_sipkey(parser, &sip_key);
3530 sip24_init(&sip_state, &sip_key);
3531
3532 ((XML_Char *)s)[-1] = 0; /* clear flag */
3533 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3534 if (! id || ! id->prefix) {
3535 /* This code is walking through the appAtts array, dealing
3536 * with (in this case) a prefixed attribute name. To be in
3537 * the array, the attribute must have already been bound, so
3538 * has to have passed through the hash table lookup once
3539 * already. That implies that an entry for it already
3540 * exists, so the lookup above will return a pointer to
3541 * already allocated memory. There is no opportunity for
3542 * the allocator to fail, so the condition above cannot be
3543 * fulfilled.
3544 *
3545 * Since it is difficult to be certain that the above
3546 * analysis is complete, we retain the test and merely
3547 * remove the code from coverage tests.
3548 */
3549 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3550 }
3551 b = id->prefix->binding;
3552 if (! b)
3553 return XML_ERROR_UNBOUND_PREFIX;
3554
3555 for (j = 0; j < b->uriLen; j++) {
3556 const XML_Char c = b->uri[j];
3557 if (! poolAppendChar(&parser->m_tempPool, c))
3558 return XML_ERROR_NO_MEMORY;
3559 }
3560
3561 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3562
3563 while (*s++ != XML_T(ASCII_COLON))
3564 ;
3565
3566 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3567
3568 do { /* copies null terminator */
3569 if (! poolAppendChar(&parser->m_tempPool, *s))
3570 return XML_ERROR_NO_MEMORY;
3571 } while (*s++);
3572
3573 uriHash = (unsigned long)sip24_final(&sip_state);
3574
3575 { /* Check hash table for duplicate of expanded name (uriName).
3576 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3577 */
3578 unsigned char step = 0;
3579 unsigned long mask = nsAttsSize - 1;
3580 j = uriHash & mask; /* index into hash table */
3581 while (parser->m_nsAtts[j].version == version) {
3582 /* for speed we compare stored hash values first */
3583 if (uriHash == parser->m_nsAtts[j].hash) {
3584 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3585 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3586 /* s1 is null terminated, but not s2 */
3587 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3588 ;
3589 if (*s1 == 0)
3590 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3591 }
3592 if (! step)
3593 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3594 j < step ? (j += nsAttsSize - step) : (j -= step);
3595 }
3596 }
3597
3598 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3599 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3600 s = b->prefix->name;
3601 do {
3602 if (! poolAppendChar(&parser->m_tempPool, *s))
3603 return XML_ERROR_NO_MEMORY;
3604 } while (*s++);
3605 }
3606
3607 /* store expanded name in attribute list */
3608 s = poolStart(&parser->m_tempPool);
3609 poolFinish(&parser->m_tempPool);
3610 appAtts[i] = s;
3611
3612 /* fill empty slot with new version, uriName and hash value */
3613 parser->m_nsAtts[j].version = version;
3614 parser->m_nsAtts[j].hash = uriHash;
3615 parser->m_nsAtts[j].uriName = s;
3616
3617 if (! --nPrefixes) {
3618 i += 2;
3619 break;
3620 }
3621 } else /* not prefixed */
3622 ((XML_Char *)s)[-1] = 0; /* clear flag */
3623 }
3624 }
3625 /* clear flags for the remaining attributes */
3626 for (; i < attIndex; i += 2)
3627 ((XML_Char *)(appAtts[i]))[-1] = 0;
3628 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3629 binding->attId->name[-1] = 0;
3630
3631 if (! parser->m_ns)
3632 return XML_ERROR_NONE;
3633
3634 /* expand the element type name */
3635 if (elementType->prefix) {
3636 binding = elementType->prefix->binding;
3637 if (! binding)
3638 return XML_ERROR_UNBOUND_PREFIX;
3639 localPart = tagNamePtr->str;
3640 while (*localPart++ != XML_T(ASCII_COLON))
3641 ;
3642 } else if (dtd->defaultPrefix.binding) {
3643 binding = dtd->defaultPrefix.binding;
3644 localPart = tagNamePtr->str;
3645 } else
3646 return XML_ERROR_NONE;
3647 prefixLen = 0;
3648 if (parser->m_ns_triplets && binding->prefix->name) {
3649 for (; binding->prefix->name[prefixLen++];)
3650 ; /* prefixLen includes null terminator */
3651 }
3652 tagNamePtr->localPart = localPart;
3653 tagNamePtr->uriLen = binding->uriLen;
3654 tagNamePtr->prefix = binding->prefix->name;
3655 tagNamePtr->prefixLen = prefixLen;
3656 for (i = 0; localPart[i++];)
3657 ; /* i includes null terminator */
3658
3659 /* Detect and prevent integer overflow */
3660 if (binding->uriLen > INT_MAX - prefixLen
3661 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3662 return XML_ERROR_NO_MEMORY;
3663 }
3664
3665 n = i + binding->uriLen + prefixLen;
3666 if (n > binding->uriAlloc) {
3667 TAG *p;
3668
3669 /* Detect and prevent integer overflow */
3670 if (n > INT_MAX - EXPAND_SPARE) {
3671 return XML_ERROR_NO_MEMORY;
3672 }
3673 /* Detect and prevent integer overflow.
3674 * The preprocessor guard addresses the "always false" warning
3675 * from -Wtype-limits on platforms where
3676 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3677 #if UINT_MAX >= SIZE_MAX
3678 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3679 return XML_ERROR_NO_MEMORY;
3680 }
3681 #endif
3682
3683 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3684 if (! uri)
3685 return XML_ERROR_NO_MEMORY;
3686 binding->uriAlloc = n + EXPAND_SPARE;
3687 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3688 for (p = parser->m_tagStack; p; p = p->parent)
3689 if (p->name.str == binding->uri)
3690 p->name.str = uri;
3691 FREE(parser, binding->uri);
3692 binding->uri = uri;
3693 }
3694 /* if m_namespaceSeparator != '\0' then uri includes it already */
3695 uri = binding->uri + binding->uriLen;
3696 memcpy(uri, localPart, i * sizeof(XML_Char));
3697 /* we always have a namespace separator between localPart and prefix */
3698 if (prefixLen) {
3699 uri += i - 1;
3700 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3701 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3702 }
3703 tagNamePtr->str = binding->uri;
3704 return XML_ERROR_NONE;
3705 }
3706
3707 /* addBinding() overwrites the value of prefix->binding without checking.
3708 Therefore one must keep track of the old value outside of addBinding().
3709 */
3710 static enum XML_Error
addBinding(XML_Parser parser,PREFIX * prefix,const ATTRIBUTE_ID * attId,const XML_Char * uri,BINDING ** bindingsPtr)3711 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3712 const XML_Char *uri, BINDING **bindingsPtr) {
3713 static const XML_Char xmlNamespace[]
3714 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3715 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3716 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3717 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3718 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3719 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3720 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3721 ASCII_e, '\0'};
3722 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3723 static const XML_Char xmlnsNamespace[]
3724 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3725 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3726 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3727 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3728 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3729 static const int xmlnsLen
3730 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
3731
3732 XML_Bool mustBeXML = XML_FALSE;
3733 XML_Bool isXML = XML_TRUE;
3734 XML_Bool isXMLNS = XML_TRUE;
3735
3736 BINDING *b;
3737 int len;
3738
3739 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
3740 if (*uri == XML_T('\0') && prefix->name)
3741 return XML_ERROR_UNDECLARING_PREFIX;
3742
3743 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
3744 && prefix->name[1] == XML_T(ASCII_m)
3745 && prefix->name[2] == XML_T(ASCII_l)) {
3746 /* Not allowed to bind xmlns */
3747 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
3748 && prefix->name[5] == XML_T('\0'))
3749 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3750
3751 if (prefix->name[3] == XML_T('\0'))
3752 mustBeXML = XML_TRUE;
3753 }
3754
3755 for (len = 0; uri[len]; len++) {
3756 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3757 isXML = XML_FALSE;
3758
3759 if (! mustBeXML && isXMLNS
3760 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3761 isXMLNS = XML_FALSE;
3762
3763 // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764 // we have to at least make sure that the XML processor on top of
3765 // Expat (that is splitting tag names by namespace separator into
3766 // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767 // by an attacker putting additional namespace separator characters
3768 // into namespace declarations. That would be ambiguous and not to
3769 // be expected.
3770 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
3771 return XML_ERROR_SYNTAX;
3772 }
3773 }
3774 isXML = isXML && len == xmlLen;
3775 isXMLNS = isXMLNS && len == xmlnsLen;
3776
3777 if (mustBeXML != isXML)
3778 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3779 : XML_ERROR_RESERVED_NAMESPACE_URI;
3780
3781 if (isXMLNS)
3782 return XML_ERROR_RESERVED_NAMESPACE_URI;
3783
3784 if (parser->m_namespaceSeparator)
3785 len++;
3786 if (parser->m_freeBindingList) {
3787 b = parser->m_freeBindingList;
3788 if (len > b->uriAlloc) {
3789 /* Detect and prevent integer overflow */
3790 if (len > INT_MAX - EXPAND_SPARE) {
3791 return XML_ERROR_NO_MEMORY;
3792 }
3793
3794 /* Detect and prevent integer overflow.
3795 * The preprocessor guard addresses the "always false" warning
3796 * from -Wtype-limits on platforms where
3797 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3798 #if UINT_MAX >= SIZE_MAX
3799 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3800 return XML_ERROR_NO_MEMORY;
3801 }
3802 #endif
3803
3804 XML_Char *temp = (XML_Char *)REALLOC(
3805 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
3806 if (temp == NULL)
3807 return XML_ERROR_NO_MEMORY;
3808 b->uri = temp;
3809 b->uriAlloc = len + EXPAND_SPARE;
3810 }
3811 parser->m_freeBindingList = b->nextTagBinding;
3812 } else {
3813 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
3814 if (! b)
3815 return XML_ERROR_NO_MEMORY;
3816
3817 /* Detect and prevent integer overflow */
3818 if (len > INT_MAX - EXPAND_SPARE) {
3819 return XML_ERROR_NO_MEMORY;
3820 }
3821 /* Detect and prevent integer overflow.
3822 * The preprocessor guard addresses the "always false" warning
3823 * from -Wtype-limits on platforms where
3824 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3825 #if UINT_MAX >= SIZE_MAX
3826 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3827 return XML_ERROR_NO_MEMORY;
3828 }
3829 #endif
3830
3831 b->uri
3832 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3833 if (! b->uri) {
3834 FREE(parser, b);
3835 return XML_ERROR_NO_MEMORY;
3836 }
3837 b->uriAlloc = len + EXPAND_SPARE;
3838 }
3839 b->uriLen = len;
3840 memcpy(b->uri, uri, len * sizeof(XML_Char));
3841 if (parser->m_namespaceSeparator)
3842 b->uri[len - 1] = parser->m_namespaceSeparator;
3843 b->prefix = prefix;
3844 b->attId = attId;
3845 b->prevPrefixBinding = prefix->binding;
3846 /* NULL binding when default namespace undeclared */
3847 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
3848 prefix->binding = NULL;
3849 else
3850 prefix->binding = b;
3851 b->nextTagBinding = *bindingsPtr;
3852 *bindingsPtr = b;
3853 /* if attId == NULL then we are not starting a namespace scope */
3854 if (attId && parser->m_startNamespaceDeclHandler)
3855 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
3856 prefix->binding ? uri : 0);
3857 return XML_ERROR_NONE;
3858 }
3859
3860 /* The idea here is to avoid using stack for each CDATA section when
3861 the whole file is parsed with one call.
3862 */
3863 static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)3864 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3865 const char **endPtr) {
3866 enum XML_Error result = doCdataSection(
3867 parser, parser->m_encoding, &start, end, endPtr,
3868 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
3869 if (result != XML_ERROR_NONE)
3870 return result;
3871 if (start) {
3872 if (parser->m_parentParser) { /* we are parsing an external entity */
3873 parser->m_processor = externalEntityContentProcessor;
3874 return externalEntityContentProcessor(parser, start, end, endPtr);
3875 } else {
3876 parser->m_processor = contentProcessor;
3877 return contentProcessor(parser, start, end, endPtr);
3878 }
3879 }
3880 return result;
3881 }
3882
3883 /* startPtr gets set to non-null if the section is closed, and to null if
3884 the section is not yet closed.
3885 */
3886 static enum XML_Error
doCdataSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore,enum XML_Account account)3887 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3888 const char *end, const char **nextPtr, XML_Bool haveMore,
3889 enum XML_Account account) {
3890 const char *s = *startPtr;
3891 const char **eventPP;
3892 const char **eventEndPP;
3893 if (enc == parser->m_encoding) {
3894 eventPP = &parser->m_eventPtr;
3895 *eventPP = s;
3896 eventEndPP = &parser->m_eventEndPtr;
3897 } else {
3898 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3899 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
3900 }
3901 *eventPP = s;
3902 *startPtr = NULL;
3903
3904 for (;;) {
3905 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
3906 int tok = XmlCdataSectionTok(enc, s, end, &next);
3907 #ifdef XML_DTD
3908 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
3909 accountingOnAbort(parser);
3910 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3911 }
3912 #else
3913 UNUSED_P(account);
3914 #endif
3915 *eventEndPP = next;
3916 switch (tok) {
3917 case XML_TOK_CDATA_SECT_CLOSE:
3918 if (parser->m_endCdataSectionHandler)
3919 parser->m_endCdataSectionHandler(parser->m_handlerArg);
3920 /* BEGIN disabled code */
3921 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3922 else if (0 && parser->m_characterDataHandler)
3923 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3924 0);
3925 /* END disabled code */
3926 else if (parser->m_defaultHandler)
3927 reportDefault(parser, enc, s, next);
3928 *startPtr = next;
3929 *nextPtr = next;
3930 if (parser->m_parsingStatus.parsing == XML_FINISHED)
3931 return XML_ERROR_ABORTED;
3932 else
3933 return XML_ERROR_NONE;
3934 case XML_TOK_DATA_NEWLINE:
3935 if (parser->m_characterDataHandler) {
3936 XML_Char c = 0xA;
3937 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3938 } else if (parser->m_defaultHandler)
3939 reportDefault(parser, enc, s, next);
3940 break;
3941 case XML_TOK_DATA_CHARS: {
3942 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3943 if (charDataHandler) {
3944 if (MUST_CONVERT(enc, s)) {
3945 for (;;) {
3946 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3947 const enum XML_Convert_Result convert_res = XmlConvert(
3948 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3949 *eventEndPP = next;
3950 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3951 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3952 if ((convert_res == XML_CONVERT_COMPLETED)
3953 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3954 break;
3955 *eventPP = s;
3956 }
3957 } else
3958 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3959 (int)((XML_Char *)next - (XML_Char *)s));
3960 } else if (parser->m_defaultHandler)
3961 reportDefault(parser, enc, s, next);
3962 } break;
3963 case XML_TOK_INVALID:
3964 *eventPP = next;
3965 return XML_ERROR_INVALID_TOKEN;
3966 case XML_TOK_PARTIAL_CHAR:
3967 if (haveMore) {
3968 *nextPtr = s;
3969 return XML_ERROR_NONE;
3970 }
3971 return XML_ERROR_PARTIAL_CHAR;
3972 case XML_TOK_PARTIAL:
3973 case XML_TOK_NONE:
3974 if (haveMore) {
3975 *nextPtr = s;
3976 return XML_ERROR_NONE;
3977 }
3978 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3979 default:
3980 /* Every token returned by XmlCdataSectionTok() has its own
3981 * explicit case, so this default case will never be executed.
3982 * We retain it as a safety net and exclude it from the coverage
3983 * statistics.
3984 *
3985 * LCOV_EXCL_START
3986 */
3987 *eventPP = next;
3988 return XML_ERROR_UNEXPECTED_STATE;
3989 /* LCOV_EXCL_STOP */
3990 }
3991
3992 *eventPP = s = next;
3993 switch (parser->m_parsingStatus.parsing) {
3994 case XML_SUSPENDED:
3995 *nextPtr = next;
3996 return XML_ERROR_NONE;
3997 case XML_FINISHED:
3998 return XML_ERROR_ABORTED;
3999 default:;
4000 }
4001 }
4002 /* not reached */
4003 }
4004
4005 #ifdef XML_DTD
4006
4007 /* The idea here is to avoid using stack for each IGNORE section when
4008 the whole file is parsed with one call.
4009 */
4010 static enum XML_Error PTRCALL
ignoreSectionProcessor(XML_Parser parser,const char * start,const char * end,const char ** endPtr)4011 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4012 const char **endPtr) {
4013 enum XML_Error result
4014 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4015 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4016 if (result != XML_ERROR_NONE)
4017 return result;
4018 if (start) {
4019 parser->m_processor = prologProcessor;
4020 return prologProcessor(parser, start, end, endPtr);
4021 }
4022 return result;
4023 }
4024
4025 /* startPtr gets set to non-null is the section is closed, and to null
4026 if the section is not yet closed.
4027 */
4028 static enum XML_Error
doIgnoreSection(XML_Parser parser,const ENCODING * enc,const char ** startPtr,const char * end,const char ** nextPtr,XML_Bool haveMore)4029 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4030 const char *end, const char **nextPtr, XML_Bool haveMore) {
4031 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4032 int tok;
4033 const char *s = *startPtr;
4034 const char **eventPP;
4035 const char **eventEndPP;
4036 if (enc == parser->m_encoding) {
4037 eventPP = &parser->m_eventPtr;
4038 *eventPP = s;
4039 eventEndPP = &parser->m_eventEndPtr;
4040 } else {
4041 /* It's not entirely clear, but it seems the following two lines
4042 * of code cannot be executed. The only occasions on which 'enc'
4043 * is not 'encoding' are when this function is called
4044 * from the internal entity processing, and IGNORE sections are an
4045 * error in internal entities.
4046 *
4047 * Since it really isn't clear that this is true, we keep the code
4048 * and just remove it from our coverage tests.
4049 *
4050 * LCOV_EXCL_START
4051 */
4052 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4053 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4054 /* LCOV_EXCL_STOP */
4055 }
4056 *eventPP = s;
4057 *startPtr = NULL;
4058 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4059 # ifdef XML_DTD
4060 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4061 XML_ACCOUNT_DIRECT)) {
4062 accountingOnAbort(parser);
4063 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4064 }
4065 # endif
4066 *eventEndPP = next;
4067 switch (tok) {
4068 case XML_TOK_IGNORE_SECT:
4069 if (parser->m_defaultHandler)
4070 reportDefault(parser, enc, s, next);
4071 *startPtr = next;
4072 *nextPtr = next;
4073 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4074 return XML_ERROR_ABORTED;
4075 else
4076 return XML_ERROR_NONE;
4077 case XML_TOK_INVALID:
4078 *eventPP = next;
4079 return XML_ERROR_INVALID_TOKEN;
4080 case XML_TOK_PARTIAL_CHAR:
4081 if (haveMore) {
4082 *nextPtr = s;
4083 return XML_ERROR_NONE;
4084 }
4085 return XML_ERROR_PARTIAL_CHAR;
4086 case XML_TOK_PARTIAL:
4087 case XML_TOK_NONE:
4088 if (haveMore) {
4089 *nextPtr = s;
4090 return XML_ERROR_NONE;
4091 }
4092 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4093 default:
4094 /* All of the tokens that XmlIgnoreSectionTok() returns have
4095 * explicit cases to handle them, so this default case is never
4096 * executed. We keep it as a safety net anyway, and remove it
4097 * from our test coverage statistics.
4098 *
4099 * LCOV_EXCL_START
4100 */
4101 *eventPP = next;
4102 return XML_ERROR_UNEXPECTED_STATE;
4103 /* LCOV_EXCL_STOP */
4104 }
4105 /* not reached */
4106 }
4107
4108 #endif /* XML_DTD */
4109
4110 static enum XML_Error
initializeEncoding(XML_Parser parser)4111 initializeEncoding(XML_Parser parser) {
4112 const char *s;
4113 #ifdef XML_UNICODE
4114 char encodingBuf[128];
4115 /* See comments about `protocolEncodingName` in parserInit() */
4116 if (! parser->m_protocolEncodingName)
4117 s = NULL;
4118 else {
4119 int i;
4120 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4121 if (i == sizeof(encodingBuf) - 1
4122 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4123 encodingBuf[0] = '\0';
4124 break;
4125 }
4126 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4127 }
4128 encodingBuf[i] = '\0';
4129 s = encodingBuf;
4130 }
4131 #else
4132 s = parser->m_protocolEncodingName;
4133 #endif
4134 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4135 &parser->m_initEncoding, &parser->m_encoding, s))
4136 return XML_ERROR_NONE;
4137 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4138 }
4139
4140 static enum XML_Error
processXmlDecl(XML_Parser parser,int isGeneralTextEntity,const char * s,const char * next)4141 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4142 const char *next) {
4143 const char *encodingName = NULL;
4144 const XML_Char *storedEncName = NULL;
4145 const ENCODING *newEncoding = NULL;
4146 const char *version = NULL;
4147 const char *versionend;
4148 const XML_Char *storedversion = NULL;
4149 int standalone = -1;
4150
4151 #ifdef XML_DTD
4152 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4153 XML_ACCOUNT_DIRECT)) {
4154 accountingOnAbort(parser);
4155 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4156 }
4157 #endif
4158
4159 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4160 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4161 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4162 if (isGeneralTextEntity)
4163 return XML_ERROR_TEXT_DECL;
4164 else
4165 return XML_ERROR_XML_DECL;
4166 }
4167 if (! isGeneralTextEntity && standalone == 1) {
4168 parser->m_dtd->standalone = XML_TRUE;
4169 #ifdef XML_DTD
4170 if (parser->m_paramEntityParsing
4171 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4172 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4173 #endif /* XML_DTD */
4174 }
4175 if (parser->m_xmlDeclHandler) {
4176 if (encodingName != NULL) {
4177 storedEncName = poolStoreString(
4178 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4179 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4180 if (! storedEncName)
4181 return XML_ERROR_NO_MEMORY;
4182 poolFinish(&parser->m_temp2Pool);
4183 }
4184 if (version) {
4185 storedversion
4186 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4187 versionend - parser->m_encoding->minBytesPerChar);
4188 if (! storedversion)
4189 return XML_ERROR_NO_MEMORY;
4190 }
4191 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4192 standalone);
4193 } else if (parser->m_defaultHandler)
4194 reportDefault(parser, parser->m_encoding, s, next);
4195 if (parser->m_protocolEncodingName == NULL) {
4196 if (newEncoding) {
4197 /* Check that the specified encoding does not conflict with what
4198 * the parser has already deduced. Do we have the same number
4199 * of bytes in the smallest representation of a character? If
4200 * this is UTF-16, is it the same endianness?
4201 */
4202 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4203 || (newEncoding->minBytesPerChar == 2
4204 && newEncoding != parser->m_encoding)) {
4205 parser->m_eventPtr = encodingName;
4206 return XML_ERROR_INCORRECT_ENCODING;
4207 }
4208 parser->m_encoding = newEncoding;
4209 } else if (encodingName) {
4210 enum XML_Error result;
4211 if (! storedEncName) {
4212 storedEncName = poolStoreString(
4213 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4214 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4215 if (! storedEncName)
4216 return XML_ERROR_NO_MEMORY;
4217 }
4218 result = handleUnknownEncoding(parser, storedEncName);
4219 poolClear(&parser->m_temp2Pool);
4220 if (result == XML_ERROR_UNKNOWN_ENCODING)
4221 parser->m_eventPtr = encodingName;
4222 return result;
4223 }
4224 }
4225
4226 if (storedEncName || storedversion)
4227 poolClear(&parser->m_temp2Pool);
4228
4229 return XML_ERROR_NONE;
4230 }
4231
4232 static enum XML_Error
handleUnknownEncoding(XML_Parser parser,const XML_Char * encodingName)4233 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4234 if (parser->m_unknownEncodingHandler) {
4235 XML_Encoding info;
4236 int i;
4237 for (i = 0; i < 256; i++)
4238 info.map[i] = -1;
4239 info.convert = NULL;
4240 info.data = NULL;
4241 info.release = NULL;
4242 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4243 encodingName, &info)) {
4244 ENCODING *enc;
4245 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4246 if (! parser->m_unknownEncodingMem) {
4247 if (info.release)
4248 info.release(info.data);
4249 return XML_ERROR_NO_MEMORY;
4250 }
4251 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4252 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4253 if (enc) {
4254 parser->m_unknownEncodingData = info.data;
4255 parser->m_unknownEncodingRelease = info.release;
4256 parser->m_encoding = enc;
4257 return XML_ERROR_NONE;
4258 }
4259 }
4260 if (info.release != NULL)
4261 info.release(info.data);
4262 }
4263 return XML_ERROR_UNKNOWN_ENCODING;
4264 }
4265
4266 static enum XML_Error PTRCALL
prologInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4267 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4268 const char **nextPtr) {
4269 enum XML_Error result = initializeEncoding(parser);
4270 if (result != XML_ERROR_NONE)
4271 return result;
4272 parser->m_processor = prologProcessor;
4273 return prologProcessor(parser, s, end, nextPtr);
4274 }
4275
4276 #ifdef XML_DTD
4277
4278 static enum XML_Error PTRCALL
externalParEntInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4279 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4280 const char **nextPtr) {
4281 enum XML_Error result = initializeEncoding(parser);
4282 if (result != XML_ERROR_NONE)
4283 return result;
4284
4285 /* we know now that XML_Parse(Buffer) has been called,
4286 so we consider the external parameter entity read */
4287 parser->m_dtd->paramEntityRead = XML_TRUE;
4288
4289 if (parser->m_prologState.inEntityValue) {
4290 parser->m_processor = entityValueInitProcessor;
4291 return entityValueInitProcessor(parser, s, end, nextPtr);
4292 } else {
4293 parser->m_processor = externalParEntProcessor;
4294 return externalParEntProcessor(parser, s, end, nextPtr);
4295 }
4296 }
4297
4298 static enum XML_Error PTRCALL
entityValueInitProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4299 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4300 const char **nextPtr) {
4301 int tok;
4302 const char *start = s;
4303 const char *next = start;
4304 parser->m_eventPtr = start;
4305
4306 for (;;) {
4307 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4308 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4309 - storeEntityValue
4310 - processXmlDecl
4311 */
4312 parser->m_eventEndPtr = next;
4313 if (tok <= 0) {
4314 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4315 *nextPtr = s;
4316 return XML_ERROR_NONE;
4317 }
4318 switch (tok) {
4319 case XML_TOK_INVALID:
4320 return XML_ERROR_INVALID_TOKEN;
4321 case XML_TOK_PARTIAL:
4322 return XML_ERROR_UNCLOSED_TOKEN;
4323 case XML_TOK_PARTIAL_CHAR:
4324 return XML_ERROR_PARTIAL_CHAR;
4325 case XML_TOK_NONE: /* start == end */
4326 default:
4327 break;
4328 }
4329 /* found end of entity value - can store it now */
4330 return storeEntityValue(parser, parser->m_encoding, s, end,
4331 XML_ACCOUNT_DIRECT);
4332 } else if (tok == XML_TOK_XML_DECL) {
4333 enum XML_Error result;
4334 result = processXmlDecl(parser, 0, start, next);
4335 if (result != XML_ERROR_NONE)
4336 return result;
4337 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4338 * that to happen, a parameter entity parsing handler must have attempted
4339 * to suspend the parser, which fails and raises an error. The parser can
4340 * be aborted, but can't be suspended.
4341 */
4342 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4343 return XML_ERROR_ABORTED;
4344 *nextPtr = next;
4345 /* stop scanning for text declaration - we found one */
4346 parser->m_processor = entityValueProcessor;
4347 return entityValueProcessor(parser, next, end, nextPtr);
4348 }
4349 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4350 return XML_TOK_NONE on the next call, which would then cause the
4351 function to exit with *nextPtr set to s - that is what we want for other
4352 tokens, but not for the BOM - we would rather like to skip it;
4353 then, when this routine is entered the next time, XmlPrologTok will
4354 return XML_TOK_INVALID, since the BOM is still in the buffer
4355 */
4356 else if (tok == XML_TOK_BOM && next == end
4357 && ! parser->m_parsingStatus.finalBuffer) {
4358 # ifdef XML_DTD
4359 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4360 XML_ACCOUNT_DIRECT)) {
4361 accountingOnAbort(parser);
4362 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4363 }
4364 # endif
4365
4366 *nextPtr = next;
4367 return XML_ERROR_NONE;
4368 }
4369 /* If we get this token, we have the start of what might be a
4370 normal tag, but not a declaration (i.e. it doesn't begin with
4371 "<!"). In a DTD context, that isn't legal.
4372 */
4373 else if (tok == XML_TOK_INSTANCE_START) {
4374 *nextPtr = next;
4375 return XML_ERROR_SYNTAX;
4376 }
4377 start = next;
4378 parser->m_eventPtr = start;
4379 }
4380 }
4381
4382 static enum XML_Error PTRCALL
externalParEntProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4383 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4384 const char **nextPtr) {
4385 const char *next = s;
4386 int tok;
4387
4388 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4389 if (tok <= 0) {
4390 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4391 *nextPtr = s;
4392 return XML_ERROR_NONE;
4393 }
4394 switch (tok) {
4395 case XML_TOK_INVALID:
4396 return XML_ERROR_INVALID_TOKEN;
4397 case XML_TOK_PARTIAL:
4398 return XML_ERROR_UNCLOSED_TOKEN;
4399 case XML_TOK_PARTIAL_CHAR:
4400 return XML_ERROR_PARTIAL_CHAR;
4401 case XML_TOK_NONE: /* start == end */
4402 default:
4403 break;
4404 }
4405 }
4406 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4407 However, when parsing an external subset, doProlog will not accept a BOM
4408 as valid, and report a syntax error, so we have to skip the BOM, and
4409 account for the BOM bytes.
4410 */
4411 else if (tok == XML_TOK_BOM) {
4412 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4413 XML_ACCOUNT_DIRECT)) {
4414 accountingOnAbort(parser);
4415 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4416 }
4417
4418 s = next;
4419 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4420 }
4421
4422 parser->m_processor = prologProcessor;
4423 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4424 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4425 XML_ACCOUNT_DIRECT);
4426 }
4427
4428 static enum XML_Error PTRCALL
entityValueProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4429 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4430 const char **nextPtr) {
4431 const char *start = s;
4432 const char *next = s;
4433 const ENCODING *enc = parser->m_encoding;
4434 int tok;
4435
4436 for (;;) {
4437 tok = XmlPrologTok(enc, start, end, &next);
4438 /* Note: These bytes are accounted later in:
4439 - storeEntityValue
4440 */
4441 if (tok <= 0) {
4442 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4443 *nextPtr = s;
4444 return XML_ERROR_NONE;
4445 }
4446 switch (tok) {
4447 case XML_TOK_INVALID:
4448 return XML_ERROR_INVALID_TOKEN;
4449 case XML_TOK_PARTIAL:
4450 return XML_ERROR_UNCLOSED_TOKEN;
4451 case XML_TOK_PARTIAL_CHAR:
4452 return XML_ERROR_PARTIAL_CHAR;
4453 case XML_TOK_NONE: /* start == end */
4454 default:
4455 break;
4456 }
4457 /* found end of entity value - can store it now */
4458 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
4459 }
4460 start = next;
4461 }
4462 }
4463
4464 #endif /* XML_DTD */
4465
4466 static enum XML_Error PTRCALL
prologProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)4467 prologProcessor(XML_Parser parser, const char *s, const char *end,
4468 const char **nextPtr) {
4469 const char *next = s;
4470 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4471 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4472 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4473 XML_ACCOUNT_DIRECT);
4474 }
4475
4476 static enum XML_Error
doProlog(XML_Parser parser,const ENCODING * enc,const char * s,const char * end,int tok,const char * next,const char ** nextPtr,XML_Bool haveMore,XML_Bool allowClosingDoctype,enum XML_Account account)4477 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4478 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4479 XML_Bool allowClosingDoctype, enum XML_Account account) {
4480 #ifdef XML_DTD
4481 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4482 #endif /* XML_DTD */
4483 static const XML_Char atypeCDATA[]
4484 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4485 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4486 static const XML_Char atypeIDREF[]
4487 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4488 static const XML_Char atypeIDREFS[]
4489 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4490 static const XML_Char atypeENTITY[]
4491 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4492 static const XML_Char atypeENTITIES[]
4493 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4494 ASCII_I, ASCII_E, ASCII_S, '\0'};
4495 static const XML_Char atypeNMTOKEN[]
4496 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4497 static const XML_Char atypeNMTOKENS[]
4498 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4499 ASCII_E, ASCII_N, ASCII_S, '\0'};
4500 static const XML_Char notationPrefix[]
4501 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4502 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4503 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4504 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4505
4506 #ifndef XML_DTD
4507 UNUSED_P(account);
4508 #endif
4509
4510 /* save one level of indirection */
4511 DTD *const dtd = parser->m_dtd;
4512
4513 const char **eventPP;
4514 const char **eventEndPP;
4515 enum XML_Content_Quant quant;
4516
4517 if (enc == parser->m_encoding) {
4518 eventPP = &parser->m_eventPtr;
4519 eventEndPP = &parser->m_eventEndPtr;
4520 } else {
4521 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4522 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4523 }
4524
4525 for (;;) {
4526 int role;
4527 XML_Bool handleDefault = XML_TRUE;
4528 *eventPP = s;
4529 *eventEndPP = next;
4530 if (tok <= 0) {
4531 if (haveMore && tok != XML_TOK_INVALID) {
4532 *nextPtr = s;
4533 return XML_ERROR_NONE;
4534 }
4535 switch (tok) {
4536 case XML_TOK_INVALID:
4537 *eventPP = next;
4538 return XML_ERROR_INVALID_TOKEN;
4539 case XML_TOK_PARTIAL:
4540 return XML_ERROR_UNCLOSED_TOKEN;
4541 case XML_TOK_PARTIAL_CHAR:
4542 return XML_ERROR_PARTIAL_CHAR;
4543 case -XML_TOK_PROLOG_S:
4544 tok = -tok;
4545 break;
4546 case XML_TOK_NONE:
4547 #ifdef XML_DTD
4548 /* for internal PE NOT referenced between declarations */
4549 if (enc != parser->m_encoding
4550 && ! parser->m_openInternalEntities->betweenDecl) {
4551 *nextPtr = s;
4552 return XML_ERROR_NONE;
4553 }
4554 /* WFC: PE Between Declarations - must check that PE contains
4555 complete markup, not only for external PEs, but also for
4556 internal PEs if the reference occurs between declarations.
4557 */
4558 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4559 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4560 == XML_ROLE_ERROR)
4561 return XML_ERROR_INCOMPLETE_PE;
4562 *nextPtr = s;
4563 return XML_ERROR_NONE;
4564 }
4565 #endif /* XML_DTD */
4566 return XML_ERROR_NO_ELEMENTS;
4567 default:
4568 tok = -tok;
4569 next = end;
4570 break;
4571 }
4572 }
4573 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4574 #ifdef XML_DTD
4575 switch (role) {
4576 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4577 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4578 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4579 break;
4580 default:
4581 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4582 accountingOnAbort(parser);
4583 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4584 }
4585 }
4586 #endif
4587 switch (role) {
4588 case XML_ROLE_XML_DECL: {
4589 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4590 if (result != XML_ERROR_NONE)
4591 return result;
4592 enc = parser->m_encoding;
4593 handleDefault = XML_FALSE;
4594 } break;
4595 case XML_ROLE_DOCTYPE_NAME:
4596 if (parser->m_startDoctypeDeclHandler) {
4597 parser->m_doctypeName
4598 = poolStoreString(&parser->m_tempPool, enc, s, next);
4599 if (! parser->m_doctypeName)
4600 return XML_ERROR_NO_MEMORY;
4601 poolFinish(&parser->m_tempPool);
4602 parser->m_doctypePubid = NULL;
4603 handleDefault = XML_FALSE;
4604 }
4605 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4606 break;
4607 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4608 if (parser->m_startDoctypeDeclHandler) {
4609 parser->m_startDoctypeDeclHandler(
4610 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4611 parser->m_doctypePubid, 1);
4612 parser->m_doctypeName = NULL;
4613 poolClear(&parser->m_tempPool);
4614 handleDefault = XML_FALSE;
4615 }
4616 break;
4617 #ifdef XML_DTD
4618 case XML_ROLE_TEXT_DECL: {
4619 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4620 if (result != XML_ERROR_NONE)
4621 return result;
4622 enc = parser->m_encoding;
4623 handleDefault = XML_FALSE;
4624 } break;
4625 #endif /* XML_DTD */
4626 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4627 #ifdef XML_DTD
4628 parser->m_useForeignDTD = XML_FALSE;
4629 parser->m_declEntity = (ENTITY *)lookup(
4630 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4631 if (! parser->m_declEntity)
4632 return XML_ERROR_NO_MEMORY;
4633 #endif /* XML_DTD */
4634 dtd->hasParamEntityRefs = XML_TRUE;
4635 if (parser->m_startDoctypeDeclHandler) {
4636 XML_Char *pubId;
4637 if (! XmlIsPublicId(enc, s, next, eventPP))
4638 return XML_ERROR_PUBLICID;
4639 pubId = poolStoreString(&parser->m_tempPool, enc,
4640 s + enc->minBytesPerChar,
4641 next - enc->minBytesPerChar);
4642 if (! pubId)
4643 return XML_ERROR_NO_MEMORY;
4644 normalizePublicId(pubId);
4645 poolFinish(&parser->m_tempPool);
4646 parser->m_doctypePubid = pubId;
4647 handleDefault = XML_FALSE;
4648 goto alreadyChecked;
4649 }
4650 /* fall through */
4651 case XML_ROLE_ENTITY_PUBLIC_ID:
4652 if (! XmlIsPublicId(enc, s, next, eventPP))
4653 return XML_ERROR_PUBLICID;
4654 alreadyChecked:
4655 if (dtd->keepProcessing && parser->m_declEntity) {
4656 XML_Char *tem
4657 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4658 next - enc->minBytesPerChar);
4659 if (! tem)
4660 return XML_ERROR_NO_MEMORY;
4661 normalizePublicId(tem);
4662 parser->m_declEntity->publicId = tem;
4663 poolFinish(&dtd->pool);
4664 /* Don't suppress the default handler if we fell through from
4665 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4666 */
4667 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
4668 handleDefault = XML_FALSE;
4669 }
4670 break;
4671 case XML_ROLE_DOCTYPE_CLOSE:
4672 if (allowClosingDoctype != XML_TRUE) {
4673 /* Must not close doctype from within expanded parameter entities */
4674 return XML_ERROR_INVALID_TOKEN;
4675 }
4676
4677 if (parser->m_doctypeName) {
4678 parser->m_startDoctypeDeclHandler(
4679 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4680 parser->m_doctypePubid, 0);
4681 poolClear(&parser->m_tempPool);
4682 handleDefault = XML_FALSE;
4683 }
4684 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4685 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
4686 was not set, indicating an external subset
4687 */
4688 #ifdef XML_DTD
4689 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
4690 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4691 dtd->hasParamEntityRefs = XML_TRUE;
4692 if (parser->m_paramEntityParsing
4693 && parser->m_externalEntityRefHandler) {
4694 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4695 externalSubsetName, sizeof(ENTITY));
4696 if (! entity) {
4697 /* The external subset name "#" will have already been
4698 * inserted into the hash table at the start of the
4699 * external entity parsing, so no allocation will happen
4700 * and lookup() cannot fail.
4701 */
4702 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4703 }
4704 if (parser->m_useForeignDTD)
4705 entity->base = parser->m_curBase;
4706 dtd->paramEntityRead = XML_FALSE;
4707 if (! parser->m_externalEntityRefHandler(
4708 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4709 entity->systemId, entity->publicId))
4710 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4711 if (dtd->paramEntityRead) {
4712 if (! dtd->standalone && parser->m_notStandaloneHandler
4713 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4714 return XML_ERROR_NOT_STANDALONE;
4715 }
4716 /* if we didn't read the foreign DTD then this means that there
4717 is no external subset and we must reset dtd->hasParamEntityRefs
4718 */
4719 else if (! parser->m_doctypeSysid)
4720 dtd->hasParamEntityRefs = hadParamEntityRefs;
4721 /* end of DTD - no need to update dtd->keepProcessing */
4722 }
4723 parser->m_useForeignDTD = XML_FALSE;
4724 }
4725 #endif /* XML_DTD */
4726 if (parser->m_endDoctypeDeclHandler) {
4727 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
4728 handleDefault = XML_FALSE;
4729 }
4730 break;
4731 case XML_ROLE_INSTANCE_START:
4732 #ifdef XML_DTD
4733 /* if there is no DOCTYPE declaration then now is the
4734 last chance to read the foreign DTD
4735 */
4736 if (parser->m_useForeignDTD) {
4737 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4738 dtd->hasParamEntityRefs = XML_TRUE;
4739 if (parser->m_paramEntityParsing
4740 && parser->m_externalEntityRefHandler) {
4741 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4742 externalSubsetName, sizeof(ENTITY));
4743 if (! entity)
4744 return XML_ERROR_NO_MEMORY;
4745 entity->base = parser->m_curBase;
4746 dtd->paramEntityRead = XML_FALSE;
4747 if (! parser->m_externalEntityRefHandler(
4748 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4749 entity->systemId, entity->publicId))
4750 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4751 if (dtd->paramEntityRead) {
4752 if (! dtd->standalone && parser->m_notStandaloneHandler
4753 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4754 return XML_ERROR_NOT_STANDALONE;
4755 }
4756 /* if we didn't read the foreign DTD then this means that there
4757 is no external subset and we must reset dtd->hasParamEntityRefs
4758 */
4759 else
4760 dtd->hasParamEntityRefs = hadParamEntityRefs;
4761 /* end of DTD - no need to update dtd->keepProcessing */
4762 }
4763 }
4764 #endif /* XML_DTD */
4765 parser->m_processor = contentProcessor;
4766 return contentProcessor(parser, s, end, nextPtr);
4767 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4768 parser->m_declElementType = getElementType(parser, enc, s, next);
4769 if (! parser->m_declElementType)
4770 return XML_ERROR_NO_MEMORY;
4771 goto checkAttListDeclHandler;
4772 case XML_ROLE_ATTRIBUTE_NAME:
4773 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4774 if (! parser->m_declAttributeId)
4775 return XML_ERROR_NO_MEMORY;
4776 parser->m_declAttributeIsCdata = XML_FALSE;
4777 parser->m_declAttributeType = NULL;
4778 parser->m_declAttributeIsId = XML_FALSE;
4779 goto checkAttListDeclHandler;
4780 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
4781 parser->m_declAttributeIsCdata = XML_TRUE;
4782 parser->m_declAttributeType = atypeCDATA;
4783 goto checkAttListDeclHandler;
4784 case XML_ROLE_ATTRIBUTE_TYPE_ID:
4785 parser->m_declAttributeIsId = XML_TRUE;
4786 parser->m_declAttributeType = atypeID;
4787 goto checkAttListDeclHandler;
4788 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
4789 parser->m_declAttributeType = atypeIDREF;
4790 goto checkAttListDeclHandler;
4791 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
4792 parser->m_declAttributeType = atypeIDREFS;
4793 goto checkAttListDeclHandler;
4794 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
4795 parser->m_declAttributeType = atypeENTITY;
4796 goto checkAttListDeclHandler;
4797 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
4798 parser->m_declAttributeType = atypeENTITIES;
4799 goto checkAttListDeclHandler;
4800 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
4801 parser->m_declAttributeType = atypeNMTOKEN;
4802 goto checkAttListDeclHandler;
4803 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
4804 parser->m_declAttributeType = atypeNMTOKENS;
4805 checkAttListDeclHandler:
4806 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
4807 handleDefault = XML_FALSE;
4808 break;
4809 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4810 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
4811 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
4812 const XML_Char *prefix;
4813 if (parser->m_declAttributeType) {
4814 prefix = enumValueSep;
4815 } else {
4816 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4817 : enumValueStart);
4818 }
4819 if (! poolAppendString(&parser->m_tempPool, prefix))
4820 return XML_ERROR_NO_MEMORY;
4821 if (! poolAppend(&parser->m_tempPool, enc, s, next))
4822 return XML_ERROR_NO_MEMORY;
4823 parser->m_declAttributeType = parser->m_tempPool.start;
4824 handleDefault = XML_FALSE;
4825 }
4826 break;
4827 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4828 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
4829 if (dtd->keepProcessing) {
4830 if (! defineAttribute(parser->m_declElementType,
4831 parser->m_declAttributeId,
4832 parser->m_declAttributeIsCdata,
4833 parser->m_declAttributeIsId, 0, parser))
4834 return XML_ERROR_NO_MEMORY;
4835 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4836 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4837 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4838 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4839 /* Enumerated or Notation type */
4840 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4841 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4842 return XML_ERROR_NO_MEMORY;
4843 parser->m_declAttributeType = parser->m_tempPool.start;
4844 poolFinish(&parser->m_tempPool);
4845 }
4846 *eventEndPP = s;
4847 parser->m_attlistDeclHandler(
4848 parser->m_handlerArg, parser->m_declElementType->name,
4849 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4850 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4851 poolClear(&parser->m_tempPool);
4852 handleDefault = XML_FALSE;
4853 }
4854 }
4855 break;
4856 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4857 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
4858 if (dtd->keepProcessing) {
4859 const XML_Char *attVal;
4860 enum XML_Error result = storeAttributeValue(
4861 parser, enc, parser->m_declAttributeIsCdata,
4862 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4863 XML_ACCOUNT_NONE);
4864 if (result)
4865 return result;
4866 attVal = poolStart(&dtd->pool);
4867 poolFinish(&dtd->pool);
4868 /* ID attributes aren't allowed to have a default */
4869 if (! defineAttribute(
4870 parser->m_declElementType, parser->m_declAttributeId,
4871 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
4872 return XML_ERROR_NO_MEMORY;
4873 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4874 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4875 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4876 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
4877 /* Enumerated or Notation type */
4878 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4879 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
4880 return XML_ERROR_NO_MEMORY;
4881 parser->m_declAttributeType = parser->m_tempPool.start;
4882 poolFinish(&parser->m_tempPool);
4883 }
4884 *eventEndPP = s;
4885 parser->m_attlistDeclHandler(
4886 parser->m_handlerArg, parser->m_declElementType->name,
4887 parser->m_declAttributeId->name, parser->m_declAttributeType,
4888 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4889 poolClear(&parser->m_tempPool);
4890 handleDefault = XML_FALSE;
4891 }
4892 }
4893 break;
4894 case XML_ROLE_ENTITY_VALUE:
4895 if (dtd->keepProcessing) {
4896 enum XML_Error result
4897 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
4898 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
4899 if (parser->m_declEntity) {
4900 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4901 parser->m_declEntity->textLen
4902 = (int)(poolLength(&dtd->entityValuePool));
4903 poolFinish(&dtd->entityValuePool);
4904 if (parser->m_entityDeclHandler) {
4905 *eventEndPP = s;
4906 parser->m_entityDeclHandler(
4907 parser->m_handlerArg, parser->m_declEntity->name,
4908 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4909 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
4910 handleDefault = XML_FALSE;
4911 }
4912 } else
4913 poolDiscard(&dtd->entityValuePool);
4914 if (result != XML_ERROR_NONE)
4915 return result;
4916 }
4917 break;
4918 case XML_ROLE_DOCTYPE_SYSTEM_ID:
4919 #ifdef XML_DTD
4920 parser->m_useForeignDTD = XML_FALSE;
4921 #endif /* XML_DTD */
4922 dtd->hasParamEntityRefs = XML_TRUE;
4923 if (parser->m_startDoctypeDeclHandler) {
4924 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
4925 s + enc->minBytesPerChar,
4926 next - enc->minBytesPerChar);
4927 if (parser->m_doctypeSysid == NULL)
4928 return XML_ERROR_NO_MEMORY;
4929 poolFinish(&parser->m_tempPool);
4930 handleDefault = XML_FALSE;
4931 }
4932 #ifdef XML_DTD
4933 else
4934 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4935 for the case where no parser->m_startDoctypeDeclHandler is set */
4936 parser->m_doctypeSysid = externalSubsetName;
4937 #endif /* XML_DTD */
4938 if (! dtd->standalone
4939 #ifdef XML_DTD
4940 && ! parser->m_paramEntityParsing
4941 #endif /* XML_DTD */
4942 && parser->m_notStandaloneHandler
4943 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
4944 return XML_ERROR_NOT_STANDALONE;
4945 #ifndef XML_DTD
4946 break;
4947 #else /* XML_DTD */
4948 if (! parser->m_declEntity) {
4949 parser->m_declEntity = (ENTITY *)lookup(
4950 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4951 if (! parser->m_declEntity)
4952 return XML_ERROR_NO_MEMORY;
4953 parser->m_declEntity->publicId = NULL;
4954 }
4955 #endif /* XML_DTD */
4956 /* fall through */
4957 case XML_ROLE_ENTITY_SYSTEM_ID:
4958 if (dtd->keepProcessing && parser->m_declEntity) {
4959 parser->m_declEntity->systemId
4960 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4961 next - enc->minBytesPerChar);
4962 if (! parser->m_declEntity->systemId)
4963 return XML_ERROR_NO_MEMORY;
4964 parser->m_declEntity->base = parser->m_curBase;
4965 poolFinish(&dtd->pool);
4966 /* Don't suppress the default handler if we fell through from
4967 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4968 */
4969 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
4970 handleDefault = XML_FALSE;
4971 }
4972 break;
4973 case XML_ROLE_ENTITY_COMPLETE:
4974 if (dtd->keepProcessing && parser->m_declEntity
4975 && parser->m_entityDeclHandler) {
4976 *eventEndPP = s;
4977 parser->m_entityDeclHandler(
4978 parser->m_handlerArg, parser->m_declEntity->name,
4979 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4980 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
4981 handleDefault = XML_FALSE;
4982 }
4983 break;
4984 case XML_ROLE_ENTITY_NOTATION_NAME:
4985 if (dtd->keepProcessing && parser->m_declEntity) {
4986 parser->m_declEntity->notation
4987 = poolStoreString(&dtd->pool, enc, s, next);
4988 if (! parser->m_declEntity->notation)
4989 return XML_ERROR_NO_MEMORY;
4990 poolFinish(&dtd->pool);
4991 if (parser->m_unparsedEntityDeclHandler) {
4992 *eventEndPP = s;
4993 parser->m_unparsedEntityDeclHandler(
4994 parser->m_handlerArg, parser->m_declEntity->name,
4995 parser->m_declEntity->base, parser->m_declEntity->systemId,
4996 parser->m_declEntity->publicId, parser->m_declEntity->notation);
4997 handleDefault = XML_FALSE;
4998 } else if (parser->m_entityDeclHandler) {
4999 *eventEndPP = s;
5000 parser->m_entityDeclHandler(
5001 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5002 parser->m_declEntity->base, parser->m_declEntity->systemId,
5003 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5004 handleDefault = XML_FALSE;
5005 }
5006 }
5007 break;
5008 case XML_ROLE_GENERAL_ENTITY_NAME: {
5009 if (XmlPredefinedEntityName(enc, s, next)) {
5010 parser->m_declEntity = NULL;
5011 break;
5012 }
5013 if (dtd->keepProcessing) {
5014 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5015 if (! name)
5016 return XML_ERROR_NO_MEMORY;
5017 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5018 name, sizeof(ENTITY));
5019 if (! parser->m_declEntity)
5020 return XML_ERROR_NO_MEMORY;
5021 if (parser->m_declEntity->name != name) {
5022 poolDiscard(&dtd->pool);
5023 parser->m_declEntity = NULL;
5024 } else {
5025 poolFinish(&dtd->pool);
5026 parser->m_declEntity->publicId = NULL;
5027 parser->m_declEntity->is_param = XML_FALSE;
5028 /* if we have a parent parser or are reading an internal parameter
5029 entity, then the entity declaration is not considered "internal"
5030 */
5031 parser->m_declEntity->is_internal
5032 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5033 if (parser->m_entityDeclHandler)
5034 handleDefault = XML_FALSE;
5035 }
5036 } else {
5037 poolDiscard(&dtd->pool);
5038 parser->m_declEntity = NULL;
5039 }
5040 } break;
5041 case XML_ROLE_PARAM_ENTITY_NAME:
5042 #ifdef XML_DTD
5043 if (dtd->keepProcessing) {
5044 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5045 if (! name)
5046 return XML_ERROR_NO_MEMORY;
5047 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5048 name, sizeof(ENTITY));
5049 if (! parser->m_declEntity)
5050 return XML_ERROR_NO_MEMORY;
5051 if (parser->m_declEntity->name != name) {
5052 poolDiscard(&dtd->pool);
5053 parser->m_declEntity = NULL;
5054 } else {
5055 poolFinish(&dtd->pool);
5056 parser->m_declEntity->publicId = NULL;
5057 parser->m_declEntity->is_param = XML_TRUE;
5058 /* if we have a parent parser or are reading an internal parameter
5059 entity, then the entity declaration is not considered "internal"
5060 */
5061 parser->m_declEntity->is_internal
5062 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5063 if (parser->m_entityDeclHandler)
5064 handleDefault = XML_FALSE;
5065 }
5066 } else {
5067 poolDiscard(&dtd->pool);
5068 parser->m_declEntity = NULL;
5069 }
5070 #else /* not XML_DTD */
5071 parser->m_declEntity = NULL;
5072 #endif /* XML_DTD */
5073 break;
5074 case XML_ROLE_NOTATION_NAME:
5075 parser->m_declNotationPublicId = NULL;
5076 parser->m_declNotationName = NULL;
5077 if (parser->m_notationDeclHandler) {
5078 parser->m_declNotationName
5079 = poolStoreString(&parser->m_tempPool, enc, s, next);
5080 if (! parser->m_declNotationName)
5081 return XML_ERROR_NO_MEMORY;
5082 poolFinish(&parser->m_tempPool);
5083 handleDefault = XML_FALSE;
5084 }
5085 break;
5086 case XML_ROLE_NOTATION_PUBLIC_ID:
5087 if (! XmlIsPublicId(enc, s, next, eventPP))
5088 return XML_ERROR_PUBLICID;
5089 if (parser
5090 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5091 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5092 s + enc->minBytesPerChar,
5093 next - enc->minBytesPerChar);
5094 if (! tem)
5095 return XML_ERROR_NO_MEMORY;
5096 normalizePublicId(tem);
5097 parser->m_declNotationPublicId = tem;
5098 poolFinish(&parser->m_tempPool);
5099 handleDefault = XML_FALSE;
5100 }
5101 break;
5102 case XML_ROLE_NOTATION_SYSTEM_ID:
5103 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5104 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5105 s + enc->minBytesPerChar,
5106 next - enc->minBytesPerChar);
5107 if (! systemId)
5108 return XML_ERROR_NO_MEMORY;
5109 *eventEndPP = s;
5110 parser->m_notationDeclHandler(
5111 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5112 systemId, parser->m_declNotationPublicId);
5113 handleDefault = XML_FALSE;
5114 }
5115 poolClear(&parser->m_tempPool);
5116 break;
5117 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5118 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5119 *eventEndPP = s;
5120 parser->m_notationDeclHandler(
5121 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5122 0, parser->m_declNotationPublicId);
5123 handleDefault = XML_FALSE;
5124 }
5125 poolClear(&parser->m_tempPool);
5126 break;
5127 case XML_ROLE_ERROR:
5128 switch (tok) {
5129 case XML_TOK_PARAM_ENTITY_REF:
5130 /* PE references in internal subset are
5131 not allowed within declarations. */
5132 return XML_ERROR_PARAM_ENTITY_REF;
5133 case XML_TOK_XML_DECL:
5134 return XML_ERROR_MISPLACED_XML_PI;
5135 default:
5136 return XML_ERROR_SYNTAX;
5137 }
5138 #ifdef XML_DTD
5139 case XML_ROLE_IGNORE_SECT: {
5140 enum XML_Error result;
5141 if (parser->m_defaultHandler)
5142 reportDefault(parser, enc, s, next);
5143 handleDefault = XML_FALSE;
5144 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5145 if (result != XML_ERROR_NONE)
5146 return result;
5147 else if (! next) {
5148 parser->m_processor = ignoreSectionProcessor;
5149 return result;
5150 }
5151 } break;
5152 #endif /* XML_DTD */
5153 case XML_ROLE_GROUP_OPEN:
5154 if (parser->m_prologState.level >= parser->m_groupSize) {
5155 if (parser->m_groupSize) {
5156 {
5157 /* Detect and prevent integer overflow */
5158 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5159 return XML_ERROR_NO_MEMORY;
5160 }
5161
5162 char *const new_connector = (char *)REALLOC(
5163 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5164 if (new_connector == NULL) {
5165 parser->m_groupSize /= 2;
5166 return XML_ERROR_NO_MEMORY;
5167 }
5168 parser->m_groupConnector = new_connector;
5169 }
5170
5171 if (dtd->scaffIndex) {
5172 /* Detect and prevent integer overflow.
5173 * The preprocessor guard addresses the "always false" warning
5174 * from -Wtype-limits on platforms where
5175 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5176 #if UINT_MAX >= SIZE_MAX
5177 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5178 return XML_ERROR_NO_MEMORY;
5179 }
5180 #endif
5181
5182 int *const new_scaff_index = (int *)REALLOC(
5183 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5184 if (new_scaff_index == NULL)
5185 return XML_ERROR_NO_MEMORY;
5186 dtd->scaffIndex = new_scaff_index;
5187 }
5188 } else {
5189 parser->m_groupConnector
5190 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5191 if (! parser->m_groupConnector) {
5192 parser->m_groupSize = 0;
5193 return XML_ERROR_NO_MEMORY;
5194 }
5195 }
5196 }
5197 parser->m_groupConnector[parser->m_prologState.level] = 0;
5198 if (dtd->in_eldecl) {
5199 int myindex = nextScaffoldPart(parser);
5200 if (myindex < 0)
5201 return XML_ERROR_NO_MEMORY;
5202 assert(dtd->scaffIndex != NULL);
5203 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5204 dtd->scaffLevel++;
5205 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5206 if (parser->m_elementDeclHandler)
5207 handleDefault = XML_FALSE;
5208 }
5209 break;
5210 case XML_ROLE_GROUP_SEQUENCE:
5211 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5212 return XML_ERROR_SYNTAX;
5213 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5214 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5215 handleDefault = XML_FALSE;
5216 break;
5217 case XML_ROLE_GROUP_CHOICE:
5218 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5219 return XML_ERROR_SYNTAX;
5220 if (dtd->in_eldecl
5221 && ! parser->m_groupConnector[parser->m_prologState.level]
5222 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5223 != XML_CTYPE_MIXED)) {
5224 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5225 = XML_CTYPE_CHOICE;
5226 if (parser->m_elementDeclHandler)
5227 handleDefault = XML_FALSE;
5228 }
5229 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5230 break;
5231 case XML_ROLE_PARAM_ENTITY_REF:
5232 #ifdef XML_DTD
5233 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5234 dtd->hasParamEntityRefs = XML_TRUE;
5235 if (! parser->m_paramEntityParsing)
5236 dtd->keepProcessing = dtd->standalone;
5237 else {
5238 const XML_Char *name;
5239 ENTITY *entity;
5240 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5241 next - enc->minBytesPerChar);
5242 if (! name)
5243 return XML_ERROR_NO_MEMORY;
5244 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5245 poolDiscard(&dtd->pool);
5246 /* first, determine if a check for an existing declaration is needed;
5247 if yes, check that the entity exists, and that it is internal,
5248 otherwise call the skipped entity handler
5249 */
5250 if (parser->m_prologState.documentEntity
5251 && (dtd->standalone ? ! parser->m_openInternalEntities
5252 : ! dtd->hasParamEntityRefs)) {
5253 if (! entity)
5254 return XML_ERROR_UNDEFINED_ENTITY;
5255 else if (! entity->is_internal) {
5256 /* It's hard to exhaustively search the code to be sure,
5257 * but there doesn't seem to be a way of executing the
5258 * following line. There are two cases:
5259 *
5260 * If 'standalone' is false, the DTD must have no
5261 * parameter entities or we wouldn't have passed the outer
5262 * 'if' statement. That measn the only entity in the hash
5263 * table is the external subset name "#" which cannot be
5264 * given as a parameter entity name in XML syntax, so the
5265 * lookup must have returned NULL and we don't even reach
5266 * the test for an internal entity.
5267 *
5268 * If 'standalone' is true, it does not seem to be
5269 * possible to create entities taking this code path that
5270 * are not internal entities, so fail the test above.
5271 *
5272 * Because this analysis is very uncertain, the code is
5273 * being left in place and merely removed from the
5274 * coverage test statistics.
5275 */
5276 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5277 }
5278 } else if (! entity) {
5279 dtd->keepProcessing = dtd->standalone;
5280 /* cannot report skipped entities in declarations */
5281 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5282 && parser->m_skippedEntityHandler) {
5283 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5284 handleDefault = XML_FALSE;
5285 }
5286 break;
5287 }
5288 if (entity->open)
5289 return XML_ERROR_RECURSIVE_ENTITY_REF;
5290 if (entity->textPtr) {
5291 enum XML_Error result;
5292 XML_Bool betweenDecl
5293 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5294 result = processInternalEntity(parser, entity, betweenDecl);
5295 if (result != XML_ERROR_NONE)
5296 return result;
5297 handleDefault = XML_FALSE;
5298 break;
5299 }
5300 if (parser->m_externalEntityRefHandler) {
5301 dtd->paramEntityRead = XML_FALSE;
5302 entity->open = XML_TRUE;
5303 entityTrackingOnOpen(parser, entity, __LINE__);
5304 if (! parser->m_externalEntityRefHandler(
5305 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5306 entity->systemId, entity->publicId)) {
5307 entityTrackingOnClose(parser, entity, __LINE__);
5308 entity->open = XML_FALSE;
5309 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5310 }
5311 entityTrackingOnClose(parser, entity, __LINE__);
5312 entity->open = XML_FALSE;
5313 handleDefault = XML_FALSE;
5314 if (! dtd->paramEntityRead) {
5315 dtd->keepProcessing = dtd->standalone;
5316 break;
5317 }
5318 } else {
5319 dtd->keepProcessing = dtd->standalone;
5320 break;
5321 }
5322 }
5323 #endif /* XML_DTD */
5324 if (! dtd->standalone && parser->m_notStandaloneHandler
5325 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5326 return XML_ERROR_NOT_STANDALONE;
5327 break;
5328
5329 /* Element declaration stuff */
5330
5331 case XML_ROLE_ELEMENT_NAME:
5332 if (parser->m_elementDeclHandler) {
5333 parser->m_declElementType = getElementType(parser, enc, s, next);
5334 if (! parser->m_declElementType)
5335 return XML_ERROR_NO_MEMORY;
5336 dtd->scaffLevel = 0;
5337 dtd->scaffCount = 0;
5338 dtd->in_eldecl = XML_TRUE;
5339 handleDefault = XML_FALSE;
5340 }
5341 break;
5342
5343 case XML_ROLE_CONTENT_ANY:
5344 case XML_ROLE_CONTENT_EMPTY:
5345 if (dtd->in_eldecl) {
5346 if (parser->m_elementDeclHandler) {
5347 XML_Content *content
5348 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5349 if (! content)
5350 return XML_ERROR_NO_MEMORY;
5351 content->quant = XML_CQUANT_NONE;
5352 content->name = NULL;
5353 content->numchildren = 0;
5354 content->children = NULL;
5355 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5356 : XML_CTYPE_EMPTY);
5357 *eventEndPP = s;
5358 parser->m_elementDeclHandler(
5359 parser->m_handlerArg, parser->m_declElementType->name, content);
5360 handleDefault = XML_FALSE;
5361 }
5362 dtd->in_eldecl = XML_FALSE;
5363 }
5364 break;
5365
5366 case XML_ROLE_CONTENT_PCDATA:
5367 if (dtd->in_eldecl) {
5368 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5369 = XML_CTYPE_MIXED;
5370 if (parser->m_elementDeclHandler)
5371 handleDefault = XML_FALSE;
5372 }
5373 break;
5374
5375 case XML_ROLE_CONTENT_ELEMENT:
5376 quant = XML_CQUANT_NONE;
5377 goto elementContent;
5378 case XML_ROLE_CONTENT_ELEMENT_OPT:
5379 quant = XML_CQUANT_OPT;
5380 goto elementContent;
5381 case XML_ROLE_CONTENT_ELEMENT_REP:
5382 quant = XML_CQUANT_REP;
5383 goto elementContent;
5384 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5385 quant = XML_CQUANT_PLUS;
5386 elementContent:
5387 if (dtd->in_eldecl) {
5388 ELEMENT_TYPE *el;
5389 const XML_Char *name;
5390 size_t nameLen;
5391 const char *nxt
5392 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5393 int myindex = nextScaffoldPart(parser);
5394 if (myindex < 0)
5395 return XML_ERROR_NO_MEMORY;
5396 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5397 dtd->scaffold[myindex].quant = quant;
5398 el = getElementType(parser, enc, s, nxt);
5399 if (! el)
5400 return XML_ERROR_NO_MEMORY;
5401 name = el->name;
5402 dtd->scaffold[myindex].name = name;
5403 nameLen = 0;
5404 for (; name[nameLen++];)
5405 ;
5406
5407 /* Detect and prevent integer overflow */
5408 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5409 return XML_ERROR_NO_MEMORY;
5410 }
5411
5412 dtd->contentStringLen += (unsigned)nameLen;
5413 if (parser->m_elementDeclHandler)
5414 handleDefault = XML_FALSE;
5415 }
5416 break;
5417
5418 case XML_ROLE_GROUP_CLOSE:
5419 quant = XML_CQUANT_NONE;
5420 goto closeGroup;
5421 case XML_ROLE_GROUP_CLOSE_OPT:
5422 quant = XML_CQUANT_OPT;
5423 goto closeGroup;
5424 case XML_ROLE_GROUP_CLOSE_REP:
5425 quant = XML_CQUANT_REP;
5426 goto closeGroup;
5427 case XML_ROLE_GROUP_CLOSE_PLUS:
5428 quant = XML_CQUANT_PLUS;
5429 closeGroup:
5430 if (dtd->in_eldecl) {
5431 if (parser->m_elementDeclHandler)
5432 handleDefault = XML_FALSE;
5433 dtd->scaffLevel--;
5434 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5435 if (dtd->scaffLevel == 0) {
5436 if (! handleDefault) {
5437 XML_Content *model = build_model(parser);
5438 if (! model)
5439 return XML_ERROR_NO_MEMORY;
5440 *eventEndPP = s;
5441 parser->m_elementDeclHandler(
5442 parser->m_handlerArg, parser->m_declElementType->name, model);
5443 }
5444 dtd->in_eldecl = XML_FALSE;
5445 dtd->contentStringLen = 0;
5446 }
5447 }
5448 break;
5449 /* End element declaration stuff */
5450
5451 case XML_ROLE_PI:
5452 if (! reportProcessingInstruction(parser, enc, s, next))
5453 return XML_ERROR_NO_MEMORY;
5454 handleDefault = XML_FALSE;
5455 break;
5456 case XML_ROLE_COMMENT:
5457 if (! reportComment(parser, enc, s, next))
5458 return XML_ERROR_NO_MEMORY;
5459 handleDefault = XML_FALSE;
5460 break;
5461 case XML_ROLE_NONE:
5462 switch (tok) {
5463 case XML_TOK_BOM:
5464 handleDefault = XML_FALSE;
5465 break;
5466 }
5467 break;
5468 case XML_ROLE_DOCTYPE_NONE:
5469 if (parser->m_startDoctypeDeclHandler)
5470 handleDefault = XML_FALSE;
5471 break;
5472 case XML_ROLE_ENTITY_NONE:
5473 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5474 handleDefault = XML_FALSE;
5475 break;
5476 case XML_ROLE_NOTATION_NONE:
5477 if (parser->m_notationDeclHandler)
5478 handleDefault = XML_FALSE;
5479 break;
5480 case XML_ROLE_ATTLIST_NONE:
5481 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5482 handleDefault = XML_FALSE;
5483 break;
5484 case XML_ROLE_ELEMENT_NONE:
5485 if (parser->m_elementDeclHandler)
5486 handleDefault = XML_FALSE;
5487 break;
5488 } /* end of big switch */
5489
5490 if (handleDefault && parser->m_defaultHandler)
5491 reportDefault(parser, enc, s, next);
5492
5493 switch (parser->m_parsingStatus.parsing) {
5494 case XML_SUSPENDED:
5495 *nextPtr = next;
5496 return XML_ERROR_NONE;
5497 case XML_FINISHED:
5498 return XML_ERROR_ABORTED;
5499 default:
5500 s = next;
5501 tok = XmlPrologTok(enc, s, end, &next);
5502 }
5503 }
5504 /* not reached */
5505 }
5506
5507 static enum XML_Error PTRCALL
epilogProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5508 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5509 const char **nextPtr) {
5510 parser->m_processor = epilogProcessor;
5511 parser->m_eventPtr = s;
5512 for (;;) {
5513 const char *next = NULL;
5514 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5515 #ifdef XML_DTD
5516 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5517 XML_ACCOUNT_DIRECT)) {
5518 accountingOnAbort(parser);
5519 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5520 }
5521 #endif
5522 parser->m_eventEndPtr = next;
5523 switch (tok) {
5524 /* report partial linebreak - it might be the last token */
5525 case -XML_TOK_PROLOG_S:
5526 if (parser->m_defaultHandler) {
5527 reportDefault(parser, parser->m_encoding, s, next);
5528 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5529 return XML_ERROR_ABORTED;
5530 }
5531 *nextPtr = next;
5532 return XML_ERROR_NONE;
5533 case XML_TOK_NONE:
5534 *nextPtr = s;
5535 return XML_ERROR_NONE;
5536 case XML_TOK_PROLOG_S:
5537 if (parser->m_defaultHandler)
5538 reportDefault(parser, parser->m_encoding, s, next);
5539 break;
5540 case XML_TOK_PI:
5541 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5542 return XML_ERROR_NO_MEMORY;
5543 break;
5544 case XML_TOK_COMMENT:
5545 if (! reportComment(parser, parser->m_encoding, s, next))
5546 return XML_ERROR_NO_MEMORY;
5547 break;
5548 case XML_TOK_INVALID:
5549 parser->m_eventPtr = next;
5550 return XML_ERROR_INVALID_TOKEN;
5551 case XML_TOK_PARTIAL:
5552 if (! parser->m_parsingStatus.finalBuffer) {
5553 *nextPtr = s;
5554 return XML_ERROR_NONE;
5555 }
5556 return XML_ERROR_UNCLOSED_TOKEN;
5557 case XML_TOK_PARTIAL_CHAR:
5558 if (! parser->m_parsingStatus.finalBuffer) {
5559 *nextPtr = s;
5560 return XML_ERROR_NONE;
5561 }
5562 return XML_ERROR_PARTIAL_CHAR;
5563 default:
5564 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5565 }
5566 parser->m_eventPtr = s = next;
5567 switch (parser->m_parsingStatus.parsing) {
5568 case XML_SUSPENDED:
5569 *nextPtr = next;
5570 return XML_ERROR_NONE;
5571 case XML_FINISHED:
5572 return XML_ERROR_ABORTED;
5573 default:;
5574 }
5575 }
5576 }
5577
5578 static enum XML_Error
processInternalEntity(XML_Parser parser,ENTITY * entity,XML_Bool betweenDecl)5579 processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
5580 const char *textStart, *textEnd;
5581 const char *next;
5582 enum XML_Error result;
5583 OPEN_INTERNAL_ENTITY *openEntity;
5584
5585 if (parser->m_freeInternalEntities) {
5586 openEntity = parser->m_freeInternalEntities;
5587 parser->m_freeInternalEntities = openEntity->next;
5588 } else {
5589 openEntity
5590 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5591 if (! openEntity)
5592 return XML_ERROR_NO_MEMORY;
5593 }
5594 entity->open = XML_TRUE;
5595 #ifdef XML_DTD
5596 entityTrackingOnOpen(parser, entity, __LINE__);
5597 #endif
5598 entity->processed = 0;
5599 openEntity->next = parser->m_openInternalEntities;
5600 parser->m_openInternalEntities = openEntity;
5601 openEntity->entity = entity;
5602 openEntity->startTagLevel = parser->m_tagLevel;
5603 openEntity->betweenDecl = betweenDecl;
5604 openEntity->internalEventPtr = NULL;
5605 openEntity->internalEventEndPtr = NULL;
5606 textStart = (const char *)entity->textPtr;
5607 textEnd = (const char *)(entity->textPtr + entity->textLen);
5608 /* Set a safe default value in case 'next' does not get set */
5609 next = textStart;
5610
5611 #ifdef XML_DTD
5612 if (entity->is_param) {
5613 int tok
5614 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5615 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5616 tok, next, &next, XML_FALSE, XML_FALSE,
5617 XML_ACCOUNT_ENTITY_EXPANSION);
5618 } else
5619 #endif /* XML_DTD */
5620 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5621 textStart, textEnd, &next, XML_FALSE,
5622 XML_ACCOUNT_ENTITY_EXPANSION);
5623
5624 if (result == XML_ERROR_NONE) {
5625 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5626 entity->processed = (int)(next - textStart);
5627 parser->m_processor = internalEntityProcessor;
5628 } else {
5629 #ifdef XML_DTD
5630 entityTrackingOnClose(parser, entity, __LINE__);
5631 #endif /* XML_DTD */
5632 entity->open = XML_FALSE;
5633 parser->m_openInternalEntities = openEntity->next;
5634 /* put openEntity back in list of free instances */
5635 openEntity->next = parser->m_freeInternalEntities;
5636 parser->m_freeInternalEntities = openEntity;
5637 }
5638 }
5639 return result;
5640 }
5641
5642 static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5643 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5644 const char **nextPtr) {
5645 ENTITY *entity;
5646 const char *textStart, *textEnd;
5647 const char *next;
5648 enum XML_Error result;
5649 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
5650 if (! openEntity)
5651 return XML_ERROR_UNEXPECTED_STATE;
5652
5653 entity = openEntity->entity;
5654 textStart = ((const char *)entity->textPtr) + entity->processed;
5655 textEnd = (const char *)(entity->textPtr + entity->textLen);
5656 /* Set a safe default value in case 'next' does not get set */
5657 next = textStart;
5658
5659 #ifdef XML_DTD
5660 if (entity->is_param) {
5661 int tok
5662 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5663 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5664 tok, next, &next, XML_FALSE, XML_TRUE,
5665 XML_ACCOUNT_ENTITY_EXPANSION);
5666 } else
5667 #endif /* XML_DTD */
5668 result = doContent(parser, openEntity->startTagLevel,
5669 parser->m_internalEncoding, textStart, textEnd, &next,
5670 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
5671
5672 if (result != XML_ERROR_NONE)
5673 return result;
5674 else if (textEnd != next
5675 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
5676 entity->processed = (int)(next - (const char *)entity->textPtr);
5677 return result;
5678 } else {
5679 #ifdef XML_DTD
5680 entityTrackingOnClose(parser, entity, __LINE__);
5681 #endif
5682 entity->open = XML_FALSE;
5683 parser->m_openInternalEntities = openEntity->next;
5684 /* put openEntity back in list of free instances */
5685 openEntity->next = parser->m_freeInternalEntities;
5686 parser->m_freeInternalEntities = openEntity;
5687 }
5688
5689 #ifdef XML_DTD
5690 if (entity->is_param) {
5691 int tok;
5692 parser->m_processor = prologProcessor;
5693 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5694 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5695 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5696 XML_ACCOUNT_DIRECT);
5697 } else
5698 #endif /* XML_DTD */
5699 {
5700 parser->m_processor = contentProcessor;
5701 /* see externalEntityContentProcessor vs contentProcessor */
5702 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
5703 s, end, nextPtr,
5704 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5705 XML_ACCOUNT_DIRECT);
5706 }
5707 }
5708
5709 static enum XML_Error PTRCALL
errorProcessor(XML_Parser parser,const char * s,const char * end,const char ** nextPtr)5710 errorProcessor(XML_Parser parser, const char *s, const char *end,
5711 const char **nextPtr) {
5712 UNUSED_P(s);
5713 UNUSED_P(end);
5714 UNUSED_P(nextPtr);
5715 return parser->m_errorCode;
5716 }
5717
5718 static enum XML_Error
storeAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5719 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5720 const char *ptr, const char *end, STRING_POOL *pool,
5721 enum XML_Account account) {
5722 enum XML_Error result
5723 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
5724 if (result)
5725 return result;
5726 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5727 poolChop(pool);
5728 if (! poolAppendChar(pool, XML_T('\0')))
5729 return XML_ERROR_NO_MEMORY;
5730 return XML_ERROR_NONE;
5731 }
5732
5733 static enum XML_Error
appendAttributeValue(XML_Parser parser,const ENCODING * enc,XML_Bool isCdata,const char * ptr,const char * end,STRING_POOL * pool,enum XML_Account account)5734 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5735 const char *ptr, const char *end, STRING_POOL *pool,
5736 enum XML_Account account) {
5737 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5738 #ifndef XML_DTD
5739 UNUSED_P(account);
5740 #endif
5741
5742 for (;;) {
5743 const char *next
5744 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
5745 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5746 #ifdef XML_DTD
5747 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5748 accountingOnAbort(parser);
5749 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5750 }
5751 #endif
5752 switch (tok) {
5753 case XML_TOK_NONE:
5754 return XML_ERROR_NONE;
5755 case XML_TOK_INVALID:
5756 if (enc == parser->m_encoding)
5757 parser->m_eventPtr = next;
5758 return XML_ERROR_INVALID_TOKEN;
5759 case XML_TOK_PARTIAL:
5760 if (enc == parser->m_encoding)
5761 parser->m_eventPtr = ptr;
5762 return XML_ERROR_INVALID_TOKEN;
5763 case XML_TOK_CHAR_REF: {
5764 XML_Char buf[XML_ENCODE_MAX];
5765 int i;
5766 int n = XmlCharRefNumber(enc, ptr);
5767 if (n < 0) {
5768 if (enc == parser->m_encoding)
5769 parser->m_eventPtr = ptr;
5770 return XML_ERROR_BAD_CHAR_REF;
5771 }
5772 if (! isCdata && n == 0x20 /* space */
5773 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5774 break;
5775 n = XmlEncode(n, (ICHAR *)buf);
5776 /* The XmlEncode() functions can never return 0 here. That
5777 * error return happens if the code point passed in is either
5778 * negative or greater than or equal to 0x110000. The
5779 * XmlCharRefNumber() functions will all return a number
5780 * strictly less than 0x110000 or a negative value if an error
5781 * occurred. The negative value is intercepted above, so
5782 * XmlEncode() is never passed a value it might return an
5783 * error for.
5784 */
5785 for (i = 0; i < n; i++) {
5786 if (! poolAppendChar(pool, buf[i]))
5787 return XML_ERROR_NO_MEMORY;
5788 }
5789 } break;
5790 case XML_TOK_DATA_CHARS:
5791 if (! poolAppend(pool, enc, ptr, next))
5792 return XML_ERROR_NO_MEMORY;
5793 break;
5794 case XML_TOK_TRAILING_CR:
5795 next = ptr + enc->minBytesPerChar;
5796 /* fall through */
5797 case XML_TOK_ATTRIBUTE_VALUE_S:
5798 case XML_TOK_DATA_NEWLINE:
5799 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5800 break;
5801 if (! poolAppendChar(pool, 0x20))
5802 return XML_ERROR_NO_MEMORY;
5803 break;
5804 case XML_TOK_ENTITY_REF: {
5805 const XML_Char *name;
5806 ENTITY *entity;
5807 char checkEntityDecl;
5808 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5809 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5810 if (ch) {
5811 #ifdef XML_DTD
5812 /* NOTE: We are replacing 4-6 characters original input for 1 character
5813 * so there is no amplification and hence recording without
5814 * protection. */
5815 accountingDiffTolerated(parser, tok, (char *)&ch,
5816 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5817 XML_ACCOUNT_ENTITY_EXPANSION);
5818 #endif /* XML_DTD */
5819 if (! poolAppendChar(pool, ch))
5820 return XML_ERROR_NO_MEMORY;
5821 break;
5822 }
5823 name = poolStoreString(&parser->m_temp2Pool, enc,
5824 ptr + enc->minBytesPerChar,
5825 next - enc->minBytesPerChar);
5826 if (! name)
5827 return XML_ERROR_NO_MEMORY;
5828 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5829 poolDiscard(&parser->m_temp2Pool);
5830 /* First, determine if a check for an existing declaration is needed;
5831 if yes, check that the entity exists, and that it is internal.
5832 */
5833 if (pool == &dtd->pool) /* are we called from prolog? */
5834 checkEntityDecl =
5835 #ifdef XML_DTD
5836 parser->m_prologState.documentEntity &&
5837 #endif /* XML_DTD */
5838 (dtd->standalone ? ! parser->m_openInternalEntities
5839 : ! dtd->hasParamEntityRefs);
5840 else /* if (pool == &parser->m_tempPool): we are called from content */
5841 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5842 if (checkEntityDecl) {
5843 if (! entity)
5844 return XML_ERROR_UNDEFINED_ENTITY;
5845 else if (! entity->is_internal)
5846 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5847 } else if (! entity) {
5848 /* Cannot report skipped entity here - see comments on
5849 parser->m_skippedEntityHandler.
5850 if (parser->m_skippedEntityHandler)
5851 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5852 */
5853 /* Cannot call the default handler because this would be
5854 out of sync with the call to the startElementHandler.
5855 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5856 reportDefault(parser, enc, ptr, next);
5857 */
5858 break;
5859 }
5860 if (entity->open) {
5861 if (enc == parser->m_encoding) {
5862 /* It does not appear that this line can be executed.
5863 *
5864 * The "if (entity->open)" check catches recursive entity
5865 * definitions. In order to be called with an open
5866 * entity, it must have gone through this code before and
5867 * been through the recursive call to
5868 * appendAttributeValue() some lines below. That call
5869 * sets the local encoding ("enc") to the parser's
5870 * internal encoding (internal_utf8 or internal_utf16),
5871 * which can never be the same as the principle encoding.
5872 * It doesn't appear there is another code path that gets
5873 * here with entity->open being TRUE.
5874 *
5875 * Since it is not certain that this logic is watertight,
5876 * we keep the line and merely exclude it from coverage
5877 * tests.
5878 */
5879 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5880 }
5881 return XML_ERROR_RECURSIVE_ENTITY_REF;
5882 }
5883 if (entity->notation) {
5884 if (enc == parser->m_encoding)
5885 parser->m_eventPtr = ptr;
5886 return XML_ERROR_BINARY_ENTITY_REF;
5887 }
5888 if (! entity->textPtr) {
5889 if (enc == parser->m_encoding)
5890 parser->m_eventPtr = ptr;
5891 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5892 } else {
5893 enum XML_Error result;
5894 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5895 entity->open = XML_TRUE;
5896 #ifdef XML_DTD
5897 entityTrackingOnOpen(parser, entity, __LINE__);
5898 #endif
5899 result = appendAttributeValue(parser, parser->m_internalEncoding,
5900 isCdata, (const char *)entity->textPtr,
5901 (const char *)textEnd, pool,
5902 XML_ACCOUNT_ENTITY_EXPANSION);
5903 #ifdef XML_DTD
5904 entityTrackingOnClose(parser, entity, __LINE__);
5905 #endif
5906 entity->open = XML_FALSE;
5907 if (result)
5908 return result;
5909 }
5910 } break;
5911 default:
5912 /* The only token returned by XmlAttributeValueTok() that does
5913 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5914 * Getting that would require an entity name to contain an
5915 * incomplete XML character (e.g. \xE2\x82); however previous
5916 * tokenisers will have already recognised and rejected such
5917 * names before XmlAttributeValueTok() gets a look-in. This
5918 * default case should be retained as a safety net, but the code
5919 * excluded from coverage tests.
5920 *
5921 * LCOV_EXCL_START
5922 */
5923 if (enc == parser->m_encoding)
5924 parser->m_eventPtr = ptr;
5925 return XML_ERROR_UNEXPECTED_STATE;
5926 /* LCOV_EXCL_STOP */
5927 }
5928 ptr = next;
5929 }
5930 /* not reached */
5931 }
5932
5933 static enum XML_Error
storeEntityValue(XML_Parser parser,const ENCODING * enc,const char * entityTextPtr,const char * entityTextEnd,enum XML_Account account)5934 storeEntityValue(XML_Parser parser, const ENCODING *enc,
5935 const char *entityTextPtr, const char *entityTextEnd,
5936 enum XML_Account account) {
5937 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
5938 STRING_POOL *pool = &(dtd->entityValuePool);
5939 enum XML_Error result = XML_ERROR_NONE;
5940 #ifdef XML_DTD
5941 int oldInEntityValue = parser->m_prologState.inEntityValue;
5942 parser->m_prologState.inEntityValue = 1;
5943 #else
5944 UNUSED_P(account);
5945 #endif /* XML_DTD */
5946 /* never return Null for the value argument in EntityDeclHandler,
5947 since this would indicate an external entity; therefore we
5948 have to make sure that entityValuePool.start is not null */
5949 if (! pool->blocks) {
5950 if (! poolGrow(pool))
5951 return XML_ERROR_NO_MEMORY;
5952 }
5953
5954 for (;;) {
5955 const char *next
5956 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
5957 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5958
5959 #ifdef XML_DTD
5960 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
5961 account)) {
5962 accountingOnAbort(parser);
5963 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5964 goto endEntityValue;
5965 }
5966 #endif
5967
5968 switch (tok) {
5969 case XML_TOK_PARAM_ENTITY_REF:
5970 #ifdef XML_DTD
5971 if (parser->m_isParamEntity || enc != parser->m_encoding) {
5972 const XML_Char *name;
5973 ENTITY *entity;
5974 name = poolStoreString(&parser->m_tempPool, enc,
5975 entityTextPtr + enc->minBytesPerChar,
5976 next - enc->minBytesPerChar);
5977 if (! name) {
5978 result = XML_ERROR_NO_MEMORY;
5979 goto endEntityValue;
5980 }
5981 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5982 poolDiscard(&parser->m_tempPool);
5983 if (! entity) {
5984 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5985 /* cannot report skipped entity here - see comments on
5986 parser->m_skippedEntityHandler
5987 if (parser->m_skippedEntityHandler)
5988 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5989 */
5990 dtd->keepProcessing = dtd->standalone;
5991 goto endEntityValue;
5992 }
5993 if (entity->open) {
5994 if (enc == parser->m_encoding)
5995 parser->m_eventPtr = entityTextPtr;
5996 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5997 goto endEntityValue;
5998 }
5999 if (entity->systemId) {
6000 if (parser->m_externalEntityRefHandler) {
6001 dtd->paramEntityRead = XML_FALSE;
6002 entity->open = XML_TRUE;
6003 entityTrackingOnOpen(parser, entity, __LINE__);
6004 if (! parser->m_externalEntityRefHandler(
6005 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6006 entity->systemId, entity->publicId)) {
6007 entityTrackingOnClose(parser, entity, __LINE__);
6008 entity->open = XML_FALSE;
6009 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6010 goto endEntityValue;
6011 }
6012 entityTrackingOnClose(parser, entity, __LINE__);
6013 entity->open = XML_FALSE;
6014 if (! dtd->paramEntityRead)
6015 dtd->keepProcessing = dtd->standalone;
6016 } else
6017 dtd->keepProcessing = dtd->standalone;
6018 } else {
6019 entity->open = XML_TRUE;
6020 entityTrackingOnOpen(parser, entity, __LINE__);
6021 result = storeEntityValue(
6022 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
6023 (const char *)(entity->textPtr + entity->textLen),
6024 XML_ACCOUNT_ENTITY_EXPANSION);
6025 entityTrackingOnClose(parser, entity, __LINE__);
6026 entity->open = XML_FALSE;
6027 if (result)
6028 goto endEntityValue;
6029 }
6030 break;
6031 }
6032 #endif /* XML_DTD */
6033 /* In the internal subset, PE references are not legal
6034 within markup declarations, e.g entity values in this case. */
6035 parser->m_eventPtr = entityTextPtr;
6036 result = XML_ERROR_PARAM_ENTITY_REF;
6037 goto endEntityValue;
6038 case XML_TOK_NONE:
6039 result = XML_ERROR_NONE;
6040 goto endEntityValue;
6041 case XML_TOK_ENTITY_REF:
6042 case XML_TOK_DATA_CHARS:
6043 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6044 result = XML_ERROR_NO_MEMORY;
6045 goto endEntityValue;
6046 }
6047 break;
6048 case XML_TOK_TRAILING_CR:
6049 next = entityTextPtr + enc->minBytesPerChar;
6050 /* fall through */
6051 case XML_TOK_DATA_NEWLINE:
6052 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6053 result = XML_ERROR_NO_MEMORY;
6054 goto endEntityValue;
6055 }
6056 *(pool->ptr)++ = 0xA;
6057 break;
6058 case XML_TOK_CHAR_REF: {
6059 XML_Char buf[XML_ENCODE_MAX];
6060 int i;
6061 int n = XmlCharRefNumber(enc, entityTextPtr);
6062 if (n < 0) {
6063 if (enc == parser->m_encoding)
6064 parser->m_eventPtr = entityTextPtr;
6065 result = XML_ERROR_BAD_CHAR_REF;
6066 goto endEntityValue;
6067 }
6068 n = XmlEncode(n, (ICHAR *)buf);
6069 /* The XmlEncode() functions can never return 0 here. That
6070 * error return happens if the code point passed in is either
6071 * negative or greater than or equal to 0x110000. The
6072 * XmlCharRefNumber() functions will all return a number
6073 * strictly less than 0x110000 or a negative value if an error
6074 * occurred. The negative value is intercepted above, so
6075 * XmlEncode() is never passed a value it might return an
6076 * error for.
6077 */
6078 for (i = 0; i < n; i++) {
6079 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6080 result = XML_ERROR_NO_MEMORY;
6081 goto endEntityValue;
6082 }
6083 *(pool->ptr)++ = buf[i];
6084 }
6085 } break;
6086 case XML_TOK_PARTIAL:
6087 if (enc == parser->m_encoding)
6088 parser->m_eventPtr = entityTextPtr;
6089 result = XML_ERROR_INVALID_TOKEN;
6090 goto endEntityValue;
6091 case XML_TOK_INVALID:
6092 if (enc == parser->m_encoding)
6093 parser->m_eventPtr = next;
6094 result = XML_ERROR_INVALID_TOKEN;
6095 goto endEntityValue;
6096 default:
6097 /* This default case should be unnecessary -- all the tokens
6098 * that XmlEntityValueTok() can return have their own explicit
6099 * cases -- but should be retained for safety. We do however
6100 * exclude it from the coverage statistics.
6101 *
6102 * LCOV_EXCL_START
6103 */
6104 if (enc == parser->m_encoding)
6105 parser->m_eventPtr = entityTextPtr;
6106 result = XML_ERROR_UNEXPECTED_STATE;
6107 goto endEntityValue;
6108 /* LCOV_EXCL_STOP */
6109 }
6110 entityTextPtr = next;
6111 }
6112 endEntityValue:
6113 #ifdef XML_DTD
6114 parser->m_prologState.inEntityValue = oldInEntityValue;
6115 #endif /* XML_DTD */
6116 return result;
6117 }
6118
6119 static void FASTCALL
normalizeLines(XML_Char * s)6120 normalizeLines(XML_Char *s) {
6121 XML_Char *p;
6122 for (;; s++) {
6123 if (*s == XML_T('\0'))
6124 return;
6125 if (*s == 0xD)
6126 break;
6127 }
6128 p = s;
6129 do {
6130 if (*s == 0xD) {
6131 *p++ = 0xA;
6132 if (*++s == 0xA)
6133 s++;
6134 } else
6135 *p++ = *s++;
6136 } while (*s);
6137 *p = XML_T('\0');
6138 }
6139
6140 static int
reportProcessingInstruction(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6141 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6142 const char *start, const char *end) {
6143 const XML_Char *target;
6144 XML_Char *data;
6145 const char *tem;
6146 if (! parser->m_processingInstructionHandler) {
6147 if (parser->m_defaultHandler)
6148 reportDefault(parser, enc, start, end);
6149 return 1;
6150 }
6151 start += enc->minBytesPerChar * 2;
6152 tem = start + XmlNameLength(enc, start);
6153 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6154 if (! target)
6155 return 0;
6156 poolFinish(&parser->m_tempPool);
6157 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6158 end - enc->minBytesPerChar * 2);
6159 if (! data)
6160 return 0;
6161 normalizeLines(data);
6162 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6163 poolClear(&parser->m_tempPool);
6164 return 1;
6165 }
6166
6167 static int
reportComment(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6168 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6169 const char *end) {
6170 XML_Char *data;
6171 if (! parser->m_commentHandler) {
6172 if (parser->m_defaultHandler)
6173 reportDefault(parser, enc, start, end);
6174 return 1;
6175 }
6176 data = poolStoreString(&parser->m_tempPool, enc,
6177 start + enc->minBytesPerChar * 4,
6178 end - enc->minBytesPerChar * 3);
6179 if (! data)
6180 return 0;
6181 normalizeLines(data);
6182 parser->m_commentHandler(parser->m_handlerArg, data);
6183 poolClear(&parser->m_tempPool);
6184 return 1;
6185 }
6186
6187 static void
reportDefault(XML_Parser parser,const ENCODING * enc,const char * s,const char * end)6188 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6189 const char *end) {
6190 if (MUST_CONVERT(enc, s)) {
6191 enum XML_Convert_Result convert_res;
6192 const char **eventPP;
6193 const char **eventEndPP;
6194 if (enc == parser->m_encoding) {
6195 eventPP = &parser->m_eventPtr;
6196 eventEndPP = &parser->m_eventEndPtr;
6197 } else {
6198 /* To get here, two things must be true; the parser must be
6199 * using a character encoding that is not the same as the
6200 * encoding passed in, and the encoding passed in must need
6201 * conversion to the internal format (UTF-8 unless XML_UNICODE
6202 * is defined). The only occasions on which the encoding passed
6203 * in is not the same as the parser's encoding are when it is
6204 * the internal encoding (e.g. a previously defined parameter
6205 * entity, already converted to internal format). This by
6206 * definition doesn't need conversion, so the whole branch never
6207 * gets executed.
6208 *
6209 * For safety's sake we don't delete these lines and merely
6210 * exclude them from coverage statistics.
6211 *
6212 * LCOV_EXCL_START
6213 */
6214 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6215 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6216 /* LCOV_EXCL_STOP */
6217 }
6218 do {
6219 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6220 convert_res
6221 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6222 *eventEndPP = s;
6223 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6224 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6225 *eventPP = s;
6226 } while ((convert_res != XML_CONVERT_COMPLETED)
6227 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6228 } else
6229 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6230 (int)((XML_Char *)end - (XML_Char *)s));
6231 }
6232
6233 static int
defineAttribute(ELEMENT_TYPE * type,ATTRIBUTE_ID * attId,XML_Bool isCdata,XML_Bool isId,const XML_Char * value,XML_Parser parser)6234 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6235 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6236 DEFAULT_ATTRIBUTE *att;
6237 if (value || isId) {
6238 /* The handling of default attributes gets messed up if we have
6239 a default which duplicates a non-default. */
6240 int i;
6241 for (i = 0; i < type->nDefaultAtts; i++)
6242 if (attId == type->defaultAtts[i].id)
6243 return 1;
6244 if (isId && ! type->idAtt && ! attId->xmlns)
6245 type->idAtt = attId;
6246 }
6247 if (type->nDefaultAtts == type->allocDefaultAtts) {
6248 if (type->allocDefaultAtts == 0) {
6249 type->allocDefaultAtts = 8;
6250 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6251 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6252 if (! type->defaultAtts) {
6253 type->allocDefaultAtts = 0;
6254 return 0;
6255 }
6256 } else {
6257 DEFAULT_ATTRIBUTE *temp;
6258
6259 /* Detect and prevent integer overflow */
6260 if (type->allocDefaultAtts > INT_MAX / 2) {
6261 return 0;
6262 }
6263
6264 int count = type->allocDefaultAtts * 2;
6265
6266 /* Detect and prevent integer overflow.
6267 * The preprocessor guard addresses the "always false" warning
6268 * from -Wtype-limits on platforms where
6269 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6270 #if UINT_MAX >= SIZE_MAX
6271 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6272 return 0;
6273 }
6274 #endif
6275
6276 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6277 (count * sizeof(DEFAULT_ATTRIBUTE)));
6278 if (temp == NULL)
6279 return 0;
6280 type->allocDefaultAtts = count;
6281 type->defaultAtts = temp;
6282 }
6283 }
6284 att = type->defaultAtts + type->nDefaultAtts;
6285 att->id = attId;
6286 att->value = value;
6287 att->isCdata = isCdata;
6288 if (! isCdata)
6289 attId->maybeTokenized = XML_TRUE;
6290 type->nDefaultAtts += 1;
6291 return 1;
6292 }
6293
6294 static int
setElementTypePrefix(XML_Parser parser,ELEMENT_TYPE * elementType)6295 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6296 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6297 const XML_Char *name;
6298 for (name = elementType->name; *name; name++) {
6299 if (*name == XML_T(ASCII_COLON)) {
6300 PREFIX *prefix;
6301 const XML_Char *s;
6302 for (s = elementType->name; s != name; s++) {
6303 if (! poolAppendChar(&dtd->pool, *s))
6304 return 0;
6305 }
6306 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6307 return 0;
6308 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6309 sizeof(PREFIX));
6310 if (! prefix)
6311 return 0;
6312 if (prefix->name == poolStart(&dtd->pool))
6313 poolFinish(&dtd->pool);
6314 else
6315 poolDiscard(&dtd->pool);
6316 elementType->prefix = prefix;
6317 break;
6318 }
6319 }
6320 return 1;
6321 }
6322
6323 static ATTRIBUTE_ID *
getAttributeId(XML_Parser parser,const ENCODING * enc,const char * start,const char * end)6324 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6325 const char *end) {
6326 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6327 ATTRIBUTE_ID *id;
6328 const XML_Char *name;
6329 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6330 return NULL;
6331 name = poolStoreString(&dtd->pool, enc, start, end);
6332 if (! name)
6333 return NULL;
6334 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
6335 ++name;
6336 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6337 sizeof(ATTRIBUTE_ID));
6338 if (! id)
6339 return NULL;
6340 if (id->name != name)
6341 poolDiscard(&dtd->pool);
6342 else {
6343 poolFinish(&dtd->pool);
6344 if (! parser->m_ns)
6345 ;
6346 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6347 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6348 && name[4] == XML_T(ASCII_s)
6349 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6350 if (name[5] == XML_T('\0'))
6351 id->prefix = &dtd->defaultPrefix;
6352 else
6353 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6354 sizeof(PREFIX));
6355 id->xmlns = XML_TRUE;
6356 } else {
6357 int i;
6358 for (i = 0; name[i]; i++) {
6359 /* attributes without prefix are *not* in the default namespace */
6360 if (name[i] == XML_T(ASCII_COLON)) {
6361 int j;
6362 for (j = 0; j < i; j++) {
6363 if (! poolAppendChar(&dtd->pool, name[j]))
6364 return NULL;
6365 }
6366 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6367 return NULL;
6368 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6369 poolStart(&dtd->pool), sizeof(PREFIX));
6370 if (! id->prefix)
6371 return NULL;
6372 if (id->prefix->name == poolStart(&dtd->pool))
6373 poolFinish(&dtd->pool);
6374 else
6375 poolDiscard(&dtd->pool);
6376 break;
6377 }
6378 }
6379 }
6380 }
6381 return id;
6382 }
6383
6384 #define CONTEXT_SEP XML_T(ASCII_FF)
6385
6386 static const XML_Char *
getContext(XML_Parser parser)6387 getContext(XML_Parser parser) {
6388 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6389 HASH_TABLE_ITER iter;
6390 XML_Bool needSep = XML_FALSE;
6391
6392 if (dtd->defaultPrefix.binding) {
6393 int i;
6394 int len;
6395 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6396 return NULL;
6397 len = dtd->defaultPrefix.binding->uriLen;
6398 if (parser->m_namespaceSeparator)
6399 len--;
6400 for (i = 0; i < len; i++) {
6401 if (! poolAppendChar(&parser->m_tempPool,
6402 dtd->defaultPrefix.binding->uri[i])) {
6403 /* Because of memory caching, I don't believe this line can be
6404 * executed.
6405 *
6406 * This is part of a loop copying the default prefix binding
6407 * URI into the parser's temporary string pool. Previously,
6408 * that URI was copied into the same string pool, with a
6409 * terminating NUL character, as part of setContext(). When
6410 * the pool was cleared, that leaves a block definitely big
6411 * enough to hold the URI on the free block list of the pool.
6412 * The URI copy in getContext() therefore cannot run out of
6413 * memory.
6414 *
6415 * If the pool is used between the setContext() and
6416 * getContext() calls, the worst it can do is leave a bigger
6417 * block on the front of the free list. Given that this is
6418 * all somewhat inobvious and program logic can be changed, we
6419 * don't delete the line but we do exclude it from the test
6420 * coverage statistics.
6421 */
6422 return NULL; /* LCOV_EXCL_LINE */
6423 }
6424 }
6425 needSep = XML_TRUE;
6426 }
6427
6428 hashTableIterInit(&iter, &(dtd->prefixes));
6429 for (;;) {
6430 int i;
6431 int len;
6432 const XML_Char *s;
6433 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6434 if (! prefix)
6435 break;
6436 if (! prefix->binding) {
6437 /* This test appears to be (justifiable) paranoia. There does
6438 * not seem to be a way of injecting a prefix without a binding
6439 * that doesn't get errored long before this function is called.
6440 * The test should remain for safety's sake, so we instead
6441 * exclude the following line from the coverage statistics.
6442 */
6443 continue; /* LCOV_EXCL_LINE */
6444 }
6445 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6446 return NULL;
6447 for (s = prefix->name; *s; s++)
6448 if (! poolAppendChar(&parser->m_tempPool, *s))
6449 return NULL;
6450 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6451 return NULL;
6452 len = prefix->binding->uriLen;
6453 if (parser->m_namespaceSeparator)
6454 len--;
6455 for (i = 0; i < len; i++)
6456 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
6457 return NULL;
6458 needSep = XML_TRUE;
6459 }
6460
6461 hashTableIterInit(&iter, &(dtd->generalEntities));
6462 for (;;) {
6463 const XML_Char *s;
6464 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6465 if (! e)
6466 break;
6467 if (! e->open)
6468 continue;
6469 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6470 return NULL;
6471 for (s = e->name; *s; s++)
6472 if (! poolAppendChar(&parser->m_tempPool, *s))
6473 return 0;
6474 needSep = XML_TRUE;
6475 }
6476
6477 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6478 return NULL;
6479 return parser->m_tempPool.start;
6480 }
6481
6482 static XML_Bool
setContext(XML_Parser parser,const XML_Char * context)6483 setContext(XML_Parser parser, const XML_Char *context) {
6484 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6485 const XML_Char *s = context;
6486
6487 while (*context != XML_T('\0')) {
6488 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6489 ENTITY *e;
6490 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6491 return XML_FALSE;
6492 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6493 poolStart(&parser->m_tempPool), 0);
6494 if (e)
6495 e->open = XML_TRUE;
6496 if (*s != XML_T('\0'))
6497 s++;
6498 context = s;
6499 poolDiscard(&parser->m_tempPool);
6500 } else if (*s == XML_T(ASCII_EQUALS)) {
6501 PREFIX *prefix;
6502 if (poolLength(&parser->m_tempPool) == 0)
6503 prefix = &dtd->defaultPrefix;
6504 else {
6505 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6506 return XML_FALSE;
6507 prefix
6508 = (PREFIX *)lookup(parser, &dtd->prefixes,
6509 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6510 if (! prefix)
6511 return XML_FALSE;
6512 if (prefix->name == poolStart(&parser->m_tempPool)) {
6513 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6514 if (! prefix->name)
6515 return XML_FALSE;
6516 }
6517 poolDiscard(&parser->m_tempPool);
6518 }
6519 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
6520 context++)
6521 if (! poolAppendChar(&parser->m_tempPool, *context))
6522 return XML_FALSE;
6523 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
6524 return XML_FALSE;
6525 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6526 &parser->m_inheritedBindings)
6527 != XML_ERROR_NONE)
6528 return XML_FALSE;
6529 poolDiscard(&parser->m_tempPool);
6530 if (*context != XML_T('\0'))
6531 ++context;
6532 s = context;
6533 } else {
6534 if (! poolAppendChar(&parser->m_tempPool, *s))
6535 return XML_FALSE;
6536 s++;
6537 }
6538 }
6539 return XML_TRUE;
6540 }
6541
6542 static void FASTCALL
normalizePublicId(XML_Char * publicId)6543 normalizePublicId(XML_Char *publicId) {
6544 XML_Char *p = publicId;
6545 XML_Char *s;
6546 for (s = publicId; *s; s++) {
6547 switch (*s) {
6548 case 0x20:
6549 case 0xD:
6550 case 0xA:
6551 if (p != publicId && p[-1] != 0x20)
6552 *p++ = 0x20;
6553 break;
6554 default:
6555 *p++ = *s;
6556 }
6557 }
6558 if (p != publicId && p[-1] == 0x20)
6559 --p;
6560 *p = XML_T('\0');
6561 }
6562
6563 static DTD *
dtdCreate(const XML_Memory_Handling_Suite * ms)6564 dtdCreate(const XML_Memory_Handling_Suite *ms) {
6565 DTD *p = ms->malloc_fcn(sizeof(DTD));
6566 if (p == NULL)
6567 return p;
6568 poolInit(&(p->pool), ms);
6569 poolInit(&(p->entityValuePool), ms);
6570 hashTableInit(&(p->generalEntities), ms);
6571 hashTableInit(&(p->elementTypes), ms);
6572 hashTableInit(&(p->attributeIds), ms);
6573 hashTableInit(&(p->prefixes), ms);
6574 #ifdef XML_DTD
6575 p->paramEntityRead = XML_FALSE;
6576 hashTableInit(&(p->paramEntities), ms);
6577 #endif /* XML_DTD */
6578 p->defaultPrefix.name = NULL;
6579 p->defaultPrefix.binding = NULL;
6580
6581 p->in_eldecl = XML_FALSE;
6582 p->scaffIndex = NULL;
6583 p->scaffold = NULL;
6584 p->scaffLevel = 0;
6585 p->scaffSize = 0;
6586 p->scaffCount = 0;
6587 p->contentStringLen = 0;
6588
6589 p->keepProcessing = XML_TRUE;
6590 p->hasParamEntityRefs = XML_FALSE;
6591 p->standalone = XML_FALSE;
6592 return p;
6593 }
6594
6595 static void
dtdReset(DTD * p,const XML_Memory_Handling_Suite * ms)6596 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
6597 HASH_TABLE_ITER iter;
6598 hashTableIterInit(&iter, &(p->elementTypes));
6599 for (;;) {
6600 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6601 if (! e)
6602 break;
6603 if (e->allocDefaultAtts != 0)
6604 ms->free_fcn(e->defaultAtts);
6605 }
6606 hashTableClear(&(p->generalEntities));
6607 #ifdef XML_DTD
6608 p->paramEntityRead = XML_FALSE;
6609 hashTableClear(&(p->paramEntities));
6610 #endif /* XML_DTD */
6611 hashTableClear(&(p->elementTypes));
6612 hashTableClear(&(p->attributeIds));
6613 hashTableClear(&(p->prefixes));
6614 poolClear(&(p->pool));
6615 poolClear(&(p->entityValuePool));
6616 p->defaultPrefix.name = NULL;
6617 p->defaultPrefix.binding = NULL;
6618
6619 p->in_eldecl = XML_FALSE;
6620
6621 ms->free_fcn(p->scaffIndex);
6622 p->scaffIndex = NULL;
6623 ms->free_fcn(p->scaffold);
6624 p->scaffold = NULL;
6625
6626 p->scaffLevel = 0;
6627 p->scaffSize = 0;
6628 p->scaffCount = 0;
6629 p->contentStringLen = 0;
6630
6631 p->keepProcessing = XML_TRUE;
6632 p->hasParamEntityRefs = XML_FALSE;
6633 p->standalone = XML_FALSE;
6634 }
6635
6636 static void
dtdDestroy(DTD * p,XML_Bool isDocEntity,const XML_Memory_Handling_Suite * ms)6637 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
6638 HASH_TABLE_ITER iter;
6639 hashTableIterInit(&iter, &(p->elementTypes));
6640 for (;;) {
6641 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6642 if (! e)
6643 break;
6644 if (e->allocDefaultAtts != 0)
6645 ms->free_fcn(e->defaultAtts);
6646 }
6647 hashTableDestroy(&(p->generalEntities));
6648 #ifdef XML_DTD
6649 hashTableDestroy(&(p->paramEntities));
6650 #endif /* XML_DTD */
6651 hashTableDestroy(&(p->elementTypes));
6652 hashTableDestroy(&(p->attributeIds));
6653 hashTableDestroy(&(p->prefixes));
6654 poolDestroy(&(p->pool));
6655 poolDestroy(&(p->entityValuePool));
6656 if (isDocEntity) {
6657 ms->free_fcn(p->scaffIndex);
6658 ms->free_fcn(p->scaffold);
6659 }
6660 ms->free_fcn(p);
6661 }
6662
6663 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6664 The new DTD has already been initialized.
6665 */
6666 static int
dtdCopy(XML_Parser oldParser,DTD * newDtd,const DTD * oldDtd,const XML_Memory_Handling_Suite * ms)6667 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6668 const XML_Memory_Handling_Suite *ms) {
6669 HASH_TABLE_ITER iter;
6670
6671 /* Copy the prefix table. */
6672
6673 hashTableIterInit(&iter, &(oldDtd->prefixes));
6674 for (;;) {
6675 const XML_Char *name;
6676 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6677 if (! oldP)
6678 break;
6679 name = poolCopyString(&(newDtd->pool), oldP->name);
6680 if (! name)
6681 return 0;
6682 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
6683 return 0;
6684 }
6685
6686 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6687
6688 /* Copy the attribute id table. */
6689
6690 for (;;) {
6691 ATTRIBUTE_ID *newA;
6692 const XML_Char *name;
6693 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6694
6695 if (! oldA)
6696 break;
6697 /* Remember to allocate the scratch byte before the name. */
6698 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
6699 return 0;
6700 name = poolCopyString(&(newDtd->pool), oldA->name);
6701 if (! name)
6702 return 0;
6703 ++name;
6704 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
6705 sizeof(ATTRIBUTE_ID));
6706 if (! newA)
6707 return 0;
6708 newA->maybeTokenized = oldA->maybeTokenized;
6709 if (oldA->prefix) {
6710 newA->xmlns = oldA->xmlns;
6711 if (oldA->prefix == &oldDtd->defaultPrefix)
6712 newA->prefix = &newDtd->defaultPrefix;
6713 else
6714 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6715 oldA->prefix->name, 0);
6716 }
6717 }
6718
6719 /* Copy the element type table. */
6720
6721 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6722
6723 for (;;) {
6724 int i;
6725 ELEMENT_TYPE *newE;
6726 const XML_Char *name;
6727 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6728 if (! oldE)
6729 break;
6730 name = poolCopyString(&(newDtd->pool), oldE->name);
6731 if (! name)
6732 return 0;
6733 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
6734 sizeof(ELEMENT_TYPE));
6735 if (! newE)
6736 return 0;
6737 if (oldE->nDefaultAtts) {
6738 newE->defaultAtts
6739 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6740 if (! newE->defaultAtts) {
6741 return 0;
6742 }
6743 }
6744 if (oldE->idAtt)
6745 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6746 oldE->idAtt->name, 0);
6747 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6748 if (oldE->prefix)
6749 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
6750 oldE->prefix->name, 0);
6751 for (i = 0; i < newE->nDefaultAtts; i++) {
6752 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6753 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
6754 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6755 if (oldE->defaultAtts[i].value) {
6756 newE->defaultAtts[i].value
6757 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6758 if (! newE->defaultAtts[i].value)
6759 return 0;
6760 } else
6761 newE->defaultAtts[i].value = NULL;
6762 }
6763 }
6764
6765 /* Copy the entity tables. */
6766 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6767 &(oldDtd->generalEntities)))
6768 return 0;
6769
6770 #ifdef XML_DTD
6771 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6772 &(oldDtd->paramEntities)))
6773 return 0;
6774 newDtd->paramEntityRead = oldDtd->paramEntityRead;
6775 #endif /* XML_DTD */
6776
6777 newDtd->keepProcessing = oldDtd->keepProcessing;
6778 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
6779 newDtd->standalone = oldDtd->standalone;
6780
6781 /* Don't want deep copying for scaffolding */
6782 newDtd->in_eldecl = oldDtd->in_eldecl;
6783 newDtd->scaffold = oldDtd->scaffold;
6784 newDtd->contentStringLen = oldDtd->contentStringLen;
6785 newDtd->scaffSize = oldDtd->scaffSize;
6786 newDtd->scaffLevel = oldDtd->scaffLevel;
6787 newDtd->scaffIndex = oldDtd->scaffIndex;
6788
6789 return 1;
6790 } /* End dtdCopy */
6791
6792 static int
copyEntityTable(XML_Parser oldParser,HASH_TABLE * newTable,STRING_POOL * newPool,const HASH_TABLE * oldTable)6793 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6794 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
6795 HASH_TABLE_ITER iter;
6796 const XML_Char *cachedOldBase = NULL;
6797 const XML_Char *cachedNewBase = NULL;
6798
6799 hashTableIterInit(&iter, oldTable);
6800
6801 for (;;) {
6802 ENTITY *newE;
6803 const XML_Char *name;
6804 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6805 if (! oldE)
6806 break;
6807 name = poolCopyString(newPool, oldE->name);
6808 if (! name)
6809 return 0;
6810 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
6811 if (! newE)
6812 return 0;
6813 if (oldE->systemId) {
6814 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6815 if (! tem)
6816 return 0;
6817 newE->systemId = tem;
6818 if (oldE->base) {
6819 if (oldE->base == cachedOldBase)
6820 newE->base = cachedNewBase;
6821 else {
6822 cachedOldBase = oldE->base;
6823 tem = poolCopyString(newPool, cachedOldBase);
6824 if (! tem)
6825 return 0;
6826 cachedNewBase = newE->base = tem;
6827 }
6828 }
6829 if (oldE->publicId) {
6830 tem = poolCopyString(newPool, oldE->publicId);
6831 if (! tem)
6832 return 0;
6833 newE->publicId = tem;
6834 }
6835 } else {
6836 const XML_Char *tem
6837 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6838 if (! tem)
6839 return 0;
6840 newE->textPtr = tem;
6841 newE->textLen = oldE->textLen;
6842 }
6843 if (oldE->notation) {
6844 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6845 if (! tem)
6846 return 0;
6847 newE->notation = tem;
6848 }
6849 newE->is_param = oldE->is_param;
6850 newE->is_internal = oldE->is_internal;
6851 }
6852 return 1;
6853 }
6854
6855 #define INIT_POWER 6
6856
6857 static XML_Bool FASTCALL
keyeq(KEY s1,KEY s2)6858 keyeq(KEY s1, KEY s2) {
6859 for (; *s1 == *s2; s1++, s2++)
6860 if (*s1 == 0)
6861 return XML_TRUE;
6862 return XML_FALSE;
6863 }
6864
6865 static size_t
keylen(KEY s)6866 keylen(KEY s) {
6867 size_t len = 0;
6868 for (; *s; s++, len++)
6869 ;
6870 return len;
6871 }
6872
6873 static void
copy_salt_to_sipkey(XML_Parser parser,struct sipkey * key)6874 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
6875 key->k[0] = 0;
6876 key->k[1] = get_hash_secret_salt(parser);
6877 }
6878
6879 static unsigned long FASTCALL
hash(XML_Parser parser,KEY s)6880 hash(XML_Parser parser, KEY s) {
6881 struct siphash state;
6882 struct sipkey key;
6883 (void)sip24_valid;
6884 copy_salt_to_sipkey(parser, &key);
6885 sip24_init(&state, &key);
6886 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6887 return (unsigned long)sip24_final(&state);
6888 }
6889
6890 static NAMED *
lookup(XML_Parser parser,HASH_TABLE * table,KEY name,size_t createSize)6891 lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
6892 size_t i;
6893 if (table->size == 0) {
6894 size_t tsize;
6895 if (! createSize)
6896 return NULL;
6897 table->power = INIT_POWER;
6898 /* table->size is a power of 2 */
6899 table->size = (size_t)1 << INIT_POWER;
6900 tsize = table->size * sizeof(NAMED *);
6901 table->v = table->mem->malloc_fcn(tsize);
6902 if (! table->v) {
6903 table->size = 0;
6904 return NULL;
6905 }
6906 memset(table->v, 0, tsize);
6907 i = hash(parser, name) & ((unsigned long)table->size - 1);
6908 } else {
6909 unsigned long h = hash(parser, name);
6910 unsigned long mask = (unsigned long)table->size - 1;
6911 unsigned char step = 0;
6912 i = h & mask;
6913 while (table->v[i]) {
6914 if (keyeq(name, table->v[i]->name))
6915 return table->v[i];
6916 if (! step)
6917 step = PROBE_STEP(h, mask, table->power);
6918 i < step ? (i += table->size - step) : (i -= step);
6919 }
6920 if (! createSize)
6921 return NULL;
6922
6923 /* check for overflow (table is half full) */
6924 if (table->used >> (table->power - 1)) {
6925 unsigned char newPower = table->power + 1;
6926
6927 /* Detect and prevent invalid shift */
6928 if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
6929 return NULL;
6930 }
6931
6932 size_t newSize = (size_t)1 << newPower;
6933 unsigned long newMask = (unsigned long)newSize - 1;
6934
6935 /* Detect and prevent integer overflow */
6936 if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
6937 return NULL;
6938 }
6939
6940 size_t tsize = newSize * sizeof(NAMED *);
6941 NAMED **newV = table->mem->malloc_fcn(tsize);
6942 if (! newV)
6943 return NULL;
6944 memset(newV, 0, tsize);
6945 for (i = 0; i < table->size; i++)
6946 if (table->v[i]) {
6947 unsigned long newHash = hash(parser, table->v[i]->name);
6948 size_t j = newHash & newMask;
6949 step = 0;
6950 while (newV[j]) {
6951 if (! step)
6952 step = PROBE_STEP(newHash, newMask, newPower);
6953 j < step ? (j += newSize - step) : (j -= step);
6954 }
6955 newV[j] = table->v[i];
6956 }
6957 table->mem->free_fcn(table->v);
6958 table->v = newV;
6959 table->power = newPower;
6960 table->size = newSize;
6961 i = h & newMask;
6962 step = 0;
6963 while (table->v[i]) {
6964 if (! step)
6965 step = PROBE_STEP(h, newMask, newPower);
6966 i < step ? (i += newSize - step) : (i -= step);
6967 }
6968 }
6969 }
6970 table->v[i] = table->mem->malloc_fcn(createSize);
6971 if (! table->v[i])
6972 return NULL;
6973 memset(table->v[i], 0, createSize);
6974 table->v[i]->name = name;
6975 (table->used)++;
6976 return table->v[i];
6977 }
6978
6979 static void FASTCALL
hashTableClear(HASH_TABLE * table)6980 hashTableClear(HASH_TABLE *table) {
6981 size_t i;
6982 for (i = 0; i < table->size; i++) {
6983 table->mem->free_fcn(table->v[i]);
6984 table->v[i] = NULL;
6985 }
6986 table->used = 0;
6987 }
6988
6989 static void FASTCALL
hashTableDestroy(HASH_TABLE * table)6990 hashTableDestroy(HASH_TABLE *table) {
6991 size_t i;
6992 for (i = 0; i < table->size; i++)
6993 table->mem->free_fcn(table->v[i]);
6994 table->mem->free_fcn(table->v);
6995 }
6996
6997 static void FASTCALL
hashTableInit(HASH_TABLE * p,const XML_Memory_Handling_Suite * ms)6998 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
6999 p->power = 0;
7000 p->size = 0;
7001 p->used = 0;
7002 p->v = NULL;
7003 p->mem = ms;
7004 }
7005
7006 static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER * iter,const HASH_TABLE * table)7007 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7008 iter->p = table->v;
7009 iter->end = iter->p ? iter->p + table->size : NULL;
7010 }
7011
7012 static NAMED *FASTCALL
hashTableIterNext(HASH_TABLE_ITER * iter)7013 hashTableIterNext(HASH_TABLE_ITER *iter) {
7014 while (iter->p != iter->end) {
7015 NAMED *tem = *(iter->p)++;
7016 if (tem)
7017 return tem;
7018 }
7019 return NULL;
7020 }
7021
7022 static void FASTCALL
poolInit(STRING_POOL * pool,const XML_Memory_Handling_Suite * ms)7023 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7024 pool->blocks = NULL;
7025 pool->freeBlocks = NULL;
7026 pool->start = NULL;
7027 pool->ptr = NULL;
7028 pool->end = NULL;
7029 pool->mem = ms;
7030 }
7031
7032 static void FASTCALL
poolClear(STRING_POOL * pool)7033 poolClear(STRING_POOL *pool) {
7034 if (! pool->freeBlocks)
7035 pool->freeBlocks = pool->blocks;
7036 else {
7037 BLOCK *p = pool->blocks;
7038 while (p) {
7039 BLOCK *tem = p->next;
7040 p->next = pool->freeBlocks;
7041 pool->freeBlocks = p;
7042 p = tem;
7043 }
7044 }
7045 pool->blocks = NULL;
7046 pool->start = NULL;
7047 pool->ptr = NULL;
7048 pool->end = NULL;
7049 }
7050
7051 static void FASTCALL
poolDestroy(STRING_POOL * pool)7052 poolDestroy(STRING_POOL *pool) {
7053 BLOCK *p = pool->blocks;
7054 while (p) {
7055 BLOCK *tem = p->next;
7056 pool->mem->free_fcn(p);
7057 p = tem;
7058 }
7059 p = pool->freeBlocks;
7060 while (p) {
7061 BLOCK *tem = p->next;
7062 pool->mem->free_fcn(p);
7063 p = tem;
7064 }
7065 }
7066
7067 static XML_Char *
poolAppend(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7068 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7069 const char *end) {
7070 if (! pool->ptr && ! poolGrow(pool))
7071 return NULL;
7072 for (;;) {
7073 const enum XML_Convert_Result convert_res = XmlConvert(
7074 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
7075 if ((convert_res == XML_CONVERT_COMPLETED)
7076 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7077 break;
7078 if (! poolGrow(pool))
7079 return NULL;
7080 }
7081 return pool->start;
7082 }
7083
7084 static const XML_Char *FASTCALL
poolCopyString(STRING_POOL * pool,const XML_Char * s)7085 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7086 do {
7087 if (! poolAppendChar(pool, *s))
7088 return NULL;
7089 } while (*s++);
7090 s = pool->start;
7091 poolFinish(pool);
7092 return s;
7093 }
7094
7095 static const XML_Char *
poolCopyStringN(STRING_POOL * pool,const XML_Char * s,int n)7096 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7097 if (! pool->ptr && ! poolGrow(pool)) {
7098 /* The following line is unreachable given the current usage of
7099 * poolCopyStringN(). Currently it is called from exactly one
7100 * place to copy the text of a simple general entity. By that
7101 * point, the name of the entity is already stored in the pool, so
7102 * pool->ptr cannot be NULL.
7103 *
7104 * If poolCopyStringN() is used elsewhere as it well might be,
7105 * this line may well become executable again. Regardless, this
7106 * sort of check shouldn't be removed lightly, so we just exclude
7107 * it from the coverage statistics.
7108 */
7109 return NULL; /* LCOV_EXCL_LINE */
7110 }
7111 for (; n > 0; --n, s++) {
7112 if (! poolAppendChar(pool, *s))
7113 return NULL;
7114 }
7115 s = pool->start;
7116 poolFinish(pool);
7117 return s;
7118 }
7119
7120 static const XML_Char *FASTCALL
poolAppendString(STRING_POOL * pool,const XML_Char * s)7121 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7122 while (*s) {
7123 if (! poolAppendChar(pool, *s))
7124 return NULL;
7125 s++;
7126 }
7127 return pool->start;
7128 }
7129
7130 static XML_Char *
poolStoreString(STRING_POOL * pool,const ENCODING * enc,const char * ptr,const char * end)7131 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7132 const char *end) {
7133 if (! poolAppend(pool, enc, ptr, end))
7134 return NULL;
7135 if (pool->ptr == pool->end && ! poolGrow(pool))
7136 return NULL;
7137 *(pool->ptr)++ = 0;
7138 return pool->start;
7139 }
7140
7141 static size_t
poolBytesToAllocateFor(int blockSize)7142 poolBytesToAllocateFor(int blockSize) {
7143 /* Unprotected math would be:
7144 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7145 **
7146 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7147 ** For a + b * c we check b * c in isolation first, so that addition of a
7148 ** on top has no chance of making us accept a small non-negative number
7149 */
7150 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7151
7152 if (blockSize <= 0)
7153 return 0;
7154
7155 if (blockSize > (int)(INT_MAX / stretch))
7156 return 0;
7157
7158 {
7159 const int stretchedBlockSize = blockSize * (int)stretch;
7160 const int bytesToAllocate
7161 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7162 if (bytesToAllocate < 0)
7163 return 0;
7164
7165 return (size_t)bytesToAllocate;
7166 }
7167 }
7168
7169 static XML_Bool FASTCALL
poolGrow(STRING_POOL * pool)7170 poolGrow(STRING_POOL *pool) {
7171 if (pool->freeBlocks) {
7172 if (pool->start == 0) {
7173 pool->blocks = pool->freeBlocks;
7174 pool->freeBlocks = pool->freeBlocks->next;
7175 pool->blocks->next = NULL;
7176 pool->start = pool->blocks->s;
7177 pool->end = pool->start + pool->blocks->size;
7178 pool->ptr = pool->start;
7179 return XML_TRUE;
7180 }
7181 if (pool->end - pool->start < pool->freeBlocks->size) {
7182 BLOCK *tem = pool->freeBlocks->next;
7183 pool->freeBlocks->next = pool->blocks;
7184 pool->blocks = pool->freeBlocks;
7185 pool->freeBlocks = tem;
7186 memcpy(pool->blocks->s, pool->start,
7187 (pool->end - pool->start) * sizeof(XML_Char));
7188 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7189 pool->start = pool->blocks->s;
7190 pool->end = pool->start + pool->blocks->size;
7191 return XML_TRUE;
7192 }
7193 }
7194 if (pool->blocks && pool->start == pool->blocks->s) {
7195 BLOCK *temp;
7196 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
7197 size_t bytesToAllocate;
7198
7199 /* NOTE: Needs to be calculated prior to calling `realloc`
7200 to avoid dangling pointers: */
7201 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7202
7203 if (blockSize < 0) {
7204 /* This condition traps a situation where either more than
7205 * INT_MAX/2 bytes have already been allocated. This isn't
7206 * readily testable, since it is unlikely that an average
7207 * machine will have that much memory, so we exclude it from the
7208 * coverage statistics.
7209 */
7210 return XML_FALSE; /* LCOV_EXCL_LINE */
7211 }
7212
7213 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7214 if (bytesToAllocate == 0)
7215 return XML_FALSE;
7216
7217 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7218 (unsigned)bytesToAllocate);
7219 if (temp == NULL)
7220 return XML_FALSE;
7221 pool->blocks = temp;
7222 pool->blocks->size = blockSize;
7223 pool->ptr = pool->blocks->s + offsetInsideBlock;
7224 pool->start = pool->blocks->s;
7225 pool->end = pool->start + blockSize;
7226 } else {
7227 BLOCK *tem;
7228 int blockSize = (int)(pool->end - pool->start);
7229 size_t bytesToAllocate;
7230
7231 if (blockSize < 0) {
7232 /* This condition traps a situation where either more than
7233 * INT_MAX bytes have already been allocated (which is prevented
7234 * by various pieces of program logic, not least this one, never
7235 * mind the unlikelihood of actually having that much memory) or
7236 * the pool control fields have been corrupted (which could
7237 * conceivably happen in an extremely buggy user handler
7238 * function). Either way it isn't readily testable, so we
7239 * exclude it from the coverage statistics.
7240 */
7241 return XML_FALSE; /* LCOV_EXCL_LINE */
7242 }
7243
7244 if (blockSize < INIT_BLOCK_SIZE)
7245 blockSize = INIT_BLOCK_SIZE;
7246 else {
7247 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7248 if ((int)((unsigned)blockSize * 2U) < 0) {
7249 return XML_FALSE;
7250 }
7251 blockSize *= 2;
7252 }
7253
7254 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7255 if (bytesToAllocate == 0)
7256 return XML_FALSE;
7257
7258 tem = pool->mem->malloc_fcn(bytesToAllocate);
7259 if (! tem)
7260 return XML_FALSE;
7261 tem->size = blockSize;
7262 tem->next = pool->blocks;
7263 pool->blocks = tem;
7264 if (pool->ptr != pool->start)
7265 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
7266 pool->ptr = tem->s + (pool->ptr - pool->start);
7267 pool->start = tem->s;
7268 pool->end = tem->s + blockSize;
7269 }
7270 return XML_TRUE;
7271 }
7272
7273 static int FASTCALL
nextScaffoldPart(XML_Parser parser)7274 nextScaffoldPart(XML_Parser parser) {
7275 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7276 CONTENT_SCAFFOLD *me;
7277 int next;
7278
7279 if (! dtd->scaffIndex) {
7280 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
7281 if (! dtd->scaffIndex)
7282 return -1;
7283 dtd->scaffIndex[0] = 0;
7284 }
7285
7286 if (dtd->scaffCount >= dtd->scaffSize) {
7287 CONTENT_SCAFFOLD *temp;
7288 if (dtd->scaffold) {
7289 /* Detect and prevent integer overflow */
7290 if (dtd->scaffSize > UINT_MAX / 2u) {
7291 return -1;
7292 }
7293 /* Detect and prevent integer overflow.
7294 * The preprocessor guard addresses the "always false" warning
7295 * from -Wtype-limits on platforms where
7296 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7297 #if UINT_MAX >= SIZE_MAX
7298 if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
7299 return -1;
7300 }
7301 #endif
7302
7303 temp = (CONTENT_SCAFFOLD *)REALLOC(
7304 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7305 if (temp == NULL)
7306 return -1;
7307 dtd->scaffSize *= 2;
7308 } else {
7309 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
7310 * sizeof(CONTENT_SCAFFOLD));
7311 if (temp == NULL)
7312 return -1;
7313 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
7314 }
7315 dtd->scaffold = temp;
7316 }
7317 next = dtd->scaffCount++;
7318 me = &dtd->scaffold[next];
7319 if (dtd->scaffLevel) {
7320 CONTENT_SCAFFOLD *parent
7321 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
7322 if (parent->lastchild) {
7323 dtd->scaffold[parent->lastchild].nextsib = next;
7324 }
7325 if (! parent->childcnt)
7326 parent->firstchild = next;
7327 parent->lastchild = next;
7328 parent->childcnt++;
7329 }
7330 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7331 return next;
7332 }
7333
7334 static XML_Content *
build_model(XML_Parser parser)7335 build_model(XML_Parser parser) {
7336 /* Function build_model transforms the existing parser->m_dtd->scaffold
7337 * array of CONTENT_SCAFFOLD tree nodes into a new array of
7338 * XML_Content tree nodes followed by a gapless list of zero-terminated
7339 * strings. */
7340 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7341 XML_Content *ret;
7342 XML_Char *str; /* the current string writing location */
7343
7344 /* Detect and prevent integer overflow.
7345 * The preprocessor guard addresses the "always false" warning
7346 * from -Wtype-limits on platforms where
7347 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
7348 #if UINT_MAX >= SIZE_MAX
7349 if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
7350 return NULL;
7351 }
7352 if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
7353 return NULL;
7354 }
7355 #endif
7356 if (dtd->scaffCount * sizeof(XML_Content)
7357 > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
7358 return NULL;
7359 }
7360
7361 const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
7362 + (dtd->contentStringLen * sizeof(XML_Char)));
7363
7364 ret = (XML_Content *)MALLOC(parser, allocsize);
7365 if (! ret)
7366 return NULL;
7367
7368 /* What follows is an iterative implementation (of what was previously done
7369 * recursively in a dedicated function called "build_node". The old recursive
7370 * build_node could be forced into stack exhaustion from input as small as a
7371 * few megabyte, and so that was a security issue. Hence, a function call
7372 * stack is avoided now by resolving recursion.)
7373 *
7374 * The iterative approach works as follows:
7375 *
7376 * - We have two writing pointers, both walking up the result array; one does
7377 * the work, the other creates "jobs" for its colleague to do, and leads
7378 * the way:
7379 *
7380 * - The faster one, pointer jobDest, always leads and writes "what job
7381 * to do" by the other, once they reach that place in the
7382 * array: leader "jobDest" stores the source node array index (relative
7383 * to array dtd->scaffold) in field "numchildren".
7384 *
7385 * - The slower one, pointer dest, looks at the value stored in the
7386 * "numchildren" field (which actually holds a source node array index
7387 * at that time) and puts the real data from dtd->scaffold in.
7388 *
7389 * - Before the loop starts, jobDest writes source array index 0
7390 * (where the root node is located) so that dest will have something to do
7391 * when it starts operation.
7392 *
7393 * - Whenever nodes with children are encountered, jobDest appends
7394 * them as new jobs, in order. As a result, tree node siblings are
7395 * adjacent in the resulting array, for example:
7396 *
7397 * [0] root, has two children
7398 * [1] first child of 0, has three children
7399 * [3] first child of 1, does not have children
7400 * [4] second child of 1, does not have children
7401 * [5] third child of 1, does not have children
7402 * [2] second child of 0, does not have children
7403 *
7404 * Or (the same data) presented in flat array view:
7405 *
7406 * [0] root, has two children
7407 *
7408 * [1] first child of 0, has three children
7409 * [2] second child of 0, does not have children
7410 *
7411 * [3] first child of 1, does not have children
7412 * [4] second child of 1, does not have children
7413 * [5] third child of 1, does not have children
7414 *
7415 * - The algorithm repeats until all target array indices have been processed.
7416 */
7417 XML_Content *dest = ret; /* tree node writing location, moves upwards */
7418 XML_Content *const destLimit = &ret[dtd->scaffCount];
7419 XML_Content *jobDest = ret; /* next free writing location in target array */
7420 str = (XML_Char *)&ret[dtd->scaffCount];
7421
7422 /* Add the starting job, the root node (index 0) of the source tree */
7423 (jobDest++)->numchildren = 0;
7424
7425 for (; dest < destLimit; dest++) {
7426 /* Retrieve source tree array index from job storage */
7427 const int src_node = (int)dest->numchildren;
7428
7429 /* Convert item */
7430 dest->type = dtd->scaffold[src_node].type;
7431 dest->quant = dtd->scaffold[src_node].quant;
7432 if (dest->type == XML_CTYPE_NAME) {
7433 const XML_Char *src;
7434 dest->name = str;
7435 src = dtd->scaffold[src_node].name;
7436 for (;;) {
7437 *str++ = *src;
7438 if (! *src)
7439 break;
7440 src++;
7441 }
7442 dest->numchildren = 0;
7443 dest->children = NULL;
7444 } else {
7445 unsigned int i;
7446 int cn;
7447 dest->name = NULL;
7448 dest->numchildren = dtd->scaffold[src_node].childcnt;
7449 dest->children = jobDest;
7450
7451 /* Append scaffold indices of children to array */
7452 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7453 i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
7454 (jobDest++)->numchildren = (unsigned int)cn;
7455 }
7456 }
7457
7458 return ret;
7459 }
7460
7461 static ELEMENT_TYPE *
getElementType(XML_Parser parser,const ENCODING * enc,const char * ptr,const char * end)7462 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7463 const char *end) {
7464 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7465 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
7466 ELEMENT_TYPE *ret;
7467
7468 if (! name)
7469 return NULL;
7470 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7471 sizeof(ELEMENT_TYPE));
7472 if (! ret)
7473 return NULL;
7474 if (ret->name != name)
7475 poolDiscard(&dtd->pool);
7476 else {
7477 poolFinish(&dtd->pool);
7478 if (! setElementTypePrefix(parser, ret))
7479 return NULL;
7480 }
7481 return ret;
7482 }
7483
7484 static XML_Char *
copyString(const XML_Char * s,const XML_Memory_Handling_Suite * memsuite)7485 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7486 size_t charsRequired = 0;
7487 XML_Char *result;
7488
7489 /* First determine how long the string is */
7490 while (s[charsRequired] != 0) {
7491 charsRequired++;
7492 }
7493 /* Include the terminator */
7494 charsRequired++;
7495
7496 /* Now allocate space for the copy */
7497 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7498 if (result == NULL)
7499 return NULL;
7500 /* Copy the original into place */
7501 memcpy(result, s, charsRequired * sizeof(XML_Char));
7502 return result;
7503 }
7504
7505 #ifdef XML_DTD
7506
7507 static float
accountingGetCurrentAmplification(XML_Parser rootParser)7508 accountingGetCurrentAmplification(XML_Parser rootParser) {
7509 const XmlBigCount countBytesOutput
7510 = rootParser->m_accounting.countBytesDirect
7511 + rootParser->m_accounting.countBytesIndirect;
7512 const float amplificationFactor
7513 = rootParser->m_accounting.countBytesDirect
7514 ? (countBytesOutput
7515 / (float)(rootParser->m_accounting.countBytesDirect))
7516 : 1.0f;
7517 assert(! rootParser->m_parentParser);
7518 return amplificationFactor;
7519 }
7520
7521 static void
accountingReportStats(XML_Parser originParser,const char * epilog)7522 accountingReportStats(XML_Parser originParser, const char *epilog) {
7523 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7524 assert(! rootParser->m_parentParser);
7525
7526 if (rootParser->m_accounting.debugLevel < 1) {
7527 return;
7528 }
7529
7530 const float amplificationFactor
7531 = accountingGetCurrentAmplification(rootParser);
7532 fprintf(stderr,
7533 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7534 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7535 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7536 rootParser->m_accounting.countBytesIndirect,
7537 (double)amplificationFactor, epilog);
7538 }
7539
7540 static void
accountingOnAbort(XML_Parser originParser)7541 accountingOnAbort(XML_Parser originParser) {
7542 accountingReportStats(originParser, " ABORTING\n");
7543 }
7544
7545 static void
accountingReportDiff(XML_Parser rootParser,unsigned int levelsAwayFromRootParser,const char * before,const char * after,ptrdiff_t bytesMore,int source_line,enum XML_Account account)7546 accountingReportDiff(XML_Parser rootParser,
7547 unsigned int levelsAwayFromRootParser, const char *before,
7548 const char *after, ptrdiff_t bytesMore, int source_line,
7549 enum XML_Account account) {
7550 assert(! rootParser->m_parentParser);
7551
7552 fprintf(stderr,
7553 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7554 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7555 levelsAwayFromRootParser, source_line, 10, "");
7556
7557 const char ellipis[] = "[..]";
7558 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7559 const unsigned int contextLength = 10;
7560
7561 /* Note: Performance is of no concern here */
7562 const char *walker = before;
7563 if ((rootParser->m_accounting.debugLevel >= 3)
7564 || (after - before)
7565 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7566 for (; walker < after; walker++) {
7567 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7568 }
7569 } else {
7570 for (; walker < before + contextLength; walker++) {
7571 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7572 }
7573 fprintf(stderr, ellipis);
7574 walker = after - contextLength;
7575 for (; walker < after; walker++) {
7576 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7577 }
7578 }
7579 fprintf(stderr, "\"\n");
7580 }
7581
7582 static XML_Bool
accountingDiffTolerated(XML_Parser originParser,int tok,const char * before,const char * after,int source_line,enum XML_Account account)7583 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7584 const char *after, int source_line,
7585 enum XML_Account account) {
7586 /* Note: We need to check the token type *first* to be sure that
7587 * we can even access variable <after>, safely.
7588 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7589 switch (tok) {
7590 case XML_TOK_INVALID:
7591 case XML_TOK_PARTIAL:
7592 case XML_TOK_PARTIAL_CHAR:
7593 case XML_TOK_NONE:
7594 return XML_TRUE;
7595 }
7596
7597 if (account == XML_ACCOUNT_NONE)
7598 return XML_TRUE; /* because these bytes have been accounted for, already */
7599
7600 unsigned int levelsAwayFromRootParser;
7601 const XML_Parser rootParser
7602 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7603 assert(! rootParser->m_parentParser);
7604
7605 const int isDirect
7606 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7607 const ptrdiff_t bytesMore = after - before;
7608
7609 XmlBigCount *const additionTarget
7610 = isDirect ? &rootParser->m_accounting.countBytesDirect
7611 : &rootParser->m_accounting.countBytesIndirect;
7612
7613 /* Detect and avoid integer overflow */
7614 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7615 return XML_FALSE;
7616 *additionTarget += bytesMore;
7617
7618 const XmlBigCount countBytesOutput
7619 = rootParser->m_accounting.countBytesDirect
7620 + rootParser->m_accounting.countBytesIndirect;
7621 const float amplificationFactor
7622 = accountingGetCurrentAmplification(rootParser);
7623 const XML_Bool tolerated
7624 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7625 || (amplificationFactor
7626 <= rootParser->m_accounting.maximumAmplificationFactor);
7627
7628 if (rootParser->m_accounting.debugLevel >= 2) {
7629 accountingReportStats(rootParser, "");
7630 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7631 bytesMore, source_line, account);
7632 }
7633
7634 return tolerated;
7635 }
7636
7637 unsigned long long
testingAccountingGetCountBytesDirect(XML_Parser parser)7638 testingAccountingGetCountBytesDirect(XML_Parser parser) {
7639 if (! parser)
7640 return 0;
7641 return parser->m_accounting.countBytesDirect;
7642 }
7643
7644 unsigned long long
testingAccountingGetCountBytesIndirect(XML_Parser parser)7645 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7646 if (! parser)
7647 return 0;
7648 return parser->m_accounting.countBytesIndirect;
7649 }
7650
7651 static void
entityTrackingReportStats(XML_Parser rootParser,ENTITY * entity,const char * action,int sourceLine)7652 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7653 const char *action, int sourceLine) {
7654 assert(! rootParser->m_parentParser);
7655 if (rootParser->m_entity_stats.debugLevel < 1)
7656 return;
7657
7658 # if defined(XML_UNICODE)
7659 const char *const entityName = "[..]";
7660 # else
7661 const char *const entityName = entity->name;
7662 # endif
7663
7664 fprintf(
7665 stderr,
7666 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7667 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7668 rootParser->m_entity_stats.currentDepth,
7669 rootParser->m_entity_stats.maximumDepthSeen,
7670 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7671 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7672 sourceLine);
7673 }
7674
7675 static void
entityTrackingOnOpen(XML_Parser originParser,ENTITY * entity,int sourceLine)7676 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7677 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7678 assert(! rootParser->m_parentParser);
7679
7680 rootParser->m_entity_stats.countEverOpened++;
7681 rootParser->m_entity_stats.currentDepth++;
7682 if (rootParser->m_entity_stats.currentDepth
7683 > rootParser->m_entity_stats.maximumDepthSeen) {
7684 rootParser->m_entity_stats.maximumDepthSeen++;
7685 }
7686
7687 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7688 }
7689
7690 static void
entityTrackingOnClose(XML_Parser originParser,ENTITY * entity,int sourceLine)7691 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7692 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7693 assert(! rootParser->m_parentParser);
7694
7695 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7696 rootParser->m_entity_stats.currentDepth--;
7697 }
7698
7699 static XML_Parser
getRootParserOf(XML_Parser parser,unsigned int * outLevelDiff)7700 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7701 XML_Parser rootParser = parser;
7702 unsigned int stepsTakenUpwards = 0;
7703 while (rootParser->m_parentParser) {
7704 rootParser = rootParser->m_parentParser;
7705 stepsTakenUpwards++;
7706 }
7707 assert(! rootParser->m_parentParser);
7708 if (outLevelDiff != NULL) {
7709 *outLevelDiff = stepsTakenUpwards;
7710 }
7711 return rootParser;
7712 }
7713
/* Maps a byte value to a constant, zero-terminated string for debug output:
 *
 * - 0 becomes "\0", tab/newline/carriage return become "\t", "\n", "\r"
 *   (each spelled with a literal backslash);
 * - the double quote and the backslash are preceded by a backslash;
 * - space and the remaining values 33..126 map to the character itself;
 * - every other value maps to backslash, "x" and its uppercase hexadecimal
 *   value without padding (e.g. "\x1", "\x7F", "\xFF").
 *
 * The strings returned are literals; they must not be modified or freed. */
const char *
unsignedCharToPrintable(unsigned char c) {
  /* Indexed by byte value; 8 entries per row */
  static const char *const printable[UCHAR_MAX + 1] = {
      /* 0x00 */ "\\0",   "\\x1",  "\\x2",  "\\x3",  "\\x4",  "\\x5",  "\\x6",  "\\x7",
      /* 0x08 */ "\\x8",  "\\t",   "\\n",   "\\xB",  "\\xC",  "\\r",   "\\xE",  "\\xF",
      /* 0x10 */ "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
      /* 0x18 */ "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
      /* 0x20 */ " ",     "!",     "\\\"",  "#",     "$",     "%",     "&",     "'",
      /* 0x28 */ "(",     ")",     "*",     "+",     ",",     "-",     ".",     "/",
      /* 0x30 */ "0",     "1",     "2",     "3",     "4",     "5",     "6",     "7",
      /* 0x38 */ "8",     "9",     ":",     ";",     "<",     "=",     ">",     "?",
      /* 0x40 */ "@",     "A",     "B",     "C",     "D",     "E",     "F",     "G",
      /* 0x48 */ "H",     "I",     "J",     "K",     "L",     "M",     "N",     "O",
      /* 0x50 */ "P",     "Q",     "R",     "S",     "T",     "U",     "V",     "W",
      /* 0x58 */ "X",     "Y",     "Z",     "[",     "\\\\",  "]",     "^",     "_",
      /* 0x60 */ "`",     "a",     "b",     "c",     "d",     "e",     "f",     "g",
      /* 0x68 */ "h",     "i",     "j",     "k",     "l",     "m",     "n",     "o",
      /* 0x70 */ "p",     "q",     "r",     "s",     "t",     "u",     "v",     "w",
      /* 0x78 */ "x",     "y",     "z",     "{",     "|",     "}",     "~",     "\\x7F",
      /* 0x80 */ "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87",
      /* 0x88 */ "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F",
      /* 0x90 */ "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97",
      /* 0x98 */ "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F",
      /* 0xA0 */ "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7",
      /* 0xA8 */ "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF",
      /* 0xB0 */ "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7",
      /* 0xB8 */ "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF",
      /* 0xC0 */ "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7",
      /* 0xC8 */ "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF",
      /* 0xD0 */ "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7",
      /* 0xD8 */ "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF",
      /* 0xE0 */ "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7",
      /* 0xE8 */ "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF",
      /* 0xF0 */ "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7",
      /* 0xF8 */ "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF",
  };
  const char *const text = printable[c];

  /* Entries past 255 exist only where unsigned char is wider than 8 bits
   * and are left as null pointers */
  if (text == NULL) {
    assert(0); /* never gets here */
    return "dead code";
  }
  return text;
}
8235
8236 #endif /* XML_DTD */
8237
/* Reads a debug level from the environment variable named variableName.
 *
 * Returns the variable's value if it consists of a complete base-10 number
 * as accepted by strtoul.  Returns defaultDebugLevel if the variable is not
 * set, is empty or otherwise contains no number, has trailing characters
 * behind the number, or if strtoul reports an error through errno.
 *
 * Side effect: errno is reset to 0 whenever the variable is set. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  errno = 0;
  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);

  /* afterValue == value means that nothing has been converted at all (e.g.
   * for an empty string); the 0 returned by strtoul is not a real value
   * then. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}
8256