xref: /netbsd-src/external/gpl3/binutils/dist/gprofng/src/SAXParserFactory.cc (revision 901e7e84758515fbf39dfc064cb0b45ab146d8b0)
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2    Contributed by Oracle.
3 
4    This file is part of GNU Binutils.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, 51 Franklin Street - Fifth Floor, Boston,
19    MA 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include <ctype.h>
23 
24 #include "util.h"
25 #include "vec.h"
26 #include "DefaultHandler.h"
27 #include "SAXParser.h"
28 #include "SAXParserFactory.h"
29 #include "StringBuilder.h"
30 
31 /*
32  *  Private implementation of Attributes
33  */
34 class AttributesP : public Attributes
35 {
36 public:
37   AttributesP ();
38   ~AttributesP ();
39   int getLength ();
40   const char *getQName (int index);
41   const char *getValue (int index);
42   int getIndex (const char *qName);
43   const char *getValue (const char *qName);
44   void append (char *qName, char *value);
45 
46 private:
47   Vector<char*> *names;
48   Vector<char*> *values;
49 };
50 
51 AttributesP::AttributesP ()
52 {
53   names = new Vector<char*>;
54   values = new Vector<char*>;
55 }
56 
57 AttributesP::~AttributesP ()
58 {
59   Destroy (names);
60   Destroy (values);
61 }
62 
63 int
64 AttributesP::getLength ()
65 {
66   return names->size ();
67 }
68 
69 const char *
70 AttributesP::getQName (int index)
71 {
72   if (index < 0 || index >= names->size ())
73     return NULL;
74   return names->fetch (index);
75 }
76 
77 const char *
78 AttributesP::getValue (int index)
79 {
80   if (index < 0 || index >= values->size ())
81     return NULL;
82   return values->fetch (index);
83 }
84 
85 int
86 AttributesP::getIndex (const char *qName)
87 {
88   for (int idx = 0; idx < names->size (); idx++)
89     if (strcmp (names->fetch (idx), qName) == 0)
90       return idx;
91   return -1;
92 }
93 
94 const char *
95 AttributesP::getValue (const char *qName)
96 {
97   for (int idx = 0; idx < names->size (); idx++)
98     if (strcmp (names->fetch (idx), qName) == 0)
99       return values->fetch (idx);
100   return NULL;
101 }
102 
103 void
104 AttributesP::append (char *qName, char *value)
105 {
106   names->append (qName);
107   values->append (value);
108 }
109 
110 /*
111  *  Implementation of SAXException
112  */
113 SAXException::SAXException ()
114 {
115   message = strdup ("null");
116 }
117 
118 SAXException::SAXException (const char *_message)
119 {
120   if (_message == NULL)
121     message = strdup ("null");
122   else
123     message = strdup (_message);
124 }
125 
126 SAXException::~SAXException ()
127 {
128   free (message);
129 }
130 
131 char *
132 SAXException::getMessage ()
133 {
134   return message;
135 }
136 
137 /*
138  *  SAXParseException
139  */
140 SAXParseException::SAXParseException (char *message, int _lineNumber, int _columnNumber)
141 : SAXException (message == NULL ? GTXT ("XML parse error") : message)
142 {
143   lineNumber = _lineNumber;
144   columnNumber = _columnNumber;
145 }
146 
147 /*
148  *  Private implementation of SAXParser
149  */
150 class SAXParserP : public SAXParser
151 {
152 public:
153   SAXParserP ();
154   ~SAXParserP ();
155   void reset ();
156   void parse (File*, DefaultHandler*);
157 
158   bool
159   isNamespaceAware ()
160   {
161     return false;
162   }
163 
164   bool
165   isValidating ()
166   {
167     return false;
168   }
169 
170 private:
171 
172   static const int CH_EOF = -1;
173 
174   void nextch ();
175   bool isWSpace ();
176   void skipWSpaces ();
177   void scanString (const char *str);
178   char *parseName ();
179   char *parseString ();
180   char *decodeString (char *str);
181   Attributes *parseAttributes ();
182   void parseTag ();
183   void parseDocument ();
184   void parsePart (int idx);
185 
186   DefaultHandler *dh;
187   int bufsz;
188   char *buffer;
189   int cntsz;
190   int idx;
191   int curch;
192   int line;
193   int column;
194 };
195 
196 SAXParserP::SAXParserP ()
197 {
198   dh = NULL;
199   bufsz = 0x2000;
200   buffer = (char*) malloc (bufsz);
201   cntsz = 0;
202   idx = 0;
203   line = 1;
204   column = 0;
205 }
206 
207 SAXParserP::~SAXParserP ()
208 {
209   free (buffer);
210 }
211 
212 void
213 SAXParserP::reset ()
214 {
215   dh = NULL;
216   bufsz = 8192;
217   buffer = (char*) realloc (buffer, bufsz);
218   cntsz = 0;
219   idx = 0;
220   line = 1;
221   column = 0;
222 }
223 
224 void
225 SAXParserP::parse (File *f, DefaultHandler *_dh)
226 {
227   if (_dh == NULL)
228     return;
229   dh = _dh;
230   FILE *file = (FILE*) f;
231   int rem = bufsz;
232   cntsz = 0;
233   idx = 0;
234   for (;;)
235     {
236       int n = (int) fread (buffer + cntsz, 1, rem, file);
237       if (ferror (file) || n <= 0)
238 	break;
239       cntsz += n;
240       if (feof (file))
241 	break;
242       rem -= n;
243       if (rem == 0)
244 	{
245 	  int oldbufsz = bufsz;
246 	  bufsz = bufsz >= 0x100000 ? bufsz + 0x100000 : bufsz * 2;
247 	  buffer = (char*) realloc (buffer, bufsz);
248 	  rem = bufsz - oldbufsz;
249 	}
250     }
251   nextch ();
252   parseDocument ();
253 }
254 
/* Returns the value (0..15) of the hexadecimal digit `c', or -1 when `c'
   is not a hexadecimal digit.  Both lowercase and uppercase letters are
   accepted.  */
static int
hex (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  return -1;
}
264 
265 void
266 SAXParserP::nextch ()
267 {
268   curch = idx >= cntsz ? CH_EOF : buffer[idx++];
269   if (curch == '\n')
270     {
271       line += 1;
272       column = 0;
273     }
274   else
275     column += 1;
276 }
277 
278 bool
279 SAXParserP::isWSpace ()
280 {
281   return curch == ' ' || curch == '\t' || curch == '\n' || curch == '\r';
282 }
283 
284 void
285 SAXParserP::skipWSpaces ()
286 {
287   while (isWSpace ())
288     nextch ();
289 }
290 
291 void
292 SAXParserP::scanString (const char *str)
293 {
294   if (str == NULL || *str == '\0')
295     return;
296   for (;;)
297     {
298       if (curch == CH_EOF)
299 	break;
300       else if (curch == *str)
301 	{
302 	  const char *p = str;
303 	  for (;;)
304 	    {
305 	      p += 1;
306 	      nextch ();
307 	      if (*p == '\0')
308 		return;
309 	      if (curch != *p)
310 		break;
311 	    }
312 	}
313       nextch ();
314     }
315 }
316 
317 char *
318 SAXParserP::parseName ()
319 {
320   StringBuilder *name = new StringBuilder ();
321 
322   if ((curch >= 'A' && curch <= 'Z') || (curch >= 'a' && curch <= 'z'))
323     {
324       name->append ((char) curch);
325       nextch ();
326       while (isalnum (curch) != 0 || curch == '_')
327 	{
328 	  name->append ((char) curch);
329 	  nextch ();
330 	}
331     }
332 
333   char *res = name->toString ();
334   delete name;
335   return res;
336 }
337 
338 /**
339  * Replaces encoded XML characters with original characters
340  * Attention: this method reuses the same string that is passed as the argument
341  * @param str
342  * @return str
343  */
344 char *
345 SAXParserP::decodeString (char * str)
346 {
347   // Check if string has %22% and replace it with double quotes
348   // Also replace all other special combinations.
349   char *from = str;
350   char *to = str;
351   if (strstr (from, "%") || strstr (from, "&"))
352     {
353       int len = strlen (from);
354       for (int i = 0; i < len; i++)
355 	{
356 	  int nch = from[i];
357 	  // Process &...; combinations
358 	  if (nch == '&' && i + 3 < len)
359 	    {
360 	      if (from[i + 2] == 't' && from[i + 3] == ';')
361 		{
362 		  // check &lt; &gt;
363 		  if (from[i + 1] == 'l')
364 		    {
365 		      nch = '<';
366 		      i += 3;
367 		    }
368 		  else if (from[i + 1] == 'g')
369 		    {
370 		      nch = '>';
371 		      i += 3;
372 		    }
373 		}
374 	      else if (i + 4 < len && from[i + 4] == ';')
375 		{
376 		  // check &amp;
377 		  if (from[i + 1] == 'a' && from[i + 2] == 'm' && from[i + 3] == 'p')
378 		    {
379 		      nch = '&';
380 		      i += 4;
381 		    }
382 		}
383 	      else if ((i + 5 < len) && (from[i + 5] == ';'))
384 		{
385 		  // check &apos; &quot;
386 		  if (from[i + 1] == 'a' && from[i + 2] == 'p'
387 		      && from[i + 3] == 'o' && from[i + 4] == 's')
388 		    {
389 		      nch = '\'';
390 		      i += 5;
391 		    }
392 		  if (from[i + 1] == 'q' && from[i + 2] == 'u' && from[i + 3] == 'o' && from[i + 4] == 't')
393 		    {
394 		      nch = '"';
395 		      i += 5;
396 		    }
397 		}
398 	    }
399 	  // Process %XX% combinations
400 	  if (nch == '%' && i + 3 < len && from[i + 3] == '%')
401 	    {
402 	      int ch = hex (from[i + 1]);
403 	      if (ch >= 0)
404 		{
405 		  int ch2 = hex (from[i + 2]);
406 		  if (ch2 >= 0)
407 		    {
408 		      ch = ch * 16 + ch2;
409 		      nch = ch;
410 		      i += 3;
411 		    }
412 		}
413 	    }
414 	  *to++ = (char) nch;
415 	}
416       *to = '\0';
417     }
418   return str;
419 }
420 
421 char *
422 SAXParserP::parseString ()
423 {
424   StringBuilder *str = new StringBuilder ();
425   int quote = '>';
426   if (curch == '"')
427     {
428       quote = curch;
429       nextch ();
430     }
431   for (;;)
432     {
433       if (curch == CH_EOF)
434 	break;
435       if (curch == quote)
436 	{
437 	  nextch ();
438 	  break;
439 	}
440       str->append ((char) curch);
441       nextch ();
442     }
443 
444   char *res = str->toString ();
445   // Decode XML characters
446   res = decodeString (res);
447   delete str;
448   return res;
449 }
450 
451 Attributes *
452 SAXParserP::parseAttributes ()
453 {
454   AttributesP *attrs = new AttributesP ();
455 
456   for (;;)
457     {
458       skipWSpaces ();
459       char *name = parseName ();
460       if (name == NULL || *name == '\0')
461 	{
462 	  free (name);
463 	  break;
464 	}
465       skipWSpaces ();
466       if (curch != '=')
467 	{
468 	  SAXParseException *e = new SAXParseException (NULL, line, column);
469 	  dh->error (e);
470 	  scanString (">");
471 	  free (name);
472 	  return attrs;
473 	}
474       nextch ();
475       skipWSpaces ();
476       char *value = parseString ();
477       attrs->append (name, value);
478     }
479   return attrs;
480 }
481 
482 void
483 SAXParserP::parseTag ()
484 {
485   skipWSpaces ();
486   bool empty = false;
487   char *name = parseName ();
488   if (name == NULL || *name == '\0')
489     {
490       SAXParseException *e = new SAXParseException (NULL, line, column);
491       dh->error (e);
492       scanString (">");
493       free (name);
494       return;
495     }
496 
497   Attributes *attrs = parseAttributes ();
498   if (curch == '/')
499     {
500       nextch ();
501       empty = true;
502     }
503   if (curch == '>')
504     nextch ();
505   else
506     {
507       empty = false;
508       SAXParseException *e = new SAXParseException (NULL, line, column);
509       dh->error (e);
510       scanString (">");
511     }
512   if (curch == CH_EOF)
513     {
514       free (name);
515       delete attrs;
516       return;
517     }
518   dh->startElement (NULL, NULL, name, attrs);
519   if (empty)
520     {
521       dh->endElement (NULL, NULL, name);
522       free (name);
523       delete attrs;
524       return;
525     }
526 
527   StringBuilder *chars = new StringBuilder ();
528   bool wspaces = true;
529   for (;;)
530     {
531       if (curch == CH_EOF)
532 	break;
533       else if (curch == '<')
534 	{
535 	  if (chars->length () > 0)
536 	    {
537 	      char *str = chars->toString ();
538 	      // Decode XML characters
539 	      str = decodeString (str);
540 	      if (wspaces)
541 		dh->ignorableWhitespace (str, 0, chars->length ());
542 	      else
543 		dh->characters (str, 0, chars->length ());
544 	      free (str);
545 	      chars->setLength (0);
546 	      wspaces = true;
547 	    }
548 	  nextch ();
549 	  if (curch == '/')
550 	    {
551 	      nextch ();
552 	      char *ename = parseName ();
553 	      if (ename && *ename != '\0')
554 		{
555 		  if (strcmp (name, ename) == 0)
556 		    {
557 		      skipWSpaces ();
558 		      if (curch == '>')
559 			{
560 			  nextch ();
561 			  dh->endElement (NULL, NULL, name);
562 			  free (ename);
563 			  break;
564 			}
565 		      SAXParseException *e = new SAXParseException (NULL, line, column);
566 		      dh->error (e);
567 		    }
568 		  else
569 		    {
570 		      SAXParseException *e = new SAXParseException (NULL, line, column);
571 		      dh->error (e);
572 		    }
573 		  scanString (">");
574 		}
575 	      free (ename);
576 	    }
577 	  else
578 	    parseTag ();
579 	}
580       else
581 	{
582 	  if (!isWSpace ())
583 	    wspaces = false;
584 	  chars->append ((char) curch);
585 	  nextch ();
586 	}
587     }
588 
589   free (name);
590   delete attrs;
591   delete chars;
592   return;
593 }
594 
595 void
596 SAXParserP::parseDocument ()
597 {
598   dh->startDocument ();
599   for (;;)
600     {
601       if (curch == CH_EOF)
602 	break;
603       if (curch == '<')
604 	{
605 	  nextch ();
606 	  if (curch == '?')
607 	    scanString ("?>");
608 	  else if (curch == '!')
609 	    scanString (">");
610 	  else
611 	    parseTag ();
612 	}
613       else
614 	nextch ();
615     }
616   dh->endDocument ();
617 }
618 
619 /*
620  *  Private implementation of SAXParserFactory
621  */
622 class SAXParserFactoryP : public SAXParserFactory
623 {
624 public:
625   SAXParserFactoryP () { }
626   ~SAXParserFactoryP () { }
627   SAXParser *newSAXParser ();
628 
629   void
630   setFeature (const char *, bool) { }
631 
632   bool
633   getFeature (const char *)
634   {
635     return false;
636   }
637 };
638 
639 SAXParser *
640 SAXParserFactoryP::newSAXParser ()
641 {
642   return new SAXParserP ();
643 }
644 
645 /*
646  *  SAXParserFactory
647  */
648 const char *SAXParserFactory::DEFAULT_PROPERTY_NAME = "javax.xml.parsers.SAXParserFactory";
649 
650 SAXParserFactory *
651 SAXParserFactory::newInstance ()
652 {
653   return new SAXParserFactoryP ();
654 }
655 
656 void
657 DefaultHandler::dump_startElement (const char *qName, Attributes *attrs)
658 {
659   fprintf (stderr, NTXT ("DefaultHandler::startElement qName='%s'\n"), STR (qName));
660   for (int i = 0, sz = attrs ? attrs->getLength () : 0; i < sz; i++)
661     {
662       const char *qn = attrs->getQName (i);
663       const char *vl = attrs->getValue (i);
664       fprintf (stderr, NTXT ("  %d  '%s' = '%s'\n"), i, STR (qn), STR (vl));
665     }
666 }
667