xref: /netbsd-src/external/gpl3/gcc.old/dist/libgfortran/io/format.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /* Copyright (C) 2002-2019 Free Software Foundation, Inc.
2    Contributed by Andy Vaught
3    F2003 I/O support contributed by Jerry DeLisle
4 
5 This file is part of the GNU Fortran runtime library (libgfortran).
6 
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28    interpretation during I/O statements.  */
29 
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34 
35 
36 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
37 				  NULL };
38 
39 /* Error messages. */
40 
41 static const char posint_required[] = "Positive width required in format",
42   period_required[] = "Period required in format",
43   nonneg_required[] = "Nonnegative width required in format",
44   unexpected_element[] = "Unexpected element '%c' in format\n",
45   unexpected_end[] = "Unexpected end of format string",
46   bad_string[] = "Unterminated character constant in format",
47   bad_hollerith[] = "Hollerith constant extends past the end of the format",
48   reversion_error[] = "Exhausted data descriptors in format",
49   zero_width[] = "Zero width in format descriptor";
50 
51 /* The following routines support caching format data from parsed format strings
52    into a hash table.  This avoids repeatedly parsing duplicate format strings
53    or format strings in I/O statements that are repeated in loops.  */
54 
55 
56 /* Traverse the table and free all data.  */
57 
58 void
59 free_format_hash_table (gfc_unit *u)
60 {
61   size_t i;
62 
63   /* free_format_data handles any NULL pointers.  */
64   for (i = 0; i < FORMAT_HASH_SIZE; i++)
65     {
66       if (u->format_hash_table[i].hashed_fmt != NULL)
67 	{
68 	  free_format_data (u->format_hash_table[i].hashed_fmt);
69 	  free (u->format_hash_table[i].key);
70 	}
71       u->format_hash_table[i].key = NULL;
72       u->format_hash_table[i].key_len = 0;
73       u->format_hash_table[i].hashed_fmt = NULL;
74     }
75 }
76 
77 /* Traverse the format_data structure and reset the fnode counters.  */
78 
79 static void
80 reset_node (fnode *fn)
81 {
82   fnode *f;
83 
84   fn->count = 0;
85   fn->current = NULL;
86 
87   if (fn->format != FMT_LPAREN)
88     return;
89 
90   for (f = fn->u.child; f; f = f->next)
91     {
92       if (f->format == FMT_RPAREN)
93 	break;
94       reset_node (f);
95     }
96 }
97 
98 static void
99 reset_fnode_counters (st_parameter_dt *dtp)
100 {
101   fnode *f;
102   format_data *fmt;
103 
104   fmt = dtp->u.p.fmt;
105 
106   /* Clear this pointer at the head so things start at the right place.  */
107   fmt->array.array[0].current = NULL;
108 
109   for (f = fmt->array.array[0].u.child; f; f = f->next)
110     reset_node (f);
111 }
112 
113 
114 /* A simple hashing function to generate an index into the hash table.  */
115 
116 static uint32_t
117 format_hash (st_parameter_dt *dtp)
118 {
119   char *key;
120   gfc_charlen_type key_len;
121   uint32_t hash = 0;
122   gfc_charlen_type i;
123 
124   /* Hash the format string. Super simple, but what the heck!  */
125   key = dtp->format;
126   key_len = dtp->format_len;
127   for (i = 0; i < key_len; i++)
128     hash ^= key[i];
129   hash &= (FORMAT_HASH_SIZE - 1);
130   return hash;
131 }
132 
133 
134 static void
135 save_parsed_format (st_parameter_dt *dtp)
136 {
137   uint32_t hash;
138   gfc_unit *u;
139 
140   hash = format_hash (dtp);
141   u = dtp->u.p.current_unit;
142 
143   /* Index into the hash table.  We are simply replacing whatever is there
144      relying on probability.  */
145   if (u->format_hash_table[hash].hashed_fmt != NULL)
146     free_format_data (u->format_hash_table[hash].hashed_fmt);
147   u->format_hash_table[hash].hashed_fmt = NULL;
148 
149   free (u->format_hash_table[hash].key);
150   u->format_hash_table[hash].key = dtp->format;
151 
152   u->format_hash_table[hash].key_len = dtp->format_len;
153   u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
154 }
155 
156 
157 static format_data *
158 find_parsed_format (st_parameter_dt *dtp)
159 {
160   uint32_t hash;
161   gfc_unit *u;
162 
163   hash = format_hash (dtp);
164   u = dtp->u.p.current_unit;
165 
166   if (u->format_hash_table[hash].key != NULL)
167     {
168       /* See if it matches.  */
169       if (u->format_hash_table[hash].key_len == dtp->format_len)
170 	{
171 	  /* So far so good.  */
172 	  if (strncmp (u->format_hash_table[hash].key,
173 	      dtp->format, dtp->format_len) == 0)
174 	    return u->format_hash_table[hash].hashed_fmt;
175 	}
176     }
177   return NULL;
178 }
179 
180 
181 /* next_char()-- Return the next character in the format string.
182    Returns -1 when the string is done.  If the literal flag is set,
183    spaces are significant, otherwise they are not. */
184 
185 static int
186 next_char (format_data *fmt, int literal)
187 {
188   int c;
189 
190   do
191     {
192       if (fmt->format_string_len == 0)
193 	return -1;
194 
195       fmt->format_string_len--;
196       c = toupper (*fmt->format_string++);
197       fmt->error_element = c;
198     }
199   while ((c == ' ' || c == '\t') && !literal);
200 
201   return c;
202 }
203 
204 
205 /* unget_char()-- Back up one character position. */
206 
207 #define unget_char(fmt) \
208   { fmt->format_string--; fmt->format_string_len++; }
209 
210 
211 /* get_fnode()-- Allocate a new format node, inserting it into the
212    current singly linked list.  These are initially allocated from the
213    static buffer. */
214 
215 static fnode *
216 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
217 {
218   fnode *f;
219 
220   if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
221     {
222       fmt->last->next = xmalloc (sizeof (fnode_array));
223       fmt->last = fmt->last->next;
224       fmt->last->next = NULL;
225       fmt->avail = &fmt->last->array[0];
226     }
227   f = fmt->avail++;
228   memset (f, '\0', sizeof (fnode));
229 
230   if (*head == NULL)
231     *head = *tail = f;
232   else
233     {
234       (*tail)->next = f;
235       *tail = f;
236     }
237 
238   f->format = t;
239   f->repeat = -1;
240   f->source = fmt->format_string;
241   return f;
242 }
243 
244 
245 /* free_format()-- Free allocated format string.  */
246 void
247 free_format (st_parameter_dt *dtp)
248 {
249   if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
250     {
251       free (dtp->format);
252       dtp->format = NULL;
253     }
254 }
255 
256 
257 /* free_format_data()-- Free all allocated format data.  */
258 
259 void
260 free_format_data (format_data *fmt)
261 {
262   fnode_array *fa, *fa_next;
263   fnode *fnp;
264 
265   if (fmt == NULL)
266     return;
267 
268   /* Free vlist descriptors in the fnode_array if one was allocated.  */
269   for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] &&
270        fnp->format != FMT_NONE; fnp++)
271     if (fnp->format == FMT_DT)
272 	{
273 	  if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist))
274 	    free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist));
275 	  free (fnp->u.udf.vlist);
276 	}
277 
278   for (fa = fmt->array.next; fa; fa = fa_next)
279     {
280       fa_next = fa->next;
281       free (fa);
282     }
283 
284   free (fmt);
285   fmt = NULL;
286 }
287 
288 
289 /* format_lex()-- Simple lexical analyzer for getting the next token
290    in a FORMAT string.  We support a one-level token pushback in the
291    fmt->saved_token variable. */
292 
293 static format_token
294 format_lex (format_data *fmt)
295 {
296   format_token token;
297   int negative_flag;
298   int c;
299   char delim;
300 
301   if (fmt->saved_token != FMT_NONE)
302     {
303       token = fmt->saved_token;
304       fmt->saved_token = FMT_NONE;
305       return token;
306     }
307 
308   negative_flag = 0;
309   c = next_char (fmt, 0);
310 
311   switch (c)
312     {
313     case '*':
314        token = FMT_STAR;
315        break;
316 
317     case '(':
318       token = FMT_LPAREN;
319       break;
320 
321     case ')':
322       token = FMT_RPAREN;
323       break;
324 
325     case '-':
326       negative_flag = 1;
327       /* Fall Through */
328 
329     case '+':
330       c = next_char (fmt, 0);
331       if (!isdigit (c))
332 	{
333 	  token = FMT_UNKNOWN;
334 	  break;
335 	}
336 
337       fmt->value = c - '0';
338 
339       for (;;)
340 	{
341 	  c = next_char (fmt, 0);
342 	  if (!isdigit (c))
343 	    break;
344 
345 	  fmt->value = 10 * fmt->value + c - '0';
346 	}
347 
348       unget_char (fmt);
349 
350       if (negative_flag)
351 	fmt->value = -fmt->value;
352       token = FMT_SIGNED_INT;
353       break;
354 
355     case '0':
356     case '1':
357     case '2':
358     case '3':
359     case '4':
360     case '5':
361     case '6':
362     case '7':
363     case '8':
364     case '9':
365       fmt->value = c - '0';
366 
367       for (;;)
368 	{
369 	  c = next_char (fmt, 0);
370 	  if (!isdigit (c))
371 	    break;
372 
373 	  fmt->value = 10 * fmt->value + c - '0';
374 	}
375 
376       unget_char (fmt);
377       token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
378       break;
379 
380     case '.':
381       token = FMT_PERIOD;
382       break;
383 
384     case ',':
385       token = FMT_COMMA;
386       break;
387 
388     case ':':
389       token = FMT_COLON;
390       break;
391 
392     case '/':
393       token = FMT_SLASH;
394       break;
395 
396     case '$':
397       token = FMT_DOLLAR;
398       break;
399 
400     case 'T':
401       switch (next_char (fmt, 0))
402 	{
403 	case 'L':
404 	  token = FMT_TL;
405 	  break;
406 	case 'R':
407 	  token = FMT_TR;
408 	  break;
409 	default:
410 	  token = FMT_T;
411 	  unget_char (fmt);
412 	  break;
413 	}
414 
415       break;
416 
417     case 'X':
418       token = FMT_X;
419       break;
420 
421     case 'S':
422       switch (next_char (fmt, 0))
423 	{
424 	case 'S':
425 	  token = FMT_SS;
426 	  break;
427 	case 'P':
428 	  token = FMT_SP;
429 	  break;
430 	default:
431 	  token = FMT_S;
432 	  unget_char (fmt);
433 	  break;
434 	}
435 
436       break;
437 
438     case 'B':
439       switch (next_char (fmt, 0))
440 	{
441 	case 'N':
442 	  token = FMT_BN;
443 	  break;
444 	case 'Z':
445 	  token = FMT_BZ;
446 	  break;
447 	default:
448 	  token = FMT_B;
449 	  unget_char (fmt);
450 	  break;
451 	}
452 
453       break;
454 
455     case '\'':
456     case '"':
457       delim = c;
458 
459       fmt->string = fmt->format_string;
460       fmt->value = 0;		/* This is the length of the string */
461 
462       for (;;)
463 	{
464 	  c = next_char (fmt, 1);
465 	  if (c == -1)
466 	    {
467 	      token = FMT_BADSTRING;
468 	      fmt->error = bad_string;
469 	      break;
470 	    }
471 
472 	  if (c == delim)
473 	    {
474 	      c = next_char (fmt, 1);
475 
476 	      if (c == -1)
477 		{
478 		  token = FMT_BADSTRING;
479 		  fmt->error = bad_string;
480 		  break;
481 		}
482 
483 	      if (c != delim)
484 		{
485 		  unget_char (fmt);
486 		  token = FMT_STRING;
487 		  break;
488 		}
489 	    }
490 
491 	  fmt->value++;
492 	}
493 
494       break;
495 
496     case 'P':
497       token = FMT_P;
498       break;
499 
500     case 'I':
501       token = FMT_I;
502       break;
503 
504     case 'O':
505       token = FMT_O;
506       break;
507 
508     case 'Z':
509       token = FMT_Z;
510       break;
511 
512     case 'F':
513       token = FMT_F;
514       break;
515 
516     case 'E':
517       switch (next_char (fmt, 0))
518 	{
519 	case 'N':
520 	  token = FMT_EN;
521 	  break;
522 	case 'S':
523 	  token = FMT_ES;
524 	  break;
525 	default:
526 	  token = FMT_E;
527 	  unget_char (fmt);
528 	  break;
529 	}
530       break;
531 
532     case 'G':
533       token = FMT_G;
534       break;
535 
536     case 'H':
537       token = FMT_H;
538       break;
539 
540     case 'L':
541       token = FMT_L;
542       break;
543 
544     case 'A':
545       token = FMT_A;
546       break;
547 
548     case 'D':
549       switch (next_char (fmt, 0))
550 	{
551 	case 'P':
552 	  token = FMT_DP;
553 	  break;
554 	case 'C':
555 	  token = FMT_DC;
556 	  break;
557 	case 'T':
558 	  token = FMT_DT;
559 	  break;
560 	default:
561 	  token = FMT_D;
562 	  unget_char (fmt);
563 	  break;
564 	}
565       break;
566 
567     case 'R':
568       switch (next_char (fmt, 0))
569 	{
570 	case 'C':
571 	  token = FMT_RC;
572 	  break;
573 	case 'D':
574 	  token = FMT_RD;
575 	  break;
576 	case 'N':
577 	  token = FMT_RN;
578 	  break;
579 	case 'P':
580 	  token = FMT_RP;
581 	  break;
582 	case 'U':
583 	  token = FMT_RU;
584 	  break;
585 	case 'Z':
586 	  token = FMT_RZ;
587 	  break;
588 	default:
589 	  unget_char (fmt);
590 	  token = FMT_UNKNOWN;
591 	  break;
592 	}
593       break;
594 
595     case -1:
596       token = FMT_END;
597       break;
598 
599     default:
600       token = FMT_UNKNOWN;
601       break;
602     }
603 
604   return token;
605 }
606 
607 
608 /* parse_format_list()-- Parse a format list.  Assumes that a left
609    paren has already been seen.  Returns a list representing the
610    parenthesis node which contains the rest of the list. */
611 
612 static fnode *
613 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
614 {
615   fnode *head, *tail;
616   format_token t, u, t2;
617   int repeat;
618   format_data *fmt = dtp->u.p.fmt;
619   bool seen_data_desc = false;
620 
621   head = tail = NULL;
622 
623   /* Get the next format item */
624  format_item:
625   t = format_lex (fmt);
626  format_item_1:
627   switch (t)
628     {
629     case FMT_STAR:
630       t = format_lex (fmt);
631       if (t != FMT_LPAREN)
632 	{
633 	  fmt->error = "Left parenthesis required after '*'";
634 	  goto finished;
635 	}
636       get_fnode (fmt, &head, &tail, FMT_LPAREN);
637       tail->repeat = -2;  /* Signifies unlimited format.  */
638       tail->u.child = parse_format_list (dtp, &seen_data_desc);
639       *seen_dd = seen_data_desc;
640       if (fmt->error != NULL)
641 	goto finished;
642       if (!seen_data_desc)
643 	{
644 	  fmt->error = "'*' requires at least one associated data descriptor";
645 	  goto finished;
646 	}
647       goto between_desc;
648 
649     case FMT_POSINT:
650       repeat = fmt->value;
651 
652       t = format_lex (fmt);
653       switch (t)
654 	{
655 	case FMT_LPAREN:
656 	  get_fnode (fmt, &head, &tail, FMT_LPAREN);
657 	  tail->repeat = repeat;
658 	  tail->u.child = parse_format_list (dtp, &seen_data_desc);
659 	  *seen_dd = seen_data_desc;
660 	  if (fmt->error != NULL)
661 	    goto finished;
662 
663 	  goto between_desc;
664 
665 	case FMT_SLASH:
666 	  get_fnode (fmt, &head, &tail, FMT_SLASH);
667 	  tail->repeat = repeat;
668 	  goto optional_comma;
669 
670 	case FMT_X:
671 	  get_fnode (fmt, &head, &tail, FMT_X);
672 	  tail->repeat = 1;
673 	  tail->u.k = fmt->value;
674 	  goto between_desc;
675 
676 	case FMT_P:
677 	  goto p_descriptor;
678 
679 	default:
680 	  goto data_desc;
681 	}
682 
683     case FMT_LPAREN:
684       get_fnode (fmt, &head, &tail, FMT_LPAREN);
685       tail->repeat = 1;
686       tail->u.child = parse_format_list (dtp, &seen_data_desc);
687       *seen_dd = seen_data_desc;
688       if (fmt->error != NULL)
689 	goto finished;
690 
691       goto between_desc;
692 
693     case FMT_SIGNED_INT:	/* Signed integer can only precede a P format.  */
694     case FMT_ZERO:		/* Same for zero.  */
695       t = format_lex (fmt);
696       if (t != FMT_P)
697 	{
698 	  fmt->error = "Expected P edit descriptor in format";
699 	  goto finished;
700 	}
701 
702     p_descriptor:
703       get_fnode (fmt, &head, &tail, FMT_P);
704       tail->u.k = fmt->value;
705       tail->repeat = 1;
706 
707       t = format_lex (fmt);
708       if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
709 	  || t == FMT_G || t == FMT_E)
710 	{
711 	  repeat = 1;
712 	  goto data_desc;
713 	}
714 
715       if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
716 	  && t != FMT_POSINT)
717 	{
718 	  fmt->error = "Comma required after P descriptor";
719 	  goto finished;
720 	}
721 
722       fmt->saved_token = t;
723       goto optional_comma;
724 
725     case FMT_P:		/* P and X require a prior number */
726       fmt->error = "P descriptor requires leading scale factor";
727       goto finished;
728 
729     case FMT_X:
730 /*
731    EXTENSION!
732 
733    If we would be pedantic in the library, we would have to reject
734    an X descriptor without an integer prefix:
735 
736       fmt->error = "X descriptor requires leading space count";
737       goto finished;
738 
739    However, this is an extension supported by many Fortran compilers,
740    including Cray, HP, AIX, and IRIX.  Therefore, we allow it in the
741    runtime library, and make the front end reject it if the compiler
742    is in pedantic mode.  The interpretation of 'X' is '1X'.
743 */
744       get_fnode (fmt, &head, &tail, FMT_X);
745       tail->repeat = 1;
746       tail->u.k = 1;
747       goto between_desc;
748 
749     case FMT_STRING:
750       get_fnode (fmt, &head, &tail, FMT_STRING);
751       tail->u.string.p = fmt->string;
752       tail->u.string.length = fmt->value;
753       tail->repeat = 1;
754       goto optional_comma;
755 
756     case FMT_RC:
757     case FMT_RD:
758     case FMT_RN:
759     case FMT_RP:
760     case FMT_RU:
761     case FMT_RZ:
762       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
763 		  "descriptor not allowed");
764       get_fnode (fmt, &head, &tail, t);
765       tail->repeat = 1;
766       goto between_desc;
767 
768     case FMT_DC:
769     case FMT_DP:
770       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
771 		  "descriptor not allowed");
772     /* Fall through.  */
773     case FMT_S:
774     case FMT_SS:
775     case FMT_SP:
776     case FMT_BN:
777     case FMT_BZ:
778       get_fnode (fmt, &head, &tail, t);
779       tail->repeat = 1;
780       goto between_desc;
781 
782     case FMT_COLON:
783       get_fnode (fmt, &head, &tail, FMT_COLON);
784       tail->repeat = 1;
785       goto optional_comma;
786 
787     case FMT_SLASH:
788       get_fnode (fmt, &head, &tail, FMT_SLASH);
789       tail->repeat = 1;
790       tail->u.r = 1;
791       goto optional_comma;
792 
793     case FMT_DOLLAR:
794       get_fnode (fmt, &head, &tail, FMT_DOLLAR);
795       tail->repeat = 1;
796       notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
797       goto between_desc;
798 
799     case FMT_T:
800     case FMT_TL:
801     case FMT_TR:
802       t2 = format_lex (fmt);
803       if (t2 != FMT_POSINT)
804 	{
805 	  fmt->error = posint_required;
806 	  goto finished;
807 	}
808       get_fnode (fmt, &head, &tail, t);
809       tail->u.n = fmt->value;
810       tail->repeat = 1;
811       goto between_desc;
812 
813     case FMT_I:
814     case FMT_B:
815     case FMT_O:
816     case FMT_Z:
817     case FMT_E:
818     case FMT_EN:
819     case FMT_ES:
820     case FMT_D:
821     case FMT_DT:
822     case FMT_L:
823     case FMT_A:
824     case FMT_F:
825     case FMT_G:
826       repeat = 1;
827       *seen_dd = true;
828       goto data_desc;
829 
830     case FMT_H:
831       get_fnode (fmt, &head, &tail, FMT_STRING);
832       if (fmt->format_string_len < 1)
833 	{
834 	  fmt->error = bad_hollerith;
835 	  goto finished;
836 	}
837 
838       tail->u.string.p = fmt->format_string;
839       tail->u.string.length = 1;
840       tail->repeat = 1;
841 
842       fmt->format_string++;
843       fmt->format_string_len--;
844 
845       goto between_desc;
846 
847     case FMT_END:
848       fmt->error = unexpected_end;
849       goto finished;
850 
851     case FMT_BADSTRING:
852       goto finished;
853 
854     case FMT_RPAREN:
855       goto finished;
856 
857     default:
858       fmt->error = unexpected_element;
859       goto finished;
860     }
861 
862   /* In this state, t must currently be a data descriptor.  Deal with
863      things that can/must follow the descriptor */
864  data_desc:
865 
866   switch (t)
867     {
868     case FMT_L:
869       *seen_dd = true;
870       t = format_lex (fmt);
871       if (t != FMT_POSINT)
872 	{
873 	  if (t == FMT_ZERO)
874 	    {
875 	      if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
876 		{
877 		  fmt->error = "Extension: Zero width after L descriptor";
878 		  goto finished;
879 		}
880 	      else
881 		notify_std (&dtp->common, GFC_STD_GNU,
882 			    "Zero width after L descriptor");
883 	    }
884 	  else
885 	    {
886 	      fmt->saved_token = t;
887 	      notify_std (&dtp->common, GFC_STD_GNU,
888 			  "Positive width required with L descriptor");
889 	    }
890 	  fmt->value = 1;	/* Default width */
891 	}
892       get_fnode (fmt, &head, &tail, FMT_L);
893       tail->u.n = fmt->value;
894       tail->repeat = repeat;
895       break;
896 
897     case FMT_A:
898       *seen_dd = true;
899       t = format_lex (fmt);
900       if (t == FMT_ZERO)
901 	{
902 	  fmt->error = zero_width;
903 	  goto finished;
904 	}
905 
906       if (t != FMT_POSINT)
907 	{
908 	  fmt->saved_token = t;
909 	  fmt->value = -1;		/* Width not present */
910 	}
911 
912       get_fnode (fmt, &head, &tail, FMT_A);
913       tail->repeat = repeat;
914       tail->u.n = fmt->value;
915       break;
916 
917     case FMT_D:
918     case FMT_E:
919     case FMT_F:
920     case FMT_G:
921     case FMT_EN:
922     case FMT_ES:
923       *seen_dd = true;
924       get_fnode (fmt, &head, &tail, t);
925       tail->repeat = repeat;
926 
927       u = format_lex (fmt);
928       if (t == FMT_G && u == FMT_ZERO)
929 	{
930 	  *seen_dd = true;
931 	  if (notification_std (GFC_STD_F2008) == NOTIFICATION_ERROR
932 	      || dtp->u.p.mode == READING)
933 	    {
934 	      fmt->error = zero_width;
935 	      goto finished;
936 	    }
937 	  tail->u.real.w = 0;
938 	  u = format_lex (fmt);
939 	  if (u != FMT_PERIOD)
940 	    {
941 	      fmt->saved_token = u;
942 	      break;
943 	    }
944 
945 	  u = format_lex (fmt);
946 	  if (u != FMT_POSINT)
947 	    {
948 	      fmt->error = posint_required;
949 	      goto finished;
950 	    }
951 	  tail->u.real.d = fmt->value;
952 	  break;
953 	}
954       if (t == FMT_F && dtp->u.p.mode == WRITING)
955 	{
956 	  *seen_dd = true;
957 	  if (u != FMT_POSINT && u != FMT_ZERO)
958 	    {
959 	      fmt->error = nonneg_required;
960 	      goto finished;
961 	    }
962 	}
963       else if (u != FMT_POSINT)
964 	{
965 	  fmt->error = posint_required;
966 	  goto finished;
967 	}
968 
969       tail->u.real.w = fmt->value;
970       t2 = t;
971       t = format_lex (fmt);
972       if (t != FMT_PERIOD)
973 	{
974 	  /* We treat a missing decimal descriptor as 0.  Note: This is only
975 	     allowed if -std=legacy, otherwise an error occurs.  */
976 	  if (compile_options.warn_std != 0)
977 	    {
978 	      fmt->error = period_required;
979 	      goto finished;
980 	    }
981 	  fmt->saved_token = t;
982 	  tail->u.real.d = 0;
983 	  tail->u.real.e = -1;
984 	  break;
985 	}
986 
987       t = format_lex (fmt);
988       if (t != FMT_ZERO && t != FMT_POSINT)
989 	{
990 	  fmt->error = nonneg_required;
991 	  goto finished;
992 	}
993 
994       tail->u.real.d = fmt->value;
995       tail->u.real.e = -1;
996 
997       if (t2 == FMT_D || t2 == FMT_F)
998 	{
999 	  *seen_dd = true;
1000 	  break;
1001 	}
1002 
1003       /* Look for optional exponent */
1004       t = format_lex (fmt);
1005       if (t != FMT_E)
1006 	fmt->saved_token = t;
1007       else
1008 	{
1009 	  t = format_lex (fmt);
1010 	  if (t != FMT_POSINT)
1011 	    {
1012 	      fmt->error = "Positive exponent width required in format";
1013 	      goto finished;
1014 	    }
1015 
1016 	  tail->u.real.e = fmt->value;
1017 	}
1018 
1019       break;
1020     case FMT_DT:
1021       *seen_dd = true;
1022       get_fnode (fmt, &head, &tail, t);
1023       tail->repeat = repeat;
1024 
1025       t = format_lex (fmt);
1026 
1027       /* Initialize the vlist to a zero size, rank-one array.  */
1028       tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4)
1029 				  + sizeof (descriptor_dimension));
1030       GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL;
1031       GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0);
1032 
1033       if (t == FMT_STRING)
1034         {
1035 	  /* Get pointer to the optional format string.  */
1036 	  tail->u.udf.string = fmt->string;
1037 	  tail->u.udf.string_len = fmt->value;
1038 	  t = format_lex (fmt);
1039 	}
1040       if (t == FMT_LPAREN)
1041         {
1042 	  /* Temporary buffer to hold the vlist values.  */
1043 	  GFC_INTEGER_4 temp[FARRAY_SIZE];
1044 	  int i = 0;
1045 	loop:
1046 	  t = format_lex (fmt);
1047 	  if (t != FMT_POSINT)
1048 	    {
1049 	      fmt->error = posint_required;
1050 	      goto finished;
1051 	    }
1052 	  /* Save the positive integer value.  */
1053 	  temp[i++] = fmt->value;
1054 	  t = format_lex (fmt);
1055 	  if (t == FMT_COMMA)
1056 	    goto loop;
1057 	  if (t == FMT_RPAREN)
1058 	    {
1059 	      /* We have parsed the complete vlist so initialize the
1060 	         array descriptor and save it in the format node.  */
1061 	      gfc_full_array_i4 *vp = tail->u.udf.vlist;
1062 	      GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4));
1063 	      GFC_DIMENSION_SET(vp->dim[0],1, i, 1);
1064 	      memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4));
1065 	      break;
1066 	    }
1067 	  fmt->error = unexpected_element;
1068 	  goto finished;
1069 	}
1070       fmt->saved_token = t;
1071       break;
1072     case FMT_H:
1073       if (repeat > fmt->format_string_len)
1074 	{
1075 	  fmt->error = bad_hollerith;
1076 	  goto finished;
1077 	}
1078 
1079       get_fnode (fmt, &head, &tail, FMT_STRING);
1080       tail->u.string.p = fmt->format_string;
1081       tail->u.string.length = repeat;
1082       tail->repeat = 1;
1083 
1084       fmt->format_string += fmt->value;
1085       fmt->format_string_len -= repeat;
1086 
1087       break;
1088 
1089     case FMT_I:
1090     case FMT_B:
1091     case FMT_O:
1092     case FMT_Z:
1093       *seen_dd = true;
1094       get_fnode (fmt, &head, &tail, t);
1095       tail->repeat = repeat;
1096 
1097       t = format_lex (fmt);
1098 
1099       if (dtp->u.p.mode == READING)
1100 	{
1101 	  if (t != FMT_POSINT)
1102 	    {
1103 	      fmt->error = posint_required;
1104 	      goto finished;
1105 	    }
1106 	}
1107       else
1108 	{
1109 	  if (t != FMT_ZERO && t != FMT_POSINT)
1110 	    {
1111 	      fmt->error = nonneg_required;
1112 	      goto finished;
1113 	    }
1114 	}
1115 
1116       tail->u.integer.w = fmt->value;
1117       tail->u.integer.m = -1;
1118 
1119       t = format_lex (fmt);
1120       if (t != FMT_PERIOD)
1121 	{
1122 	  fmt->saved_token = t;
1123 	}
1124       else
1125 	{
1126 	  t = format_lex (fmt);
1127 	  if (t != FMT_ZERO && t != FMT_POSINT)
1128 	    {
1129 	      fmt->error = nonneg_required;
1130 	      goto finished;
1131 	    }
1132 
1133 	  tail->u.integer.m = fmt->value;
1134 	}
1135 
1136       if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1137 	{
1138 	  fmt->error = "Minimum digits exceeds field width";
1139 	  goto finished;
1140 	}
1141 
1142       break;
1143 
1144     default:
1145       fmt->error = unexpected_element;
1146       goto finished;
1147     }
1148 
1149   /* Between a descriptor and what comes next */
1150  between_desc:
1151   t = format_lex (fmt);
1152   switch (t)
1153     {
1154     case FMT_COMMA:
1155       goto format_item;
1156 
1157     case FMT_RPAREN:
1158       goto finished;
1159 
1160     case FMT_SLASH:
1161     case FMT_COLON:
1162       get_fnode (fmt, &head, &tail, t);
1163       tail->repeat = 1;
1164       goto optional_comma;
1165 
1166     case FMT_END:
1167       fmt->error = unexpected_end;
1168       goto finished;
1169 
1170     default:
1171       /* Assume a missing comma, this is a GNU extension */
1172       goto format_item_1;
1173     }
1174 
1175   /* Optional comma is a weird between state where we've just finished
1176      reading a colon, slash or P descriptor. */
1177  optional_comma:
1178   t = format_lex (fmt);
1179   switch (t)
1180     {
1181     case FMT_COMMA:
1182       break;
1183 
1184     case FMT_RPAREN:
1185       goto finished;
1186 
1187     default:			/* Assume that we have another format item */
1188       fmt->saved_token = t;
1189       break;
1190     }
1191 
1192   goto format_item;
1193 
1194  finished:
1195 
1196   return head;
1197 }
1198 
1199 
1200 /* format_error()-- Generate an error message for a format statement.
1201    If the node that gives the location of the error is NULL, the error
1202    is assumed to happen at parse time, and the current location of the
1203    parser is shown.
1204 
1205    We generate a message showing where the problem is.  We take extra
1206    care to print only the relevant part of the format if it is longer
1207    than a standard 80 column display. */
1208 
1209 void
1210 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1211 {
1212   int width, i, offset;
1213 #define BUFLEN 300
1214   char *p, buffer[BUFLEN];
1215   format_data *fmt = dtp->u.p.fmt;
1216 
1217   if (f != NULL)
1218     p = f->source;
1219   else                /* This should not happen.  */
1220     p = dtp->format;
1221 
1222   if (message == unexpected_element)
1223     snprintf (buffer, BUFLEN, message, fmt->error_element);
1224   else
1225     snprintf (buffer, BUFLEN, "%s\n", message);
1226 
1227   /* Get the offset into the format string where the error occurred.  */
1228   offset = dtp->format_len - (fmt->reversion_ok ?
1229 			      (int) strlen(p) : fmt->format_string_len);
1230 
1231   width = dtp->format_len;
1232 
1233   if (width > 80)
1234     width = 80;
1235 
1236   /* Show the format */
1237 
1238   p = strchr (buffer, '\0');
1239 
1240   if (dtp->format)
1241     memcpy (p, dtp->format, width);
1242 
1243   p += width;
1244   *p++ = '\n';
1245 
1246   /* Show where the problem is */
1247 
1248   for (i = 1; i < offset; i++)
1249     *p++ = ' ';
1250 
1251   *p++ = '^';
1252   *p = '\0';
1253 
1254   generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1255 }
1256 
1257 
1258 /* revert()-- Do reversion of the format.  Control reverts to the left
1259    parenthesis that matches the rightmost right parenthesis.  From our
1260    tree structure, we are looking for the rightmost parenthesis node
1261    at the second level, the first level always being a single
1262    parenthesis node.  If this node doesn't exit, we use the top
1263    level. */
1264 
1265 static void
1266 revert (st_parameter_dt *dtp)
1267 {
1268   fnode *f, *r;
1269   format_data *fmt = dtp->u.p.fmt;
1270 
1271   dtp->u.p.reversion_flag = 1;
1272 
1273   r = NULL;
1274 
1275   for (f = fmt->array.array[0].u.child; f; f = f->next)
1276     if (f->format == FMT_LPAREN)
1277       r = f;
1278 
1279   /* If r is NULL because no node was found, the whole tree will be used */
1280 
1281   fmt->array.array[0].current = r;
1282   fmt->array.array[0].count = 0;
1283 }
1284 
1285 /* parse_format()-- Parse a format string.  */
1286 
1287 void
1288 parse_format (st_parameter_dt *dtp)
1289 {
1290   format_data *fmt;
1291   bool format_cache_ok, seen_data_desc = false;
1292 
1293   /* Don't cache for internal units and set an arbitrary limit on the
1294      size of format strings we will cache.  (Avoids memory issues.)
1295      Also, the format_hash_table resides in the current_unit, so
1296      child_dtio procedures would overwrite the parent table  */
1297   format_cache_ok = !is_internal_unit (dtp)
1298 		    && (dtp->u.p.current_unit->child_dtio == 0);
1299 
1300   /* Lookup format string to see if it has already been parsed.  */
1301   if (format_cache_ok)
1302     {
1303       dtp->u.p.fmt = find_parsed_format (dtp);
1304 
1305       if (dtp->u.p.fmt != NULL)
1306 	{
1307 	  dtp->u.p.fmt->reversion_ok = 0;
1308 	  dtp->u.p.fmt->saved_token = FMT_NONE;
1309 	  dtp->u.p.fmt->saved_format = NULL;
1310 	  reset_fnode_counters (dtp);
1311 	  return;
1312 	}
1313     }
1314 
1315   /* Not found so proceed as follows.  */
1316 
1317   char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1318   dtp->format = fmt_string;
1319 
1320   dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1321   fmt->format_string = dtp->format;
1322   fmt->format_string_len = dtp->format_len;
1323 
1324   fmt->string = NULL;
1325   fmt->saved_token = FMT_NONE;
1326   fmt->error = NULL;
1327   fmt->value = 0;
1328 
1329   /* Initialize variables used during traversal of the tree.  */
1330 
1331   fmt->reversion_ok = 0;
1332   fmt->saved_format = NULL;
1333 
1334   /* Initialize the fnode_array.  */
1335 
1336   memset (&(fmt->array), 0, sizeof(fmt->array));
1337 
1338   /* Allocate the first format node as the root of the tree.  */
1339 
1340   fmt->last = &fmt->array;
1341   fmt->last->next = NULL;
1342   fmt->avail = &fmt->array.array[0];
1343 
1344   memset (fmt->avail, 0, sizeof (*fmt->avail));
1345   fmt->avail->format = FMT_LPAREN;
1346   fmt->avail->repeat = 1;
1347   fmt->avail++;
1348 
1349   if (format_lex (fmt) == FMT_LPAREN)
1350     fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1351   else
1352     fmt->error = "Missing initial left parenthesis in format";
1353 
1354   if (format_cache_ok)
1355     save_parsed_format (dtp);
1356   else
1357     dtp->u.p.format_not_saved = 1;
1358 
1359   if (fmt->error)
1360     format_error (dtp, NULL, fmt->error);
1361 }
1362 
1363 
1364 /* next_format0()-- Get the next format node without worrying about
1365    reversion.  Returns NULL when we hit the end of the list.
1366    Parenthesis nodes are incremented after the list has been
1367    exhausted, other nodes are incremented before they are returned. */
1368 
1369 static const fnode *
1370 next_format0 (fnode *f)
1371 {
1372   const fnode *r;
1373 
1374   if (f == NULL)
1375     return NULL;
1376 
1377   if (f->format != FMT_LPAREN)
1378     {
1379       f->count++;
1380       if (f->count <= f->repeat)
1381 	return f;
1382 
1383       f->count = 0;
1384       return NULL;
1385     }
1386 
1387   /* Deal with a parenthesis node with unlimited format.  */
1388 
1389   if (f->repeat == -2)  /* -2 signifies unlimited.  */
1390   for (;;)
1391     {
1392       if (f->current == NULL)
1393 	f->current = f->u.child;
1394 
1395       for (; f->current != NULL; f->current = f->current->next)
1396 	{
1397 	  r = next_format0 (f->current);
1398 	  if (r != NULL)
1399 	    return r;
1400 	}
1401     }
1402 
1403   /* Deal with a parenthesis node with specific repeat count.  */
1404   for (; f->count < f->repeat; f->count++)
1405     {
1406       if (f->current == NULL)
1407 	f->current = f->u.child;
1408 
1409       for (; f->current != NULL; f->current = f->current->next)
1410 	{
1411 	  r = next_format0 (f->current);
1412 	  if (r != NULL)
1413 	    return r;
1414 	}
1415     }
1416 
1417   f->count = 0;
1418   return NULL;
1419 }
1420 
1421 
1422 /* next_format()-- Return the next format node.  If the format list
1423    ends up being exhausted, we do reversion.  Reversion is only
1424    allowed if we've seen a data descriptor since the
1425    initialization or the last reversion.  We return NULL if there
1426    are no more data descriptors to return (which is an error
1427    condition).  */
1428 
1429 const fnode *
1430 next_format (st_parameter_dt *dtp)
1431 {
1432   format_token t;
1433   const fnode *f;
1434   format_data *fmt = dtp->u.p.fmt;
1435 
1436   if (fmt->saved_format != NULL)
1437     {				/* Deal with a pushed-back format node */
1438       f = fmt->saved_format;
1439       fmt->saved_format = NULL;
1440       goto done;
1441     }
1442 
1443   f = next_format0 (&fmt->array.array[0]);
1444   if (f == NULL)
1445     {
1446       if (!fmt->reversion_ok)
1447 	return NULL;
1448 
1449       fmt->reversion_ok = 0;
1450       revert (dtp);
1451 
1452       f = next_format0 (&fmt->array.array[0]);
1453       if (f == NULL)
1454 	{
1455 	  format_error (dtp, NULL, reversion_error);
1456 	  return NULL;
1457 	}
1458 
1459       /* Push the first reverted token and return a colon node in case
1460 	 there are no more data items.  */
1461 
1462       fmt->saved_format = f;
1463       return &colon_node;
1464     }
1465 
1466   /* If this is a data edit descriptor, then reversion has become OK. */
1467  done:
1468   t = f->format;
1469 
1470   if (!fmt->reversion_ok &&
1471       (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1472        t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1473        t == FMT_A || t == FMT_D || t == FMT_DT))
1474     fmt->reversion_ok = 1;
1475   return f;
1476 }
1477 
1478 
1479 /* unget_format()-- Push the given format back so that it will be
1480    returned on the next call to next_format() without affecting
1481    counts.  This is necessary when we've encountered a data
1482    descriptor, but don't know what the data item is yet.  The format
1483    node is pushed back, and we return control to the main program,
1484    which calls the library back with the data item (or not). */
1485 
1486 void
1487 unget_format (st_parameter_dt *dtp, const fnode *f)
1488 {
1489   dtp->u.p.fmt->saved_format = f;
1490 }
1491 
1492