xref: /netbsd-src/external/gpl3/gcc.old/dist/libgfortran/io/format.c (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1 /* Copyright (C) 2002-2020 Free Software Foundation, Inc.
2    Contributed by Andy Vaught
3    F2003 I/O support contributed by Jerry DeLisle
4 
5 This file is part of the GNU Fortran runtime library (libgfortran).
6 
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28    interpretation during I/O statements.  */
29 
30 #include "io.h"
31 #include "format.h"
32 #include <ctype.h>
33 #include <string.h>
34 
35 
36 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
37 				  NULL };
38 
39 /* Error messages. */
40 
41 static const char posint_required[] = "Positive integer required in format",
42   period_required[] = "Period required in format",
43   nonneg_required[] = "Nonnegative width required in format",
44   unexpected_element[] = "Unexpected element '%c' in format\n",
45   unexpected_end[] = "Unexpected end of format string",
46   bad_string[] = "Unterminated character constant in format",
47   bad_hollerith[] = "Hollerith constant extends past the end of the format",
48   reversion_error[] = "Exhausted data descriptors in format",
49   zero_width[] = "Zero width in format descriptor";
50 
51 /* The following routines support caching format data from parsed format strings
52    into a hash table.  This avoids repeatedly parsing duplicate format strings
53    or format strings in I/O statements that are repeated in loops.  */
54 
55 
56 /* Traverse the table and free all data.  */
57 
58 void
free_format_hash_table(gfc_unit * u)59 free_format_hash_table (gfc_unit *u)
60 {
61   size_t i;
62 
63   /* free_format_data handles any NULL pointers.  */
64   for (i = 0; i < FORMAT_HASH_SIZE; i++)
65     {
66       if (u->format_hash_table[i].hashed_fmt != NULL)
67 	{
68 	  free_format_data (u->format_hash_table[i].hashed_fmt);
69 	  free (u->format_hash_table[i].key);
70 	}
71       u->format_hash_table[i].key = NULL;
72       u->format_hash_table[i].key_len = 0;
73       u->format_hash_table[i].hashed_fmt = NULL;
74     }
75 }
76 
77 /* Traverse the format_data structure and reset the fnode counters.  */
78 
79 static void
reset_node(fnode * fn)80 reset_node (fnode *fn)
81 {
82   fnode *f;
83 
84   fn->count = 0;
85   fn->current = NULL;
86 
87   if (fn->format != FMT_LPAREN)
88     return;
89 
90   for (f = fn->u.child; f; f = f->next)
91     {
92       if (f->format == FMT_RPAREN)
93 	break;
94       reset_node (f);
95     }
96 }
97 
98 static void
reset_fnode_counters(st_parameter_dt * dtp)99 reset_fnode_counters (st_parameter_dt *dtp)
100 {
101   fnode *f;
102   format_data *fmt;
103 
104   fmt = dtp->u.p.fmt;
105 
106   /* Clear this pointer at the head so things start at the right place.  */
107   fmt->array.array[0].current = NULL;
108 
109   for (f = fmt->array.array[0].u.child; f; f = f->next)
110     reset_node (f);
111 }
112 
113 
114 /* A simple hashing function to generate an index into the hash table.  */
115 
116 static uint32_t
format_hash(st_parameter_dt * dtp)117 format_hash (st_parameter_dt *dtp)
118 {
119   char *key;
120   gfc_charlen_type key_len;
121   uint32_t hash = 0;
122   gfc_charlen_type i;
123 
124   /* Hash the format string. Super simple, but what the heck!  */
125   key = dtp->format;
126   key_len = dtp->format_len;
127   for (i = 0; i < key_len; i++)
128     hash ^= key[i];
129   hash &= (FORMAT_HASH_SIZE - 1);
130   return hash;
131 }
132 
133 
134 static void
save_parsed_format(st_parameter_dt * dtp)135 save_parsed_format (st_parameter_dt *dtp)
136 {
137   uint32_t hash;
138   gfc_unit *u;
139 
140   hash = format_hash (dtp);
141   u = dtp->u.p.current_unit;
142 
143   /* Index into the hash table.  We are simply replacing whatever is there
144      relying on probability.  */
145   if (u->format_hash_table[hash].hashed_fmt != NULL)
146     free_format_data (u->format_hash_table[hash].hashed_fmt);
147   u->format_hash_table[hash].hashed_fmt = NULL;
148 
149   free (u->format_hash_table[hash].key);
150   u->format_hash_table[hash].key = dtp->format;
151 
152   u->format_hash_table[hash].key_len = dtp->format_len;
153   u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
154 }
155 
156 
157 static format_data *
find_parsed_format(st_parameter_dt * dtp)158 find_parsed_format (st_parameter_dt *dtp)
159 {
160   uint32_t hash;
161   gfc_unit *u;
162 
163   hash = format_hash (dtp);
164   u = dtp->u.p.current_unit;
165 
166   if (u->format_hash_table[hash].key != NULL)
167     {
168       /* See if it matches.  */
169       if (u->format_hash_table[hash].key_len == dtp->format_len)
170 	{
171 	  /* So far so good.  */
172 	  if (strncmp (u->format_hash_table[hash].key,
173 	      dtp->format, dtp->format_len) == 0)
174 	    return u->format_hash_table[hash].hashed_fmt;
175 	}
176     }
177   return NULL;
178 }
179 
180 
181 /* next_char()-- Return the next character in the format string.
182    Returns -1 when the string is done.  If the literal flag is set,
183    spaces are significant, otherwise they are not. */
184 
185 static int
next_char(format_data * fmt,int literal)186 next_char (format_data *fmt, int literal)
187 {
188   int c;
189 
190   do
191     {
192       if (fmt->format_string_len == 0)
193 	return -1;
194 
195       fmt->format_string_len--;
196       c = toupper (*fmt->format_string++);
197       fmt->error_element = c;
198     }
199   while ((c == ' ' || c == '\t') && !literal);
200 
201   return c;
202 }
203 
204 
205 /* unget_char()-- Back up one character position. */
206 
207 #define unget_char(fmt) \
208   { fmt->format_string--; fmt->format_string_len++; }
209 
210 
211 /* get_fnode()-- Allocate a new format node, inserting it into the
212    current singly linked list.  These are initially allocated from the
213    static buffer. */
214 
215 static fnode *
get_fnode(format_data * fmt,fnode ** head,fnode ** tail,format_token t)216 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
217 {
218   fnode *f;
219 
220   if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
221     {
222       fmt->last->next = xmalloc (sizeof (fnode_array));
223       fmt->last = fmt->last->next;
224       fmt->last->next = NULL;
225       fmt->avail = &fmt->last->array[0];
226     }
227   f = fmt->avail++;
228   memset (f, '\0', sizeof (fnode));
229 
230   if (*head == NULL)
231     *head = *tail = f;
232   else
233     {
234       (*tail)->next = f;
235       *tail = f;
236     }
237 
238   f->format = t;
239   f->repeat = -1;
240   f->source = fmt->format_string;
241   return f;
242 }
243 
244 
245 /* free_format()-- Free allocated format string.  */
246 void
free_format(st_parameter_dt * dtp)247 free_format (st_parameter_dt *dtp)
248 {
249   if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
250     {
251       free (dtp->format);
252       dtp->format = NULL;
253     }
254 }
255 
256 
257 /* free_format_data()-- Free all allocated format data.  */
258 
259 void
free_format_data(format_data * fmt)260 free_format_data (format_data *fmt)
261 {
262   fnode_array *fa, *fa_next;
263   fnode *fnp;
264 
265   if (fmt == NULL)
266     return;
267 
268   /* Free vlist descriptors in the fnode_array if one was allocated.  */
269   for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] &&
270        fnp->format != FMT_NONE; fnp++)
271     if (fnp->format == FMT_DT)
272 	{
273 	  if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist))
274 	    free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist));
275 	  free (fnp->u.udf.vlist);
276 	}
277 
278   for (fa = fmt->array.next; fa; fa = fa_next)
279     {
280       fa_next = fa->next;
281       free (fa);
282     }
283 
284   free (fmt);
285   fmt = NULL;
286 }
287 
288 
289 /* format_lex()-- Simple lexical analyzer for getting the next token
290    in a FORMAT string.  We support a one-level token pushback in the
291    fmt->saved_token variable. */
292 
293 static format_token
format_lex(format_data * fmt)294 format_lex (format_data *fmt)
295 {
296   format_token token;
297   int negative_flag;
298   int c;
299   char delim;
300 
301   if (fmt->saved_token != FMT_NONE)
302     {
303       token = fmt->saved_token;
304       fmt->saved_token = FMT_NONE;
305       return token;
306     }
307 
308   negative_flag = 0;
309   c = next_char (fmt, 0);
310 
311   switch (c)
312     {
313     case '*':
314        token = FMT_STAR;
315        break;
316 
317     case '(':
318       token = FMT_LPAREN;
319       break;
320 
321     case ')':
322       token = FMT_RPAREN;
323       break;
324 
325     case '-':
326       negative_flag = 1;
327       /* Fall Through */
328 
329     case '+':
330       c = next_char (fmt, 0);
331       if (!isdigit (c))
332 	{
333 	  token = FMT_UNKNOWN;
334 	  break;
335 	}
336 
337       fmt->value = c - '0';
338 
339       for (;;)
340 	{
341 	  c = next_char (fmt, 0);
342 	  if (!isdigit (c))
343 	    break;
344 
345 	  fmt->value = 10 * fmt->value + c - '0';
346 	}
347 
348       unget_char (fmt);
349 
350       if (negative_flag)
351 	fmt->value = -fmt->value;
352       token = FMT_SIGNED_INT;
353       break;
354 
355     case '0':
356     case '1':
357     case '2':
358     case '3':
359     case '4':
360     case '5':
361     case '6':
362     case '7':
363     case '8':
364     case '9':
365       fmt->value = c - '0';
366 
367       for (;;)
368 	{
369 	  c = next_char (fmt, 0);
370 	  if (!isdigit (c))
371 	    break;
372 
373 	  fmt->value = 10 * fmt->value + c - '0';
374 	}
375 
376       unget_char (fmt);
377       token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
378       break;
379 
380     case '.':
381       token = FMT_PERIOD;
382       break;
383 
384     case ',':
385       token = FMT_COMMA;
386       break;
387 
388     case ':':
389       token = FMT_COLON;
390       break;
391 
392     case '/':
393       token = FMT_SLASH;
394       break;
395 
396     case '$':
397       token = FMT_DOLLAR;
398       break;
399 
400     case 'T':
401       switch (next_char (fmt, 0))
402 	{
403 	case 'L':
404 	  token = FMT_TL;
405 	  break;
406 	case 'R':
407 	  token = FMT_TR;
408 	  break;
409 	default:
410 	  token = FMT_T;
411 	  unget_char (fmt);
412 	  break;
413 	}
414 
415       break;
416 
417     case 'X':
418       token = FMT_X;
419       break;
420 
421     case 'S':
422       switch (next_char (fmt, 0))
423 	{
424 	case 'S':
425 	  token = FMT_SS;
426 	  break;
427 	case 'P':
428 	  token = FMT_SP;
429 	  break;
430 	default:
431 	  token = FMT_S;
432 	  unget_char (fmt);
433 	  break;
434 	}
435 
436       break;
437 
438     case 'B':
439       switch (next_char (fmt, 0))
440 	{
441 	case 'N':
442 	  token = FMT_BN;
443 	  break;
444 	case 'Z':
445 	  token = FMT_BZ;
446 	  break;
447 	default:
448 	  token = FMT_B;
449 	  unget_char (fmt);
450 	  break;
451 	}
452 
453       break;
454 
455     case '\'':
456     case '"':
457       delim = c;
458 
459       fmt->string = fmt->format_string;
460       fmt->value = 0;		/* This is the length of the string */
461 
462       for (;;)
463 	{
464 	  c = next_char (fmt, 1);
465 	  if (c == -1)
466 	    {
467 	      token = FMT_BADSTRING;
468 	      fmt->error = bad_string;
469 	      break;
470 	    }
471 
472 	  if (c == delim)
473 	    {
474 	      c = next_char (fmt, 1);
475 
476 	      if (c == -1)
477 		{
478 		  token = FMT_BADSTRING;
479 		  fmt->error = bad_string;
480 		  break;
481 		}
482 
483 	      if (c != delim)
484 		{
485 		  unget_char (fmt);
486 		  token = FMT_STRING;
487 		  break;
488 		}
489 	    }
490 
491 	  fmt->value++;
492 	}
493 
494       break;
495 
496     case 'P':
497       token = FMT_P;
498       break;
499 
500     case 'I':
501       token = FMT_I;
502       break;
503 
504     case 'O':
505       token = FMT_O;
506       break;
507 
508     case 'Z':
509       token = FMT_Z;
510       break;
511 
512     case 'F':
513       token = FMT_F;
514       break;
515 
516     case 'E':
517       switch (next_char (fmt, 0))
518 	{
519 	case 'N':
520 	  token = FMT_EN;
521 	  break;
522 	case 'S':
523 	  token = FMT_ES;
524 	  break;
525 	default:
526 	  token = FMT_E;
527 	  unget_char (fmt);
528 	  break;
529 	}
530       break;
531 
532     case 'G':
533       token = FMT_G;
534       break;
535 
536     case 'H':
537       token = FMT_H;
538       break;
539 
540     case 'L':
541       token = FMT_L;
542       break;
543 
544     case 'A':
545       token = FMT_A;
546       break;
547 
548     case 'D':
549       switch (next_char (fmt, 0))
550 	{
551 	case 'P':
552 	  token = FMT_DP;
553 	  break;
554 	case 'C':
555 	  token = FMT_DC;
556 	  break;
557 	case 'T':
558 	  token = FMT_DT;
559 	  break;
560 	default:
561 	  token = FMT_D;
562 	  unget_char (fmt);
563 	  break;
564 	}
565       break;
566 
567     case 'R':
568       switch (next_char (fmt, 0))
569 	{
570 	case 'C':
571 	  token = FMT_RC;
572 	  break;
573 	case 'D':
574 	  token = FMT_RD;
575 	  break;
576 	case 'N':
577 	  token = FMT_RN;
578 	  break;
579 	case 'P':
580 	  token = FMT_RP;
581 	  break;
582 	case 'U':
583 	  token = FMT_RU;
584 	  break;
585 	case 'Z':
586 	  token = FMT_RZ;
587 	  break;
588 	default:
589 	  unget_char (fmt);
590 	  token = FMT_UNKNOWN;
591 	  break;
592 	}
593       break;
594 
595     case -1:
596       token = FMT_END;
597       break;
598 
599     default:
600       token = FMT_UNKNOWN;
601       break;
602     }
603 
604   return token;
605 }
606 
607 
608 /* parse_format_list()-- Parse a format list.  Assumes that a left
609    paren has already been seen.  Returns a list representing the
610    parenthesis node which contains the rest of the list. */
611 
612 static fnode *
parse_format_list(st_parameter_dt * dtp,bool * seen_dd)613 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
614 {
615   fnode *head, *tail;
616   format_token t, u, t2;
617   int repeat;
618   format_data *fmt = dtp->u.p.fmt;
619   bool seen_data_desc = false;
620 
621   head = tail = NULL;
622 
623   /* Get the next format item */
624  format_item:
625   t = format_lex (fmt);
626  format_item_1:
627   switch (t)
628     {
629     case FMT_STAR:
630       t = format_lex (fmt);
631       if (t != FMT_LPAREN)
632 	{
633 	  fmt->error = "Left parenthesis required after '*'";
634 	  goto finished;
635 	}
636       get_fnode (fmt, &head, &tail, FMT_LPAREN);
637       tail->repeat = -2;  /* Signifies unlimited format.  */
638       tail->u.child = parse_format_list (dtp, &seen_data_desc);
639       *seen_dd = seen_data_desc;
640       if (fmt->error != NULL)
641 	goto finished;
642       if (!seen_data_desc)
643 	{
644 	  fmt->error = "'*' requires at least one associated data descriptor";
645 	  goto finished;
646 	}
647       goto between_desc;
648 
649     case FMT_POSINT:
650       repeat = fmt->value;
651 
652       t = format_lex (fmt);
653       switch (t)
654 	{
655 	case FMT_LPAREN:
656 	  get_fnode (fmt, &head, &tail, FMT_LPAREN);
657 	  tail->repeat = repeat;
658 	  tail->u.child = parse_format_list (dtp, &seen_data_desc);
659 	  *seen_dd = seen_data_desc;
660 	  if (fmt->error != NULL)
661 	    goto finished;
662 
663 	  goto between_desc;
664 
665 	case FMT_SLASH:
666 	  get_fnode (fmt, &head, &tail, FMT_SLASH);
667 	  tail->repeat = repeat;
668 	  goto optional_comma;
669 
670 	case FMT_X:
671 	  get_fnode (fmt, &head, &tail, FMT_X);
672 	  tail->repeat = 1;
673 	  tail->u.k = fmt->value;
674 	  goto between_desc;
675 
676 	case FMT_P:
677 	  goto p_descriptor;
678 
679 	default:
680 	  goto data_desc;
681 	}
682 
683     case FMT_LPAREN:
684       get_fnode (fmt, &head, &tail, FMT_LPAREN);
685       tail->repeat = 1;
686       tail->u.child = parse_format_list (dtp, &seen_data_desc);
687       *seen_dd = seen_data_desc;
688       if (fmt->error != NULL)
689 	goto finished;
690 
691       goto between_desc;
692 
693     case FMT_SIGNED_INT:	/* Signed integer can only precede a P format.  */
694     case FMT_ZERO:		/* Same for zero.  */
695       t = format_lex (fmt);
696       if (t != FMT_P)
697 	{
698 	  fmt->error = "Expected P edit descriptor in format";
699 	  goto finished;
700 	}
701 
702     p_descriptor:
703       get_fnode (fmt, &head, &tail, FMT_P);
704       tail->u.k = fmt->value;
705       tail->repeat = 1;
706 
707       t = format_lex (fmt);
708       if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
709 	  || t == FMT_G || t == FMT_E)
710 	{
711 	  repeat = 1;
712 	  goto data_desc;
713 	}
714 
715       if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
716 	  && t != FMT_POSINT)
717 	{
718 	  fmt->error = "Comma required after P descriptor";
719 	  goto finished;
720 	}
721 
722       fmt->saved_token = t;
723       goto optional_comma;
724 
725     case FMT_P:		/* P and X require a prior number */
726       fmt->error = "P descriptor requires leading scale factor";
727       goto finished;
728 
729     case FMT_X:
730 /*
731    EXTENSION!
732 
733    If we would be pedantic in the library, we would have to reject
734    an X descriptor without an integer prefix:
735 
736       fmt->error = "X descriptor requires leading space count";
737       goto finished;
738 
739    However, this is an extension supported by many Fortran compilers,
740    including Cray, HP, AIX, and IRIX.  Therefore, we allow it in the
741    runtime library, and make the front end reject it if the compiler
742    is in pedantic mode.  The interpretation of 'X' is '1X'.
743 */
744       get_fnode (fmt, &head, &tail, FMT_X);
745       tail->repeat = 1;
746       tail->u.k = 1;
747       goto between_desc;
748 
749     case FMT_STRING:
750       get_fnode (fmt, &head, &tail, FMT_STRING);
751       tail->u.string.p = fmt->string;
752       tail->u.string.length = fmt->value;
753       tail->repeat = 1;
754       goto optional_comma;
755 
756     case FMT_RC:
757     case FMT_RD:
758     case FMT_RN:
759     case FMT_RP:
760     case FMT_RU:
761     case FMT_RZ:
762       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
763 		  "descriptor not allowed");
764       get_fnode (fmt, &head, &tail, t);
765       tail->repeat = 1;
766       goto between_desc;
767 
768     case FMT_DC:
769     case FMT_DP:
770       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
771 		  "descriptor not allowed");
772     /* Fall through.  */
773     case FMT_S:
774     case FMT_SS:
775     case FMT_SP:
776     case FMT_BN:
777     case FMT_BZ:
778       get_fnode (fmt, &head, &tail, t);
779       tail->repeat = 1;
780       goto between_desc;
781 
782     case FMT_COLON:
783       get_fnode (fmt, &head, &tail, FMT_COLON);
784       tail->repeat = 1;
785       goto optional_comma;
786 
787     case FMT_SLASH:
788       get_fnode (fmt, &head, &tail, FMT_SLASH);
789       tail->repeat = 1;
790       tail->u.r = 1;
791       goto optional_comma;
792 
793     case FMT_DOLLAR:
794       get_fnode (fmt, &head, &tail, FMT_DOLLAR);
795       tail->repeat = 1;
796       notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
797       goto between_desc;
798 
799     case FMT_T:
800     case FMT_TL:
801     case FMT_TR:
802       t2 = format_lex (fmt);
803       if (t2 != FMT_POSINT)
804 	{
805 	  fmt->error = posint_required;
806 	  goto finished;
807 	}
808       get_fnode (fmt, &head, &tail, t);
809       tail->u.n = fmt->value;
810       tail->repeat = 1;
811       goto between_desc;
812 
813     case FMT_I:
814     case FMT_B:
815     case FMT_O:
816     case FMT_Z:
817     case FMT_E:
818     case FMT_EN:
819     case FMT_ES:
820     case FMT_D:
821     case FMT_DT:
822     case FMT_L:
823     case FMT_A:
824     case FMT_F:
825     case FMT_G:
826       repeat = 1;
827       *seen_dd = true;
828       goto data_desc;
829 
830     case FMT_H:
831       get_fnode (fmt, &head, &tail, FMT_STRING);
832       if (fmt->format_string_len < 1)
833 	{
834 	  fmt->error = bad_hollerith;
835 	  goto finished;
836 	}
837 
838       tail->u.string.p = fmt->format_string;
839       tail->u.string.length = 1;
840       tail->repeat = 1;
841 
842       fmt->format_string++;
843       fmt->format_string_len--;
844 
845       goto between_desc;
846 
847     case FMT_END:
848       fmt->error = unexpected_end;
849       goto finished;
850 
851     case FMT_BADSTRING:
852       goto finished;
853 
854     case FMT_RPAREN:
855       goto finished;
856 
857     default:
858       fmt->error = unexpected_element;
859       goto finished;
860     }
861 
862   /* In this state, t must currently be a data descriptor.  Deal with
863      things that can/must follow the descriptor */
864  data_desc:
865 
866   switch (t)
867     {
868     case FMT_L:
869       *seen_dd = true;
870       t = format_lex (fmt);
871       if (t != FMT_POSINT)
872 	{
873 	  if (t == FMT_ZERO)
874 	    {
875 	      if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
876 		{
877 		  fmt->error = "Extension: Zero width after L descriptor";
878 		  goto finished;
879 		}
880 	      else
881 		notify_std (&dtp->common, GFC_STD_GNU,
882 			    "Zero width after L descriptor");
883 	    }
884 	  else
885 	    {
886 	      fmt->saved_token = t;
887 	      notify_std (&dtp->common, GFC_STD_GNU,
888 			  "Positive width required with L descriptor");
889 	    }
890 	  fmt->value = 1;	/* Default width */
891 	}
892       get_fnode (fmt, &head, &tail, FMT_L);
893       tail->u.n = fmt->value;
894       tail->repeat = repeat;
895       break;
896 
897     case FMT_A:
898       *seen_dd = true;
899       t = format_lex (fmt);
900       if (t == FMT_ZERO)
901 	{
902 	  fmt->error = zero_width;
903 	  goto finished;
904 	}
905 
906       if (t != FMT_POSINT)
907 	{
908 	  fmt->saved_token = t;
909 	  fmt->value = -1;		/* Width not present */
910 	}
911 
912       get_fnode (fmt, &head, &tail, FMT_A);
913       tail->repeat = repeat;
914       tail->u.n = fmt->value;
915       break;
916 
917     case FMT_D:
918     case FMT_E:
919     case FMT_F:
920     case FMT_G:
921     case FMT_EN:
922     case FMT_ES:
923       *seen_dd = true;
924       get_fnode (fmt, &head, &tail, t);
925       tail->repeat = repeat;
926 
927       u = format_lex (fmt);
928 
929       /* Processing for zero width formats.  */
930       if (u == FMT_ZERO)
931 	{
932 	  if (notification_std (GFC_STD_F2008) == NOTIFICATION_ERROR
933 	      || dtp->u.p.mode == READING)
934 	    {
935 	      fmt->error = zero_width;
936 	      goto finished;
937 	    }
938 	  tail->u.real.w = 0;
939 
940 	  /* Look for the dot seperator.  */
941 	  u = format_lex (fmt);
942 	  if (u != FMT_PERIOD)
943 	    {
944 	      fmt->saved_token = u;
945 	      break;
946 	    }
947 
948 	  /* Look for the precision.  */
949 	  u = format_lex (fmt);
950 	  if (u != FMT_ZERO && u != FMT_POSINT)
951 	    {
952 	      fmt->error = nonneg_required;
953 	      goto finished;
954 	    }
955 	  tail->u.real.d = fmt->value;
956 
957 	  /* Look for optional exponent, not allowed for FMT_D */
958 	  if (t == FMT_D)
959 	    break;
960 	  u = format_lex (fmt);
961 	  if (u != FMT_E)
962 	    fmt->saved_token = u;
963 	  else
964 	    {
965 	      u = format_lex (fmt);
966 	      if (u != FMT_POSINT)
967 		{
968 		  if (u == FMT_ZERO)
969 		    {
970 		      notify_std (&dtp->common, GFC_STD_F2018,
971 				  "Positive exponent width required");
972 		    }
973 		  else
974 		    {
975 		      fmt->error = "Positive exponent width required in "
976 				   "format string at %L";
977 		      goto finished;
978 		    }
979 		}
980 	      tail->u.real.e = fmt->value;
981 	    }
982 	  break;
983 	}
984 
985       /* Processing for positive width formats.  */
986       if (u == FMT_POSINT)
987 	{
988 	  tail->u.real.w = fmt->value;
989 
990 	  /* Look for the dot separator. Because of legacy behaviors
991 	     we do some look ahead for missing things.  */
992 	  t2 = t;
993 	  t = format_lex (fmt);
994 	  if (t != FMT_PERIOD)
995 	    {
996 	      /* We treat a missing decimal descriptor as 0.  Note: This is only
997 		 allowed if -std=legacy, otherwise an error occurs.  */
998 	      if (compile_options.warn_std != 0)
999 		{
1000 		  fmt->error = period_required;
1001 		  goto finished;
1002 		}
1003 	      fmt->saved_token = t;
1004 	      tail->u.real.d = 0;
1005 	      tail->u.real.e = -1;
1006 	      break;
1007 	    }
1008 
1009 	  /* If we made it here, we should have the dot so look for the
1010 	     precision.  */
1011 	  t = format_lex (fmt);
1012 	  if (t != FMT_ZERO && t != FMT_POSINT)
1013 	    {
1014 	      fmt->error = nonneg_required;
1015 	      goto finished;
1016 	    }
1017 	  tail->u.real.d = fmt->value;
1018 	  tail->u.real.e = -1;
1019 
1020 	  /* Done with D and F formats.  */
1021 	  if (t2 == FMT_D || t2 == FMT_F)
1022 	    {
1023 	      *seen_dd = true;
1024 	      break;
1025 	    }
1026 
1027 	  /* Look for optional exponent */
1028 	  u = format_lex (fmt);
1029 	  if (u != FMT_E)
1030 	    fmt->saved_token = u;
1031 	  else
1032 	    {
1033 	      u = format_lex (fmt);
1034 	      if (u != FMT_POSINT)
1035 		{
1036 		  if (u == FMT_ZERO)
1037 		    {
1038 		      notify_std (&dtp->common, GFC_STD_F2018,
1039 				  "Positive exponent width required");
1040 		    }
1041 		  else
1042 		    {
1043 		      fmt->error = "Positive exponent width required in "
1044 				   "format string at %L";
1045 		      goto finished;
1046 		    }
1047 		}
1048 	      tail->u.real.e = fmt->value;
1049 	    }
1050 	  break;
1051 	}
1052 
1053       /* Old DEC codes may not have width or precision specified.  */
1054       if (dtp->u.p.mode == WRITING && (dtp->common.flags & IOPARM_DT_DEC_EXT))
1055 	{
1056 	  tail->u.real.w = DEFAULT_WIDTH;
1057 	  tail->u.real.d = 0;
1058 	  tail->u.real.e = -1;
1059 	  fmt->saved_token = u;
1060 	}
1061       break;
1062 
1063     case FMT_DT:
1064       *seen_dd = true;
1065       get_fnode (fmt, &head, &tail, t);
1066       tail->repeat = repeat;
1067 
1068       t = format_lex (fmt);
1069 
1070       /* Initialize the vlist to a zero size, rank-one array.  */
1071       tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4)
1072 				  + sizeof (descriptor_dimension));
1073       GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL;
1074       GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0);
1075 
1076       if (t == FMT_STRING)
1077         {
1078 	  /* Get pointer to the optional format string.  */
1079 	  tail->u.udf.string = fmt->string;
1080 	  tail->u.udf.string_len = fmt->value;
1081 	  t = format_lex (fmt);
1082 	}
1083       if (t == FMT_LPAREN)
1084         {
1085 	  /* Temporary buffer to hold the vlist values.  */
1086 	  GFC_INTEGER_4 temp[FARRAY_SIZE];
1087 	  int i = 0;
1088 	loop:
1089 	  t = format_lex (fmt);
1090 	  if (t != FMT_POSINT)
1091 	    {
1092 	      fmt->error = posint_required;
1093 	      goto finished;
1094 	    }
1095 	  /* Save the positive integer value.  */
1096 	  temp[i++] = fmt->value;
1097 	  t = format_lex (fmt);
1098 	  if (t == FMT_COMMA)
1099 	    goto loop;
1100 	  if (t == FMT_RPAREN)
1101 	    {
1102 	      /* We have parsed the complete vlist so initialize the
1103 	         array descriptor and save it in the format node.  */
1104 	      gfc_full_array_i4 *vp = tail->u.udf.vlist;
1105 	      GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4));
1106 	      GFC_DIMENSION_SET(vp->dim[0],1, i, 1);
1107 	      memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4));
1108 	      break;
1109 	    }
1110 	  fmt->error = unexpected_element;
1111 	  goto finished;
1112 	}
1113       fmt->saved_token = t;
1114       break;
1115     case FMT_H:
1116       if (repeat > fmt->format_string_len)
1117 	{
1118 	  fmt->error = bad_hollerith;
1119 	  goto finished;
1120 	}
1121 
1122       get_fnode (fmt, &head, &tail, FMT_STRING);
1123       tail->u.string.p = fmt->format_string;
1124       tail->u.string.length = repeat;
1125       tail->repeat = 1;
1126 
1127       fmt->format_string += fmt->value;
1128       fmt->format_string_len -= repeat;
1129 
1130       break;
1131 
1132     case FMT_I:
1133     case FMT_B:
1134     case FMT_O:
1135     case FMT_Z:
1136       *seen_dd = true;
1137       get_fnode (fmt, &head, &tail, t);
1138       tail->repeat = repeat;
1139 
1140       t = format_lex (fmt);
1141 
1142       if (dtp->u.p.mode == READING)
1143 	{
1144 	  if (t != FMT_POSINT)
1145 	    {
1146 	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1147 		{
1148 		  tail->u.integer.w = DEFAULT_WIDTH;
1149 		  tail->u.integer.m = -1;
1150 		  fmt->saved_token = t;
1151 		  break;
1152 		}
1153 	      fmt->error = posint_required;
1154 	      goto finished;
1155 	    }
1156 	}
1157       else
1158 	{
1159 	  if (t != FMT_ZERO && t != FMT_POSINT)
1160 	    {
1161 	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1162 		{
1163 		  tail->u.integer.w = DEFAULT_WIDTH;
1164 		  tail->u.integer.m = -1;
1165 		  fmt->saved_token = t;
1166 		  break;
1167 		}
1168 	      fmt->error = nonneg_required;
1169 	      goto finished;
1170 	    }
1171 	}
1172 
1173       tail->u.integer.w = fmt->value;
1174       tail->u.integer.m = -1;
1175 
1176       t = format_lex (fmt);
1177       if (t != FMT_PERIOD)
1178 	{
1179 	  fmt->saved_token = t;
1180 	}
1181       else
1182 	{
1183 	  t = format_lex (fmt);
1184 	  if (t != FMT_ZERO && t != FMT_POSINT)
1185 	    {
1186 	      fmt->error = nonneg_required;
1187 	      goto finished;
1188 	    }
1189 
1190 	  tail->u.integer.m = fmt->value;
1191 	}
1192 
1193       if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1194 	{
1195 	  fmt->error = "Minimum digits exceeds field width";
1196 	  goto finished;
1197 	}
1198 
1199       break;
1200 
1201     default:
1202       fmt->error = unexpected_element;
1203       goto finished;
1204     }
1205 
1206   /* Between a descriptor and what comes next */
1207  between_desc:
1208   t = format_lex (fmt);
1209   switch (t)
1210     {
1211     case FMT_COMMA:
1212       goto format_item;
1213 
1214     case FMT_RPAREN:
1215       goto finished;
1216 
1217     case FMT_SLASH:
1218     case FMT_COLON:
1219       get_fnode (fmt, &head, &tail, t);
1220       tail->repeat = 1;
1221       goto optional_comma;
1222 
1223     case FMT_END:
1224       fmt->error = unexpected_end;
1225       goto finished;
1226 
1227     default:
1228       /* Assume a missing comma, this is a GNU extension */
1229       goto format_item_1;
1230     }
1231 
1232   /* Optional comma is a weird between state where we've just finished
1233      reading a colon, slash or P descriptor. */
1234  optional_comma:
1235   t = format_lex (fmt);
1236   switch (t)
1237     {
1238     case FMT_COMMA:
1239       break;
1240 
1241     case FMT_RPAREN:
1242       goto finished;
1243 
1244     default:			/* Assume that we have another format item */
1245       fmt->saved_token = t;
1246       break;
1247     }
1248 
1249   goto format_item;
1250 
1251  finished:
1252 
1253   return head;
1254 }
1255 
1256 
1257 /* format_error()-- Generate an error message for a format statement.
1258    If the node that gives the location of the error is NULL, the error
1259    is assumed to happen at parse time, and the current location of the
1260    parser is shown.
1261 
1262    We generate a message showing where the problem is.  We take extra
1263    care to print only the relevant part of the format if it is longer
1264    than a standard 80 column display. */
1265 
1266 void
format_error(st_parameter_dt * dtp,const fnode * f,const char * message)1267 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1268 {
1269   int width, i, offset;
1270 #define BUFLEN 300
1271   char *p, buffer[BUFLEN];
1272   format_data *fmt = dtp->u.p.fmt;
1273 
1274   if (f != NULL)
1275     p = f->source;
1276   else                /* This should not happen.  */
1277     p = dtp->format;
1278 
1279   if (message == unexpected_element)
1280     snprintf (buffer, BUFLEN, message, fmt->error_element);
1281   else
1282     snprintf (buffer, BUFLEN, "%s\n", message);
1283 
1284   /* Get the offset into the format string where the error occurred.  */
1285   offset = dtp->format_len - (fmt->reversion_ok ?
1286 			      (int) strlen(p) : fmt->format_string_len);
1287 
1288   width = dtp->format_len;
1289 
1290   if (width > 80)
1291     width = 80;
1292 
1293   /* Show the format */
1294 
1295   p = strchr (buffer, '\0');
1296 
1297   if (dtp->format)
1298     memcpy (p, dtp->format, width);
1299 
1300   p += width;
1301   *p++ = '\n';
1302 
1303   /* Show where the problem is */
1304 
1305   for (i = 1; i < offset; i++)
1306     *p++ = ' ';
1307 
1308   *p++ = '^';
1309   *p = '\0';
1310 
1311   generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1312 }
1313 
1314 
1315 /* revert()-- Do reversion of the format.  Control reverts to the left
1316    parenthesis that matches the rightmost right parenthesis.  From our
1317    tree structure, we are looking for the rightmost parenthesis node
1318    at the second level, the first level always being a single
1319    parenthesis node.  If this node doesn't exit, we use the top
1320    level. */
1321 
1322 static void
revert(st_parameter_dt * dtp)1323 revert (st_parameter_dt *dtp)
1324 {
1325   fnode *f, *r;
1326   format_data *fmt = dtp->u.p.fmt;
1327 
1328   dtp->u.p.reversion_flag = 1;
1329 
1330   r = NULL;
1331 
1332   for (f = fmt->array.array[0].u.child; f; f = f->next)
1333     if (f->format == FMT_LPAREN)
1334       r = f;
1335 
1336   /* If r is NULL because no node was found, the whole tree will be used */
1337 
1338   fmt->array.array[0].current = r;
1339   fmt->array.array[0].count = 0;
1340 }
1341 
1342 /* parse_format()-- Parse a format string.  */
1343 
1344 void
parse_format(st_parameter_dt * dtp)1345 parse_format (st_parameter_dt *dtp)
1346 {
1347   format_data *fmt;
1348   bool format_cache_ok, seen_data_desc = false;
1349 
1350   /* Don't cache for internal units and set an arbitrary limit on the
1351      size of format strings we will cache.  (Avoids memory issues.)
1352      Also, the format_hash_table resides in the current_unit, so
1353      child_dtio procedures would overwrite the parent table  */
1354   format_cache_ok = !is_internal_unit (dtp)
1355 		    && (dtp->u.p.current_unit->child_dtio == 0);
1356 
1357   /* Lookup format string to see if it has already been parsed.  */
1358   if (format_cache_ok)
1359     {
1360       dtp->u.p.fmt = find_parsed_format (dtp);
1361 
1362       if (dtp->u.p.fmt != NULL)
1363 	{
1364 	  dtp->u.p.fmt->reversion_ok = 0;
1365 	  dtp->u.p.fmt->saved_token = FMT_NONE;
1366 	  dtp->u.p.fmt->saved_format = NULL;
1367 	  reset_fnode_counters (dtp);
1368 	  return;
1369 	}
1370     }
1371 
1372   /* Not found so proceed as follows.  */
1373 
1374   char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1375   dtp->format = fmt_string;
1376 
1377   dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1378   fmt->format_string = dtp->format;
1379   fmt->format_string_len = dtp->format_len;
1380 
1381   fmt->string = NULL;
1382   fmt->saved_token = FMT_NONE;
1383   fmt->error = NULL;
1384   fmt->value = 0;
1385 
1386   /* Initialize variables used during traversal of the tree.  */
1387 
1388   fmt->reversion_ok = 0;
1389   fmt->saved_format = NULL;
1390 
1391   /* Initialize the fnode_array.  */
1392 
1393   memset (&(fmt->array), 0, sizeof(fmt->array));
1394 
1395   /* Allocate the first format node as the root of the tree.  */
1396 
1397   fmt->last = &fmt->array;
1398   fmt->last->next = NULL;
1399   fmt->avail = &fmt->array.array[0];
1400 
1401   memset (fmt->avail, 0, sizeof (*fmt->avail));
1402   fmt->avail->format = FMT_LPAREN;
1403   fmt->avail->repeat = 1;
1404   fmt->avail++;
1405 
1406   if (format_lex (fmt) == FMT_LPAREN)
1407     fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1408   else
1409     fmt->error = "Missing initial left parenthesis in format";
1410 
1411   if (format_cache_ok)
1412     save_parsed_format (dtp);
1413   else
1414     dtp->u.p.format_not_saved = 1;
1415 
1416   if (fmt->error)
1417     format_error (dtp, NULL, fmt->error);
1418 }
1419 
1420 
1421 /* next_format0()-- Get the next format node without worrying about
1422    reversion.  Returns NULL when we hit the end of the list.
1423    Parenthesis nodes are incremented after the list has been
1424    exhausted, other nodes are incremented before they are returned. */
1425 
1426 static const fnode *
next_format0(fnode * f)1427 next_format0 (fnode *f)
1428 {
1429   const fnode *r;
1430 
1431   if (f == NULL)
1432     return NULL;
1433 
1434   if (f->format != FMT_LPAREN)
1435     {
1436       f->count++;
1437       if (f->count <= f->repeat)
1438 	return f;
1439 
1440       f->count = 0;
1441       return NULL;
1442     }
1443 
1444   /* Deal with a parenthesis node with unlimited format.  */
1445 
1446   if (f->repeat == -2)  /* -2 signifies unlimited.  */
1447   for (;;)
1448     {
1449       if (f->current == NULL)
1450 	f->current = f->u.child;
1451 
1452       for (; f->current != NULL; f->current = f->current->next)
1453 	{
1454 	  r = next_format0 (f->current);
1455 	  if (r != NULL)
1456 	    return r;
1457 	}
1458     }
1459 
1460   /* Deal with a parenthesis node with specific repeat count.  */
1461   for (; f->count < f->repeat; f->count++)
1462     {
1463       if (f->current == NULL)
1464 	f->current = f->u.child;
1465 
1466       for (; f->current != NULL; f->current = f->current->next)
1467 	{
1468 	  r = next_format0 (f->current);
1469 	  if (r != NULL)
1470 	    return r;
1471 	}
1472     }
1473 
1474   f->count = 0;
1475   return NULL;
1476 }
1477 
1478 
1479 /* next_format()-- Return the next format node.  If the format list
1480    ends up being exhausted, we do reversion.  Reversion is only
1481    allowed if we've seen a data descriptor since the
1482    initialization or the last reversion.  We return NULL if there
1483    are no more data descriptors to return (which is an error
1484    condition).  */
1485 
1486 const fnode *
next_format(st_parameter_dt * dtp)1487 next_format (st_parameter_dt *dtp)
1488 {
1489   format_token t;
1490   const fnode *f;
1491   format_data *fmt = dtp->u.p.fmt;
1492 
1493   if (fmt->saved_format != NULL)
1494     {				/* Deal with a pushed-back format node */
1495       f = fmt->saved_format;
1496       fmt->saved_format = NULL;
1497       goto done;
1498     }
1499 
1500   f = next_format0 (&fmt->array.array[0]);
1501   if (f == NULL)
1502     {
1503       if (!fmt->reversion_ok)
1504 	return NULL;
1505 
1506       fmt->reversion_ok = 0;
1507       revert (dtp);
1508 
1509       f = next_format0 (&fmt->array.array[0]);
1510       if (f == NULL)
1511 	{
1512 	  format_error (dtp, NULL, reversion_error);
1513 	  return NULL;
1514 	}
1515 
1516       /* Push the first reverted token and return a colon node in case
1517 	 there are no more data items.  */
1518 
1519       fmt->saved_format = f;
1520       return &colon_node;
1521     }
1522 
1523   /* If this is a data edit descriptor, then reversion has become OK. */
1524  done:
1525   t = f->format;
1526 
1527   if (!fmt->reversion_ok &&
1528       (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1529        t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1530        t == FMT_A || t == FMT_D || t == FMT_DT))
1531     fmt->reversion_ok = 1;
1532   return f;
1533 }
1534 
1535 
1536 /* unget_format()-- Push the given format back so that it will be
1537    returned on the next call to next_format() without affecting
1538    counts.  This is necessary when we've encountered a data
1539    descriptor, but don't know what the data item is yet.  The format
1540    node is pushed back, and we return control to the main program,
1541    which calls the library back with the data item (or not). */
1542 
1543 void
unget_format(st_parameter_dt * dtp,const fnode * f)1544 unget_format (st_parameter_dt *dtp, const fnode *f)
1545 {
1546   dtp->u.p.fmt->saved_format = f;
1547 }
1548 
1549