xref: /netbsd-src/external/gpl3/gcc/dist/libgfortran/io/format.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Copyright (C) 2002-2022 Free Software Foundation, Inc.
2    Contributed by Andy Vaught
3    F2003 I/O support contributed by Jerry DeLisle
4 
5 This file is part of the GNU Fortran runtime library (libgfortran).
6 
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 
27 /* format.c-- parse a FORMAT string into a binary format suitable for
28    interpretation during I/O statements.  */
29 
30 #include "io.h"
31 #include "format.h"
32 #include <string.h>
33 
34 
35 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
36 				  NULL };
37 
38 /* Error messages. */
39 
40 static const char posint_required[] = "Positive integer required in format",
41   period_required[] = "Period required in format",
42   nonneg_required[] = "Nonnegative width required in format",
43   unexpected_element[] = "Unexpected element '%c' in format\n",
44   unexpected_end[] = "Unexpected end of format string",
45   bad_string[] = "Unterminated character constant in format",
46   bad_hollerith[] = "Hollerith constant extends past the end of the format",
47   reversion_error[] = "Exhausted data descriptors in format",
48   zero_width[] = "Zero width in format descriptor";
49 
50 /* The following routines support caching format data from parsed format strings
51    into a hash table.  This avoids repeatedly parsing duplicate format strings
52    or format strings in I/O statements that are repeated in loops.  */
53 
54 
55 /* Traverse the table and free all data.  */
56 
57 void
free_format_hash_table(gfc_unit * u)58 free_format_hash_table (gfc_unit *u)
59 {
60   size_t i;
61 
62   /* free_format_data handles any NULL pointers.  */
63   for (i = 0; i < FORMAT_HASH_SIZE; i++)
64     {
65       if (u->format_hash_table[i].hashed_fmt != NULL)
66 	{
67 	  free_format_data (u->format_hash_table[i].hashed_fmt);
68 	  free (u->format_hash_table[i].key);
69 	}
70       u->format_hash_table[i].key = NULL;
71       u->format_hash_table[i].key_len = 0;
72       u->format_hash_table[i].hashed_fmt = NULL;
73     }
74 }
75 
76 /* Traverse the format_data structure and reset the fnode counters.  */
77 
78 static void
reset_node(fnode * fn)79 reset_node (fnode *fn)
80 {
81   fnode *f;
82 
83   fn->count = 0;
84   fn->current = NULL;
85 
86   if (fn->format != FMT_LPAREN)
87     return;
88 
89   for (f = fn->u.child; f; f = f->next)
90     {
91       if (f->format == FMT_RPAREN)
92 	break;
93       reset_node (f);
94     }
95 }
96 
97 static void
reset_fnode_counters(st_parameter_dt * dtp)98 reset_fnode_counters (st_parameter_dt *dtp)
99 {
100   fnode *f;
101   format_data *fmt;
102 
103   fmt = dtp->u.p.fmt;
104 
105   /* Clear this pointer at the head so things start at the right place.  */
106   fmt->array.array[0].current = NULL;
107 
108   for (f = fmt->array.array[0].u.child; f; f = f->next)
109     reset_node (f);
110 }
111 
112 
113 /* A simple hashing function to generate an index into the hash table.  */
114 
115 static uint32_t
format_hash(st_parameter_dt * dtp)116 format_hash (st_parameter_dt *dtp)
117 {
118   char *key;
119   gfc_charlen_type key_len;
120   uint32_t hash = 0;
121   gfc_charlen_type i;
122 
123   /* Hash the format string. Super simple, but what the heck!  */
124   key = dtp->format;
125   key_len = dtp->format_len;
126   for (i = 0; i < key_len; i++)
127     hash ^= key[i];
128   hash &= (FORMAT_HASH_SIZE - 1);
129   return hash;
130 }
131 
132 
133 static void
save_parsed_format(st_parameter_dt * dtp)134 save_parsed_format (st_parameter_dt *dtp)
135 {
136   uint32_t hash;
137   gfc_unit *u;
138 
139   hash = format_hash (dtp);
140   u = dtp->u.p.current_unit;
141 
142   /* Index into the hash table.  We are simply replacing whatever is there
143      relying on probability.  */
144   if (u->format_hash_table[hash].hashed_fmt != NULL)
145     free_format_data (u->format_hash_table[hash].hashed_fmt);
146   u->format_hash_table[hash].hashed_fmt = NULL;
147 
148   free (u->format_hash_table[hash].key);
149   u->format_hash_table[hash].key = dtp->format;
150 
151   u->format_hash_table[hash].key_len = dtp->format_len;
152   u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
153 }
154 
155 
156 static format_data *
find_parsed_format(st_parameter_dt * dtp)157 find_parsed_format (st_parameter_dt *dtp)
158 {
159   uint32_t hash;
160   gfc_unit *u;
161 
162   hash = format_hash (dtp);
163   u = dtp->u.p.current_unit;
164 
165   if (u->format_hash_table[hash].key != NULL)
166     {
167       /* See if it matches.  */
168       if (u->format_hash_table[hash].key_len == dtp->format_len)
169 	{
170 	  /* So far so good.  */
171 	  if (strncmp (u->format_hash_table[hash].key,
172 	      dtp->format, dtp->format_len) == 0)
173 	    return u->format_hash_table[hash].hashed_fmt;
174 	}
175     }
176   return NULL;
177 }
178 
179 
180 /* next_char()-- Return the next character in the format string.
181    Returns -1 when the string is done.  If the literal flag is set,
182    spaces are significant, otherwise they are not. */
183 
184 static int
next_char(format_data * fmt,int literal)185 next_char (format_data *fmt, int literal)
186 {
187   int c;
188 
189   do
190     {
191       if (fmt->format_string_len == 0)
192 	return -1;
193 
194       fmt->format_string_len--;
195       c = safe_toupper (*fmt->format_string++);
196       fmt->error_element = c;
197     }
198   while ((c == ' ' || c == '\t') && !literal);
199 
200   return c;
201 }
202 
203 
204 /* unget_char()-- Back up one character position. */
205 
206 #define unget_char(fmt) \
207   { fmt->format_string--; fmt->format_string_len++; }
208 
209 
210 /* get_fnode()-- Allocate a new format node, inserting it into the
211    current singly linked list.  These are initially allocated from the
212    static buffer. */
213 
214 static fnode *
get_fnode(format_data * fmt,fnode ** head,fnode ** tail,format_token t)215 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
216 {
217   fnode *f;
218 
219   if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
220     {
221       fmt->last->next = xmalloc (sizeof (fnode_array));
222       fmt->last = fmt->last->next;
223       fmt->last->next = NULL;
224       fmt->avail = &fmt->last->array[0];
225     }
226   f = fmt->avail++;
227   memset (f, '\0', sizeof (fnode));
228 
229   if (*head == NULL)
230     *head = *tail = f;
231   else
232     {
233       (*tail)->next = f;
234       *tail = f;
235     }
236 
237   f->format = t;
238   f->repeat = -1;
239   f->source = fmt->format_string;
240   return f;
241 }
242 
243 
244 /* free_format()-- Free allocated format string.  */
245 void
free_format(st_parameter_dt * dtp)246 free_format (st_parameter_dt *dtp)
247 {
248   if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
249     {
250       free (dtp->format);
251       dtp->format = NULL;
252     }
253 }
254 
255 
256 /* free_format_data()-- Free all allocated format data.  */
257 
258 void
free_format_data(format_data * fmt)259 free_format_data (format_data *fmt)
260 {
261   fnode_array *fa, *fa_next;
262   fnode *fnp;
263 
264   if (fmt == NULL)
265     return;
266 
267   /* Free vlist descriptors in the fnode_array if one was allocated.  */
268   for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] &&
269        fnp->format != FMT_NONE; fnp++)
270     if (fnp->format == FMT_DT)
271 	{
272 	  if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist))
273 	    free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist));
274 	  free (fnp->u.udf.vlist);
275 	}
276 
277   for (fa = fmt->array.next; fa; fa = fa_next)
278     {
279       fa_next = fa->next;
280       free (fa);
281     }
282 
283   free (fmt);
284   fmt = NULL;
285 }
286 
287 
288 /* format_lex()-- Simple lexical analyzer for getting the next token
289    in a FORMAT string.  We support a one-level token pushback in the
290    fmt->saved_token variable. */
291 
292 static format_token
format_lex(format_data * fmt)293 format_lex (format_data *fmt)
294 {
295   format_token token;
296   int negative_flag;
297   int c;
298   char delim;
299 
300   if (fmt->saved_token != FMT_NONE)
301     {
302       token = fmt->saved_token;
303       fmt->saved_token = FMT_NONE;
304       return token;
305     }
306 
307   negative_flag = 0;
308   c = next_char (fmt, 0);
309 
310   switch (c)
311     {
312     case '*':
313        token = FMT_STAR;
314        break;
315 
316     case '(':
317       token = FMT_LPAREN;
318       break;
319 
320     case ')':
321       token = FMT_RPAREN;
322       break;
323 
324     case '-':
325       negative_flag = 1;
326       /* Fall Through */
327 
328     case '+':
329       c = next_char (fmt, 0);
330       if (!safe_isdigit (c))
331 	{
332 	  token = FMT_UNKNOWN;
333 	  break;
334 	}
335 
336       fmt->value = c - '0';
337 
338       for (;;)
339 	{
340 	  c = next_char (fmt, 0);
341 	  if (!safe_isdigit (c))
342 	    break;
343 
344 	  fmt->value = 10 * fmt->value + c - '0';
345 	}
346 
347       unget_char (fmt);
348 
349       if (negative_flag)
350 	fmt->value = -fmt->value;
351       token = FMT_SIGNED_INT;
352       break;
353 
354     case '0':
355     case '1':
356     case '2':
357     case '3':
358     case '4':
359     case '5':
360     case '6':
361     case '7':
362     case '8':
363     case '9':
364       fmt->value = c - '0';
365 
366       for (;;)
367 	{
368 	  c = next_char (fmt, 0);
369 	  if (!safe_isdigit (c))
370 	    break;
371 
372 	  fmt->value = 10 * fmt->value + c - '0';
373 	}
374 
375       unget_char (fmt);
376       token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
377       break;
378 
379     case '.':
380       token = FMT_PERIOD;
381       break;
382 
383     case ',':
384       token = FMT_COMMA;
385       break;
386 
387     case ':':
388       token = FMT_COLON;
389       break;
390 
391     case '/':
392       token = FMT_SLASH;
393       break;
394 
395     case '$':
396       token = FMT_DOLLAR;
397       break;
398 
399     case 'T':
400       switch (next_char (fmt, 0))
401 	{
402 	case 'L':
403 	  token = FMT_TL;
404 	  break;
405 	case 'R':
406 	  token = FMT_TR;
407 	  break;
408 	default:
409 	  token = FMT_T;
410 	  unget_char (fmt);
411 	  break;
412 	}
413 
414       break;
415 
416     case 'X':
417       token = FMT_X;
418       break;
419 
420     case 'S':
421       switch (next_char (fmt, 0))
422 	{
423 	case 'S':
424 	  token = FMT_SS;
425 	  break;
426 	case 'P':
427 	  token = FMT_SP;
428 	  break;
429 	default:
430 	  token = FMT_S;
431 	  unget_char (fmt);
432 	  break;
433 	}
434 
435       break;
436 
437     case 'B':
438       switch (next_char (fmt, 0))
439 	{
440 	case 'N':
441 	  token = FMT_BN;
442 	  break;
443 	case 'Z':
444 	  token = FMT_BZ;
445 	  break;
446 	default:
447 	  token = FMT_B;
448 	  unget_char (fmt);
449 	  break;
450 	}
451 
452       break;
453 
454     case '\'':
455     case '"':
456       delim = c;
457 
458       fmt->string = fmt->format_string;
459       fmt->value = 0;		/* This is the length of the string */
460 
461       for (;;)
462 	{
463 	  c = next_char (fmt, 1);
464 	  if (c == -1)
465 	    {
466 	      token = FMT_BADSTRING;
467 	      fmt->error = bad_string;
468 	      break;
469 	    }
470 
471 	  if (c == delim)
472 	    {
473 	      c = next_char (fmt, 1);
474 
475 	      if (c == -1)
476 		{
477 		  token = FMT_BADSTRING;
478 		  fmt->error = bad_string;
479 		  break;
480 		}
481 
482 	      if (c != delim)
483 		{
484 		  unget_char (fmt);
485 		  token = FMT_STRING;
486 		  break;
487 		}
488 	    }
489 
490 	  fmt->value++;
491 	}
492 
493       break;
494 
495     case 'P':
496       token = FMT_P;
497       break;
498 
499     case 'I':
500       token = FMT_I;
501       break;
502 
503     case 'O':
504       token = FMT_O;
505       break;
506 
507     case 'Z':
508       token = FMT_Z;
509       break;
510 
511     case 'F':
512       token = FMT_F;
513       break;
514 
515     case 'E':
516       switch (next_char (fmt, 0))
517 	{
518 	case 'N':
519 	  token = FMT_EN;
520 	  break;
521 	case 'S':
522 	  token = FMT_ES;
523 	  break;
524 	default:
525 	  token = FMT_E;
526 	  unget_char (fmt);
527 	  break;
528 	}
529       break;
530 
531     case 'G':
532       token = FMT_G;
533       break;
534 
535     case 'H':
536       token = FMT_H;
537       break;
538 
539     case 'L':
540       token = FMT_L;
541       break;
542 
543     case 'A':
544       token = FMT_A;
545       break;
546 
547     case 'D':
548       switch (next_char (fmt, 0))
549 	{
550 	case 'P':
551 	  token = FMT_DP;
552 	  break;
553 	case 'C':
554 	  token = FMT_DC;
555 	  break;
556 	case 'T':
557 	  token = FMT_DT;
558 	  break;
559 	default:
560 	  token = FMT_D;
561 	  unget_char (fmt);
562 	  break;
563 	}
564       break;
565 
566     case 'R':
567       switch (next_char (fmt, 0))
568 	{
569 	case 'C':
570 	  token = FMT_RC;
571 	  break;
572 	case 'D':
573 	  token = FMT_RD;
574 	  break;
575 	case 'N':
576 	  token = FMT_RN;
577 	  break;
578 	case 'P':
579 	  token = FMT_RP;
580 	  break;
581 	case 'U':
582 	  token = FMT_RU;
583 	  break;
584 	case 'Z':
585 	  token = FMT_RZ;
586 	  break;
587 	default:
588 	  unget_char (fmt);
589 	  token = FMT_UNKNOWN;
590 	  break;
591 	}
592       break;
593 
594     case -1:
595       token = FMT_END;
596       break;
597 
598     default:
599       token = FMT_UNKNOWN;
600       break;
601     }
602 
603   return token;
604 }
605 
606 
607 /* parse_format_list()-- Parse a format list.  Assumes that a left
608    paren has already been seen.  Returns a list representing the
609    parenthesis node which contains the rest of the list. */
610 
611 static fnode *
parse_format_list(st_parameter_dt * dtp,bool * seen_dd)612 parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
613 {
614   fnode *head, *tail;
615   format_token t, u, t2;
616   int repeat;
617   format_data *fmt = dtp->u.p.fmt;
618   bool seen_data_desc = false;
619   int standard;
620 
621   head = tail = NULL;
622 
623   /* Get the next format item */
624  format_item:
625   t = format_lex (fmt);
626  format_item_1:
627   switch (t)
628     {
629     case FMT_STAR:
630       t = format_lex (fmt);
631       if (t != FMT_LPAREN)
632 	{
633 	  fmt->error = "Left parenthesis required after '*'";
634 	  goto finished;
635 	}
636       get_fnode (fmt, &head, &tail, FMT_LPAREN);
637       tail->repeat = -2;  /* Signifies unlimited format.  */
638       tail->u.child = parse_format_list (dtp, &seen_data_desc);
639       *seen_dd = seen_data_desc;
640       if (fmt->error != NULL)
641 	goto finished;
642       if (!seen_data_desc)
643 	{
644 	  fmt->error = "'*' requires at least one associated data descriptor";
645 	  goto finished;
646 	}
647       goto between_desc;
648 
649     case FMT_POSINT:
650       repeat = fmt->value;
651 
652       t = format_lex (fmt);
653       switch (t)
654 	{
655 	case FMT_LPAREN:
656 	  get_fnode (fmt, &head, &tail, FMT_LPAREN);
657 	  tail->repeat = repeat;
658 	  tail->u.child = parse_format_list (dtp, &seen_data_desc);
659 	  *seen_dd = seen_data_desc;
660 	  if (fmt->error != NULL)
661 	    goto finished;
662 
663 	  goto between_desc;
664 
665 	case FMT_SLASH:
666 	  get_fnode (fmt, &head, &tail, FMT_SLASH);
667 	  tail->repeat = repeat;
668 	  goto optional_comma;
669 
670 	case FMT_X:
671 	  get_fnode (fmt, &head, &tail, FMT_X);
672 	  tail->repeat = 1;
673 	  tail->u.k = fmt->value;
674 	  goto between_desc;
675 
676 	case FMT_P:
677 	  goto p_descriptor;
678 
679 	default:
680 	  goto data_desc;
681 	}
682 
683     case FMT_LPAREN:
684       get_fnode (fmt, &head, &tail, FMT_LPAREN);
685       tail->repeat = 1;
686       tail->u.child = parse_format_list (dtp, &seen_data_desc);
687       *seen_dd = seen_data_desc;
688       if (fmt->error != NULL)
689 	goto finished;
690 
691       goto between_desc;
692 
693     case FMT_SIGNED_INT:	/* Signed integer can only precede a P format.  */
694     case FMT_ZERO:		/* Same for zero.  */
695       t = format_lex (fmt);
696       if (t != FMT_P)
697 	{
698 	  fmt->error = "Expected P edit descriptor in format";
699 	  goto finished;
700 	}
701 
702     p_descriptor:
703       get_fnode (fmt, &head, &tail, FMT_P);
704       tail->u.k = fmt->value;
705       tail->repeat = 1;
706 
707       t = format_lex (fmt);
708       if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
709 	  || t == FMT_G || t == FMT_E)
710 	{
711 	  repeat = 1;
712 	  goto data_desc;
713 	}
714 
715       if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
716 	  && t != FMT_POSINT)
717 	{
718 	  fmt->error = "Comma required after P descriptor";
719 	  goto finished;
720 	}
721 
722       fmt->saved_token = t;
723       goto optional_comma;
724 
725     case FMT_P:		/* P and X require a prior number */
726       fmt->error = "P descriptor requires leading scale factor";
727       goto finished;
728 
729     case FMT_X:
730 /*
731    EXTENSION!
732 
733    If we would be pedantic in the library, we would have to reject
734    an X descriptor without an integer prefix:
735 
736       fmt->error = "X descriptor requires leading space count";
737       goto finished;
738 
739    However, this is an extension supported by many Fortran compilers,
740    including Cray, HP, AIX, and IRIX.  Therefore, we allow it in the
741    runtime library, and make the front end reject it if the compiler
742    is in pedantic mode.  The interpretation of 'X' is '1X'.
743 */
744       get_fnode (fmt, &head, &tail, FMT_X);
745       tail->repeat = 1;
746       tail->u.k = 1;
747       goto between_desc;
748 
749     case FMT_STRING:
750       get_fnode (fmt, &head, &tail, FMT_STRING);
751       tail->u.string.p = fmt->string;
752       tail->u.string.length = fmt->value;
753       tail->repeat = 1;
754       goto optional_comma;
755 
756     case FMT_RC:
757     case FMT_RD:
758     case FMT_RN:
759     case FMT_RP:
760     case FMT_RU:
761     case FMT_RZ:
762       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
763 		  "descriptor not allowed");
764       get_fnode (fmt, &head, &tail, t);
765       tail->repeat = 1;
766       goto between_desc;
767 
768     case FMT_DC:
769     case FMT_DP:
770       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
771 		  "descriptor not allowed");
772     /* Fall through.  */
773     case FMT_S:
774     case FMT_SS:
775     case FMT_SP:
776     case FMT_BN:
777     case FMT_BZ:
778       get_fnode (fmt, &head, &tail, t);
779       tail->repeat = 1;
780       goto between_desc;
781 
782     case FMT_COLON:
783       get_fnode (fmt, &head, &tail, FMT_COLON);
784       tail->repeat = 1;
785       goto optional_comma;
786 
787     case FMT_SLASH:
788       get_fnode (fmt, &head, &tail, FMT_SLASH);
789       tail->repeat = 1;
790       tail->u.r = 1;
791       goto optional_comma;
792 
793     case FMT_DOLLAR:
794       get_fnode (fmt, &head, &tail, FMT_DOLLAR);
795       tail->repeat = 1;
796       notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
797       goto between_desc;
798 
799     case FMT_T:
800     case FMT_TL:
801     case FMT_TR:
802       t2 = format_lex (fmt);
803       if (t2 != FMT_POSINT)
804 	{
805 	  fmt->error = posint_required;
806 	  goto finished;
807 	}
808       get_fnode (fmt, &head, &tail, t);
809       tail->u.n = fmt->value;
810       tail->repeat = 1;
811       goto between_desc;
812 
813     case FMT_I:
814     case FMT_B:
815     case FMT_O:
816     case FMT_Z:
817     case FMT_E:
818     case FMT_EN:
819     case FMT_ES:
820     case FMT_D:
821     case FMT_DT:
822     case FMT_L:
823     case FMT_A:
824     case FMT_F:
825     case FMT_G:
826       repeat = 1;
827       *seen_dd = true;
828       goto data_desc;
829 
830     case FMT_H:
831       get_fnode (fmt, &head, &tail, FMT_STRING);
832       if (fmt->format_string_len < 1)
833 	{
834 	  fmt->error = bad_hollerith;
835 	  goto finished;
836 	}
837 
838       tail->u.string.p = fmt->format_string;
839       tail->u.string.length = 1;
840       tail->repeat = 1;
841 
842       fmt->format_string++;
843       fmt->format_string_len--;
844 
845       goto between_desc;
846 
847     case FMT_END:
848       fmt->error = unexpected_end;
849       goto finished;
850 
851     case FMT_BADSTRING:
852       goto finished;
853 
854     case FMT_RPAREN:
855       goto finished;
856 
857     default:
858       fmt->error = unexpected_element;
859       goto finished;
860     }
861 
862   /* In this state, t must currently be a data descriptor.  Deal with
863      things that can/must follow the descriptor */
864  data_desc:
865 
866   switch (t)
867     {
868     case FMT_L:
869       *seen_dd = true;
870       t = format_lex (fmt);
871       if (t != FMT_POSINT)
872 	{
873 	  if (t == FMT_ZERO)
874 	    {
875 	      if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
876 		{
877 		  fmt->error = "Extension: Zero width after L descriptor";
878 		  goto finished;
879 		}
880 	      else
881 		notify_std (&dtp->common, GFC_STD_GNU,
882 			    "Zero width after L descriptor");
883 	    }
884 	  else
885 	    {
886 	      fmt->saved_token = t;
887 	      notify_std (&dtp->common, GFC_STD_GNU,
888 			  "Positive width required with L descriptor");
889 	    }
890 	  fmt->value = 1;	/* Default width */
891 	}
892       get_fnode (fmt, &head, &tail, FMT_L);
893       tail->u.n = fmt->value;
894       tail->repeat = repeat;
895       break;
896 
897     case FMT_A:
898       *seen_dd = true;
899       t = format_lex (fmt);
900       if (t == FMT_ZERO)
901 	{
902 	  fmt->error = zero_width;
903 	  goto finished;
904 	}
905 
906       if (t != FMT_POSINT)
907 	{
908 	  fmt->saved_token = t;
909 	  fmt->value = -1;		/* Width not present */
910 	}
911 
912       get_fnode (fmt, &head, &tail, FMT_A);
913       tail->repeat = repeat;
914       tail->u.n = fmt->value;
915       break;
916 
917     case FMT_D:
918     case FMT_E:
919     case FMT_F:
920     case FMT_G:
921     case FMT_EN:
922     case FMT_ES:
923       *seen_dd = true;
924       get_fnode (fmt, &head, &tail, t);
925       tail->repeat = repeat;
926 
927       u = format_lex (fmt);
928 
929       /* Processing for zero width formats.  */
930       if (u == FMT_ZERO)
931 	{
932           if (t == FMT_F)
933 	    standard = GFC_STD_F95;
934 	  else if (t == FMT_G)
935 	    standard = GFC_STD_F2008;
936 	  else
937 	    standard = GFC_STD_F2018;
938 
939 	  if (notification_std (standard) == NOTIFICATION_ERROR
940 	      || dtp->u.p.mode == READING)
941 	    {
942 	      fmt->error = zero_width;
943 	      goto finished;
944 	    }
945 	  tail->u.real.w = 0;
946 
947 	  /* Look for the dot seperator.  */
948 	  u = format_lex (fmt);
949 	  if (u != FMT_PERIOD)
950 	    {
951 	      fmt->saved_token = u;
952 	      break;
953 	    }
954 
955 	  /* Look for the precision.  */
956 	  u = format_lex (fmt);
957 	  if (u != FMT_ZERO && u != FMT_POSINT)
958 	    {
959 	      fmt->error = nonneg_required;
960 	      goto finished;
961 	    }
962 	  tail->u.real.d = fmt->value;
963 
964 	  /* Look for optional exponent, not allowed for FMT_D */
965 	  if (t == FMT_D)
966 	    break;
967 	  u = format_lex (fmt);
968 	  if (u != FMT_E)
969 	    fmt->saved_token = u;
970 	  else
971 	    {
972 	      u = format_lex (fmt);
973 	      if (u != FMT_POSINT)
974 		{
975 		  if (u == FMT_ZERO)
976 		    {
977 		      notify_std (&dtp->common, GFC_STD_F2018,
978 				  "Positive exponent width required");
979 		    }
980 		  else
981 		    {
982 		      fmt->error = "Positive exponent width required in "
983 				   "format string at %L";
984 		      goto finished;
985 		    }
986 		}
987 	      tail->u.real.e = fmt->value;
988 	    }
989 	  break;
990 	}
991 
992       /* Processing for positive width formats.  */
993       if (u == FMT_POSINT)
994 	{
995 	  tail->u.real.w = fmt->value;
996 
997 	  /* Look for the dot separator. Because of legacy behaviors
998 	     we do some look ahead for missing things.  */
999 	  t2 = t;
1000 	  t = format_lex (fmt);
1001 	  if (t != FMT_PERIOD)
1002 	    {
1003 	      /* We treat a missing decimal descriptor as 0.  Note: This is only
1004 		 allowed if -std=legacy, otherwise an error occurs.  */
1005 	      if (compile_options.warn_std != 0)
1006 		{
1007 		  fmt->error = period_required;
1008 		  goto finished;
1009 		}
1010 	      fmt->saved_token = t;
1011 	      tail->u.real.d = 0;
1012 	      tail->u.real.e = -1;
1013 	      break;
1014 	    }
1015 
1016 	  /* If we made it here, we should have the dot so look for the
1017 	     precision.  */
1018 	  t = format_lex (fmt);
1019 	  if (t != FMT_ZERO && t != FMT_POSINT)
1020 	    {
1021 	      fmt->error = nonneg_required;
1022 	      goto finished;
1023 	    }
1024 	  tail->u.real.d = fmt->value;
1025 	  tail->u.real.e = -1;
1026 
1027 	  /* Done with D and F formats.  */
1028 	  if (t2 == FMT_D || t2 == FMT_F)
1029 	    {
1030 	      *seen_dd = true;
1031 	      break;
1032 	    }
1033 
1034 	  /* Look for optional exponent */
1035 	  u = format_lex (fmt);
1036 	  if (u != FMT_E)
1037 	    fmt->saved_token = u;
1038 	  else
1039 	    {
1040 	      u = format_lex (fmt);
1041 	      if (u != FMT_POSINT)
1042 		{
1043 		  if (u == FMT_ZERO)
1044 		    {
1045 		      notify_std (&dtp->common, GFC_STD_F2018,
1046 				  "Positive exponent width required");
1047 		    }
1048 		  else
1049 		    {
1050 		      fmt->error = "Positive exponent width required in "
1051 				   "format string at %L";
1052 		      goto finished;
1053 		    }
1054 		}
1055 	      tail->u.real.e = fmt->value;
1056 	    }
1057 	  break;
1058 	}
1059 
1060       /* Old DEC codes may not have width or precision specified.  */
1061       if (dtp->u.p.mode == WRITING && (dtp->common.flags & IOPARM_DT_DEC_EXT))
1062 	{
1063 	  tail->u.real.w = DEFAULT_WIDTH;
1064 	  tail->u.real.d = 0;
1065 	  tail->u.real.e = -1;
1066 	  fmt->saved_token = u;
1067 	}
1068       break;
1069 
1070     case FMT_DT:
1071       *seen_dd = true;
1072       get_fnode (fmt, &head, &tail, t);
1073       tail->repeat = repeat;
1074 
1075       t = format_lex (fmt);
1076 
1077       /* Initialize the vlist to a zero size, rank-one array.  */
1078       tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4)
1079 				  + sizeof (descriptor_dimension));
1080       GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL;
1081       GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0);
1082 
1083       if (t == FMT_STRING)
1084         {
1085 	  /* Get pointer to the optional format string.  */
1086 	  tail->u.udf.string = fmt->string;
1087 	  tail->u.udf.string_len = fmt->value;
1088 	  t = format_lex (fmt);
1089 	}
1090       if (t == FMT_LPAREN)
1091         {
1092 	  /* Temporary buffer to hold the vlist values.  */
1093 	  GFC_INTEGER_4 temp[FARRAY_SIZE];
1094 	  int i = 0;
1095 	loop:
1096 	  t = format_lex (fmt);
1097 	  if (t != FMT_POSINT)
1098 	    {
1099 	      fmt->error = posint_required;
1100 	      goto finished;
1101 	    }
1102 	  /* Save the positive integer value.  */
1103 	  temp[i++] = fmt->value;
1104 	  t = format_lex (fmt);
1105 	  if (t == FMT_COMMA)
1106 	    goto loop;
1107 	  if (t == FMT_RPAREN)
1108 	    {
1109 	      /* We have parsed the complete vlist so initialize the
1110 	         array descriptor and save it in the format node.  */
1111 	      gfc_full_array_i4 *vp = tail->u.udf.vlist;
1112 	      GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4));
1113 	      GFC_DIMENSION_SET(vp->dim[0],1, i, 1);
1114 	      memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4));
1115 	      break;
1116 	    }
1117 	  fmt->error = unexpected_element;
1118 	  goto finished;
1119 	}
1120       fmt->saved_token = t;
1121       break;
1122     case FMT_H:
1123       if (repeat > fmt->format_string_len)
1124 	{
1125 	  fmt->error = bad_hollerith;
1126 	  goto finished;
1127 	}
1128 
1129       get_fnode (fmt, &head, &tail, FMT_STRING);
1130       tail->u.string.p = fmt->format_string;
1131       tail->u.string.length = repeat;
1132       tail->repeat = 1;
1133 
1134       fmt->format_string += fmt->value;
1135       fmt->format_string_len -= repeat;
1136 
1137       break;
1138 
1139     case FMT_I:
1140     case FMT_B:
1141     case FMT_O:
1142     case FMT_Z:
1143       *seen_dd = true;
1144       get_fnode (fmt, &head, &tail, t);
1145       tail->repeat = repeat;
1146 
1147       t = format_lex (fmt);
1148 
1149       if (dtp->u.p.mode == READING)
1150 	{
1151 	  if (t != FMT_POSINT)
1152 	    {
1153 	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1154 		{
1155 		  tail->u.integer.w = DEFAULT_WIDTH;
1156 		  tail->u.integer.m = -1;
1157 		  fmt->saved_token = t;
1158 		  break;
1159 		}
1160 	      fmt->error = posint_required;
1161 	      goto finished;
1162 	    }
1163 	}
1164       else
1165 	{
1166 	  if (t != FMT_ZERO && t != FMT_POSINT)
1167 	    {
1168 	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1169 		{
1170 		  tail->u.integer.w = DEFAULT_WIDTH;
1171 		  tail->u.integer.m = -1;
1172 		  fmt->saved_token = t;
1173 		  break;
1174 		}
1175 	      fmt->error = nonneg_required;
1176 	      goto finished;
1177 	    }
1178 	}
1179 
1180       tail->u.integer.w = fmt->value;
1181       tail->u.integer.m = -1;
1182 
1183       t = format_lex (fmt);
1184       if (t != FMT_PERIOD)
1185 	{
1186 	  fmt->saved_token = t;
1187 	}
1188       else
1189 	{
1190 	  t = format_lex (fmt);
1191 	  if (t != FMT_ZERO && t != FMT_POSINT)
1192 	    {
1193 	      fmt->error = nonneg_required;
1194 	      goto finished;
1195 	    }
1196 
1197 	  tail->u.integer.m = fmt->value;
1198 	}
1199 
1200       if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1201 	{
1202 	  fmt->error = "Minimum digits exceeds field width";
1203 	  goto finished;
1204 	}
1205 
1206       break;
1207 
1208     default:
1209       fmt->error = unexpected_element;
1210       goto finished;
1211     }
1212 
1213   /* Between a descriptor and what comes next */
1214  between_desc:
1215   t = format_lex (fmt);
1216   switch (t)
1217     {
1218     case FMT_COMMA:
1219       goto format_item;
1220 
1221     case FMT_RPAREN:
1222       goto finished;
1223 
1224     case FMT_SLASH:
1225     case FMT_COLON:
1226       get_fnode (fmt, &head, &tail, t);
1227       tail->repeat = 1;
1228       goto optional_comma;
1229 
1230     case FMT_END:
1231       fmt->error = unexpected_end;
1232       goto finished;
1233 
1234     default:
1235       /* Assume a missing comma, this is a GNU extension */
1236       goto format_item_1;
1237     }
1238 
1239   /* Optional comma is a weird between state where we've just finished
1240      reading a colon, slash or P descriptor. */
1241  optional_comma:
1242   t = format_lex (fmt);
1243   switch (t)
1244     {
1245     case FMT_COMMA:
1246       break;
1247 
1248     case FMT_RPAREN:
1249       goto finished;
1250 
1251     default:			/* Assume that we have another format item */
1252       fmt->saved_token = t;
1253       break;
1254     }
1255 
1256   goto format_item;
1257 
1258  finished:
1259 
1260   return head;
1261 }
1262 
1263 
1264 /* format_error()-- Generate an error message for a format statement.
1265    If the node that gives the location of the error is NULL, the error
1266    is assumed to happen at parse time, and the current location of the
1267    parser is shown.
1268 
1269    We generate a message showing where the problem is.  We take extra
1270    care to print only the relevant part of the format if it is longer
1271    than a standard 80 column display. */
1272 
1273 void
format_error(st_parameter_dt * dtp,const fnode * f,const char * message)1274 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1275 {
1276   int width, i, offset;
1277 #define BUFLEN 300
1278   char *p, buffer[BUFLEN];
1279   format_data *fmt = dtp->u.p.fmt;
1280 
1281   if (f != NULL)
1282     p = f->source;
1283   else                /* This should not happen.  */
1284     p = dtp->format;
1285 
1286   if (message == unexpected_element)
1287     snprintf (buffer, BUFLEN, message, fmt->error_element);
1288   else
1289     snprintf (buffer, BUFLEN, "%s\n", message);
1290 
1291   /* Get the offset into the format string where the error occurred.  */
1292   offset = dtp->format_len - (fmt->reversion_ok ?
1293 			      (int) strlen(p) : fmt->format_string_len);
1294 
1295   width = dtp->format_len;
1296 
1297   if (width > 80)
1298     width = 80;
1299 
1300   /* Show the format */
1301 
1302   p = strchr (buffer, '\0');
1303 
1304   if (dtp->format)
1305     memcpy (p, dtp->format, width);
1306 
1307   p += width;
1308   *p++ = '\n';
1309 
1310   /* Show where the problem is */
1311 
1312   for (i = 1; i < offset; i++)
1313     *p++ = ' ';
1314 
1315   *p++ = '^';
1316   *p = '\0';
1317 
1318   generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1319 }
1320 
1321 
1322 /* revert()-- Do reversion of the format.  Control reverts to the left
1323    parenthesis that matches the rightmost right parenthesis.  From our
1324    tree structure, we are looking for the rightmost parenthesis node
1325    at the second level, the first level always being a single
1326    parenthesis node.  If this node doesn't exit, we use the top
1327    level. */
1328 
1329 static void
revert(st_parameter_dt * dtp)1330 revert (st_parameter_dt *dtp)
1331 {
1332   fnode *f, *r;
1333   format_data *fmt = dtp->u.p.fmt;
1334 
1335   dtp->u.p.reversion_flag = 1;
1336 
1337   r = NULL;
1338 
1339   for (f = fmt->array.array[0].u.child; f; f = f->next)
1340     if (f->format == FMT_LPAREN)
1341       r = f;
1342 
1343   /* If r is NULL because no node was found, the whole tree will be used */
1344 
1345   fmt->array.array[0].current = r;
1346   fmt->array.array[0].count = 0;
1347 }
1348 
1349 /* parse_format()-- Parse a format string.  */
1350 
1351 void
parse_format(st_parameter_dt * dtp)1352 parse_format (st_parameter_dt *dtp)
1353 {
1354   format_data *fmt;
1355   bool format_cache_ok, seen_data_desc = false;
1356 
1357   /* Don't cache for internal units and set an arbitrary limit on the
1358      size of format strings we will cache.  (Avoids memory issues.)
1359      Also, the format_hash_table resides in the current_unit, so
1360      child_dtio procedures would overwrite the parent table  */
1361   format_cache_ok = !is_internal_unit (dtp)
1362 		    && (dtp->u.p.current_unit->child_dtio == 0);
1363 
1364   /* Lookup format string to see if it has already been parsed.  */
1365   if (format_cache_ok)
1366     {
1367       dtp->u.p.fmt = find_parsed_format (dtp);
1368 
1369       if (dtp->u.p.fmt != NULL)
1370 	{
1371 	  dtp->u.p.fmt->reversion_ok = 0;
1372 	  dtp->u.p.fmt->saved_token = FMT_NONE;
1373 	  dtp->u.p.fmt->saved_format = NULL;
1374 	  reset_fnode_counters (dtp);
1375 	  return;
1376 	}
1377     }
1378 
1379   /* Not found so proceed as follows.  */
1380 
1381   char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1382   dtp->format = fmt_string;
1383 
1384   dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1385   fmt->format_string = dtp->format;
1386   fmt->format_string_len = dtp->format_len;
1387 
1388   fmt->string = NULL;
1389   fmt->saved_token = FMT_NONE;
1390   fmt->error = NULL;
1391   fmt->value = 0;
1392 
1393   /* Initialize variables used during traversal of the tree.  */
1394 
1395   fmt->reversion_ok = 0;
1396   fmt->saved_format = NULL;
1397 
1398   /* Initialize the fnode_array.  */
1399 
1400   memset (&(fmt->array), 0, sizeof(fmt->array));
1401 
1402   /* Allocate the first format node as the root of the tree.  */
1403 
1404   fmt->last = &fmt->array;
1405   fmt->last->next = NULL;
1406   fmt->avail = &fmt->array.array[0];
1407 
1408   memset (fmt->avail, 0, sizeof (*fmt->avail));
1409   fmt->avail->format = FMT_LPAREN;
1410   fmt->avail->repeat = 1;
1411   fmt->avail++;
1412 
1413   if (format_lex (fmt) == FMT_LPAREN)
1414     fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1415   else
1416     fmt->error = "Missing initial left parenthesis in format";
1417 
1418   if (format_cache_ok)
1419     save_parsed_format (dtp);
1420   else
1421     dtp->u.p.format_not_saved = 1;
1422 
1423   if (fmt->error)
1424     format_error (dtp, NULL, fmt->error);
1425 }
1426 
1427 
1428 /* next_format0()-- Get the next format node without worrying about
1429    reversion.  Returns NULL when we hit the end of the list.
1430    Parenthesis nodes are incremented after the list has been
1431    exhausted, other nodes are incremented before they are returned. */
1432 
1433 static const fnode *
next_format0(fnode * f)1434 next_format0 (fnode *f)
1435 {
1436   const fnode *r;
1437 
1438   if (f == NULL)
1439     return NULL;
1440 
1441   if (f->format != FMT_LPAREN)
1442     {
1443       f->count++;
1444       if (f->count <= f->repeat)
1445 	return f;
1446 
1447       f->count = 0;
1448       return NULL;
1449     }
1450 
1451   /* Deal with a parenthesis node with unlimited format.  */
1452 
1453   if (f->repeat == -2)  /* -2 signifies unlimited.  */
1454   for (;;)
1455     {
1456       if (f->current == NULL)
1457 	f->current = f->u.child;
1458 
1459       for (; f->current != NULL; f->current = f->current->next)
1460 	{
1461 	  r = next_format0 (f->current);
1462 	  if (r != NULL)
1463 	    return r;
1464 	}
1465     }
1466 
1467   /* Deal with a parenthesis node with specific repeat count.  */
1468   for (; f->count < f->repeat; f->count++)
1469     {
1470       if (f->current == NULL)
1471 	f->current = f->u.child;
1472 
1473       for (; f->current != NULL; f->current = f->current->next)
1474 	{
1475 	  r = next_format0 (f->current);
1476 	  if (r != NULL)
1477 	    return r;
1478 	}
1479     }
1480 
1481   f->count = 0;
1482   return NULL;
1483 }
1484 
1485 
1486 /* next_format()-- Return the next format node.  If the format list
1487    ends up being exhausted, we do reversion.  Reversion is only
1488    allowed if we've seen a data descriptor since the
1489    initialization or the last reversion.  We return NULL if there
1490    are no more data descriptors to return (which is an error
1491    condition).  */
1492 
1493 const fnode *
next_format(st_parameter_dt * dtp)1494 next_format (st_parameter_dt *dtp)
1495 {
1496   format_token t;
1497   const fnode *f;
1498   format_data *fmt = dtp->u.p.fmt;
1499 
1500   if (fmt->saved_format != NULL)
1501     {				/* Deal with a pushed-back format node */
1502       f = fmt->saved_format;
1503       fmt->saved_format = NULL;
1504       goto done;
1505     }
1506 
1507   f = next_format0 (&fmt->array.array[0]);
1508   if (f == NULL)
1509     {
1510       if (!fmt->reversion_ok)
1511 	return NULL;
1512 
1513       fmt->reversion_ok = 0;
1514       revert (dtp);
1515 
1516       f = next_format0 (&fmt->array.array[0]);
1517       if (f == NULL)
1518 	{
1519 	  format_error (dtp, NULL, reversion_error);
1520 	  return NULL;
1521 	}
1522 
1523       /* Push the first reverted token and return a colon node in case
1524 	 there are no more data items.  */
1525 
1526       fmt->saved_format = f;
1527       return &colon_node;
1528     }
1529 
1530   /* If this is a data edit descriptor, then reversion has become OK. */
1531  done:
1532   t = f->format;
1533 
1534   if (!fmt->reversion_ok &&
1535       (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1536        t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1537        t == FMT_A || t == FMT_D || t == FMT_DT))
1538     fmt->reversion_ok = 1;
1539   return f;
1540 }
1541 
1542 
1543 /* unget_format()-- Push the given format back so that it will be
1544    returned on the next call to next_format() without affecting
1545    counts.  This is necessary when we've encountered a data
1546    descriptor, but don't know what the data item is yet.  The format
1547    node is pushed back, and we return control to the main program,
1548    which calls the library back with the data item (or not). */
1549 
1550 void
unget_format(st_parameter_dt * dtp,const fnode * f)1551 unget_format (st_parameter_dt *dtp, const fnode *f)
1552 {
1553   dtp->u.p.fmt->saved_format = f;
1554 }
1555 
1556