xref: /llvm-project/flang/runtime/format-implementation.h (revision cd92c4255582299b9a55fa0dc485982b8f54c49a)
1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implements out-of-line member functions of template class FormatControl
10 
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13 
14 #include "emit-encoded.h"
15 #include "format.h"
16 #include "io-stmt.h"
17 #include "memory.h"
18 #include "flang/Common/format.h"
19 #include "flang/Decimal/decimal.h"
20 #include "flang/Runtime/main.h"
21 #include <algorithm>
22 #include <cstring>
23 #include <limits>
24 
25 namespace Fortran::runtime::io {
26 
27 template <typename CONTEXT>
28 RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
29     const CharType *format, std::size_t formatLength,
30     const Descriptor *formatDescriptor, int maxHeight)
31     : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
32       formatLength_{static_cast<int>(formatLength)} {
33   RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
34   if (!format && formatDescriptor) {
35     // The format is a character array passed via a descriptor.
36     std::size_t elements{formatDescriptor->Elements()};
37     std::size_t elementBytes{formatDescriptor->ElementBytes()};
38     formatLength = elements * elementBytes / sizeof(CharType);
39     formatLength_ = static_cast<int>(formatLength);
40     if (formatDescriptor->IsContiguous()) {
41       // Treat the contiguous array as a single character value.
42       format_ = const_cast<const CharType *>(
43           reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr));
44     } else {
45       // Concatenate its elements into a temporary array.
46       char *p{reinterpret_cast<char *>(
47           AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))};
48       format_ = p;
49       SubscriptValue at[maxRank];
50       formatDescriptor->GetLowerBounds(at);
51       for (std::size_t j{0}; j < elements; ++j) {
52         std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes);
53         p += elementBytes;
54         formatDescriptor->IncrementSubscripts(at);
55       }
56       freeFormat_ = true;
57     }
58   }
59   RUNTIME_CHECK(
60       terminator, formatLength == static_cast<std::size_t>(formatLength_));
61   stack_[0].start = offset_;
62   stack_[0].remaining = Iteration::unlimited; // 13.4(8)
63 }
64 
65 template <typename CONTEXT>
66 RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField(
67     IoErrorHandler &handler, CharType firstCh, bool *hadError) {
68   CharType ch{firstCh ? firstCh : PeekNext()};
69   bool negate{ch == '-'};
70   if (negate || ch == '+') {
71     if (firstCh) {
72       firstCh = '\0';
73     } else {
74       ++offset_;
75     }
76     ch = PeekNext();
77   }
78   if (ch < '0' || ch > '9') {
79     handler.SignalError(IostatErrorInFormat,
80         "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
81     if (hadError) {
82       *hadError = true;
83     }
84     return 0;
85   }
86   int result{0};
87   while (ch >= '0' && ch <= '9') {
88     constexpr int tenth{std::numeric_limits<int>::max() / 10};
89     if (result > tenth ||
90         ch - '0' > std::numeric_limits<int>::max() - 10 * result) {
91       handler.SignalError(
92           IostatErrorInFormat, "FORMAT integer field out of range");
93       if (hadError) {
94         *hadError = true;
95       }
96       return result;
97     }
98     result = 10 * result + ch - '0';
99     if (firstCh) {
100       firstCh = '\0';
101     } else {
102       ++offset_;
103     }
104     ch = PeekNext();
105   }
106   if (negate && (result *= -1) > 0) {
107     handler.SignalError(
108         IostatErrorInFormat, "FORMAT integer field out of range");
109     if (hadError) {
110       *hadError = true;
111     }
112   }
113   return result;
114 }
115 
116 // Xn, TRn, TLn
117 template <typename CONTEXT>
118 static RT_API_ATTRS bool RelativeTabbing(CONTEXT &context, int n) {
119   ConnectionState &connection{context.GetConnectionState()};
120   if constexpr (std::is_same_v<CONTEXT,
121                     ExternalFormattedIoStatementState<Direction::Input>> ||
122       std::is_same_v<CONTEXT,
123           ExternalFormattedIoStatementState<Direction::Output>>) {
124     if (n != 0 && connection.isUTF8) {
125       const char *p{};
126       if (n > 0) { // Xn or TRn
127         // Skip 'n' multi-byte characters.  If that's more than are in the
128         // current record, that's valid -- the program can position past the
129         // end and then reposition back with Tn or TLn.
130         std::size_t bytesLeft{context.ViewBytesInRecord(p, true)};
131         for (; n > 0 && bytesLeft && p; --n) {
132           std::size_t byteCount{MeasureUTF8Bytes(*p)};
133           if (byteCount > bytesLeft) {
134             break;
135           }
136           context.HandleRelativePosition(byteCount);
137           bytesLeft -= byteCount;
138           // Don't call GotChar(byteCount), these don't count towards SIZE=
139           p += byteCount;
140         }
141       } else { // n < 0: TLn
142         n = -n;
143         if (std::int64_t excess{connection.positionInRecord -
144                 connection.recordLength.value_or(connection.positionInRecord)};
145             excess > 0) {
146           // Have tabbed past the end of the record
147           if (excess >= n) {
148             context.HandleRelativePosition(-n);
149             return true;
150           }
151           context.HandleRelativePosition(-excess);
152           n -= excess;
153         }
154         std::size_t bytesLeft{context.ViewBytesInRecord(p, false)};
155         // Go back 'n' multi-byte characters.
156         for (; n > 0 && bytesLeft && p; --n) {
157           std::size_t byteCount{MeasurePreviousUTF8Bytes(p, bytesLeft)};
158           context.HandleRelativePosition(-byteCount);
159           bytesLeft -= byteCount;
160           p -= byteCount;
161         }
162       }
163     }
164   }
165   if (connection.internalIoCharKind > 1) {
166     n *= connection.internalIoCharKind;
167   }
168   context.HandleRelativePosition(n);
169   return true;
170 }
171 
172 // Tn
173 template <typename CONTEXT>
174 static RT_API_ATTRS bool AbsoluteTabbing(CONTEXT &context, int n) {
175   ConnectionState &connection{context.GetConnectionState()};
176   n = n > 0 ? n - 1 : 0; // convert 1-based position to 0-based offset
177   if constexpr (std::is_same_v<CONTEXT,
178                     ExternalFormattedIoStatementState<Direction::Input>> ||
179       std::is_same_v<CONTEXT,
180           ExternalFormattedIoStatementState<Direction::Output>>) {
181     if (connection.isUTF8) {
182       // Reset to the beginning of the record, then TR(n-1)
183       connection.HandleAbsolutePosition(0);
184       return RelativeTabbing(context, n);
185     }
186   }
187   if (connection.internalIoCharKind > 1) {
188     n *= connection.internalIoCharKind;
189   }
190   context.HandleAbsolutePosition(n);
191   return true;
192 }
193 
194 template <typename CONTEXT>
195 static RT_API_ATTRS void HandleControl(
196     CONTEXT &context, char ch, char next, int n) {
197   MutableModes &modes{context.mutableModes()};
198   switch (ch) {
199   case 'B':
200     if (next == 'Z') {
201       modes.editingFlags |= blankZero;
202       return;
203     }
204     if (next == 'N') {
205       modes.editingFlags &= ~blankZero;
206       return;
207     }
208     break;
209   case 'D':
210     if (next == 'C') {
211       modes.editingFlags |= decimalComma;
212       return;
213     }
214     if (next == 'P') {
215       modes.editingFlags &= ~decimalComma;
216       return;
217     }
218     break;
219   case 'P':
220     if (!next) {
221       modes.scale = n; // kP - decimal scaling by 10**k
222       return;
223     }
224     break;
225   case 'R':
226     switch (next) {
227     case 'N':
228       modes.round = decimal::RoundNearest;
229       return;
230     case 'Z':
231       modes.round = decimal::RoundToZero;
232       return;
233     case 'U':
234       modes.round = decimal::RoundUp;
235       return;
236     case 'D':
237       modes.round = decimal::RoundDown;
238       return;
239     case 'C':
240       modes.round = decimal::RoundCompatible;
241       return;
242     case 'P':
243       modes.round = executionEnvironment.defaultOutputRoundingMode;
244       return;
245     default:
246       break;
247     }
248     break;
249   case 'X':
250     if (!next && RelativeTabbing(context, n)) {
251       return;
252     }
253     break;
254   case 'S':
255     if (next == 'P') {
256       modes.editingFlags |= signPlus;
257       return;
258     }
259     if (!next || next == 'S') {
260       modes.editingFlags &= ~signPlus;
261       return;
262     }
263     break;
264   case 'T': {
265     if (!next) { // Tn
266       if (AbsoluteTabbing(context, n)) {
267         return;
268       }
269     } else if (next == 'R' || next == 'L') { // TRn / TLn
270       if (RelativeTabbing(context, next == 'L' ? -n : n)) {
271         return;
272       }
273     }
274   } break;
275   default:
276     break;
277   }
278   if (next) {
279     context.SignalError(IostatErrorInFormat,
280         "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
281   } else {
282     context.SignalError(
283         IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
284   }
285 }
286 
287 // Locates the next data edit descriptor in the format.
288 // Handles all repetition counts and control edit descriptors.
289 // Generally assumes that the format string has survived the common
290 // format validator gauntlet.
291 template <typename CONTEXT>
292 RT_API_ATTRS int FormatControl<CONTEXT>::CueUpNextDataEdit(
293     Context &context, bool stop) {
294   bool hitUnlimitedLoopEnd{false};
295   // Do repetitions remain on an unparenthesized data edit?
296   while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
297     offset_ = stack_[height_ - 1].start;
298     int repeat{stack_[height_ - 1].remaining};
299     --height_;
300     if (repeat > 0) {
301       return repeat;
302     }
303   }
304   while (true) {
305     Fortran::common::optional<int> repeat;
306     bool unlimited{false};
307     auto maybeReversionPoint{offset_};
308     CharType ch{GetNextChar(context)};
309     while (ch == ',' || ch == ':') {
310       // Skip commas, and don't complain if they're missing; the format
311       // validator does that.
312       if (stop && ch == ':') {
313         return 0;
314       }
315       ch = GetNextChar(context);
316     }
317     if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
318       bool hadSign{ch == '-' || ch == '+'};
319       repeat = GetIntField(context, ch);
320       ch = GetNextChar(context);
321       if (hadSign && ch != 'p' && ch != 'P') {
322         ReportBadFormat(context,
323             "Invalid FORMAT: signed integer may appear only before 'P",
324             maybeReversionPoint);
325         return 0;
326       }
327     } else if (ch == '*') {
328       unlimited = true;
329       ch = GetNextChar(context);
330       if (ch != '(') {
331         ReportBadFormat(context,
332             "Invalid FORMAT: '*' may appear only before '('",
333             maybeReversionPoint);
334         return 0;
335       }
336       if (height_ != 1) {
337         ReportBadFormat(context,
338             "Invalid FORMAT: '*' must be nested in exactly one set of "
339             "parentheses",
340             maybeReversionPoint);
341         return 0;
342       }
343     }
344     ch = Capitalize(ch);
345     if (ch == '(') {
346       if (height_ >= maxHeight_) {
347         ReportBadFormat(context,
348             "FORMAT stack overflow: too many nested parentheses",
349             maybeReversionPoint);
350         return 0;
351       }
352       stack_[height_].start = offset_ - 1; // the '('
353       RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
354       if (unlimited || height_ == 0) {
355         stack_[height_].remaining = Iteration::unlimited;
356       } else if (repeat) {
357         if (*repeat <= 0) {
358           *repeat = 1; // error recovery
359         }
360         stack_[height_].remaining = *repeat - 1;
361       } else {
362         stack_[height_].remaining = 0;
363       }
364       if (height_ == 1 && !hitEnd_) {
365         // Subtle point (F'2018 13.4 para 9): the last parenthesized group
366         // at height 1 becomes the restart point after control reaches the
367         // end of the format, including its repeat count.
368         stack_[0].start = maybeReversionPoint;
369       }
370       ++height_;
371     } else if (height_ == 0) {
372       ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
373       return 0;
374     } else if (ch == ')') {
375       if (height_ == 1) {
376         hitEnd_ = true;
377         if (stop) {
378           return 0; // end of FORMAT and no data items remain
379         }
380         context.AdvanceRecord(); // implied / before rightmost )
381       }
382       auto restart{stack_[height_ - 1].start};
383       if (format_[restart] == '(') {
384         ++restart;
385       }
386       if (stack_[height_ - 1].remaining == Iteration::unlimited) {
387         if (height_ > 1 && GetNextChar(context) != ')') {
388           ReportBadFormat(context,
389               "Unlimited repetition in FORMAT may not be followed by more "
390               "items",
391               restart);
392           return 0;
393         }
394         if (hitUnlimitedLoopEnd) {
395           ReportBadFormat(context,
396               "Unlimited repetition in FORMAT lacks data edit descriptors",
397               restart);
398           return 0;
399         }
400         hitUnlimitedLoopEnd = true;
401         offset_ = restart;
402       } else if (stack_[height_ - 1].remaining-- > 0) {
403         offset_ = restart;
404       } else {
405         --height_;
406       }
407     } else if (ch == '\'' || ch == '"') {
408       // Quoted 'character literal'
409       CharType quote{ch};
410       auto start{offset_};
411       while (offset_ < formatLength_ && format_[offset_] != quote) {
412         ++offset_;
413       }
414       if (offset_ >= formatLength_) {
415         ReportBadFormat(context,
416             "FORMAT missing closing quote on character literal",
417             maybeReversionPoint);
418         return 0;
419       }
420       ++offset_;
421       std::size_t chars{
422           static_cast<std::size_t>(&format_[offset_] - &format_[start])};
423       if (offset_ < formatLength_ && format_[offset_] == quote) {
424         // subtle: handle doubled quote character in a literal by including
425         // the first in the output, then treating the second as the start
426         // of another character literal.
427       } else {
428         --chars;
429       }
430       EmitAscii(context, format_ + start, chars);
431     } else if (ch == 'H') {
432       // 9HHOLLERITH
433       if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
434         ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
435             maybeReversionPoint);
436         return 0;
437       }
438       EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat));
439       offset_ += *repeat;
440     } else if (ch >= 'A' && ch <= 'Z') {
441       int start{offset_ - 1};
442       CharType next{'\0'};
443       if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
444         CharType peek{Capitalize(PeekNext())};
445         if (peek >= 'A' && peek <= 'Z') {
446           if ((ch == 'A' && peek == 'T' /* anticipate F'202X AT editing */) ||
447               ch == 'B' || ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' ||
448               ch == 'T') {
449             // Assume a two-letter edit descriptor
450             next = peek;
451             ++offset_;
452           } else {
453             // extension: assume a comma between 'ch' and 'peek'
454           }
455         }
456       }
457       if ((!next &&
458               (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
459                   ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
460                   ch == 'L')) ||
461           (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
462           (ch == 'D' && next == 'T')) {
463         // Data edit descriptor found
464         offset_ = start;
465         return repeat && *repeat > 0 ? *repeat : 1;
466       } else {
467         // Control edit descriptor
468         if (ch == 'T') { // Tn, TLn, TRn
469           repeat = GetIntField(context);
470         }
471         HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
472             repeat ? *repeat : 1);
473       }
474     } else if (ch == '/') {
475       context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
476     } else if (ch == '$' || ch == '\\') {
477       context.mutableModes().nonAdvancing = true;
478     } else if (ch == '\t' || ch == '\v') {
479       // Tabs (extension)
480       // TODO: any other raw characters?
481       EmitAscii(context, format_ + offset_ - 1, 1);
482     } else {
483       ReportBadFormat(
484           context, "Invalid character in FORMAT", maybeReversionPoint);
485       return 0;
486     }
487   }
488 }
489 
490 // Returns the next data edit descriptor
491 template <typename CONTEXT>
492 RT_API_ATTRS Fortran::common::optional<DataEdit>
493 FormatControl<CONTEXT>::GetNextDataEdit(Context &context, int maxRepeat) {
494   int repeat{CueUpNextDataEdit(context)};
495   auto start{offset_};
496   DataEdit edit;
497   edit.modes = context.mutableModes();
498   // Handle repeated nonparenthesized edit descriptors
499   edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
500   if (repeat > maxRepeat) {
501     stack_[height_].start = start; // after repeat count
502     stack_[height_].remaining = repeat - edit.repeat;
503     ++height_;
504   }
505   edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
506   if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
507     // DT['iotype'][(v_list)] defined I/O
508     edit.descriptor = DataEdit::DefinedDerivedType;
509     ++offset_;
510     if (auto quote{static_cast<char>(PeekNext())};
511         quote == '\'' || quote == '"') {
512       // Capture the quoted 'iotype'
513       bool ok{false};
514       for (++offset_; offset_ < formatLength_;) {
515         auto ch{static_cast<char>(format_[offset_++])};
516         if (ch == quote &&
517             (offset_ == formatLength_ ||
518                 static_cast<char>(format_[offset_]) != quote)) {
519           ok = true;
520           break; // that was terminating quote
521         }
522         if (edit.ioTypeChars >= edit.maxIoTypeChars) {
523           ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
524           return Fortran::common::nullopt;
525         }
526         edit.ioType[edit.ioTypeChars++] = ch;
527         if (ch == quote) {
528           ++offset_;
529         }
530       }
531       if (!ok) {
532         ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
533         return Fortran::common::nullopt;
534       }
535     }
536     if (PeekNext() == '(') {
537       // Capture the v_list arguments
538       bool ok{false};
539       for (++offset_; offset_ < formatLength_;) {
540         bool hadError{false};
541         int n{GetIntField(context, '\0', &hadError)};
542         if (hadError) {
543           ok = false;
544           break;
545         }
546         if (edit.vListEntries >= edit.maxVListEntries) {
547           ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
548           return Fortran::common::nullopt;
549         }
550         edit.vList[edit.vListEntries++] = n;
551         auto ch{static_cast<char>(GetNextChar(context))};
552         if (ch != ',') {
553           ok = ch == ')';
554           break;
555         }
556       }
557       if (!ok) {
558         ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
559         return Fortran::common::nullopt;
560       }
561     }
562   } else { // not DT'iotype'
563     if (edit.descriptor == 'E') {
564       if (auto next{static_cast<char>(Capitalize(PeekNext()))};
565           next == 'N' || next == 'S' || next == 'X') {
566         edit.variation = next;
567         ++offset_;
568       }
569     }
570     // Width is optional for A[w] in the standard and optional
571     // for Lw in most compilers.
572     // Intel & (presumably, from bug report) Fujitsu allow
573     // a missing 'w' & 'd'/'m' for other edit descriptors -- but not
574     // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)".
575     if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') {
576       edit.width = GetIntField(context);
577       if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
578         if (edit.width.value_or(-1) == 0) {
579           ReportBadFormat(context, "Input field width is zero", start);
580         }
581       }
582       if (PeekNext() == '.') {
583         ++offset_;
584         edit.digits = GetIntField(context);
585         if (CharType ch{PeekNext()};
586             ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
587           ++offset_;
588           edit.expoDigits = GetIntField(context);
589         }
590       }
591     }
592   }
593   return edit;
594 }
595 
596 template <typename CONTEXT>
597 RT_API_ATTRS void FormatControl<CONTEXT>::Finish(Context &context) {
598   CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
599   if (freeFormat_) {
600     FreeMemory(const_cast<CharType *>(format_));
601   }
602 }
603 } // namespace Fortran::runtime::io
604 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
605