xref: /llvm-project/flang/runtime/edit-input.cpp (revision ac4202fe9daf3202f7d1e09e9a46cb962c072888)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "flang/Common/real.h"
12 #include "flang/Common/uint128.h"
13 #include <algorithm>
14 
15 namespace Fortran::runtime::io {
16 
17 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
18     int base, int totalBitSize) {
19   std::optional<int> remaining;
20   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
21   common::UnsignedInt128 value{0};
22   for (; next; next = io.NextInField(remaining)) {
23     char32_t ch{*next};
24     if (ch == ' ' || ch == '\t') {
25       continue;
26     }
27     int digit{0};
28     if (ch >= '0' && ch <= '1') {
29       digit = ch - '0';
30     } else if (base >= 8 && ch >= '2' && ch <= '7') {
31       digit = ch - '0';
32     } else if (base >= 10 && ch >= '8' && ch <= '9') {
33       digit = ch - '0';
34     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
35       digit = ch + 10 - 'A';
36     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
37       digit = ch + 10 - 'a';
38     } else {
39       io.GetIoErrorHandler().SignalError(
40           "Bad character '%lc' in B/O/Z input field", ch);
41       return false;
42     }
43     value *= base;
44     value += digit;
45   }
46   // TODO: check for overflow
47   std::memcpy(n, &value, totalBitSize >> 3);
48   return true;
49 }
50 
51 // Prepares input from a field, and consumes the sign, if any.
52 // Returns true if there's a '-' sign.
53 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
54     std::optional<char32_t> &next, std::optional<int> &remaining) {
55   next = io.PrepareInput(edit, remaining);
56   bool negative{false};
57   if (next) {
58     negative = *next == '-';
59     if (negative || *next == '+') {
60       io.GotChar();
61       io.SkipSpaces(remaining);
62       next = io.NextInField(remaining);
63     }
64   }
65   return negative;
66 }
67 
68 bool EditIntegerInput(
69     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
70   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
71   switch (edit.descriptor) {
72   case DataEdit::ListDirected:
73     if (IsNamelistName(io)) {
74       return false;
75     }
76     break;
77   case 'G':
78   case 'I':
79     break;
80   case 'B':
81     return EditBOZInput(io, edit, n, 2, kind << 3);
82   case 'O':
83     return EditBOZInput(io, edit, n, 8, kind << 3);
84   case 'Z':
85     return EditBOZInput(io, edit, n, 16, kind << 3);
86   case 'A': // legacy extension
87     return EditDefaultCharacterInput(
88         io, edit, reinterpret_cast<char *>(n), kind);
89   default:
90     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
91         "Data edit descriptor '%c' may not be used with an INTEGER data item",
92         edit.descriptor);
93     return false;
94   }
95   std::optional<int> remaining;
96   std::optional<char32_t> next;
97   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
98   common::UnsignedInt128 value;
99   bool any{false};
100   for (; next; next = io.NextInField(remaining)) {
101     char32_t ch{*next};
102     if (ch == ' ' || ch == '\t') {
103       if (edit.modes.editingFlags & blankZero) {
104         ch = '0'; // BZ mode - treat blank as if it were zero
105       } else {
106         continue;
107       }
108     }
109     int digit{0};
110     if (ch >= '0' && ch <= '9') {
111       digit = ch - '0';
112     } else {
113       io.GetIoErrorHandler().SignalError(
114           "Bad character '%lc' in INTEGER input field", ch);
115       return false;
116     }
117     value *= 10;
118     value += digit;
119     any = true;
120   }
121   if (any) {
122     if (negate) {
123       value = -value;
124     }
125     std::memcpy(n, &value, kind);
126   }
127   return any;
128 }
129 
130 // Parses a REAL input number from the input source as a normalized
131 // fraction into a supplied buffer -- there's an optional '-', a
132 // decimal point, and at least one digit.  The adjusted exponent value
133 // is returned in a reference argument.  The returned value is the number
134 // of characters that (should) have been written to the buffer -- this can
135 // be larger than the buffer size and can indicate overflow.  Replaces
136 // blanks with zeroes if appropriate.
137 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
138     const DataEdit &edit, int &exponent) {
139   std::optional<int> remaining;
140   std::optional<char32_t> next;
141   int got{0};
142   std::optional<int> decimalPoint;
143   auto Put{[&](char ch) -> void {
144     if (got < bufferSize) {
145       buffer[got] = ch;
146     }
147     ++got;
148   }};
149   if (ScanNumericPrefix(io, edit, next, remaining)) {
150     Put('-');
151   }
152   if (next.value_or(' ') == ' ') { // empty/blank field means zero
153     remaining.reset();
154     Put('0');
155     return got;
156   }
157   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
158   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
159   if (first == 'N' || first == 'I') {
160     // NaN or infinity - convert to upper case
161     // Subtle: a blank field of digits could be followed by 'E' or 'D',
162     for (; next &&
163          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
164          next = io.NextInField(remaining)) {
165       if (*next >= 'a' && *next <= 'z') {
166         Put(*next - 'a' + 'A');
167       } else {
168         Put(*next);
169       }
170     }
171     if (next && *next == '(') { // NaN(...)
172       while (next && *next != ')') {
173         next = io.NextInField(remaining);
174       }
175     }
176     exponent = 0;
177   } else if (first == decimal || (first >= '0' && first <= '9') ||
178       first == 'E' || first == 'D' || first == 'Q') {
179     Put('.'); // input field is normalized to a fraction
180     auto start{got};
181     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
182     for (; next; next = io.NextInField(remaining)) {
183       char32_t ch{*next};
184       if (ch == ' ' || ch == '\t') {
185         if (bzMode) {
186           ch = '0'; // BZ mode - treat blank as if it were zero
187         } else {
188           continue;
189         }
190       }
191       if (ch == '0' && got == start && !decimalPoint) {
192         // omit leading zeroes before the decimal
193       } else if (ch >= '0' && ch <= '9') {
194         Put(ch);
195       } else if (ch == decimal && !decimalPoint) {
196         // the decimal point is *not* copied to the buffer
197         decimalPoint = got - start; // # of digits before the decimal point
198       } else {
199         break;
200       }
201     }
202     if (got == start) {
203       Put('0'); // emit at least one digit
204     }
205     if (next &&
206         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
207             *next == 'q' || *next == 'Q')) {
208       // Optional exponent letter.  Blanks are allowed between the
209       // optional exponent letter and the exponent value.
210       io.SkipSpaces(remaining);
211       next = io.NextInField(remaining);
212     }
213     // The default exponent is -kP, but the scale factor doesn't affect
214     // an explicit exponent.
215     exponent = -edit.modes.scale;
216     if (next &&
217         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
218             (bzMode && (*next == ' ' || *next == '\t')))) {
219       bool negExpo{*next == '-'};
220       if (negExpo || *next == '+') {
221         next = io.NextInField(remaining);
222       }
223       for (exponent = 0; next; next = io.NextInField(remaining)) {
224         if (*next >= '0' && *next <= '9') {
225           exponent = 10 * exponent + *next - '0';
226         } else if (bzMode && (*next == ' ' || *next == '\t')) {
227           exponent = 10 * exponent;
228         } else {
229           break;
230         }
231       }
232       if (negExpo) {
233         exponent = -exponent;
234       }
235     }
236     if (decimalPoint) {
237       exponent += *decimalPoint;
238     } else {
239       // When no decimal point (or comma) appears in the value, the 'd'
240       // part of the edit descriptor must be interpreted as the number of
241       // digits in the value to be interpreted as being to the *right* of
242       // the assumed decimal point (13.7.2.3.2)
243       exponent += got - start - edit.digits.value_or(0);
244     }
245   } else {
246     // TODO: hex FP input
247     exponent = 0;
248     return 0;
249   }
250   // Consume the trailing ')' of a list-directed or NAMELIST complex
251   // input value.
252   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
253     if (next && (*next == ' ' || *next == '\t')) {
254       next = io.NextInField(remaining);
255     }
256     if (!next) { // NextInField fails on separators like ')'
257       next = io.GetCurrentChar();
258       if (next && *next == ')') {
259         io.HandleRelativePosition(1);
260       }
261     }
262   } else if (remaining) {
263     while (next && (*next == ' ' || *next == '\t')) {
264       next = io.NextInField(remaining);
265     }
266     if (next) {
267       return 0; // error: unused nonblank character in fixed-width field
268     }
269   }
270   return got;
271 }
272 
273 // If no special modes are in effect and the form of the input value
274 // that's present in the input stream is acceptable to the decimal->binary
275 // converter without modification, this fast path for real input
276 // saves time by avoiding memory copies and reformatting of the exponent.
277 template <int PRECISION>
278 static bool TryFastPathRealInput(
279     IoStatementState &io, const DataEdit &edit, void *n) {
280   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
281     return false;
282   }
283   if (edit.modes.scale != 0) {
284     return false;
285   }
286   const char *str{nullptr};
287   std::size_t got{io.GetNextInputBytes(str)};
288   if (got == 0 || str == nullptr ||
289       !io.GetConnectionState().recordLength.has_value()) {
290     return false; // could not access reliably-terminated input stream
291   }
292   const char *p{str};
293   std::int64_t maxConsume{
294       std::min<std::int64_t>(got, edit.width.value_or(got))};
295   const char *limit{str + maxConsume};
296   decimal::ConversionToBinaryResult<PRECISION> converted{
297       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
298   if (converted.flags & decimal::Invalid) {
299     return false;
300   }
301   if (edit.digits.value_or(0) != 0 &&
302       std::memchr(str, '.', p - str) == nullptr) {
303     // No explicit decimal point, and edit descriptor is Fw.d (or other)
304     // with d != 0, which implies scaling.
305     return false;
306   }
307   for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
308   }
309   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
310     // Need to consume a trailing ')' and any white space after
311     if (p >= limit || *p != ')') {
312       return false;
313     }
314     for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
315     }
316   }
317   if (edit.width && p < str + *edit.width) {
318     return false; // unconverted characters remain in fixed width field
319   }
320   // Success on the fast path!
321   // TODO: raise converted.flags as exceptions?
322   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
323       converted.binary;
324   io.HandleRelativePosition(p - str);
325   return true;
326 }
327 
328 template <int KIND>
329 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
330   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
331   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
332     return true;
333   }
334   // Fast path wasn't available or didn't work; go the more general route
335   static constexpr int maxDigits{
336       common::MaxDecimalConversionDigits(binaryPrecision)};
337   static constexpr int bufferSize{maxDigits + 18};
338   char buffer[bufferSize];
339   int exponent{0};
340   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
341   if (got >= maxDigits + 2) {
342     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
343     return false;
344   }
345   if (got == 0) {
346     io.GetIoErrorHandler().SignalError("Bad REAL input value");
347     return false;
348   }
349   bool hadExtra{got > maxDigits};
350   if (exponent != 0) {
351     buffer[got++] = 'e';
352     if (exponent < 0) {
353       buffer[got++] = '-';
354       exponent = -exponent;
355     }
356     if (exponent > 9999) {
357       exponent = 9999; // will convert to +/-Inf
358     }
359     if (exponent > 999) {
360       int dig{exponent / 1000};
361       buffer[got++] = '0' + dig;
362       int rest{exponent - 1000 * dig};
363       dig = rest / 100;
364       buffer[got++] = '0' + dig;
365       rest -= 100 * dig;
366       dig = rest / 10;
367       buffer[got++] = '0' + dig;
368       buffer[got++] = '0' + (rest - 10 * dig);
369     } else if (exponent > 99) {
370       int dig{exponent / 100};
371       buffer[got++] = '0' + dig;
372       int rest{exponent - 100 * dig};
373       dig = rest / 10;
374       buffer[got++] = '0' + dig;
375       buffer[got++] = '0' + (rest - 10 * dig);
376     } else if (exponent > 9) {
377       int dig{exponent / 10};
378       buffer[got++] = '0' + dig;
379       buffer[got++] = '0' + (exponent - 10 * dig);
380     } else {
381       buffer[got++] = '0' + exponent;
382     }
383   }
384   buffer[got] = '\0';
385   const char *p{buffer};
386   decimal::ConversionToBinaryResult<binaryPrecision> converted{
387       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
388   if (hadExtra) {
389     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
390         converted.flags | decimal::Inexact);
391   }
392   // TODO: raise converted.flags as exceptions?
393   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
394       converted.binary;
395   return true;
396 }
397 
398 template <int KIND>
399 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
400   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
401   switch (edit.descriptor) {
402   case DataEdit::ListDirected:
403     if (IsNamelistName(io)) {
404       return false;
405     }
406     return EditCommonRealInput<KIND>(io, edit, n);
407   case DataEdit::ListDirectedRealPart:
408   case DataEdit::ListDirectedImaginaryPart:
409   case 'F':
410   case 'E': // incl. EN, ES, & EX
411   case 'D':
412   case 'G':
413     return EditCommonRealInput<KIND>(io, edit, n);
414   case 'B':
415     return EditBOZInput(
416         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
417   case 'O':
418     return EditBOZInput(
419         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
420   case 'Z':
421     return EditBOZInput(
422         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
423   case 'A': // legacy extension
424     return EditDefaultCharacterInput(
425         io, edit, reinterpret_cast<char *>(n), KIND);
426   default:
427     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
428         "Data edit descriptor '%c' may not be used for REAL input",
429         edit.descriptor);
430     return false;
431   }
432 }
433 
434 // 13.7.3 in Fortran 2018
435 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
436   switch (edit.descriptor) {
437   case DataEdit::ListDirected:
438     if (IsNamelistName(io)) {
439       return false;
440     }
441     break;
442   case 'L':
443   case 'G':
444     break;
445   default:
446     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
447         "Data edit descriptor '%c' may not be used for LOGICAL input",
448         edit.descriptor);
449     return false;
450   }
451   std::optional<int> remaining;
452   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
453   if (next && *next == '.') { // skip optional period
454     next = io.NextInField(remaining);
455   }
456   if (!next) {
457     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
458     return false;
459   }
460   switch (*next) {
461   case 'T':
462   case 't':
463     x = true;
464     break;
465   case 'F':
466   case 'f':
467     x = false;
468     break;
469   default:
470     io.GetIoErrorHandler().SignalError(
471         "Bad character '%lc' in LOGICAL input field", *next);
472     return false;
473   }
474   if (remaining) { // ignore the rest of the field
475     io.HandleRelativePosition(*remaining);
476   } else if (edit.descriptor == DataEdit::ListDirected) {
477     while (io.NextInField(remaining)) { // discard rest of field
478     }
479   }
480   return true;
481 }
482 
483 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
484 static bool EditDelimitedCharacterInput(
485     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
486   bool result{true};
487   while (true) {
488     auto ch{io.GetCurrentChar()};
489     if (!ch) {
490       if (io.AdvanceRecord()) {
491         continue;
492       } else {
493         result = false; // EOF in character value
494         break;
495       }
496     }
497     io.HandleRelativePosition(1);
498     if (*ch == delimiter) {
499       auto next{io.GetCurrentChar()};
500       if (next && *next == delimiter) {
501         // Repeated delimiter: use as character value
502         io.HandleRelativePosition(1);
503       } else {
504         break; // closing delimiter
505       }
506     }
507     if (length > 0) {
508       *x++ = *ch;
509       --length;
510     }
511   }
512   std::fill_n(x, length, ' ');
513   return result;
514 }
515 
516 static bool EditListDirectedDefaultCharacterInput(
517     IoStatementState &io, char *x, std::size_t length) {
518   auto ch{io.GetCurrentChar()};
519   if (ch && (*ch == '\'' || *ch == '"')) {
520     io.HandleRelativePosition(1);
521     return EditDelimitedCharacterInput(io, x, length, *ch);
522   }
523   if (IsNamelistName(io)) {
524     return false;
525   }
526   // Undelimited list-directed character input: stop at a value separator
527   // or the end of the current record.
528   std::optional<int> remaining{length};
529   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
530        next = io.NextInField(remaining)) {
531     switch (*next) {
532     case ' ':
533     case '\t':
534     case ',':
535     case ';':
536     case '/':
537       remaining = 0; // value separator: stop
538       break;
539     default:
540       *x++ = *next;
541       --length;
542     }
543   }
544   std::fill_n(x, length, ' ');
545   return true;
546 }
547 
548 bool EditDefaultCharacterInput(
549     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
550   switch (edit.descriptor) {
551   case DataEdit::ListDirected:
552     return EditListDirectedDefaultCharacterInput(io, x, length);
553   case 'A':
554   case 'G':
555     break;
556   default:
557     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
558         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
559         edit.descriptor);
560     return false;
561   }
562   std::optional<int> remaining{length};
563   if (edit.width && *edit.width > 0) {
564     remaining = *edit.width;
565   }
566   // When the field is wider than the variable, we drop the leading
567   // characters.  When the variable is wider than the field, there's
568   // trailing padding.
569   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
570   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
571        next = io.NextInField(remaining)) {
572     if (skip > 0) {
573       --skip;
574       io.GotChar(-1);
575     } else {
576       *x++ = *next;
577       --length;
578     }
579   }
580   std::fill_n(x, length, ' ');
581   return true;
582 }
583 
584 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
585 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
586 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
587 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
588 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
589 // TODO: double/double
590 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
591 } // namespace Fortran::runtime::io
592