xref: /llvm-project/flang/runtime/edit-input.cpp (revision fdccfa33d96b1935e90a9148a661f51ea8b46aa3)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/real.h"
13 #include "flang/Common/uint128.h"
14 #include <algorithm>
15 #include <cfenv>
16 
17 namespace Fortran::runtime::io {
18 
19 // Checks that a list-directed input value has been entirely consumed and
20 // doesn't contain unparsed characters before the next value separator.
21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
22   char32_t comma{
23       edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
24   return ch == ' ' || ch == '\t' || ch == '/' || ch == comma;
25 }
26 
27 static bool CheckCompleteListDirectedField(
28     IoStatementState &io, const DataEdit &edit) {
29   if (edit.IsListDirected()) {
30     std::size_t byteCount;
31     if (auto ch{io.GetCurrentChar(byteCount)}) {
32       if (IsCharValueSeparator(edit, *ch)) {
33         return true;
34       } else {
35         const auto &connection{io.GetConnectionState()};
36         io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
37             "invalid character (0x%x) after list-directed input value, "
38             "at column %d in record %d",
39             static_cast<unsigned>(*ch),
40             static_cast<int>(connection.positionInRecord + 1),
41             static_cast<int>(connection.currentRecordNumber));
42         return false;
43       }
44     } else {
45       return true; // end of record: ok
46     }
47   } else {
48     return true;
49   }
50 }
51 
52 template <int LOG2_BASE>
53 static bool EditBOZInput(
54     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
55   // Skip leading white space & zeroes
56   std::optional<int> remaining{io.CueUpInput(edit)};
57   auto start{io.GetConnectionState().positionInRecord};
58   std::optional<char32_t> next{io.NextInField(remaining, edit)};
59   if (next.value_or('?') == '0') {
60     do {
61       start = io.GetConnectionState().positionInRecord;
62       next = io.NextInField(remaining, edit);
63     } while (next && *next == '0');
64   }
65   // Count significant digits after any leading white space & zeroes
66   int digits{0};
67   int significantBits{0};
68   for (; next; next = io.NextInField(remaining, edit)) {
69     char32_t ch{*next};
70     if (ch == ' ' || ch == '\t') {
71       if (edit.modes.editingFlags & blankZero) {
72         ch = '0'; // BZ mode - treat blank as if it were zero
73       } else {
74         continue;
75       }
76     }
77     if (ch >= '0' && ch <= '1') {
78     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
79     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
80     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
81     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
82     } else {
83       io.GetIoErrorHandler().SignalError(
84           "Bad character '%lc' in B/O/Z input field", ch);
85       return false;
86     }
87     if (digits++ == 0) {
88       significantBits = 4;
89       if (ch >= '0' && ch <= '1') {
90         significantBits = 1;
91       } else if (ch >= '2' && ch <= '3') {
92         significantBits = 2;
93       } else if (ch >= '4' && ch <= '7') {
94         significantBits = 3;
95       } else {
96         significantBits = 4;
97       }
98     } else {
99       significantBits += LOG2_BASE;
100     }
101   }
102   auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
103   if (significantBytes > bytes) {
104     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
105         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
106     return false;
107   }
108   // Reset to start of significant digits
109   io.HandleAbsolutePosition(start);
110   remaining.reset();
111   // Make a second pass now that the digit count is known
112   std::memset(n, 0, bytes);
113   int increment{isHostLittleEndian ? -1 : 1};
114   auto *data{reinterpret_cast<unsigned char *>(n) +
115       (isHostLittleEndian ? significantBytes - 1 : 0)};
116   int shift{((digits - 1) * LOG2_BASE) & 7};
117   while (digits > 0) {
118     char32_t ch{*io.NextInField(remaining, edit)};
119     int digit{0};
120     if (ch == ' ' || ch == '\t') {
121       if (edit.modes.editingFlags & blankZero) {
122         ch = '0'; // BZ mode - treat blank as if it were zero
123       } else {
124         continue;
125       }
126     }
127     --digits;
128     if (ch >= '0' && ch <= '9') {
129       digit = ch - '0';
130     } else if (ch >= 'A' && ch <= 'F') {
131       digit = ch + 10 - 'A';
132     } else if (ch >= 'a' && ch <= 'f') {
133       digit = ch + 10 - 'a';
134     } else {
135       continue;
136     }
137     if (shift < 0) {
138       if (shift + LOG2_BASE > 0) { // misaligned octal
139         *data |= digit >> -shift;
140       }
141       shift += 8;
142       data += increment;
143     }
144     *data |= digit << shift;
145     shift -= LOG2_BASE;
146   }
147   return CheckCompleteListDirectedField(io, edit);
148 }
149 
150 static inline char32_t GetRadixPointChar(const DataEdit &edit) {
151   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
152 }
153 
154 // Prepares input from a field, and returns the sign, if any, else '\0'.
155 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
156     std::optional<char32_t> &next, std::optional<int> &remaining) {
157   remaining = io.CueUpInput(edit);
158   next = io.NextInField(remaining, edit);
159   char sign{'\0'};
160   if (next) {
161     if (*next == '-' || *next == '+') {
162       sign = *next;
163       if (!edit.IsListDirected()) {
164         io.SkipSpaces(remaining);
165       }
166       next = io.NextInField(remaining, edit);
167     }
168   }
169   return sign;
170 }
171 
172 bool EditIntegerInput(
173     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
174   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
175   switch (edit.descriptor) {
176   case DataEdit::ListDirected:
177     if (IsNamelistNameOrSlash(io)) {
178       return false;
179     }
180     break;
181   case 'G':
182   case 'I':
183     break;
184   case 'B':
185     return EditBOZInput<1>(io, edit, n, kind);
186   case 'O':
187     return EditBOZInput<3>(io, edit, n, kind);
188   case 'Z':
189     return EditBOZInput<4>(io, edit, n, kind);
190   case 'A': // legacy extension
191     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
192   default:
193     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
194         "Data edit descriptor '%c' may not be used with an INTEGER data item",
195         edit.descriptor);
196     return false;
197   }
198   std::optional<int> remaining;
199   std::optional<char32_t> next;
200   char sign{ScanNumericPrefix(io, edit, next, remaining)};
201   common::UnsignedInt128 value{0};
202   bool any{!!sign};
203   bool overflow{false};
204   for (; next; next = io.NextInField(remaining, edit)) {
205     char32_t ch{*next};
206     if (ch == ' ' || ch == '\t') {
207       if (edit.modes.editingFlags & blankZero) {
208         ch = '0'; // BZ mode - treat blank as if it were zero
209       } else {
210         continue;
211       }
212     }
213     int digit{0};
214     if (ch >= '0' && ch <= '9') {
215       digit = ch - '0';
216     } else {
217       io.GetIoErrorHandler().SignalError(
218           "Bad character '%lc' in INTEGER input field", ch);
219       return false;
220     }
221     static constexpr auto maxu128{~common::UnsignedInt128{0}};
222     static constexpr auto maxu128OverTen{maxu128 / 10};
223     static constexpr int maxLastDigit{
224         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
225     overflow |= value >= maxu128OverTen &&
226         (value > maxu128OverTen || digit > maxLastDigit);
227     value *= 10;
228     value += digit;
229     any = true;
230   }
231   if (!any && !remaining) {
232     io.GetIoErrorHandler().SignalError(
233         "Integer value absent from NAMELIST or list-directed input");
234     return false;
235   }
236   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
237   overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
238   if (overflow) {
239     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
240         "Decimal input overflows INTEGER(%d) variable", kind);
241     return false;
242   }
243   if (sign == '-') {
244     value = -value;
245   }
246   if (any || !io.GetConnectionState().IsAtEOF()) {
247     // The value is stored in the lower order bits on big endian platform.
248     // When memcpy, shift the value to the higher order bit.
249     auto shft{static_cast<int>(sizeof(value.low())) - kind};
250     // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
251     if (!isHostLittleEndian && shft >= 0) {
252       auto l{value.low() << (8 * shft)};
253       std::memcpy(n, &l, kind);
254     } else {
255       std::memcpy(n, &value, kind); // a blank field means zero
256     }
257   }
258   return any;
259 }
260 
261 // Parses a REAL input number from the input source as a normalized
262 // fraction into a supplied buffer -- there's an optional '-', a
263 // decimal point when the input is not hexadecimal, and at least one
264 // digit.  Replaces blanks with zeroes where appropriate.
265 struct ScannedRealInput {
266   // Number of characters that (should) have been written to the
267   // buffer -- this can be larger than the buffer size, which
268   // indicates buffer overflow.  Zero indicates an error.
269   int got{0};
270   int exponent{0}; // adjusted as necessary; binary if isHexadecimal
271   bool isHexadecimal{false}; // 0X...
272 };
273 static ScannedRealInput ScanRealInput(
274     char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
275   std::optional<int> remaining;
276   std::optional<char32_t> next;
277   int got{0};
278   std::optional<int> radixPointOffset;
279   auto Put{[&](char ch) -> void {
280     if (got < bufferSize) {
281       buffer[got] = ch;
282     }
283     ++got;
284   }};
285   char sign{ScanNumericPrefix(io, edit, next, remaining)};
286   if (sign == '-') {
287     Put('-');
288   }
289   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
290   int exponent{0};
291   if (!next || (!bzMode && *next == ' ')) {
292     if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
293       // An empty/blank field means zero when not list-directed.
294       // A fixed-width field containing only a sign is also zero;
295       // this behavior isn't standard-conforming in F'2023 but it is
296       // required to pass FCVS.
297       Put('0');
298     }
299     return {got, exponent, false};
300   }
301   char32_t radixPointChar{GetRadixPointChar(edit)};
302   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
303   bool isHexadecimal{false};
304   if (first == 'N' || first == 'I') {
305     // NaN or infinity - convert to upper case
306     // Subtle: a blank field of digits could be followed by 'E' or 'D',
307     for (; next &&
308          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
309          next = io.NextInField(remaining, edit)) {
310       if (*next >= 'a' && *next <= 'z') {
311         Put(*next - 'a' + 'A');
312       } else {
313         Put(*next);
314       }
315     }
316     if (next && *next == '(') { // NaN(...)
317       Put('(');
318       int depth{1};
319       while (true) {
320         next = io.NextInField(remaining, edit);
321         if (depth == 0) {
322           break;
323         } else if (!next) {
324           return {}; // error
325         } else if (*next == '(') {
326           ++depth;
327         } else if (*next == ')') {
328           --depth;
329         }
330         Put(*next);
331       }
332     }
333   } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
334       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
335       first == 'D' || first == 'Q') {
336     if (first == '0') {
337       next = io.NextInField(remaining, edit);
338       if (next && (*next == 'x' || *next == 'X')) { // 0X...
339         isHexadecimal = true;
340         next = io.NextInField(remaining, edit);
341       } else {
342         Put('0');
343       }
344     }
345     // input field is normalized to a fraction
346     if (!isHexadecimal) {
347       Put('.');
348     }
349     auto start{got};
350     for (; next; next = io.NextInField(remaining, edit)) {
351       char32_t ch{*next};
352       if (ch == ' ' || ch == '\t') {
353         if (isHexadecimal) {
354           return {}; // error
355         } else if (bzMode) {
356           ch = '0'; // BZ mode - treat blank as if it were zero
357         } else {
358           continue; // ignore blank in fixed field
359         }
360       }
361       if (ch == '0' && got == start && !radixPointOffset) {
362         // omit leading zeroes before the radix point
363       } else if (ch >= '0' && ch <= '9') {
364         Put(ch);
365       } else if (ch == radixPointChar && !radixPointOffset) {
366         // The radix point character is *not* copied to the buffer.
367         radixPointOffset = got - start; // # of digits before the radix point
368       } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
369         Put(ch);
370       } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
371         Put(ch - 'a' + 'A'); // normalize to capitals
372       } else {
373         break;
374       }
375     }
376     if (got == start) {
377       // Nothing but zeroes and maybe a radix point.  F'2018 requires
378       // at least one digit, but F'77 did not, and a bare "." shows up in
379       // the FCVS suite.
380       Put('0'); // emit at least one digit
381     }
382     // In list-directed input, a bad exponent is not consumed.
383     auto nextBeforeExponent{next};
384     auto startExponent{io.GetConnectionState().positionInRecord};
385     bool hasGoodExponent{false};
386     if (next) {
387       if (isHexadecimal) {
388         if (*next == 'p' || *next == 'P') {
389           next = io.NextInField(remaining, edit);
390         } else {
391           // The binary exponent is not optional in the standard.
392           return {}; // error
393         }
394       } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
395           *next == 'q' || *next == 'Q') {
396         // Optional exponent letter.  Blanks are allowed between the
397         // optional exponent letter and the exponent value.
398         io.SkipSpaces(remaining);
399         next = io.NextInField(remaining, edit);
400       }
401     }
402     if (next &&
403         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
404             *next == ' ' || *next == '\t')) {
405       bool negExpo{*next == '-'};
406       if (negExpo || *next == '+') {
407         next = io.NextInField(remaining, edit);
408       }
409       for (; next; next = io.NextInField(remaining, edit)) {
410         if (*next >= '0' && *next <= '9') {
411           hasGoodExponent = true;
412           if (exponent < 10000) {
413             exponent = 10 * exponent + *next - '0';
414           }
415         } else if (*next == ' ' || *next == '\t') {
416           if (isHexadecimal) {
417             break;
418           } else if (bzMode) {
419             hasGoodExponent = true;
420             exponent = 10 * exponent;
421           }
422         } else {
423           break;
424         }
425       }
426       if (negExpo) {
427         exponent = -exponent;
428       }
429     }
430     if (!hasGoodExponent) {
431       if (isHexadecimal) {
432         return {}; // error
433       }
434       // There isn't a good exponent; do not consume it.
435       next = nextBeforeExponent;
436       io.HandleAbsolutePosition(startExponent);
437       // The default exponent is -kP, but the scale factor doesn't affect
438       // an explicit exponent.
439       exponent = -edit.modes.scale;
440     }
441     // Adjust exponent by number of digits before the radix point.
442     if (isHexadecimal) {
443       // Exponents for hexadecimal input are binary.
444       exponent += radixPointOffset.value_or(got - start) * 4;
445     } else if (radixPointOffset) {
446       exponent += *radixPointOffset;
447     } else {
448       // When no redix point (or comma) appears in the value, the 'd'
449       // part of the edit descriptor must be interpreted as the number of
450       // digits in the value to be interpreted as being to the *right* of
451       // the assumed radix point (13.7.2.3.2)
452       exponent += got - start - edit.digits.value_or(0);
453     }
454   }
455   // Consume the trailing ')' of a list-directed or NAMELIST complex
456   // input value.
457   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
458     if (next && (*next == ' ' || *next == '\t')) {
459       io.SkipSpaces(remaining);
460       next = io.NextInField(remaining, edit);
461     }
462     if (!next) { // NextInField fails on separators like ')'
463       std::size_t byteCount{0};
464       next = io.GetCurrentChar(byteCount);
465       if (next && *next == ')') {
466         io.HandleRelativePosition(byteCount);
467       }
468     }
469   } else if (remaining) {
470     while (next && (*next == ' ' || *next == '\t')) {
471       next = io.NextInField(remaining, edit);
472     }
473     if (next) {
474       return {}; // error: unused nonblank character in fixed-width field
475     }
476   }
477   return {got, exponent, isHexadecimal};
478 }
479 
480 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
481 #undef RAISE
482 #ifdef feraisexcept // a macro in some environments; omit std::
483 #define RAISE feraiseexcept
484 #else
485 #define RAISE std::feraiseexcept
486 #endif
487   if (flags & decimal::ConversionResultFlags::Overflow) {
488     RAISE(FE_OVERFLOW);
489   }
490   if (flags & decimal::ConversionResultFlags::Underflow) {
491     RAISE(FE_UNDERFLOW);
492   }
493   if (flags & decimal::ConversionResultFlags::Inexact) {
494     RAISE(FE_INEXACT);
495   }
496   if (flags & decimal::ConversionResultFlags::Invalid) {
497     RAISE(FE_INVALID);
498   }
499 #undef RAISE
500 }
501 
502 // If no special modes are in effect and the form of the input value
503 // that's present in the input stream is acceptable to the decimal->binary
504 // converter without modification, this fast path for real input
505 // saves time by avoiding memory copies and reformatting of the exponent.
506 template <int PRECISION>
507 static bool TryFastPathRealDecimalInput(
508     IoStatementState &io, const DataEdit &edit, void *n) {
509   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
510     return false;
511   }
512   if (edit.modes.scale != 0) {
513     return false;
514   }
515   const ConnectionState &connection{io.GetConnectionState()};
516   if (connection.internalIoCharKind > 1) {
517     return false; // reading non-default character
518   }
519   const char *str{nullptr};
520   std::size_t got{io.GetNextInputBytes(str)};
521   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
522     return false; // could not access reliably-terminated input stream
523   }
524   const char *p{str};
525   std::int64_t maxConsume{
526       std::min<std::int64_t>(got, edit.width.value_or(got))};
527   const char *limit{str + maxConsume};
528   decimal::ConversionToBinaryResult<PRECISION> converted{
529       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
530   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
531     return false;
532   }
533   if (edit.digits.value_or(0) != 0) {
534     // Edit descriptor is Fw.d (or other) with d != 0, which
535     // implies scaling
536     const char *q{str};
537     for (; q < limit; ++q) {
538       if (*q == '.' || *q == 'n' || *q == 'N') {
539         break;
540       }
541     }
542     if (q == limit) {
543       // No explicit decimal point, and not NaN/Inf.
544       return false;
545     }
546   }
547   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
548     // Need to consume a trailing ')', possibly with leading spaces
549     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
550     }
551     if (p < limit && *p == ')') {
552       ++p;
553     } else {
554       return false;
555     }
556   } else if (edit.IsListDirected()) {
557     if (p < limit && !IsCharValueSeparator(edit, *p)) {
558       return false;
559     }
560   } else {
561     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
562     }
563     if (edit.width && p < str + *edit.width) {
564       return false; // unconverted characters remain in fixed width field
565     }
566   }
567   // Success on the fast path!
568   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
569       converted.binary;
570   io.HandleRelativePosition(p - str);
571   // Set FP exception flags
572   if (converted.flags != decimal::ConversionResultFlags::Exact) {
573     RaiseFPExceptions(converted.flags);
574   }
575   return true;
576 }
577 
578 template <int binaryPrecision>
579 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
580     const char *&p, enum decimal::FortranRounding rounding, int expo) {
581   using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
582   using RawType = typename RealType::RawType;
583   bool isNegative{*p == '-'};
584   constexpr RawType one{1};
585   RawType signBit{0};
586   if (isNegative) {
587     ++p;
588     signBit = one << (RealType::bits - 1);
589   }
590   RawType fraction{0};
591   // Adjust the incoming binary P+/- exponent to shift the radix point
592   // to below the LSB and add in the bias.
593   expo += binaryPrecision - 1 + RealType::exponentBias;
594   // Input the fraction.
595   int roundingBit{0};
596   int guardBit{0};
597   for (; *p; ++p) {
598     fraction <<= 4;
599     expo -= 4;
600     if (*p >= '0' && *p <= '9') {
601       fraction |= *p - '0';
602     } else if (*p >= 'A' && *p <= 'F') {
603       fraction |= *p - 'A' + 10; // data were normalized to capitals
604     } else {
605       break;
606     }
607     while (fraction >> binaryPrecision) {
608       guardBit |= roundingBit;
609       roundingBit = (int)fraction & 1;
610       fraction >>= 1;
611       ++expo;
612     }
613   }
614   if (fraction) {
615     // Boost biased expo if too small
616     while (expo < 1) {
617       guardBit |= roundingBit;
618       roundingBit = (int)fraction & 1;
619       fraction >>= 1;
620       ++expo;
621     }
622     // Normalize
623     while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
624       fraction <<= 1;
625       --expo;
626     }
627     // Rounding
628     bool increase{false};
629     switch (rounding) {
630     case decimal::RoundNearest: // RN & RP
631       increase = roundingBit && (guardBit | ((int)fraction & 1));
632       break;
633     case decimal::RoundUp: // RU
634       increase = !isNegative && (roundingBit | guardBit);
635       break;
636     case decimal::RoundDown: // RD
637       increase = isNegative && (roundingBit | guardBit);
638       break;
639     case decimal::RoundToZero: // RZ
640       break;
641     case decimal::RoundCompatible: // RC
642       increase = roundingBit != 0;
643       break;
644     }
645     if (increase) {
646       ++fraction;
647       if (fraction >> binaryPrecision) {
648         fraction >>= 1;
649         ++expo;
650       }
651     }
652   }
653   // Package & return result
654   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
655   int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
656   if (!fraction) {
657     expo = 0;
658   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
659     expo = 0; // subnormal
660     flags |= decimal::Underflow;
661   } else if (expo >= RealType::maxExponent) {
662     if (rounding == decimal::RoundToZero ||
663         (rounding == decimal::RoundDown && !isNegative) ||
664         (rounding == decimal::RoundUp && isNegative)) {
665       expo = RealType::maxExponent - 1; // +/-HUGE()
666       fraction = significandMask;
667     } else {
668       expo = RealType::maxExponent; // +/-Inf
669       fraction = 0;
670       flags |= decimal::Overflow;
671     }
672   } else {
673     fraction &= significandMask; // remove explicit normalization unless x87
674   }
675   return decimal::ConversionToBinaryResult<binaryPrecision>{
676       RealType{static_cast<RawType>(signBit |
677           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
678       static_cast<decimal::ConversionResultFlags>(flags)};
679 }
680 
681 template <int KIND>
682 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
683   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
684   if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
685     return CheckCompleteListDirectedField(io, edit);
686   }
687   // Fast path wasn't available or didn't work; go the more general route
688   static constexpr int maxDigits{
689       common::MaxDecimalConversionDigits(binaryPrecision)};
690   static constexpr int bufferSize{maxDigits + 18};
691   char buffer[bufferSize];
692   auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
693   int got{scanned.got};
694   if (got >= maxDigits + 2) {
695     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
696     return false;
697   }
698   if (got == 0) {
699     const auto &connection{io.GetConnectionState()};
700     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
701         "Bad real input data at column %d of record %d",
702         static_cast<int>(connection.positionInRecord + 1),
703         static_cast<int>(connection.currentRecordNumber));
704     return false;
705   }
706   decimal::ConversionToBinaryResult<binaryPrecision> converted;
707   const char *p{buffer};
708   if (scanned.isHexadecimal) {
709     buffer[got] = '\0';
710     converted = ConvertHexadecimal<binaryPrecision>(
711         p, edit.modes.round, scanned.exponent);
712   } else {
713     bool hadExtra{got > maxDigits};
714     int exponent{scanned.exponent};
715     if (exponent != 0) {
716       buffer[got++] = 'e';
717       if (exponent < 0) {
718         buffer[got++] = '-';
719         exponent = -exponent;
720       }
721       if (exponent > 9999) {
722         exponent = 9999; // will convert to +/-Inf
723       }
724       if (exponent > 999) {
725         int dig{exponent / 1000};
726         buffer[got++] = '0' + dig;
727         int rest{exponent - 1000 * dig};
728         dig = rest / 100;
729         buffer[got++] = '0' + dig;
730         rest -= 100 * dig;
731         dig = rest / 10;
732         buffer[got++] = '0' + dig;
733         buffer[got++] = '0' + (rest - 10 * dig);
734       } else if (exponent > 99) {
735         int dig{exponent / 100};
736         buffer[got++] = '0' + dig;
737         int rest{exponent - 100 * dig};
738         dig = rest / 10;
739         buffer[got++] = '0' + dig;
740         buffer[got++] = '0' + (rest - 10 * dig);
741       } else if (exponent > 9) {
742         int dig{exponent / 10};
743         buffer[got++] = '0' + dig;
744         buffer[got++] = '0' + (exponent - 10 * dig);
745       } else {
746         buffer[got++] = '0' + exponent;
747       }
748     }
749     buffer[got] = '\0';
750     converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
751     if (hadExtra) {
752       converted.flags = static_cast<enum decimal::ConversionResultFlags>(
753           converted.flags | decimal::Inexact);
754     }
755   }
756   if (*p) { // unprocessed junk after value
757     const auto &connection{io.GetConnectionState()};
758     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
759         "Trailing characters after real input data at column %d of record %d",
760         static_cast<int>(connection.positionInRecord + 1),
761         static_cast<int>(connection.currentRecordNumber));
762     return false;
763   }
764   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
765       converted.binary;
766   // Set FP exception flags
767   if (converted.flags != decimal::ConversionResultFlags::Exact) {
768     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
769       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
770       return false;
771     }
772     RaiseFPExceptions(converted.flags);
773   }
774   return CheckCompleteListDirectedField(io, edit);
775 }
776 
777 template <int KIND>
778 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
779   switch (edit.descriptor) {
780   case DataEdit::ListDirected:
781     if (IsNamelistNameOrSlash(io)) {
782       return false;
783     }
784     return EditCommonRealInput<KIND>(io, edit, n);
785   case DataEdit::ListDirectedRealPart:
786   case DataEdit::ListDirectedImaginaryPart:
787   case 'F':
788   case 'E': // incl. EN, ES, & EX
789   case 'D':
790   case 'G':
791     return EditCommonRealInput<KIND>(io, edit, n);
792   case 'B':
793     return EditBOZInput<1>(io, edit, n,
794         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
795   case 'O':
796     return EditBOZInput<3>(io, edit, n,
797         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
798   case 'Z':
799     return EditBOZInput<4>(io, edit, n,
800         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
801   case 'A': // legacy extension
802     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
803   default:
804     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
805         "Data edit descriptor '%c' may not be used for REAL input",
806         edit.descriptor);
807     return false;
808   }
809 }
810 
811 // 13.7.3 in Fortran 2018
812 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
813   switch (edit.descriptor) {
814   case DataEdit::ListDirected:
815     if (IsNamelistNameOrSlash(io)) {
816       return false;
817     }
818     break;
819   case 'L':
820   case 'G':
821     break;
822   default:
823     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
824         "Data edit descriptor '%c' may not be used for LOGICAL input",
825         edit.descriptor);
826     return false;
827   }
828   std::optional<int> remaining{io.CueUpInput(edit)};
829   std::optional<char32_t> next{io.NextInField(remaining, edit)};
830   if (next && *next == '.') { // skip optional period
831     next = io.NextInField(remaining, edit);
832   }
833   if (!next) {
834     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
835     return false;
836   }
837   switch (*next) {
838   case 'T':
839   case 't':
840     x = true;
841     break;
842   case 'F':
843   case 'f':
844     x = false;
845     break;
846   default:
847     io.GetIoErrorHandler().SignalError(
848         "Bad character '%lc' in LOGICAL input field", *next);
849     return false;
850   }
851   if (remaining) { // ignore the rest of a fixed-width field
852     io.HandleRelativePosition(*remaining);
853   } else if (edit.descriptor == DataEdit::ListDirected) {
854     while (io.NextInField(remaining, edit)) { // discard rest of field
855     }
856   }
857   return CheckCompleteListDirectedField(io, edit);
858 }
859 
860 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
861 template <typename CHAR>
862 static bool EditDelimitedCharacterInput(
863     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
864   bool result{true};
865   while (true) {
866     std::size_t byteCount{0};
867     auto ch{io.GetCurrentChar(byteCount)};
868     if (!ch) {
869       if (io.AdvanceRecord()) {
870         continue;
871       } else {
872         result = false; // EOF in character value
873         break;
874       }
875     }
876     io.HandleRelativePosition(byteCount);
877     if (*ch == delimiter) {
878       auto next{io.GetCurrentChar(byteCount)};
879       if (next && *next == delimiter) {
880         // Repeated delimiter: use as character value
881         io.HandleRelativePosition(byteCount);
882       } else {
883         break; // closing delimiter
884       }
885     }
886     if (length > 0) {
887       *x++ = *ch;
888       --length;
889     }
890   }
891   std::fill_n(x, length, ' ');
892   return result;
893 }
894 
895 template <typename CHAR>
896 static bool EditListDirectedCharacterInput(
897     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
898   std::size_t byteCount{0};
899   auto ch{io.GetCurrentChar(byteCount)};
900   if (ch && (*ch == '\'' || *ch == '"')) {
901     io.HandleRelativePosition(byteCount);
902     return EditDelimitedCharacterInput(io, x, length, *ch);
903   }
904   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
905     return false;
906   }
907   // Undelimited list-directed character input: stop at a value separator
908   // or the end of the current record.  Subtlety: the "remaining" count
909   // here is a dummy that's used to avoid the interpretation of separators
910   // in NextInField.
911   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
912   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
913     bool isSep{false};
914     switch (*next) {
915     case ' ':
916     case '\t':
917     case '/':
918       isSep = true;
919       break;
920     case ',':
921       isSep = !(edit.modes.editingFlags & decimalComma);
922       break;
923     case ';':
924       isSep = !!(edit.modes.editingFlags & decimalComma);
925       break;
926     default:
927       break;
928     }
929     if (isSep) {
930       remaining = 0;
931     } else {
932       *x++ = *next;
933       remaining = --length > 0 ? maxUTF8Bytes : 0;
934     }
935   }
936   std::fill_n(x, length, ' ');
937   return true;
938 }
939 
940 template <typename CHAR>
941 bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
942     std::size_t lengthChars) {
943   switch (edit.descriptor) {
944   case DataEdit::ListDirected:
945     return EditListDirectedCharacterInput(io, x, lengthChars, edit);
946   case 'A':
947   case 'G':
948     break;
949   case 'B':
950     return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
951   case 'O':
952     return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
953   case 'Z':
954     return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
955   default:
956     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
957         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
958         edit.descriptor);
959     return false;
960   }
961   const ConnectionState &connection{io.GetConnectionState()};
962   std::size_t remainingChars{lengthChars};
963   // Skip leading characters.
964   // Their bytes don't count towards INQUIRE(IOLENGTH=).
965   std::size_t skipChars{0};
966   if (edit.width && *edit.width > 0) {
967     remainingChars = *edit.width;
968     if (remainingChars > lengthChars) {
969       skipChars = remainingChars - lengthChars;
970     }
971   }
972   // When the field is wider than the variable, we drop the leading
973   // characters.  When the variable is wider than the field, there can be
974   // trailing padding or an EOR condition.
975   const char *input{nullptr};
976   std::size_t readyBytes{0};
977   // Transfer payload bytes; these do count.
978   while (remainingChars > 0) {
979     if (readyBytes == 0) {
980       readyBytes = io.GetNextInputBytes(input);
981       if (readyBytes == 0 ||
982           (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
983         if (io.CheckForEndOfRecord(readyBytes)) {
984           if (readyBytes == 0) {
985             // PAD='YES' and no more data
986             std::fill_n(x, lengthChars, ' ');
987             return !io.GetIoErrorHandler().InError();
988           } else {
989             // Do partial read(s) then pad on last iteration
990           }
991         } else {
992           return !io.GetIoErrorHandler().InError();
993         }
994       }
995     }
996     std::size_t chunkBytes;
997     std::size_t chunkChars{1};
998     bool skipping{skipChars > 0};
999     if (connection.isUTF8) {
1000       chunkBytes = MeasureUTF8Bytes(*input);
1001       if (skipping) {
1002         --skipChars;
1003       } else if (auto ucs{DecodeUTF8(input)}) {
1004         *x++ = *ucs;
1005         --lengthChars;
1006       } else if (chunkBytes == 0) {
1007         // error recovery: skip bad encoding
1008         chunkBytes = 1;
1009       }
1010     } else if (connection.internalIoCharKind > 1) {
1011       // Reading from non-default character internal unit
1012       chunkBytes = connection.internalIoCharKind;
1013       if (skipping) {
1014         --skipChars;
1015       } else {
1016         char32_t buffer{0};
1017         std::memcpy(&buffer, input, chunkBytes);
1018         *x++ = buffer;
1019         --lengthChars;
1020       }
1021     } else if constexpr (sizeof *x > 1) {
1022       // Read single byte with expansion into multi-byte CHARACTER
1023       chunkBytes = 1;
1024       if (skipping) {
1025         --skipChars;
1026       } else {
1027         *x++ = static_cast<unsigned char>(*input);
1028         --lengthChars;
1029       }
1030     } else { // single bytes -> default CHARACTER
1031       if (skipping) {
1032         chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1033         chunkChars = chunkBytes;
1034         skipChars -= chunkChars;
1035       } else {
1036         chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1037         chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1038         chunkChars = chunkBytes;
1039         std::memcpy(x, input, chunkBytes);
1040         x += chunkBytes;
1041         lengthChars -= chunkChars;
1042       }
1043     }
1044     input += chunkBytes;
1045     remainingChars -= chunkChars;
1046     if (!skipping) {
1047       io.GotChar(chunkBytes);
1048     }
1049     io.HandleRelativePosition(chunkBytes);
1050     readyBytes -= chunkBytes;
1051   }
1052   // Pad the remainder of the input variable, if any.
1053   std::fill_n(x, lengthChars, ' ');
1054   return CheckCompleteListDirectedField(io, edit);
1055 }
1056 
1057 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
1058 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
1059 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
1060 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
1061 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
1062 // TODO: double/double
1063 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
1064 
1065 template bool EditCharacterInput(
1066     IoStatementState &, const DataEdit &, char *, std::size_t);
1067 template bool EditCharacterInput(
1068     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1069 template bool EditCharacterInput(
1070     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1071 
1072 } // namespace Fortran::runtime::io
1073