xref: /llvm-project/flang/runtime/edit-input.cpp (revision ea7e50cdf2f531d323a564590a22c7bb6e11aa3a)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/real.h"
13 #include "flang/Common/uint128.h"
14 #include <algorithm>
15 #include <cfenv>
16 
17 namespace Fortran::runtime::io {
18 
19 // Checks that a list-directed input value has been entirely consumed and
20 // doesn't contain unparsed characters before the next value separator.
21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
22   char32_t comma{
23       edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
24   return ch == ' ' || ch == '\t' || ch == '/' || ch == comma;
25 }
26 
27 static bool CheckCompleteListDirectedField(
28     IoStatementState &io, const DataEdit &edit) {
29   if (edit.IsListDirected()) {
30     std::size_t byteCount;
31     if (auto ch{io.GetCurrentChar(byteCount)}) {
32       if (IsCharValueSeparator(edit, *ch)) {
33         return true;
34       } else {
35         const auto &connection{io.GetConnectionState()};
36         io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
37             "invalid character (0x%x) after list-directed input value, "
38             "at column %d in record %d",
39             static_cast<unsigned>(*ch),
40             static_cast<int>(connection.positionInRecord + 1),
41             static_cast<int>(connection.currentRecordNumber));
42         return false;
43       }
44     } else {
45       return true; // end of record: ok
46     }
47   } else {
48     return true;
49   }
50 }
51 
52 template <int LOG2_BASE>
53 static bool EditBOZInput(
54     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
55   // Skip leading white space & zeroes
56   std::optional<int> remaining{io.CueUpInput(edit)};
57   auto start{io.GetConnectionState().positionInRecord};
58   std::optional<char32_t> next{io.NextInField(remaining, edit)};
59   if (next.value_or('?') == '0') {
60     do {
61       start = io.GetConnectionState().positionInRecord;
62       next = io.NextInField(remaining, edit);
63     } while (next && *next == '0');
64   }
65   // Count significant digits after any leading white space & zeroes
66   int digits{0};
67   for (; next; next = io.NextInField(remaining, edit)) {
68     char32_t ch{*next};
69     if (ch == ' ' || ch == '\t') {
70       continue;
71     }
72     if (ch >= '0' && ch <= '1') {
73     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
74     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
75     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
76     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
77     } else {
78       io.GetIoErrorHandler().SignalError(
79           "Bad character '%lc' in B/O/Z input field", ch);
80       return false;
81     }
82     ++digits;
83   }
84   auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
85   if (significantBytes > bytes) {
86     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
87         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
88     return false;
89   }
90   // Reset to start of significant digits
91   io.HandleAbsolutePosition(start);
92   remaining.reset();
93   // Make a second pass now that the digit count is known
94   std::memset(n, 0, bytes);
95   int increment{isHostLittleEndian ? -1 : 1};
96   auto *data{reinterpret_cast<unsigned char *>(n) +
97       (isHostLittleEndian ? significantBytes - 1 : 0)};
98   int shift{((digits - 1) * LOG2_BASE) & 7};
99   if (shift + LOG2_BASE > 8) {
100     shift -= 8; // misaligned octal
101   }
102   while (digits > 0) {
103     char32_t ch{*io.NextInField(remaining, edit)};
104     int digit{0};
105     if (ch >= '0' && ch <= '9') {
106       digit = ch - '0';
107     } else if (ch >= 'A' && ch <= 'F') {
108       digit = ch + 10 - 'A';
109     } else if (ch >= 'a' && ch <= 'f') {
110       digit = ch + 10 - 'a';
111     } else {
112       continue;
113     }
114     --digits;
115     if (shift < 0) {
116       shift += 8;
117       if (shift + LOG2_BASE > 8) { // misaligned octal
118         *data |= digit >> (8 - shift);
119       }
120       data += increment;
121     }
122     *data |= digit << shift;
123     shift -= LOG2_BASE;
124   }
125   return CheckCompleteListDirectedField(io, edit);
126 }
127 
128 static inline char32_t GetRadixPointChar(const DataEdit &edit) {
129   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
130 }
131 
132 // Prepares input from a field, and returns the sign, if any, else '\0'.
133 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
134     std::optional<char32_t> &next, std::optional<int> &remaining) {
135   remaining = io.CueUpInput(edit);
136   next = io.NextInField(remaining, edit);
137   char sign{'\0'};
138   if (next) {
139     if (*next == '-' || *next == '+') {
140       sign = *next;
141       if (!edit.IsListDirected()) {
142         io.SkipSpaces(remaining);
143       }
144       next = io.NextInField(remaining, edit);
145     }
146   }
147   return sign;
148 }
149 
150 bool EditIntegerInput(
151     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
152   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
153   switch (edit.descriptor) {
154   case DataEdit::ListDirected:
155     if (IsNamelistNameOrSlash(io)) {
156       return false;
157     }
158     break;
159   case 'G':
160   case 'I':
161     break;
162   case 'B':
163     return EditBOZInput<1>(io, edit, n, kind);
164   case 'O':
165     return EditBOZInput<3>(io, edit, n, kind);
166   case 'Z':
167     return EditBOZInput<4>(io, edit, n, kind);
168   case 'A': // legacy extension
169     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
170   default:
171     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
172         "Data edit descriptor '%c' may not be used with an INTEGER data item",
173         edit.descriptor);
174     return false;
175   }
176   std::optional<int> remaining;
177   std::optional<char32_t> next;
178   char sign{ScanNumericPrefix(io, edit, next, remaining)};
179   common::UnsignedInt128 value{0};
180   bool any{!!sign};
181   bool overflow{false};
182   for (; next; next = io.NextInField(remaining, edit)) {
183     char32_t ch{*next};
184     if (ch == ' ' || ch == '\t') {
185       if (edit.modes.editingFlags & blankZero) {
186         ch = '0'; // BZ mode - treat blank as if it were zero
187       } else {
188         continue;
189       }
190     }
191     int digit{0};
192     if (ch >= '0' && ch <= '9') {
193       digit = ch - '0';
194     } else {
195       io.GetIoErrorHandler().SignalError(
196           "Bad character '%lc' in INTEGER input field", ch);
197       return false;
198     }
199     static constexpr auto maxu128{~common::UnsignedInt128{0}};
200     static constexpr auto maxu128OverTen{maxu128 / 10};
201     static constexpr int maxLastDigit{
202         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
203     overflow |= value >= maxu128OverTen &&
204         (value > maxu128OverTen || digit > maxLastDigit);
205     value *= 10;
206     value += digit;
207     any = true;
208   }
209   if (!any && !remaining) {
210     io.GetIoErrorHandler().SignalError(
211         "Integer value absent from NAMELIST or list-directed input");
212     return false;
213   }
214   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
215   overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
216   if (overflow) {
217     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
218         "Decimal input overflows INTEGER(%d) variable", kind);
219     return false;
220   }
221   if (sign == '-') {
222     value = -value;
223   }
224   if (any || !io.GetConnectionState().IsAtEOF()) {
225     std::memcpy(n, &value, kind); // a blank field means zero
226   }
227   return any;
228 }
229 
230 // Parses a REAL input number from the input source as a normalized
231 // fraction into a supplied buffer -- there's an optional '-', a
232 // decimal point when the input is not hexadecimal, and at least one
233 // digit.  Replaces blanks with zeroes where appropriate.
234 struct ScannedRealInput {
235   // Number of characters that (should) have been written to the
236   // buffer -- this can be larger than the buffer size, which
237   // indicates buffer overflow.  Zero indicates an error.
238   int got{0};
239   int exponent{0}; // adjusted as necessary; binary if isHexadecimal
240   bool isHexadecimal{false}; // 0X...
241 };
242 static ScannedRealInput ScanRealInput(
243     char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
244   std::optional<int> remaining;
245   std::optional<char32_t> next;
246   int got{0};
247   std::optional<int> radixPointOffset;
248   auto Put{[&](char ch) -> void {
249     if (got < bufferSize) {
250       buffer[got] = ch;
251     }
252     ++got;
253   }};
254   char sign{ScanNumericPrefix(io, edit, next, remaining)};
255   if (sign == '-') {
256     Put('-');
257   }
258   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
259   int exponent{0};
260   if (!next || (!bzMode && *next == ' ')) {
261     if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
262       // An empty/blank field means zero when not list-directed.
263       // A fixed-width field containing only a sign is also zero;
264       // this behavior isn't standard-conforming in F'2023 but it is
265       // required to pass FCVS.
266       Put('0');
267     }
268     return {got, exponent, false};
269   }
270   char32_t radixPointChar{GetRadixPointChar(edit)};
271   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
272   bool isHexadecimal{false};
273   if (first == 'N' || first == 'I') {
274     // NaN or infinity - convert to upper case
275     // Subtle: a blank field of digits could be followed by 'E' or 'D',
276     for (; next &&
277          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
278          next = io.NextInField(remaining, edit)) {
279       if (*next >= 'a' && *next <= 'z') {
280         Put(*next - 'a' + 'A');
281       } else {
282         Put(*next);
283       }
284     }
285     if (next && *next == '(') { // NaN(...)
286       Put('(');
287       int depth{1};
288       while (true) {
289         next = io.NextInField(remaining, edit);
290         if (depth == 0) {
291           break;
292         } else if (!next) {
293           return {}; // error
294         } else if (*next == '(') {
295           ++depth;
296         } else if (*next == ')') {
297           --depth;
298         }
299         Put(*next);
300       }
301     }
302   } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
303       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
304       first == 'D' || first == 'Q') {
305     if (first == '0') {
306       next = io.NextInField(remaining, edit);
307       if (next && (*next == 'x' || *next == 'X')) { // 0X...
308         isHexadecimal = true;
309         next = io.NextInField(remaining, edit);
310       } else {
311         Put('0');
312       }
313     }
314     // input field is normalized to a fraction
315     if (!isHexadecimal) {
316       Put('.');
317     }
318     auto start{got};
319     for (; next; next = io.NextInField(remaining, edit)) {
320       char32_t ch{*next};
321       if (ch == ' ' || ch == '\t') {
322         if (isHexadecimal) {
323           return {}; // error
324         } else if (bzMode) {
325           ch = '0'; // BZ mode - treat blank as if it were zero
326         } else {
327           continue; // ignore blank in fixed field
328         }
329       }
330       if (ch == '0' && got == start && !radixPointOffset) {
331         // omit leading zeroes before the radix point
332       } else if (ch >= '0' && ch <= '9') {
333         Put(ch);
334       } else if (ch == radixPointChar && !radixPointOffset) {
335         // The radix point character is *not* copied to the buffer.
336         radixPointOffset = got - start; // # of digits before the radix point
337       } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
338         Put(ch);
339       } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
340         Put(ch - 'a' + 'A'); // normalize to capitals
341       } else {
342         break;
343       }
344     }
345     if (got == start) {
346       // Nothing but zeroes and maybe a radix point.  F'2018 requires
347       // at least one digit, but F'77 did not, and a bare "." shows up in
348       // the FCVS suite.
349       Put('0'); // emit at least one digit
350     }
351     // In list-directed input, a bad exponent is not consumed.
352     auto nextBeforeExponent{next};
353     auto startExponent{io.GetConnectionState().positionInRecord};
354     bool hasGoodExponent{false};
355     if (next) {
356       if (isHexadecimal) {
357         if (*next == 'p' || *next == 'P') {
358           next = io.NextInField(remaining, edit);
359         } else {
360           // The binary exponent is not optional in the standard.
361           return {}; // error
362         }
363       } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
364           *next == 'q' || *next == 'Q') {
365         // Optional exponent letter.  Blanks are allowed between the
366         // optional exponent letter and the exponent value.
367         io.SkipSpaces(remaining);
368         next = io.NextInField(remaining, edit);
369       }
370     }
371     if (next &&
372         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
373             *next == ' ' || *next == '\t')) {
374       bool negExpo{*next == '-'};
375       if (negExpo || *next == '+') {
376         next = io.NextInField(remaining, edit);
377       }
378       for (; next; next = io.NextInField(remaining, edit)) {
379         if (*next >= '0' && *next <= '9') {
380           hasGoodExponent = true;
381           if (exponent < 10000) {
382             exponent = 10 * exponent + *next - '0';
383           }
384         } else if (*next == ' ' || *next == '\t') {
385           if (isHexadecimal) {
386             break;
387           } else if (bzMode) {
388             hasGoodExponent = true;
389             exponent = 10 * exponent;
390           }
391         } else {
392           break;
393         }
394       }
395       if (negExpo) {
396         exponent = -exponent;
397       }
398     }
399     if (!hasGoodExponent) {
400       if (isHexadecimal) {
401         return {}; // error
402       }
403       // There isn't a good exponent; do not consume it.
404       next = nextBeforeExponent;
405       io.HandleAbsolutePosition(startExponent);
406       // The default exponent is -kP, but the scale factor doesn't affect
407       // an explicit exponent.
408       exponent = -edit.modes.scale;
409     }
410     // Adjust exponent by number of digits before the radix point.
411     if (isHexadecimal) {
412       // Exponents for hexadecimal input are binary.
413       exponent += radixPointOffset.value_or(got - start) * 4;
414     } else if (radixPointOffset) {
415       exponent += *radixPointOffset;
416     } else {
417       // When no redix point (or comma) appears in the value, the 'd'
418       // part of the edit descriptor must be interpreted as the number of
419       // digits in the value to be interpreted as being to the *right* of
420       // the assumed radix point (13.7.2.3.2)
421       exponent += got - start - edit.digits.value_or(0);
422     }
423   }
424   // Consume the trailing ')' of a list-directed or NAMELIST complex
425   // input value.
426   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
427     if (next && (*next == ' ' || *next == '\t')) {
428       io.SkipSpaces(remaining);
429       next = io.NextInField(remaining, edit);
430     }
431     if (!next) { // NextInField fails on separators like ')'
432       std::size_t byteCount{0};
433       next = io.GetCurrentChar(byteCount);
434       if (next && *next == ')') {
435         io.HandleRelativePosition(byteCount);
436       }
437     }
438   } else if (remaining) {
439     while (next && (*next == ' ' || *next == '\t')) {
440       next = io.NextInField(remaining, edit);
441     }
442     if (next) {
443       return {}; // error: unused nonblank character in fixed-width field
444     }
445   }
446   return {got, exponent, isHexadecimal};
447 }
448 
449 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
450 #undef RAISE
451 #ifdef feraisexcept // a macro in some environments; omit std::
452 #define RAISE feraiseexcept
453 #else
454 #define RAISE std::feraiseexcept
455 #endif
456   if (flags & decimal::ConversionResultFlags::Overflow) {
457     RAISE(FE_OVERFLOW);
458   }
459   if (flags & decimal::ConversionResultFlags::Inexact) {
460     RAISE(FE_INEXACT);
461   }
462   if (flags & decimal::ConversionResultFlags::Invalid) {
463     RAISE(FE_INVALID);
464   }
465 #undef RAISE
466 }
467 
468 // If no special modes are in effect and the form of the input value
469 // that's present in the input stream is acceptable to the decimal->binary
470 // converter without modification, this fast path for real input
471 // saves time by avoiding memory copies and reformatting of the exponent.
472 template <int PRECISION>
473 static bool TryFastPathRealDecimalInput(
474     IoStatementState &io, const DataEdit &edit, void *n) {
475   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
476     return false;
477   }
478   if (edit.modes.scale != 0) {
479     return false;
480   }
481   const ConnectionState &connection{io.GetConnectionState()};
482   if (connection.internalIoCharKind > 1) {
483     return false; // reading non-default character
484   }
485   const char *str{nullptr};
486   std::size_t got{io.GetNextInputBytes(str)};
487   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
488     return false; // could not access reliably-terminated input stream
489   }
490   const char *p{str};
491   std::int64_t maxConsume{
492       std::min<std::int64_t>(got, edit.width.value_or(got))};
493   const char *limit{str + maxConsume};
494   decimal::ConversionToBinaryResult<PRECISION> converted{
495       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
496   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
497     return false;
498   }
499   if (edit.digits.value_or(0) != 0) {
500     // Edit descriptor is Fw.d (or other) with d != 0, which
501     // implies scaling
502     const char *q{str};
503     for (; q < limit; ++q) {
504       if (*q == '.' || *q == 'n' || *q == 'N') {
505         break;
506       }
507     }
508     if (q == limit) {
509       // No explicit decimal point, and not NaN/Inf.
510       return false;
511     }
512   }
513   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
514     // Need to consume a trailing ')', possibly with leading spaces
515     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
516     }
517     if (p < limit && *p == ')') {
518       ++p;
519     } else {
520       return false;
521     }
522   } else if (edit.IsListDirected()) {
523     if (p < limit && !IsCharValueSeparator(edit, *p)) {
524       return false;
525     }
526   } else {
527     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
528     }
529     if (edit.width && p < str + *edit.width) {
530       return false; // unconverted characters remain in fixed width field
531     }
532   }
533   // Success on the fast path!
534   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
535       converted.binary;
536   io.HandleRelativePosition(p - str);
537   // Set FP exception flags
538   if (converted.flags != decimal::ConversionResultFlags::Exact) {
539     RaiseFPExceptions(converted.flags);
540   }
541   return true;
542 }
543 
544 template <int binaryPrecision>
545 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
546     const char *&p, enum decimal::FortranRounding rounding, int expo) {
547   using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
548   using RawType = typename RealType::RawType;
549   bool isNegative{*p == '-'};
550   constexpr RawType one{1};
551   RawType signBit{0};
552   if (isNegative) {
553     ++p;
554     signBit = one << (RealType::bits - 1);
555   }
556   RawType fraction{0};
557   // Adjust the incoming binary P+/- exponent to shift the radix point
558   // to below the LSB and add in the bias.
559   expo += binaryPrecision - 1 + RealType::exponentBias;
560   // Input the fraction.
561   int roundingBit{0};
562   int guardBit{0};
563   for (; *p; ++p) {
564     fraction <<= 4;
565     expo -= 4;
566     if (*p >= '0' && *p <= '9') {
567       fraction |= *p - '0';
568     } else if (*p >= 'A' && *p <= 'F') {
569       fraction |= *p - 'A' + 10; // data were normalized to capitals
570     } else {
571       break;
572     }
573     while (fraction >> binaryPrecision) {
574       guardBit |= roundingBit;
575       roundingBit = (int)fraction & 1;
576       fraction >>= 1;
577       ++expo;
578     }
579   }
580   if (fraction) {
581     // Boost biased expo if too small
582     while (expo < 1) {
583       guardBit |= roundingBit;
584       roundingBit = (int)fraction & 1;
585       fraction >>= 1;
586       ++expo;
587     }
588     // Normalize
589     while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
590       fraction <<= 1;
591       --expo;
592     }
593     // Rounding
594     bool increase{false};
595     switch (rounding) {
596     case decimal::RoundNearest: // RN & RP
597       increase = roundingBit && (guardBit | ((int)fraction & 1));
598       break;
599     case decimal::RoundUp: // RU
600       increase = !isNegative && (roundingBit | guardBit);
601       break;
602     case decimal::RoundDown: // RD
603       increase = isNegative && (roundingBit | guardBit);
604       break;
605     case decimal::RoundToZero: // RZ
606       break;
607     case decimal::RoundCompatible: // RC
608       increase = roundingBit != 0;
609       break;
610     }
611     if (increase) {
612       ++fraction;
613       if (fraction >> binaryPrecision) {
614         fraction >>= 1;
615         ++expo;
616       }
617     }
618   }
619   // Package & return result
620   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
621   if (!fraction) {
622     expo = 0;
623   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
624     expo = 0; // subnormal
625   } else if (expo >= RealType::maxExponent) {
626     expo = RealType::maxExponent; // +/-Inf
627     fraction = 0;
628   } else {
629     fraction &= significandMask; // remove explicit normalization unless x87
630   }
631   return decimal::ConversionToBinaryResult<binaryPrecision>{
632       RealType{static_cast<RawType>(signBit |
633           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
634       (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
635 }
636 
637 template <int KIND>
638 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
639   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
640   if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
641     return CheckCompleteListDirectedField(io, edit);
642   }
643   // Fast path wasn't available or didn't work; go the more general route
644   static constexpr int maxDigits{
645       common::MaxDecimalConversionDigits(binaryPrecision)};
646   static constexpr int bufferSize{maxDigits + 18};
647   char buffer[bufferSize];
648   auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
649   int got{scanned.got};
650   if (got >= maxDigits + 2) {
651     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
652     return false;
653   }
654   if (got == 0) {
655     const auto &connection{io.GetConnectionState()};
656     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
657         "Bad real input data at column %d of record %d",
658         static_cast<int>(connection.positionInRecord + 1),
659         static_cast<int>(connection.currentRecordNumber));
660     return false;
661   }
662   decimal::ConversionToBinaryResult<binaryPrecision> converted;
663   const char *p{buffer};
664   if (scanned.isHexadecimal) {
665     buffer[got] = '\0';
666     converted = ConvertHexadecimal<binaryPrecision>(
667         p, edit.modes.round, scanned.exponent);
668   } else {
669     bool hadExtra{got > maxDigits};
670     int exponent{scanned.exponent};
671     if (exponent != 0) {
672       buffer[got++] = 'e';
673       if (exponent < 0) {
674         buffer[got++] = '-';
675         exponent = -exponent;
676       }
677       if (exponent > 9999) {
678         exponent = 9999; // will convert to +/-Inf
679       }
680       if (exponent > 999) {
681         int dig{exponent / 1000};
682         buffer[got++] = '0' + dig;
683         int rest{exponent - 1000 * dig};
684         dig = rest / 100;
685         buffer[got++] = '0' + dig;
686         rest -= 100 * dig;
687         dig = rest / 10;
688         buffer[got++] = '0' + dig;
689         buffer[got++] = '0' + (rest - 10 * dig);
690       } else if (exponent > 99) {
691         int dig{exponent / 100};
692         buffer[got++] = '0' + dig;
693         int rest{exponent - 100 * dig};
694         dig = rest / 10;
695         buffer[got++] = '0' + dig;
696         buffer[got++] = '0' + (rest - 10 * dig);
697       } else if (exponent > 9) {
698         int dig{exponent / 10};
699         buffer[got++] = '0' + dig;
700         buffer[got++] = '0' + (exponent - 10 * dig);
701       } else {
702         buffer[got++] = '0' + exponent;
703       }
704     }
705     buffer[got] = '\0';
706     converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
707     if (hadExtra) {
708       converted.flags = static_cast<enum decimal::ConversionResultFlags>(
709           converted.flags | decimal::Inexact);
710     }
711   }
712   if (*p) { // unprocessed junk after value
713     const auto &connection{io.GetConnectionState()};
714     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
715         "Trailing characters after real input data at column %d of record %d",
716         static_cast<int>(connection.positionInRecord + 1),
717         static_cast<int>(connection.currentRecordNumber));
718     return false;
719   }
720   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
721       converted.binary;
722   // Set FP exception flags
723   if (converted.flags != decimal::ConversionResultFlags::Exact) {
724     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
725       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
726       return false;
727     }
728     RaiseFPExceptions(converted.flags);
729   }
730   return CheckCompleteListDirectedField(io, edit);
731 }
732 
733 template <int KIND>
734 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
735   switch (edit.descriptor) {
736   case DataEdit::ListDirected:
737     if (IsNamelistNameOrSlash(io)) {
738       return false;
739     }
740     return EditCommonRealInput<KIND>(io, edit, n);
741   case DataEdit::ListDirectedRealPart:
742   case DataEdit::ListDirectedImaginaryPart:
743   case 'F':
744   case 'E': // incl. EN, ES, & EX
745   case 'D':
746   case 'G':
747     return EditCommonRealInput<KIND>(io, edit, n);
748   case 'B':
749     return EditBOZInput<1>(io, edit, n,
750         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
751   case 'O':
752     return EditBOZInput<3>(io, edit, n,
753         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
754   case 'Z':
755     return EditBOZInput<4>(io, edit, n,
756         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
757   case 'A': // legacy extension
758     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
759   default:
760     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
761         "Data edit descriptor '%c' may not be used for REAL input",
762         edit.descriptor);
763     return false;
764   }
765 }
766 
767 // 13.7.3 in Fortran 2018
768 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
769   switch (edit.descriptor) {
770   case DataEdit::ListDirected:
771     if (IsNamelistNameOrSlash(io)) {
772       return false;
773     }
774     break;
775   case 'L':
776   case 'G':
777     break;
778   default:
779     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
780         "Data edit descriptor '%c' may not be used for LOGICAL input",
781         edit.descriptor);
782     return false;
783   }
784   std::optional<int> remaining{io.CueUpInput(edit)};
785   std::optional<char32_t> next{io.NextInField(remaining, edit)};
786   if (next && *next == '.') { // skip optional period
787     next = io.NextInField(remaining, edit);
788   }
789   if (!next) {
790     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
791     return false;
792   }
793   switch (*next) {
794   case 'T':
795   case 't':
796     x = true;
797     break;
798   case 'F':
799   case 'f':
800     x = false;
801     break;
802   default:
803     io.GetIoErrorHandler().SignalError(
804         "Bad character '%lc' in LOGICAL input field", *next);
805     return false;
806   }
807   if (remaining) { // ignore the rest of a fixed-width field
808     io.HandleRelativePosition(*remaining);
809   } else if (edit.descriptor == DataEdit::ListDirected) {
810     while (io.NextInField(remaining, edit)) { // discard rest of field
811     }
812   }
813   return CheckCompleteListDirectedField(io, edit);
814 }
815 
816 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
817 template <typename CHAR>
818 static bool EditDelimitedCharacterInput(
819     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
820   bool result{true};
821   while (true) {
822     std::size_t byteCount{0};
823     auto ch{io.GetCurrentChar(byteCount)};
824     if (!ch) {
825       if (io.AdvanceRecord()) {
826         continue;
827       } else {
828         result = false; // EOF in character value
829         break;
830       }
831     }
832     io.HandleRelativePosition(byteCount);
833     if (*ch == delimiter) {
834       auto next{io.GetCurrentChar(byteCount)};
835       if (next && *next == delimiter) {
836         // Repeated delimiter: use as character value
837         io.HandleRelativePosition(byteCount);
838       } else {
839         break; // closing delimiter
840       }
841     }
842     if (length > 0) {
843       *x++ = *ch;
844       --length;
845     }
846   }
847   std::fill_n(x, length, ' ');
848   return result;
849 }
850 
851 template <typename CHAR>
852 static bool EditListDirectedCharacterInput(
853     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
854   std::size_t byteCount{0};
855   auto ch{io.GetCurrentChar(byteCount)};
856   if (ch && (*ch == '\'' || *ch == '"')) {
857     io.HandleRelativePosition(byteCount);
858     return EditDelimitedCharacterInput(io, x, length, *ch);
859   }
860   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
861     return false;
862   }
863   // Undelimited list-directed character input: stop at a value separator
864   // or the end of the current record.  Subtlety: the "remaining" count
865   // here is a dummy that's used to avoid the interpretation of separators
866   // in NextInField.
867   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
868   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
869     bool isSep{false};
870     switch (*next) {
871     case ' ':
872     case '\t':
873     case '/':
874       isSep = true;
875       break;
876     case ',':
877       isSep = !(edit.modes.editingFlags & decimalComma);
878       break;
879     case ';':
880       isSep = !!(edit.modes.editingFlags & decimalComma);
881       break;
882     default:
883       break;
884     }
885     if (isSep) {
886       remaining = 0;
887     } else {
888       *x++ = *next;
889       remaining = --length > 0 ? maxUTF8Bytes : 0;
890     }
891   }
892   std::fill_n(x, length, ' ');
893   return true;
894 }
895 
896 template <typename CHAR>
897 bool EditCharacterInput(
898     IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
899   switch (edit.descriptor) {
900   case DataEdit::ListDirected:
901     return EditListDirectedCharacterInput(io, x, length, edit);
902   case 'A':
903   case 'G':
904     break;
905   case 'B':
906     return EditBOZInput<1>(io, edit, x, length * sizeof *x);
907   case 'O':
908     return EditBOZInput<3>(io, edit, x, length * sizeof *x);
909   case 'Z':
910     return EditBOZInput<4>(io, edit, x, length * sizeof *x);
911   default:
912     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
913         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
914         edit.descriptor);
915     return false;
916   }
917   const ConnectionState &connection{io.GetConnectionState()};
918   std::size_t remaining{length};
919   if (edit.width && *edit.width > 0) {
920     remaining = *edit.width;
921   }
922   // When the field is wider than the variable, we drop the leading
923   // characters.  When the variable is wider than the field, there can be
924   // trailing padding or an EOR condition.
925   const char *input{nullptr};
926   std::size_t ready{0};
927   // Skip leading bytes.
928   // These bytes don't count towards INQUIRE(IOLENGTH=).
929   std::size_t skip{remaining > length ? remaining - length : 0};
930   // Transfer payload bytes; these do count.
931   while (remaining > 0) {
932     if (ready == 0) {
933       ready = io.GetNextInputBytes(input);
934       if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
935         if (io.CheckForEndOfRecord(ready)) {
936           if (ready == 0) {
937             // PAD='YES' and no more data
938             std::fill_n(x, length, ' ');
939             return !io.GetIoErrorHandler().InError();
940           } else {
941             // Do partial read(s) then pad on last iteration
942           }
943         } else {
944           return !io.GetIoErrorHandler().InError();
945         }
946       }
947     }
948     std::size_t chunk;
949     bool skipping{skip > 0};
950     if (connection.isUTF8) {
951       chunk = MeasureUTF8Bytes(*input);
952       if (skipping) {
953         --skip;
954       } else if (auto ucs{DecodeUTF8(input)}) {
955         *x++ = *ucs;
956         --length;
957       } else if (chunk == 0) {
958         // error recovery: skip bad encoding
959         chunk = 1;
960       }
961       --remaining;
962     } else if (connection.internalIoCharKind > 1) {
963       // Reading from non-default character internal unit
964       chunk = connection.internalIoCharKind;
965       if (skipping) {
966         --skip;
967       } else {
968         char32_t buffer{0};
969         std::memcpy(&buffer, input, chunk);
970         *x++ = buffer;
971         --length;
972       }
973       --remaining;
974     } else if constexpr (sizeof *x > 1) {
975       // Read single byte with expansion into multi-byte CHARACTER
976       chunk = 1;
977       if (skipping) {
978         --skip;
979       } else {
980         *x++ = static_cast<unsigned char>(*input);
981         --length;
982       }
983       --remaining;
984     } else { // single bytes -> default CHARACTER
985       if (skipping) {
986         chunk = std::min<std::size_t>(skip, ready);
987         skip -= chunk;
988       } else {
989         chunk = std::min<std::size_t>(remaining, ready);
990         std::memcpy(x, input, chunk);
991         x += chunk;
992         length -= chunk;
993       }
994       remaining -= chunk;
995     }
996     input += chunk;
997     if (!skipping) {
998       io.GotChar(chunk);
999     }
1000     io.HandleRelativePosition(chunk);
1001     ready -= chunk;
1002   }
1003   // Pad the remainder of the input variable, if any.
1004   std::fill_n(x, length, ' ');
1005   return CheckCompleteListDirectedField(io, edit);
1006 }
1007 
1008 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
1009 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
1010 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
1011 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
1012 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
1013 // TODO: double/double
1014 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
1015 
1016 template bool EditCharacterInput(
1017     IoStatementState &, const DataEdit &, char *, std::size_t);
1018 template bool EditCharacterInput(
1019     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1020 template bool EditCharacterInput(
1021     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1022 
1023 } // namespace Fortran::runtime::io
1024