xref: /llvm-project/flang/runtime/edit-input.cpp (revision 2f31b4b10a1ab3ec937fbbead55b66b8dfbb0934)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/real.h"
13 #include "flang/Common/uint128.h"
14 #include <algorithm>
15 #include <cfenv>
16 
17 namespace Fortran::runtime::io {
18 
19 template <int LOG2_BASE>
20 static bool EditBOZInput(
21     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
22   std::optional<int> remaining;
23   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
24   if (*next == '0') {
25     do {
26       next = io.NextInField(remaining, edit);
27     } while (next && *next == '0');
28   }
29   // Count significant digits after any leading white space & zeroes
30   int digits{0};
31   for (; next; next = io.NextInField(remaining, edit)) {
32     char32_t ch{*next};
33     if (ch == ' ' || ch == '\t') {
34       continue;
35     }
36     if (ch >= '0' && ch <= '1') {
37     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
38     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
39     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
40     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
41     } else {
42       io.GetIoErrorHandler().SignalError(
43           "Bad character '%lc' in B/O/Z input field", ch);
44       return false;
45     }
46     ++digits;
47   }
48   auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
49   if (significantBytes > bytes) {
50     io.GetIoErrorHandler().SignalError(
51         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
52     return false;
53   }
54   // Reset to start of significant digits
55   io.HandleRelativePosition(-digits);
56   remaining.reset();
57   // Make a second pass now that the digit count is known
58   std::memset(n, 0, bytes);
59   int increment{isHostLittleEndian ? -1 : 1};
60   auto *data{reinterpret_cast<unsigned char *>(n) +
61       (isHostLittleEndian ? significantBytes - 1 : 0)};
62   int shift{((digits - 1) * LOG2_BASE) & 7};
63   if (shift + LOG2_BASE > 8) {
64     shift -= 8; // misaligned octal
65   }
66   while (digits > 0) {
67     char32_t ch{*io.NextInField(remaining, edit)};
68     int digit{0};
69     if (ch >= '0' && ch <= '9') {
70       digit = ch - '0';
71     } else if (ch >= 'A' && ch <= 'F') {
72       digit = ch + 10 - 'A';
73     } else if (ch >= 'a' && ch <= 'f') {
74       digit = ch + 10 - 'a';
75     } else {
76       continue;
77     }
78     --digits;
79     if (shift < 0) {
80       shift += 8;
81       if (shift + LOG2_BASE > 8) { // misaligned octal
82         *data |= digit >> (8 - shift);
83       }
84       data += increment;
85     }
86     *data |= digit << shift;
87     shift -= LOG2_BASE;
88   }
89   return true;
90 }
91 
92 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
93   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
94 }
95 
96 // Prepares input from a field, and consumes the sign, if any.
97 // Returns true if there's a '-' sign.
98 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
99     std::optional<char32_t> &next, std::optional<int> &remaining) {
100   next = io.PrepareInput(edit, remaining);
101   bool negative{false};
102   if (next) {
103     negative = *next == '-';
104     if (negative || *next == '+') {
105       io.SkipSpaces(remaining);
106       next = io.NextInField(remaining, edit);
107     }
108   }
109   return negative;
110 }
111 
112 bool EditIntegerInput(
113     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
114   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
115   switch (edit.descriptor) {
116   case DataEdit::ListDirected:
117     if (IsNamelistName(io)) {
118       return false;
119     }
120     break;
121   case 'G':
122   case 'I':
123     break;
124   case 'B':
125     return EditBOZInput<1>(io, edit, n, kind);
126   case 'O':
127     return EditBOZInput<3>(io, edit, n, kind);
128   case 'Z':
129     return EditBOZInput<4>(io, edit, n, kind);
130   case 'A': // legacy extension
131     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
132   default:
133     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
134         "Data edit descriptor '%c' may not be used with an INTEGER data item",
135         edit.descriptor);
136     return false;
137   }
138   std::optional<int> remaining;
139   std::optional<char32_t> next;
140   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
141   common::UnsignedInt128 value{0};
142   bool any{negate};
143   for (; next; next = io.NextInField(remaining, edit)) {
144     char32_t ch{*next};
145     if (ch == ' ' || ch == '\t') {
146       if (edit.modes.editingFlags & blankZero) {
147         ch = '0'; // BZ mode - treat blank as if it were zero
148       } else {
149         continue;
150       }
151     }
152     int digit{0};
153     if (ch >= '0' && ch <= '9') {
154       digit = ch - '0';
155     } else {
156       io.GetIoErrorHandler().SignalError(
157           "Bad character '%lc' in INTEGER input field", ch);
158       return false;
159     }
160     value *= 10;
161     value += digit;
162     any = true;
163   }
164   if (negate) {
165     value = -value;
166   }
167   if (any || !io.GetConnectionState().IsAtEOF()) {
168     std::memcpy(n, &value, kind); // a blank field means zero
169   }
170   return any;
171 }
172 
173 // Parses a REAL input number from the input source as a normalized
174 // fraction into a supplied buffer -- there's an optional '-', a
175 // decimal point, and at least one digit.  The adjusted exponent value
176 // is returned in a reference argument.  The returned value is the number
177 // of characters that (should) have been written to the buffer -- this can
178 // be larger than the buffer size and can indicate overflow.  Replaces
179 // blanks with zeroes if appropriate.
180 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
181     const DataEdit &edit, int &exponent) {
182   std::optional<int> remaining;
183   std::optional<char32_t> next;
184   int got{0};
185   std::optional<int> decimalPoint;
186   auto Put{[&](char ch) -> void {
187     if (got < bufferSize) {
188       buffer[got] = ch;
189     }
190     ++got;
191   }};
192   if (ScanNumericPrefix(io, edit, next, remaining)) {
193     Put('-');
194   }
195   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
196   if (!next || (!bzMode && *next == ' ')) { // empty/blank field means zero
197     remaining.reset();
198     if (!io.GetConnectionState().IsAtEOF()) {
199       Put('0');
200     }
201     return got;
202   }
203   char32_t decimal{GetDecimalPoint(edit)};
204   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
205   if (first == 'N' || first == 'I') {
206     // NaN or infinity - convert to upper case
207     // Subtle: a blank field of digits could be followed by 'E' or 'D',
208     for (; next &&
209          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
210          next = io.NextInField(remaining, edit)) {
211       if (*next >= 'a' && *next <= 'z') {
212         Put(*next - 'a' + 'A');
213       } else {
214         Put(*next);
215       }
216     }
217     if (next && *next == '(') { // NaN(...)
218       Put('(');
219       int depth{1};
220       while (true) {
221         next = io.NextInField(remaining, edit);
222         if (depth == 0) {
223           break;
224         } else if (!next) {
225           return 0; // error
226         } else if (*next == '(') {
227           ++depth;
228         } else if (*next == ')') {
229           --depth;
230         }
231         Put(*next);
232       }
233     }
234     exponent = 0;
235   } else if (first == decimal || (first >= '0' && first <= '9') ||
236       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
237       first == 'D' || first == 'Q') {
238     Put('.'); // input field is normalized to a fraction
239     auto start{got};
240     for (; next; next = io.NextInField(remaining, edit)) {
241       char32_t ch{*next};
242       if (ch == ' ' || ch == '\t') {
243         if (bzMode) {
244           ch = '0'; // BZ mode - treat blank as if it were zero
245         } else {
246           continue;
247         }
248       }
249       if (ch == '0' && got == start && !decimalPoint) {
250         // omit leading zeroes before the decimal
251       } else if (ch >= '0' && ch <= '9') {
252         Put(ch);
253       } else if (ch == decimal && !decimalPoint) {
254         // the decimal point is *not* copied to the buffer
255         decimalPoint = got - start; // # of digits before the decimal point
256       } else {
257         break;
258       }
259     }
260     if (got == start) {
261       // Nothing but zeroes and maybe a decimal point.  F'2018 requires
262       // at least one digit, but F'77 did not, and a bare "." shows up in
263       // the FCVS suite.
264       Put('0'); // emit at least one digit
265     }
266     if (next &&
267         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
268             *next == 'q' || *next == 'Q')) {
269       // Optional exponent letter.  Blanks are allowed between the
270       // optional exponent letter and the exponent value.
271       io.SkipSpaces(remaining);
272       next = io.NextInField(remaining, edit);
273     }
274     // The default exponent is -kP, but the scale factor doesn't affect
275     // an explicit exponent.
276     exponent = -edit.modes.scale;
277     if (next &&
278         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
279             *next == ' ' || *next == '\t')) {
280       bool negExpo{*next == '-'};
281       if (negExpo || *next == '+') {
282         next = io.NextInField(remaining, edit);
283       }
284       for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
285         if (*next >= '0' && *next <= '9') {
286           exponent = 10 * exponent + *next - '0';
287         } else if (*next == ' ' || *next == '\t') {
288           if (bzMode) {
289             exponent = 10 * exponent;
290           }
291         } else {
292           break;
293         }
294       }
295       if (negExpo) {
296         exponent = -exponent;
297       }
298     }
299     if (decimalPoint) {
300       exponent += *decimalPoint;
301     } else {
302       // When no decimal point (or comma) appears in the value, the 'd'
303       // part of the edit descriptor must be interpreted as the number of
304       // digits in the value to be interpreted as being to the *right* of
305       // the assumed decimal point (13.7.2.3.2)
306       exponent += got - start - edit.digits.value_or(0);
307     }
308   } else {
309     // TODO: hex FP input
310     exponent = 0;
311     return 0;
312   }
313   // Consume the trailing ')' of a list-directed or NAMELIST complex
314   // input value.
315   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
316     if (next && (*next == ' ' || *next == '\t')) {
317       next = io.NextInField(remaining, edit);
318     }
319     if (!next) { // NextInField fails on separators like ')'
320       std::size_t byteCount{0};
321       next = io.GetCurrentChar(byteCount);
322       if (next && *next == ')') {
323         io.HandleRelativePosition(byteCount);
324       }
325     }
326   } else if (remaining) {
327     while (next && (*next == ' ' || *next == '\t')) {
328       next = io.NextInField(remaining, edit);
329     }
330     if (next) {
331       return 0; // error: unused nonblank character in fixed-width field
332     }
333   }
334   return got;
335 }
336 
337 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
338 #undef RAISE
339 #ifdef feraisexcept // a macro in some environments; omit std::
340 #define RAISE feraiseexcept
341 #else
342 #define RAISE std::feraiseexcept
343 #endif
344   if (flags & decimal::ConversionResultFlags::Overflow) {
345     RAISE(FE_OVERFLOW);
346   }
347   if (flags & decimal::ConversionResultFlags::Inexact) {
348     RAISE(FE_INEXACT);
349   }
350   if (flags & decimal::ConversionResultFlags::Invalid) {
351     RAISE(FE_INVALID);
352   }
353 #undef RAISE
354 }
355 
356 // If no special modes are in effect and the form of the input value
357 // that's present in the input stream is acceptable to the decimal->binary
358 // converter without modification, this fast path for real input
359 // saves time by avoiding memory copies and reformatting of the exponent.
360 template <int PRECISION>
361 static bool TryFastPathRealInput(
362     IoStatementState &io, const DataEdit &edit, void *n) {
363   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
364     return false;
365   }
366   if (edit.modes.scale != 0) {
367     return false;
368   }
369   const char *str{nullptr};
370   std::size_t got{io.GetNextInputBytes(str)};
371   if (got == 0 || str == nullptr ||
372       !io.GetConnectionState().recordLength.has_value()) {
373     return false; // could not access reliably-terminated input stream
374   }
375   const char *p{str};
376   std::int64_t maxConsume{
377       std::min<std::int64_t>(got, edit.width.value_or(got))};
378   const char *limit{str + maxConsume};
379   decimal::ConversionToBinaryResult<PRECISION> converted{
380       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
381   if (converted.flags & decimal::Invalid) {
382     return false;
383   }
384   if (edit.digits.value_or(0) != 0) {
385     // Edit descriptor is Fw.d (or other) with d != 0, which
386     // implies scaling
387     const char *q{str};
388     for (; q < limit; ++q) {
389       if (*q == '.' || *q == 'n' || *q == 'N') {
390         break;
391       }
392     }
393     if (q == limit) {
394       // No explicit decimal point, and not NaN/Inf.
395       return false;
396     }
397   }
398   for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
399   }
400   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
401     // Need to consume a trailing ')' and any white space after
402     if (p >= limit || *p != ')') {
403       return false;
404     }
405     for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
406     }
407   }
408   if (edit.width && p < str + *edit.width) {
409     return false; // unconverted characters remain in fixed width field
410   }
411   // Success on the fast path!
412   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
413       converted.binary;
414   io.HandleRelativePosition(p - str);
415   // Set FP exception flags
416   if (converted.flags != decimal::ConversionResultFlags::Exact) {
417     RaiseFPExceptions(converted.flags);
418   }
419   return true;
420 }
421 
422 template <int KIND>
423 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
424   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
425   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
426     return true;
427   }
428   // Fast path wasn't available or didn't work; go the more general route
429   static constexpr int maxDigits{
430       common::MaxDecimalConversionDigits(binaryPrecision)};
431   static constexpr int bufferSize{maxDigits + 18};
432   char buffer[bufferSize];
433   int exponent{0};
434   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
435   if (got >= maxDigits + 2) {
436     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
437     return false;
438   }
439   if (got == 0) {
440     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
441     return false;
442   }
443   bool hadExtra{got > maxDigits};
444   if (exponent != 0) {
445     buffer[got++] = 'e';
446     if (exponent < 0) {
447       buffer[got++] = '-';
448       exponent = -exponent;
449     }
450     if (exponent > 9999) {
451       exponent = 9999; // will convert to +/-Inf
452     }
453     if (exponent > 999) {
454       int dig{exponent / 1000};
455       buffer[got++] = '0' + dig;
456       int rest{exponent - 1000 * dig};
457       dig = rest / 100;
458       buffer[got++] = '0' + dig;
459       rest -= 100 * dig;
460       dig = rest / 10;
461       buffer[got++] = '0' + dig;
462       buffer[got++] = '0' + (rest - 10 * dig);
463     } else if (exponent > 99) {
464       int dig{exponent / 100};
465       buffer[got++] = '0' + dig;
466       int rest{exponent - 100 * dig};
467       dig = rest / 10;
468       buffer[got++] = '0' + dig;
469       buffer[got++] = '0' + (rest - 10 * dig);
470     } else if (exponent > 9) {
471       int dig{exponent / 10};
472       buffer[got++] = '0' + dig;
473       buffer[got++] = '0' + (exponent - 10 * dig);
474     } else {
475       buffer[got++] = '0' + exponent;
476     }
477   }
478   buffer[got] = '\0';
479   const char *p{buffer};
480   decimal::ConversionToBinaryResult<binaryPrecision> converted{
481       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
482   if (hadExtra) {
483     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
484         converted.flags | decimal::Inexact);
485   }
486   if (*p) { // unprocessed junk after value
487     io.GetIoErrorHandler().SignalError(IostatBadRealInput);
488     return false;
489   }
490   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
491       converted.binary;
492   // Set FP exception flags
493   if (converted.flags != decimal::ConversionResultFlags::Exact) {
494     RaiseFPExceptions(converted.flags);
495   }
496   return true;
497 }
498 
499 template <int KIND>
500 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
501   switch (edit.descriptor) {
502   case DataEdit::ListDirected:
503     if (IsNamelistName(io)) {
504       return false;
505     }
506     return EditCommonRealInput<KIND>(io, edit, n);
507   case DataEdit::ListDirectedRealPart:
508   case DataEdit::ListDirectedImaginaryPart:
509   case 'F':
510   case 'E': // incl. EN, ES, & EX
511   case 'D':
512   case 'G':
513     return EditCommonRealInput<KIND>(io, edit, n);
514   case 'B':
515     return EditBOZInput<1>(io, edit, n,
516         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
517   case 'O':
518     return EditBOZInput<3>(io, edit, n,
519         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
520   case 'Z':
521     return EditBOZInput<4>(io, edit, n,
522         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
523   case 'A': // legacy extension
524     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
525   default:
526     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
527         "Data edit descriptor '%c' may not be used for REAL input",
528         edit.descriptor);
529     return false;
530   }
531 }
532 
533 // 13.7.3 in Fortran 2018
534 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
535   switch (edit.descriptor) {
536   case DataEdit::ListDirected:
537     if (IsNamelistName(io)) {
538       return false;
539     }
540     break;
541   case 'L':
542   case 'G':
543     break;
544   default:
545     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
546         "Data edit descriptor '%c' may not be used for LOGICAL input",
547         edit.descriptor);
548     return false;
549   }
550   std::optional<int> remaining;
551   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
552   if (next && *next == '.') { // skip optional period
553     next = io.NextInField(remaining, edit);
554   }
555   if (!next) {
556     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
557     return false;
558   }
559   switch (*next) {
560   case 'T':
561   case 't':
562     x = true;
563     break;
564   case 'F':
565   case 'f':
566     x = false;
567     break;
568   default:
569     io.GetIoErrorHandler().SignalError(
570         "Bad character '%lc' in LOGICAL input field", *next);
571     return false;
572   }
573   if (remaining) { // ignore the rest of the field
574     io.HandleRelativePosition(*remaining);
575   } else if (edit.descriptor == DataEdit::ListDirected) {
576     while (io.NextInField(remaining, edit)) { // discard rest of field
577     }
578   }
579   return true;
580 }
581 
582 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
583 template <typename CHAR>
584 static bool EditDelimitedCharacterInput(
585     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
586   bool result{true};
587   while (true) {
588     std::size_t byteCount{0};
589     auto ch{io.GetCurrentChar(byteCount)};
590     if (!ch) {
591       if (io.AdvanceRecord()) {
592         continue;
593       } else {
594         result = false; // EOF in character value
595         break;
596       }
597     }
598     io.HandleRelativePosition(byteCount);
599     if (*ch == delimiter) {
600       auto next{io.GetCurrentChar(byteCount)};
601       if (next && *next == delimiter) {
602         // Repeated delimiter: use as character value
603         io.HandleRelativePosition(byteCount);
604       } else {
605         break; // closing delimiter
606       }
607     }
608     if (length > 0) {
609       *x++ = *ch;
610       --length;
611     }
612   }
613   std::fill_n(x, length, ' ');
614   return result;
615 }
616 
617 template <typename CHAR>
618 static bool EditListDirectedCharacterInput(
619     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
620   std::size_t byteCount{0};
621   auto ch{io.GetCurrentChar(byteCount)};
622   if (ch && (*ch == '\'' || *ch == '"')) {
623     io.HandleRelativePosition(byteCount);
624     return EditDelimitedCharacterInput(io, x, length, *ch);
625   }
626   if (IsNamelistName(io) || io.GetConnectionState().IsAtEOF()) {
627     return false;
628   }
629   // Undelimited list-directed character input: stop at a value separator
630   // or the end of the current record.  Subtlety: the "remaining" count
631   // here is a dummy that's used to avoid the interpretation of separators
632   // in NextInField.
633   std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
634   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
635     switch (*next) {
636     case ' ':
637     case '\t':
638     case ',':
639     case ';':
640     case '/':
641       remaining = 0; // value separator: stop
642       break;
643     default:
644       *x++ = *next;
645       remaining = --length > 0 ? maxUTF8Bytes : 0;
646     }
647   }
648   std::fill_n(x, length, ' ');
649   return true;
650 }
651 
652 template <typename CHAR>
653 bool EditCharacterInput(
654     IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
655   switch (edit.descriptor) {
656   case DataEdit::ListDirected:
657     return EditListDirectedCharacterInput(io, x, length, edit);
658   case 'A':
659   case 'G':
660     break;
661   case 'B':
662     return EditBOZInput<1>(io, edit, x, length * sizeof *x);
663   case 'O':
664     return EditBOZInput<3>(io, edit, x, length * sizeof *x);
665   case 'Z':
666     return EditBOZInput<4>(io, edit, x, length * sizeof *x);
667   default:
668     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
669         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
670         edit.descriptor);
671     return false;
672   }
673   const ConnectionState &connection{io.GetConnectionState()};
674   if (connection.IsAtEOF()) {
675     return false;
676   }
677   std::size_t remaining{length};
678   if (edit.width && *edit.width > 0) {
679     remaining = *edit.width;
680   }
681   // When the field is wider than the variable, we drop the leading
682   // characters.  When the variable is wider than the field, there's
683   // trailing padding.
684   const char *input{nullptr};
685   std::size_t ready{0};
686   bool hitEnd{false};
687   // Skip leading bytes.
688   // These bytes don't count towards INQUIRE(IOLENGTH=).
689   std::size_t skip{remaining > length ? remaining - length : 0};
690   // Transfer payload bytes; these do count.
691   while (remaining > 0) {
692     if (ready == 0) {
693       ready = io.GetNextInputBytes(input);
694       if (ready == 0) {
695         hitEnd = true;
696         break;
697       }
698     }
699     std::size_t chunk;
700     bool skipping{skip > 0};
701     if (connection.isUTF8) {
702       chunk = MeasureUTF8Bytes(*input);
703       if (skipping) {
704         --skip;
705       } else if (auto ucs{DecodeUTF8(input)}) {
706         *x++ = *ucs;
707         --length;
708       } else if (chunk == 0) {
709         // error recovery: skip bad encoding
710         chunk = 1;
711       }
712       --remaining;
713     } else {
714       if (skipping) {
715         chunk = std::min<std::size_t>(skip, ready);
716         skip -= chunk;
717       } else {
718         chunk = std::min<std::size_t>(remaining, ready);
719         std::memcpy(x, input, chunk);
720         x += chunk;
721         length -= chunk;
722       }
723       remaining -= chunk;
724     }
725     input += chunk;
726     if (!skipping) {
727       io.GotChar(chunk);
728     }
729     io.HandleRelativePosition(chunk);
730     ready -= chunk;
731   }
732   // Pad the remainder of the input variable, if any.
733   std::fill_n(x, length, ' ');
734   if (hitEnd) {
735     io.CheckForEndOfRecord(); // signal any needed error
736   }
737   return true;
738 }
739 
740 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
741 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
742 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
743 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
744 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
745 // TODO: double/double
746 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
747 
748 template bool EditCharacterInput(
749     IoStatementState &, const DataEdit &, char *, std::size_t);
750 template bool EditCharacterInput(
751     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
752 template bool EditCharacterInput(
753     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
754 
755 } // namespace Fortran::runtime::io
756