xref: /llvm-project/flang/runtime/edit-input.cpp (revision 7aad87312a00a6ce4cdf7fd5bd5d597ff413a600)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "namelist.h"
11 #include "utf.h"
12 #include "flang/Common/optional.h"
13 #include "flang/Common/real.h"
14 #include "flang/Common/uint128.h"
15 #include "flang/Runtime/freestanding-tools.h"
16 #include <algorithm>
17 #include <cfenv>
18 
19 namespace Fortran::runtime::io {
20 RT_OFFLOAD_API_GROUP_BEGIN
21 
22 // Checks that a list-directed input value has been entirely consumed and
23 // doesn't contain unparsed characters before the next value separator.
24 static inline RT_API_ATTRS bool IsCharValueSeparator(
25     const DataEdit &edit, char32_t ch) {
26   char32_t comma{
27       edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
28   return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
29       (edit.IsNamelist() && (ch == '&' || ch == '$'));
30 }
31 
32 static RT_API_ATTRS bool CheckCompleteListDirectedField(
33     IoStatementState &io, const DataEdit &edit) {
34   if (edit.IsListDirected()) {
35     std::size_t byteCount;
36     if (auto ch{io.GetCurrentChar(byteCount)}) {
37       if (IsCharValueSeparator(edit, *ch)) {
38         return true;
39       } else {
40         const auto &connection{io.GetConnectionState()};
41         io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42             "invalid character (0x%x) after list-directed input value, "
43             "at column %d in record %d",
44             static_cast<unsigned>(*ch),
45             static_cast<int>(connection.positionInRecord + 1),
46             static_cast<int>(connection.currentRecordNumber));
47         return false;
48       }
49     } else {
50       return true; // end of record: ok
51     }
52   } else {
53     return true;
54   }
55 }
56 
57 static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
58   return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','};
59 }
60 
61 template <int LOG2_BASE>
62 static RT_API_ATTRS bool EditBOZInput(
63     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
64   // Skip leading white space & zeroes
65   Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
66   auto start{io.GetConnectionState().positionInRecord};
67   Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
68   if (next.value_or('?') == '0') {
69     do {
70       start = io.GetConnectionState().positionInRecord;
71       next = io.NextInField(remaining, edit);
72     } while (next && *next == '0');
73   }
74   // Count significant digits after any leading white space & zeroes
75   int digits{0};
76   int significantBits{0};
77   const char32_t comma{GetSeparatorChar(edit)};
78   for (; next; next = io.NextInField(remaining, edit)) {
79     char32_t ch{*next};
80     if (ch == ' ' || ch == '\t') {
81       if (edit.modes.editingFlags & blankZero) {
82         ch = '0'; // BZ mode - treat blank as if it were zero
83       } else {
84         continue;
85       }
86     }
87     if (ch >= '0' && ch <= '1') {
88     } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
89     } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
90     } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
91     } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
92     } else if (ch == comma) {
93       break; // end non-list-directed field early
94     } else {
95       io.GetIoErrorHandler().SignalError(
96           "Bad character '%lc' in B/O/Z input field", ch);
97       return false;
98     }
99     if (digits++ == 0) {
100       significantBits = 4;
101       if (ch >= '0' && ch <= '1') {
102         significantBits = 1;
103       } else if (ch >= '2' && ch <= '3') {
104         significantBits = 2;
105       } else if (ch >= '4' && ch <= '7') {
106         significantBits = 3;
107       } else {
108         significantBits = 4;
109       }
110     } else {
111       significantBits += LOG2_BASE;
112     }
113   }
114   auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
115   if (significantBytes > bytes) {
116     io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
117         "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
118     return false;
119   }
120   // Reset to start of significant digits
121   io.HandleAbsolutePosition(start);
122   remaining.reset();
123   // Make a second pass now that the digit count is known
124   std::memset(n, 0, bytes);
125   int increment{isHostLittleEndian ? -1 : 1};
126   auto *data{reinterpret_cast<unsigned char *>(n) +
127       (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
128   int shift{((digits - 1) * LOG2_BASE) & 7};
129   while (digits > 0) {
130     char32_t ch{*io.NextInField(remaining, edit)};
131     int digit{0};
132     if (ch == ' ' || ch == '\t') {
133       if (edit.modes.editingFlags & blankZero) {
134         ch = '0'; // BZ mode - treat blank as if it were zero
135       } else {
136         continue;
137       }
138     }
139     --digits;
140     if (ch >= '0' && ch <= '9') {
141       digit = ch - '0';
142     } else if (ch >= 'A' && ch <= 'F') {
143       digit = ch + 10 - 'A';
144     } else if (ch >= 'a' && ch <= 'f') {
145       digit = ch + 10 - 'a';
146     } else {
147       continue;
148     }
149     if (shift < 0) {
150       if (shift + LOG2_BASE > 0) { // misaligned octal
151         *data |= digit >> -shift;
152       }
153       shift += 8;
154       data += increment;
155     }
156     *data |= digit << shift;
157     shift -= LOG2_BASE;
158   }
159   return CheckCompleteListDirectedField(io, edit);
160 }
161 
162 static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
163   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
164 }
165 
166 // Prepares input from a field, and returns the sign, if any, else '\0'.
167 static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
168     const DataEdit &edit, Fortran::common::optional<char32_t> &next,
169     Fortran::common::optional<int> &remaining) {
170   remaining = io.CueUpInput(edit);
171   next = io.NextInField(remaining, edit);
172   char sign{'\0'};
173   if (next) {
174     if (*next == '-' || *next == '+') {
175       sign = *next;
176       if (!edit.IsListDirected()) {
177         io.SkipSpaces(remaining);
178       }
179       next = io.NextInField(remaining, edit);
180     }
181   }
182   return sign;
183 }
184 
185 RT_API_ATTRS bool EditIntegerInput(
186     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
187   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
188   switch (edit.descriptor) {
189   case DataEdit::ListDirected:
190     if (IsNamelistNameOrSlash(io)) {
191       return false;
192     }
193     break;
194   case 'G':
195   case 'I':
196     break;
197   case 'B':
198     return EditBOZInput<1>(io, edit, n, kind);
199   case 'O':
200     return EditBOZInput<3>(io, edit, n, kind);
201   case 'Z':
202     return EditBOZInput<4>(io, edit, n, kind);
203   case 'A': // legacy extension
204     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
205   default:
206     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
207         "Data edit descriptor '%c' may not be used with an INTEGER data item",
208         edit.descriptor);
209     return false;
210   }
211   Fortran::common::optional<int> remaining;
212   Fortran::common::optional<char32_t> next;
213   char sign{ScanNumericPrefix(io, edit, next, remaining)};
214   common::UnsignedInt128 value{0};
215   bool any{!!sign};
216   bool overflow{false};
217   const char32_t comma{GetSeparatorChar(edit)};
218   for (; next; next = io.NextInField(remaining, edit)) {
219     char32_t ch{*next};
220     if (ch == ' ' || ch == '\t') {
221       if (edit.modes.editingFlags & blankZero) {
222         ch = '0'; // BZ mode - treat blank as if it were zero
223       } else {
224         continue;
225       }
226     }
227     int digit{0};
228     if (ch >= '0' && ch <= '9') {
229       digit = ch - '0';
230     } else if (ch == comma) {
231       break; // end non-list-directed field early
232     } else {
233       if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
234         // Ignore any fractional part that might appear in NAMELIST integer
235         // input, like a few other Fortran compilers do.
236         // TODO: also process exponents?  Some compilers do, but they obviously
237         // can't just be ignored.
238         while ((next = io.NextInField(remaining, edit))) {
239           if (*next < '0' || *next > '9') {
240             break;
241           }
242         }
243         if (!next || *next == comma) {
244           break;
245         }
246       }
247       io.GetIoErrorHandler().SignalError(
248           "Bad character '%lc' in INTEGER input field", ch);
249       return false;
250     }
251     static constexpr auto maxu128{~common::UnsignedInt128{0}};
252     static constexpr auto maxu128OverTen{maxu128 / 10};
253     static constexpr int maxLastDigit{
254         static_cast<int>(maxu128 - (maxu128OverTen * 10))};
255     overflow |= value >= maxu128OverTen &&
256         (value > maxu128OverTen || digit > maxLastDigit);
257     value *= 10;
258     value += digit;
259     any = true;
260   }
261   if (!any && !remaining) {
262     io.GetIoErrorHandler().SignalError(
263         "Integer value absent from NAMELIST or list-directed input");
264     return false;
265   }
266   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
267   overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
268   if (overflow) {
269     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
270         "Decimal input overflows INTEGER(%d) variable", kind);
271     return false;
272   }
273   if (sign == '-') {
274     value = -value;
275   }
276   if (any || !io.GetIoErrorHandler().InError()) {
277     // The value is stored in the lower order bits on big endian platform.
278     // When memcpy, shift the value to the higher order bit.
279     auto shft{static_cast<int>(sizeof(value.low())) - kind};
280     // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
281     if (!isHostLittleEndian && shft >= 0) {
282       auto l{value.low() << (8 * shft)};
283       std::memcpy(n, &l, kind);
284     } else {
285       std::memcpy(n, &value, kind); // a blank field means zero
286     }
287     return true;
288   } else {
289     return false;
290   }
291 }
292 
293 // Parses a REAL input number from the input source as a normalized
294 // fraction into a supplied buffer -- there's an optional '-', a
295 // decimal point when the input is not hexadecimal, and at least one
296 // digit.  Replaces blanks with zeroes where appropriate.
297 struct ScannedRealInput {
298   // Number of characters that (should) have been written to the
299   // buffer -- this can be larger than the buffer size, which
300   // indicates buffer overflow.  Zero indicates an error.
301   int got{0};
302   int exponent{0}; // adjusted as necessary; binary if isHexadecimal
303   bool isHexadecimal{false}; // 0X...
304 };
305 static RT_API_ATTRS ScannedRealInput ScanRealInput(
306     char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
307   Fortran::common::optional<int> remaining;
308   Fortran::common::optional<char32_t> next;
309   int got{0};
310   Fortran::common::optional<int> radixPointOffset;
311   // The following lambda definition violates the conding style,
312   // but cuda-11.8 nvcc hits an internal error with the brace initialization.
313   auto Put = [&](char ch) -> void {
314     if (got < bufferSize) {
315       buffer[got] = ch;
316     }
317     ++got;
318   };
319   char sign{ScanNumericPrefix(io, edit, next, remaining)};
320   if (sign == '-') {
321     Put('-');
322   }
323   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
324   int exponent{0};
325   if (!next || (!bzMode && *next == ' ') ||
326       (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
327     if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
328       // An empty/blank field means zero when not list-directed.
329       // A fixed-width field containing only a sign is also zero;
330       // this behavior isn't standard-conforming in F'2023 but it is
331       // required to pass FCVS.
332       Put('0');
333     }
334     return {got, exponent, false};
335   }
336   char32_t radixPointChar{GetRadixPointChar(edit)};
337   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
338   bool isHexadecimal{false};
339   if (first == 'N' || first == 'I') {
340     // NaN or infinity - convert to upper case
341     // Subtle: a blank field of digits could be followed by 'E' or 'D',
342     for (; next &&
343          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
344          next = io.NextInField(remaining, edit)) {
345       if (*next >= 'a' && *next <= 'z') {
346         Put(*next - 'a' + 'A');
347       } else {
348         Put(*next);
349       }
350     }
351     if (next && *next == '(') { // NaN(...)
352       Put('(');
353       int depth{1};
354       while (true) {
355         next = io.NextInField(remaining, edit);
356         if (depth == 0) {
357           break;
358         } else if (!next) {
359           return {}; // error
360         } else if (*next == '(') {
361           ++depth;
362         } else if (*next == ')') {
363           --depth;
364         }
365         Put(*next);
366       }
367     }
368   } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
369       (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
370       first == 'D' || first == 'Q') {
371     if (first == '0') {
372       next = io.NextInField(remaining, edit);
373       if (next && (*next == 'x' || *next == 'X')) { // 0X...
374         isHexadecimal = true;
375         next = io.NextInField(remaining, edit);
376       } else {
377         Put('0');
378       }
379     }
380     // input field is normalized to a fraction
381     if (!isHexadecimal) {
382       Put('.');
383     }
384     auto start{got};
385     for (; next; next = io.NextInField(remaining, edit)) {
386       char32_t ch{*next};
387       if (ch == ' ' || ch == '\t') {
388         if (isHexadecimal) {
389           return {}; // error
390         } else if (bzMode) {
391           ch = '0'; // BZ mode - treat blank as if it were zero
392         } else {
393           continue; // ignore blank in fixed field
394         }
395       }
396       if (ch == '0' && got == start && !radixPointOffset) {
397         // omit leading zeroes before the radix point
398       } else if (ch >= '0' && ch <= '9') {
399         Put(ch);
400       } else if (ch == radixPointChar && !radixPointOffset) {
401         // The radix point character is *not* copied to the buffer.
402         radixPointOffset = got - start; // # of digits before the radix point
403       } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
404         Put(ch);
405       } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
406         Put(ch - 'a' + 'A'); // normalize to capitals
407       } else {
408         break;
409       }
410     }
411     if (got == start) {
412       // Nothing but zeroes and maybe a radix point.  F'2018 requires
413       // at least one digit, but F'77 did not, and a bare "." shows up in
414       // the FCVS suite.
415       Put('0'); // emit at least one digit
416     }
417     // In list-directed input, a bad exponent is not consumed.
418     auto nextBeforeExponent{next};
419     auto startExponent{io.GetConnectionState().positionInRecord};
420     bool hasGoodExponent{false};
421     if (next) {
422       if (isHexadecimal) {
423         if (*next == 'p' || *next == 'P') {
424           next = io.NextInField(remaining, edit);
425         } else {
426           // The binary exponent is not optional in the standard.
427           return {}; // error
428         }
429       } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
430           *next == 'q' || *next == 'Q') {
431         // Optional exponent letter.  Blanks are allowed between the
432         // optional exponent letter and the exponent value.
433         io.SkipSpaces(remaining);
434         next = io.NextInField(remaining, edit);
435       }
436     }
437     if (next &&
438         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
439             *next == ' ' || *next == '\t')) {
440       bool negExpo{*next == '-'};
441       if (negExpo || *next == '+') {
442         next = io.NextInField(remaining, edit);
443       }
444       for (; next; next = io.NextInField(remaining, edit)) {
445         if (*next >= '0' && *next <= '9') {
446           hasGoodExponent = true;
447           if (exponent < 10000) {
448             exponent = 10 * exponent + *next - '0';
449           }
450         } else if (*next == ' ' || *next == '\t') {
451           if (isHexadecimal) {
452             break;
453           } else if (bzMode) {
454             hasGoodExponent = true;
455             exponent = 10 * exponent;
456           }
457         } else {
458           break;
459         }
460       }
461       if (negExpo) {
462         exponent = -exponent;
463       }
464     }
465     if (!hasGoodExponent) {
466       if (isHexadecimal) {
467         return {}; // error
468       }
469       // There isn't a good exponent; do not consume it.
470       next = nextBeforeExponent;
471       io.HandleAbsolutePosition(startExponent);
472       // The default exponent is -kP, but the scale factor doesn't affect
473       // an explicit exponent.
474       exponent = -edit.modes.scale;
475     }
476     // Adjust exponent by number of digits before the radix point.
477     if (isHexadecimal) {
478       // Exponents for hexadecimal input are binary.
479       exponent += radixPointOffset.value_or(got - start) * 4;
480     } else if (radixPointOffset) {
481       exponent += *radixPointOffset;
482     } else {
483       // When no redix point (or comma) appears in the value, the 'd'
484       // part of the edit descriptor must be interpreted as the number of
485       // digits in the value to be interpreted as being to the *right* of
486       // the assumed radix point (13.7.2.3.2)
487       exponent += got - start - edit.digits.value_or(0);
488     }
489   }
490   // Consume the trailing ')' of a list-directed or NAMELIST complex
491   // input value.
492   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
493     if (next && (*next == ' ' || *next == '\t')) {
494       io.SkipSpaces(remaining);
495       next = io.NextInField(remaining, edit);
496     }
497     if (!next) { // NextInField fails on separators like ')'
498       std::size_t byteCount{0};
499       next = io.GetCurrentChar(byteCount);
500       if (next && *next == ')') {
501         io.HandleRelativePosition(byteCount);
502       }
503     }
504   } else if (remaining) {
505     while (next && (*next == ' ' || *next == '\t')) {
506       next = io.NextInField(remaining, edit);
507     }
508     if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
509       return {}; // error: unused nonblank character in fixed-width field
510     }
511   }
512   return {got, exponent, isHexadecimal};
513 }
514 
515 static RT_API_ATTRS void RaiseFPExceptions(
516     decimal::ConversionResultFlags flags) {
517 #undef RAISE
518 #if defined(RT_DEVICE_COMPILATION)
519   Terminator terminator(__FILE__, __LINE__);
520 #define RAISE(e) \
521   terminator.Crash( \
522       "not implemented yet: raising FP exception in device code: %s", #e);
523 #else // !defined(RT_DEVICE_COMPILATION)
524 #ifdef feraisexcept // a macro in some environments; omit std::
525 #define RAISE feraiseexcept
526 #else
527 #define RAISE std::feraiseexcept
528 #endif
529 #endif // !defined(RT_DEVICE_COMPILATION)
530 
531 // Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed
532 // by c99 (but not c++11) :-/
533 #if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION)
534   if (flags & decimal::ConversionResultFlags::Overflow) {
535     RAISE(FE_OVERFLOW);
536   }
537 #endif
538 #if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION)
539   if (flags & decimal::ConversionResultFlags::Underflow) {
540     RAISE(FE_UNDERFLOW);
541   }
542 #endif
543 #if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION)
544   if (flags & decimal::ConversionResultFlags::Inexact) {
545     RAISE(FE_INEXACT);
546   }
547 #endif
548 #if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION)
549   if (flags & decimal::ConversionResultFlags::Invalid) {
550     RAISE(FE_INVALID);
551   }
552 #endif
553 #undef RAISE
554 }
555 
556 // If no special modes are in effect and the form of the input value
557 // that's present in the input stream is acceptable to the decimal->binary
558 // converter without modification, this fast path for real input
559 // saves time by avoiding memory copies and reformatting of the exponent.
560 template <int PRECISION>
561 static RT_API_ATTRS bool TryFastPathRealDecimalInput(
562     IoStatementState &io, const DataEdit &edit, void *n) {
563   if (edit.modes.editingFlags & (blankZero | decimalComma)) {
564     return false;
565   }
566   if (edit.modes.scale != 0) {
567     return false;
568   }
569   const ConnectionState &connection{io.GetConnectionState()};
570   if (connection.internalIoCharKind > 1) {
571     return false; // reading non-default character
572   }
573   const char *str{nullptr};
574   std::size_t got{io.GetNextInputBytes(str)};
575   if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
576     return false; // could not access reliably-terminated input stream
577   }
578   const char *p{str};
579   std::int64_t maxConsume{
580       std::min<std::int64_t>(got, edit.width.value_or(got))};
581   const char *limit{str + maxConsume};
582   decimal::ConversionToBinaryResult<PRECISION> converted{
583       decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
584   if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
585     return false;
586   }
587   if (edit.digits.value_or(0) != 0) {
588     // Edit descriptor is Fw.d (or other) with d != 0, which
589     // implies scaling
590     const char *q{str};
591     for (; q < limit; ++q) {
592       if (*q == '.' || *q == 'n' || *q == 'N') {
593         break;
594       }
595     }
596     if (q == limit) {
597       // No explicit decimal point, and not NaN/Inf.
598       return false;
599     }
600   }
601   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
602     // Need to consume a trailing ')', possibly with leading spaces
603     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
604     }
605     if (p < limit && *p == ')') {
606       ++p;
607     } else {
608       return false;
609     }
610   } else if (edit.IsListDirected()) {
611     if (p < limit && !IsCharValueSeparator(edit, *p)) {
612       return false;
613     }
614   } else {
615     for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
616     }
617     if (edit.width && p < str + *edit.width) {
618       return false; // unconverted characters remain in fixed width field
619     }
620   }
621   // Success on the fast path!
622   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
623       converted.binary;
624   io.HandleRelativePosition(p - str);
625   // Set FP exception flags
626   if (converted.flags != decimal::ConversionResultFlags::Exact) {
627     RaiseFPExceptions(converted.flags);
628   }
629   return true;
630 }
631 
632 template <int binaryPrecision>
633 RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
634 ConvertHexadecimal(
635     const char *&p, enum decimal::FortranRounding rounding, int expo) {
636   using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
637   using RawType = typename RealType::RawType;
638   bool isNegative{*p == '-'};
639   constexpr RawType one{1};
640   RawType signBit{0};
641   if (isNegative) {
642     ++p;
643     signBit = one << (RealType::bits - 1);
644   }
645   RawType fraction{0};
646   // Adjust the incoming binary P+/- exponent to shift the radix point
647   // to below the LSB and add in the bias.
648   expo += binaryPrecision - 1 + RealType::exponentBias;
649   // Input the fraction.
650   int roundingBit{0};
651   int guardBit{0};
652   for (; *p; ++p) {
653     fraction <<= 4;
654     expo -= 4;
655     if (*p >= '0' && *p <= '9') {
656       fraction |= *p - '0';
657     } else if (*p >= 'A' && *p <= 'F') {
658       fraction |= *p - 'A' + 10; // data were normalized to capitals
659     } else {
660       break;
661     }
662     if (fraction >> binaryPrecision) {
663       while (fraction >> binaryPrecision) {
664         guardBit |= roundingBit;
665         roundingBit = (int)fraction & 1;
666         fraction >>= 1;
667         ++expo;
668       }
669       // Consume excess digits
670       while (*++p) {
671         if (*p == '0') {
672         } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
673           guardBit = 1;
674         } else {
675           break;
676         }
677       }
678       break;
679     }
680   }
681   if (fraction) {
682     // Boost biased expo if too small
683     while (expo < 1) {
684       guardBit |= roundingBit;
685       roundingBit = (int)fraction & 1;
686       fraction >>= 1;
687       ++expo;
688     }
689     // Normalize
690     while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
691       fraction <<= 1;
692       --expo;
693       guardBit = roundingBit = 0;
694     }
695   }
696   // Rounding
697   bool increase{false};
698   switch (rounding) {
699   case decimal::RoundNearest: // RN & RP
700     increase = roundingBit && (guardBit | ((int)fraction & 1));
701     break;
702   case decimal::RoundUp: // RU
703     increase = !isNegative && (roundingBit | guardBit);
704     break;
705   case decimal::RoundDown: // RD
706     increase = isNegative && (roundingBit | guardBit);
707     break;
708   case decimal::RoundToZero: // RZ
709     break;
710   case decimal::RoundCompatible: // RC
711     increase = roundingBit != 0;
712     break;
713   }
714   if (increase) {
715     ++fraction;
716     if (fraction >> binaryPrecision) {
717       fraction >>= 1;
718       ++expo;
719     }
720   }
721   // Package & return result
722   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
723   int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
724   if (!fraction) {
725     expo = 0;
726   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
727     expo = 0; // subnormal
728     flags |= decimal::Underflow;
729   } else if (expo >= RealType::maxExponent) {
730     if (rounding == decimal::RoundToZero ||
731         (rounding == decimal::RoundDown && !isNegative) ||
732         (rounding == decimal::RoundUp && isNegative)) {
733       expo = RealType::maxExponent - 1; // +/-HUGE()
734       fraction = significandMask;
735     } else {
736       expo = RealType::maxExponent; // +/-Inf
737       fraction = 0;
738       flags |= decimal::Overflow;
739     }
740   } else {
741     fraction &= significandMask; // remove explicit normalization unless x87
742   }
743   return decimal::ConversionToBinaryResult<binaryPrecision>{
744       RealType{static_cast<RawType>(signBit |
745           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
746       static_cast<decimal::ConversionResultFlags>(flags)};
747 }
748 
749 template <int KIND>
750 RT_API_ATTRS bool EditCommonRealInput(
751     IoStatementState &io, const DataEdit &edit, void *n) {
752   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
753   if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
754     return CheckCompleteListDirectedField(io, edit);
755   }
756   // Fast path wasn't available or didn't work; go the more general route
757   static constexpr int maxDigits{
758       common::MaxDecimalConversionDigits(binaryPrecision)};
759   static constexpr int bufferSize{maxDigits + 18};
760   char buffer[bufferSize];
761   auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
762   int got{scanned.got};
763   if (got >= maxDigits + 2) {
764     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
765     return false;
766   }
767   if (got == 0) {
768     const auto &connection{io.GetConnectionState()};
769     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
770         "Bad real input data at column %d of record %d",
771         static_cast<int>(connection.positionInRecord + 1),
772         static_cast<int>(connection.currentRecordNumber));
773     return false;
774   }
775   decimal::ConversionToBinaryResult<binaryPrecision> converted;
776   const char *p{buffer};
777   if (scanned.isHexadecimal) {
778     buffer[got] = '\0';
779     converted = ConvertHexadecimal<binaryPrecision>(
780         p, edit.modes.round, scanned.exponent);
781   } else {
782     bool hadExtra{got > maxDigits};
783     int exponent{scanned.exponent};
784     if (exponent != 0) {
785       buffer[got++] = 'e';
786       if (exponent < 0) {
787         buffer[got++] = '-';
788         exponent = -exponent;
789       }
790       if (exponent > 9999) {
791         exponent = 9999; // will convert to +/-Inf
792       }
793       if (exponent > 999) {
794         int dig{exponent / 1000};
795         buffer[got++] = '0' + dig;
796         int rest{exponent - 1000 * dig};
797         dig = rest / 100;
798         buffer[got++] = '0' + dig;
799         rest -= 100 * dig;
800         dig = rest / 10;
801         buffer[got++] = '0' + dig;
802         buffer[got++] = '0' + (rest - 10 * dig);
803       } else if (exponent > 99) {
804         int dig{exponent / 100};
805         buffer[got++] = '0' + dig;
806         int rest{exponent - 100 * dig};
807         dig = rest / 10;
808         buffer[got++] = '0' + dig;
809         buffer[got++] = '0' + (rest - 10 * dig);
810       } else if (exponent > 9) {
811         int dig{exponent / 10};
812         buffer[got++] = '0' + dig;
813         buffer[got++] = '0' + (exponent - 10 * dig);
814       } else {
815         buffer[got++] = '0' + exponent;
816       }
817     }
818     buffer[got] = '\0';
819     converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
820     if (hadExtra) {
821       converted.flags = static_cast<enum decimal::ConversionResultFlags>(
822           converted.flags | decimal::Inexact);
823     }
824   }
825   if (*p) { // unprocessed junk after value
826     const auto &connection{io.GetConnectionState()};
827     io.GetIoErrorHandler().SignalError(IostatBadRealInput,
828         "Trailing characters after real input data at column %d of record %d",
829         static_cast<int>(connection.positionInRecord + 1),
830         static_cast<int>(connection.currentRecordNumber));
831     return false;
832   }
833   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
834       converted.binary;
835   // Set FP exception flags
836   if (converted.flags != decimal::ConversionResultFlags::Exact) {
837     if (converted.flags & decimal::ConversionResultFlags::Overflow) {
838       io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
839       return false;
840     }
841     RaiseFPExceptions(converted.flags);
842   }
843   return CheckCompleteListDirectedField(io, edit);
844 }
845 
846 template <int KIND>
847 RT_API_ATTRS bool EditRealInput(
848     IoStatementState &io, const DataEdit &edit, void *n) {
849   switch (edit.descriptor) {
850   case DataEdit::ListDirected:
851     if (IsNamelistNameOrSlash(io)) {
852       return false;
853     }
854     return EditCommonRealInput<KIND>(io, edit, n);
855   case DataEdit::ListDirectedRealPart:
856   case DataEdit::ListDirectedImaginaryPart:
857   case 'F':
858   case 'E': // incl. EN, ES, & EX
859   case 'D':
860   case 'G':
861     return EditCommonRealInput<KIND>(io, edit, n);
862   case 'B':
863     return EditBOZInput<1>(io, edit, n,
864         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
865   case 'O':
866     return EditBOZInput<3>(io, edit, n,
867         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
868   case 'Z':
869     return EditBOZInput<4>(io, edit, n,
870         common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
871   case 'A': // legacy extension
872     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
873   default:
874     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
875         "Data edit descriptor '%c' may not be used for REAL input",
876         edit.descriptor);
877     return false;
878   }
879 }
880 
881 // 13.7.3 in Fortran 2018
882 RT_API_ATTRS bool EditLogicalInput(
883     IoStatementState &io, const DataEdit &edit, bool &x) {
884   switch (edit.descriptor) {
885   case DataEdit::ListDirected:
886     if (IsNamelistNameOrSlash(io)) {
887       return false;
888     }
889     break;
890   case 'L':
891   case 'G':
892     break;
893   default:
894     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
895         "Data edit descriptor '%c' may not be used for LOGICAL input",
896         edit.descriptor);
897     return false;
898   }
899   Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
900   Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
901   if (next && *next == '.') { // skip optional period
902     next = io.NextInField(remaining, edit);
903   }
904   if (!next) {
905     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
906     return false;
907   }
908   switch (*next) {
909   case 'T':
910   case 't':
911     x = true;
912     break;
913   case 'F':
914   case 'f':
915     x = false;
916     break;
917   default:
918     io.GetIoErrorHandler().SignalError(
919         "Bad character '%lc' in LOGICAL input field", *next);
920     return false;
921   }
922   if (remaining) { // ignore the rest of a fixed-width field
923     io.HandleRelativePosition(*remaining);
924   } else if (edit.descriptor == DataEdit::ListDirected) {
925     while (io.NextInField(remaining, edit)) { // discard rest of field
926     }
927   }
928   return CheckCompleteListDirectedField(io, edit);
929 }
930 
931 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
932 template <typename CHAR>
933 static RT_API_ATTRS bool EditDelimitedCharacterInput(
934     IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
935   bool result{true};
936   while (true) {
937     std::size_t byteCount{0};
938     auto ch{io.GetCurrentChar(byteCount)};
939     if (!ch) {
940       if (io.AdvanceRecord()) {
941         continue;
942       } else {
943         result = false; // EOF in character value
944         break;
945       }
946     }
947     io.HandleRelativePosition(byteCount);
948     if (*ch == delimiter) {
949       auto next{io.GetCurrentChar(byteCount)};
950       if (next && *next == delimiter) {
951         // Repeated delimiter: use as character value
952         io.HandleRelativePosition(byteCount);
953       } else {
954         break; // closing delimiter
955       }
956     }
957     if (length > 0) {
958       *x++ = *ch;
959       --length;
960     }
961   }
962   Fortran::runtime::fill_n(x, length, ' ');
963   return result;
964 }
965 
966 template <typename CHAR>
967 static RT_API_ATTRS bool EditListDirectedCharacterInput(
968     IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
969   std::size_t byteCount{0};
970   auto ch{io.GetCurrentChar(byteCount)};
971   if (ch && (*ch == '\'' || *ch == '"')) {
972     io.HandleRelativePosition(byteCount);
973     return EditDelimitedCharacterInput(io, x, length, *ch);
974   }
975   if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
976     return false;
977   }
978   // Undelimited list-directed character input: stop at a value separator
979   // or the end of the current record.  Subtlety: the "remaining" count
980   // here is a dummy that's used to avoid the interpretation of separators
981   // in NextInField.
982   Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
983   while (Fortran::common::optional<char32_t> next{
984       io.NextInField(remaining, edit)}) {
985     bool isSep{false};
986     switch (*next) {
987     case ' ':
988     case '\t':
989     case '/':
990       isSep = true;
991       break;
992     case '&':
993     case '$':
994       isSep = edit.IsNamelist();
995       break;
996     case ',':
997       isSep = !(edit.modes.editingFlags & decimalComma);
998       break;
999     case ';':
1000       isSep = !!(edit.modes.editingFlags & decimalComma);
1001       break;
1002     default:
1003       break;
1004     }
1005     if (isSep) {
1006       remaining = 0;
1007     } else {
1008       *x++ = *next;
1009       remaining = --length > 0 ? maxUTF8Bytes : 0;
1010     }
1011   }
1012   Fortran::runtime::fill_n(x, length, ' ');
1013   return true;
1014 }
1015 
1016 template <typename CHAR>
1017 RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
1018     CHAR *x, std::size_t lengthChars) {
1019   switch (edit.descriptor) {
1020   case DataEdit::ListDirected:
1021     return EditListDirectedCharacterInput(io, x, lengthChars, edit);
1022   case 'A':
1023   case 'G':
1024     break;
1025   case 'B':
1026     return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
1027   case 'O':
1028     return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
1029   case 'Z':
1030     return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1031   default:
1032     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1033         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1034         edit.descriptor);
1035     return false;
1036   }
1037   const ConnectionState &connection{io.GetConnectionState()};
1038   std::size_t remainingChars{lengthChars};
1039   // Skip leading characters.
1040   // Their bytes don't count towards INQUIRE(IOLENGTH=).
1041   std::size_t skipChars{0};
1042   if (edit.width && *edit.width > 0) {
1043     remainingChars = *edit.width;
1044     if (remainingChars > lengthChars) {
1045       skipChars = remainingChars - lengthChars;
1046     }
1047   }
1048   // When the field is wider than the variable, we drop the leading
1049   // characters.  When the variable is wider than the field, there can be
1050   // trailing padding or an EOR condition.
1051   const char *input{nullptr};
1052   std::size_t readyBytes{0};
1053   // Transfer payload bytes; these do count.
1054   while (remainingChars > 0) {
1055     if (readyBytes == 0) {
1056       readyBytes = io.GetNextInputBytes(input);
1057       if (readyBytes == 0 ||
1058           (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1059         if (io.CheckForEndOfRecord(readyBytes)) {
1060           if (readyBytes == 0) {
1061             // PAD='YES' and no more data
1062             Fortran::runtime::fill_n(x, lengthChars, ' ');
1063             return !io.GetIoErrorHandler().InError();
1064           } else {
1065             // Do partial read(s) then pad on last iteration
1066           }
1067         } else {
1068           return !io.GetIoErrorHandler().InError();
1069         }
1070       }
1071     }
1072     std::size_t chunkBytes;
1073     std::size_t chunkChars{1};
1074     bool skipping{skipChars > 0};
1075     if (connection.isUTF8) {
1076       chunkBytes = MeasureUTF8Bytes(*input);
1077       if (skipping) {
1078         --skipChars;
1079       } else if (auto ucs{DecodeUTF8(input)}) {
1080         if ((sizeof *x == 1 && *ucs > 0xff) ||
1081             (sizeof *x == 2 && *ucs > 0xffff)) {
1082           *x++ = '?';
1083         } else {
1084           *x++ = *ucs;
1085         }
1086         --lengthChars;
1087       } else if (chunkBytes == 0) {
1088         // error recovery: skip bad encoding
1089         chunkBytes = 1;
1090       }
1091     } else if (connection.internalIoCharKind > 1) {
1092       // Reading from non-default character internal unit
1093       chunkBytes = connection.internalIoCharKind;
1094       if (skipping) {
1095         --skipChars;
1096       } else {
1097         char32_t buffer{0};
1098         std::memcpy(&buffer, input, chunkBytes);
1099         if ((sizeof *x == 1 && buffer > 0xff) ||
1100             (sizeof *x == 2 && buffer > 0xffff)) {
1101           *x++ = '?';
1102         } else {
1103           *x++ = buffer;
1104         }
1105         --lengthChars;
1106       }
1107     } else if constexpr (sizeof *x > 1) {
1108       // Read single byte with expansion into multi-byte CHARACTER
1109       chunkBytes = 1;
1110       if (skipping) {
1111         --skipChars;
1112       } else {
1113         *x++ = static_cast<unsigned char>(*input);
1114         --lengthChars;
1115       }
1116     } else { // single bytes -> default CHARACTER
1117       if (skipping) {
1118         chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1119         chunkChars = chunkBytes;
1120         skipChars -= chunkChars;
1121       } else {
1122         chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1123         chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1124         chunkChars = chunkBytes;
1125         std::memcpy(x, input, chunkBytes);
1126         x += chunkBytes;
1127         lengthChars -= chunkChars;
1128       }
1129     }
1130     input += chunkBytes;
1131     remainingChars -= chunkChars;
1132     if (!skipping) {
1133       io.GotChar(chunkBytes);
1134     }
1135     io.HandleRelativePosition(chunkBytes);
1136     readyBytes -= chunkBytes;
1137   }
1138   // Pad the remainder of the input variable, if any.
1139   Fortran::runtime::fill_n(x, lengthChars, ' ');
1140   return CheckCompleteListDirectedField(io, edit);
1141 }
1142 
1143 template RT_API_ATTRS bool EditRealInput<2>(
1144     IoStatementState &, const DataEdit &, void *);
1145 template RT_API_ATTRS bool EditRealInput<3>(
1146     IoStatementState &, const DataEdit &, void *);
1147 template RT_API_ATTRS bool EditRealInput<4>(
1148     IoStatementState &, const DataEdit &, void *);
1149 template RT_API_ATTRS bool EditRealInput<8>(
1150     IoStatementState &, const DataEdit &, void *);
1151 template RT_API_ATTRS bool EditRealInput<10>(
1152     IoStatementState &, const DataEdit &, void *);
1153 // TODO: double/double
1154 template RT_API_ATTRS bool EditRealInput<16>(
1155     IoStatementState &, const DataEdit &, void *);
1156 
1157 template RT_API_ATTRS bool EditCharacterInput(
1158     IoStatementState &, const DataEdit &, char *, std::size_t);
1159 template RT_API_ATTRS bool EditCharacterInput(
1160     IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1161 template RT_API_ATTRS bool EditCharacterInput(
1162     IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1163 
1164 RT_OFFLOAD_API_GROUP_END
1165 } // namespace Fortran::runtime::io
1166