xref: /llvm-project/flang/runtime/edit-input.cpp (revision 6a1c3efa051e012aaf102b7d9e7e428a58ea8ad9)
1 //===-- runtime/edit-input.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17     int base, int totalBitSize) {
18   std::optional<int> remaining;
19   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
20   common::UnsignedInt128 value{0};
21   for (; next; next = io.NextInField(remaining)) {
22     char32_t ch{*next};
23     if (ch == ' ' || ch == '\t') {
24       continue;
25     }
26     int digit{0};
27     if (ch >= '0' && ch <= '1') {
28       digit = ch - '0';
29     } else if (base >= 8 && ch >= '2' && ch <= '7') {
30       digit = ch - '0';
31     } else if (base >= 10 && ch >= '8' && ch <= '9') {
32       digit = ch - '0';
33     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
34       digit = ch + 10 - 'A';
35     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
36       digit = ch + 10 - 'a';
37     } else {
38       io.GetIoErrorHandler().SignalError(
39           "Bad character '%lc' in B/O/Z input field", ch);
40       return false;
41     }
42     value *= base;
43     value += digit;
44   }
45   // TODO: check for overflow
46   std::memcpy(n, &value, totalBitSize >> 3);
47   return true;
48 }
49 
50 // Prepares input from a field, and consumes the sign, if any.
51 // Returns true if there's a '-' sign.
52 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
53     std::optional<char32_t> &next, std::optional<int> &remaining) {
54   next = io.PrepareInput(edit, remaining);
55   bool negative{false};
56   if (next) {
57     negative = *next == '-';
58     if (negative || *next == '+') {
59       io.SkipSpaces(remaining);
60       next = io.NextInField(remaining);
61     }
62   }
63   return negative;
64 }
65 
66 bool EditIntegerInput(
67     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
68   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
69   switch (edit.descriptor) {
70   case DataEdit::ListDirected:
71   case 'G':
72   case 'I':
73     break;
74   case 'B':
75     return EditBOZInput(io, edit, n, 2, kind << 3);
76   case 'O':
77     return EditBOZInput(io, edit, n, 8, kind << 3);
78   case 'Z':
79     return EditBOZInput(io, edit, n, 16, kind << 3);
80   default:
81     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
82         "Data edit descriptor '%c' may not be used with an INTEGER data item",
83         edit.descriptor);
84     return false;
85   }
86   std::optional<int> remaining;
87   std::optional<char32_t> next;
88   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
89   common::UnsignedInt128 value;
90   for (; next; next = io.NextInField(remaining)) {
91     char32_t ch{*next};
92     if (ch == ' ' || ch == '\t') {
93       if (edit.modes.editingFlags & blankZero) {
94         ch = '0'; // BZ mode - treat blank as if it were zero
95       } else {
96         continue;
97       }
98     }
99     int digit{0};
100     if (ch >= '0' && ch <= '9') {
101       digit = ch - '0';
102     } else {
103       io.GetIoErrorHandler().SignalError(
104           "Bad character '%lc' in INTEGER input field", ch);
105       return false;
106     }
107     value *= 10;
108     value += digit;
109   }
110   if (negate) {
111     value = -value;
112   }
113   std::memcpy(n, &value, kind);
114   return true;
115 }
116 
117 // Parses a REAL input number from the input source as a normalized
118 // fraction into a supplied buffer -- there's an optional '-', a
119 // decimal point, and at least one digit.  The adjusted exponent value
120 // is returned in a reference argument.  The returned value is the number
121 // of characters that (should) have been written to the buffer -- this can
122 // be larger than the buffer size and can indicate overflow.  Replaces
123 // blanks with zeroes if appropriate.
124 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
125     const DataEdit &edit, int &exponent) {
126   std::optional<int> remaining;
127   std::optional<char32_t> next;
128   int got{0};
129   std::optional<int> decimalPoint;
130   auto Put{[&](char ch) -> void {
131     if (got < bufferSize) {
132       buffer[got] = ch;
133     }
134     ++got;
135   }};
136   if (ScanNumericPrefix(io, edit, next, remaining)) {
137     Put('-');
138   }
139   if (!next) { // empty field means zero
140     Put('0');
141     return got;
142   }
143   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
144   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
145   if (first == 'N' || first == 'I') {
146     // NaN or infinity - convert to upper case
147     // Subtle: a blank field of digits could be followed by 'E' or 'D',
148     for (; next &&
149          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
150          next = io.NextInField(remaining)) {
151       if (*next >= 'a' && *next <= 'z') {
152         Put(*next - 'a' + 'A');
153       } else {
154         Put(*next);
155       }
156     }
157     if (next && *next == '(') { // NaN(...)
158       while (next && *next != ')') {
159         next = io.NextInField(remaining);
160       }
161     }
162     exponent = 0;
163   } else if (first == decimal || (first >= '0' && first <= '9') ||
164       first == 'E' || first == 'D' || first == 'Q') {
165     Put('.'); // input field is normalized to a fraction
166     auto start{got};
167     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
168     for (; next; next = io.NextInField(remaining)) {
169       char32_t ch{*next};
170       if (ch == ' ' || ch == '\t') {
171         if (bzMode) {
172           ch = '0'; // BZ mode - treat blank as if it were zero
173         } else {
174           continue;
175         }
176       }
177       if (ch == '0' && got == start && !decimalPoint) {
178         // omit leading zeroes before the decimal
179       } else if (ch >= '0' && ch <= '9') {
180         Put(ch);
181       } else if (ch == decimal && !decimalPoint) {
182         // the decimal point is *not* copied to the buffer
183         decimalPoint = got - start; // # of digits before the decimal point
184       } else {
185         break;
186       }
187     }
188     if (got == start) {
189       Put('0'); // emit at least one digit
190     }
191     if (next &&
192         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
193             *next == 'q' || *next == 'Q')) {
194       // Optional exponent letter.  Blanks are allowed between the
195       // optional exponent letter and the exponent value.
196       io.SkipSpaces(remaining);
197       next = io.NextInField(remaining);
198     }
199     // The default exponent is -kP, but the scale factor doesn't affect
200     // an explicit exponent.
201     exponent = -edit.modes.scale;
202     if (next &&
203         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
204             (bzMode && (*next == ' ' || *next == '\t')))) {
205       bool negExpo{*next == '-'};
206       if (negExpo || *next == '+') {
207         next = io.NextInField(remaining);
208       }
209       for (exponent = 0; next; next = io.NextInField(remaining)) {
210         if (*next >= '0' && *next <= '9') {
211           exponent = 10 * exponent + *next - '0';
212         } else if (bzMode && (*next == ' ' || *next == '\t')) {
213           exponent = 10 * exponent;
214         } else {
215           break;
216         }
217       }
218       if (negExpo) {
219         exponent = -exponent;
220       }
221     }
222     if (decimalPoint) {
223       exponent += *decimalPoint;
224     } else {
225       // When no decimal point (or comma) appears in the value, the 'd'
226       // part of the edit descriptor must be interpreted as the number of
227       // digits in the value to be interpreted as being to the *right* of
228       // the assumed decimal point (13.7.2.3.2)
229       exponent += got - start - edit.digits.value_or(0);
230     }
231   } else {
232     // TODO: hex FP input
233     exponent = 0;
234     return 0;
235   }
236   // Consume the trailing ')' of a list-directed or NAMELIST complex
237   // input value.
238   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
239     if (next && (*next == ' ' || *next == '\t')) {
240       next = io.NextInField(remaining);
241     }
242     if (!next) { // NextInField fails on separators like ')'
243       next = io.GetCurrentChar();
244       if (next && *next == ')') {
245         io.HandleRelativePosition(1);
246       }
247     }
248   } else if (remaining) {
249     while (next && (*next == ' ' || *next == '\t')) {
250       next = io.NextInField(remaining);
251     }
252     if (next) {
253       return 0; // error: unused nonblank character in fixed-width field
254     }
255   }
256   return got;
257 }
258 
259 template <int KIND>
260 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
261   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
262   static constexpr int maxDigits{
263       common::MaxDecimalConversionDigits(binaryPrecision)};
264   static constexpr int bufferSize{maxDigits + 18};
265   char buffer[bufferSize];
266   int exponent{0};
267   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
268   if (got >= maxDigits + 2) {
269     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
270     return false;
271   }
272   if (got == 0) {
273     io.GetIoErrorHandler().SignalError("Bad REAL input value");
274     return false;
275   }
276   bool hadExtra{got > maxDigits};
277   if (exponent != 0) {
278     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
279   }
280   buffer[got] = '\0';
281   const char *p{buffer};
282   decimal::ConversionToBinaryResult<binaryPrecision> converted{
283       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
284   if (hadExtra) {
285     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
286         converted.flags | decimal::Inexact);
287   }
288   // TODO: raise converted.flags as exceptions?
289   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
290       converted.binary;
291   return true;
292 }
293 
294 template <int KIND>
295 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
296   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
297   switch (edit.descriptor) {
298   case DataEdit::ListDirected:
299   case DataEdit::ListDirectedRealPart:
300   case DataEdit::ListDirectedImaginaryPart:
301   case 'F':
302   case 'E': // incl. EN, ES, & EX
303   case 'D':
304   case 'G':
305     return EditCommonRealInput<KIND>(io, edit, n);
306   case 'B':
307     return EditBOZInput(
308         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
309   case 'O':
310     return EditBOZInput(
311         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
312   case 'Z':
313     return EditBOZInput(
314         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
315   default:
316     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
317         "Data edit descriptor '%c' may not be used for REAL input",
318         edit.descriptor);
319     return false;
320   }
321 }
322 
323 // 13.7.3 in Fortran 2018
324 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
325   switch (edit.descriptor) {
326   case DataEdit::ListDirected:
327   case 'L':
328   case 'G':
329     break;
330   default:
331     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
332         "Data edit descriptor '%c' may not be used for LOGICAL input",
333         edit.descriptor);
334     return false;
335   }
336   std::optional<int> remaining;
337   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
338   if (next && *next == '.') { // skip optional period
339     next = io.NextInField(remaining);
340   }
341   if (!next) {
342     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
343     return false;
344   }
345   switch (*next) {
346   case 'T':
347   case 't':
348     x = true;
349     break;
350   case 'F':
351   case 'f':
352     x = false;
353     break;
354   default:
355     io.GetIoErrorHandler().SignalError(
356         "Bad character '%lc' in LOGICAL input field", *next);
357     return false;
358   }
359   if (remaining) { // ignore the rest of the field
360     io.HandleRelativePosition(*remaining);
361   } else if (edit.descriptor == DataEdit::ListDirected) {
362     while (io.NextInField(remaining)) { // discard rest of field
363     }
364   }
365   return true;
366 }
367 
368 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
369 static bool EditDelimitedCharacterInput(
370     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
371   bool result{true};
372   while (true) {
373     auto ch{io.GetCurrentChar()};
374     if (!ch) {
375       if (io.AdvanceRecord()) {
376         continue;
377       } else {
378         result = false; // EOF in character value
379         break;
380       }
381     }
382     io.HandleRelativePosition(1);
383     if (*ch == delimiter) {
384       if (auto next{io.GetCurrentChar()}) {
385         if (*next == delimiter) {
386           // Repeated delimiter: use as character value
387           io.HandleRelativePosition(1);
388         } else { // closing delimiter
389           break;
390         }
391       } else { // delimiter was at the end of the record
392         if (length > 0) {
393           // Look ahead on next record: if it begins with the delimiter,
394           // treat it as a split character value, ignoring both delimiters
395           ConnectionState &connection{io.GetConnectionState()};
396           auto position{connection.positionInRecord};
397           if (io.AdvanceRecord()) {
398             if (auto next{io.GetCurrentChar()}; next && *next == delimiter) {
399               // Character constant split over a record boundary
400               io.HandleRelativePosition(1);
401               continue;
402             }
403             // Not a character value split over a record boundary.
404             io.BackspaceRecord();
405             connection.HandleAbsolutePosition(position);
406           }
407         }
408         break;
409       }
410     }
411     if (length > 0) {
412       *x++ = *ch;
413       --length;
414     }
415   }
416   std::fill_n(x, length, ' ');
417   return result;
418 }
419 
420 static bool EditListDirectedDefaultCharacterInput(
421     IoStatementState &io, char *x, std::size_t length) {
422   auto ch{io.GetCurrentChar()};
423   if (ch && (*ch == '\'' || *ch == '"')) {
424     io.HandleRelativePosition(1);
425     return EditDelimitedCharacterInput(io, x, length, *ch);
426   }
427   // Undelimited list-directed character input: stop at a value separator
428   // or the end of the current record.
429   std::optional<int> remaining{length};
430   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
431        next = io.NextInField(remaining)) {
432     switch (*next) {
433     case ' ':
434     case '\t':
435     case ',':
436     case ';':
437     case '/':
438       remaining = 0; // value separator: stop
439       break;
440     default:
441       *x++ = *next;
442       --length;
443     }
444   }
445   std::fill_n(x, length, ' ');
446   return true;
447 }
448 
449 bool EditDefaultCharacterInput(
450     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
451   switch (edit.descriptor) {
452   case DataEdit::ListDirected:
453     return EditListDirectedDefaultCharacterInput(io, x, length);
454   case 'A':
455   case 'G':
456     break;
457   default:
458     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
459         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
460         edit.descriptor);
461     return false;
462   }
463   std::optional<int> remaining{length};
464   if (edit.width && *edit.width > 0) {
465     remaining = *edit.width;
466   }
467   // When the field is wider than the variable, we drop the leading
468   // characters.  When the variable is wider than the field, there's
469   // trailing padding.
470   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
471   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
472        next = io.NextInField(remaining)) {
473     if (skip > 0) {
474       --skip;
475     } else {
476       *x++ = *next;
477       --length;
478     }
479   }
480   std::fill_n(x, length, ' ');
481   return true;
482 }
483 
484 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
485 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
486 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
487 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
488 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
489 // TODO: double/double
490 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
491 } // namespace Fortran::runtime::io
492