xref: /llvm-project/flang/runtime/edit-input.cpp (revision 4393e3776b41471afbd37cb13fe5b777243fedd1)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17     int base, int totalBitSize) {
18   std::optional<int> remaining;
19   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
20   common::UnsignedInt128 value{0};
21   for (; next; next = io.NextInField(remaining)) {
22     char32_t ch{*next};
23     if (ch == ' ' || ch == '\t') {
24       continue;
25     }
26     int digit{0};
27     if (ch >= '0' && ch <= '1') {
28       digit = ch - '0';
29     } else if (base >= 8 && ch >= '2' && ch <= '7') {
30       digit = ch - '0';
31     } else if (base >= 10 && ch >= '8' && ch <= '9') {
32       digit = ch - '0';
33     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
34       digit = ch + 10 - 'A';
35     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
36       digit = ch + 10 - 'a';
37     } else {
38       io.GetIoErrorHandler().SignalError(
39           "Bad character '%lc' in B/O/Z input field", ch);
40       return false;
41     }
42     value *= base;
43     value += digit;
44   }
45   // TODO: check for overflow
46   std::memcpy(n, &value, totalBitSize >> 3);
47   return true;
48 }
49 
50 // Prepares input from a field, and consumes the sign, if any.
51 // Returns true if there's a '-' sign.
52 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
53     std::optional<char32_t> &next, std::optional<int> &remaining) {
54   next = io.PrepareInput(edit, remaining);
55   bool negative{false};
56   if (next) {
57     negative = *next == '-';
58     if (negative || *next == '+') {
59       io.GotChar();
60       io.SkipSpaces(remaining);
61       next = io.NextInField(remaining);
62     }
63   }
64   return negative;
65 }
66 
67 bool EditIntegerInput(
68     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
69   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
70   switch (edit.descriptor) {
71   case DataEdit::ListDirected:
72   case 'G':
73   case 'I':
74     break;
75   case 'B':
76     return EditBOZInput(io, edit, n, 2, kind << 3);
77   case 'O':
78     return EditBOZInput(io, edit, n, 8, kind << 3);
79   case 'Z':
80     return EditBOZInput(io, edit, n, 16, kind << 3);
81   default:
82     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
83         "Data edit descriptor '%c' may not be used with an INTEGER data item",
84         edit.descriptor);
85     return false;
86   }
87   std::optional<int> remaining;
88   std::optional<char32_t> next;
89   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
90   common::UnsignedInt128 value;
91   for (; next; next = io.NextInField(remaining)) {
92     char32_t ch{*next};
93     if (ch == ' ' || ch == '\t') {
94       if (edit.modes.editingFlags & blankZero) {
95         ch = '0'; // BZ mode - treat blank as if it were zero
96       } else {
97         continue;
98       }
99     }
100     int digit{0};
101     if (ch >= '0' && ch <= '9') {
102       digit = ch - '0';
103     } else {
104       io.GetIoErrorHandler().SignalError(
105           "Bad character '%lc' in INTEGER input field", ch);
106       return false;
107     }
108     value *= 10;
109     value += digit;
110   }
111   if (negate) {
112     value = -value;
113   }
114   std::memcpy(n, &value, kind);
115   return true;
116 }
117 
118 // Parses a REAL input number from the input source as a normalized
119 // fraction into a supplied buffer -- there's an optional '-', a
120 // decimal point, and at least one digit.  The adjusted exponent value
121 // is returned in a reference argument.  The returned value is the number
122 // of characters that (should) have been written to the buffer -- this can
123 // be larger than the buffer size and can indicate overflow.  Replaces
124 // blanks with zeroes if appropriate.
125 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
126     const DataEdit &edit, int &exponent) {
127   std::optional<int> remaining;
128   std::optional<char32_t> next;
129   int got{0};
130   std::optional<int> decimalPoint;
131   auto Put{[&](char ch) -> void {
132     if (got < bufferSize) {
133       buffer[got] = ch;
134     }
135     ++got;
136   }};
137   if (ScanNumericPrefix(io, edit, next, remaining)) {
138     Put('-');
139   }
140   if (!next) { // empty field means zero
141     Put('0');
142     return got;
143   }
144   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
145   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
146   if (first == 'N' || first == 'I') {
147     // NaN or infinity - convert to upper case
148     // Subtle: a blank field of digits could be followed by 'E' or 'D',
149     for (; next &&
150          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
151          next = io.NextInField(remaining)) {
152       if (*next >= 'a' && *next <= 'z') {
153         Put(*next - 'a' + 'A');
154       } else {
155         Put(*next);
156       }
157     }
158     if (next && *next == '(') { // NaN(...)
159       while (next && *next != ')') {
160         next = io.NextInField(remaining);
161       }
162     }
163     exponent = 0;
164   } else if (first == decimal || (first >= '0' && first <= '9') ||
165       first == 'E' || first == 'D' || first == 'Q') {
166     Put('.'); // input field is normalized to a fraction
167     auto start{got};
168     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
169     for (; next; next = io.NextInField(remaining)) {
170       char32_t ch{*next};
171       if (ch == ' ' || ch == '\t') {
172         if (bzMode) {
173           ch = '0'; // BZ mode - treat blank as if it were zero
174         } else {
175           continue;
176         }
177       }
178       if (ch == '0' && got == start && !decimalPoint) {
179         // omit leading zeroes before the decimal
180       } else if (ch >= '0' && ch <= '9') {
181         Put(ch);
182       } else if (ch == decimal && !decimalPoint) {
183         // the decimal point is *not* copied to the buffer
184         decimalPoint = got - start; // # of digits before the decimal point
185       } else {
186         break;
187       }
188     }
189     if (got == start) {
190       Put('0'); // emit at least one digit
191     }
192     if (next &&
193         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
194             *next == 'q' || *next == 'Q')) {
195       // Optional exponent letter.  Blanks are allowed between the
196       // optional exponent letter and the exponent value.
197       io.SkipSpaces(remaining);
198       next = io.NextInField(remaining);
199     }
200     // The default exponent is -kP, but the scale factor doesn't affect
201     // an explicit exponent.
202     exponent = -edit.modes.scale;
203     if (next &&
204         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
205             (bzMode && (*next == ' ' || *next == '\t')))) {
206       bool negExpo{*next == '-'};
207       if (negExpo || *next == '+') {
208         next = io.NextInField(remaining);
209       }
210       for (exponent = 0; next; next = io.NextInField(remaining)) {
211         if (*next >= '0' && *next <= '9') {
212           exponent = 10 * exponent + *next - '0';
213         } else if (bzMode && (*next == ' ' || *next == '\t')) {
214           exponent = 10 * exponent;
215         } else {
216           break;
217         }
218       }
219       if (negExpo) {
220         exponent = -exponent;
221       }
222     }
223     if (decimalPoint) {
224       exponent += *decimalPoint;
225     } else {
226       // When no decimal point (or comma) appears in the value, the 'd'
227       // part of the edit descriptor must be interpreted as the number of
228       // digits in the value to be interpreted as being to the *right* of
229       // the assumed decimal point (13.7.2.3.2)
230       exponent += got - start - edit.digits.value_or(0);
231     }
232   } else {
233     // TODO: hex FP input
234     exponent = 0;
235     return 0;
236   }
237   // Consume the trailing ')' of a list-directed or NAMELIST complex
238   // input value.
239   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
240     if (next && (*next == ' ' || *next == '\t')) {
241       next = io.NextInField(remaining);
242     }
243     if (!next) { // NextInField fails on separators like ')'
244       next = io.GetCurrentChar();
245       if (next && *next == ')') {
246         io.HandleRelativePosition(1);
247       }
248     }
249   } else if (remaining) {
250     while (next && (*next == ' ' || *next == '\t')) {
251       next = io.NextInField(remaining);
252     }
253     if (next) {
254       return 0; // error: unused nonblank character in fixed-width field
255     }
256   }
257   return got;
258 }
259 
260 template <int KIND>
261 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
262   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
263   static constexpr int maxDigits{
264       common::MaxDecimalConversionDigits(binaryPrecision)};
265   static constexpr int bufferSize{maxDigits + 18};
266   char buffer[bufferSize];
267   int exponent{0};
268   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
269   if (got >= maxDigits + 2) {
270     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
271     return false;
272   }
273   if (got == 0) {
274     io.GetIoErrorHandler().SignalError("Bad REAL input value");
275     return false;
276   }
277   bool hadExtra{got > maxDigits};
278   if (exponent != 0) {
279     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
280   }
281   buffer[got] = '\0';
282   const char *p{buffer};
283   decimal::ConversionToBinaryResult<binaryPrecision> converted{
284       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
285   if (hadExtra) {
286     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
287         converted.flags | decimal::Inexact);
288   }
289   // TODO: raise converted.flags as exceptions?
290   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
291       converted.binary;
292   return true;
293 }
294 
295 template <int KIND>
296 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
297   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
298   switch (edit.descriptor) {
299   case DataEdit::ListDirected:
300   case DataEdit::ListDirectedRealPart:
301   case DataEdit::ListDirectedImaginaryPart:
302   case 'F':
303   case 'E': // incl. EN, ES, & EX
304   case 'D':
305   case 'G':
306     return EditCommonRealInput<KIND>(io, edit, n);
307   case 'B':
308     return EditBOZInput(
309         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
310   case 'O':
311     return EditBOZInput(
312         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
313   case 'Z':
314     return EditBOZInput(
315         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
316   default:
317     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
318         "Data edit descriptor '%c' may not be used for REAL input",
319         edit.descriptor);
320     return false;
321   }
322 }
323 
324 // 13.7.3 in Fortran 2018
325 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
326   switch (edit.descriptor) {
327   case DataEdit::ListDirected:
328   case 'L':
329   case 'G':
330     break;
331   default:
332     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
333         "Data edit descriptor '%c' may not be used for LOGICAL input",
334         edit.descriptor);
335     return false;
336   }
337   std::optional<int> remaining;
338   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
339   if (next && *next == '.') { // skip optional period
340     next = io.NextInField(remaining);
341   }
342   if (!next) {
343     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
344     return false;
345   }
346   switch (*next) {
347   case 'T':
348   case 't':
349     x = true;
350     break;
351   case 'F':
352   case 'f':
353     x = false;
354     break;
355   default:
356     io.GetIoErrorHandler().SignalError(
357         "Bad character '%lc' in LOGICAL input field", *next);
358     return false;
359   }
360   if (remaining) { // ignore the rest of the field
361     io.HandleRelativePosition(*remaining);
362   } else if (edit.descriptor == DataEdit::ListDirected) {
363     while (io.NextInField(remaining)) { // discard rest of field
364     }
365   }
366   return true;
367 }
368 
369 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
370 static bool EditDelimitedCharacterInput(
371     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
372   bool result{true};
373   while (true) {
374     auto ch{io.GetCurrentChar()};
375     if (!ch) {
376       if (io.AdvanceRecord()) {
377         continue;
378       } else {
379         result = false; // EOF in character value
380         break;
381       }
382     }
383     io.HandleRelativePosition(1);
384     if (*ch == delimiter) {
385       auto next{io.GetCurrentChar()};
386       if (next && *next == delimiter) {
387         // Repeated delimiter: use as character value
388         io.HandleRelativePosition(1);
389       } else {
390         break; // closing delimiter
391       }
392     }
393     if (length > 0) {
394       *x++ = *ch;
395       --length;
396     }
397   }
398   std::fill_n(x, length, ' ');
399   return result;
400 }
401 
402 static bool EditListDirectedDefaultCharacterInput(
403     IoStatementState &io, char *x, std::size_t length) {
404   auto ch{io.GetCurrentChar()};
405   if (ch && (*ch == '\'' || *ch == '"')) {
406     io.HandleRelativePosition(1);
407     return EditDelimitedCharacterInput(io, x, length, *ch);
408   }
409   // Undelimited list-directed character input: stop at a value separator
410   // or the end of the current record.
411   std::optional<int> remaining{length};
412   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
413        next = io.NextInField(remaining)) {
414     switch (*next) {
415     case ' ':
416     case '\t':
417     case ',':
418     case ';':
419     case '/':
420       remaining = 0; // value separator: stop
421       break;
422     default:
423       *x++ = *next;
424       --length;
425     }
426   }
427   std::fill_n(x, length, ' ');
428   return true;
429 }
430 
431 bool EditDefaultCharacterInput(
432     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
433   switch (edit.descriptor) {
434   case DataEdit::ListDirected:
435     return EditListDirectedDefaultCharacterInput(io, x, length);
436   case 'A':
437   case 'G':
438     break;
439   default:
440     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
441         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
442         edit.descriptor);
443     return false;
444   }
445   std::optional<int> remaining{length};
446   if (edit.width && *edit.width > 0) {
447     remaining = *edit.width;
448   }
449   // When the field is wider than the variable, we drop the leading
450   // characters.  When the variable is wider than the field, there's
451   // trailing padding.
452   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
453   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
454        next = io.NextInField(remaining)) {
455     if (skip > 0) {
456       --skip;
457       io.GotChar(-1);
458     } else {
459       *x++ = *next;
460       --length;
461     }
462   }
463   std::fill_n(x, length, ' ');
464   return true;
465 }
466 
467 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
468 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
469 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
470 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
471 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
472 // TODO: double/double
473 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
474 } // namespace Fortran::runtime::io
475