xref: /llvm-project/flang/runtime/edit-input.cpp (revision f63dafebf4e9f5dec6ad58cf238bd4eb71d34698)
1 //===-- runtime/edit-input.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17     int base, int totalBitSize) {
18   std::optional<int> remaining;
19   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
20   common::UnsignedInt128 value{0};
21   for (; next; next = io.NextInField(remaining)) {
22     char32_t ch{*next};
23     if (ch == ' ' || ch == '\t') {
24       continue;
25     }
26     int digit{0};
27     if (ch >= '0' && ch <= '1') {
28       digit = ch - '0';
29     } else if (base >= 8 && ch >= '2' && ch <= '7') {
30       digit = ch - '0';
31     } else if (base >= 10 && ch >= '8' && ch <= '9') {
32       digit = ch - '0';
33     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
34       digit = ch + 10 - 'A';
35     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
36       digit = ch + 10 - 'a';
37     } else {
38       io.GetIoErrorHandler().SignalError(
39           "Bad character '%lc' in B/O/Z input field", ch);
40       return false;
41     }
42     value *= base;
43     value += digit;
44   }
45   // TODO: check for overflow
46   std::memcpy(n, &value, totalBitSize >> 3);
47   return true;
48 }
49 
50 // Prepares input from a field, and consumes the sign, if any.
51 // Returns true if there's a '-' sign.
52 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
53     std::optional<char32_t> &next, std::optional<int> &remaining) {
54   next = io.PrepareInput(edit, remaining);
55   bool negative{false};
56   if (next) {
57     negative = *next == '-';
58     if (negative || *next == '+') {
59       io.GotChar();
60       io.SkipSpaces(remaining);
61       next = io.NextInField(remaining);
62     }
63   }
64   return negative;
65 }
66 
67 bool EditIntegerInput(
68     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
69   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
70   switch (edit.descriptor) {
71   case DataEdit::ListDirected:
72   case 'G':
73   case 'I':
74     break;
75   case 'B':
76     return EditBOZInput(io, edit, n, 2, kind << 3);
77   case 'O':
78     return EditBOZInput(io, edit, n, 8, kind << 3);
79   case 'Z':
80     return EditBOZInput(io, edit, n, 16, kind << 3);
81   default:
82     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
83         "Data edit descriptor '%c' may not be used with an INTEGER data item",
84         edit.descriptor);
85     return false;
86   }
87   std::optional<int> remaining;
88   std::optional<char32_t> next;
89   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
90   common::UnsignedInt128 value;
91   for (; next; next = io.NextInField(remaining)) {
92     char32_t ch{*next};
93     if (ch == ' ' || ch == '\t') {
94       if (edit.modes.editingFlags & blankZero) {
95         ch = '0'; // BZ mode - treat blank as if it were zero
96       } else {
97         continue;
98       }
99     }
100     int digit{0};
101     if (ch >= '0' && ch <= '9') {
102       digit = ch - '0';
103     } else {
104       io.GetIoErrorHandler().SignalError(
105           "Bad character '%lc' in INTEGER input field", ch);
106       return false;
107     }
108     value *= 10;
109     value += digit;
110   }
111   if (negate) {
112     value = -value;
113   }
114   std::memcpy(n, &value, kind);
115   return true;
116 }
117 
118 // Parses a REAL input number from the input source as a normalized
119 // fraction into a supplied buffer -- there's an optional '-', a
120 // decimal point, and at least one digit.  The adjusted exponent value
121 // is returned in a reference argument.  The returned value is the number
122 // of characters that (should) have been written to the buffer -- this can
123 // be larger than the buffer size and can indicate overflow.  Replaces
124 // blanks with zeroes if appropriate.
125 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
126     const DataEdit &edit, int &exponent) {
127   std::optional<int> remaining;
128   std::optional<char32_t> next;
129   int got{0};
130   std::optional<int> decimalPoint;
131   auto Put{[&](char ch) -> void {
132     if (got < bufferSize) {
133       buffer[got] = ch;
134     }
135     ++got;
136   }};
137   if (ScanNumericPrefix(io, edit, next, remaining)) {
138     Put('-');
139   }
140   if (next.value_or(' ') == ' ') { // empty/blank field means zero
141     remaining.reset();
142     Put('0');
143     return got;
144   }
145   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
146   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
147   if (first == 'N' || first == 'I') {
148     // NaN or infinity - convert to upper case
149     // Subtle: a blank field of digits could be followed by 'E' or 'D',
150     for (; next &&
151          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
152          next = io.NextInField(remaining)) {
153       if (*next >= 'a' && *next <= 'z') {
154         Put(*next - 'a' + 'A');
155       } else {
156         Put(*next);
157       }
158     }
159     if (next && *next == '(') { // NaN(...)
160       while (next && *next != ')') {
161         next = io.NextInField(remaining);
162       }
163     }
164     exponent = 0;
165   } else if (first == decimal || (first >= '0' && first <= '9') ||
166       first == 'E' || first == 'D' || first == 'Q') {
167     Put('.'); // input field is normalized to a fraction
168     auto start{got};
169     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
170     for (; next; next = io.NextInField(remaining)) {
171       char32_t ch{*next};
172       if (ch == ' ' || ch == '\t') {
173         if (bzMode) {
174           ch = '0'; // BZ mode - treat blank as if it were zero
175         } else {
176           continue;
177         }
178       }
179       if (ch == '0' && got == start && !decimalPoint) {
180         // omit leading zeroes before the decimal
181       } else if (ch >= '0' && ch <= '9') {
182         Put(ch);
183       } else if (ch == decimal && !decimalPoint) {
184         // the decimal point is *not* copied to the buffer
185         decimalPoint = got - start; // # of digits before the decimal point
186       } else {
187         break;
188       }
189     }
190     if (got == start) {
191       Put('0'); // emit at least one digit
192     }
193     if (next &&
194         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
195             *next == 'q' || *next == 'Q')) {
196       // Optional exponent letter.  Blanks are allowed between the
197       // optional exponent letter and the exponent value.
198       io.SkipSpaces(remaining);
199       next = io.NextInField(remaining);
200     }
201     // The default exponent is -kP, but the scale factor doesn't affect
202     // an explicit exponent.
203     exponent = -edit.modes.scale;
204     if (next &&
205         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
206             (bzMode && (*next == ' ' || *next == '\t')))) {
207       bool negExpo{*next == '-'};
208       if (negExpo || *next == '+') {
209         next = io.NextInField(remaining);
210       }
211       for (exponent = 0; next; next = io.NextInField(remaining)) {
212         if (*next >= '0' && *next <= '9') {
213           exponent = 10 * exponent + *next - '0';
214         } else if (bzMode && (*next == ' ' || *next == '\t')) {
215           exponent = 10 * exponent;
216         } else {
217           break;
218         }
219       }
220       if (negExpo) {
221         exponent = -exponent;
222       }
223     }
224     if (decimalPoint) {
225       exponent += *decimalPoint;
226     } else {
227       // When no decimal point (or comma) appears in the value, the 'd'
228       // part of the edit descriptor must be interpreted as the number of
229       // digits in the value to be interpreted as being to the *right* of
230       // the assumed decimal point (13.7.2.3.2)
231       exponent += got - start - edit.digits.value_or(0);
232     }
233   } else {
234     // TODO: hex FP input
235     exponent = 0;
236     return 0;
237   }
238   // Consume the trailing ')' of a list-directed or NAMELIST complex
239   // input value.
240   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
241     if (next && (*next == ' ' || *next == '\t')) {
242       next = io.NextInField(remaining);
243     }
244     if (!next) { // NextInField fails on separators like ')'
245       next = io.GetCurrentChar();
246       if (next && *next == ')') {
247         io.HandleRelativePosition(1);
248       }
249     }
250   } else if (remaining) {
251     while (next && (*next == ' ' || *next == '\t')) {
252       next = io.NextInField(remaining);
253     }
254     if (next) {
255       return 0; // error: unused nonblank character in fixed-width field
256     }
257   }
258   return got;
259 }
260 
261 template <int KIND>
262 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
263   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
264   static constexpr int maxDigits{
265       common::MaxDecimalConversionDigits(binaryPrecision)};
266   static constexpr int bufferSize{maxDigits + 18};
267   char buffer[bufferSize];
268   int exponent{0};
269   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
270   if (got >= maxDigits + 2) {
271     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
272     return false;
273   }
274   if (got == 0) {
275     io.GetIoErrorHandler().SignalError("Bad REAL input value");
276     return false;
277   }
278   bool hadExtra{got > maxDigits};
279   if (exponent != 0) {
280     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
281   }
282   buffer[got] = '\0';
283   const char *p{buffer};
284   decimal::ConversionToBinaryResult<binaryPrecision> converted{
285       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
286   if (hadExtra) {
287     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
288         converted.flags | decimal::Inexact);
289   }
290   // TODO: raise converted.flags as exceptions?
291   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
292       converted.binary;
293   return true;
294 }
295 
296 template <int KIND>
297 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
298   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
299   switch (edit.descriptor) {
300   case DataEdit::ListDirected:
301   case DataEdit::ListDirectedRealPart:
302   case DataEdit::ListDirectedImaginaryPart:
303   case 'F':
304   case 'E': // incl. EN, ES, & EX
305   case 'D':
306   case 'G':
307     return EditCommonRealInput<KIND>(io, edit, n);
308   case 'B':
309     return EditBOZInput(
310         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
311   case 'O':
312     return EditBOZInput(
313         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
314   case 'Z':
315     return EditBOZInput(
316         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
317   default:
318     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
319         "Data edit descriptor '%c' may not be used for REAL input",
320         edit.descriptor);
321     return false;
322   }
323 }
324 
325 // 13.7.3 in Fortran 2018
326 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
327   switch (edit.descriptor) {
328   case DataEdit::ListDirected:
329   case 'L':
330   case 'G':
331     break;
332   default:
333     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
334         "Data edit descriptor '%c' may not be used for LOGICAL input",
335         edit.descriptor);
336     return false;
337   }
338   std::optional<int> remaining;
339   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
340   if (next && *next == '.') { // skip optional period
341     next = io.NextInField(remaining);
342   }
343   if (!next) {
344     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
345     return false;
346   }
347   switch (*next) {
348   case 'T':
349   case 't':
350     x = true;
351     break;
352   case 'F':
353   case 'f':
354     x = false;
355     break;
356   default:
357     io.GetIoErrorHandler().SignalError(
358         "Bad character '%lc' in LOGICAL input field", *next);
359     return false;
360   }
361   if (remaining) { // ignore the rest of the field
362     io.HandleRelativePosition(*remaining);
363   } else if (edit.descriptor == DataEdit::ListDirected) {
364     while (io.NextInField(remaining)) { // discard rest of field
365     }
366   }
367   return true;
368 }
369 
370 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
371 static bool EditDelimitedCharacterInput(
372     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
373   bool result{true};
374   while (true) {
375     auto ch{io.GetCurrentChar()};
376     if (!ch) {
377       if (io.AdvanceRecord()) {
378         continue;
379       } else {
380         result = false; // EOF in character value
381         break;
382       }
383     }
384     io.HandleRelativePosition(1);
385     if (*ch == delimiter) {
386       auto next{io.GetCurrentChar()};
387       if (next && *next == delimiter) {
388         // Repeated delimiter: use as character value
389         io.HandleRelativePosition(1);
390       } else {
391         break; // closing delimiter
392       }
393     }
394     if (length > 0) {
395       *x++ = *ch;
396       --length;
397     }
398   }
399   std::fill_n(x, length, ' ');
400   return result;
401 }
402 
403 static bool EditListDirectedDefaultCharacterInput(
404     IoStatementState &io, char *x, std::size_t length) {
405   auto ch{io.GetCurrentChar()};
406   if (ch && (*ch == '\'' || *ch == '"')) {
407     io.HandleRelativePosition(1);
408     return EditDelimitedCharacterInput(io, x, length, *ch);
409   }
410   // Undelimited list-directed character input: stop at a value separator
411   // or the end of the current record.
412   std::optional<int> remaining{length};
413   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
414        next = io.NextInField(remaining)) {
415     switch (*next) {
416     case ' ':
417     case '\t':
418     case ',':
419     case ';':
420     case '/':
421       remaining = 0; // value separator: stop
422       break;
423     default:
424       *x++ = *next;
425       --length;
426     }
427   }
428   std::fill_n(x, length, ' ');
429   return true;
430 }
431 
432 bool EditDefaultCharacterInput(
433     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
434   switch (edit.descriptor) {
435   case DataEdit::ListDirected:
436     return EditListDirectedDefaultCharacterInput(io, x, length);
437   case 'A':
438   case 'G':
439     break;
440   default:
441     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
442         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
443         edit.descriptor);
444     return false;
445   }
446   std::optional<int> remaining{length};
447   if (edit.width && *edit.width > 0) {
448     remaining = *edit.width;
449   }
450   // When the field is wider than the variable, we drop the leading
451   // characters.  When the variable is wider than the field, there's
452   // trailing padding.
453   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
454   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
455        next = io.NextInField(remaining)) {
456     if (skip > 0) {
457       --skip;
458       io.GotChar(-1);
459     } else {
460       *x++ = *next;
461       --length;
462     }
463   }
464   std::fill_n(x, length, ' ');
465   return true;
466 }
467 
468 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
469 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
470 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
471 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
472 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
473 // TODO: double/double
474 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
475 } // namespace Fortran::runtime::io
476