xref: /llvm-project/flang/runtime/edit-input.cpp (revision 1f8790050b0e99e7b46cc69518aa84f46f50738e)
1 //===-- runtime/edit-input.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17     int base, int totalBitSize) {
18   std::optional<int> remaining;
19   if (edit.width) {
20     remaining = std::max(0, *edit.width);
21   }
22   io.SkipSpaces(remaining);
23   std::optional<char32_t> next{io.NextInField(remaining)};
24   common::UnsignedInt128 value{0};
25   for (; next; next = io.NextInField(remaining)) {
26     char32_t ch{*next};
27     if (ch == ' ') {
28       continue;
29     }
30     int digit{0};
31     if (ch >= '0' && ch <= '1') {
32       digit = ch - '0';
33     } else if (base >= 8 && ch >= '2' && ch <= '7') {
34       digit = ch - '0';
35     } else if (base >= 10 && ch >= '8' && ch <= '9') {
36       digit = ch - '0';
37     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
38       digit = ch + 10 - 'A';
39     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
40       digit = ch + 10 - 'a';
41     } else {
42       io.GetIoErrorHandler().SignalError(
43           "Bad character '%lc' in B/O/Z input field", ch);
44       return false;
45     }
46     value *= base;
47     value += digit;
48   }
49   // TODO: check for overflow
50   std::memcpy(n, &value, totalBitSize >> 3);
51   return true;
52 }
53 
54 // Returns false if there's a '-' sign
55 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
56     std::optional<char32_t> &next, std::optional<int> &remaining) {
57   if (edit.descriptor != DataEdit::ListDirected && edit.width) {
58     remaining = std::max(0, *edit.width);
59   } else {
60     // list-directed, namelist, or (nonstandard) 0-width input editing
61     remaining.reset();
62   }
63   io.SkipSpaces(remaining);
64   next = io.NextInField(remaining);
65   bool negative{false};
66   if (next) {
67     negative = *next == '-';
68     if (negative || *next == '+') {
69       next = io.NextInField(remaining);
70     }
71   }
72   return negative;
73 }
74 
75 bool EditIntegerInput(
76     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
77   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
78   switch (edit.descriptor) {
79   case DataEdit::ListDirected:
80   case 'G':
81   case 'I':
82     break;
83   case 'B':
84     return EditBOZInput(io, edit, n, 2, kind << 3);
85   case 'O':
86     return EditBOZInput(io, edit, n, 8, kind << 3);
87   case 'Z':
88     return EditBOZInput(io, edit, n, 16, kind << 3);
89   default:
90     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
91         "Data edit descriptor '%c' may not be used with an INTEGER data item",
92         edit.descriptor);
93     return false;
94   }
95   std::optional<int> remaining;
96   std::optional<char32_t> next;
97   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
98   common::UnsignedInt128 value;
99   for (; next; next = io.NextInField(remaining)) {
100     char32_t ch{*next};
101     if (ch == ' ') {
102       if (edit.modes.editingFlags & blankZero) {
103         ch = '0'; // BZ mode - treat blank as if it were zero
104       } else {
105         continue;
106       }
107     }
108     int digit{0};
109     if (ch >= '0' && ch <= '9') {
110       digit = ch - '0';
111     } else {
112       io.GetIoErrorHandler().SignalError(
113           "Bad character '%lc' in INTEGER input field", ch);
114       return false;
115     }
116     value *= 10;
117     value += digit;
118   }
119   if (negate) {
120     value = -value;
121   }
122   std::memcpy(n, &value, kind);
123   return true;
124 }
125 
126 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
127     const DataEdit &edit, int &exponent) {
128   std::optional<int> remaining;
129   std::optional<char32_t> next;
130   int got{0};
131   std::optional<int> decimalPoint;
132   if (ScanNumericPrefix(io, edit, next, remaining) && next) {
133     if (got < bufferSize) {
134       buffer[got++] = '-';
135     }
136   }
137   if (!next) { // empty field means zero
138     if (got < bufferSize) {
139       buffer[got++] = '0';
140     }
141     return got;
142   }
143   if (got < bufferSize) {
144     buffer[got++] = '.'; // input field is normalized to a fraction
145   }
146   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
147   auto start{got};
148   if ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')) {
149     // NaN or infinity - convert to upper case
150     for (; next &&
151          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
152          next = io.NextInField(remaining)) {
153       if (got < bufferSize) {
154         if (*next >= 'a' && *next <= 'z') {
155           buffer[got++] = *next - 'a' + 'A';
156         } else {
157           buffer[got++] = *next;
158         }
159       }
160     }
161     if (next && *next == '(') { // NaN(...)
162       while (next && *next != ')') {
163         next = io.NextInField(remaining);
164       }
165     }
166     exponent = 0;
167   } else if (*next == decimal || (*next >= '0' && *next <= '9')) {
168     for (; next; next = io.NextInField(remaining)) {
169       char32_t ch{*next};
170       if (ch == ' ') {
171         if (edit.modes.editingFlags & blankZero) {
172           ch = '0'; // BZ mode - treat blank as if it were zero
173         } else {
174           continue;
175         }
176       }
177       if (ch == '0' && got == start) {
178         // omit leading zeroes
179       } else if (ch >= '0' && ch <= '9') {
180         if (got < bufferSize) {
181           buffer[got++] = ch;
182         }
183       } else if (ch == decimal && !decimalPoint) {
184         // the decimal point is *not* copied to the buffer
185         decimalPoint = got - start; // # of digits before the decimal point
186       } else {
187         break;
188       }
189     }
190     if (got == start && got < bufferSize) {
191       buffer[got++] = '0'; // all digits were zeroes
192     }
193     if (next &&
194         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
195             *next == 'q' || *next == 'Q')) {
196       io.SkipSpaces(remaining);
197       next = io.NextInField(remaining);
198     }
199     exponent = -edit.modes.scale; // default exponent is -kP
200     if (next &&
201         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9'))) {
202       bool negExpo{*next == '-'};
203       if (negExpo || *next == '+') {
204         next = io.NextInField(remaining);
205       }
206       for (exponent = 0; next && (*next >= '0' && *next <= '9');
207            next = io.NextInField(remaining)) {
208         exponent = 10 * exponent + *next - '0';
209       }
210       if (negExpo) {
211         exponent = -exponent;
212       }
213     }
214     if (decimalPoint) {
215       exponent += *decimalPoint;
216     } else {
217       // When no decimal point (or comma) appears in the value, the 'd'
218       // part of the edit descriptor must be interpreted as the number of
219       // digits in the value to be interpreted as being to the *right* of
220       // the assumed decimal point (13.7.2.3.2)
221       exponent += got - start - edit.digits.value_or(0);
222     }
223   } else {
224     // TODO: hex FP input
225     exponent = 0;
226     return 0;
227   }
228   if (remaining) {
229     while (next && *next == ' ') {
230       next = io.NextInField(remaining);
231     }
232     if (next) {
233       return 0; // error: unused nonblank character in fixed-width field
234     }
235   }
236   return got;
237 }
238 
239 template <int binaryPrecision>
240 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
241   static constexpr int maxDigits{
242       common::MaxDecimalConversionDigits(binaryPrecision)};
243   static constexpr int bufferSize{maxDigits + 18};
244   char buffer[bufferSize];
245   int exponent{0};
246   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
247   if (got >= maxDigits + 2) {
248     io.GetIoErrorHandler().Crash("EditRealInput: buffer was too small");
249     return false;
250   }
251   if (got == 0) {
252     io.GetIoErrorHandler().SignalError("Bad REAL input value");
253     return false;
254   }
255   bool hadExtra{got > maxDigits};
256   if (exponent != 0) {
257     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
258   }
259   buffer[got] = '\0';
260   const char *p{buffer};
261   decimal::ConversionToBinaryResult<binaryPrecision> converted{
262       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
263   if (hadExtra) {
264     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
265         converted.flags | decimal::Inexact);
266   }
267   // TODO: raise converted.flags as exceptions?
268   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
269       converted.binary;
270   return true;
271 }
272 
273 template <int binaryPrecision>
274 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
275   switch (edit.descriptor) {
276   case DataEdit::ListDirected:
277   case 'F':
278   case 'E': // incl. EN, ES, & EX
279   case 'D':
280   case 'G':
281     return EditCommonRealInput<binaryPrecision>(io, edit, n);
282   case 'B':
283     return EditBOZInput(
284         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
285   case 'O':
286     return EditBOZInput(
287         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
288   case 'Z':
289     return EditBOZInput(
290         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
291   default:
292     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
293         "Data edit descriptor '%c' may not be used for REAL input",
294         edit.descriptor);
295     return false;
296   }
297 }
298 
299 // 13.7.3 in Fortran 2018
300 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
301   switch (edit.descriptor) {
302   case DataEdit::ListDirected:
303   case 'L':
304   case 'G':
305     break;
306   default:
307     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
308         "Data edit descriptor '%c' may not be used for LOGICAL input",
309         edit.descriptor);
310     return false;
311   }
312   std::optional<int> remaining;
313   if (edit.width) {
314     remaining = std::max(0, *edit.width);
315   }
316   io.SkipSpaces(remaining);
317   std::optional<char32_t> next{io.NextInField(remaining)};
318   if (next && *next == '.') { // skip optional period
319     next = io.NextInField(remaining);
320   }
321   if (!next) {
322     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
323     return false;
324   }
325   switch (*next) {
326   case 'T':
327   case 't':
328     x = true;
329     break;
330   case 'F':
331   case 'f':
332     x = false;
333     break;
334   default:
335     io.GetIoErrorHandler().SignalError(
336         "Bad character '%lc' in LOGICAL input field", *next);
337     return false;
338   }
339   if (remaining) { // ignore the rest of the field
340     io.HandleRelativePosition(*remaining);
341   }
342   return true;
343 }
344 
345 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
346 static bool EditDelimitedCharacterInput(
347     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
348   while (true) {
349     if (auto ch{io.GetCurrentChar()}) {
350       io.HandleRelativePosition(1);
351       if (*ch == delimiter) {
352         ch = io.GetCurrentChar();
353         if (ch && *ch == delimiter) {
354           // Repeated delimiter: use as character value.  Can't straddle a
355           // record boundary.
356           io.HandleRelativePosition(1);
357         } else {
358           std::fill_n(x, length, ' ');
359           return true;
360         }
361       }
362       if (length > 0) {
363         *x++ = *ch;
364         --length;
365       }
366     } else if (!io.AdvanceRecord()) { // EOF
367       std::fill_n(x, length, ' ');
368       return false;
369     }
370   }
371 }
372 
373 static bool EditListDirectedDefaultCharacterInput(
374     IoStatementState &io, char *x, std::size_t length) {
375   auto ch{io.GetCurrentChar()};
376   if (ch && (*ch == '\'' || *ch == '"')) {
377     io.HandleRelativePosition(1);
378     return EditDelimitedCharacterInput(io, x, length, *ch);
379   }
380   // Undelimited list-directed character input: stop at a value separator
381   // or the end of the current record.
382   std::optional<int> remaining{length};
383   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
384        next = io.NextInField(remaining)) {
385     switch (*next) {
386     case ' ':
387     case ',':
388     case ';':
389     case '/':
390       remaining = 0; // value separator: stop
391       break;
392     default:
393       *x++ = *next;
394       --length;
395     }
396   }
397   std::fill_n(x, length, ' ');
398   return true;
399 }
400 
401 bool EditDefaultCharacterInput(
402     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
403   switch (edit.descriptor) {
404   case DataEdit::ListDirected:
405     return EditListDirectedDefaultCharacterInput(io, x, length);
406   case 'A':
407   case 'G':
408     break;
409   default:
410     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
411         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
412         edit.descriptor);
413     return false;
414   }
415   std::optional<int> remaining{length};
416   if (edit.width && *edit.width > 0) {
417     remaining = *edit.width;
418   }
419   // When the field is wider than the variable, we drop the leading
420   // characters.  When the variable is wider than the field, there's
421   // trailing padding.
422   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
423   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
424        next = io.NextInField(remaining)) {
425     if (skip > 0) {
426       --skip;
427     } else {
428       *x++ = *next;
429       --length;
430     }
431   }
432   std::fill_n(x, length, ' ');
433   return true;
434 }
435 
// Explicit instantiations of EditRealInput, one for each binary precision
// (in bits) listed as the template argument: 8, 11, 24, 53, 64, and 113.
// NOTE(review): presumably one per REAL kind supported by the runtime --
// confirm against the callers.
436 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
437 template bool EditRealInput<11>(IoStatementState &, const DataEdit &, void *);
438 template bool EditRealInput<24>(IoStatementState &, const DataEdit &, void *);
439 template bool EditRealInput<53>(IoStatementState &, const DataEdit &, void *);
440 template bool EditRealInput<64>(IoStatementState &, const DataEdit &, void *);
441 template bool EditRealInput<113>(IoStatementState &, const DataEdit &, void *);
442 } // namespace Fortran::runtime::io
443