xref: /llvm-project/flang/include/flang/Parser/char-set.h (revision b98ad941a40c96c841bceb171725c925500fce6c)
1 //===-- include/flang/Parser/char-set.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_CHAR_SET_H_
10 #define FORTRAN_PARSER_CHAR_SET_H_
11 
12 // Sets of distinct characters that are valid in Fortran programs outside
13 // character literals are encoded as 64-bit integers by mapping them to a 6-bit
14 // character set encoding in which the case of letters is lost (even if
15 // mixed case input reached the parser, which it does not).  These sets
16 // need to be suitable for constexprs, so std::bitset<> was not eligible.
17 
18 #include <cinttypes>
19 #include <string>
20 
21 namespace Fortran::parser {
22 
// A set of characters, stored as one bit per character of a 64-character
// alphabet packed into a single 64-bit word.  Every operation is constexpr
// so that sets can be built and combined in constant expressions.
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the singleton set holding the encoding of c.  The conversion
  // is implicit so that a plain char can be passed wherever a set is taken
  // (e.g., set.Has('x')).
  constexpr SetOfChars(char c) {
    // This is basically the old DECSIX encoding, which maps the
    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
    // in that range are unused in Fortran after preprocessing outside
    // character literals.  We repurpose '^' and '?' for newline and unknown
    // characters (resp.), leaving the others alone in case this code might
    // be useful in preprocessing.
    if (c == '\n') {
      // map newline to '^'
      c = '^';
    } else if (c >= 'a' && c <= 'z') {
      // map lower-case letters to upper-case
      c -= 32;
    } else if (c < 32 || c >= 96) {
      // Map everything else outside [32..95] to '?': control characters,
      // DEL, 8-bit characters (negative when char is signed), and the
      // printable codes '`', '{', '|', '}', and '~'.  Those five must not be
      // shifted down by 32 along with the letters, because '~' would then
      // land on '^' and become indistinguishable from a newline.
      c = '?';
    }
    // range is now [32..95]; reduce to [0..63] and use as a shift count
    bits_ = static_cast<std::uint64_t>(1) << (c - 32);
  }

  // Constructs the set of the encodings of the first n characters of str.
  // Duplicates are harmless; n == 0 yields the empty set.
  constexpr SetOfChars(const char str[], std::size_t n) {
    for (std::size_t j{0}; j < n; ++j) {
      bits_ |= SetOfChars{str[j]}.bits_;
    }
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return bits_ == 0; }

  // True when every member of that is also a member of this set, i.e. that
  // is a subset of *this.  An empty that is therefore always contained.
  constexpr bool Has(SetOfChars that) const {
    return (that.bits_ & ~bits_) == 0;
  }
  // Members of either set.
  constexpr SetOfChars Union(SetOfChars that) const {
    return SetOfChars{bits_ | that.bits_};
  }
  // Members common to both sets.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    return SetOfChars{bits_ & that.bits_};
  }
  // Members of this set that are not members of that.
  constexpr SetOfChars Difference(SetOfChars that) const {
    return SetOfChars{bits_ & ~that.bits_};
  }

  // Defined out of line; not constexpr.
  std::string ToString() const;

private:
  // Wraps an already-encoded bit mask; used by the set operations above.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}
  // Bit k is set when the character with 6-bit code k (ASCII code k + 32
  // after the mapping done by the char constructor) is a member.
  std::uint64_t bits_{0};
};
78 } // namespace Fortran::parser
79 #endif // FORTRAN_PARSER_CHAR_SET_H_
80