//===-- include/flang/Parser/char-set.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_PARSER_CHAR_SET_H_
#define FORTRAN_PARSER_CHAR_SET_H_

// Sets of distinct characters that are valid in Fortran programs outside
// character literals are encoded as 64-bit integers by mapping them to a 6-bit
// character set encoding in which the case of letters is lost (even if
// mixed case input reached the parser, which it does not).  These sets
// need to be suitable for constexprs, so std::bitset<> was not eligible.

#include <cinttypes>
#include <cstddef>
#include <string>

namespace Fortran::parser {

// A set of characters stored as a 64-bit mask, one bit per code of the
// 6-bit encoding described in the SetOfChars(char) constructor below.
// Every operation in this header is constexpr.
struct SetOfChars {
  // Constructs the empty set.
  constexpr SetOfChars() {}

  // Constructs the singleton set holding the 6-bit code of c.  This
  // constructor is intentionally not explicit: the set operations below take
  // a SetOfChars by value, so a plain char can be passed directly.
  constexpr SetOfChars(char c) {
    // This is basically the old DECSIX encoding, which maps the
    // 7-bit ASCII codes [32..95] to [0..63].  Only '#', '&', '?', '\', and '^'
    // in that range are unused in Fortran after preprocessing outside
    // character literals.  We repurpose '^' and '?' for newline and unknown
    // characters (resp.), leaving the others alone in case this code might
    // be useful in preprocessing.
    if (c == '\n') {
      // map newline to '^'
      c = '^';
    } else if (c < 32 || c >= 127) {
      // map other control characters, DEL, and 8-bit characters to '?'.
      // Both comparisons are needed: where char is signed the 8-bit
      // characters are negative (caught by c < 32); where it is unsigned
      // they are caught by c >= 127.
      c = '?';
    } else if (c >= 96) {
      // map lower-case letters to upper-case.  The same subtraction also
      // folds the other codes in [96..126] onto [64..94]: the backquote,
      // '{', '|', '}', and '~' land on '@', '[', backslash, ']', and '^'
      // respectively, so '~' shares its bit with newline.
      c -= 32;
    }
    // range is now [32..95]; reduce to [0..63] and use as a shift count
    bits_ = static_cast<std::uint64_t>(1) << (c - 32);
  }

  // Constructs the set of the first n characters of str.  Exactly n
  // characters are read; a NUL terminator is neither required nor treated
  // specially.  n == 0 yields the empty set.
  constexpr SetOfChars(const char str[], std::size_t n) {
    for (std::size_t j{0}; j < n; ++j) {
      bits_ |= SetOfChars{str[j]}.bits_;
    }
  }

  constexpr SetOfChars(const SetOfChars &) = default;
  constexpr SetOfChars(SetOfChars &&) = default;
  constexpr SetOfChars &operator=(const SetOfChars &) = default;
  constexpr SetOfChars &operator=(SetOfChars &&) = default;

  // True when the set has no members.
  constexpr bool empty() const { return bits_ == 0; }

  // True when every member of that is also a member of this set (subset
  // test); consequently true for any set when that is empty.
  constexpr bool Has(SetOfChars that) const {
    return (that.bits_ & ~bits_) == 0;
  }

  // Returns the members of either set.
  constexpr SetOfChars Union(SetOfChars that) const {
    return SetOfChars{bits_ | that.bits_};
  }

  // Returns the members common to both sets.
  constexpr SetOfChars Intersection(SetOfChars that) const {
    return SetOfChars{bits_ & that.bits_};
  }

  // Returns the members of this set that are not members of that.
  constexpr SetOfChars Difference(SetOfChars that) const {
    return SetOfChars{bits_ & ~that.bits_};
  }

  // Declared here only; the definition lives outside this header.
  std::string ToString() const;

private:
  // Builds a set directly from a raw bit mask; used by the set operations.
  constexpr SetOfChars(std::uint64_t b) : bits_{b} {}

  // Bit k is set when the character with 6-bit code k is a member.
  std::uint64_t bits_{0};
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_CHAR_SET_H_