xref: /llvm-project/flang/include/flang/Parser/token-sequence.h (revision 850d42fb145c636a3b56a7616c3e3c5c188c1916)
//===-- include/flang/Parser/token-sequence.h -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_TOKEN_SEQUENCE_H_
10 #define FORTRAN_PARSER_TOKEN_SEQUENCE_H_
11 
12 // A buffer class capable of holding a contiguous sequence of characters
13 // and a partitioning thereof into preprocessing tokens, along with their
14 // associated provenances.
15 
16 #include "flang/Parser/char-block.h"
17 #include "flang/Parser/provenance.h"
18 #include <cstddef>
19 #include <cstring>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
// Forward declarations of the LLVM stream types that appear in the member
// function signatures of TokenSequence.  They are only used by reference in
// this header, so their full definitions are not required here.
namespace llvm {
class raw_ostream;
class raw_string_ostream;
} // namespace llvm
27 
28 namespace Fortran::parser {
29 
30 class Messages;
31 class Prescanner;
32 
33 // Buffers a contiguous sequence of characters that has been partitioned into
34 // a sequence of preprocessing tokens with provenances.
35 class TokenSequence {
36 public:
37   TokenSequence() {}
38   TokenSequence(const TokenSequence &that) { Put(that); }
39   TokenSequence(
40       const TokenSequence &that, std::size_t at, std::size_t count = 1) {
41     Put(that, at, count);
42   }
43   TokenSequence(TokenSequence &&that)
44       : start_{std::move(that.start_)}, nextStart_{that.nextStart_},
45         char_{std::move(that.char_)},
46         provenances_{std::move(that.provenances_)} {}
47   TokenSequence(const std::string &s, Provenance p) { Put(s, p); }
48 
49   TokenSequence &operator=(const TokenSequence &that) {
50     clear();
51     Put(that);
52     return *this;
53   }
54   TokenSequence &operator=(TokenSequence &&that);
55   bool empty() const { return start_.empty(); }
56   void clear();
57   void pop_back();
58   void shrink_to_fit();
59   void swap(TokenSequence &);
60 
61   std::size_t SizeInTokens() const { return start_.size(); }
62   std::size_t SizeInChars() const { return char_.size(); }
63 
64   CharBlock ToCharBlock() const {
65     return char_.empty() ? CharBlock{} : CharBlock{&char_[0], char_.size()};
66   }
67   std::string ToString() const { return ToCharBlock().ToString(); }
68 
69   CharBlock TokenAt(std::size_t token) const {
70     if (auto bytes{TokenBytes(token)}) {
71       return {&char_[start_.at(token)], bytes};
72     } else { // char_ could be empty
73       return {};
74     }
75   }
76   char CharAt(std::size_t j) const { return char_.at(j); }
77   CharBlock CurrentOpenToken() const {
78     return {&char_[nextStart_], char_.size() - nextStart_};
79   }
80 
81   std::size_t SkipBlanks(std::size_t) const;
82   std::optional<std::size_t> SkipBlanksBackwards(std::size_t) const;
83 
84   // True if anything remains in the sequence at & after the given offset
85   // except blanks and line-ending C++ and Fortran free-form comments.
86   bool IsAnythingLeft(std::size_t) const;
87 
88   void PutNextTokenChar(char ch, Provenance provenance) {
89     char_.emplace_back(ch);
90     provenances_.Put({provenance, 1});
91   }
92 
93   void CloseToken() {
94     start_.emplace_back(nextStart_);
95     nextStart_ = char_.size();
96   }
97 
98   void ReopenLastToken() {
99     nextStart_ = start_.back();
100     start_.pop_back();
101   }
102 
103   void Put(const TokenSequence &);
104   void Put(const TokenSequence &, ProvenanceRange);
105   void Put(const TokenSequence &, std::size_t at, std::size_t tokens = 1);
106   void Put(const char *, std::size_t, Provenance);
107   void Put(const CharBlock &, Provenance);
108   void Put(const std::string &, Provenance);
109   void Put(llvm::raw_string_ostream &, Provenance);
110 
111   Provenance GetCharProvenance(std::size_t) const;
112   Provenance GetTokenProvenance(
113       std::size_t token, std::size_t offset = 0) const;
114   ProvenanceRange GetTokenProvenanceRange(
115       std::size_t token, std::size_t offset = 0) const;
116   ProvenanceRange GetIntervalProvenanceRange(
117       std::size_t token, std::size_t tokens = 1) const;
118   ProvenanceRange GetProvenanceRange() const;
119 
120   char *GetMutableCharData() { return &char_[0]; }
121   TokenSequence &ToLowerCase();
122   bool HasBlanks(std::size_t firstChar = 0) const;
123   bool HasRedundantBlanks(std::size_t firstChar = 0) const;
124   TokenSequence &RemoveBlanks(std::size_t firstChar = 0);
125   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
126   TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
127   const TokenSequence &CheckBadFortranCharacters(
128       Messages &, const Prescanner &, bool allowAmpersand) const;
129   bool BadlyNestedParentheses() const;
130   const TokenSequence &CheckBadParentheses(Messages &) const;
131   void Emit(CookedSource &) const;
132   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
133 
134 private:
135   std::size_t TokenBytes(std::size_t token) const {
136     return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
137         start_[token];
138   }
139 
140   std::vector<std::size_t> start_;
141   std::size_t nextStart_{0};
142   std::vector<char> char_;
143   OffsetToProvenanceMappings provenances_;
144 };
145 } // namespace Fortran::parser
146 #endif // FORTRAN_PARSER_TOKEN_SEQUENCE_H_
147