xref: /llvm-project/flang/lib/Parser/token-sequence.cpp (revision a0a1a4e5c83db53c806c56011a8741b31ab598a4)
1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "flang/Parser/message.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 namespace Fortran::parser {
15 
// Move assignment: discard this sequence's current contents, then take
// ownership of 'that''s storage via swap.  The moved-from sequence is
// left cleared (valid and empty).
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
21 
22 void TokenSequence::clear() {
23   start_.clear();
24   nextStart_ = 0;
25   char_.clear();
26   provenances_.clear();
27 }
28 
29 void TokenSequence::pop_back() {
30   std::size_t bytes{nextStart_ - start_.back()};
31   nextStart_ = start_.back();
32   start_.pop_back();
33   char_.resize(nextStart_);
34   provenances_.RemoveLastBytes(bytes);
35 }
36 
37 void TokenSequence::shrink_to_fit() {
38   start_.shrink_to_fit();
39   char_.shrink_to_fit();
40   provenances_.shrink_to_fit();
41 }
42 
43 void TokenSequence::swap(TokenSequence &that) {
44   start_.swap(that.start_);
45   std::swap(nextStart_, that.nextStart_);
46   char_.swap(that.char_);
47   provenances_.swap(that.provenances_);
48 }
49 
50 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
51   std::size_t tokens{start_.size()};
52   for (; at < tokens; ++at) {
53     if (!TokenAt(at).IsBlank()) {
54       return at;
55     }
56   }
57   return tokens; // even if at > tokens
58 }
59 
// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//...  and !...) because
// ignoring them is directive-specific.
// Returns true if any token at or after index 'at' contains a
// significant character, i.e. anything other than blanks and a
// trailing line-ending comment.
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    auto tok{TokenAt(at)};
    const char *end{tok.end()};
    for (const char *p{tok.begin()}; p < end; ++p) {
      switch (*p) {
      case '/':
        // "//" begins a line-ending comment: nothing significant left.
        // A lone '/' (token-final, or not followed by '/') is real content.
        return p + 1 >= end || p[1] != '/';
      case '!':
        // Fortran line-ending comment: nothing significant left.
        return false;
      case ' ':
        break; // skip blanks, keep scanning
      default:
        return true; // significant character found
      }
    }
  }
  return false;
}
84 
85 void TokenSequence::RemoveLastToken() {
86   CHECK(!start_.empty());
87   CHECK(nextStart_ > start_.back());
88   std::size_t bytes{nextStart_ - start_.back()};
89   nextStart_ = start_.back();
90   start_.pop_back();
91   char_.erase(char_.begin() + nextStart_, char_.end());
92   provenances_.RemoveLastBytes(bytes);
93 }
94 
95 void TokenSequence::Put(const TokenSequence &that) {
96   if (nextStart_ < char_.size()) {
97     start_.push_back(nextStart_);
98   }
99   int offset = char_.size();
100   for (int st : that.start_) {
101     start_.push_back(st + offset);
102   }
103   char_.insert(char_.end(), that.char_.begin(), that.char_.end());
104   nextStart_ = char_.size();
105   provenances_.Put(that.provenances_);
106 }
107 
108 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
109   std::size_t offset{0};
110   std::size_t tokens{that.SizeInTokens()};
111   for (std::size_t j{0}; j < tokens; ++j) {
112     CharBlock tok{that.TokenAt(j)};
113     Put(tok, range.OffsetMember(offset));
114     offset += tok.size();
115   }
116   CHECK(offset == range.size());
117 }
118 
// Append 'tokens' tokens from 'that', starting at token index 'at',
// preserving their original provenance mappings character by character.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        // Current contiguous provenance interval exhausted (true on the
        // first iteration too, since offset and the default range's size
        // are presumably both zero) -- map the interval covering this
        // character.
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
136 
137 void TokenSequence::Put(
138     const char *s, std::size_t bytes, Provenance provenance) {
139   for (std::size_t j{0}; j < bytes; ++j) {
140     PutNextTokenChar(s[j], provenance + j);
141   }
142   CloseToken();
143 }
144 
145 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
146   Put(&t[0], t.size(), provenance);
147 }
148 
// Append a std::string's characters as one token.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}
152 
// Append the accumulated contents of a raw_string_ostream as one token.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
156 
// Normalize the sequence to lower case, except for the quoted parts of
// character literals: plain literals are untouched, Hollerith counts
// keep their digits (the H marker itself becomes 'h'), kind-prefixed
// literals (1_"ABC", K_"ABC") lower-case only the prefix, and
// BOZ-style quoted constants are fully lower-cased.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) spans the current token's characters.
    // NOTE(review): &char_[nextStart] is one-past-the-end for the final
    // token (nextStart == chars); it is only compared, never
    // dereferenced, but vector::operator[] there is technically UB.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]}, *limit{&char_[nextStart]};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: integer, real exponent, Hollerith,
      // or a kind-prefixed character literal.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits -- nothing to lower-case
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      // Token ends with a quote: some flavor of character literal.
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lower-case every character.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
203 
204 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
205   std::size_t tokens{SizeInTokens()};
206   for (std::size_t j{0}; j < tokens; ++j) {
207     if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
208       return true;
209     }
210   }
211   return false;
212 }
213 
214 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
215   std::size_t tokens{SizeInTokens()};
216   bool lastWasBlank{false};
217   for (std::size_t j{0}; j < tokens; ++j) {
218     bool isBlank{TokenAt(j).IsBlank()};
219     if (isBlank && lastWasBlank && start_[j] >= firstChar) {
220       return true;
221     }
222     lastWasBlank = isBlank;
223   }
224   return false;
225 }
226 
227 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
228   std::size_t tokens{SizeInTokens()};
229   TokenSequence result;
230   for (std::size_t j{0}; j < tokens; ++j) {
231     if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
232       result.Put(*this, j);
233     }
234   }
235   swap(result);
236   return *this;
237 }
238 
239 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
240   std::size_t tokens{SizeInTokens()};
241   TokenSequence result;
242   bool lastWasBlank{false};
243   for (std::size_t j{0}; j < tokens; ++j) {
244     bool isBlank{TokenAt(j).IsBlank()};
245     if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
246       result.Put(*this, j);
247     }
248     lastWasBlank = isBlank;
249   }
250   swap(result);
251   return *this;
252 }
253 
// Truncate the sequence at the first token whose first non-blank
// character is '!'.  When 'skipFirst' is true the first such token is
// retained (e.g., a "!dir$"-style directive sentinel) and clipping
// applies to the next one.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): copies j-1 tokens, so token j-1 (the one just
          // before the '!') is dropped along with the comment --
          // presumably it is the blank preceding the comment, but
          // confirm this off-by-one is intentional.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
272 
273 void TokenSequence::Emit(CookedSource &cooked) const {
274   cooked.Put(&char_[0], char_.size());
275   cooked.PutProvenanceMappings(provenances_);
276 }
277 
278 void TokenSequence::Dump(llvm::raw_ostream &o) const {
279   o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
280     << nextStart_ << '\n';
281   for (std::size_t j{0}; j < start_.size(); ++j) {
282     o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
283       << "'\n";
284   }
285 }
286 
// Provenance of the single character at byte 'offset' within token
// number 'token'.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.start();
}
292 
// Provenance range of token number 'token', from byte 'offset' through
// the end of the token.
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}
298 
// Provenance range covering 'tokens' tokens beginning at token number
// 'token'.  The first token's range is extended only across successors
// whose provenance follows it contiguously; annexation stops at the
// first discontinuity.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
310 
// Provenance range of the entire token sequence.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
314 
315 const TokenSequence &TokenSequence::CheckBadFortranCharacters(
316     Messages &messages) const {
317   std::size_t tokens{SizeInTokens()};
318   for (std::size_t j{0}; j < tokens; ++j) {
319     CharBlock token{TokenAt(j)};
320     char ch{token.FirstNonBlank()};
321     if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
322       if (ch == '!' && j == 0) {
323         // allow in !dir$
324       } else if (ch < ' ' || ch >= '\x7f') {
325         messages.Say(GetTokenProvenanceRange(j),
326             "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
327       } else {
328         messages.Say(GetTokenProvenanceRange(j),
329             "bad character ('%c') in Fortran token"_err_en_US, ch);
330       }
331     }
332   }
333   return *this;
334 }
335 } // namespace Fortran::parser
336