1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "token-sequence.h" 10 #include "flang/Parser/characters.h" 11 #include "flang/Parser/message.h" 12 #include "llvm/Support/raw_ostream.h" 13 14 namespace Fortran::parser { 15 16 TokenSequence &TokenSequence::operator=(TokenSequence &&that) { 17 clear(); 18 swap(that); 19 return *this; 20 } 21 22 void TokenSequence::clear() { 23 start_.clear(); 24 nextStart_ = 0; 25 char_.clear(); 26 provenances_.clear(); 27 } 28 29 void TokenSequence::pop_back() { 30 std::size_t bytes{nextStart_ - start_.back()}; 31 nextStart_ = start_.back(); 32 start_.pop_back(); 33 char_.resize(nextStart_); 34 provenances_.RemoveLastBytes(bytes); 35 } 36 37 void TokenSequence::shrink_to_fit() { 38 start_.shrink_to_fit(); 39 char_.shrink_to_fit(); 40 provenances_.shrink_to_fit(); 41 } 42 43 void TokenSequence::swap(TokenSequence &that) { 44 start_.swap(that.start_); 45 std::swap(nextStart_, that.nextStart_); 46 char_.swap(that.char_); 47 provenances_.swap(that.provenances_); 48 } 49 50 std::size_t TokenSequence::SkipBlanks(std::size_t at) const { 51 std::size_t tokens{start_.size()}; 52 for (; at < tokens; ++at) { 53 if (!TokenAt(at).IsBlank()) { 54 return at; 55 } 56 } 57 return tokens; // even if at > tokens 58 } 59 60 // C-style /*comments*/ are removed from preprocessing directive 61 // token sequences by the prescanner, but not C++ or Fortran 62 // free-form line-ending comments (//... and !...) because 63 // ignoring them is directive-specific. 64 bool TokenSequence::IsAnythingLeft(std::size_t at) const { 65 std::size_t tokens{start_.size()}; 66 for (; at < tokens; ++at) { 67 auto tok{TokenAt(at)}; 68 const char *end{tok.end()}; 69 for (const char *p{tok.begin()}; p < end; ++p) { 70 switch (*p) { 71 case '/': 72 return p + 1 >= end || p[1] != '/'; 73 case '!': 74 return false; 75 case ' ': 76 break; 77 default: 78 return true; 79 } 80 } 81 } 82 return false; 83 } 84 85 void TokenSequence::RemoveLastToken() { 86 CHECK(!start_.empty()); 87 CHECK(nextStart_ > start_.back()); 88 std::size_t bytes{nextStart_ - start_.back()}; 89 nextStart_ = start_.back(); 90 start_.pop_back(); 91 char_.erase(char_.begin() + nextStart_, char_.end()); 92 provenances_.RemoveLastBytes(bytes); 93 } 94 95 void TokenSequence::Put(const TokenSequence &that) { 96 if (nextStart_ < char_.size()) { 97 start_.push_back(nextStart_); 98 } 99 int offset = char_.size(); 100 for (int st : that.start_) { 101 start_.push_back(st + offset); 102 } 103 char_.insert(char_.end(), that.char_.begin(), that.char_.end()); 104 nextStart_ = char_.size(); 105 provenances_.Put(that.provenances_); 106 } 107 108 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) { 109 std::size_t offset{0}; 110 std::size_t tokens{that.SizeInTokens()}; 111 for (std::size_t j{0}; j < tokens; ++j) { 112 CharBlock tok{that.TokenAt(j)}; 113 Put(tok, range.OffsetMember(offset)); 114 offset += tok.size(); 115 } 116 CHECK(offset == range.size()); 117 } 118 119 void TokenSequence::Put( 120 const TokenSequence &that, std::size_t at, std::size_t tokens) { 121 ProvenanceRange provenance; 122 std::size_t offset{0}; 123 for (; tokens-- > 0; ++at) { 124 CharBlock tok{that.TokenAt(at)}; 125 std::size_t tokBytes{tok.size()}; 126 for (std::size_t j{0}; j < tokBytes; ++j) { 127 if (offset == provenance.size()) { 128 provenance = that.provenances_.Map(that.start_[at] + j); 129 offset = 0; 130 } 131 PutNextTokenChar(tok[j], provenance.OffsetMember(offset++)); 132 } 133 CloseToken(); 134 } 135 } 136 137 void TokenSequence::Put( 138 const char *s, std::size_t bytes, Provenance provenance) { 139 for (std::size_t j{0}; j < bytes; ++j) { 140 PutNextTokenChar(s[j], provenance + j); 141 } 142 CloseToken(); 143 } 144 145 void TokenSequence::Put(const CharBlock &t, Provenance provenance) { 146 Put(&t[0], t.size(), provenance); 147 } 148 149 void TokenSequence::Put(const std::string &s, Provenance provenance) { 150 Put(s.data(), s.size(), provenance); 151 } 152 153 void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) { 154 Put(ss.str(), provenance); 155 } 156 157 TokenSequence &TokenSequence::ToLowerCase() { 158 std::size_t tokens{start_.size()}; 159 std::size_t chars{char_.size()}; 160 std::size_t atToken{0}; 161 for (std::size_t j{0}; j < chars;) { 162 std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars}; 163 char *p{&char_[j]}, *limit{&char_[nextStart]}; 164 j = nextStart; 165 if (IsDecimalDigit(*p)) { 166 while (p < limit && IsDecimalDigit(*p)) { 167 ++p; 168 } 169 if (p >= limit) { 170 } else if (*p == 'h' || *p == 'H') { 171 // Hollerith 172 *p = 'h'; 173 } else if (*p == '_') { 174 // kind-prefixed character literal (e.g., 1_"ABC") 175 } else { 176 // exponent 177 for (; p < limit; ++p) { 178 *p = ToLowerCaseLetter(*p); 179 } 180 } 181 } else if (limit[-1] == '\'' || limit[-1] == '"') { 182 if (*p == limit[-1]) { 183 // Character literal without prefix 184 } else if (p[1] == limit[-1]) { 185 // BOZX-prefixed constant 186 for (; p < limit; ++p) { 187 *p = ToLowerCaseLetter(*p); 188 } 189 } else { 190 // Literal with kind-param prefix name (e.g., K_"ABC"). 191 for (; *p != limit[-1]; ++p) { 192 *p = ToLowerCaseLetter(*p); 193 } 194 } 195 } else { 196 for (; p < limit; ++p) { 197 *p = ToLowerCaseLetter(*p); 198 } 199 } 200 } 201 return *this; 202 } 203 204 bool TokenSequence::HasBlanks(std::size_t firstChar) const { 205 std::size_t tokens{SizeInTokens()}; 206 for (std::size_t j{0}; j < tokens; ++j) { 207 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) { 208 return true; 209 } 210 } 211 return false; 212 } 213 214 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const { 215 std::size_t tokens{SizeInTokens()}; 216 bool lastWasBlank{false}; 217 for (std::size_t j{0}; j < tokens; ++j) { 218 bool isBlank{TokenAt(j).IsBlank()}; 219 if (isBlank && lastWasBlank && start_[j] >= firstChar) { 220 return true; 221 } 222 lastWasBlank = isBlank; 223 } 224 return false; 225 } 226 227 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) { 228 std::size_t tokens{SizeInTokens()}; 229 TokenSequence result; 230 for (std::size_t j{0}; j < tokens; ++j) { 231 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) { 232 result.Put(*this, j); 233 } 234 } 235 swap(result); 236 return *this; 237 } 238 239 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) { 240 std::size_t tokens{SizeInTokens()}; 241 TokenSequence result; 242 bool lastWasBlank{false}; 243 for (std::size_t j{0}; j < tokens; ++j) { 244 bool isBlank{TokenAt(j).IsBlank()}; 245 if (!isBlank || !lastWasBlank || start_[j] < firstChar) { 246 result.Put(*this, j); 247 } 248 lastWasBlank = isBlank; 249 } 250 swap(result); 251 return *this; 252 } 253 254 TokenSequence &TokenSequence::ClipComment(bool skipFirst) { 255 std::size_t tokens{SizeInTokens()}; 256 for (std::size_t j{0}; j < tokens; ++j) { 257 if (TokenAt(j).FirstNonBlank() == '!') { 258 if (skipFirst) { 259 skipFirst = false; 260 } else { 261 TokenSequence result; 262 if (j > 0) { 263 result.Put(*this, 0, j - 1); 264 } 265 swap(result); 266 return *this; 267 } 268 } 269 } 270 return *this; 271 } 272 273 void TokenSequence::Emit(CookedSource &cooked) const { 274 cooked.Put(&char_[0], char_.size()); 275 cooked.PutProvenanceMappings(provenances_); 276 } 277 278 void TokenSequence::Dump(llvm::raw_ostream &o) const { 279 o << "TokenSequence has " << char_.size() << " chars; nextStart_ " 280 << nextStart_ << '\n'; 281 for (std::size_t j{0}; j < start_.size(); ++j) { 282 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString() 283 << "'\n"; 284 } 285 } 286 287 Provenance TokenSequence::GetTokenProvenance( 288 std::size_t token, std::size_t offset) const { 289 ProvenanceRange range{provenances_.Map(start_[token] + offset)}; 290 return range.start(); 291 } 292 293 ProvenanceRange TokenSequence::GetTokenProvenanceRange( 294 std::size_t token, std::size_t offset) const { 295 ProvenanceRange range{provenances_.Map(start_[token] + offset)}; 296 return range.Prefix(TokenBytes(token) - offset); 297 } 298 299 ProvenanceRange TokenSequence::GetIntervalProvenanceRange( 300 std::size_t token, std::size_t tokens) const { 301 if (tokens == 0) { 302 return {}; 303 } 304 ProvenanceRange range{provenances_.Map(start_[token])}; 305 while (--tokens > 0 && 306 range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) { 307 } 308 return range; 309 } 310 311 ProvenanceRange TokenSequence::GetProvenanceRange() const { 312 return GetIntervalProvenanceRange(0, start_.size()); 313 } 314 315 const TokenSequence &TokenSequence::CheckBadFortranCharacters( 316 Messages &messages) const { 317 std::size_t tokens{SizeInTokens()}; 318 for (std::size_t j{0}; j < tokens; ++j) { 319 CharBlock token{TokenAt(j)}; 320 char ch{token.FirstNonBlank()}; 321 if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) { 322 if (ch == '!' && j == 0) { 323 // allow in !dir$ 324 } else if (ch < ' ' || ch >= '\x7f') { 325 messages.Say(GetTokenProvenanceRange(j), 326 "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff); 327 } else { 328 messages.Say(GetTokenProvenanceRange(j), 329 "bad character ('%c') in Fortran token"_err_en_US, ch); 330 } 331 } 332 } 333 return *this; 334 } 335 } // namespace Fortran::parser 336