//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "token-sequence.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"

namespace Fortran::parser {

TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}

void TokenSequence::clear() {
  start_.clear();
  nextStart_ = 0;
  char_.clear();
  provenances_.clear();
}

void TokenSequence::pop_back() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.resize(nextStart_);
  provenances_.RemoveLastBytes(bytes);
}

void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}

void TokenSequence::swap(TokenSequence &that) {
  start_.swap(that.start_);
  std::swap(nextStart_, that.nextStart_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}

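// A sketch of SkipBlanks()' behavior, assuming a sequence whose tokens
// are {" ", "call", " ", "foo"}: SkipBlanks(0) returns 1, the index of
// "call", and SkipBlanks(2) returns 3; when only blank tokens remain,
// the total token count is returned.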
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    if (!TokenAt(at).IsBlank()) {
      return at;
    }
  }
  return tokens; // even if at > tokens
}

// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//... and !...) because
// ignoring them is directive-specific.
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    auto tok{TokenAt(at)};
    const char *end{tok.end()};
    for (const char *p{tok.begin()}; p < end; ++p) {
      switch (*p) {
      case '/':
        return p + 1 >= end || p[1] != '/';
      case '!':
        return false;
      case ' ':
        break;
      default:
        return true;
      }
    }
  }
  return false;
}

void TokenSequence::Put(const TokenSequence &that) {
  if (nextStart_ < char_.size()) {
    start_.push_back(nextStart_);
  }
  int offset = char_.size();
  for (int st : that.start_) {
    start_.push_back(st + offset);
  }
  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
  nextStart_ = char_.size();
  provenances_.Put(that.provenances_);
}

void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
  std::size_t offset{0};
  std::size_t tokens{that.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{that.TokenAt(j)};
    Put(tok, range.OffsetMember(offset));
    offset += tok.size();
  }
  CHECK(offset == range.size());
}

void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}

void TokenSequence::Put(
    const char *s, std::size_t bytes, Provenance provenance) {
  for (std::size_t j{0}; j < bytes; ++j) {
    PutNextTokenChar(s[j], provenance + j);
  }
  CloseToken();
}

void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
  Put(&t[0], t.size(), provenance);
}

void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}

void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}

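// An illustrative note on ToLowerCase() below: letters are folded to
// lower case except within the bodies of character and Hollerith
// literals.  E.g., the token "Foo" becomes "foo" and "3HBAR" becomes
// "3hBAR"; in K_"ABC" only the kind-param prefix is folded and the
// quoted text is preserved.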
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}

bool TokenSequence::HasBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
      return true;
    }
  }
  return false;
}

bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (isBlank && lastWasBlank && start_[j] >= firstChar) {
      return true;
    }
    lastWasBlank = isBlank;
  }
  return false;
}

TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
      result.Put(*this, j);
    }
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
      result.Put(*this, j);
    }
    lastWasBlank = isBlank;
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}

void TokenSequence::Emit(CookedSource &cooked) const {
  cooked.Put(&char_[0], char_.size());
  cooked.PutProvenanceMappings(provenances_);
}

llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
    << nextStart_ << '\n';
  for (std::size_t j{0}; j < start_.size(); ++j) {
    o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
      << "'\n";
  }
  return o;
}

Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(offset)};
  return range.start();
}

Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  return GetCharProvenance(start_[token] + offset);
}

ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}

ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}

ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}

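// The Check* routines below do not modify the token sequence; they emit
// any diagnostics into "messages" and return *this so that calls can be
// chained.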
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!' && j == 0) {
        // allow in !dir$
      } else if (ch < ' ' || ch >= '\x7f') {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}

const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  // First, a quick pass with no allocation for the common case
  int nesting{0};
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch == '(') {
      ++nesting;
    } else if (ch == ')') {
      if (nesting-- == 0) {
        // An early unmatched ')' can't be balanced by later tokens
        // (e.g. ")(" nets to zero); stop here so it gets diagnosed below.
        break;
      }
    }
  }
  if (nesting != 0) {
    // There's an error; diagnose it
    std::vector<std::size_t> stack;
    for (std::size_t j{0}; j < tokens; ++j) {
      CharBlock token{TokenAt(j)};
      char ch{token.FirstNonBlank()};
      if (ch == '(') {
        stack.push_back(j);
      } else if (ch == ')') {
        if (stack.empty()) {
          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
          return *this;
        }
        stack.pop_back();
      }
    }
    CHECK(!stack.empty());
    messages.Say(
        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
  }
  return *this;
}
} // namespace Fortran::parser