1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "token-sequence.h" 10 #include "flang/Parser/characters.h" 11 #include "flang/Parser/message.h" 12 #include "llvm/Support/raw_ostream.h" 13 14 namespace Fortran::parser { 15 16 TokenSequence &TokenSequence::operator=(TokenSequence &&that) { 17 clear(); 18 swap(that); 19 return *this; 20 } 21 22 void TokenSequence::clear() { 23 start_.clear(); 24 nextStart_ = 0; 25 char_.clear(); 26 provenances_.clear(); 27 } 28 29 void TokenSequence::pop_back() { 30 std::size_t bytes{nextStart_ - start_.back()}; 31 nextStart_ = start_.back(); 32 start_.pop_back(); 33 char_.resize(nextStart_); 34 provenances_.RemoveLastBytes(bytes); 35 } 36 37 void TokenSequence::shrink_to_fit() { 38 start_.shrink_to_fit(); 39 char_.shrink_to_fit(); 40 provenances_.shrink_to_fit(); 41 } 42 43 void TokenSequence::swap(TokenSequence &that) { 44 start_.swap(that.start_); 45 std::swap(nextStart_, that.nextStart_); 46 char_.swap(that.char_); 47 provenances_.swap(that.provenances_); 48 } 49 50 std::size_t TokenSequence::SkipBlanks(std::size_t at) const { 51 std::size_t tokens{start_.size()}; 52 for (; at < tokens; ++at) { 53 if (!TokenAt(at).IsBlank()) { 54 return at; 55 } 56 } 57 return tokens; // even if at > tokens 58 } 59 60 // C-style /*comments*/ are removed from preprocessing directive 61 // token sequences by the prescanner, but not C++ or Fortran 62 // free-form line-ending comments (//... and !...) because 63 // ignoring them is directive-specific. 64 bool TokenSequence::IsAnythingLeft(std::size_t at) const { 65 std::size_t tokens{start_.size()}; 66 for (; at < tokens; ++at) { 67 auto tok{TokenAt(at)}; 68 const char *end{tok.end()}; 69 for (const char *p{tok.begin()}; p < end; ++p) { 70 switch (*p) { 71 case '/': 72 return p + 1 >= end || p[1] != '/'; 73 case '!': 74 return false; 75 case ' ': 76 break; 77 default: 78 return true; 79 } 80 } 81 } 82 return false; 83 } 84 85 void TokenSequence::RemoveLastToken() { 86 CHECK(!start_.empty()); 87 CHECK(nextStart_ > start_.back()); 88 std::size_t bytes{nextStart_ - start_.back()}; 89 nextStart_ = start_.back(); 90 start_.pop_back(); 91 char_.erase(char_.begin() + nextStart_, char_.end()); 92 provenances_.RemoveLastBytes(bytes); 93 } 94 95 void TokenSequence::Put(const TokenSequence &that) { 96 if (nextStart_ < char_.size()) { 97 start_.push_back(nextStart_); 98 } 99 int offset = char_.size(); 100 for (int st : that.start_) { 101 start_.push_back(st + offset); 102 } 103 char_.insert(char_.end(), that.char_.begin(), that.char_.end()); 104 nextStart_ = char_.size(); 105 provenances_.Put(that.provenances_); 106 } 107 108 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) { 109 std::size_t offset{0}; 110 std::size_t tokens{that.SizeInTokens()}; 111 for (std::size_t j{0}; j < tokens; ++j) { 112 CharBlock tok{that.TokenAt(j)}; 113 Put(tok, range.OffsetMember(offset)); 114 offset += tok.size(); 115 } 116 CHECK(offset == range.size()); 117 } 118 119 void TokenSequence::Put( 120 const TokenSequence &that, std::size_t at, std::size_t tokens) { 121 ProvenanceRange provenance; 122 std::size_t offset{0}; 123 for (; tokens-- > 0; ++at) { 124 CharBlock tok{that.TokenAt(at)}; 125 std::size_t tokBytes{tok.size()}; 126 for (std::size_t j{0}; j < tokBytes; ++j) { 127 if (offset == provenance.size()) { 128 provenance = that.provenances_.Map(that.start_[at] + j); 129 offset = 0; 130 } 131 PutNextTokenChar(tok[j], provenance.OffsetMember(offset++)); 132 } 133 CloseToken(); 134 } 135 } 136 137 void TokenSequence::Put( 138 const char *s, std::size_t bytes, Provenance provenance) { 139 for (std::size_t j{0}; j < bytes; ++j) { 140 PutNextTokenChar(s[j], provenance + j); 141 } 142 CloseToken(); 143 } 144 145 void TokenSequence::Put(const CharBlock &t, Provenance provenance) { 146 Put(&t[0], t.size(), provenance); 147 } 148 149 void TokenSequence::Put(const std::string &s, Provenance provenance) { 150 Put(s.data(), s.size(), provenance); 151 } 152 153 void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) { 154 Put(ss.str(), provenance); 155 } 156 157 TokenSequence &TokenSequence::ToLowerCase() { 158 std::size_t tokens{start_.size()}; 159 std::size_t chars{char_.size()}; 160 std::size_t atToken{0}; 161 for (std::size_t j{0}; j < chars;) { 162 std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars}; 163 char *p{&char_[j]}; 164 char const *limit{char_.data() + nextStart}; 165 j = nextStart; 166 if (IsDecimalDigit(*p)) { 167 while (p < limit && IsDecimalDigit(*p)) { 168 ++p; 169 } 170 if (p >= limit) { 171 } else if (*p == 'h' || *p == 'H') { 172 // Hollerith 173 *p = 'h'; 174 } else if (*p == '_') { 175 // kind-prefixed character literal (e.g., 1_"ABC") 176 } else { 177 // exponent 178 for (; p < limit; ++p) { 179 *p = ToLowerCaseLetter(*p); 180 } 181 } 182 } else if (limit[-1] == '\'' || limit[-1] == '"') { 183 if (*p == limit[-1]) { 184 // Character literal without prefix 185 } else if (p[1] == limit[-1]) { 186 // BOZX-prefixed constant 187 for (; p < limit; ++p) { 188 *p = ToLowerCaseLetter(*p); 189 } 190 } else { 191 // Literal with kind-param prefix name (e.g., K_"ABC"). 192 for (; *p != limit[-1]; ++p) { 193 *p = ToLowerCaseLetter(*p); 194 } 195 } 196 } else { 197 for (; p < limit; ++p) { 198 *p = ToLowerCaseLetter(*p); 199 } 200 } 201 } 202 return *this; 203 } 204 205 bool TokenSequence::HasBlanks(std::size_t firstChar) const { 206 std::size_t tokens{SizeInTokens()}; 207 for (std::size_t j{0}; j < tokens; ++j) { 208 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) { 209 return true; 210 } 211 } 212 return false; 213 } 214 215 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const { 216 std::size_t tokens{SizeInTokens()}; 217 bool lastWasBlank{false}; 218 for (std::size_t j{0}; j < tokens; ++j) { 219 bool isBlank{TokenAt(j).IsBlank()}; 220 if (isBlank && lastWasBlank && start_[j] >= firstChar) { 221 return true; 222 } 223 lastWasBlank = isBlank; 224 } 225 return false; 226 } 227 228 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) { 229 std::size_t tokens{SizeInTokens()}; 230 TokenSequence result; 231 for (std::size_t j{0}; j < tokens; ++j) { 232 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) { 233 result.Put(*this, j); 234 } 235 } 236 swap(result); 237 return *this; 238 } 239 240 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) { 241 std::size_t tokens{SizeInTokens()}; 242 TokenSequence result; 243 bool lastWasBlank{false}; 244 for (std::size_t j{0}; j < tokens; ++j) { 245 bool isBlank{TokenAt(j).IsBlank()}; 246 if (!isBlank || !lastWasBlank || start_[j] < firstChar) { 247 result.Put(*this, j); 248 } 249 lastWasBlank = isBlank; 250 } 251 swap(result); 252 return *this; 253 } 254 255 TokenSequence &TokenSequence::ClipComment(bool skipFirst) { 256 std::size_t tokens{SizeInTokens()}; 257 for (std::size_t j{0}; j < tokens; ++j) { 258 if (TokenAt(j).FirstNonBlank() == '!') { 259 if (skipFirst) { 260 skipFirst = false; 261 } else { 262 TokenSequence result; 263 if (j > 0) { 264 result.Put(*this, 0, j - 1); 265 } 266 swap(result); 267 return *this; 268 } 269 } 270 } 271 return *this; 272 } 273 274 void TokenSequence::Emit(CookedSource &cooked) const { 275 cooked.Put(&char_[0], char_.size()); 276 cooked.PutProvenanceMappings(provenances_); 277 } 278 279 llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const { 280 o << "TokenSequence has " << char_.size() << " chars; nextStart_ " 281 << nextStart_ << '\n'; 282 for (std::size_t j{0}; j < start_.size(); ++j) { 283 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString() 284 << "'\n"; 285 } 286 return o; 287 } 288 289 Provenance TokenSequence::GetCharProvenance(std::size_t offset) const { 290 ProvenanceRange range{provenances_.Map(offset)}; 291 return range.start(); 292 } 293 294 Provenance TokenSequence::GetTokenProvenance( 295 std::size_t token, std::size_t offset) const { 296 return GetCharProvenance(start_[token] + offset); 297 } 298 299 ProvenanceRange TokenSequence::GetTokenProvenanceRange( 300 std::size_t token, std::size_t offset) const { 301 ProvenanceRange range{provenances_.Map(start_[token] + offset)}; 302 return range.Prefix(TokenBytes(token) - offset); 303 } 304 305 ProvenanceRange TokenSequence::GetIntervalProvenanceRange( 306 std::size_t token, std::size_t tokens) const { 307 if (tokens == 0) { 308 return {}; 309 } 310 ProvenanceRange range{provenances_.Map(start_[token])}; 311 while (--tokens > 0 && 312 range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) { 313 } 314 return range; 315 } 316 317 ProvenanceRange TokenSequence::GetProvenanceRange() const { 318 return GetIntervalProvenanceRange(0, start_.size()); 319 } 320 321 const TokenSequence &TokenSequence::CheckBadFortranCharacters( 322 Messages &messages) const { 323 std::size_t tokens{SizeInTokens()}; 324 for (std::size_t j{0}; j < tokens; ++j) { 325 CharBlock token{TokenAt(j)}; 326 char ch{token.FirstNonBlank()}; 327 if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) { 328 if (ch == '!' && j == 0) { 329 // allow in !dir$ 330 } else if (ch < ' ' || ch >= '\x7f') { 331 messages.Say(GetTokenProvenanceRange(j), 332 "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff); 333 } else { 334 messages.Say(GetTokenProvenanceRange(j), 335 "bad character ('%c') in Fortran token"_err_en_US, ch); 336 } 337 } 338 } 339 return *this; 340 } 341 } // namespace Fortran::parser 342