//===-- lib/Parser/source.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "flang/Parser/source.h" #include "flang/Common/idioms.h" #include "flang/Parser/char-buffer.h" #include "flang/Parser/characters.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include namespace Fortran::parser { SourceFile::~SourceFile() { Close(); } void SourceFile::RecordLineStarts() { if (std::size_t chars{bytes()}; chars > 0) { origins_.emplace(1, SourcePositionOrigin{path_, 1}); const char *source{content().data()}; CHECK(source[chars - 1] == '\n' && "missing ultimate newline"); std::size_t at{0}; do { // "at" is always at the beginning of a source line lineStart_.push_back(at); at = reinterpret_cast( std::memchr(source + at, '\n', chars - at)) - source + 1; } while (at < chars); CHECK(at == chars); lineStart_.shrink_to_fit(); } } // Check for a Unicode byte order mark (BOM). // Module files all have one; so can source files. void SourceFile::IdentifyPayload() { llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; if (content.starts_with(UTF8_BOM)) { bom_end_ = UTF8_BOM.size(); encoding_ = Encoding::UTF_8; } } std::string DirectoryName(std::string path) { llvm::SmallString<128> pathBuf{path}; llvm::sys::path::remove_filename(pathBuf); return pathBuf.str().str(); } std::optional LocateSourceFile( std::string name, const std::list &searchPath) { if (name == "-" || llvm::sys::path::is_absolute(name)) { return name; } for (const std::string &dir : searchPath) { llvm::SmallString<128> path{dir}; llvm::sys::path::append(path, name); bool isDir{false}; auto er = llvm::sys::fs::is_directory(path, isDir); if (!er && !isDir) { return path.str().str(); } } return std::nullopt; } std::vector LocateSourceFileAll( std::string name, const std::vector &searchPath) { if (name == "-" || llvm::sys::path::is_absolute(name)) { return {name}; } std::vector result; for (const std::string &dir : searchPath) { llvm::SmallString<128> path{dir}; llvm::sys::path::append(path, name); bool isDir{false}; auto er = llvm::sys::fs::is_directory(path, isDir); if (!er && !isDir) { result.emplace_back(path.str().str()); } } return result; } std::size_t RemoveCarriageReturns(llvm::MutableArrayRef buf) { std::size_t wrote{0}; char *buffer{buf.data()}; char *p{buf.data()}; std::size_t bytes = buf.size(); while (bytes > 0) { void *vp{static_cast(p)}; void *crvp{std::memchr(vp, '\r', bytes)}; char *crcp{static_cast(crvp)}; if (!crcp) { std::memmove(buffer + wrote, p, bytes); wrote += bytes; break; } std::size_t chunk = crcp - p; auto advance{chunk + 1}; if (chunk + 1 >= bytes || crcp[1] == '\n') { // CR followed by LF or EOF: omit } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { // CR preceded by LF or BOF: omit } else { // CR in line: retain ++chunk; } std::memmove(buffer + wrote, p, chunk); wrote += chunk; p += advance; bytes -= advance; } return wrote; } bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { Close(); path_ = path; std::string errorPath{"'"s + path_ + "'"}; auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; if (!bufOr) { auto err = bufOr.getError(); error << "Could not open " << errorPath << ": " << err.message(); return false; } buf_ = std::move(bufOr.get()); ReadFile(); return true; } bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { Close(); path_ = "standard input"; auto buf_or = llvm::MemoryBuffer::getSTDIN(); if (!buf_or) { auto err = buf_or.getError(); error << err.message(); return false; } auto inbuf = std::move(buf_or.get()); buf_ = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); ReadFile(); return true; } void SourceFile::ReadFile() { buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); if (content().size() == 0 || content().back() != '\n') { // Don't bother to copy if we have spare memory if (content().size() >= buf_->getBufferSize()) { auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( content().size() + 1)}; llvm::copy(content(), tmp_buf->getBufferStart()); buf_ = std::move(tmp_buf); } buf_end_++; buf_->getBuffer()[buf_end_ - 1] = '\n'; } IdentifyPayload(); RecordLineStarts(); } void SourceFile::Close() { path_.clear(); buf_.reset(); distinctPaths_.clear(); origins_.clear(); } SourcePosition SourceFile::GetSourcePosition(std::size_t at) const { CHECK(at < bytes()); auto it{llvm::upper_bound(lineStart_, at)}; auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1}; auto ub{origins_.upper_bound(trueLineNumber)}; auto column{static_cast(at - lineStart_[trueLineNumber - 1] + 1)}; if (ub == origins_.begin()) { return {*this, path_, static_cast(trueLineNumber), column, static_cast(trueLineNumber)}; } else { --ub; const SourcePositionOrigin &origin{ub->second}; auto lineNumber{ trueLineNumber - ub->first + static_cast(origin.line)}; return {*this, origin.path, static_cast(lineNumber), column, static_cast(trueLineNumber)}; } } const std::string &SourceFile::SavePath(std::string &&path) { return *distinctPaths_.emplace(std::move(path)).first; } void SourceFile::LineDirective( int trueLineNumber, const std::string &path, int lineNumber) { origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber}); } llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const { o << "SourceFile '" << path_ << "'\n"; for (const auto &[at, spo] : origins_) { o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n'; } return o; } } // namespace Fortran::parser