164ab3302SCarolineConcatto //===-- lib/Parser/source.cpp ---------------------------------------------===// 264ab3302SCarolineConcatto // 364ab3302SCarolineConcatto // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 464ab3302SCarolineConcatto // See https://llvm.org/LICENSE.txt for license information. 564ab3302SCarolineConcatto // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 664ab3302SCarolineConcatto // 764ab3302SCarolineConcatto //===----------------------------------------------------------------------===// 864ab3302SCarolineConcatto 964ab3302SCarolineConcatto #include "flang/Parser/source.h" 1064ab3302SCarolineConcatto #include "flang/Common/idioms.h" 1164ab3302SCarolineConcatto #include "flang/Parser/char-buffer.h" 128670e499SCaroline Concatto #include "llvm/Support/Errno.h" 1313ea73e4SDavid Truby #include "llvm/Support/FileSystem.h" 14*b5793786SMichael Kruse #include "llvm/Support/Path.h" 158670e499SCaroline Concatto #include "llvm/Support/raw_ostream.h" 1664ab3302SCarolineConcatto #include <algorithm> 1764ab3302SCarolineConcatto #include <memory> 1864ab3302SCarolineConcatto #include <vector> 1964ab3302SCarolineConcatto 2064ab3302SCarolineConcatto namespace Fortran::parser { 2164ab3302SCarolineConcatto 2264ab3302SCarolineConcatto SourceFile::~SourceFile() { Close(); } 2364ab3302SCarolineConcatto 2413ea73e4SDavid Truby static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) { 2564ab3302SCarolineConcatto std::vector<std::size_t> result; 2613ea73e4SDavid Truby if (source.size() > 0) { 2713ea73e4SDavid Truby CHECK(source.back() == '\n' && "missing ultimate newline"); 2864ab3302SCarolineConcatto std::size_t at{0}; 2964ab3302SCarolineConcatto do { 3064ab3302SCarolineConcatto result.push_back(at); 3113ea73e4SDavid Truby at = source.find('\n', at) + 1; 3213ea73e4SDavid Truby } while (at < source.size()); 3364ab3302SCarolineConcatto result.shrink_to_fit(); 3464ab3302SCarolineConcatto } 3564ab3302SCarolineConcatto return result; 3664ab3302SCarolineConcatto } 3764ab3302SCarolineConcatto 3864ab3302SCarolineConcatto void SourceFile::RecordLineStarts() { 3913ea73e4SDavid Truby lineStart_ = FindLineStarts({content().data(), bytes()}); 4064ab3302SCarolineConcatto } 4164ab3302SCarolineConcatto 4264ab3302SCarolineConcatto // Check for a Unicode byte order mark (BOM). 4364ab3302SCarolineConcatto // Module files all have one; so can source files. 4464ab3302SCarolineConcatto void SourceFile::IdentifyPayload() { 4513ea73e4SDavid Truby llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; 4613ea73e4SDavid Truby constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; 4713ea73e4SDavid Truby if (content.startswith(UTF8_BOM)) { 4813ea73e4SDavid Truby bom_end_ = UTF8_BOM.size(); 4964ab3302SCarolineConcatto encoding_ = Encoding::UTF_8; 5064ab3302SCarolineConcatto } 5164ab3302SCarolineConcatto } 5264ab3302SCarolineConcatto 5364ab3302SCarolineConcatto std::string DirectoryName(std::string path) { 54*b5793786SMichael Kruse llvm::SmallString<128> pathBuf{path}; 55*b5793786SMichael Kruse llvm::sys::path::remove_filename(pathBuf); 56*b5793786SMichael Kruse return pathBuf.str().str(); 5764ab3302SCarolineConcatto } 5864ab3302SCarolineConcatto 5964ab3302SCarolineConcatto std::string LocateSourceFile( 6064ab3302SCarolineConcatto std::string name, const std::vector<std::string> &searchPath) { 61*b5793786SMichael Kruse if (name.empty() || name == "-" || llvm::sys::path::is_absolute(name)) { 6264ab3302SCarolineConcatto return name; 6364ab3302SCarolineConcatto } 6464ab3302SCarolineConcatto for (const std::string &dir : searchPath) { 65*b5793786SMichael Kruse llvm::SmallString<128> path{dir}; 66*b5793786SMichael Kruse llvm::sys::path::append(path, name); 6713ea73e4SDavid Truby bool isDir{false}; 6813ea73e4SDavid Truby auto er = llvm::sys::fs::is_directory(path, isDir); 6913ea73e4SDavid Truby if (!er && !isDir) { 70*b5793786SMichael Kruse return path.str().str(); 7164ab3302SCarolineConcatto } 7264ab3302SCarolineConcatto } 7364ab3302SCarolineConcatto return name; 7464ab3302SCarolineConcatto } 7564ab3302SCarolineConcatto 7613ea73e4SDavid Truby std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { 7764ab3302SCarolineConcatto std::size_t wrote{0}; 7813ea73e4SDavid Truby char *buffer{buf.data()}; 7913ea73e4SDavid Truby char *p{buf.data()}; 8013ea73e4SDavid Truby std::size_t bytes = buf.size(); 8164ab3302SCarolineConcatto while (bytes > 0) { 8264ab3302SCarolineConcatto void *vp{static_cast<void *>(p)}; 8364ab3302SCarolineConcatto void *crvp{std::memchr(vp, '\r', bytes)}; 8464ab3302SCarolineConcatto char *crcp{static_cast<char *>(crvp)}; 8564ab3302SCarolineConcatto if (!crcp) { 8664ab3302SCarolineConcatto std::memmove(buffer + wrote, p, bytes); 8764ab3302SCarolineConcatto wrote += bytes; 8864ab3302SCarolineConcatto break; 8964ab3302SCarolineConcatto } 9064ab3302SCarolineConcatto std::size_t chunk = crcp - p; 91af6b8d51Speter klausler auto advance{chunk + 1}; 92af6b8d51Speter klausler if (chunk + 1 >= bytes || crcp[1] == '\n') { 93af6b8d51Speter klausler // CR followed by LF or EOF: omit 94af6b8d51Speter klausler } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { 95af6b8d51Speter klausler // CR preceded by LF or BOF: omit 96af6b8d51Speter klausler } else { 97af6b8d51Speter klausler // CR in line: retain 98af6b8d51Speter klausler ++chunk; 99af6b8d51Speter klausler } 10064ab3302SCarolineConcatto std::memmove(buffer + wrote, p, chunk); 10164ab3302SCarolineConcatto wrote += chunk; 102af6b8d51Speter klausler p += advance; 103af6b8d51Speter klausler bytes -= advance; 10464ab3302SCarolineConcatto } 10564ab3302SCarolineConcatto return wrote; 10664ab3302SCarolineConcatto } 10764ab3302SCarolineConcatto 1088670e499SCaroline Concatto bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { 10964ab3302SCarolineConcatto Close(); 11064ab3302SCarolineConcatto path_ = path; 11113ea73e4SDavid Truby std::string errorPath{"'"s + path_ + "'"}; 11213ea73e4SDavid Truby auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; 11313ea73e4SDavid Truby if (!bufOr) { 11413ea73e4SDavid Truby auto err = bufOr.getError(); 11513ea73e4SDavid Truby error << "Could not open " << errorPath << ": " << err.message(); 11664ab3302SCarolineConcatto return false; 11764ab3302SCarolineConcatto } 11813ea73e4SDavid Truby buf_ = std::move(bufOr.get()); 11913ea73e4SDavid Truby ReadFile(); 12013ea73e4SDavid Truby return true; 12164ab3302SCarolineConcatto } 12264ab3302SCarolineConcatto 1238670e499SCaroline Concatto bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { 12464ab3302SCarolineConcatto Close(); 12564ab3302SCarolineConcatto path_ = "standard input"; 12664ab3302SCarolineConcatto 12713ea73e4SDavid Truby auto buf_or = llvm::MemoryBuffer::getSTDIN(); 12813ea73e4SDavid Truby if (!buf_or) { 12913ea73e4SDavid Truby auto err = buf_or.getError(); 13013ea73e4SDavid Truby error << err.message(); 13164ab3302SCarolineConcatto return false; 13264ab3302SCarolineConcatto } 13313ea73e4SDavid Truby auto inbuf = std::move(buf_or.get()); 13413ea73e4SDavid Truby buf_ = 13513ea73e4SDavid Truby llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); 13613ea73e4SDavid Truby llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); 13713ea73e4SDavid Truby ReadFile(); 13813ea73e4SDavid Truby return true; 13964ab3302SCarolineConcatto } 14064ab3302SCarolineConcatto 14113ea73e4SDavid Truby void SourceFile::ReadFile() { 14213ea73e4SDavid Truby buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); 143b547e1a4SDavid Truby if (content().size() == 0 || content().back() != '\n') { 144b547e1a4SDavid Truby // Don't bother to copy if we have spare memory 145b547e1a4SDavid Truby if (content().size() >= buf_->getBufferSize()) { 146b547e1a4SDavid Truby auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( 147b547e1a4SDavid Truby content().size() + 1)}; 148b547e1a4SDavid Truby llvm::copy(content(), tmp_buf->getBufferStart()); 149b547e1a4SDavid Truby Close(); 150b547e1a4SDavid Truby buf_ = std::move(tmp_buf); 151b547e1a4SDavid Truby } 152b547e1a4SDavid Truby buf_end_++; 153b547e1a4SDavid Truby buf_->getBuffer()[buf_end_ - 1] = '\n'; 154b547e1a4SDavid Truby } 15564ab3302SCarolineConcatto IdentifyPayload(); 15664ab3302SCarolineConcatto RecordLineStarts(); 15764ab3302SCarolineConcatto } 15864ab3302SCarolineConcatto 15964ab3302SCarolineConcatto void SourceFile::Close() { 16064ab3302SCarolineConcatto path_.clear(); 16113ea73e4SDavid Truby buf_.reset(); 16264ab3302SCarolineConcatto } 16364ab3302SCarolineConcatto 16464ab3302SCarolineConcatto SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { 16513ea73e4SDavid Truby CHECK(at < bytes()); 16613ea73e4SDavid Truby 16713ea73e4SDavid Truby auto it = llvm::upper_bound(lineStart_, at); 16813ea73e4SDavid Truby auto low = std::distance(lineStart_.begin(), it - 1); 16964ab3302SCarolineConcatto return {*this, static_cast<int>(low + 1), 17064ab3302SCarolineConcatto static_cast<int>(at - lineStart_[low] + 1)}; 17164ab3302SCarolineConcatto } 1721f879005STim Keith } // namespace Fortran::parser 173