1 //===-- lib/Parser/source.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "flang/Parser/source.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/char-buffer.h" 12 #include "llvm/Support/Errno.h" 13 #include "llvm/Support/FileSystem.h" 14 #include "llvm/Support/raw_ostream.h" 15 #include <algorithm> 16 #include <memory> 17 #include <vector> 18 19 namespace Fortran::parser { 20 21 SourceFile::~SourceFile() { Close(); } 22 23 static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) { 24 std::vector<std::size_t> result; 25 if (source.size() > 0) { 26 CHECK(source.back() == '\n' && "missing ultimate newline"); 27 std::size_t at{0}; 28 do { 29 result.push_back(at); 30 at = source.find('\n', at) + 1; 31 } while (at < source.size()); 32 result.shrink_to_fit(); 33 } 34 return result; 35 } 36 37 void SourceFile::RecordLineStarts() { 38 lineStart_ = FindLineStarts({content().data(), bytes()}); 39 } 40 41 // Check for a Unicode byte order mark (BOM). 42 // Module files all have one; so can source files. 43 void SourceFile::IdentifyPayload() { 44 llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; 45 constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; 46 if (content.startswith(UTF8_BOM)) { 47 bom_end_ = UTF8_BOM.size(); 48 encoding_ = Encoding::UTF_8; 49 } 50 } 51 52 std::string DirectoryName(std::string path) { 53 auto lastSlash{path.rfind("/")}; 54 return lastSlash == std::string::npos ? path : path.substr(0, lastSlash); 55 } 56 57 std::string LocateSourceFile( 58 std::string name, const std::vector<std::string> &searchPath) { 59 if (name.empty() || name == "-" || name[0] == '/') { 60 return name; 61 } 62 for (const std::string &dir : searchPath) { 63 std::string path{dir + '/' + name}; 64 bool isDir{false}; 65 auto er = llvm::sys::fs::is_directory(path, isDir); 66 if (!er && !isDir) { 67 return path; 68 } 69 } 70 return name; 71 } 72 73 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { 74 std::size_t wrote{0}; 75 char *buffer{buf.data()}; 76 char *p{buf.data()}; 77 std::size_t bytes = buf.size(); 78 while (bytes > 0) { 79 void *vp{static_cast<void *>(p)}; 80 void *crvp{std::memchr(vp, '\r', bytes)}; 81 char *crcp{static_cast<char *>(crvp)}; 82 if (!crcp) { 83 std::memmove(buffer + wrote, p, bytes); 84 wrote += bytes; 85 break; 86 } 87 std::size_t chunk = crcp - p; 88 std::memmove(buffer + wrote, p, chunk); 89 wrote += chunk; 90 p += chunk + 1; 91 bytes -= chunk + 1; 92 } 93 return wrote; 94 } 95 96 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { 97 Close(); 98 path_ = path; 99 std::string errorPath{"'"s + path_ + "'"}; 100 auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; 101 if (!bufOr) { 102 auto err = bufOr.getError(); 103 error << "Could not open " << errorPath << ": " << err.message(); 104 return false; 105 } 106 buf_ = std::move(bufOr.get()); 107 ReadFile(); 108 return true; 109 } 110 111 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { 112 Close(); 113 path_ = "standard input"; 114 115 auto buf_or = llvm::MemoryBuffer::getSTDIN(); 116 if (!buf_or) { 117 auto err = buf_or.getError(); 118 error << err.message(); 119 return false; 120 } 121 auto inbuf = std::move(buf_or.get()); 122 buf_ = 123 llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); 124 llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); 125 ReadFile(); 126 return true; 127 } 128 129 void SourceFile::ReadFile() { 130 if (buf_->getBuffer().size() == 0) { 131 Close(); 132 buf_ = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(1); 133 buf_->getBuffer()[0] = '\n'; 134 } 135 buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); 136 IdentifyPayload(); 137 RecordLineStarts(); 138 } 139 140 void SourceFile::Close() { 141 path_.clear(); 142 buf_.reset(); 143 } 144 145 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { 146 CHECK(at < bytes()); 147 148 auto it = llvm::upper_bound(lineStart_, at); 149 auto low = std::distance(lineStart_.begin(), it - 1); 150 return {*this, static_cast<int>(low + 1), 151 static_cast<int>(at - lineStart_[low] + 1)}; 152 } 153 } // namespace Fortran::parser 154