1 //===-- lib/Parser/source.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "flang/Parser/source.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/char-buffer.h" 12 #include <algorithm> 13 #include <cerrno> 14 #include <cstddef> 15 #include <cstring> 16 #include <fcntl.h> 17 #include <memory> 18 #include <sys/mman.h> 19 #include <sys/stat.h> 20 #include <sys/types.h> 21 #include <unistd.h> 22 #include <vector> 23 24 // TODO: Port to Windows &c. 25 26 namespace Fortran::parser { 27 28 static constexpr bool useMMap{true}; 29 static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement 30 static constexpr int maxMapOpenFileDescriptors{100}; 31 static int openFileDescriptors{0}; 32 33 SourceFile::~SourceFile() { Close(); } 34 35 static std::vector<std::size_t> FindLineStarts( 36 const char *source, std::size_t bytes) { 37 std::vector<std::size_t> result; 38 if (bytes > 0) { 39 CHECK(source[bytes - 1] == '\n' && "missing ultimate newline"); 40 std::size_t at{0}; 41 do { 42 result.push_back(at); 43 const void *vp{static_cast<const void *>(&source[at])}; 44 const void *vnl{std::memchr(vp, '\n', bytes - at)}; 45 const char *nl{static_cast<const char *>(vnl)}; 46 at = nl + 1 - source; 47 } while (at < bytes); 48 result.shrink_to_fit(); 49 } 50 return result; 51 } 52 53 void SourceFile::RecordLineStarts() { 54 lineStart_ = FindLineStarts(content_, bytes_); 55 } 56 57 // Check for a Unicode byte order mark (BOM). 58 // Module files all have one; so can source files. 59 void SourceFile::IdentifyPayload() { 60 content_ = address_; 61 bytes_ = size_; 62 if (content_) { 63 static constexpr int BOMBytes{3}; 64 static const char UTF8_BOM[]{"\xef\xbb\xbf"}; 65 if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) { 66 content_ += BOMBytes; 67 bytes_ -= BOMBytes; 68 encoding_ = Encoding::UTF_8; 69 } 70 } 71 } 72 73 std::string DirectoryName(std::string path) { 74 auto lastSlash{path.rfind("/")}; 75 return lastSlash == std::string::npos ? path : path.substr(0, lastSlash); 76 } 77 78 std::string LocateSourceFile( 79 std::string name, const std::vector<std::string> &searchPath) { 80 if (name.empty() || name == "-" || name[0] == '/') { 81 return name; 82 } 83 for (const std::string &dir : searchPath) { 84 std::string path{dir + '/' + name}; 85 struct stat statbuf; 86 if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) { 87 return path; 88 } 89 } 90 return name; 91 } 92 93 static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) { 94 std::size_t wrote{0}; 95 char *p{buffer}; 96 while (bytes > 0) { 97 void *vp{static_cast<void *>(p)}; 98 void *crvp{std::memchr(vp, '\r', bytes)}; 99 char *crcp{static_cast<char *>(crvp)}; 100 if (!crcp) { 101 std::memmove(buffer + wrote, p, bytes); 102 wrote += bytes; 103 break; 104 } 105 std::size_t chunk = crcp - p; 106 std::memmove(buffer + wrote, p, chunk); 107 wrote += chunk; 108 p += chunk + 1; 109 bytes -= chunk + 1; 110 } 111 return wrote; 112 } 113 114 bool SourceFile::Open(std::string path, std::stringstream *error) { 115 Close(); 116 path_ = path; 117 std::string errorPath{"'"s + path + "'"}; 118 errno = 0; 119 fileDescriptor_ = open(path.c_str(), O_RDONLY); 120 if (fileDescriptor_ < 0) { 121 *error << "Could not open " << errorPath << ": " << std::strerror(errno); 122 return false; 123 } 124 ++openFileDescriptors; 125 return ReadFile(errorPath, error); 126 } 127 128 bool SourceFile::ReadStandardInput(std::stringstream *error) { 129 Close(); 130 path_ = "standard input"; 131 fileDescriptor_ = 0; 132 return ReadFile(path_, error); 133 } 134 135 bool SourceFile::ReadFile(std::string errorPath, std::stringstream *error) { 136 struct stat statbuf; 137 if (fstat(fileDescriptor_, &statbuf) != 0) { 138 *error << "fstat failed on " << errorPath << ": " << std::strerror(errno); 139 Close(); 140 return false; 141 } 142 if (S_ISDIR(statbuf.st_mode)) { 143 *error << errorPath << " is a directory"; 144 Close(); 145 return false; 146 } 147 148 // Try to map a large source file into the process' address space. 149 // Don't bother with small ones. This also helps keep the number 150 // of open file descriptors from getting out of hand. 151 if (useMMap && S_ISREG(statbuf.st_mode)) { 152 size_ = static_cast<std::size_t>(statbuf.st_size); 153 if (size_ >= minMapFileBytes && 154 openFileDescriptors <= maxMapOpenFileDescriptors) { 155 void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); 156 if (vp != MAP_FAILED) { 157 address_ = static_cast<const char *>(const_cast<const void *>(vp)); 158 IdentifyPayload(); 159 if (bytes_ > 0 && content_[bytes_ - 1] == '\n' && 160 std::memchr(static_cast<const void *>(content_), '\r', bytes_) == 161 nullptr) { 162 isMemoryMapped_ = true; 163 RecordLineStarts(); 164 return true; 165 } 166 // The file needs to have its line endings normalized to simple 167 // newlines. Remap it for a private rewrite in place. 168 vp = mmap( 169 vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0); 170 if (vp != MAP_FAILED) { 171 address_ = static_cast<const char *>(const_cast<const void *>(vp)); 172 IdentifyPayload(); 173 auto mutableContent{const_cast<char *>(content_)}; 174 bytes_ = RemoveCarriageReturns(mutableContent, bytes_); 175 if (bytes_ > 0) { 176 if (mutableContent[bytes_ - 1] == '\n' || 177 (bytes_ & 0xfff) != 0 /* don't cross into next page */) { 178 if (mutableContent[bytes_ - 1] != '\n') { 179 // Append a final newline. 180 mutableContent[bytes_++] = '\n'; 181 } 182 bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0}; 183 CHECK(isNowReadOnly); 184 content_ = mutableContent; 185 isMemoryMapped_ = true; 186 RecordLineStarts(); 187 return true; 188 } 189 } 190 } 191 munmap(vp, size_); 192 address_ = content_ = nullptr; 193 size_ = bytes_ = 0; 194 } 195 } 196 } 197 198 // Read it into an expandable buffer, then marshal its content into a single 199 // contiguous block. 200 CharBuffer buffer; 201 while (true) { 202 std::size_t count; 203 char *to{buffer.FreeSpace(count)}; 204 ssize_t got{read(fileDescriptor_, to, count)}; 205 if (got < 0) { 206 *error << "could not read " << errorPath << ": " << std::strerror(errno); 207 Close(); 208 return false; 209 } 210 if (got == 0) { 211 break; 212 } 213 buffer.Claim(got); 214 } 215 if (fileDescriptor_ > 0) { 216 close(fileDescriptor_); 217 --openFileDescriptors; 218 } 219 fileDescriptor_ = -1; 220 normalized_ = buffer.MarshalNormalized(); 221 address_ = normalized_.c_str(); 222 size_ = normalized_.size(); 223 IdentifyPayload(); 224 RecordLineStarts(); 225 return true; 226 } 227 228 void SourceFile::Close() { 229 if (useMMap && isMemoryMapped_) { 230 munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_); 231 isMemoryMapped_ = false; 232 } else if (!normalized_.empty()) { 233 normalized_.clear(); 234 } else if (address_) { 235 delete[] address_; 236 } 237 address_ = content_ = nullptr; 238 size_ = bytes_ = 0; 239 if (fileDescriptor_ > 0) { 240 close(fileDescriptor_); 241 --openFileDescriptors; 242 } 243 fileDescriptor_ = -1; 244 path_.clear(); 245 } 246 247 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { 248 CHECK(at < bytes_); 249 if (lineStart_.empty()) { 250 return {*this, 1, static_cast<int>(at + 1)}; 251 } 252 std::size_t low{0}, count{lineStart_.size()}; 253 while (count > 1) { 254 std::size_t mid{low + (count >> 1)}; 255 if (lineStart_[mid] > at) { 256 count = mid - low; 257 } else { 258 count -= mid - low; 259 low = mid; 260 } 261 } 262 return {*this, static_cast<int>(low + 1), 263 static_cast<int>(at - lineStart_[low] + 1)}; 264 } 265 } 266