xref: /llvm-project/flang/lib/Parser/source.cpp (revision 3b635714255da40a7c2a2b3ff03e73fe4fd0a34e)
1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include <algorithm>
13 #include <cerrno>
14 #include <cstddef>
15 #include <cstring>
16 #include <fcntl.h>
17 #include <memory>
18 #include <sys/mman.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <unistd.h>
22 #include <vector>
23 
24 // TODO: Port to Windows &c.
25 
26 namespace Fortran::parser {
27 
// Compile-time switch: when true, ReadFile() tries to memory-map regular
// files and Close() unmaps them.
static constexpr bool useMMap{true};
// Smallest regular-file size (in bytes) for which ReadFile() attempts mmap().
static constexpr int minMapFileBytes{1};  // i.e., no minimum requirement
// ReadFile() attempts mmap() only while the count of open descriptors does
// not exceed this limit; a mapped file keeps its descriptor open until Close().
static constexpr int maxMapOpenFileDescriptors{100};
// Number of descriptors opened by SourceFile::Open() that have not yet been
// closed (standard input, descriptor 0, is never counted).
static int openFileDescriptors{0};
32 
33 SourceFile::~SourceFile() { Close(); }
34 
35 static std::vector<std::size_t> FindLineStarts(
36     const char *source, std::size_t bytes) {
37   std::vector<std::size_t> result;
38   if (bytes > 0) {
39     CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
40     std::size_t at{0};
41     do {
42       result.push_back(at);
43       const void *vp{static_cast<const void *>(&source[at])};
44       const void *vnl{std::memchr(vp, '\n', bytes - at)};
45       const char *nl{static_cast<const char *>(vnl)};
46       at = nl + 1 - source;
47     } while (at < bytes);
48     result.shrink_to_fit();
49   }
50   return result;
51 }
52 
53 void SourceFile::RecordLineStarts() {
54   lineStart_ = FindLineStarts(content_, bytes_);
55 }
56 
57 // Check for a Unicode byte order mark (BOM).
58 // Module files all have one; so can source files.
59 void SourceFile::IdentifyPayload() {
60   content_ = address_;
61   bytes_ = size_;
62   if (content_) {
63     static constexpr int BOMBytes{3};
64     static const char UTF8_BOM[]{"\xef\xbb\xbf"};
65     if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) {
66       content_ += BOMBytes;
67       bytes_ -= BOMBytes;
68       encoding_ = Encoding::UTF_8;
69     }
70   }
71 }
72 
// Strips the final '/' of a path and everything after it.  A path that
// contains no '/' is returned unchanged.
std::string DirectoryName(std::string path) {
  const std::string::size_type slash{path.rfind('/')};
  if (slash != std::string::npos) {
    path.erase(slash);
  }
  return path;
}
77 
// Resolves a source file name against a list of search directories.
// An empty name, "-", or a name that begins with '/' is returned as is.
// Otherwise the first "<dir>/<name>" that stat() accepts and that is not a
// directory is returned; when no directory yields a match, the original
// name is returned unchanged.
std::string LocateSourceFile(
    std::string name, const std::vector<std::string> &searchPath) {
  const bool searchable{!name.empty() && name != "-" && name[0] != '/'};
  if (searchable) {
    for (const std::string &dir : searchPath) {
      std::string candidate{dir};
      candidate += '/';
      candidate += name;
      struct stat info;
      if (stat(candidate.c_str(), &info) != 0) {
        continue;  // nothing at this location
      }
      if (S_ISDIR(info.st_mode)) {
        continue;  // directories are never source files
      }
      return candidate;
    }
  }
  return name;
}
92 
// Deletes every '\r' byte from buffer[0..bytes) in place by sliding the
// runs of other bytes toward the front.  Returns the number of bytes that
// remain; bytes at and beyond that count are not written.
static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
  char *out{buffer};
  char *in{buffer};
  char *const end{buffer + bytes};
  while (in < end) {
    const std::size_t remaining{static_cast<std::size_t>(end - in)};
    void *found{std::memchr(static_cast<void *>(in), '\r', remaining)};
    char *cr{static_cast<char *>(found)};
    // Length of the run of bytes that precedes the next '\r' (or the end).
    const std::size_t run{cr ? static_cast<std::size_t>(cr - in) : remaining};
    std::memmove(out, in, run);
    out += run;
    in += run;
    if (cr) {
      ++in;  // step over the carriage return itself
    }
  }
  return static_cast<std::size_t>(out - buffer);
}
113 
114 bool SourceFile::Open(std::string path, std::stringstream *error) {
115   Close();
116   path_ = path;
117   std::string errorPath{"'"s + path + "'"};
118   errno = 0;
119   fileDescriptor_ = open(path.c_str(), O_RDONLY);
120   if (fileDescriptor_ < 0) {
121     *error << "Could not open " << errorPath << ": " << std::strerror(errno);
122     return false;
123   }
124   ++openFileDescriptors;
125   return ReadFile(errorPath, error);
126 }
127 
128 bool SourceFile::ReadStandardInput(std::stringstream *error) {
129   Close();
130   path_ = "standard input";
131   fileDescriptor_ = 0;
132   return ReadFile(path_, error);
133 }
134 
135 bool SourceFile::ReadFile(std::string errorPath, std::stringstream *error) {
136   struct stat statbuf;
137   if (fstat(fileDescriptor_, &statbuf) != 0) {
138     *error << "fstat failed on " << errorPath << ": " << std::strerror(errno);
139     Close();
140     return false;
141   }
142   if (S_ISDIR(statbuf.st_mode)) {
143     *error << errorPath << " is a directory";
144     Close();
145     return false;
146   }
147 
148   // Try to map a large source file into the process' address space.
149   // Don't bother with small ones.  This also helps keep the number
150   // of open file descriptors from getting out of hand.
151   if (useMMap && S_ISREG(statbuf.st_mode)) {
152     size_ = static_cast<std::size_t>(statbuf.st_size);
153     if (size_ >= minMapFileBytes &&
154         openFileDescriptors <= maxMapOpenFileDescriptors) {
155       void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
156       if (vp != MAP_FAILED) {
157         address_ = static_cast<const char *>(const_cast<const void *>(vp));
158         IdentifyPayload();
159         if (bytes_ > 0 && content_[bytes_ - 1] == '\n' &&
160             std::memchr(static_cast<const void *>(content_), '\r', bytes_) ==
161                 nullptr) {
162           isMemoryMapped_ = true;
163           RecordLineStarts();
164           return true;
165         }
166         // The file needs to have its line endings normalized to simple
167         // newlines.  Remap it for a private rewrite in place.
168         vp = mmap(
169             vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0);
170         if (vp != MAP_FAILED) {
171           address_ = static_cast<const char *>(const_cast<const void *>(vp));
172           IdentifyPayload();
173           auto mutableContent{const_cast<char *>(content_)};
174           bytes_ = RemoveCarriageReturns(mutableContent, bytes_);
175           if (bytes_ > 0) {
176             if (mutableContent[bytes_ - 1] == '\n' ||
177                 (bytes_ & 0xfff) != 0 /* don't cross into next page */) {
178               if (mutableContent[bytes_ - 1] != '\n') {
179                 // Append a final newline.
180                 mutableContent[bytes_++] = '\n';
181               }
182               bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0};
183               CHECK(isNowReadOnly);
184               content_ = mutableContent;
185               isMemoryMapped_ = true;
186               RecordLineStarts();
187               return true;
188             }
189           }
190         }
191         munmap(vp, size_);
192         address_ = content_ = nullptr;
193         size_ = bytes_ = 0;
194       }
195     }
196   }
197 
198   // Read it into an expandable buffer, then marshal its content into a single
199   // contiguous block.
200   CharBuffer buffer;
201   while (true) {
202     std::size_t count;
203     char *to{buffer.FreeSpace(count)};
204     ssize_t got{read(fileDescriptor_, to, count)};
205     if (got < 0) {
206       *error << "could not read " << errorPath << ": " << std::strerror(errno);
207       Close();
208       return false;
209     }
210     if (got == 0) {
211       break;
212     }
213     buffer.Claim(got);
214   }
215   if (fileDescriptor_ > 0) {
216     close(fileDescriptor_);
217     --openFileDescriptors;
218   }
219   fileDescriptor_ = -1;
220   normalized_ = buffer.MarshalNormalized();
221   address_ = normalized_.c_str();
222   size_ = normalized_.size();
223   IdentifyPayload();
224   RecordLineStarts();
225   return true;
226 }
227 
228 void SourceFile::Close() {
229   if (useMMap && isMemoryMapped_) {
230     munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_);
231     isMemoryMapped_ = false;
232   } else if (!normalized_.empty()) {
233     normalized_.clear();
234   } else if (address_) {
235     delete[] address_;
236   }
237   address_ = content_ = nullptr;
238   size_ = bytes_ = 0;
239   if (fileDescriptor_ > 0) {
240     close(fileDescriptor_);
241     --openFileDescriptors;
242   }
243   fileDescriptor_ = -1;
244   path_.clear();
245 }
246 
247 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
248   CHECK(at < bytes_);
249   if (lineStart_.empty()) {
250     return {*this, 1, static_cast<int>(at + 1)};
251   }
252   std::size_t low{0}, count{lineStart_.size()};
253   while (count > 1) {
254     std::size_t mid{low + (count >> 1)};
255     if (lineStart_[mid] > at) {
256       count = mid - low;
257     } else {
258       count -= mid - low;
259       low = mid;
260     }
261   }
262   return {*this, static_cast<int>(low + 1),
263       static_cast<int>(at - lineStart_[low] + 1)};
264 }
265 }
266