xref: /llvm-project/flang/lib/Parser/source.cpp (revision af6b8d51390dc1a4af7ae5de4e71947dce8a75f6)
1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "llvm/Support/Errno.h"
13 #include "llvm/Support/FileSystem.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <memory>
17 #include <vector>
18 
19 namespace Fortran::parser {
20 
21 SourceFile::~SourceFile() { Close(); }
22 
23 static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
24   std::vector<std::size_t> result;
25   if (source.size() > 0) {
26     CHECK(source.back() == '\n' && "missing ultimate newline");
27     std::size_t at{0};
28     do {
29       result.push_back(at);
30       at = source.find('\n', at) + 1;
31     } while (at < source.size());
32     result.shrink_to_fit();
33   }
34   return result;
35 }
36 
37 void SourceFile::RecordLineStarts() {
38   lineStart_ = FindLineStarts({content().data(), bytes()});
39 }
40 
41 // Check for a Unicode byte order mark (BOM).
42 // Module files all have one; so can source files.
43 void SourceFile::IdentifyPayload() {
44   llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
45   constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
46   if (content.startswith(UTF8_BOM)) {
47     bom_end_ = UTF8_BOM.size();
48     encoding_ = Encoding::UTF_8;
49   }
50 }
51 
// Returns everything in `path` that precedes its last '/'.
// A path without any '/' is returned unchanged, and a path whose only
// '/' is its first character yields the empty string.
std::string DirectoryName(std::string path) {
  const std::string::size_type slashPos{path.rfind('/')};
  if (slashPos != std::string::npos) {
    path.erase(slashPos); // drop the slash and everything after it
  }
  return path;
}
56 
57 std::string LocateSourceFile(
58     std::string name, const std::vector<std::string> &searchPath) {
59   if (name.empty() || name == "-" || name[0] == '/') {
60     return name;
61   }
62   for (const std::string &dir : searchPath) {
63     std::string path{dir + '/' + name};
64     bool isDir{false};
65     auto er = llvm::sys::fs::is_directory(path, isDir);
66     if (!er && !isDir) {
67       return path;
68     }
69   }
70   return name;
71 }
72 
73 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
74   std::size_t wrote{0};
75   char *buffer{buf.data()};
76   char *p{buf.data()};
77   std::size_t bytes = buf.size();
78   while (bytes > 0) {
79     void *vp{static_cast<void *>(p)};
80     void *crvp{std::memchr(vp, '\r', bytes)};
81     char *crcp{static_cast<char *>(crvp)};
82     if (!crcp) {
83       std::memmove(buffer + wrote, p, bytes);
84       wrote += bytes;
85       break;
86     }
87     std::size_t chunk = crcp - p;
88     auto advance{chunk + 1};
89     if (chunk + 1 >= bytes || crcp[1] == '\n') {
90       // CR followed by LF or EOF: omit
91     } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
92       // CR preceded by LF or BOF: omit
93     } else {
94       // CR in line: retain
95       ++chunk;
96     }
97     std::memmove(buffer + wrote, p, chunk);
98     wrote += chunk;
99     p += advance;
100     bytes -= advance;
101   }
102   return wrote;
103 }
104 
105 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
106   Close();
107   path_ = path;
108   std::string errorPath{"'"s + path_ + "'"};
109   auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
110   if (!bufOr) {
111     auto err = bufOr.getError();
112     error << "Could not open " << errorPath << ": " << err.message();
113     return false;
114   }
115   buf_ = std::move(bufOr.get());
116   ReadFile();
117   return true;
118 }
119 
120 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
121   Close();
122   path_ = "standard input";
123 
124   auto buf_or = llvm::MemoryBuffer::getSTDIN();
125   if (!buf_or) {
126     auto err = buf_or.getError();
127     error << err.message();
128     return false;
129   }
130   auto inbuf = std::move(buf_or.get());
131   buf_ =
132       llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
133   llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
134   ReadFile();
135   return true;
136 }
137 
138 void SourceFile::ReadFile() {
139   buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
140   if (content().size() == 0 || content().back() != '\n') {
141     // Don't bother to copy if we have spare memory
142     if (content().size() >= buf_->getBufferSize()) {
143       auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
144           content().size() + 1)};
145       llvm::copy(content(), tmp_buf->getBufferStart());
146       Close();
147       buf_ = std::move(tmp_buf);
148     }
149     buf_end_++;
150     buf_->getBuffer()[buf_end_ - 1] = '\n';
151   }
152   IdentifyPayload();
153   RecordLineStarts();
154 }
155 
156 void SourceFile::Close() {
157   path_.clear();
158   buf_.reset();
159 }
160 
161 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
162   CHECK(at < bytes());
163 
164   auto it = llvm::upper_bound(lineStart_, at);
165   auto low = std::distance(lineStart_.begin(), it - 1);
166   return {*this, static_cast<int>(low + 1),
167       static_cast<int>(at - lineStart_[low] + 1)};
168 }
169 } // namespace Fortran::parser
170