xref: /llvm-project/flang/lib/Parser/source.cpp (revision f7a15e0021697e2346d3aa335dedf2bb3cf468f9)
1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "flang/Parser/characters.h"
13 #include "llvm/Support/Errno.h"
14 #include "llvm/Support/FileSystem.h"
15 #include "llvm/Support/Path.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include <algorithm>
18 #include <cstring>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
23 namespace Fortran::parser {
24 
~SourceFile()25 SourceFile::~SourceFile() { Close(); }
26 
RecordLineStarts()27 void SourceFile::RecordLineStarts() {
28   if (std::size_t chars{bytes()}; chars > 0) {
29     origins_.emplace(1, SourcePositionOrigin{path_, 1});
30     const char *source{content().data()};
31     CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
32     std::size_t at{0};
33     do { // "at" is always at the beginning of a source line
34       lineStart_.push_back(at);
35       at = reinterpret_cast<const char *>(
36                std::memchr(source + at, '\n', chars - at)) -
37           source + 1;
38     } while (at < chars);
39     CHECK(at == chars);
40     lineStart_.shrink_to_fit();
41   }
42 }
43 
44 // Check for a Unicode byte order mark (BOM).
45 // Module files all have one; so can source files.
IdentifyPayload()46 void SourceFile::IdentifyPayload() {
47   llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
48   constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
49   if (content.starts_with(UTF8_BOM)) {
50     bom_end_ = UTF8_BOM.size();
51     encoding_ = Encoding::UTF_8;
52   }
53 }
54 
DirectoryName(std::string path)55 std::string DirectoryName(std::string path) {
56   llvm::SmallString<128> pathBuf{path};
57   llvm::sys::path::remove_filename(pathBuf);
58   return pathBuf.str().str();
59 }
60 
LocateSourceFile(std::string name,const std::list<std::string> & searchPath)61 std::optional<std::string> LocateSourceFile(
62     std::string name, const std::list<std::string> &searchPath) {
63   if (name == "-" || llvm::sys::path::is_absolute(name)) {
64     return name;
65   }
66   for (const std::string &dir : searchPath) {
67     llvm::SmallString<128> path{dir};
68     llvm::sys::path::append(path, name);
69     bool isDir{false};
70     auto er = llvm::sys::fs::is_directory(path, isDir);
71     if (!er && !isDir) {
72       return path.str().str();
73     }
74   }
75   return std::nullopt;
76 }
77 
LocateSourceFileAll(std::string name,const std::vector<std::string> & searchPath)78 std::vector<std::string> LocateSourceFileAll(
79     std::string name, const std::vector<std::string> &searchPath) {
80   if (name == "-" || llvm::sys::path::is_absolute(name)) {
81     return {name};
82   }
83   std::vector<std::string> result;
84   for (const std::string &dir : searchPath) {
85     llvm::SmallString<128> path{dir};
86     llvm::sys::path::append(path, name);
87     bool isDir{false};
88     auto er = llvm::sys::fs::is_directory(path, isDir);
89     if (!er && !isDir) {
90       result.emplace_back(path.str().str());
91     }
92   }
93   return result;
94 }
95 
RemoveCarriageReturns(llvm::MutableArrayRef<char> buf)96 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
97   std::size_t wrote{0};
98   char *buffer{buf.data()};
99   char *p{buf.data()};
100   std::size_t bytes = buf.size();
101   while (bytes > 0) {
102     void *vp{static_cast<void *>(p)};
103     void *crvp{std::memchr(vp, '\r', bytes)};
104     char *crcp{static_cast<char *>(crvp)};
105     if (!crcp) {
106       std::memmove(buffer + wrote, p, bytes);
107       wrote += bytes;
108       break;
109     }
110     std::size_t chunk = crcp - p;
111     auto advance{chunk + 1};
112     if (chunk + 1 >= bytes || crcp[1] == '\n') {
113       // CR followed by LF or EOF: omit
114     } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
115       // CR preceded by LF or BOF: omit
116     } else {
117       // CR in line: retain
118       ++chunk;
119     }
120     std::memmove(buffer + wrote, p, chunk);
121     wrote += chunk;
122     p += advance;
123     bytes -= advance;
124   }
125   return wrote;
126 }
127 
Open(std::string path,llvm::raw_ostream & error)128 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
129   Close();
130   path_ = path;
131   std::string errorPath{"'"s + path_ + "'"};
132   auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
133   if (!bufOr) {
134     auto err = bufOr.getError();
135     error << "Could not open " << errorPath << ": " << err.message();
136     return false;
137   }
138   buf_ = std::move(bufOr.get());
139   ReadFile();
140   return true;
141 }
142 
ReadStandardInput(llvm::raw_ostream & error)143 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
144   Close();
145   path_ = "standard input";
146   auto buf_or = llvm::MemoryBuffer::getSTDIN();
147   if (!buf_or) {
148     auto err = buf_or.getError();
149     error << err.message();
150     return false;
151   }
152   auto inbuf = std::move(buf_or.get());
153   buf_ =
154       llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
155   llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
156   ReadFile();
157   return true;
158 }
159 
ReadFile()160 void SourceFile::ReadFile() {
161   buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
162   if (content().size() == 0 || content().back() != '\n') {
163     // Don't bother to copy if we have spare memory
164     if (content().size() >= buf_->getBufferSize()) {
165       auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
166           content().size() + 1)};
167       llvm::copy(content(), tmp_buf->getBufferStart());
168       buf_ = std::move(tmp_buf);
169     }
170     buf_end_++;
171     buf_->getBuffer()[buf_end_ - 1] = '\n';
172   }
173   IdentifyPayload();
174   RecordLineStarts();
175 }
176 
Close()177 void SourceFile::Close() {
178   path_.clear();
179   buf_.reset();
180   distinctPaths_.clear();
181   origins_.clear();
182 }
183 
GetSourcePosition(std::size_t at) const184 SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
185   CHECK(at < bytes());
186   auto it{llvm::upper_bound(lineStart_, at)};
187   auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1};
188   auto ub{origins_.upper_bound(trueLineNumber)};
189   auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
190   if (ub == origins_.begin()) {
191     return {*this, path_, static_cast<int>(trueLineNumber), column,
192         static_cast<int>(trueLineNumber)};
193   } else {
194     --ub;
195     const SourcePositionOrigin &origin{ub->second};
196     auto lineNumber{
197         trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
198     return {*this, origin.path, static_cast<int>(lineNumber), column,
199         static_cast<int>(trueLineNumber)};
200   }
201 }
202 
SavePath(std::string && path)203 const std::string &SourceFile::SavePath(std::string &&path) {
204   return *distinctPaths_.emplace(std::move(path)).first;
205 }
206 
LineDirective(int trueLineNumber,const std::string & path,int lineNumber)207 void SourceFile::LineDirective(
208     int trueLineNumber, const std::string &path, int lineNumber) {
209   origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
210 }
211 
Dump(llvm::raw_ostream & o) const212 llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
213   o << "SourceFile '" << path_ << "'\n";
214   for (const auto &[at, spo] : origins_) {
215     o << "  origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n';
216   }
217   return o;
218 }
219 } // namespace Fortran::parser
220