xref: /llvm-project/flang/lib/Parser/source.cpp (revision f7a15e0021697e2346d3aa335dedf2bb3cf468f9)
164ab3302SCarolineConcatto //===-- lib/Parser/source.cpp ---------------------------------------------===//
264ab3302SCarolineConcatto //
364ab3302SCarolineConcatto // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
464ab3302SCarolineConcatto // See https://llvm.org/LICENSE.txt for license information.
564ab3302SCarolineConcatto // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
664ab3302SCarolineConcatto //
764ab3302SCarolineConcatto //===----------------------------------------------------------------------===//
864ab3302SCarolineConcatto 
964ab3302SCarolineConcatto #include "flang/Parser/source.h"
1064ab3302SCarolineConcatto #include "flang/Common/idioms.h"
1164ab3302SCarolineConcatto #include "flang/Parser/char-buffer.h"
12e12ffe6aSPeter Klausler #include "flang/Parser/characters.h"
138670e499SCaroline Concatto #include "llvm/Support/Errno.h"
1413ea73e4SDavid Truby #include "llvm/Support/FileSystem.h"
15b5793786SMichael Kruse #include "llvm/Support/Path.h"
168670e499SCaroline Concatto #include "llvm/Support/raw_ostream.h"
1764ab3302SCarolineConcatto #include <algorithm>
18e12ffe6aSPeter Klausler #include <cstring>
1964ab3302SCarolineConcatto #include <memory>
20e12ffe6aSPeter Klausler #include <string>
2164ab3302SCarolineConcatto #include <vector>
2264ab3302SCarolineConcatto 
2364ab3302SCarolineConcatto namespace Fortran::parser {
2464ab3302SCarolineConcatto 
~SourceFile()2564ab3302SCarolineConcatto SourceFile::~SourceFile() { Close(); }
2664ab3302SCarolineConcatto 
RecordLineStarts()2764ab3302SCarolineConcatto void SourceFile::RecordLineStarts() {
28e12ffe6aSPeter Klausler   if (std::size_t chars{bytes()}; chars > 0) {
29e12ffe6aSPeter Klausler     origins_.emplace(1, SourcePositionOrigin{path_, 1});
30e12ffe6aSPeter Klausler     const char *source{content().data()};
31e12ffe6aSPeter Klausler     CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
32e12ffe6aSPeter Klausler     std::size_t at{0};
33e12ffe6aSPeter Klausler     do { // "at" is always at the beginning of a source line
34e12ffe6aSPeter Klausler       lineStart_.push_back(at);
35e12ffe6aSPeter Klausler       at = reinterpret_cast<const char *>(
36e12ffe6aSPeter Klausler                std::memchr(source + at, '\n', chars - at)) -
37e12ffe6aSPeter Klausler           source + 1;
38e12ffe6aSPeter Klausler     } while (at < chars);
39e12ffe6aSPeter Klausler     CHECK(at == chars);
40e12ffe6aSPeter Klausler     lineStart_.shrink_to_fit();
41e12ffe6aSPeter Klausler   }
4264ab3302SCarolineConcatto }
4364ab3302SCarolineConcatto 
4464ab3302SCarolineConcatto // Check for a Unicode byte order mark (BOM).
4564ab3302SCarolineConcatto // Module files all have one; so can source files.
IdentifyPayload()4664ab3302SCarolineConcatto void SourceFile::IdentifyPayload() {
4713ea73e4SDavid Truby   llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
4813ea73e4SDavid Truby   constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
4911efcceaSKazu Hirata   if (content.starts_with(UTF8_BOM)) {
5013ea73e4SDavid Truby     bom_end_ = UTF8_BOM.size();
5164ab3302SCarolineConcatto     encoding_ = Encoding::UTF_8;
5264ab3302SCarolineConcatto   }
5364ab3302SCarolineConcatto }
5464ab3302SCarolineConcatto 
DirectoryName(std::string path)5564ab3302SCarolineConcatto std::string DirectoryName(std::string path) {
56b5793786SMichael Kruse   llvm::SmallString<128> pathBuf{path};
57b5793786SMichael Kruse   llvm::sys::path::remove_filename(pathBuf);
58b5793786SMichael Kruse   return pathBuf.str().str();
5964ab3302SCarolineConcatto }
6064ab3302SCarolineConcatto 
LocateSourceFile(std::string name,const std::list<std::string> & searchPath)616110e771Speter klausler std::optional<std::string> LocateSourceFile(
626110e771Speter klausler     std::string name, const std::list<std::string> &searchPath) {
636110e771Speter klausler   if (name == "-" || llvm::sys::path::is_absolute(name)) {
6464ab3302SCarolineConcatto     return name;
6564ab3302SCarolineConcatto   }
6664ab3302SCarolineConcatto   for (const std::string &dir : searchPath) {
67b5793786SMichael Kruse     llvm::SmallString<128> path{dir};
68b5793786SMichael Kruse     llvm::sys::path::append(path, name);
6913ea73e4SDavid Truby     bool isDir{false};
7013ea73e4SDavid Truby     auto er = llvm::sys::fs::is_directory(path, isDir);
7113ea73e4SDavid Truby     if (!er && !isDir) {
72b5793786SMichael Kruse       return path.str().str();
7364ab3302SCarolineConcatto     }
7464ab3302SCarolineConcatto   }
756110e771Speter klausler   return std::nullopt;
7664ab3302SCarolineConcatto }
7764ab3302SCarolineConcatto 
LocateSourceFileAll(std::string name,const std::vector<std::string> & searchPath)78*f7a15e00SPeter Klausler std::vector<std::string> LocateSourceFileAll(
79*f7a15e00SPeter Klausler     std::string name, const std::vector<std::string> &searchPath) {
80*f7a15e00SPeter Klausler   if (name == "-" || llvm::sys::path::is_absolute(name)) {
81*f7a15e00SPeter Klausler     return {name};
82*f7a15e00SPeter Klausler   }
83*f7a15e00SPeter Klausler   std::vector<std::string> result;
84*f7a15e00SPeter Klausler   for (const std::string &dir : searchPath) {
85*f7a15e00SPeter Klausler     llvm::SmallString<128> path{dir};
86*f7a15e00SPeter Klausler     llvm::sys::path::append(path, name);
87*f7a15e00SPeter Klausler     bool isDir{false};
88*f7a15e00SPeter Klausler     auto er = llvm::sys::fs::is_directory(path, isDir);
89*f7a15e00SPeter Klausler     if (!er && !isDir) {
90*f7a15e00SPeter Klausler       result.emplace_back(path.str().str());
91*f7a15e00SPeter Klausler     }
92*f7a15e00SPeter Klausler   }
93*f7a15e00SPeter Klausler   return result;
94*f7a15e00SPeter Klausler }
95*f7a15e00SPeter Klausler 
RemoveCarriageReturns(llvm::MutableArrayRef<char> buf)9613ea73e4SDavid Truby std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
9764ab3302SCarolineConcatto   std::size_t wrote{0};
9813ea73e4SDavid Truby   char *buffer{buf.data()};
9913ea73e4SDavid Truby   char *p{buf.data()};
10013ea73e4SDavid Truby   std::size_t bytes = buf.size();
10164ab3302SCarolineConcatto   while (bytes > 0) {
10264ab3302SCarolineConcatto     void *vp{static_cast<void *>(p)};
10364ab3302SCarolineConcatto     void *crvp{std::memchr(vp, '\r', bytes)};
10464ab3302SCarolineConcatto     char *crcp{static_cast<char *>(crvp)};
10564ab3302SCarolineConcatto     if (!crcp) {
10664ab3302SCarolineConcatto       std::memmove(buffer + wrote, p, bytes);
10764ab3302SCarolineConcatto       wrote += bytes;
10864ab3302SCarolineConcatto       break;
10964ab3302SCarolineConcatto     }
11064ab3302SCarolineConcatto     std::size_t chunk = crcp - p;
111af6b8d51Speter klausler     auto advance{chunk + 1};
112af6b8d51Speter klausler     if (chunk + 1 >= bytes || crcp[1] == '\n') {
113af6b8d51Speter klausler       // CR followed by LF or EOF: omit
114af6b8d51Speter klausler     } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
115af6b8d51Speter klausler       // CR preceded by LF or BOF: omit
116af6b8d51Speter klausler     } else {
117af6b8d51Speter klausler       // CR in line: retain
118af6b8d51Speter klausler       ++chunk;
119af6b8d51Speter klausler     }
12064ab3302SCarolineConcatto     std::memmove(buffer + wrote, p, chunk);
12164ab3302SCarolineConcatto     wrote += chunk;
122af6b8d51Speter klausler     p += advance;
123af6b8d51Speter klausler     bytes -= advance;
12464ab3302SCarolineConcatto   }
12564ab3302SCarolineConcatto   return wrote;
12664ab3302SCarolineConcatto }
12764ab3302SCarolineConcatto 
Open(std::string path,llvm::raw_ostream & error)1288670e499SCaroline Concatto bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
12964ab3302SCarolineConcatto   Close();
13064ab3302SCarolineConcatto   path_ = path;
13113ea73e4SDavid Truby   std::string errorPath{"'"s + path_ + "'"};
13213ea73e4SDavid Truby   auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
13313ea73e4SDavid Truby   if (!bufOr) {
13413ea73e4SDavid Truby     auto err = bufOr.getError();
13513ea73e4SDavid Truby     error << "Could not open " << errorPath << ": " << err.message();
13664ab3302SCarolineConcatto     return false;
13764ab3302SCarolineConcatto   }
13813ea73e4SDavid Truby   buf_ = std::move(bufOr.get());
13913ea73e4SDavid Truby   ReadFile();
14013ea73e4SDavid Truby   return true;
14164ab3302SCarolineConcatto }
14264ab3302SCarolineConcatto 
ReadStandardInput(llvm::raw_ostream & error)1438670e499SCaroline Concatto bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
14464ab3302SCarolineConcatto   Close();
14564ab3302SCarolineConcatto   path_ = "standard input";
14613ea73e4SDavid Truby   auto buf_or = llvm::MemoryBuffer::getSTDIN();
14713ea73e4SDavid Truby   if (!buf_or) {
14813ea73e4SDavid Truby     auto err = buf_or.getError();
14913ea73e4SDavid Truby     error << err.message();
15064ab3302SCarolineConcatto     return false;
15164ab3302SCarolineConcatto   }
15213ea73e4SDavid Truby   auto inbuf = std::move(buf_or.get());
15313ea73e4SDavid Truby   buf_ =
15413ea73e4SDavid Truby       llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
15513ea73e4SDavid Truby   llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
15613ea73e4SDavid Truby   ReadFile();
15713ea73e4SDavid Truby   return true;
15864ab3302SCarolineConcatto }
15964ab3302SCarolineConcatto 
ReadFile()16013ea73e4SDavid Truby void SourceFile::ReadFile() {
16113ea73e4SDavid Truby   buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
162b547e1a4SDavid Truby   if (content().size() == 0 || content().back() != '\n') {
163b547e1a4SDavid Truby     // Don't bother to copy if we have spare memory
164b547e1a4SDavid Truby     if (content().size() >= buf_->getBufferSize()) {
165b547e1a4SDavid Truby       auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
166b547e1a4SDavid Truby           content().size() + 1)};
167b547e1a4SDavid Truby       llvm::copy(content(), tmp_buf->getBufferStart());
168b547e1a4SDavid Truby       buf_ = std::move(tmp_buf);
169b547e1a4SDavid Truby     }
170b547e1a4SDavid Truby     buf_end_++;
171b547e1a4SDavid Truby     buf_->getBuffer()[buf_end_ - 1] = '\n';
172b547e1a4SDavid Truby   }
17364ab3302SCarolineConcatto   IdentifyPayload();
17464ab3302SCarolineConcatto   RecordLineStarts();
17564ab3302SCarolineConcatto }
17664ab3302SCarolineConcatto 
Close()17764ab3302SCarolineConcatto void SourceFile::Close() {
17864ab3302SCarolineConcatto   path_.clear();
17913ea73e4SDavid Truby   buf_.reset();
180e12ffe6aSPeter Klausler   distinctPaths_.clear();
181e12ffe6aSPeter Klausler   origins_.clear();
18264ab3302SCarolineConcatto }
18364ab3302SCarolineConcatto 
GetSourcePosition(std::size_t at) const184e12ffe6aSPeter Klausler SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
18513ea73e4SDavid Truby   CHECK(at < bytes());
186e12ffe6aSPeter Klausler   auto it{llvm::upper_bound(lineStart_, at)};
187e12ffe6aSPeter Klausler   auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1};
188e12ffe6aSPeter Klausler   auto ub{origins_.upper_bound(trueLineNumber)};
189e12ffe6aSPeter Klausler   auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
190e12ffe6aSPeter Klausler   if (ub == origins_.begin()) {
191e12ffe6aSPeter Klausler     return {*this, path_, static_cast<int>(trueLineNumber), column,
192e12ffe6aSPeter Klausler         static_cast<int>(trueLineNumber)};
193e12ffe6aSPeter Klausler   } else {
194e12ffe6aSPeter Klausler     --ub;
195e12ffe6aSPeter Klausler     const SourcePositionOrigin &origin{ub->second};
196e12ffe6aSPeter Klausler     auto lineNumber{
197e12ffe6aSPeter Klausler         trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
198e12ffe6aSPeter Klausler     return {*this, origin.path, static_cast<int>(lineNumber), column,
199e12ffe6aSPeter Klausler         static_cast<int>(trueLineNumber)};
200e12ffe6aSPeter Klausler   }
201e12ffe6aSPeter Klausler }
20213ea73e4SDavid Truby 
SavePath(std::string && path)203e12ffe6aSPeter Klausler const std::string &SourceFile::SavePath(std::string &&path) {
204e12ffe6aSPeter Klausler   return *distinctPaths_.emplace(std::move(path)).first;
205e12ffe6aSPeter Klausler }
206e12ffe6aSPeter Klausler 
LineDirective(int trueLineNumber,const std::string & path,int lineNumber)207e12ffe6aSPeter Klausler void SourceFile::LineDirective(
208e12ffe6aSPeter Klausler     int trueLineNumber, const std::string &path, int lineNumber) {
209e12ffe6aSPeter Klausler   origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
210e12ffe6aSPeter Klausler }
211e12ffe6aSPeter Klausler 
Dump(llvm::raw_ostream & o) const212e12ffe6aSPeter Klausler llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
213e12ffe6aSPeter Klausler   o << "SourceFile '" << path_ << "'\n";
214e12ffe6aSPeter Klausler   for (const auto &[at, spo] : origins_) {
215e12ffe6aSPeter Klausler     o << "  origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n';
216e12ffe6aSPeter Klausler   }
217e12ffe6aSPeter Klausler   return o;
21864ab3302SCarolineConcatto }
2191f879005STim Keith } // namespace Fortran::parser
220