xref: /llvm-project/flang/lib/Parser/source.cpp (revision b57937861f68305068d8a35154811b4303ce52e5)
//===-- lib/Parser/source.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
864ab3302SCarolineConcatto 
964ab3302SCarolineConcatto #include "flang/Parser/source.h"
1064ab3302SCarolineConcatto #include "flang/Common/idioms.h"
1164ab3302SCarolineConcatto #include "flang/Parser/char-buffer.h"
128670e499SCaroline Concatto #include "llvm/Support/Errno.h"
1313ea73e4SDavid Truby #include "llvm/Support/FileSystem.h"
14*b5793786SMichael Kruse #include "llvm/Support/Path.h"
158670e499SCaroline Concatto #include "llvm/Support/raw_ostream.h"
1664ab3302SCarolineConcatto #include <algorithm>
1764ab3302SCarolineConcatto #include <memory>
1864ab3302SCarolineConcatto #include <vector>
1964ab3302SCarolineConcatto 
2064ab3302SCarolineConcatto namespace Fortran::parser {
2164ab3302SCarolineConcatto 
2264ab3302SCarolineConcatto SourceFile::~SourceFile() { Close(); }
2364ab3302SCarolineConcatto 
2413ea73e4SDavid Truby static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
2564ab3302SCarolineConcatto   std::vector<std::size_t> result;
2613ea73e4SDavid Truby   if (source.size() > 0) {
2713ea73e4SDavid Truby     CHECK(source.back() == '\n' && "missing ultimate newline");
2864ab3302SCarolineConcatto     std::size_t at{0};
2964ab3302SCarolineConcatto     do {
3064ab3302SCarolineConcatto       result.push_back(at);
3113ea73e4SDavid Truby       at = source.find('\n', at) + 1;
3213ea73e4SDavid Truby     } while (at < source.size());
3364ab3302SCarolineConcatto     result.shrink_to_fit();
3464ab3302SCarolineConcatto   }
3564ab3302SCarolineConcatto   return result;
3664ab3302SCarolineConcatto }
3764ab3302SCarolineConcatto 
3864ab3302SCarolineConcatto void SourceFile::RecordLineStarts() {
3913ea73e4SDavid Truby   lineStart_ = FindLineStarts({content().data(), bytes()});
4064ab3302SCarolineConcatto }
4164ab3302SCarolineConcatto 
4264ab3302SCarolineConcatto // Check for a Unicode byte order mark (BOM).
4364ab3302SCarolineConcatto // Module files all have one; so can source files.
4464ab3302SCarolineConcatto void SourceFile::IdentifyPayload() {
4513ea73e4SDavid Truby   llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
4613ea73e4SDavid Truby   constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
4713ea73e4SDavid Truby   if (content.startswith(UTF8_BOM)) {
4813ea73e4SDavid Truby     bom_end_ = UTF8_BOM.size();
4964ab3302SCarolineConcatto     encoding_ = Encoding::UTF_8;
5064ab3302SCarolineConcatto   }
5164ab3302SCarolineConcatto }
5264ab3302SCarolineConcatto 
5364ab3302SCarolineConcatto std::string DirectoryName(std::string path) {
54*b5793786SMichael Kruse   llvm::SmallString<128> pathBuf{path};
55*b5793786SMichael Kruse   llvm::sys::path::remove_filename(pathBuf);
56*b5793786SMichael Kruse   return pathBuf.str().str();
5764ab3302SCarolineConcatto }
5864ab3302SCarolineConcatto 
5964ab3302SCarolineConcatto std::string LocateSourceFile(
6064ab3302SCarolineConcatto     std::string name, const std::vector<std::string> &searchPath) {
61*b5793786SMichael Kruse   if (name.empty() || name == "-" || llvm::sys::path::is_absolute(name)) {
6264ab3302SCarolineConcatto     return name;
6364ab3302SCarolineConcatto   }
6464ab3302SCarolineConcatto   for (const std::string &dir : searchPath) {
65*b5793786SMichael Kruse     llvm::SmallString<128> path{dir};
66*b5793786SMichael Kruse     llvm::sys::path::append(path, name);
6713ea73e4SDavid Truby     bool isDir{false};
6813ea73e4SDavid Truby     auto er = llvm::sys::fs::is_directory(path, isDir);
6913ea73e4SDavid Truby     if (!er && !isDir) {
70*b5793786SMichael Kruse       return path.str().str();
7164ab3302SCarolineConcatto     }
7264ab3302SCarolineConcatto   }
7364ab3302SCarolineConcatto   return name;
7464ab3302SCarolineConcatto }
7564ab3302SCarolineConcatto 
7613ea73e4SDavid Truby std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
7764ab3302SCarolineConcatto   std::size_t wrote{0};
7813ea73e4SDavid Truby   char *buffer{buf.data()};
7913ea73e4SDavid Truby   char *p{buf.data()};
8013ea73e4SDavid Truby   std::size_t bytes = buf.size();
8164ab3302SCarolineConcatto   while (bytes > 0) {
8264ab3302SCarolineConcatto     void *vp{static_cast<void *>(p)};
8364ab3302SCarolineConcatto     void *crvp{std::memchr(vp, '\r', bytes)};
8464ab3302SCarolineConcatto     char *crcp{static_cast<char *>(crvp)};
8564ab3302SCarolineConcatto     if (!crcp) {
8664ab3302SCarolineConcatto       std::memmove(buffer + wrote, p, bytes);
8764ab3302SCarolineConcatto       wrote += bytes;
8864ab3302SCarolineConcatto       break;
8964ab3302SCarolineConcatto     }
9064ab3302SCarolineConcatto     std::size_t chunk = crcp - p;
91af6b8d51Speter klausler     auto advance{chunk + 1};
92af6b8d51Speter klausler     if (chunk + 1 >= bytes || crcp[1] == '\n') {
93af6b8d51Speter klausler       // CR followed by LF or EOF: omit
94af6b8d51Speter klausler     } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
95af6b8d51Speter klausler       // CR preceded by LF or BOF: omit
96af6b8d51Speter klausler     } else {
97af6b8d51Speter klausler       // CR in line: retain
98af6b8d51Speter klausler       ++chunk;
99af6b8d51Speter klausler     }
10064ab3302SCarolineConcatto     std::memmove(buffer + wrote, p, chunk);
10164ab3302SCarolineConcatto     wrote += chunk;
102af6b8d51Speter klausler     p += advance;
103af6b8d51Speter klausler     bytes -= advance;
10464ab3302SCarolineConcatto   }
10564ab3302SCarolineConcatto   return wrote;
10664ab3302SCarolineConcatto }
10764ab3302SCarolineConcatto 
1088670e499SCaroline Concatto bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
10964ab3302SCarolineConcatto   Close();
11064ab3302SCarolineConcatto   path_ = path;
11113ea73e4SDavid Truby   std::string errorPath{"'"s + path_ + "'"};
11213ea73e4SDavid Truby   auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
11313ea73e4SDavid Truby   if (!bufOr) {
11413ea73e4SDavid Truby     auto err = bufOr.getError();
11513ea73e4SDavid Truby     error << "Could not open " << errorPath << ": " << err.message();
11664ab3302SCarolineConcatto     return false;
11764ab3302SCarolineConcatto   }
11813ea73e4SDavid Truby   buf_ = std::move(bufOr.get());
11913ea73e4SDavid Truby   ReadFile();
12013ea73e4SDavid Truby   return true;
12164ab3302SCarolineConcatto }
12264ab3302SCarolineConcatto 
1238670e499SCaroline Concatto bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
12464ab3302SCarolineConcatto   Close();
12564ab3302SCarolineConcatto   path_ = "standard input";
12664ab3302SCarolineConcatto 
12713ea73e4SDavid Truby   auto buf_or = llvm::MemoryBuffer::getSTDIN();
12813ea73e4SDavid Truby   if (!buf_or) {
12913ea73e4SDavid Truby     auto err = buf_or.getError();
13013ea73e4SDavid Truby     error << err.message();
13164ab3302SCarolineConcatto     return false;
13264ab3302SCarolineConcatto   }
13313ea73e4SDavid Truby   auto inbuf = std::move(buf_or.get());
13413ea73e4SDavid Truby   buf_ =
13513ea73e4SDavid Truby       llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
13613ea73e4SDavid Truby   llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
13713ea73e4SDavid Truby   ReadFile();
13813ea73e4SDavid Truby   return true;
13964ab3302SCarolineConcatto }
14064ab3302SCarolineConcatto 
14113ea73e4SDavid Truby void SourceFile::ReadFile() {
14213ea73e4SDavid Truby   buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
143b547e1a4SDavid Truby   if (content().size() == 0 || content().back() != '\n') {
144b547e1a4SDavid Truby     // Don't bother to copy if we have spare memory
145b547e1a4SDavid Truby     if (content().size() >= buf_->getBufferSize()) {
146b547e1a4SDavid Truby       auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
147b547e1a4SDavid Truby           content().size() + 1)};
148b547e1a4SDavid Truby       llvm::copy(content(), tmp_buf->getBufferStart());
149b547e1a4SDavid Truby       Close();
150b547e1a4SDavid Truby       buf_ = std::move(tmp_buf);
151b547e1a4SDavid Truby     }
152b547e1a4SDavid Truby     buf_end_++;
153b547e1a4SDavid Truby     buf_->getBuffer()[buf_end_ - 1] = '\n';
154b547e1a4SDavid Truby   }
15564ab3302SCarolineConcatto   IdentifyPayload();
15664ab3302SCarolineConcatto   RecordLineStarts();
15764ab3302SCarolineConcatto }
15864ab3302SCarolineConcatto 
15964ab3302SCarolineConcatto void SourceFile::Close() {
16064ab3302SCarolineConcatto   path_.clear();
16113ea73e4SDavid Truby   buf_.reset();
16264ab3302SCarolineConcatto }
16364ab3302SCarolineConcatto 
16464ab3302SCarolineConcatto SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
16513ea73e4SDavid Truby   CHECK(at < bytes());
16613ea73e4SDavid Truby 
16713ea73e4SDavid Truby   auto it = llvm::upper_bound(lineStart_, at);
16813ea73e4SDavid Truby   auto low = std::distance(lineStart_.begin(), it - 1);
16964ab3302SCarolineConcatto   return {*this, static_cast<int>(low + 1),
17064ab3302SCarolineConcatto       static_cast<int>(at - lineStart_[low] + 1)};
17164ab3302SCarolineConcatto }
1721f879005STim Keith } // namespace Fortran::parser
173