xref: /llvm-project/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp (revision abaa79b25dde740d5b54adab463432bee2840c85)
1 //===--- SourceMgrUtils.cpp - SourceMgr LSP Utils -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/Support/Path.h"
12 #include <optional>
13 
14 using namespace mlir;
15 using namespace mlir::lsp;
16 
17 //===----------------------------------------------------------------------===//
18 // Utils
19 //===----------------------------------------------------------------------===//
20 
21 /// Find the end of a string whose contents start at the given `curPtr`. Returns
22 /// the position at the end of the string, after a terminal or invalid character
23 /// (e.g. `"` or `\0`).
lexLocStringTok(const char * curPtr)24 static const char *lexLocStringTok(const char *curPtr) {
25   while (char c = *curPtr++) {
26     // Check for various terminal characters.
27     if (StringRef("\"\n\v\f").contains(c))
28       return curPtr;
29 
30     // Check for escape sequences.
31     if (c == '\\') {
32       // Check a few known escapes and \xx hex digits.
33       if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' || *curPtr == 't')
34         ++curPtr;
35       else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1]))
36         curPtr += 2;
37       else
38         return curPtr;
39     }
40   }
41 
42   // If we hit this point, we've reached the end of the buffer. Update the end
43   // pointer to not point past the buffer.
44   return curPtr - 1;
45 }
46 
convertTokenLocToRange(SMLoc loc,StringRef identifierChars)47 SMRange lsp::convertTokenLocToRange(SMLoc loc, StringRef identifierChars) {
48   if (!loc.isValid())
49     return SMRange();
50   const char *curPtr = loc.getPointer();
51 
52   // Check if this is a string token.
53   if (*curPtr == '"') {
54     curPtr = lexLocStringTok(curPtr + 1);
55 
56     // Otherwise, default to handling an identifier.
57   } else {
58     // Return if the given character is a valid identifier character.
59     auto isIdentifierChar = [=](char c) {
60       return isalnum(c) || c == '_' || identifierChars.contains(c);
61     };
62 
63     while (*curPtr && isIdentifierChar(*(++curPtr)))
64       continue;
65   }
66 
67   return SMRange(loc, SMLoc::getFromPointer(curPtr));
68 }
69 
70 std::optional<std::string>
extractSourceDocComment(llvm::SourceMgr & sourceMgr,SMLoc loc)71 lsp::extractSourceDocComment(llvm::SourceMgr &sourceMgr, SMLoc loc) {
72   // This is a heuristic, and isn't intended to cover every case, but should
73   // cover the most common. We essentially look for a comment preceding the
74   // line, and if we find one, use that as the documentation.
75   if (!loc.isValid())
76     return std::nullopt;
77   int bufferId = sourceMgr.FindBufferContainingLoc(loc);
78   if (bufferId == 0)
79     return std::nullopt;
80   const char *bufferStart =
81       sourceMgr.getMemoryBuffer(bufferId)->getBufferStart();
82   StringRef buffer(bufferStart, loc.getPointer() - bufferStart);
83 
84   // Pop the last line from the buffer string.
85   auto popLastLine = [&]() -> std::optional<StringRef> {
86     size_t newlineOffset = buffer.find_last_of('\n');
87     if (newlineOffset == StringRef::npos)
88       return std::nullopt;
89     StringRef lastLine = buffer.drop_front(newlineOffset).trim();
90     buffer = buffer.take_front(newlineOffset);
91     return lastLine;
92   };
93 
94   // Try to pop the current line.
95   if (!popLastLine())
96     return std::nullopt;
97 
98   // Try to parse a comment string from the source file.
99   SmallVector<StringRef> commentLines;
100   while (std::optional<StringRef> line = popLastLine()) {
101     // Check for a comment at the beginning of the line.
102     if (!line->starts_with("//"))
103       break;
104 
105     // Extract the document string from the comment.
106     commentLines.push_back(line->ltrim('/'));
107   }
108 
109   if (commentLines.empty())
110     return std::nullopt;
111   return llvm::join(llvm::reverse(commentLines), "\n");
112 }
113 
contains(SMRange range,SMLoc loc)114 bool lsp::contains(SMRange range, SMLoc loc) {
115   return range.Start.getPointer() <= loc.getPointer() &&
116          loc.getPointer() < range.End.getPointer();
117 }
118 
119 //===----------------------------------------------------------------------===//
120 // SourceMgrInclude
121 //===----------------------------------------------------------------------===//
122 
buildHover() const123 Hover SourceMgrInclude::buildHover() const {
124   Hover hover(range);
125   {
126     llvm::raw_string_ostream hoverOS(hover.contents.value);
127     hoverOS << "`" << llvm::sys::path::filename(uri.file()) << "`\n***\n"
128             << uri.file();
129   }
130   return hover;
131 }
132 
gatherIncludeFiles(llvm::SourceMgr & sourceMgr,SmallVectorImpl<SourceMgrInclude> & includes)133 void lsp::gatherIncludeFiles(llvm::SourceMgr &sourceMgr,
134                              SmallVectorImpl<SourceMgrInclude> &includes) {
135   for (unsigned i = 1, e = sourceMgr.getNumBuffers(); i < e; ++i) {
136     // Check to see if this file was included by the main file.
137     SMLoc includeLoc = sourceMgr.getBufferInfo(i + 1).IncludeLoc;
138     if (!includeLoc.isValid() || sourceMgr.FindBufferContainingLoc(
139                                      includeLoc) != sourceMgr.getMainFileID())
140       continue;
141 
142     // Try to build a URI for this file path.
143     auto *buffer = sourceMgr.getMemoryBuffer(i + 1);
144     llvm::SmallString<256> path(buffer->getBufferIdentifier());
145     llvm::sys::path::remove_dots(path, /*remove_dot_dot=*/true);
146 
147     llvm::Expected<URIForFile> includedFileURI = URIForFile::fromFile(path);
148     if (!includedFileURI)
149       continue;
150 
151     // Find the end of the include token.
152     const char *includeStart = includeLoc.getPointer() - 2;
153     while (*(--includeStart) != '\"')
154       continue;
155 
156     // Push this include.
157     SMRange includeRange(SMLoc::getFromPointer(includeStart), includeLoc);
158     includes.emplace_back(*includedFileURI, Range(sourceMgr, includeRange));
159   }
160 }
161