xref: /llvm-project/clang-tools-extra/clangd/support/Markup.cpp (revision d5953e3e3092f7142a07aa012fc9665ede09e53b)
1fa1f4cf8SSam McCall //===--- Markup.cpp -----------------------------------------*- C++-*------===//
2fa1f4cf8SSam McCall //
3fa1f4cf8SSam McCall // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fa1f4cf8SSam McCall // See https://llvm.org/LICENSE.txt for license information.
5fa1f4cf8SSam McCall // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fa1f4cf8SSam McCall //
7fa1f4cf8SSam McCall //===----------------------------------------------------------------------===//
8fa1f4cf8SSam McCall #include "support/Markup.h"
9fa1f4cf8SSam McCall #include "llvm/ADT/ArrayRef.h"
10fa1f4cf8SSam McCall #include "llvm/ADT/STLExtras.h"
11fa1f4cf8SSam McCall #include "llvm/ADT/SmallVector.h"
12fa1f4cf8SSam McCall #include "llvm/ADT/StringExtras.h"
13fa1f4cf8SSam McCall #include "llvm/ADT/StringRef.h"
14fa1f4cf8SSam McCall #include "llvm/Support/Compiler.h"
15fa1f4cf8SSam McCall #include "llvm/Support/raw_ostream.h"
16fa1f4cf8SSam McCall #include <cstddef>
17fa1f4cf8SSam McCall #include <iterator>
18fa1f4cf8SSam McCall #include <memory>
19fa1f4cf8SSam McCall #include <string>
20fa1f4cf8SSam McCall #include <vector>
21fa1f4cf8SSam McCall 
22fa1f4cf8SSam McCall namespace clang {
23fa1f4cf8SSam McCall namespace clangd {
24fa1f4cf8SSam McCall namespace markup {
25fa1f4cf8SSam McCall namespace {
26fa1f4cf8SSam McCall 
27fa1f4cf8SSam McCall // Is <contents a plausible start to an HTML tag?
28fa1f4cf8SSam McCall // Contents may not be the rest of the line, but it's the rest of the plain
29fa1f4cf8SSam McCall // text, so we expect to see at least the tag name.
looksLikeTag(llvm::StringRef Contents)30fa1f4cf8SSam McCall bool looksLikeTag(llvm::StringRef Contents) {
31fa1f4cf8SSam McCall   if (Contents.empty())
32fa1f4cf8SSam McCall     return false;
33fa1f4cf8SSam McCall   if (Contents.front() == '!' || Contents.front() == '?' ||
34fa1f4cf8SSam McCall       Contents.front() == '/')
35fa1f4cf8SSam McCall     return true;
36fa1f4cf8SSam McCall   // Check the start of the tag name.
37fa1f4cf8SSam McCall   if (!llvm::isAlpha(Contents.front()))
38fa1f4cf8SSam McCall     return false;
39fa1f4cf8SSam McCall   // Drop rest of the tag name, and following whitespace.
40fa1f4cf8SSam McCall   Contents = Contents
41fa1f4cf8SSam McCall                  .drop_while([](char C) {
42fa1f4cf8SSam McCall                    return llvm::isAlnum(C) || C == '-' || C == '_' || C == ':';
43fa1f4cf8SSam McCall                  })
44b283ae7aSSam McCall                  .drop_while(llvm::isSpace);
45fa1f4cf8SSam McCall   // The rest of the tag consists of attributes, which have restrictive names.
46fa1f4cf8SSam McCall   // If we hit '=', all bets are off (attribute values can contain anything).
47fa1f4cf8SSam McCall   for (; !Contents.empty(); Contents = Contents.drop_front()) {
48b283ae7aSSam McCall     if (llvm::isAlnum(Contents.front()) || llvm::isSpace(Contents.front()))
49fa1f4cf8SSam McCall       continue;
50*d5953e3eSKazu Hirata     if (Contents.front() == '>' || Contents.starts_with("/>"))
51fa1f4cf8SSam McCall       return true; // May close the tag.
52fa1f4cf8SSam McCall     if (Contents.front() == '=')
53fa1f4cf8SSam McCall       return true; // Don't try to parse attribute values.
54fa1f4cf8SSam McCall     return false;  // Random punctuation means this isn't a tag.
55fa1f4cf8SSam McCall   }
56fa1f4cf8SSam McCall   return true; // Potentially incomplete tag.
57fa1f4cf8SSam McCall }
58fa1f4cf8SSam McCall 
59fa1f4cf8SSam McCall // Tests whether C should be backslash-escaped in markdown.
60fa1f4cf8SSam McCall // The string being escaped is Before + C + After. This is part of a paragraph.
61fa1f4cf8SSam McCall // StartsLine indicates whether `Before` is the start of the line.
62fa1f4cf8SSam McCall // After may not be everything until the end of the line.
63fa1f4cf8SSam McCall //
64fa1f4cf8SSam McCall // It's always safe to escape punctuation, but want minimal escaping.
65fa1f4cf8SSam McCall // The strategy is to escape the first character of anything that might start
66fa1f4cf8SSam McCall // a markdown grammar construct.
needsLeadingEscape(char C,llvm::StringRef Before,llvm::StringRef After,bool StartsLine)67fa1f4cf8SSam McCall bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
68fa1f4cf8SSam McCall                         bool StartsLine) {
69b283ae7aSSam McCall   assert(Before.take_while(llvm::isSpace).empty());
70fa1f4cf8SSam McCall   auto RulerLength = [&]() -> /*Length*/ unsigned {
71fa1f4cf8SSam McCall     if (!StartsLine || !Before.empty())
72fa1f4cf8SSam McCall       return false;
73fa1f4cf8SSam McCall     llvm::StringRef A = After.rtrim();
74fa1f4cf8SSam McCall     return llvm::all_of(A, [C](char D) { return C == D; }) ? 1 + A.size() : 0;
75fa1f4cf8SSam McCall   };
76fa1f4cf8SSam McCall   auto IsBullet = [&]() {
77fa1f4cf8SSam McCall     return StartsLine && Before.empty() &&
78*d5953e3eSKazu Hirata            (After.empty() || After.starts_with(" "));
79fa1f4cf8SSam McCall   };
80fa1f4cf8SSam McCall   auto SpaceSurrounds = [&]() {
81b283ae7aSSam McCall     return (After.empty() || llvm::isSpace(After.front())) &&
82b283ae7aSSam McCall            (Before.empty() || llvm::isSpace(Before.back()));
83fa1f4cf8SSam McCall   };
84fa1f4cf8SSam McCall   auto WordSurrounds = [&]() {
85fa1f4cf8SSam McCall     return (!After.empty() && llvm::isAlnum(After.front())) &&
86fa1f4cf8SSam McCall            (!Before.empty() && llvm::isAlnum(Before.back()));
87fa1f4cf8SSam McCall   };
88fa1f4cf8SSam McCall 
89fa1f4cf8SSam McCall   switch (C) {
90fa1f4cf8SSam McCall   case '\\': // Escaped character.
91fa1f4cf8SSam McCall     return true;
92fa1f4cf8SSam McCall   case '`': // Code block or inline code
93fa1f4cf8SSam McCall     // Any number of backticks can delimit an inline code block that can end
94fa1f4cf8SSam McCall     // anywhere (including on another line). We must escape them all.
95fa1f4cf8SSam McCall     return true;
96fa1f4cf8SSam McCall   case '~': // Code block
97*d5953e3eSKazu Hirata     return StartsLine && Before.empty() && After.starts_with("~~");
98fa1f4cf8SSam McCall   case '#': { // ATX heading.
99fa1f4cf8SSam McCall     if (!StartsLine || !Before.empty())
100fa1f4cf8SSam McCall       return false;
101fa1f4cf8SSam McCall     llvm::StringRef Rest = After.ltrim(C);
102*d5953e3eSKazu Hirata     return Rest.empty() || Rest.starts_with(" ");
103fa1f4cf8SSam McCall   }
104fa1f4cf8SSam McCall   case ']': // Link or link reference.
105fa1f4cf8SSam McCall     // We escape ] rather than [ here, because it's more constrained:
106fa1f4cf8SSam McCall     //   ](...) is an in-line link
107fa1f4cf8SSam McCall     //   ]: is a link reference
108fa1f4cf8SSam McCall     // The following are only links if the link reference exists:
109fa1f4cf8SSam McCall     //   ] by itself is a shortcut link
110fa1f4cf8SSam McCall     //   ][...] is an out-of-line link
111fa1f4cf8SSam McCall     // Because we never emit link references, we don't need to handle these.
112*d5953e3eSKazu Hirata     return After.starts_with(":") || After.starts_with("(");
113fa1f4cf8SSam McCall   case '=': // Setex heading.
114fa1f4cf8SSam McCall     return RulerLength() > 0;
115fa1f4cf8SSam McCall   case '_': // Horizontal ruler or matched delimiter.
116fa1f4cf8SSam McCall     if (RulerLength() >= 3)
117fa1f4cf8SSam McCall       return true;
118fa1f4cf8SSam McCall     // Not a delimiter if surrounded by space, or inside a word.
119fa1f4cf8SSam McCall     // (The rules at word boundaries are subtle).
120fa1f4cf8SSam McCall     return !(SpaceSurrounds() || WordSurrounds());
121fa1f4cf8SSam McCall   case '-': // Setex heading, horizontal ruler, or bullet.
122fa1f4cf8SSam McCall     if (RulerLength() > 0)
123fa1f4cf8SSam McCall       return true;
124fa1f4cf8SSam McCall     return IsBullet();
125fa1f4cf8SSam McCall   case '+': // Bullet list.
126fa1f4cf8SSam McCall     return IsBullet();
127fa1f4cf8SSam McCall   case '*': // Bullet list, horizontal ruler, or delimiter.
128fa1f4cf8SSam McCall     return IsBullet() || RulerLength() >= 3 || !SpaceSurrounds();
129fa1f4cf8SSam McCall   case '<': // HTML tag (or autolink, which we choose not to escape)
130fa1f4cf8SSam McCall     return looksLikeTag(After);
131fa1f4cf8SSam McCall   case '>': // Quote marker. Needs escaping at start of line.
132fa1f4cf8SSam McCall     return StartsLine && Before.empty();
133fa1f4cf8SSam McCall   case '&': { // HTML entity reference
134fa1f4cf8SSam McCall     auto End = After.find(';');
135fa1f4cf8SSam McCall     if (End == llvm::StringRef::npos)
136fa1f4cf8SSam McCall       return false;
137fa1f4cf8SSam McCall     llvm::StringRef Content = After.substr(0, End);
138fa1f4cf8SSam McCall     if (Content.consume_front("#")) {
139fa1f4cf8SSam McCall       if (Content.consume_front("x") || Content.consume_front("X"))
140fa1f4cf8SSam McCall         return llvm::all_of(Content, llvm::isHexDigit);
141fa1f4cf8SSam McCall       return llvm::all_of(Content, llvm::isDigit);
142fa1f4cf8SSam McCall     }
143fa1f4cf8SSam McCall     return llvm::all_of(Content, llvm::isAlpha);
144fa1f4cf8SSam McCall   }
145fa1f4cf8SSam McCall   case '.': // Numbered list indicator. Escape 12. -> 12\. at start of line.
146fa1f4cf8SSam McCall   case ')':
147fa1f4cf8SSam McCall     return StartsLine && !Before.empty() &&
148*d5953e3eSKazu Hirata            llvm::all_of(Before, llvm::isDigit) && After.starts_with(" ");
149fa1f4cf8SSam McCall   default:
150fa1f4cf8SSam McCall     return false;
151fa1f4cf8SSam McCall   }
152fa1f4cf8SSam McCall }
153fa1f4cf8SSam McCall 
154fa1f4cf8SSam McCall /// Escape a markdown text block. Ensures the punctuation will not introduce
155fa1f4cf8SSam McCall /// any of the markdown constructs.
renderText(llvm::StringRef Input,bool StartsLine)156fa1f4cf8SSam McCall std::string renderText(llvm::StringRef Input, bool StartsLine) {
157fa1f4cf8SSam McCall   std::string R;
158fa1f4cf8SSam McCall   for (unsigned I = 0; I < Input.size(); ++I) {
159fa1f4cf8SSam McCall     if (needsLeadingEscape(Input[I], Input.substr(0, I), Input.substr(I + 1),
160fa1f4cf8SSam McCall                            StartsLine))
161fa1f4cf8SSam McCall       R.push_back('\\');
162fa1f4cf8SSam McCall     R.push_back(Input[I]);
163fa1f4cf8SSam McCall   }
164fa1f4cf8SSam McCall   return R;
165fa1f4cf8SSam McCall }
166fa1f4cf8SSam McCall 
167fa1f4cf8SSam McCall /// Renders \p Input as an inline block of code in markdown. The returned value
168fa1f4cf8SSam McCall /// is surrounded by backticks and the inner contents are properly escaped.
renderInlineBlock(llvm::StringRef Input)169fa1f4cf8SSam McCall std::string renderInlineBlock(llvm::StringRef Input) {
170fa1f4cf8SSam McCall   std::string R;
171fa1f4cf8SSam McCall   // Double all backticks to make sure we don't close the inline block early.
172fa1f4cf8SSam McCall   for (size_t From = 0; From < Input.size();) {
173fa1f4cf8SSam McCall     size_t Next = Input.find("`", From);
174fa1f4cf8SSam McCall     R += Input.substr(From, Next - From);
175fa1f4cf8SSam McCall     if (Next == llvm::StringRef::npos)
176fa1f4cf8SSam McCall       break;
177fa1f4cf8SSam McCall     R += "``"; // double the found backtick.
178fa1f4cf8SSam McCall 
179fa1f4cf8SSam McCall     From = Next + 1;
180fa1f4cf8SSam McCall   }
181fa1f4cf8SSam McCall   // If results starts with a backtick, add spaces on both sides. The spaces
182fa1f4cf8SSam McCall   // are ignored by markdown renderers.
183*d5953e3eSKazu Hirata   if (llvm::StringRef(R).starts_with("`") || llvm::StringRef(R).ends_with("`"))
184fa1f4cf8SSam McCall     return "` " + std::move(R) + " `";
185fa1f4cf8SSam McCall   // Markdown render should ignore first and last space if both are there. We
186fa1f4cf8SSam McCall   // add an extra pair of spaces in that case to make sure we render what the
187fa1f4cf8SSam McCall   // user intended.
188*d5953e3eSKazu Hirata   if (llvm::StringRef(R).starts_with(" ") && llvm::StringRef(R).ends_with(" "))
189fa1f4cf8SSam McCall     return "` " + std::move(R) + " `";
190fa1f4cf8SSam McCall   return "`" + std::move(R) + "`";
191fa1f4cf8SSam McCall }
192fa1f4cf8SSam McCall 
193fa1f4cf8SSam McCall /// Get marker required for \p Input to represent a markdown codeblock. It
194fa1f4cf8SSam McCall /// consists of at least 3 backticks(`). Although markdown also allows to use
195fa1f4cf8SSam McCall /// tilde(~) for code blocks, they are never used.
getMarkerForCodeBlock(llvm::StringRef Input)196fa1f4cf8SSam McCall std::string getMarkerForCodeBlock(llvm::StringRef Input) {
197fa1f4cf8SSam McCall   // Count the maximum number of consecutive backticks in \p Input. We need to
198fa1f4cf8SSam McCall   // start and end the code block with more.
199fa1f4cf8SSam McCall   unsigned MaxBackticks = 0;
200fa1f4cf8SSam McCall   unsigned Backticks = 0;
201fa1f4cf8SSam McCall   for (char C : Input) {
202fa1f4cf8SSam McCall     if (C == '`') {
203fa1f4cf8SSam McCall       ++Backticks;
204fa1f4cf8SSam McCall       continue;
205fa1f4cf8SSam McCall     }
206fa1f4cf8SSam McCall     MaxBackticks = std::max(MaxBackticks, Backticks);
207fa1f4cf8SSam McCall     Backticks = 0;
208fa1f4cf8SSam McCall   }
209fa1f4cf8SSam McCall   MaxBackticks = std::max(Backticks, MaxBackticks);
210fa1f4cf8SSam McCall   // Use the corresponding number of backticks to start and end a code block.
211fa1f4cf8SSam McCall   return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
212fa1f4cf8SSam McCall }
213fa1f4cf8SSam McCall 
214fa1f4cf8SSam McCall // Trims the input and concatenates whitespace blocks into a single ` `.
canonicalizeSpaces(llvm::StringRef Input)215fa1f4cf8SSam McCall std::string canonicalizeSpaces(llvm::StringRef Input) {
216ee02e20cSKirill Bobyrev   llvm::SmallVector<llvm::StringRef> Words;
217fa1f4cf8SSam McCall   llvm::SplitString(Input, Words);
218fa1f4cf8SSam McCall   return llvm::join(Words, " ");
219fa1f4cf8SSam McCall }
220fa1f4cf8SSam McCall 
renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,void (Block::* RenderFunc)(llvm::raw_ostream &)const)221fa1f4cf8SSam McCall std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,
222fa1f4cf8SSam McCall                          void (Block::*RenderFunc)(llvm::raw_ostream &) const) {
223fa1f4cf8SSam McCall   std::string R;
224fa1f4cf8SSam McCall   llvm::raw_string_ostream OS(R);
225fa1f4cf8SSam McCall 
226fa1f4cf8SSam McCall   // Trim rulers.
227fa1f4cf8SSam McCall   Children = Children.drop_while(
228fa1f4cf8SSam McCall       [](const std::unique_ptr<Block> &C) { return C->isRuler(); });
229fa1f4cf8SSam McCall   auto Last = llvm::find_if(
230fa1f4cf8SSam McCall       llvm::reverse(Children),
231fa1f4cf8SSam McCall       [](const std::unique_ptr<Block> &C) { return !C->isRuler(); });
232fa1f4cf8SSam McCall   Children = Children.drop_back(Children.end() - Last.base());
233fa1f4cf8SSam McCall 
234fa1f4cf8SSam McCall   bool LastBlockWasRuler = true;
235fa1f4cf8SSam McCall   for (const auto &C : Children) {
236fa1f4cf8SSam McCall     if (C->isRuler() && LastBlockWasRuler)
237fa1f4cf8SSam McCall       continue;
238fa1f4cf8SSam McCall     LastBlockWasRuler = C->isRuler();
239fa1f4cf8SSam McCall     ((*C).*RenderFunc)(OS);
240fa1f4cf8SSam McCall   }
241fa1f4cf8SSam McCall 
242fa1f4cf8SSam McCall   // Get rid of redundant empty lines introduced in plaintext while imitating
243fa1f4cf8SSam McCall   // padding in markdown.
244fa1f4cf8SSam McCall   std::string AdjustedResult;
245fa1f4cf8SSam McCall   llvm::StringRef TrimmedText(OS.str());
246fa1f4cf8SSam McCall   TrimmedText = TrimmedText.trim();
247fa1f4cf8SSam McCall 
248fa1f4cf8SSam McCall   llvm::copy_if(TrimmedText, std::back_inserter(AdjustedResult),
249fa1f4cf8SSam McCall                 [&TrimmedText](const char &C) {
250fa1f4cf8SSam McCall                   return !llvm::StringRef(TrimmedText.data(),
251fa1f4cf8SSam McCall                                           &C - TrimmedText.data() + 1)
252fa1f4cf8SSam McCall                               // We allow at most two newlines.
253*d5953e3eSKazu Hirata                               .ends_with("\n\n\n");
254fa1f4cf8SSam McCall                 });
255fa1f4cf8SSam McCall 
256fa1f4cf8SSam McCall   return AdjustedResult;
257fa1f4cf8SSam McCall }
258fa1f4cf8SSam McCall 
259fa1f4cf8SSam McCall // Separates two blocks with extra spacing. Note that it might render strangely
260fa1f4cf8SSam McCall // in vscode if the trailing block is a codeblock, see
261fa1f4cf8SSam McCall // https://github.com/microsoft/vscode/issues/88416 for details.
262fa1f4cf8SSam McCall class Ruler : public Block {
263fa1f4cf8SSam McCall public:
renderMarkdown(llvm::raw_ostream & OS) const264fa1f4cf8SSam McCall   void renderMarkdown(llvm::raw_ostream &OS) const override {
265fa1f4cf8SSam McCall     // Note that we need an extra new line before the ruler, otherwise we might
266fa1f4cf8SSam McCall     // make previous block a title instead of introducing a ruler.
267fa1f4cf8SSam McCall     OS << "\n---\n";
268fa1f4cf8SSam McCall   }
renderPlainText(llvm::raw_ostream & OS) const269fa1f4cf8SSam McCall   void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; }
clone() const270fa1f4cf8SSam McCall   std::unique_ptr<Block> clone() const override {
271fa1f4cf8SSam McCall     return std::make_unique<Ruler>(*this);
272fa1f4cf8SSam McCall   }
isRuler() const273fa1f4cf8SSam McCall   bool isRuler() const override { return true; }
274fa1f4cf8SSam McCall };
275fa1f4cf8SSam McCall 
276fa1f4cf8SSam McCall class CodeBlock : public Block {
277fa1f4cf8SSam McCall public:
renderMarkdown(llvm::raw_ostream & OS) const278fa1f4cf8SSam McCall   void renderMarkdown(llvm::raw_ostream &OS) const override {
279fa1f4cf8SSam McCall     std::string Marker = getMarkerForCodeBlock(Contents);
280fa1f4cf8SSam McCall     // No need to pad from previous blocks, as they should end with a new line.
281fa1f4cf8SSam McCall     OS << Marker << Language << '\n' << Contents << '\n' << Marker << '\n';
282fa1f4cf8SSam McCall   }
283fa1f4cf8SSam McCall 
renderPlainText(llvm::raw_ostream & OS) const284fa1f4cf8SSam McCall   void renderPlainText(llvm::raw_ostream &OS) const override {
285fa1f4cf8SSam McCall     // In plaintext we want one empty line before and after codeblocks.
286fa1f4cf8SSam McCall     OS << '\n' << Contents << "\n\n";
287fa1f4cf8SSam McCall   }
288fa1f4cf8SSam McCall 
clone() const289fa1f4cf8SSam McCall   std::unique_ptr<Block> clone() const override {
290fa1f4cf8SSam McCall     return std::make_unique<CodeBlock>(*this);
291fa1f4cf8SSam McCall   }
292fa1f4cf8SSam McCall 
CodeBlock(std::string Contents,std::string Language)293fa1f4cf8SSam McCall   CodeBlock(std::string Contents, std::string Language)
294fa1f4cf8SSam McCall       : Contents(std::move(Contents)), Language(std::move(Language)) {}
295fa1f4cf8SSam McCall 
296fa1f4cf8SSam McCall private:
297fa1f4cf8SSam McCall   std::string Contents;
298fa1f4cf8SSam McCall   std::string Language;
299fa1f4cf8SSam McCall };
300fa1f4cf8SSam McCall 
301fa1f4cf8SSam McCall // Inserts two spaces after each `\n` to indent each line. First line is not
302fa1f4cf8SSam McCall // indented.
indentLines(llvm::StringRef Input)303fa1f4cf8SSam McCall std::string indentLines(llvm::StringRef Input) {
304*d5953e3eSKazu Hirata   assert(!Input.ends_with("\n") && "Input should've been trimmed.");
305fa1f4cf8SSam McCall   std::string IndentedR;
306fa1f4cf8SSam McCall   // We'll add 2 spaces after each new line.
307fa1f4cf8SSam McCall   IndentedR.reserve(Input.size() + Input.count('\n') * 2);
308fa1f4cf8SSam McCall   for (char C : Input) {
309fa1f4cf8SSam McCall     IndentedR += C;
310fa1f4cf8SSam McCall     if (C == '\n')
311fa1f4cf8SSam McCall       IndentedR.append("  ");
312fa1f4cf8SSam McCall   }
313fa1f4cf8SSam McCall   return IndentedR;
314fa1f4cf8SSam McCall }
315fa1f4cf8SSam McCall 
316fa1f4cf8SSam McCall class Heading : public Paragraph {
317fa1f4cf8SSam McCall public:
Heading(size_t Level)318fa1f4cf8SSam McCall   Heading(size_t Level) : Level(Level) {}
renderMarkdown(llvm::raw_ostream & OS) const319fa1f4cf8SSam McCall   void renderMarkdown(llvm::raw_ostream &OS) const override {
320fa1f4cf8SSam McCall     OS << std::string(Level, '#') << ' ';
321fa1f4cf8SSam McCall     Paragraph::renderMarkdown(OS);
322fa1f4cf8SSam McCall   }
323fa1f4cf8SSam McCall 
324fa1f4cf8SSam McCall private:
325fa1f4cf8SSam McCall   size_t Level;
326fa1f4cf8SSam McCall };
327fa1f4cf8SSam McCall 
328fa1f4cf8SSam McCall } // namespace
329fa1f4cf8SSam McCall 
asMarkdown() const330fa1f4cf8SSam McCall std::string Block::asMarkdown() const {
331fa1f4cf8SSam McCall   std::string R;
332fa1f4cf8SSam McCall   llvm::raw_string_ostream OS(R);
333fa1f4cf8SSam McCall   renderMarkdown(OS);
334fa1f4cf8SSam McCall   return llvm::StringRef(OS.str()).trim().str();
335fa1f4cf8SSam McCall }
336fa1f4cf8SSam McCall 
asPlainText() const337fa1f4cf8SSam McCall std::string Block::asPlainText() const {
338fa1f4cf8SSam McCall   std::string R;
339fa1f4cf8SSam McCall   llvm::raw_string_ostream OS(R);
340fa1f4cf8SSam McCall   renderPlainText(OS);
341fa1f4cf8SSam McCall   return llvm::StringRef(OS.str()).trim().str();
342fa1f4cf8SSam McCall }
343fa1f4cf8SSam McCall 
renderMarkdown(llvm::raw_ostream & OS) const344fa1f4cf8SSam McCall void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
345fa1f4cf8SSam McCall   bool NeedsSpace = false;
346fa1f4cf8SSam McCall   bool HasChunks = false;
347fa1f4cf8SSam McCall   for (auto &C : Chunks) {
348fa1f4cf8SSam McCall     if (C.SpaceBefore || NeedsSpace)
349fa1f4cf8SSam McCall       OS << " ";
350fa1f4cf8SSam McCall     switch (C.Kind) {
351fa1f4cf8SSam McCall     case Chunk::PlainText:
352fa1f4cf8SSam McCall       OS << renderText(C.Contents, !HasChunks);
353fa1f4cf8SSam McCall       break;
354fa1f4cf8SSam McCall     case Chunk::InlineCode:
355fa1f4cf8SSam McCall       OS << renderInlineBlock(C.Contents);
356fa1f4cf8SSam McCall       break;
357fa1f4cf8SSam McCall     }
358fa1f4cf8SSam McCall     HasChunks = true;
359fa1f4cf8SSam McCall     NeedsSpace = C.SpaceAfter;
360fa1f4cf8SSam McCall   }
361fa1f4cf8SSam McCall   // Paragraphs are translated into markdown lines, not markdown paragraphs.
362fa1f4cf8SSam McCall   // Therefore it only has a single linebreak afterwards.
363fa1f4cf8SSam McCall   // VSCode requires two spaces at the end of line to start a new one.
364fa1f4cf8SSam McCall   OS << "  \n";
365fa1f4cf8SSam McCall }
366fa1f4cf8SSam McCall 
clone() const367fa1f4cf8SSam McCall std::unique_ptr<Block> Paragraph::clone() const {
368fa1f4cf8SSam McCall   return std::make_unique<Paragraph>(*this);
369fa1f4cf8SSam McCall }
370fa1f4cf8SSam McCall 
371fa1f4cf8SSam McCall /// Choose a marker to delimit `Text` from a prioritized list of options.
372fa1f4cf8SSam McCall /// This is more readable than escaping for plain-text.
chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,llvm::StringRef Text)373fa1f4cf8SSam McCall llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
374fa1f4cf8SSam McCall                              llvm::StringRef Text) {
375fa1f4cf8SSam McCall   // Prefer a delimiter whose characters don't appear in the text.
376fa1f4cf8SSam McCall   for (llvm::StringRef S : Options)
377fa1f4cf8SSam McCall     if (Text.find_first_of(S) == llvm::StringRef::npos)
378fa1f4cf8SSam McCall       return S;
379fa1f4cf8SSam McCall   return Options.front();
380fa1f4cf8SSam McCall }
381fa1f4cf8SSam McCall 
renderPlainText(llvm::raw_ostream & OS) const382fa1f4cf8SSam McCall void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
383fa1f4cf8SSam McCall   bool NeedsSpace = false;
384fa1f4cf8SSam McCall   for (auto &C : Chunks) {
385fa1f4cf8SSam McCall     if (C.SpaceBefore || NeedsSpace)
386fa1f4cf8SSam McCall       OS << " ";
387fa1f4cf8SSam McCall     llvm::StringRef Marker = "";
388fa1f4cf8SSam McCall     if (C.Preserve && C.Kind == Chunk::InlineCode)
389fa1f4cf8SSam McCall       Marker = chooseMarker({"`", "'", "\""}, C.Contents);
390fa1f4cf8SSam McCall     OS << Marker << C.Contents << Marker;
391fa1f4cf8SSam McCall     NeedsSpace = C.SpaceAfter;
392fa1f4cf8SSam McCall   }
393fa1f4cf8SSam McCall   OS << '\n';
394fa1f4cf8SSam McCall }
395fa1f4cf8SSam McCall 
3964c862da8SSam McCall BulletList::BulletList() = default;
3974c862da8SSam McCall BulletList::~BulletList() = default;
3984c862da8SSam McCall 
renderMarkdown(llvm::raw_ostream & OS) const399fa1f4cf8SSam McCall void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
400fa1f4cf8SSam McCall   for (auto &D : Items) {
401fa1f4cf8SSam McCall     // Instead of doing this we might prefer passing Indent to children to get
402fa1f4cf8SSam McCall     // rid of the copies, if it turns out to be a bottleneck.
403fa1f4cf8SSam McCall     OS << "- " << indentLines(D.asMarkdown()) << '\n';
404fa1f4cf8SSam McCall   }
405fa1f4cf8SSam McCall   // We need a new line after list to terminate it in markdown.
406fa1f4cf8SSam McCall   OS << '\n';
407fa1f4cf8SSam McCall }
408fa1f4cf8SSam McCall 
renderPlainText(llvm::raw_ostream & OS) const409fa1f4cf8SSam McCall void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
410fa1f4cf8SSam McCall   for (auto &D : Items) {
411fa1f4cf8SSam McCall     // Instead of doing this we might prefer passing Indent to children to get
412fa1f4cf8SSam McCall     // rid of the copies, if it turns out to be a bottleneck.
413fa1f4cf8SSam McCall     OS << "- " << indentLines(D.asPlainText()) << '\n';
414fa1f4cf8SSam McCall   }
415fa1f4cf8SSam McCall }
416fa1f4cf8SSam McCall 
appendSpace()417fa1f4cf8SSam McCall Paragraph &Paragraph::appendSpace() {
418fa1f4cf8SSam McCall   if (!Chunks.empty())
419fa1f4cf8SSam McCall     Chunks.back().SpaceAfter = true;
420fa1f4cf8SSam McCall   return *this;
421fa1f4cf8SSam McCall }
422fa1f4cf8SSam McCall 
appendText(llvm::StringRef Text)423fa1f4cf8SSam McCall Paragraph &Paragraph::appendText(llvm::StringRef Text) {
424fa1f4cf8SSam McCall   std::string Norm = canonicalizeSpaces(Text);
425fa1f4cf8SSam McCall   if (Norm.empty())
426fa1f4cf8SSam McCall     return *this;
427fa1f4cf8SSam McCall   Chunks.emplace_back();
428fa1f4cf8SSam McCall   Chunk &C = Chunks.back();
429fa1f4cf8SSam McCall   C.Contents = std::move(Norm);
430fa1f4cf8SSam McCall   C.Kind = Chunk::PlainText;
431b283ae7aSSam McCall   C.SpaceBefore = llvm::isSpace(Text.front());
432b283ae7aSSam McCall   C.SpaceAfter = llvm::isSpace(Text.back());
433fa1f4cf8SSam McCall   return *this;
434fa1f4cf8SSam McCall }
435fa1f4cf8SSam McCall 
appendCode(llvm::StringRef Code,bool Preserve)436fa1f4cf8SSam McCall Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
437fa1f4cf8SSam McCall   bool AdjacentCode =
438fa1f4cf8SSam McCall       !Chunks.empty() && Chunks.back().Kind == Chunk::InlineCode;
439fa1f4cf8SSam McCall   std::string Norm = canonicalizeSpaces(std::move(Code));
440fa1f4cf8SSam McCall   if (Norm.empty())
441fa1f4cf8SSam McCall     return *this;
442fa1f4cf8SSam McCall   Chunks.emplace_back();
443fa1f4cf8SSam McCall   Chunk &C = Chunks.back();
444fa1f4cf8SSam McCall   C.Contents = std::move(Norm);
445fa1f4cf8SSam McCall   C.Kind = Chunk::InlineCode;
446fa1f4cf8SSam McCall   C.Preserve = Preserve;
447fa1f4cf8SSam McCall   // Disallow adjacent code spans without spaces, markdown can't render them.
448fa1f4cf8SSam McCall   C.SpaceBefore = AdjacentCode;
449fa1f4cf8SSam McCall   return *this;
450fa1f4cf8SSam McCall }
451fa1f4cf8SSam McCall 
clone() const452fa1f4cf8SSam McCall std::unique_ptr<Block> BulletList::clone() const {
453fa1f4cf8SSam McCall   return std::make_unique<BulletList>(*this);
454fa1f4cf8SSam McCall }
455fa1f4cf8SSam McCall 
addItem()456fa1f4cf8SSam McCall class Document &BulletList::addItem() {
457fa1f4cf8SSam McCall   Items.emplace_back();
458fa1f4cf8SSam McCall   return Items.back();
459fa1f4cf8SSam McCall }
460fa1f4cf8SSam McCall 
// Replaces the children of this document with clones of Other's children.
Document &Document::operator=(const Document &Other) {
  // Self-assignment must be a no-op: clearing Children first would also empty
  // Other.Children (the same container) and leave the document blank.
  if (this == &Other)
    return *this;
  Children.clear();
  for (const auto &C : Other.Children)
    Children.push_back(C->clone());
  return *this;
}
467fa1f4cf8SSam McCall 
append(Document Other)468fa1f4cf8SSam McCall void Document::append(Document Other) {
469fa1f4cf8SSam McCall   std::move(Other.Children.begin(), Other.Children.end(),
470fa1f4cf8SSam McCall             std::back_inserter(Children));
471fa1f4cf8SSam McCall }
472fa1f4cf8SSam McCall 
addParagraph()473fa1f4cf8SSam McCall Paragraph &Document::addParagraph() {
474fa1f4cf8SSam McCall   Children.push_back(std::make_unique<Paragraph>());
475fa1f4cf8SSam McCall   return *static_cast<Paragraph *>(Children.back().get());
476fa1f4cf8SSam McCall }
477fa1f4cf8SSam McCall 
addRuler()478fa1f4cf8SSam McCall void Document::addRuler() { Children.push_back(std::make_unique<Ruler>()); }
479fa1f4cf8SSam McCall 
addCodeBlock(std::string Code,std::string Language)480fa1f4cf8SSam McCall void Document::addCodeBlock(std::string Code, std::string Language) {
481fa1f4cf8SSam McCall   Children.emplace_back(
482fa1f4cf8SSam McCall       std::make_unique<CodeBlock>(std::move(Code), std::move(Language)));
483fa1f4cf8SSam McCall }
484fa1f4cf8SSam McCall 
asMarkdown() const485fa1f4cf8SSam McCall std::string Document::asMarkdown() const {
486fa1f4cf8SSam McCall   return renderBlocks(Children, &Block::renderMarkdown);
487fa1f4cf8SSam McCall }
488fa1f4cf8SSam McCall 
asPlainText() const489fa1f4cf8SSam McCall std::string Document::asPlainText() const {
490fa1f4cf8SSam McCall   return renderBlocks(Children, &Block::renderPlainText);
491fa1f4cf8SSam McCall }
492fa1f4cf8SSam McCall 
addBulletList()493fa1f4cf8SSam McCall BulletList &Document::addBulletList() {
494fa1f4cf8SSam McCall   Children.emplace_back(std::make_unique<BulletList>());
495fa1f4cf8SSam McCall   return *static_cast<BulletList *>(Children.back().get());
496fa1f4cf8SSam McCall }
497fa1f4cf8SSam McCall 
addHeading(size_t Level)498fa1f4cf8SSam McCall Paragraph &Document::addHeading(size_t Level) {
499fa1f4cf8SSam McCall   assert(Level > 0);
500fa1f4cf8SSam McCall   Children.emplace_back(std::make_unique<Heading>(Level));
501fa1f4cf8SSam McCall   return *static_cast<Paragraph *>(Children.back().get());
502fa1f4cf8SSam McCall }
503fa1f4cf8SSam McCall } // namespace markup
504fa1f4cf8SSam McCall } // namespace clangd
505fa1f4cf8SSam McCall } // namespace clang
506