xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the log symbolizer markup data model and parser.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/DebugInfo/Symbolize/Markup.h"
15 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
18 
19 namespace llvm {
20 namespace symbolize {
21 
// Regular expression for the SGR control codes that are recognized inside
// text lying outside of markup elements. It accepts exactly:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27 
28 MarkupParser::MarkupParser(StringSet<> MultilineTags)
29     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30 
31 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
32   return Str.take_front(Pos - Str.begin());
33 }
34 static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35   Str = Str.drop_front(Pos - Str.begin());
36 }
37 
38 void MarkupParser::parseLine(StringRef Line) {
39   Buffer.clear();
40   NextIdx = 0;
41   FinishedMultiline.clear();
42   this->Line = Line;
43 }
44 
45 Optional<MarkupNode> MarkupParser::nextNode() {
46   // Pull something out of the buffer if possible.
47   if (!Buffer.empty()) {
48     if (NextIdx < Buffer.size())
49       return std::move(Buffer[NextIdx++]);
50     NextIdx = 0;
51     Buffer.clear();
52   }
53 
54   // The buffer is empty, so parse the next bit of the line.
55 
56   if (Line.empty())
57     return None;
58 
59   if (!InProgressMultiline.empty()) {
60     if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61       llvm::append_range(InProgressMultiline, *MultilineEnd);
62       assert(FinishedMultiline.empty() &&
63              "At most one multi-line element can be finished at a time.");
64       FinishedMultiline.swap(InProgressMultiline);
65       // Parse the multi-line element as if it were contiguous.
66       advanceTo(Line, MultilineEnd->end());
67       return *parseElement(FinishedMultiline);
68     }
69 
70     // The whole line is part of the multi-line element.
71     llvm::append_range(InProgressMultiline, Line);
72     Line = Line.drop_front(Line.size());
73     return None;
74   }
75 
76   // Find the first valid markup element, if any.
77   if (Optional<MarkupNode> Element = parseElement(Line)) {
78     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79     Buffer.push_back(std::move(*Element));
80     advanceTo(Line, Element->Text.end());
81     return nextNode();
82   }
83 
84   // Since there were no valid elements remaining, see if the line opens a
85   // multi-line element.
86   if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87     // Emit any text before the element.
88     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89 
90     // Begin recording the multi-line element.
91     llvm::append_range(InProgressMultiline, *MultilineBegin);
92     Line = Line.drop_front(Line.size());
93     return nextNode();
94   }
95 
96   // The line doesn't contain any more markup elements, so emit it as text.
97   parseTextOutsideMarkup(Line);
98   Line = Line.drop_front(Line.size());
99   return nextNode();
100 }
101 
102 void MarkupParser::flush() {
103   if (InProgressMultiline.empty())
104     return;
105   FinishedMultiline.swap(InProgressMultiline);
106   parseTextOutsideMarkup(FinishedMultiline);
107 }
108 
109 // Finds and returns the next valid markup element in the given line. Returns
110 // None if the line contains no valid elements.
111 Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
112   while (true) {
113     // Find next element using begin and end markers.
114     size_t BeginPos = Line.find("{{{");
115     if (BeginPos == StringRef::npos)
116       return None;
117     size_t EndPos = Line.find("}}}", BeginPos + 3);
118     if (EndPos == StringRef::npos)
119       return None;
120     EndPos += 3;
121     MarkupNode Element;
122     Element.Text = Line.slice(BeginPos, EndPos);
123     Line = Line.substr(EndPos);
124 
125     // Parse tag.
126     StringRef Content = Element.Text.drop_front(3).drop_back(3);
127     StringRef FieldsContent;
128     std::tie(Element.Tag, FieldsContent) = Content.split(':');
129     if (Element.Tag.empty())
130       continue;
131 
132     // Parse fields.
133     if (!FieldsContent.empty())
134       FieldsContent.split(Element.Fields, ":");
135     else if (Content.back() == ':')
136       Element.Fields.push_back(FieldsContent);
137 
138     return Element;
139   }
140 }
141 
142 static MarkupNode textNode(StringRef Text) {
143   MarkupNode Node;
144   Node.Text = Text;
145   return Node;
146 }
147 
148 // Parses a region of text known to be outside any markup elements. Such text
149 // may still contain SGR control codes, so the region is further subdivided into
150 // control codes and true text regions.
151 void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
152   if (Text.empty())
153     return;
154   SmallVector<StringRef> Matches;
155   while (SGRSyntax.match(Text, &Matches)) {
156     // Emit any text before the SGR element.
157     if (Matches.begin()->begin() != Text.begin())
158       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
159 
160     Buffer.push_back(textNode(*Matches.begin()));
161     advanceTo(Text, Matches.begin()->end());
162   }
163   if (!Text.empty())
164     Buffer.push_back(textNode(Text));
165 }
166 
167 // Given that a line doesn't contain any valid markup, see if it ends with the
168 // start of a multi-line element. If so, returns the beginning.
169 Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
170   // A multi-line begin marker must be the last one on the line.
171   size_t BeginPos = Line.rfind("{{{");
172   if (BeginPos == StringRef::npos)
173     return None;
174   size_t BeginTagPos = BeginPos + 3;
175 
176   // If there are any end markers afterwards, the begin marker cannot belong to
177   // a multi-line element.
178   size_t EndPos = Line.find("}}}", BeginTagPos);
179   if (EndPos != StringRef::npos)
180     return None;
181 
182   // Check whether the tag is registered multi-line.
183   size_t EndTagPos = Line.find(':', BeginTagPos);
184   if (EndTagPos == StringRef::npos)
185     return None;
186   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
187   if (!MultilineTags.contains(Tag))
188     return None;
189   return Line.substr(BeginPos);
190 }
191 
192 // See if the line begins with the ending of an in-progress multi-line element.
193 // If so, return the ending.
194 Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
195   size_t EndPos = Line.find("}}}");
196   if (EndPos == StringRef::npos)
197     return None;
198   return Line.take_front(EndPos + 3);
199 }
200 
201 } // end namespace symbolize
202 } // end namespace llvm
203