xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1*81ad6265SDimitry Andric //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2*81ad6265SDimitry Andric //
3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*81ad6265SDimitry Andric //
7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
8*81ad6265SDimitry Andric ///
9*81ad6265SDimitry Andric /// \file
10*81ad6265SDimitry Andric /// This file defines the log symbolizer markup data model and parser.
11*81ad6265SDimitry Andric ///
12*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
13*81ad6265SDimitry Andric 
14*81ad6265SDimitry Andric #include "llvm/DebugInfo/Symbolize/Markup.h"
15*81ad6265SDimitry Andric 
16*81ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h"
17*81ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h"
18*81ad6265SDimitry Andric 
19*81ad6265SDimitry Andric namespace llvm {
20*81ad6265SDimitry Andric namespace symbolize {
21*81ad6265SDimitry Andric 
// Regular expression for the SGR (Select Graphic Rendition) control codes
// that are recognized in text outside of markup elements:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27*81ad6265SDimitry Andric 
28*81ad6265SDimitry Andric MarkupParser::MarkupParser(StringSet<> MultilineTags)
29*81ad6265SDimitry Andric     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30*81ad6265SDimitry Andric 
31*81ad6265SDimitry Andric static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
32*81ad6265SDimitry Andric   return Str.take_front(Pos - Str.begin());
33*81ad6265SDimitry Andric }
34*81ad6265SDimitry Andric static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35*81ad6265SDimitry Andric   Str = Str.drop_front(Pos - Str.begin());
36*81ad6265SDimitry Andric }
37*81ad6265SDimitry Andric 
38*81ad6265SDimitry Andric void MarkupParser::parseLine(StringRef Line) {
39*81ad6265SDimitry Andric   Buffer.clear();
40*81ad6265SDimitry Andric   NextIdx = 0;
41*81ad6265SDimitry Andric   FinishedMultiline.clear();
42*81ad6265SDimitry Andric   this->Line = Line;
43*81ad6265SDimitry Andric }
44*81ad6265SDimitry Andric 
45*81ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::nextNode() {
46*81ad6265SDimitry Andric   // Pull something out of the buffer if possible.
47*81ad6265SDimitry Andric   if (!Buffer.empty()) {
48*81ad6265SDimitry Andric     if (NextIdx < Buffer.size())
49*81ad6265SDimitry Andric       return std::move(Buffer[NextIdx++]);
50*81ad6265SDimitry Andric     NextIdx = 0;
51*81ad6265SDimitry Andric     Buffer.clear();
52*81ad6265SDimitry Andric   }
53*81ad6265SDimitry Andric 
54*81ad6265SDimitry Andric   // The buffer is empty, so parse the next bit of the line.
55*81ad6265SDimitry Andric 
56*81ad6265SDimitry Andric   if (Line.empty())
57*81ad6265SDimitry Andric     return None;
58*81ad6265SDimitry Andric 
59*81ad6265SDimitry Andric   if (!InProgressMultiline.empty()) {
60*81ad6265SDimitry Andric     if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61*81ad6265SDimitry Andric       llvm::append_range(InProgressMultiline, *MultilineEnd);
62*81ad6265SDimitry Andric       assert(FinishedMultiline.empty() &&
63*81ad6265SDimitry Andric              "At most one multi-line element can be finished at a time.");
64*81ad6265SDimitry Andric       FinishedMultiline.swap(InProgressMultiline);
65*81ad6265SDimitry Andric       // Parse the multi-line element as if it were contiguous.
66*81ad6265SDimitry Andric       advanceTo(Line, MultilineEnd->end());
67*81ad6265SDimitry Andric       return *parseElement(FinishedMultiline);
68*81ad6265SDimitry Andric     }
69*81ad6265SDimitry Andric 
70*81ad6265SDimitry Andric     // The whole line is part of the multi-line element.
71*81ad6265SDimitry Andric     llvm::append_range(InProgressMultiline, Line);
72*81ad6265SDimitry Andric     Line = Line.drop_front(Line.size());
73*81ad6265SDimitry Andric     return None;
74*81ad6265SDimitry Andric   }
75*81ad6265SDimitry Andric 
76*81ad6265SDimitry Andric   // Find the first valid markup element, if any.
77*81ad6265SDimitry Andric   if (Optional<MarkupNode> Element = parseElement(Line)) {
78*81ad6265SDimitry Andric     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
79*81ad6265SDimitry Andric     Buffer.push_back(std::move(*Element));
80*81ad6265SDimitry Andric     advanceTo(Line, Element->Text.end());
81*81ad6265SDimitry Andric     return nextNode();
82*81ad6265SDimitry Andric   }
83*81ad6265SDimitry Andric 
84*81ad6265SDimitry Andric   // Since there were no valid elements remaining, see if the line opens a
85*81ad6265SDimitry Andric   // multi-line element.
86*81ad6265SDimitry Andric   if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87*81ad6265SDimitry Andric     // Emit any text before the element.
88*81ad6265SDimitry Andric     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
89*81ad6265SDimitry Andric 
90*81ad6265SDimitry Andric     // Begin recording the multi-line element.
91*81ad6265SDimitry Andric     llvm::append_range(InProgressMultiline, *MultilineBegin);
92*81ad6265SDimitry Andric     Line = Line.drop_front(Line.size());
93*81ad6265SDimitry Andric     return nextNode();
94*81ad6265SDimitry Andric   }
95*81ad6265SDimitry Andric 
96*81ad6265SDimitry Andric   // The line doesn't contain any more markup elements, so emit it as text.
97*81ad6265SDimitry Andric   parseTextOutsideMarkup(Line);
98*81ad6265SDimitry Andric   Line = Line.drop_front(Line.size());
99*81ad6265SDimitry Andric   return nextNode();
100*81ad6265SDimitry Andric }
101*81ad6265SDimitry Andric 
102*81ad6265SDimitry Andric void MarkupParser::flush() {
103*81ad6265SDimitry Andric   if (InProgressMultiline.empty())
104*81ad6265SDimitry Andric     return;
105*81ad6265SDimitry Andric   FinishedMultiline.swap(InProgressMultiline);
106*81ad6265SDimitry Andric   parseTextOutsideMarkup(FinishedMultiline);
107*81ad6265SDimitry Andric }
108*81ad6265SDimitry Andric 
109*81ad6265SDimitry Andric // Finds and returns the next valid markup element in the given line. Returns
110*81ad6265SDimitry Andric // None if the line contains no valid elements.
111*81ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
112*81ad6265SDimitry Andric   while (true) {
113*81ad6265SDimitry Andric     // Find next element using begin and end markers.
114*81ad6265SDimitry Andric     size_t BeginPos = Line.find("{{{");
115*81ad6265SDimitry Andric     if (BeginPos == StringRef::npos)
116*81ad6265SDimitry Andric       return None;
117*81ad6265SDimitry Andric     size_t EndPos = Line.find("}}}", BeginPos + 3);
118*81ad6265SDimitry Andric     if (EndPos == StringRef::npos)
119*81ad6265SDimitry Andric       return None;
120*81ad6265SDimitry Andric     EndPos += 3;
121*81ad6265SDimitry Andric     MarkupNode Element;
122*81ad6265SDimitry Andric     Element.Text = Line.slice(BeginPos, EndPos);
123*81ad6265SDimitry Andric     Line = Line.substr(EndPos);
124*81ad6265SDimitry Andric 
125*81ad6265SDimitry Andric     // Parse tag.
126*81ad6265SDimitry Andric     StringRef Content = Element.Text.drop_front(3).drop_back(3);
127*81ad6265SDimitry Andric     StringRef FieldsContent;
128*81ad6265SDimitry Andric     std::tie(Element.Tag, FieldsContent) = Content.split(':');
129*81ad6265SDimitry Andric     if (Element.Tag.empty())
130*81ad6265SDimitry Andric       continue;
131*81ad6265SDimitry Andric 
132*81ad6265SDimitry Andric     // Parse fields.
133*81ad6265SDimitry Andric     if (!FieldsContent.empty())
134*81ad6265SDimitry Andric       FieldsContent.split(Element.Fields, ":");
135*81ad6265SDimitry Andric     else if (Content.back() == ':')
136*81ad6265SDimitry Andric       Element.Fields.push_back(FieldsContent);
137*81ad6265SDimitry Andric 
138*81ad6265SDimitry Andric     return Element;
139*81ad6265SDimitry Andric   }
140*81ad6265SDimitry Andric }
141*81ad6265SDimitry Andric 
142*81ad6265SDimitry Andric static MarkupNode textNode(StringRef Text) {
143*81ad6265SDimitry Andric   MarkupNode Node;
144*81ad6265SDimitry Andric   Node.Text = Text;
145*81ad6265SDimitry Andric   return Node;
146*81ad6265SDimitry Andric }
147*81ad6265SDimitry Andric 
148*81ad6265SDimitry Andric // Parses a region of text known to be outside any markup elements. Such text
149*81ad6265SDimitry Andric // may still contain SGR control codes, so the region is further subdivided into
150*81ad6265SDimitry Andric // control codes and true text regions.
151*81ad6265SDimitry Andric void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
152*81ad6265SDimitry Andric   if (Text.empty())
153*81ad6265SDimitry Andric     return;
154*81ad6265SDimitry Andric   SmallVector<StringRef> Matches;
155*81ad6265SDimitry Andric   while (SGRSyntax.match(Text, &Matches)) {
156*81ad6265SDimitry Andric     // Emit any text before the SGR element.
157*81ad6265SDimitry Andric     if (Matches.begin()->begin() != Text.begin())
158*81ad6265SDimitry Andric       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
159*81ad6265SDimitry Andric 
160*81ad6265SDimitry Andric     Buffer.push_back(textNode(*Matches.begin()));
161*81ad6265SDimitry Andric     advanceTo(Text, Matches.begin()->end());
162*81ad6265SDimitry Andric   }
163*81ad6265SDimitry Andric   if (!Text.empty())
164*81ad6265SDimitry Andric     Buffer.push_back(textNode(Text));
165*81ad6265SDimitry Andric }
166*81ad6265SDimitry Andric 
167*81ad6265SDimitry Andric // Given that a line doesn't contain any valid markup, see if it ends with the
168*81ad6265SDimitry Andric // start of a multi-line element. If so, returns the beginning.
169*81ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
170*81ad6265SDimitry Andric   // A multi-line begin marker must be the last one on the line.
171*81ad6265SDimitry Andric   size_t BeginPos = Line.rfind("{{{");
172*81ad6265SDimitry Andric   if (BeginPos == StringRef::npos)
173*81ad6265SDimitry Andric     return None;
174*81ad6265SDimitry Andric   size_t BeginTagPos = BeginPos + 3;
175*81ad6265SDimitry Andric 
176*81ad6265SDimitry Andric   // If there are any end markers afterwards, the begin marker cannot belong to
177*81ad6265SDimitry Andric   // a multi-line element.
178*81ad6265SDimitry Andric   size_t EndPos = Line.find("}}}", BeginTagPos);
179*81ad6265SDimitry Andric   if (EndPos != StringRef::npos)
180*81ad6265SDimitry Andric     return None;
181*81ad6265SDimitry Andric 
182*81ad6265SDimitry Andric   // Check whether the tag is registered multi-line.
183*81ad6265SDimitry Andric   size_t EndTagPos = Line.find(':', BeginTagPos);
184*81ad6265SDimitry Andric   if (EndTagPos == StringRef::npos)
185*81ad6265SDimitry Andric     return None;
186*81ad6265SDimitry Andric   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
187*81ad6265SDimitry Andric   if (!MultilineTags.contains(Tag))
188*81ad6265SDimitry Andric     return None;
189*81ad6265SDimitry Andric   return Line.substr(BeginPos);
190*81ad6265SDimitry Andric }
191*81ad6265SDimitry Andric 
192*81ad6265SDimitry Andric // See if the line begins with the ending of an in-progress multi-line element.
193*81ad6265SDimitry Andric // If so, return the ending.
194*81ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
195*81ad6265SDimitry Andric   size_t EndPos = Line.find("}}}");
196*81ad6265SDimitry Andric   if (EndPos == StringRef::npos)
197*81ad6265SDimitry Andric     return None;
198*81ad6265SDimitry Andric   return Line.take_front(EndPos + 3);
199*81ad6265SDimitry Andric }
200*81ad6265SDimitry Andric 
201*81ad6265SDimitry Andric } // end namespace symbolize
202*81ad6265SDimitry Andric } // end namespace llvm
203