xref: /llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp (revision 0060c54e0da6d1429875da2d30895faa7562b706)
1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the log symbolizer markup data model and parser.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/DebugInfo/Symbolize/Markup.h"
15 
16 #include "llvm/ADT/STLExtras.h"
17 
18 namespace llvm {
19 namespace symbolize {
20 
// Regular expression for the SGR (Select Graphic Rendition) control sequences
// recognized in text outside of markup elements. It accepts exactly:
//   "\033[0m" and "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
26 
27 MarkupParser::MarkupParser(StringSet<> MultilineTags)
28     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
29 
30 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
31   return Str.take_front(Pos - Str.begin());
32 }
33 static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
34   Str = Str.drop_front(Pos - Str.begin());
35 }
36 
37 void MarkupParser::parseLine(StringRef Line) {
38   Buffer.clear();
39   NextIdx = 0;
40   FinishedMultiline.clear();
41   this->Line = Line;
42 }
43 
44 std::optional<MarkupNode> MarkupParser::nextNode() {
45   // Pull something out of the buffer if possible.
46   if (!Buffer.empty()) {
47     if (NextIdx < Buffer.size())
48       return std::move(Buffer[NextIdx++]);
49     NextIdx = 0;
50     Buffer.clear();
51   }
52 
53   // The buffer is empty, so parse the next bit of the line.
54 
55   if (Line.empty())
56     return std::nullopt;
57 
58   if (!InProgressMultiline.empty()) {
59     if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
60       llvm::append_range(InProgressMultiline, *MultilineEnd);
61       assert(FinishedMultiline.empty() &&
62              "At most one multi-line element can be finished at a time.");
63       FinishedMultiline.swap(InProgressMultiline);
64       // Parse the multi-line element as if it were contiguous.
65       advanceTo(Line, MultilineEnd->end());
66       return *parseElement(FinishedMultiline);
67     }
68 
69     // The whole line is part of the multi-line element.
70     llvm::append_range(InProgressMultiline, Line);
71     Line = Line.drop_front(Line.size());
72     return std::nullopt;
73   }
74 
75   // Find the first valid markup element, if any.
76   if (std::optional<MarkupNode> Element = parseElement(Line)) {
77     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
78     Buffer.push_back(std::move(*Element));
79     advanceTo(Line, Element->Text.end());
80     return nextNode();
81   }
82 
83   // Since there were no valid elements remaining, see if the line opens a
84   // multi-line element.
85   if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
86     // Emit any text before the element.
87     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
88 
89     // Begin recording the multi-line element.
90     llvm::append_range(InProgressMultiline, *MultilineBegin);
91     Line = Line.drop_front(Line.size());
92     return nextNode();
93   }
94 
95   // The line doesn't contain any more markup elements, so emit it as text.
96   parseTextOutsideMarkup(Line);
97   Line = Line.drop_front(Line.size());
98   return nextNode();
99 }
100 
101 void MarkupParser::flush() {
102   Buffer.clear();
103   NextIdx = 0;
104   Line = {};
105   if (InProgressMultiline.empty())
106     return;
107   FinishedMultiline.swap(InProgressMultiline);
108   parseTextOutsideMarkup(FinishedMultiline);
109 }
110 
111 // Finds and returns the next valid markup element in the given line. Returns
112 // std::nullopt if the line contains no valid elements.
113 std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
114   while (true) {
115     // Find next element using begin and end markers.
116     size_t BeginPos = Line.find("{{{");
117     if (BeginPos == StringRef::npos)
118       return std::nullopt;
119     size_t EndPos = Line.find("}}}", BeginPos + 3);
120     if (EndPos == StringRef::npos)
121       return std::nullopt;
122     EndPos += 3;
123     MarkupNode Element;
124     Element.Text = Line.slice(BeginPos, EndPos);
125     Line = Line.substr(EndPos);
126 
127     // Parse tag.
128     StringRef Content = Element.Text.drop_front(3).drop_back(3);
129     StringRef FieldsContent;
130     std::tie(Element.Tag, FieldsContent) = Content.split(':');
131     if (Element.Tag.empty())
132       continue;
133 
134     // Parse fields.
135     if (!FieldsContent.empty())
136       FieldsContent.split(Element.Fields, ":");
137     else if (Content.back() == ':')
138       Element.Fields.push_back(FieldsContent);
139 
140     return Element;
141   }
142 }
143 
144 static MarkupNode textNode(StringRef Text) {
145   MarkupNode Node;
146   Node.Text = Text;
147   return Node;
148 }
149 
150 // Parses a region of text known to be outside any markup elements. Such text
151 // may still contain SGR control codes, so the region is further subdivided into
152 // control codes and true text regions.
153 void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
154   if (Text.empty())
155     return;
156   SmallVector<StringRef> Matches;
157   while (SGRSyntax.match(Text, &Matches)) {
158     // Emit any text before the SGR element.
159     if (Matches.begin()->begin() != Text.begin())
160       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
161 
162     Buffer.push_back(textNode(*Matches.begin()));
163     advanceTo(Text, Matches.begin()->end());
164   }
165   if (!Text.empty())
166     Buffer.push_back(textNode(Text));
167 }
168 
169 // Given that a line doesn't contain any valid markup, see if it ends with the
170 // start of a multi-line element. If so, returns the beginning.
171 std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
172   // A multi-line begin marker must be the last one on the line.
173   size_t BeginPos = Line.rfind("{{{");
174   if (BeginPos == StringRef::npos)
175     return std::nullopt;
176   size_t BeginTagPos = BeginPos + 3;
177 
178   // If there are any end markers afterwards, the begin marker cannot belong to
179   // a multi-line element.
180   size_t EndPos = Line.find("}}}", BeginTagPos);
181   if (EndPos != StringRef::npos)
182     return std::nullopt;
183 
184   // Check whether the tag is registered multi-line.
185   size_t EndTagPos = Line.find(':', BeginTagPos);
186   if (EndTagPos == StringRef::npos)
187     return std::nullopt;
188   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
189   if (!MultilineTags.contains(Tag))
190     return std::nullopt;
191   return Line.substr(BeginPos);
192 }
193 
194 // See if the line begins with the ending of an in-progress multi-line element.
195 // If so, return the ending.
196 std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
197   size_t EndPos = Line.find("}}}");
198   if (EndPos == StringRef::npos)
199     return std::nullopt;
200   return Line.take_front(EndPos + 3);
201 }
202 
203 } // end namespace symbolize
204 } // end namespace llvm
205