xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
181ad6265SDimitry Andric //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric ///
981ad6265SDimitry Andric /// \file
1081ad6265SDimitry Andric /// This file defines the log symbolizer markup data model and parser.
1181ad6265SDimitry Andric ///
1281ad6265SDimitry Andric //===----------------------------------------------------------------------===//
1381ad6265SDimitry Andric 
1481ad6265SDimitry Andric #include "llvm/DebugInfo/Symbolize/Markup.h"
1581ad6265SDimitry Andric 
1681ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h"
1781ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h"
1881ad6265SDimitry Andric 
1981ad6265SDimitry Andric namespace llvm {
2081ad6265SDimitry Andric namespace symbolize {
2181ad6265SDimitry Andric 
// Source text of the regular expression used to initialize SGRSyntax. It
// recognizes exactly these escape sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
2781ad6265SDimitry Andric 
MarkupParser(StringSet<> MultilineTags)2881ad6265SDimitry Andric MarkupParser::MarkupParser(StringSet<> MultilineTags)
2981ad6265SDimitry Andric     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
3081ad6265SDimitry Andric 
takeTo(StringRef Str,StringRef::iterator Pos)3181ad6265SDimitry Andric static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
3281ad6265SDimitry Andric   return Str.take_front(Pos - Str.begin());
3381ad6265SDimitry Andric }
advanceTo(StringRef & Str,StringRef::iterator Pos)3481ad6265SDimitry Andric static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
3581ad6265SDimitry Andric   Str = Str.drop_front(Pos - Str.begin());
3681ad6265SDimitry Andric }
3781ad6265SDimitry Andric 
parseLine(StringRef Line)3881ad6265SDimitry Andric void MarkupParser::parseLine(StringRef Line) {
3981ad6265SDimitry Andric   Buffer.clear();
4081ad6265SDimitry Andric   NextIdx = 0;
4181ad6265SDimitry Andric   FinishedMultiline.clear();
4281ad6265SDimitry Andric   this->Line = Line;
4381ad6265SDimitry Andric }
4481ad6265SDimitry Andric 
nextNode()45*bdd1243dSDimitry Andric std::optional<MarkupNode> MarkupParser::nextNode() {
4681ad6265SDimitry Andric   // Pull something out of the buffer if possible.
4781ad6265SDimitry Andric   if (!Buffer.empty()) {
4881ad6265SDimitry Andric     if (NextIdx < Buffer.size())
4981ad6265SDimitry Andric       return std::move(Buffer[NextIdx++]);
5081ad6265SDimitry Andric     NextIdx = 0;
5181ad6265SDimitry Andric     Buffer.clear();
5281ad6265SDimitry Andric   }
5381ad6265SDimitry Andric 
5481ad6265SDimitry Andric   // The buffer is empty, so parse the next bit of the line.
5581ad6265SDimitry Andric 
5681ad6265SDimitry Andric   if (Line.empty())
57*bdd1243dSDimitry Andric     return std::nullopt;
5881ad6265SDimitry Andric 
5981ad6265SDimitry Andric   if (!InProgressMultiline.empty()) {
60*bdd1243dSDimitry Andric     if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
6181ad6265SDimitry Andric       llvm::append_range(InProgressMultiline, *MultilineEnd);
6281ad6265SDimitry Andric       assert(FinishedMultiline.empty() &&
6381ad6265SDimitry Andric              "At most one multi-line element can be finished at a time.");
6481ad6265SDimitry Andric       FinishedMultiline.swap(InProgressMultiline);
6581ad6265SDimitry Andric       // Parse the multi-line element as if it were contiguous.
6681ad6265SDimitry Andric       advanceTo(Line, MultilineEnd->end());
6781ad6265SDimitry Andric       return *parseElement(FinishedMultiline);
6881ad6265SDimitry Andric     }
6981ad6265SDimitry Andric 
7081ad6265SDimitry Andric     // The whole line is part of the multi-line element.
7181ad6265SDimitry Andric     llvm::append_range(InProgressMultiline, Line);
7281ad6265SDimitry Andric     Line = Line.drop_front(Line.size());
73*bdd1243dSDimitry Andric     return std::nullopt;
7481ad6265SDimitry Andric   }
7581ad6265SDimitry Andric 
7681ad6265SDimitry Andric   // Find the first valid markup element, if any.
77*bdd1243dSDimitry Andric   if (std::optional<MarkupNode> Element = parseElement(Line)) {
7881ad6265SDimitry Andric     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
7981ad6265SDimitry Andric     Buffer.push_back(std::move(*Element));
8081ad6265SDimitry Andric     advanceTo(Line, Element->Text.end());
8181ad6265SDimitry Andric     return nextNode();
8281ad6265SDimitry Andric   }
8381ad6265SDimitry Andric 
8481ad6265SDimitry Andric   // Since there were no valid elements remaining, see if the line opens a
8581ad6265SDimitry Andric   // multi-line element.
86*bdd1243dSDimitry Andric   if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
8781ad6265SDimitry Andric     // Emit any text before the element.
8881ad6265SDimitry Andric     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
8981ad6265SDimitry Andric 
9081ad6265SDimitry Andric     // Begin recording the multi-line element.
9181ad6265SDimitry Andric     llvm::append_range(InProgressMultiline, *MultilineBegin);
9281ad6265SDimitry Andric     Line = Line.drop_front(Line.size());
9381ad6265SDimitry Andric     return nextNode();
9481ad6265SDimitry Andric   }
9581ad6265SDimitry Andric 
9681ad6265SDimitry Andric   // The line doesn't contain any more markup elements, so emit it as text.
9781ad6265SDimitry Andric   parseTextOutsideMarkup(Line);
9881ad6265SDimitry Andric   Line = Line.drop_front(Line.size());
9981ad6265SDimitry Andric   return nextNode();
10081ad6265SDimitry Andric }
10181ad6265SDimitry Andric 
flush()10281ad6265SDimitry Andric void MarkupParser::flush() {
103fcaf7f86SDimitry Andric   Buffer.clear();
104fcaf7f86SDimitry Andric   NextIdx = 0;
105fcaf7f86SDimitry Andric   Line = {};
10681ad6265SDimitry Andric   if (InProgressMultiline.empty())
10781ad6265SDimitry Andric     return;
10881ad6265SDimitry Andric   FinishedMultiline.swap(InProgressMultiline);
10981ad6265SDimitry Andric   parseTextOutsideMarkup(FinishedMultiline);
11081ad6265SDimitry Andric }
11181ad6265SDimitry Andric 
11281ad6265SDimitry Andric // Finds and returns the next valid markup element in the given line. Returns
113*bdd1243dSDimitry Andric // std::nullopt if the line contains no valid elements.
parseElement(StringRef Line)114*bdd1243dSDimitry Andric std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
11581ad6265SDimitry Andric   while (true) {
11681ad6265SDimitry Andric     // Find next element using begin and end markers.
11781ad6265SDimitry Andric     size_t BeginPos = Line.find("{{{");
11881ad6265SDimitry Andric     if (BeginPos == StringRef::npos)
119*bdd1243dSDimitry Andric       return std::nullopt;
12081ad6265SDimitry Andric     size_t EndPos = Line.find("}}}", BeginPos + 3);
12181ad6265SDimitry Andric     if (EndPos == StringRef::npos)
122*bdd1243dSDimitry Andric       return std::nullopt;
12381ad6265SDimitry Andric     EndPos += 3;
12481ad6265SDimitry Andric     MarkupNode Element;
12581ad6265SDimitry Andric     Element.Text = Line.slice(BeginPos, EndPos);
12681ad6265SDimitry Andric     Line = Line.substr(EndPos);
12781ad6265SDimitry Andric 
12881ad6265SDimitry Andric     // Parse tag.
12981ad6265SDimitry Andric     StringRef Content = Element.Text.drop_front(3).drop_back(3);
13081ad6265SDimitry Andric     StringRef FieldsContent;
13181ad6265SDimitry Andric     std::tie(Element.Tag, FieldsContent) = Content.split(':');
13281ad6265SDimitry Andric     if (Element.Tag.empty())
13381ad6265SDimitry Andric       continue;
13481ad6265SDimitry Andric 
13581ad6265SDimitry Andric     // Parse fields.
13681ad6265SDimitry Andric     if (!FieldsContent.empty())
13781ad6265SDimitry Andric       FieldsContent.split(Element.Fields, ":");
13881ad6265SDimitry Andric     else if (Content.back() == ':')
13981ad6265SDimitry Andric       Element.Fields.push_back(FieldsContent);
14081ad6265SDimitry Andric 
14181ad6265SDimitry Andric     return Element;
14281ad6265SDimitry Andric   }
14381ad6265SDimitry Andric }
14481ad6265SDimitry Andric 
textNode(StringRef Text)14581ad6265SDimitry Andric static MarkupNode textNode(StringRef Text) {
14681ad6265SDimitry Andric   MarkupNode Node;
14781ad6265SDimitry Andric   Node.Text = Text;
14881ad6265SDimitry Andric   return Node;
14981ad6265SDimitry Andric }
15081ad6265SDimitry Andric 
15181ad6265SDimitry Andric // Parses a region of text known to be outside any markup elements. Such text
15281ad6265SDimitry Andric // may still contain SGR control codes, so the region is further subdivided into
15381ad6265SDimitry Andric // control codes and true text regions.
parseTextOutsideMarkup(StringRef Text)15481ad6265SDimitry Andric void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
15581ad6265SDimitry Andric   if (Text.empty())
15681ad6265SDimitry Andric     return;
15781ad6265SDimitry Andric   SmallVector<StringRef> Matches;
15881ad6265SDimitry Andric   while (SGRSyntax.match(Text, &Matches)) {
15981ad6265SDimitry Andric     // Emit any text before the SGR element.
16081ad6265SDimitry Andric     if (Matches.begin()->begin() != Text.begin())
16181ad6265SDimitry Andric       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
16281ad6265SDimitry Andric 
16381ad6265SDimitry Andric     Buffer.push_back(textNode(*Matches.begin()));
16481ad6265SDimitry Andric     advanceTo(Text, Matches.begin()->end());
16581ad6265SDimitry Andric   }
16681ad6265SDimitry Andric   if (!Text.empty())
16781ad6265SDimitry Andric     Buffer.push_back(textNode(Text));
16881ad6265SDimitry Andric }
16981ad6265SDimitry Andric 
17081ad6265SDimitry Andric // Given that a line doesn't contain any valid markup, see if it ends with the
17181ad6265SDimitry Andric // start of a multi-line element. If so, returns the beginning.
parseMultiLineBegin(StringRef Line)172*bdd1243dSDimitry Andric std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
17381ad6265SDimitry Andric   // A multi-line begin marker must be the last one on the line.
17481ad6265SDimitry Andric   size_t BeginPos = Line.rfind("{{{");
17581ad6265SDimitry Andric   if (BeginPos == StringRef::npos)
176*bdd1243dSDimitry Andric     return std::nullopt;
17781ad6265SDimitry Andric   size_t BeginTagPos = BeginPos + 3;
17881ad6265SDimitry Andric 
17981ad6265SDimitry Andric   // If there are any end markers afterwards, the begin marker cannot belong to
18081ad6265SDimitry Andric   // a multi-line element.
18181ad6265SDimitry Andric   size_t EndPos = Line.find("}}}", BeginTagPos);
18281ad6265SDimitry Andric   if (EndPos != StringRef::npos)
183*bdd1243dSDimitry Andric     return std::nullopt;
18481ad6265SDimitry Andric 
18581ad6265SDimitry Andric   // Check whether the tag is registered multi-line.
18681ad6265SDimitry Andric   size_t EndTagPos = Line.find(':', BeginTagPos);
18781ad6265SDimitry Andric   if (EndTagPos == StringRef::npos)
188*bdd1243dSDimitry Andric     return std::nullopt;
18981ad6265SDimitry Andric   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
19081ad6265SDimitry Andric   if (!MultilineTags.contains(Tag))
191*bdd1243dSDimitry Andric     return std::nullopt;
19281ad6265SDimitry Andric   return Line.substr(BeginPos);
19381ad6265SDimitry Andric }
19481ad6265SDimitry Andric 
19581ad6265SDimitry Andric // See if the line begins with the ending of an in-progress multi-line element.
19681ad6265SDimitry Andric // If so, return the ending.
parseMultiLineEnd(StringRef Line)197*bdd1243dSDimitry Andric std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
19881ad6265SDimitry Andric   size_t EndPos = Line.find("}}}");
19981ad6265SDimitry Andric   if (EndPos == StringRef::npos)
200*bdd1243dSDimitry Andric     return std::nullopt;
20181ad6265SDimitry Andric   return Line.take_front(EndPos + 3);
20281ad6265SDimitry Andric }
20381ad6265SDimitry Andric 
20481ad6265SDimitry Andric } // end namespace symbolize
20581ad6265SDimitry Andric } // end namespace llvm
206