xref: /llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp (revision 0060c54e0da6d1429875da2d30895faa7562b706)
12040b6dfSDaniel Thornburgh //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
22040b6dfSDaniel Thornburgh //
32040b6dfSDaniel Thornburgh // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42040b6dfSDaniel Thornburgh // See https://llvm.org/LICENSE.txt for license information.
52040b6dfSDaniel Thornburgh // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
62040b6dfSDaniel Thornburgh //
72040b6dfSDaniel Thornburgh //===----------------------------------------------------------------------===//
82040b6dfSDaniel Thornburgh ///
92040b6dfSDaniel Thornburgh /// \file
102040b6dfSDaniel Thornburgh /// This file defines the log symbolizer markup data model and parser.
112040b6dfSDaniel Thornburgh ///
122040b6dfSDaniel Thornburgh //===----------------------------------------------------------------------===//
132040b6dfSDaniel Thornburgh 
142040b6dfSDaniel Thornburgh #include "llvm/DebugInfo/Symbolize/Markup.h"
152040b6dfSDaniel Thornburgh 
168bd078b5SDaniel Thornburgh #include "llvm/ADT/STLExtras.h"
172040b6dfSDaniel Thornburgh 
182040b6dfSDaniel Thornburgh namespace llvm {
192040b6dfSDaniel Thornburgh namespace symbolize {
202040b6dfSDaniel Thornburgh 
// Regular expression for the SGR control codes that the parser splits out of
// plain text. It accepts exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
// The doubled backslash escapes '[' for the regex engine so that it is matched
// literally rather than opening a bracket expression.
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
262040b6dfSDaniel Thornburgh 
278bd078b5SDaniel Thornburgh MarkupParser::MarkupParser(StringSet<> MultilineTags)
288bd078b5SDaniel Thornburgh     : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
292040b6dfSDaniel Thornburgh 
302040b6dfSDaniel Thornburgh static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
312040b6dfSDaniel Thornburgh   return Str.take_front(Pos - Str.begin());
322040b6dfSDaniel Thornburgh }
332040b6dfSDaniel Thornburgh static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
342040b6dfSDaniel Thornburgh   Str = Str.drop_front(Pos - Str.begin());
352040b6dfSDaniel Thornburgh }
362040b6dfSDaniel Thornburgh 
372040b6dfSDaniel Thornburgh void MarkupParser::parseLine(StringRef Line) {
382040b6dfSDaniel Thornburgh   Buffer.clear();
398bd078b5SDaniel Thornburgh   NextIdx = 0;
408bd078b5SDaniel Thornburgh   FinishedMultiline.clear();
418bd078b5SDaniel Thornburgh   this->Line = Line;
428bd078b5SDaniel Thornburgh }
438bd078b5SDaniel Thornburgh 
4489fab98eSFangrui Song std::optional<MarkupNode> MarkupParser::nextNode() {
458bd078b5SDaniel Thornburgh   // Pull something out of the buffer if possible.
468bd078b5SDaniel Thornburgh   if (!Buffer.empty()) {
478bd078b5SDaniel Thornburgh     if (NextIdx < Buffer.size())
488bd078b5SDaniel Thornburgh       return std::move(Buffer[NextIdx++]);
498bd078b5SDaniel Thornburgh     NextIdx = 0;
508bd078b5SDaniel Thornburgh     Buffer.clear();
518bd078b5SDaniel Thornburgh   }
528bd078b5SDaniel Thornburgh 
538bd078b5SDaniel Thornburgh   // The buffer is empty, so parse the next bit of the line.
548bd078b5SDaniel Thornburgh 
558bd078b5SDaniel Thornburgh   if (Line.empty())
5611011599SKazu Hirata     return std::nullopt;
578bd078b5SDaniel Thornburgh 
588bd078b5SDaniel Thornburgh   if (!InProgressMultiline.empty()) {
5989fab98eSFangrui Song     if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
608bd078b5SDaniel Thornburgh       llvm::append_range(InProgressMultiline, *MultilineEnd);
618bd078b5SDaniel Thornburgh       assert(FinishedMultiline.empty() &&
628bd078b5SDaniel Thornburgh              "At most one multi-line element can be finished at a time.");
638bd078b5SDaniel Thornburgh       FinishedMultiline.swap(InProgressMultiline);
648bd078b5SDaniel Thornburgh       // Parse the multi-line element as if it were contiguous.
658bd078b5SDaniel Thornburgh       advanceTo(Line, MultilineEnd->end());
668bd078b5SDaniel Thornburgh       return *parseElement(FinishedMultiline);
678bd078b5SDaniel Thornburgh     }
688bd078b5SDaniel Thornburgh 
698bd078b5SDaniel Thornburgh     // The whole line is part of the multi-line element.
708bd078b5SDaniel Thornburgh     llvm::append_range(InProgressMultiline, Line);
718bd078b5SDaniel Thornburgh     Line = Line.drop_front(Line.size());
7211011599SKazu Hirata     return std::nullopt;
738bd078b5SDaniel Thornburgh   }
748bd078b5SDaniel Thornburgh 
752040b6dfSDaniel Thornburgh   // Find the first valid markup element, if any.
7689fab98eSFangrui Song   if (std::optional<MarkupNode> Element = parseElement(Line)) {
772040b6dfSDaniel Thornburgh     parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
782040b6dfSDaniel Thornburgh     Buffer.push_back(std::move(*Element));
792040b6dfSDaniel Thornburgh     advanceTo(Line, Element->Text.end());
808bd078b5SDaniel Thornburgh     return nextNode();
818bd078b5SDaniel Thornburgh   }
828bd078b5SDaniel Thornburgh 
838bd078b5SDaniel Thornburgh   // Since there were no valid elements remaining, see if the line opens a
848bd078b5SDaniel Thornburgh   // multi-line element.
8589fab98eSFangrui Song   if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
868bd078b5SDaniel Thornburgh     // Emit any text before the element.
878bd078b5SDaniel Thornburgh     parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
888bd078b5SDaniel Thornburgh 
898bd078b5SDaniel Thornburgh     // Begin recording the multi-line element.
908bd078b5SDaniel Thornburgh     llvm::append_range(InProgressMultiline, *MultilineBegin);
918bd078b5SDaniel Thornburgh     Line = Line.drop_front(Line.size());
928bd078b5SDaniel Thornburgh     return nextNode();
938bd078b5SDaniel Thornburgh   }
948bd078b5SDaniel Thornburgh 
952040b6dfSDaniel Thornburgh   // The line doesn't contain any more markup elements, so emit it as text.
962040b6dfSDaniel Thornburgh   parseTextOutsideMarkup(Line);
978bd078b5SDaniel Thornburgh   Line = Line.drop_front(Line.size());
988bd078b5SDaniel Thornburgh   return nextNode();
998bd078b5SDaniel Thornburgh }
1008bd078b5SDaniel Thornburgh 
1018bd078b5SDaniel Thornburgh void MarkupParser::flush() {
10217e4c217SDaniel Thornburgh   Buffer.clear();
10317e4c217SDaniel Thornburgh   NextIdx = 0;
10417e4c217SDaniel Thornburgh   Line = {};
1058bd078b5SDaniel Thornburgh   if (InProgressMultiline.empty())
1062040b6dfSDaniel Thornburgh     return;
1078bd078b5SDaniel Thornburgh   FinishedMultiline.swap(InProgressMultiline);
1088bd078b5SDaniel Thornburgh   parseTextOutsideMarkup(FinishedMultiline);
1092040b6dfSDaniel Thornburgh }
1102040b6dfSDaniel Thornburgh 
1112040b6dfSDaniel Thornburgh // Finds and returns the next valid markup element in the given line. Returns
112*595f1a6aSKazu Hirata // std::nullopt if the line contains no valid elements.
11389fab98eSFangrui Song std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
1142040b6dfSDaniel Thornburgh   while (true) {
1152040b6dfSDaniel Thornburgh     // Find next element using begin and end markers.
1162040b6dfSDaniel Thornburgh     size_t BeginPos = Line.find("{{{");
1172040b6dfSDaniel Thornburgh     if (BeginPos == StringRef::npos)
11811011599SKazu Hirata       return std::nullopt;
1192040b6dfSDaniel Thornburgh     size_t EndPos = Line.find("}}}", BeginPos + 3);
1202040b6dfSDaniel Thornburgh     if (EndPos == StringRef::npos)
12111011599SKazu Hirata       return std::nullopt;
1222040b6dfSDaniel Thornburgh     EndPos += 3;
1232040b6dfSDaniel Thornburgh     MarkupNode Element;
1242040b6dfSDaniel Thornburgh     Element.Text = Line.slice(BeginPos, EndPos);
1252040b6dfSDaniel Thornburgh     Line = Line.substr(EndPos);
1262040b6dfSDaniel Thornburgh 
1272040b6dfSDaniel Thornburgh     // Parse tag.
1282040b6dfSDaniel Thornburgh     StringRef Content = Element.Text.drop_front(3).drop_back(3);
1292040b6dfSDaniel Thornburgh     StringRef FieldsContent;
1302040b6dfSDaniel Thornburgh     std::tie(Element.Tag, FieldsContent) = Content.split(':');
1312040b6dfSDaniel Thornburgh     if (Element.Tag.empty())
1322040b6dfSDaniel Thornburgh       continue;
1332040b6dfSDaniel Thornburgh 
1342040b6dfSDaniel Thornburgh     // Parse fields.
1352040b6dfSDaniel Thornburgh     if (!FieldsContent.empty())
1362040b6dfSDaniel Thornburgh       FieldsContent.split(Element.Fields, ":");
1372040b6dfSDaniel Thornburgh     else if (Content.back() == ':')
1382040b6dfSDaniel Thornburgh       Element.Fields.push_back(FieldsContent);
1392040b6dfSDaniel Thornburgh 
1402040b6dfSDaniel Thornburgh     return Element;
1412040b6dfSDaniel Thornburgh   }
1422040b6dfSDaniel Thornburgh }
1432040b6dfSDaniel Thornburgh 
1442040b6dfSDaniel Thornburgh static MarkupNode textNode(StringRef Text) {
1452040b6dfSDaniel Thornburgh   MarkupNode Node;
1462040b6dfSDaniel Thornburgh   Node.Text = Text;
1472040b6dfSDaniel Thornburgh   return Node;
1482040b6dfSDaniel Thornburgh }
1492040b6dfSDaniel Thornburgh 
1502040b6dfSDaniel Thornburgh // Parses a region of text known to be outside any markup elements. Such text
1512040b6dfSDaniel Thornburgh // may still contain SGR control codes, so the region is further subdivided into
1522040b6dfSDaniel Thornburgh // control codes and true text regions.
1532040b6dfSDaniel Thornburgh void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
1542040b6dfSDaniel Thornburgh   if (Text.empty())
1552040b6dfSDaniel Thornburgh     return;
1562040b6dfSDaniel Thornburgh   SmallVector<StringRef> Matches;
1572040b6dfSDaniel Thornburgh   while (SGRSyntax.match(Text, &Matches)) {
1582040b6dfSDaniel Thornburgh     // Emit any text before the SGR element.
1592040b6dfSDaniel Thornburgh     if (Matches.begin()->begin() != Text.begin())
1602040b6dfSDaniel Thornburgh       Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
1612040b6dfSDaniel Thornburgh 
1622040b6dfSDaniel Thornburgh     Buffer.push_back(textNode(*Matches.begin()));
1632040b6dfSDaniel Thornburgh     advanceTo(Text, Matches.begin()->end());
1642040b6dfSDaniel Thornburgh   }
1652040b6dfSDaniel Thornburgh   if (!Text.empty())
1662040b6dfSDaniel Thornburgh     Buffer.push_back(textNode(Text));
1672040b6dfSDaniel Thornburgh }
1682040b6dfSDaniel Thornburgh 
1698bd078b5SDaniel Thornburgh // Given that a line doesn't contain any valid markup, see if it ends with the
1708bd078b5SDaniel Thornburgh // start of a multi-line element. If so, returns the beginning.
17189fab98eSFangrui Song std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
1728bd078b5SDaniel Thornburgh   // A multi-line begin marker must be the last one on the line.
1738bd078b5SDaniel Thornburgh   size_t BeginPos = Line.rfind("{{{");
1748bd078b5SDaniel Thornburgh   if (BeginPos == StringRef::npos)
17511011599SKazu Hirata     return std::nullopt;
1768bd078b5SDaniel Thornburgh   size_t BeginTagPos = BeginPos + 3;
1778bd078b5SDaniel Thornburgh 
1788bd078b5SDaniel Thornburgh   // If there are any end markers afterwards, the begin marker cannot belong to
1798bd078b5SDaniel Thornburgh   // a multi-line element.
1808bd078b5SDaniel Thornburgh   size_t EndPos = Line.find("}}}", BeginTagPos);
1818bd078b5SDaniel Thornburgh   if (EndPos != StringRef::npos)
18211011599SKazu Hirata     return std::nullopt;
1838bd078b5SDaniel Thornburgh 
1848bd078b5SDaniel Thornburgh   // Check whether the tag is registered multi-line.
1858bd078b5SDaniel Thornburgh   size_t EndTagPos = Line.find(':', BeginTagPos);
1868bd078b5SDaniel Thornburgh   if (EndTagPos == StringRef::npos)
18711011599SKazu Hirata     return std::nullopt;
1888bd078b5SDaniel Thornburgh   StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
1898bd078b5SDaniel Thornburgh   if (!MultilineTags.contains(Tag))
19011011599SKazu Hirata     return std::nullopt;
1918bd078b5SDaniel Thornburgh   return Line.substr(BeginPos);
1928bd078b5SDaniel Thornburgh }
1938bd078b5SDaniel Thornburgh 
1948bd078b5SDaniel Thornburgh // See if the line begins with the ending of an in-progress multi-line element.
1958bd078b5SDaniel Thornburgh // If so, return the ending.
19689fab98eSFangrui Song std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
1978bd078b5SDaniel Thornburgh   size_t EndPos = Line.find("}}}");
1988bd078b5SDaniel Thornburgh   if (EndPos == StringRef::npos)
19911011599SKazu Hirata     return std::nullopt;
2008bd078b5SDaniel Thornburgh   return Line.take_front(EndPos + 3);
2018bd078b5SDaniel Thornburgh }
2028bd078b5SDaniel Thornburgh 
2032040b6dfSDaniel Thornburgh } // end namespace symbolize
2042040b6dfSDaniel Thornburgh } // end namespace llvm
205