12040b6dfSDaniel Thornburgh //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 22040b6dfSDaniel Thornburgh // 32040b6dfSDaniel Thornburgh // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42040b6dfSDaniel Thornburgh // See https://llvm.org/LICENSE.txt for license information. 52040b6dfSDaniel Thornburgh // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 62040b6dfSDaniel Thornburgh // 72040b6dfSDaniel Thornburgh //===----------------------------------------------------------------------===// 82040b6dfSDaniel Thornburgh /// 92040b6dfSDaniel Thornburgh /// \file 102040b6dfSDaniel Thornburgh /// This file defines the log symbolizer markup data model and parser. 112040b6dfSDaniel Thornburgh /// 122040b6dfSDaniel Thornburgh //===----------------------------------------------------------------------===// 132040b6dfSDaniel Thornburgh 142040b6dfSDaniel Thornburgh #include "llvm/DebugInfo/Symbolize/Markup.h" 152040b6dfSDaniel Thornburgh 168bd078b5SDaniel Thornburgh #include "llvm/ADT/STLExtras.h" 172040b6dfSDaniel Thornburgh 182040b6dfSDaniel Thornburgh namespace llvm { 192040b6dfSDaniel Thornburgh namespace symbolize { 202040b6dfSDaniel Thornburgh 212040b6dfSDaniel Thornburgh // Matches the following: 222040b6dfSDaniel Thornburgh // "\033[0m" 232040b6dfSDaniel Thornburgh // "\033[1m" 242040b6dfSDaniel Thornburgh // "\033[30m" -- "\033[37m" 252040b6dfSDaniel Thornburgh static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 262040b6dfSDaniel Thornburgh 278bd078b5SDaniel Thornburgh MarkupParser::MarkupParser(StringSet<> MultilineTags) 288bd078b5SDaniel Thornburgh : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} 292040b6dfSDaniel Thornburgh 302040b6dfSDaniel Thornburgh static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 312040b6dfSDaniel Thornburgh return Str.take_front(Pos - Str.begin()); 322040b6dfSDaniel Thornburgh } 332040b6dfSDaniel Thornburgh static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 342040b6dfSDaniel Thornburgh Str = Str.drop_front(Pos - Str.begin()); 352040b6dfSDaniel Thornburgh } 362040b6dfSDaniel Thornburgh 372040b6dfSDaniel Thornburgh void MarkupParser::parseLine(StringRef Line) { 382040b6dfSDaniel Thornburgh Buffer.clear(); 398bd078b5SDaniel Thornburgh NextIdx = 0; 408bd078b5SDaniel Thornburgh FinishedMultiline.clear(); 418bd078b5SDaniel Thornburgh this->Line = Line; 428bd078b5SDaniel Thornburgh } 438bd078b5SDaniel Thornburgh 4489fab98eSFangrui Song std::optional<MarkupNode> MarkupParser::nextNode() { 458bd078b5SDaniel Thornburgh // Pull something out of the buffer if possible. 468bd078b5SDaniel Thornburgh if (!Buffer.empty()) { 478bd078b5SDaniel Thornburgh if (NextIdx < Buffer.size()) 488bd078b5SDaniel Thornburgh return std::move(Buffer[NextIdx++]); 498bd078b5SDaniel Thornburgh NextIdx = 0; 508bd078b5SDaniel Thornburgh Buffer.clear(); 518bd078b5SDaniel Thornburgh } 528bd078b5SDaniel Thornburgh 538bd078b5SDaniel Thornburgh // The buffer is empty, so parse the next bit of the line. 548bd078b5SDaniel Thornburgh 558bd078b5SDaniel Thornburgh if (Line.empty()) 5611011599SKazu Hirata return std::nullopt; 578bd078b5SDaniel Thornburgh 588bd078b5SDaniel Thornburgh if (!InProgressMultiline.empty()) { 5989fab98eSFangrui Song if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { 608bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, *MultilineEnd); 618bd078b5SDaniel Thornburgh assert(FinishedMultiline.empty() && 628bd078b5SDaniel Thornburgh "At most one multi-line element can be finished at a time."); 638bd078b5SDaniel Thornburgh FinishedMultiline.swap(InProgressMultiline); 648bd078b5SDaniel Thornburgh // Parse the multi-line element as if it were contiguous. 658bd078b5SDaniel Thornburgh advanceTo(Line, MultilineEnd->end()); 668bd078b5SDaniel Thornburgh return *parseElement(FinishedMultiline); 678bd078b5SDaniel Thornburgh } 688bd078b5SDaniel Thornburgh 698bd078b5SDaniel Thornburgh // The whole line is part of the multi-line element. 708bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, Line); 718bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size()); 7211011599SKazu Hirata return std::nullopt; 738bd078b5SDaniel Thornburgh } 748bd078b5SDaniel Thornburgh 752040b6dfSDaniel Thornburgh // Find the first valid markup element, if any. 7689fab98eSFangrui Song if (std::optional<MarkupNode> Element = parseElement(Line)) { 772040b6dfSDaniel Thornburgh parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 782040b6dfSDaniel Thornburgh Buffer.push_back(std::move(*Element)); 792040b6dfSDaniel Thornburgh advanceTo(Line, Element->Text.end()); 808bd078b5SDaniel Thornburgh return nextNode(); 818bd078b5SDaniel Thornburgh } 828bd078b5SDaniel Thornburgh 838bd078b5SDaniel Thornburgh // Since there were no valid elements remaining, see if the line opens a 848bd078b5SDaniel Thornburgh // multi-line element. 8589fab98eSFangrui Song if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { 868bd078b5SDaniel Thornburgh // Emit any text before the element. 878bd078b5SDaniel Thornburgh parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); 888bd078b5SDaniel Thornburgh 898bd078b5SDaniel Thornburgh // Begin recording the multi-line element. 908bd078b5SDaniel Thornburgh llvm::append_range(InProgressMultiline, *MultilineBegin); 918bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size()); 928bd078b5SDaniel Thornburgh return nextNode(); 938bd078b5SDaniel Thornburgh } 948bd078b5SDaniel Thornburgh 952040b6dfSDaniel Thornburgh // The line doesn't contain any more markup elements, so emit it as text. 962040b6dfSDaniel Thornburgh parseTextOutsideMarkup(Line); 978bd078b5SDaniel Thornburgh Line = Line.drop_front(Line.size()); 988bd078b5SDaniel Thornburgh return nextNode(); 998bd078b5SDaniel Thornburgh } 1008bd078b5SDaniel Thornburgh 1018bd078b5SDaniel Thornburgh void MarkupParser::flush() { 10217e4c217SDaniel Thornburgh Buffer.clear(); 10317e4c217SDaniel Thornburgh NextIdx = 0; 10417e4c217SDaniel Thornburgh Line = {}; 1058bd078b5SDaniel Thornburgh if (InProgressMultiline.empty()) 1062040b6dfSDaniel Thornburgh return; 1078bd078b5SDaniel Thornburgh FinishedMultiline.swap(InProgressMultiline); 1088bd078b5SDaniel Thornburgh parseTextOutsideMarkup(FinishedMultiline); 1092040b6dfSDaniel Thornburgh } 1102040b6dfSDaniel Thornburgh 1112040b6dfSDaniel Thornburgh // Finds and returns the next valid markup element in the given line. Returns 112*595f1a6aSKazu Hirata // std::nullopt if the line contains no valid elements. 11389fab98eSFangrui Song std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 1142040b6dfSDaniel Thornburgh while (true) { 1152040b6dfSDaniel Thornburgh // Find next element using begin and end markers. 1162040b6dfSDaniel Thornburgh size_t BeginPos = Line.find("{{{"); 1172040b6dfSDaniel Thornburgh if (BeginPos == StringRef::npos) 11811011599SKazu Hirata return std::nullopt; 1192040b6dfSDaniel Thornburgh size_t EndPos = Line.find("}}}", BeginPos + 3); 1202040b6dfSDaniel Thornburgh if (EndPos == StringRef::npos) 12111011599SKazu Hirata return std::nullopt; 1222040b6dfSDaniel Thornburgh EndPos += 3; 1232040b6dfSDaniel Thornburgh MarkupNode Element; 1242040b6dfSDaniel Thornburgh Element.Text = Line.slice(BeginPos, EndPos); 1252040b6dfSDaniel Thornburgh Line = Line.substr(EndPos); 1262040b6dfSDaniel Thornburgh 1272040b6dfSDaniel Thornburgh // Parse tag. 1282040b6dfSDaniel Thornburgh StringRef Content = Element.Text.drop_front(3).drop_back(3); 1292040b6dfSDaniel Thornburgh StringRef FieldsContent; 1302040b6dfSDaniel Thornburgh std::tie(Element.Tag, FieldsContent) = Content.split(':'); 1312040b6dfSDaniel Thornburgh if (Element.Tag.empty()) 1322040b6dfSDaniel Thornburgh continue; 1332040b6dfSDaniel Thornburgh 1342040b6dfSDaniel Thornburgh // Parse fields. 1352040b6dfSDaniel Thornburgh if (!FieldsContent.empty()) 1362040b6dfSDaniel Thornburgh FieldsContent.split(Element.Fields, ":"); 1372040b6dfSDaniel Thornburgh else if (Content.back() == ':') 1382040b6dfSDaniel Thornburgh Element.Fields.push_back(FieldsContent); 1392040b6dfSDaniel Thornburgh 1402040b6dfSDaniel Thornburgh return Element; 1412040b6dfSDaniel Thornburgh } 1422040b6dfSDaniel Thornburgh } 1432040b6dfSDaniel Thornburgh 1442040b6dfSDaniel Thornburgh static MarkupNode textNode(StringRef Text) { 1452040b6dfSDaniel Thornburgh MarkupNode Node; 1462040b6dfSDaniel Thornburgh Node.Text = Text; 1472040b6dfSDaniel Thornburgh return Node; 1482040b6dfSDaniel Thornburgh } 1492040b6dfSDaniel Thornburgh 1502040b6dfSDaniel Thornburgh // Parses a region of text known to be outside any markup elements. Such text 1512040b6dfSDaniel Thornburgh // may still contain SGR control codes, so the region is further subdivided into 1522040b6dfSDaniel Thornburgh // control codes and true text regions. 1532040b6dfSDaniel Thornburgh void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 1542040b6dfSDaniel Thornburgh if (Text.empty()) 1552040b6dfSDaniel Thornburgh return; 1562040b6dfSDaniel Thornburgh SmallVector<StringRef> Matches; 1572040b6dfSDaniel Thornburgh while (SGRSyntax.match(Text, &Matches)) { 1582040b6dfSDaniel Thornburgh // Emit any text before the SGR element. 1592040b6dfSDaniel Thornburgh if (Matches.begin()->begin() != Text.begin()) 1602040b6dfSDaniel Thornburgh Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 1612040b6dfSDaniel Thornburgh 1622040b6dfSDaniel Thornburgh Buffer.push_back(textNode(*Matches.begin())); 1632040b6dfSDaniel Thornburgh advanceTo(Text, Matches.begin()->end()); 1642040b6dfSDaniel Thornburgh } 1652040b6dfSDaniel Thornburgh if (!Text.empty()) 1662040b6dfSDaniel Thornburgh Buffer.push_back(textNode(Text)); 1672040b6dfSDaniel Thornburgh } 1682040b6dfSDaniel Thornburgh 1698bd078b5SDaniel Thornburgh // Given that a line doesn't contain any valid markup, see if it ends with the 1708bd078b5SDaniel Thornburgh // start of a multi-line element. If so, returns the beginning. 17189fab98eSFangrui Song std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { 1728bd078b5SDaniel Thornburgh // A multi-line begin marker must be the last one on the line. 1738bd078b5SDaniel Thornburgh size_t BeginPos = Line.rfind("{{{"); 1748bd078b5SDaniel Thornburgh if (BeginPos == StringRef::npos) 17511011599SKazu Hirata return std::nullopt; 1768bd078b5SDaniel Thornburgh size_t BeginTagPos = BeginPos + 3; 1778bd078b5SDaniel Thornburgh 1788bd078b5SDaniel Thornburgh // If there are any end markers afterwards, the begin marker cannot belong to 1798bd078b5SDaniel Thornburgh // a multi-line element. 1808bd078b5SDaniel Thornburgh size_t EndPos = Line.find("}}}", BeginTagPos); 1818bd078b5SDaniel Thornburgh if (EndPos != StringRef::npos) 18211011599SKazu Hirata return std::nullopt; 1838bd078b5SDaniel Thornburgh 1848bd078b5SDaniel Thornburgh // Check whether the tag is registered multi-line. 1858bd078b5SDaniel Thornburgh size_t EndTagPos = Line.find(':', BeginTagPos); 1868bd078b5SDaniel Thornburgh if (EndTagPos == StringRef::npos) 18711011599SKazu Hirata return std::nullopt; 1888bd078b5SDaniel Thornburgh StringRef Tag = Line.slice(BeginTagPos, EndTagPos); 1898bd078b5SDaniel Thornburgh if (!MultilineTags.contains(Tag)) 19011011599SKazu Hirata return std::nullopt; 1918bd078b5SDaniel Thornburgh return Line.substr(BeginPos); 1928bd078b5SDaniel Thornburgh } 1938bd078b5SDaniel Thornburgh 1948bd078b5SDaniel Thornburgh // See if the line begins with the ending of an in-progress multi-line element. 1958bd078b5SDaniel Thornburgh // If so, return the ending. 19689fab98eSFangrui Song std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { 1978bd078b5SDaniel Thornburgh size_t EndPos = Line.find("}}}"); 1988bd078b5SDaniel Thornburgh if (EndPos == StringRef::npos) 19911011599SKazu Hirata return std::nullopt; 2008bd078b5SDaniel Thornburgh return Line.take_front(EndPos + 3); 2018bd078b5SDaniel Thornburgh } 2028bd078b5SDaniel Thornburgh 2032040b6dfSDaniel Thornburgh } // end namespace symbolize 2042040b6dfSDaniel Thornburgh } // end namespace llvm 205