1*81ad6265SDimitry Andric //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 2*81ad6265SDimitry Andric // 3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*81ad6265SDimitry Andric // 7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===// 8*81ad6265SDimitry Andric /// 9*81ad6265SDimitry Andric /// \file 10*81ad6265SDimitry Andric /// This file defines the log symbolizer markup data model and parser. 11*81ad6265SDimitry Andric /// 12*81ad6265SDimitry Andric //===----------------------------------------------------------------------===// 13*81ad6265SDimitry Andric 14*81ad6265SDimitry Andric #include "llvm/DebugInfo/Symbolize/Markup.h" 15*81ad6265SDimitry Andric 16*81ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h" 17*81ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h" 18*81ad6265SDimitry Andric 19*81ad6265SDimitry Andric namespace llvm { 20*81ad6265SDimitry Andric namespace symbolize { 21*81ad6265SDimitry Andric 22*81ad6265SDimitry Andric // Matches the following: 23*81ad6265SDimitry Andric // "\033[0m" 24*81ad6265SDimitry Andric // "\033[1m" 25*81ad6265SDimitry Andric // "\033[30m" -- "\033[37m" 26*81ad6265SDimitry Andric static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 27*81ad6265SDimitry Andric 28*81ad6265SDimitry Andric MarkupParser::MarkupParser(StringSet<> MultilineTags) 29*81ad6265SDimitry Andric : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} 30*81ad6265SDimitry Andric 31*81ad6265SDimitry Andric static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 32*81ad6265SDimitry Andric return Str.take_front(Pos - Str.begin()); 33*81ad6265SDimitry Andric } 34*81ad6265SDimitry Andric static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 35*81ad6265SDimitry Andric Str = Str.drop_front(Pos - Str.begin()); 36*81ad6265SDimitry Andric } 37*81ad6265SDimitry Andric 38*81ad6265SDimitry Andric void MarkupParser::parseLine(StringRef Line) { 39*81ad6265SDimitry Andric Buffer.clear(); 40*81ad6265SDimitry Andric NextIdx = 0; 41*81ad6265SDimitry Andric FinishedMultiline.clear(); 42*81ad6265SDimitry Andric this->Line = Line; 43*81ad6265SDimitry Andric } 44*81ad6265SDimitry Andric 45*81ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::nextNode() { 46*81ad6265SDimitry Andric // Pull something out of the buffer if possible. 47*81ad6265SDimitry Andric if (!Buffer.empty()) { 48*81ad6265SDimitry Andric if (NextIdx < Buffer.size()) 49*81ad6265SDimitry Andric return std::move(Buffer[NextIdx++]); 50*81ad6265SDimitry Andric NextIdx = 0; 51*81ad6265SDimitry Andric Buffer.clear(); 52*81ad6265SDimitry Andric } 53*81ad6265SDimitry Andric 54*81ad6265SDimitry Andric // The buffer is empty, so parse the next bit of the line. 55*81ad6265SDimitry Andric 56*81ad6265SDimitry Andric if (Line.empty()) 57*81ad6265SDimitry Andric return None; 58*81ad6265SDimitry Andric 59*81ad6265SDimitry Andric if (!InProgressMultiline.empty()) { 60*81ad6265SDimitry Andric if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { 61*81ad6265SDimitry Andric llvm::append_range(InProgressMultiline, *MultilineEnd); 62*81ad6265SDimitry Andric assert(FinishedMultiline.empty() && 63*81ad6265SDimitry Andric "At most one multi-line element can be finished at a time."); 64*81ad6265SDimitry Andric FinishedMultiline.swap(InProgressMultiline); 65*81ad6265SDimitry Andric // Parse the multi-line element as if it were contiguous. 66*81ad6265SDimitry Andric advanceTo(Line, MultilineEnd->end()); 67*81ad6265SDimitry Andric return *parseElement(FinishedMultiline); 68*81ad6265SDimitry Andric } 69*81ad6265SDimitry Andric 70*81ad6265SDimitry Andric // The whole line is part of the multi-line element. 71*81ad6265SDimitry Andric llvm::append_range(InProgressMultiline, Line); 72*81ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 73*81ad6265SDimitry Andric return None; 74*81ad6265SDimitry Andric } 75*81ad6265SDimitry Andric 76*81ad6265SDimitry Andric // Find the first valid markup element, if any. 77*81ad6265SDimitry Andric if (Optional<MarkupNode> Element = parseElement(Line)) { 78*81ad6265SDimitry Andric parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 79*81ad6265SDimitry Andric Buffer.push_back(std::move(*Element)); 80*81ad6265SDimitry Andric advanceTo(Line, Element->Text.end()); 81*81ad6265SDimitry Andric return nextNode(); 82*81ad6265SDimitry Andric } 83*81ad6265SDimitry Andric 84*81ad6265SDimitry Andric // Since there were no valid elements remaining, see if the line opens a 85*81ad6265SDimitry Andric // multi-line element. 86*81ad6265SDimitry Andric if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { 87*81ad6265SDimitry Andric // Emit any text before the element. 88*81ad6265SDimitry Andric parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); 89*81ad6265SDimitry Andric 90*81ad6265SDimitry Andric // Begin recording the multi-line element. 91*81ad6265SDimitry Andric llvm::append_range(InProgressMultiline, *MultilineBegin); 92*81ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 93*81ad6265SDimitry Andric return nextNode(); 94*81ad6265SDimitry Andric } 95*81ad6265SDimitry Andric 96*81ad6265SDimitry Andric // The line doesn't contain any more markup elements, so emit it as text. 97*81ad6265SDimitry Andric parseTextOutsideMarkup(Line); 98*81ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 99*81ad6265SDimitry Andric return nextNode(); 100*81ad6265SDimitry Andric } 101*81ad6265SDimitry Andric 102*81ad6265SDimitry Andric void MarkupParser::flush() { 103*81ad6265SDimitry Andric if (InProgressMultiline.empty()) 104*81ad6265SDimitry Andric return; 105*81ad6265SDimitry Andric FinishedMultiline.swap(InProgressMultiline); 106*81ad6265SDimitry Andric parseTextOutsideMarkup(FinishedMultiline); 107*81ad6265SDimitry Andric } 108*81ad6265SDimitry Andric 109*81ad6265SDimitry Andric // Finds and returns the next valid markup element in the given line. Returns 110*81ad6265SDimitry Andric // None if the line contains no valid elements. 111*81ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 112*81ad6265SDimitry Andric while (true) { 113*81ad6265SDimitry Andric // Find next element using begin and end markers. 114*81ad6265SDimitry Andric size_t BeginPos = Line.find("{{{"); 115*81ad6265SDimitry Andric if (BeginPos == StringRef::npos) 116*81ad6265SDimitry Andric return None; 117*81ad6265SDimitry Andric size_t EndPos = Line.find("}}}", BeginPos + 3); 118*81ad6265SDimitry Andric if (EndPos == StringRef::npos) 119*81ad6265SDimitry Andric return None; 120*81ad6265SDimitry Andric EndPos += 3; 121*81ad6265SDimitry Andric MarkupNode Element; 122*81ad6265SDimitry Andric Element.Text = Line.slice(BeginPos, EndPos); 123*81ad6265SDimitry Andric Line = Line.substr(EndPos); 124*81ad6265SDimitry Andric 125*81ad6265SDimitry Andric // Parse tag. 126*81ad6265SDimitry Andric StringRef Content = Element.Text.drop_front(3).drop_back(3); 127*81ad6265SDimitry Andric StringRef FieldsContent; 128*81ad6265SDimitry Andric std::tie(Element.Tag, FieldsContent) = Content.split(':'); 129*81ad6265SDimitry Andric if (Element.Tag.empty()) 130*81ad6265SDimitry Andric continue; 131*81ad6265SDimitry Andric 132*81ad6265SDimitry Andric // Parse fields. 133*81ad6265SDimitry Andric if (!FieldsContent.empty()) 134*81ad6265SDimitry Andric FieldsContent.split(Element.Fields, ":"); 135*81ad6265SDimitry Andric else if (Content.back() == ':') 136*81ad6265SDimitry Andric Element.Fields.push_back(FieldsContent); 137*81ad6265SDimitry Andric 138*81ad6265SDimitry Andric return Element; 139*81ad6265SDimitry Andric } 140*81ad6265SDimitry Andric } 141*81ad6265SDimitry Andric 142*81ad6265SDimitry Andric static MarkupNode textNode(StringRef Text) { 143*81ad6265SDimitry Andric MarkupNode Node; 144*81ad6265SDimitry Andric Node.Text = Text; 145*81ad6265SDimitry Andric return Node; 146*81ad6265SDimitry Andric } 147*81ad6265SDimitry Andric 148*81ad6265SDimitry Andric // Parses a region of text known to be outside any markup elements. Such text 149*81ad6265SDimitry Andric // may still contain SGR control codes, so the region is further subdivided into 150*81ad6265SDimitry Andric // control codes and true text regions. 151*81ad6265SDimitry Andric void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 152*81ad6265SDimitry Andric if (Text.empty()) 153*81ad6265SDimitry Andric return; 154*81ad6265SDimitry Andric SmallVector<StringRef> Matches; 155*81ad6265SDimitry Andric while (SGRSyntax.match(Text, &Matches)) { 156*81ad6265SDimitry Andric // Emit any text before the SGR element. 157*81ad6265SDimitry Andric if (Matches.begin()->begin() != Text.begin()) 158*81ad6265SDimitry Andric Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 159*81ad6265SDimitry Andric 160*81ad6265SDimitry Andric Buffer.push_back(textNode(*Matches.begin())); 161*81ad6265SDimitry Andric advanceTo(Text, Matches.begin()->end()); 162*81ad6265SDimitry Andric } 163*81ad6265SDimitry Andric if (!Text.empty()) 164*81ad6265SDimitry Andric Buffer.push_back(textNode(Text)); 165*81ad6265SDimitry Andric } 166*81ad6265SDimitry Andric 167*81ad6265SDimitry Andric // Given that a line doesn't contain any valid markup, see if it ends with the 168*81ad6265SDimitry Andric // start of a multi-line element. If so, returns the beginning. 169*81ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { 170*81ad6265SDimitry Andric // A multi-line begin marker must be the last one on the line. 171*81ad6265SDimitry Andric size_t BeginPos = Line.rfind("{{{"); 172*81ad6265SDimitry Andric if (BeginPos == StringRef::npos) 173*81ad6265SDimitry Andric return None; 174*81ad6265SDimitry Andric size_t BeginTagPos = BeginPos + 3; 175*81ad6265SDimitry Andric 176*81ad6265SDimitry Andric // If there are any end markers afterwards, the begin marker cannot belong to 177*81ad6265SDimitry Andric // a multi-line element. 178*81ad6265SDimitry Andric size_t EndPos = Line.find("}}}", BeginTagPos); 179*81ad6265SDimitry Andric if (EndPos != StringRef::npos) 180*81ad6265SDimitry Andric return None; 181*81ad6265SDimitry Andric 182*81ad6265SDimitry Andric // Check whether the tag is registered multi-line. 183*81ad6265SDimitry Andric size_t EndTagPos = Line.find(':', BeginTagPos); 184*81ad6265SDimitry Andric if (EndTagPos == StringRef::npos) 185*81ad6265SDimitry Andric return None; 186*81ad6265SDimitry Andric StringRef Tag = Line.slice(BeginTagPos, EndTagPos); 187*81ad6265SDimitry Andric if (!MultilineTags.contains(Tag)) 188*81ad6265SDimitry Andric return None; 189*81ad6265SDimitry Andric return Line.substr(BeginPos); 190*81ad6265SDimitry Andric } 191*81ad6265SDimitry Andric 192*81ad6265SDimitry Andric // See if the line begins with the ending of an in-progress multi-line element. 193*81ad6265SDimitry Andric // If so, return the ending. 194*81ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { 195*81ad6265SDimitry Andric size_t EndPos = Line.find("}}}"); 196*81ad6265SDimitry Andric if (EndPos == StringRef::npos) 197*81ad6265SDimitry Andric return None; 198*81ad6265SDimitry Andric return Line.take_front(EndPos + 3); 199*81ad6265SDimitry Andric } 200*81ad6265SDimitry Andric 201*81ad6265SDimitry Andric } // end namespace symbolize 202*81ad6265SDimitry Andric } // end namespace llvm 203