181ad6265SDimitry Andric //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric /// 981ad6265SDimitry Andric /// \file 1081ad6265SDimitry Andric /// This file defines the log symbolizer markup data model and parser. 1181ad6265SDimitry Andric /// 1281ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1381ad6265SDimitry Andric 1481ad6265SDimitry Andric #include "llvm/DebugInfo/Symbolize/Markup.h" 1581ad6265SDimitry Andric 1681ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h" 1781ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h" 1881ad6265SDimitry Andric 1981ad6265SDimitry Andric namespace llvm { 2081ad6265SDimitry Andric namespace symbolize { 2181ad6265SDimitry Andric 2281ad6265SDimitry Andric // Matches the following: 2381ad6265SDimitry Andric // "\033[0m" 2481ad6265SDimitry Andric // "\033[1m" 2581ad6265SDimitry Andric // "\033[30m" -- "\033[37m" 2681ad6265SDimitry Andric static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 2781ad6265SDimitry Andric 2881ad6265SDimitry Andric MarkupParser::MarkupParser(StringSet<> MultilineTags) 2981ad6265SDimitry Andric : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} 3081ad6265SDimitry Andric 3181ad6265SDimitry Andric static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 3281ad6265SDimitry Andric return Str.take_front(Pos - Str.begin()); 3381ad6265SDimitry Andric } 3481ad6265SDimitry Andric static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 3581ad6265SDimitry Andric Str = Str.drop_front(Pos - Str.begin()); 3681ad6265SDimitry Andric } 3781ad6265SDimitry Andric 3881ad6265SDimitry Andric void MarkupParser::parseLine(StringRef Line) { 3981ad6265SDimitry Andric Buffer.clear(); 4081ad6265SDimitry Andric NextIdx = 0; 4181ad6265SDimitry Andric FinishedMultiline.clear(); 4281ad6265SDimitry Andric this->Line = Line; 4381ad6265SDimitry Andric } 4481ad6265SDimitry Andric 4581ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::nextNode() { 4681ad6265SDimitry Andric // Pull something out of the buffer if possible. 4781ad6265SDimitry Andric if (!Buffer.empty()) { 4881ad6265SDimitry Andric if (NextIdx < Buffer.size()) 4981ad6265SDimitry Andric return std::move(Buffer[NextIdx++]); 5081ad6265SDimitry Andric NextIdx = 0; 5181ad6265SDimitry Andric Buffer.clear(); 5281ad6265SDimitry Andric } 5381ad6265SDimitry Andric 5481ad6265SDimitry Andric // The buffer is empty, so parse the next bit of the line. 5581ad6265SDimitry Andric 5681ad6265SDimitry Andric if (Line.empty()) 5781ad6265SDimitry Andric return None; 5881ad6265SDimitry Andric 5981ad6265SDimitry Andric if (!InProgressMultiline.empty()) { 6081ad6265SDimitry Andric if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { 6181ad6265SDimitry Andric llvm::append_range(InProgressMultiline, *MultilineEnd); 6281ad6265SDimitry Andric assert(FinishedMultiline.empty() && 6381ad6265SDimitry Andric "At most one multi-line element can be finished at a time."); 6481ad6265SDimitry Andric FinishedMultiline.swap(InProgressMultiline); 6581ad6265SDimitry Andric // Parse the multi-line element as if it were contiguous. 6681ad6265SDimitry Andric advanceTo(Line, MultilineEnd->end()); 6781ad6265SDimitry Andric return *parseElement(FinishedMultiline); 6881ad6265SDimitry Andric } 6981ad6265SDimitry Andric 7081ad6265SDimitry Andric // The whole line is part of the multi-line element. 7181ad6265SDimitry Andric llvm::append_range(InProgressMultiline, Line); 7281ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 7381ad6265SDimitry Andric return None; 7481ad6265SDimitry Andric } 7581ad6265SDimitry Andric 7681ad6265SDimitry Andric // Find the first valid markup element, if any. 7781ad6265SDimitry Andric if (Optional<MarkupNode> Element = parseElement(Line)) { 7881ad6265SDimitry Andric parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 7981ad6265SDimitry Andric Buffer.push_back(std::move(*Element)); 8081ad6265SDimitry Andric advanceTo(Line, Element->Text.end()); 8181ad6265SDimitry Andric return nextNode(); 8281ad6265SDimitry Andric } 8381ad6265SDimitry Andric 8481ad6265SDimitry Andric // Since there were no valid elements remaining, see if the line opens a 8581ad6265SDimitry Andric // multi-line element. 8681ad6265SDimitry Andric if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { 8781ad6265SDimitry Andric // Emit any text before the element. 8881ad6265SDimitry Andric parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); 8981ad6265SDimitry Andric 9081ad6265SDimitry Andric // Begin recording the multi-line element. 9181ad6265SDimitry Andric llvm::append_range(InProgressMultiline, *MultilineBegin); 9281ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 9381ad6265SDimitry Andric return nextNode(); 9481ad6265SDimitry Andric } 9581ad6265SDimitry Andric 9681ad6265SDimitry Andric // The line doesn't contain any more markup elements, so emit it as text. 9781ad6265SDimitry Andric parseTextOutsideMarkup(Line); 9881ad6265SDimitry Andric Line = Line.drop_front(Line.size()); 9981ad6265SDimitry Andric return nextNode(); 10081ad6265SDimitry Andric } 10181ad6265SDimitry Andric 10281ad6265SDimitry Andric void MarkupParser::flush() { 103*fcaf7f86SDimitry Andric Buffer.clear(); 104*fcaf7f86SDimitry Andric NextIdx = 0; 105*fcaf7f86SDimitry Andric Line = {}; 10681ad6265SDimitry Andric if (InProgressMultiline.empty()) 10781ad6265SDimitry Andric return; 10881ad6265SDimitry Andric FinishedMultiline.swap(InProgressMultiline); 10981ad6265SDimitry Andric parseTextOutsideMarkup(FinishedMultiline); 11081ad6265SDimitry Andric } 11181ad6265SDimitry Andric 11281ad6265SDimitry Andric // Finds and returns the next valid markup element in the given line. Returns 11381ad6265SDimitry Andric // None if the line contains no valid elements. 11481ad6265SDimitry Andric Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 11581ad6265SDimitry Andric while (true) { 11681ad6265SDimitry Andric // Find next element using begin and end markers. 11781ad6265SDimitry Andric size_t BeginPos = Line.find("{{{"); 11881ad6265SDimitry Andric if (BeginPos == StringRef::npos) 11981ad6265SDimitry Andric return None; 12081ad6265SDimitry Andric size_t EndPos = Line.find("}}}", BeginPos + 3); 12181ad6265SDimitry Andric if (EndPos == StringRef::npos) 12281ad6265SDimitry Andric return None; 12381ad6265SDimitry Andric EndPos += 3; 12481ad6265SDimitry Andric MarkupNode Element; 12581ad6265SDimitry Andric Element.Text = Line.slice(BeginPos, EndPos); 12681ad6265SDimitry Andric Line = Line.substr(EndPos); 12781ad6265SDimitry Andric 12881ad6265SDimitry Andric // Parse tag. 12981ad6265SDimitry Andric StringRef Content = Element.Text.drop_front(3).drop_back(3); 13081ad6265SDimitry Andric StringRef FieldsContent; 13181ad6265SDimitry Andric std::tie(Element.Tag, FieldsContent) = Content.split(':'); 13281ad6265SDimitry Andric if (Element.Tag.empty()) 13381ad6265SDimitry Andric continue; 13481ad6265SDimitry Andric 13581ad6265SDimitry Andric // Parse fields. 13681ad6265SDimitry Andric if (!FieldsContent.empty()) 13781ad6265SDimitry Andric FieldsContent.split(Element.Fields, ":"); 13881ad6265SDimitry Andric else if (Content.back() == ':') 13981ad6265SDimitry Andric Element.Fields.push_back(FieldsContent); 14081ad6265SDimitry Andric 14181ad6265SDimitry Andric return Element; 14281ad6265SDimitry Andric } 14381ad6265SDimitry Andric } 14481ad6265SDimitry Andric 14581ad6265SDimitry Andric static MarkupNode textNode(StringRef Text) { 14681ad6265SDimitry Andric MarkupNode Node; 14781ad6265SDimitry Andric Node.Text = Text; 14881ad6265SDimitry Andric return Node; 14981ad6265SDimitry Andric } 15081ad6265SDimitry Andric 15181ad6265SDimitry Andric // Parses a region of text known to be outside any markup elements. Such text 15281ad6265SDimitry Andric // may still contain SGR control codes, so the region is further subdivided into 15381ad6265SDimitry Andric // control codes and true text regions. 15481ad6265SDimitry Andric void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 15581ad6265SDimitry Andric if (Text.empty()) 15681ad6265SDimitry Andric return; 15781ad6265SDimitry Andric SmallVector<StringRef> Matches; 15881ad6265SDimitry Andric while (SGRSyntax.match(Text, &Matches)) { 15981ad6265SDimitry Andric // Emit any text before the SGR element. 16081ad6265SDimitry Andric if (Matches.begin()->begin() != Text.begin()) 16181ad6265SDimitry Andric Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 16281ad6265SDimitry Andric 16381ad6265SDimitry Andric Buffer.push_back(textNode(*Matches.begin())); 16481ad6265SDimitry Andric advanceTo(Text, Matches.begin()->end()); 16581ad6265SDimitry Andric } 16681ad6265SDimitry Andric if (!Text.empty()) 16781ad6265SDimitry Andric Buffer.push_back(textNode(Text)); 16881ad6265SDimitry Andric } 16981ad6265SDimitry Andric 17081ad6265SDimitry Andric // Given that a line doesn't contain any valid markup, see if it ends with the 17181ad6265SDimitry Andric // start of a multi-line element. If so, returns the beginning. 17281ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { 17381ad6265SDimitry Andric // A multi-line begin marker must be the last one on the line. 17481ad6265SDimitry Andric size_t BeginPos = Line.rfind("{{{"); 17581ad6265SDimitry Andric if (BeginPos == StringRef::npos) 17681ad6265SDimitry Andric return None; 17781ad6265SDimitry Andric size_t BeginTagPos = BeginPos + 3; 17881ad6265SDimitry Andric 17981ad6265SDimitry Andric // If there are any end markers afterwards, the begin marker cannot belong to 18081ad6265SDimitry Andric // a multi-line element. 18181ad6265SDimitry Andric size_t EndPos = Line.find("}}}", BeginTagPos); 18281ad6265SDimitry Andric if (EndPos != StringRef::npos) 18381ad6265SDimitry Andric return None; 18481ad6265SDimitry Andric 18581ad6265SDimitry Andric // Check whether the tag is registered multi-line. 18681ad6265SDimitry Andric size_t EndTagPos = Line.find(':', BeginTagPos); 18781ad6265SDimitry Andric if (EndTagPos == StringRef::npos) 18881ad6265SDimitry Andric return None; 18981ad6265SDimitry Andric StringRef Tag = Line.slice(BeginTagPos, EndTagPos); 19081ad6265SDimitry Andric if (!MultilineTags.contains(Tag)) 19181ad6265SDimitry Andric return None; 19281ad6265SDimitry Andric return Line.substr(BeginPos); 19381ad6265SDimitry Andric } 19481ad6265SDimitry Andric 19581ad6265SDimitry Andric // See if the line begins with the ending of an in-progress multi-line element. 19681ad6265SDimitry Andric // If so, return the ending. 19781ad6265SDimitry Andric Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { 19881ad6265SDimitry Andric size_t EndPos = Line.find("}}}"); 19981ad6265SDimitry Andric if (EndPos == StringRef::npos) 20081ad6265SDimitry Andric return None; 20181ad6265SDimitry Andric return Line.take_front(EndPos + 3); 20281ad6265SDimitry Andric } 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric } // end namespace symbolize 20581ad6265SDimitry Andric } // end namespace llvm 206