1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the log symbolizer markup data model and parser. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/DebugInfo/Symbolize/Markup.h" 15 16 #include "llvm/ADT/STLExtras.h" 17 18 namespace llvm { 19 namespace symbolize { 20 21 // Matches the following: 22 // "\033[0m" 23 // "\033[1m" 24 // "\033[30m" -- "\033[37m" 25 static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 26 27 MarkupParser::MarkupParser(StringSet<> MultilineTags) 28 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} 29 30 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 31 return Str.take_front(Pos - Str.begin()); 32 } 33 static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 34 Str = Str.drop_front(Pos - Str.begin()); 35 } 36 37 void MarkupParser::parseLine(StringRef Line) { 38 Buffer.clear(); 39 NextIdx = 0; 40 FinishedMultiline.clear(); 41 this->Line = Line; 42 } 43 44 std::optional<MarkupNode> MarkupParser::nextNode() { 45 // Pull something out of the buffer if possible. 46 if (!Buffer.empty()) { 47 if (NextIdx < Buffer.size()) 48 return std::move(Buffer[NextIdx++]); 49 NextIdx = 0; 50 Buffer.clear(); 51 } 52 53 // The buffer is empty, so parse the next bit of the line. 54 55 if (Line.empty()) 56 return std::nullopt; 57 58 if (!InProgressMultiline.empty()) { 59 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { 60 llvm::append_range(InProgressMultiline, *MultilineEnd); 61 assert(FinishedMultiline.empty() && 62 "At most one multi-line element can be finished at a time."); 63 FinishedMultiline.swap(InProgressMultiline); 64 // Parse the multi-line element as if it were contiguous. 65 advanceTo(Line, MultilineEnd->end()); 66 return *parseElement(FinishedMultiline); 67 } 68 69 // The whole line is part of the multi-line element. 70 llvm::append_range(InProgressMultiline, Line); 71 Line = Line.drop_front(Line.size()); 72 return std::nullopt; 73 } 74 75 // Find the first valid markup element, if any. 76 if (std::optional<MarkupNode> Element = parseElement(Line)) { 77 parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 78 Buffer.push_back(std::move(*Element)); 79 advanceTo(Line, Element->Text.end()); 80 return nextNode(); 81 } 82 83 // Since there were no valid elements remaining, see if the line opens a 84 // multi-line element. 85 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { 86 // Emit any text before the element. 87 parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); 88 89 // Begin recording the multi-line element. 90 llvm::append_range(InProgressMultiline, *MultilineBegin); 91 Line = Line.drop_front(Line.size()); 92 return nextNode(); 93 } 94 95 // The line doesn't contain any more markup elements, so emit it as text. 96 parseTextOutsideMarkup(Line); 97 Line = Line.drop_front(Line.size()); 98 return nextNode(); 99 } 100 101 void MarkupParser::flush() { 102 Buffer.clear(); 103 NextIdx = 0; 104 Line = {}; 105 if (InProgressMultiline.empty()) 106 return; 107 FinishedMultiline.swap(InProgressMultiline); 108 parseTextOutsideMarkup(FinishedMultiline); 109 } 110 111 // Finds and returns the next valid markup element in the given line. Returns 112 // std::nullopt if the line contains no valid elements. 113 std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 114 while (true) { 115 // Find next element using begin and end markers. 116 size_t BeginPos = Line.find("{{{"); 117 if (BeginPos == StringRef::npos) 118 return std::nullopt; 119 size_t EndPos = Line.find("}}}", BeginPos + 3); 120 if (EndPos == StringRef::npos) 121 return std::nullopt; 122 EndPos += 3; 123 MarkupNode Element; 124 Element.Text = Line.slice(BeginPos, EndPos); 125 Line = Line.substr(EndPos); 126 127 // Parse tag. 128 StringRef Content = Element.Text.drop_front(3).drop_back(3); 129 StringRef FieldsContent; 130 std::tie(Element.Tag, FieldsContent) = Content.split(':'); 131 if (Element.Tag.empty()) 132 continue; 133 134 // Parse fields. 135 if (!FieldsContent.empty()) 136 FieldsContent.split(Element.Fields, ":"); 137 else if (Content.back() == ':') 138 Element.Fields.push_back(FieldsContent); 139 140 return Element; 141 } 142 } 143 144 static MarkupNode textNode(StringRef Text) { 145 MarkupNode Node; 146 Node.Text = Text; 147 return Node; 148 } 149 150 // Parses a region of text known to be outside any markup elements. Such text 151 // may still contain SGR control codes, so the region is further subdivided into 152 // control codes and true text regions. 153 void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 154 if (Text.empty()) 155 return; 156 SmallVector<StringRef> Matches; 157 while (SGRSyntax.match(Text, &Matches)) { 158 // Emit any text before the SGR element. 159 if (Matches.begin()->begin() != Text.begin()) 160 Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 161 162 Buffer.push_back(textNode(*Matches.begin())); 163 advanceTo(Text, Matches.begin()->end()); 164 } 165 if (!Text.empty()) 166 Buffer.push_back(textNode(Text)); 167 } 168 169 // Given that a line doesn't contain any valid markup, see if it ends with the 170 // start of a multi-line element. If so, returns the beginning. 171 std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { 172 // A multi-line begin marker must be the last one on the line. 173 size_t BeginPos = Line.rfind("{{{"); 174 if (BeginPos == StringRef::npos) 175 return std::nullopt; 176 size_t BeginTagPos = BeginPos + 3; 177 178 // If there are any end markers afterwards, the begin marker cannot belong to 179 // a multi-line element. 180 size_t EndPos = Line.find("}}}", BeginTagPos); 181 if (EndPos != StringRef::npos) 182 return std::nullopt; 183 184 // Check whether the tag is registered multi-line. 185 size_t EndTagPos = Line.find(':', BeginTagPos); 186 if (EndTagPos == StringRef::npos) 187 return std::nullopt; 188 StringRef Tag = Line.slice(BeginTagPos, EndTagPos); 189 if (!MultilineTags.contains(Tag)) 190 return std::nullopt; 191 return Line.substr(BeginPos); 192 } 193 194 // See if the line begins with the ending of an in-progress multi-line element. 195 // If so, return the ending. 196 std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { 197 size_t EndPos = Line.find("}}}"); 198 if (EndPos == StringRef::npos) 199 return std::nullopt; 200 return Line.take_front(EndPos + 3); 201 } 202 203 } // end namespace symbolize 204 } // end namespace llvm 205