1 //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // SequenceToOffsetTable can be used to emit a number of sequences as one big 10 // array. Uses the same memory when a sequence is a suffix of another. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 15 #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 16 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/raw_ostream.h" 20 #include <algorithm> 21 #include <cassert> 22 #include <functional> 23 #include <map> 24 25 namespace llvm { 26 extern cl::opt<bool> EmitLongStrLiterals; 27 28 inline void printChar(raw_ostream &OS, char C) { 29 unsigned char UC(C); 30 if (isAlnum(UC) || isPunct(UC)) { 31 OS << '\''; 32 if (C == '\\' || C == '\'') 33 OS << '\\'; 34 OS << C << '\''; 35 } else { 36 OS << unsigned(UC); 37 } 38 } 39 40 /// SequenceToOffsetTable - Collect a number of terminated sequences of T. 41 /// Compute the layout of a table that contains all the sequences, possibly by 42 /// reusing entries. 43 /// 44 /// @tparam SeqT The sequence container. (vector or string). 45 /// @tparam Less A stable comparator for SeqT elements. 46 template <typename SeqT, typename Less = std::less<typename SeqT::value_type>> 47 class SequenceToOffsetTable { 48 typedef typename SeqT::value_type ElemT; 49 50 // Define a comparator for SeqT that sorts a suffix immediately before a 51 // sequence with that suffix. 52 struct SeqLess { 53 Less L; 54 bool operator()(const SeqT &A, const SeqT &B) const { 55 return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(), 56 B.rend(), L); 57 } 58 }; 59 60 // Keep sequences ordered according to SeqLess so suffixes are easy to find. 61 // Map each sequence to its offset in the table. 62 typedef std::map<SeqT, unsigned, SeqLess> SeqMap; 63 64 // Sequences added so far, with suffixes removed. 65 SeqMap Seqs; 66 67 // Terminator element to be appended to each added sequence. 68 std::optional<ElemT> Terminator; 69 70 // True if `layout` method was called. 71 bool IsLaidOut = false; 72 73 // Entries in the final table, or 0 before layout was called. 74 unsigned Entries = 0; 75 76 // isSuffix - Returns true if A is a suffix of B. 77 static bool isSuffix(const SeqT &A, const SeqT &B) { 78 return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin()); 79 } 80 81 public: 82 explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT()) 83 : Terminator(Terminator) {} 84 85 /// add - Add a sequence to the table. 86 /// This must be called before layout(). 87 void add(const SeqT &Seq) { 88 assert(!IsLaidOut && "Cannot call add() after layout()"); 89 typename SeqMap::iterator I = Seqs.lower_bound(Seq); 90 91 // If SeqMap contains a sequence that has Seq as a suffix, I will be 92 // pointing to it. 93 if (I != Seqs.end() && isSuffix(Seq, I->first)) 94 return; 95 96 I = Seqs.insert(I, {Seq, 0u}); 97 98 // The entry before I may be a suffix of Seq that can now be erased. 99 if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) 100 Seqs.erase(I); 101 } 102 103 bool empty() const { return Seqs.empty(); } 104 105 unsigned size() const { 106 assert(IsLaidOut && "Call layout() before size()"); 107 return Entries; 108 } 109 110 /// layout - Computes the final table layout. 111 void layout() { 112 assert(!IsLaidOut && "Can only call layout() once"); 113 IsLaidOut = true; 114 115 // Lay out the table in Seqs iteration order. 116 for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E; 117 ++I) { 118 I->second = Entries; 119 // Include space for a terminator. 120 Entries += I->first.size() + Terminator.has_value(); 121 } 122 } 123 124 /// get - Returns the offset of Seq in the final table. 125 unsigned get(const SeqT &Seq) const { 126 assert(IsLaidOut && "Call layout() before get()"); 127 typename SeqMap::const_iterator I = Seqs.lower_bound(Seq); 128 assert(I != Seqs.end() && isSuffix(Seq, I->first) && 129 "get() called with sequence that wasn't added first"); 130 return I->second + (I->first.size() - Seq.size()); 131 } 132 133 /// `emitStringLiteralDef` - Print out the table as the body of an array 134 /// initializer, where each element is a C string literal terminated by 135 /// `\0`. Falls back to emitting a comma-separated integer list if 136 /// `EmitLongStrLiterals` is false 137 void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const { 138 assert(IsLaidOut && "Call layout() before emitStringLiteralDef()"); 139 if (!EmitLongStrLiterals) { 140 OS << Decl << " = {\n"; 141 emit(OS, printChar); 142 OS << " 0\n};\n\n"; 143 return; 144 } 145 146 OS << "\n#ifdef __GNUC__\n" 147 << "#pragma GCC diagnostic push\n" 148 << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" 149 << "#endif\n" 150 << Decl << " = {\n"; 151 for (const auto &[Seq, Offset] : Seqs) { 152 OS << " /* " << Offset << " */ \""; 153 OS.write_escaped(Seq); 154 if (Terminator) 155 OS.write_escaped(StringRef(&*Terminator, 1)); 156 OS << "\"\n"; 157 } 158 OS << "};\n" 159 << "#ifdef __GNUC__\n" 160 << "#pragma GCC diagnostic pop\n" 161 << "#endif\n\n"; 162 } 163 164 /// emit - Print out the table as the body of an array initializer. 165 /// Use the Print function to print elements. 166 void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const { 167 assert(IsLaidOut && "Call layout() before emit()"); 168 for (const auto &[Seq, Offset] : Seqs) { 169 OS << " /* " << Offset << " */ "; 170 for (const ElemT &Element : Seq) { 171 Print(OS, Element); 172 OS << ", "; 173 } 174 if (Terminator) { 175 Print(OS, *Terminator); 176 OS << ','; 177 } 178 OS << '\n'; 179 } 180 181 // Print a dummy element if the array would be empty otherwise. 182 if (!Entries) { 183 OS << " /* dummy */ "; 184 Print(OS, ElemT()); 185 OS << '\n'; 186 } 187 } 188 }; 189 190 } // end namespace llvm 191 192 #endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 193