1fa3d789dSPierre van Houtryve //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===// 2fa3d789dSPierre van Houtryve // 3fa3d789dSPierre van Houtryve // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fa3d789dSPierre van Houtryve // See https://llvm.org/LICENSE.txt for license information. 5fa3d789dSPierre van Houtryve // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fa3d789dSPierre van Houtryve // 7fa3d789dSPierre van Houtryve //===----------------------------------------------------------------------===// 8fa3d789dSPierre van Houtryve // 9c9070cceSSergei Barannikov // SequenceToOffsetTable can be used to emit a number of sequences as one big 10c9070cceSSergei Barannikov // array. Uses the same memory when a sequence is a suffix of another. 11fa3d789dSPierre van Houtryve // 12fa3d789dSPierre van Houtryve //===----------------------------------------------------------------------===// 13fa3d789dSPierre van Houtryve 148a61bfcfSRahul Joshi #ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 158a61bfcfSRahul Joshi #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 16fa3d789dSPierre van Houtryve 17a968ae68SRahul Joshi #include "llvm/ADT/StringExtras.h" 18fa3d789dSPierre van Houtryve #include "llvm/Support/CommandLine.h" 19fa3d789dSPierre van Houtryve #include "llvm/Support/raw_ostream.h" 20fa3d789dSPierre van Houtryve #include <algorithm> 21fa3d789dSPierre van Houtryve #include <cassert> 22fa3d789dSPierre van Houtryve #include <functional> 23fa3d789dSPierre van Houtryve #include <map> 24fa3d789dSPierre van Houtryve 25fa3d789dSPierre van Houtryve namespace llvm { 26a968ae68SRahul Joshi extern cl::opt<bool> EmitLongStrLiterals; 27fa3d789dSPierre van Houtryve 28a968ae68SRahul Joshi inline void printChar(raw_ostream &OS, char C) { 29fa3d789dSPierre van Houtryve unsigned char UC(C); 30a968ae68SRahul Joshi if (isAlnum(UC) || isPunct(UC)) { 31fa3d789dSPierre van Houtryve OS << '\''; 32fa3d789dSPierre van Houtryve if (C == '\\' || C == '\'') 33fa3d789dSPierre van Houtryve OS << '\\'; 34fa3d789dSPierre van Houtryve OS << C << '\''; 35fa3d789dSPierre van Houtryve } else { 36fa3d789dSPierre van Houtryve OS << unsigned(UC); 37fa3d789dSPierre van Houtryve } 38fa3d789dSPierre van Houtryve } 39fa3d789dSPierre van Houtryve 40fa3d789dSPierre van Houtryve /// SequenceToOffsetTable - Collect a number of terminated sequences of T. 41fa3d789dSPierre van Houtryve /// Compute the layout of a table that contains all the sequences, possibly by 42fa3d789dSPierre van Houtryve /// reusing entries. 43fa3d789dSPierre van Houtryve /// 44fa3d789dSPierre van Houtryve /// @tparam SeqT The sequence container. (vector or string). 45fa3d789dSPierre van Houtryve /// @tparam Less A stable comparator for SeqT elements. 46fa3d789dSPierre van Houtryve template <typename SeqT, typename Less = std::less<typename SeqT::value_type>> 47fa3d789dSPierre van Houtryve class SequenceToOffsetTable { 48fa3d789dSPierre van Houtryve typedef typename SeqT::value_type ElemT; 49fa3d789dSPierre van Houtryve 50fa3d789dSPierre van Houtryve // Define a comparator for SeqT that sorts a suffix immediately before a 51fa3d789dSPierre van Houtryve // sequence with that suffix. 52fa3d789dSPierre van Houtryve struct SeqLess { 53fa3d789dSPierre van Houtryve Less L; 54fa3d789dSPierre van Houtryve bool operator()(const SeqT &A, const SeqT &B) const { 55fa3d789dSPierre van Houtryve return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(), 56fa3d789dSPierre van Houtryve B.rend(), L); 57fa3d789dSPierre van Houtryve } 58fa3d789dSPierre van Houtryve }; 59fa3d789dSPierre van Houtryve 60fa3d789dSPierre van Houtryve // Keep sequences ordered according to SeqLess so suffixes are easy to find. 61fa3d789dSPierre van Houtryve // Map each sequence to its offset in the table. 62fa3d789dSPierre van Houtryve typedef std::map<SeqT, unsigned, SeqLess> SeqMap; 63fa3d789dSPierre van Houtryve 64fa3d789dSPierre van Houtryve // Sequences added so far, with suffixes removed. 65fa3d789dSPierre van Houtryve SeqMap Seqs; 66fa3d789dSPierre van Houtryve 67c9070cceSSergei Barannikov // Terminator element to be appended to each added sequence. 68c9070cceSSergei Barannikov std::optional<ElemT> Terminator; 69c9070cceSSergei Barannikov 70c9070cceSSergei Barannikov // True if `layout` method was called. 71c9070cceSSergei Barannikov bool IsLaidOut = false; 72c9070cceSSergei Barannikov 73fa3d789dSPierre van Houtryve // Entries in the final table, or 0 before layout was called. 74c9070cceSSergei Barannikov unsigned Entries = 0; 75fa3d789dSPierre van Houtryve 76fa3d789dSPierre van Houtryve // isSuffix - Returns true if A is a suffix of B. 77fa3d789dSPierre van Houtryve static bool isSuffix(const SeqT &A, const SeqT &B) { 78fa3d789dSPierre van Houtryve return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin()); 79fa3d789dSPierre van Houtryve } 80fa3d789dSPierre van Houtryve 81fa3d789dSPierre van Houtryve public: 82c9070cceSSergei Barannikov explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT()) 83c9070cceSSergei Barannikov : Terminator(Terminator) {} 84fa3d789dSPierre van Houtryve 85fa3d789dSPierre van Houtryve /// add - Add a sequence to the table. 86fa3d789dSPierre van Houtryve /// This must be called before layout(). 87fa3d789dSPierre van Houtryve void add(const SeqT &Seq) { 88c9070cceSSergei Barannikov assert(!IsLaidOut && "Cannot call add() after layout()"); 89fa3d789dSPierre van Houtryve typename SeqMap::iterator I = Seqs.lower_bound(Seq); 90fa3d789dSPierre van Houtryve 91fa3d789dSPierre van Houtryve // If SeqMap contains a sequence that has Seq as a suffix, I will be 92fa3d789dSPierre van Houtryve // pointing to it. 93fa3d789dSPierre van Houtryve if (I != Seqs.end() && isSuffix(Seq, I->first)) 94fa3d789dSPierre van Houtryve return; 95fa3d789dSPierre van Houtryve 96*4e8c9d28SJay Foad I = Seqs.insert(I, {Seq, 0u}); 97fa3d789dSPierre van Houtryve 98fa3d789dSPierre van Houtryve // The entry before I may be a suffix of Seq that can now be erased. 99fa3d789dSPierre van Houtryve if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) 100fa3d789dSPierre van Houtryve Seqs.erase(I); 101fa3d789dSPierre van Houtryve } 102fa3d789dSPierre van Houtryve 103fa3d789dSPierre van Houtryve bool empty() const { return Seqs.empty(); } 104fa3d789dSPierre van Houtryve 105fa3d789dSPierre van Houtryve unsigned size() const { 106c9070cceSSergei Barannikov assert(IsLaidOut && "Call layout() before size()"); 107fa3d789dSPierre van Houtryve return Entries; 108fa3d789dSPierre van Houtryve } 109fa3d789dSPierre van Houtryve 110fa3d789dSPierre van Houtryve /// layout - Computes the final table layout. 111fa3d789dSPierre van Houtryve void layout() { 112c9070cceSSergei Barannikov assert(!IsLaidOut && "Can only call layout() once"); 113c9070cceSSergei Barannikov IsLaidOut = true; 114c9070cceSSergei Barannikov 115fa3d789dSPierre van Houtryve // Lay out the table in Seqs iteration order. 116fa3d789dSPierre van Houtryve for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E; 117fa3d789dSPierre van Houtryve ++I) { 118fa3d789dSPierre van Houtryve I->second = Entries; 119fa3d789dSPierre van Houtryve // Include space for a terminator. 120c9070cceSSergei Barannikov Entries += I->first.size() + Terminator.has_value(); 121fa3d789dSPierre van Houtryve } 122fa3d789dSPierre van Houtryve } 123fa3d789dSPierre van Houtryve 124fa3d789dSPierre van Houtryve /// get - Returns the offset of Seq in the final table. 125fa3d789dSPierre van Houtryve unsigned get(const SeqT &Seq) const { 126c9070cceSSergei Barannikov assert(IsLaidOut && "Call layout() before get()"); 127fa3d789dSPierre van Houtryve typename SeqMap::const_iterator I = Seqs.lower_bound(Seq); 128fa3d789dSPierre van Houtryve assert(I != Seqs.end() && isSuffix(Seq, I->first) && 129fa3d789dSPierre van Houtryve "get() called with sequence that wasn't added first"); 130fa3d789dSPierre van Houtryve return I->second + (I->first.size() - Seq.size()); 131fa3d789dSPierre van Houtryve } 132fa3d789dSPierre van Houtryve 133fa3d789dSPierre van Houtryve /// `emitStringLiteralDef` - Print out the table as the body of an array 134fa3d789dSPierre van Houtryve /// initializer, where each element is a C string literal terminated by 135fa3d789dSPierre van Houtryve /// `\0`. Falls back to emitting a comma-separated integer list if 136fa3d789dSPierre van Houtryve /// `EmitLongStrLiterals` is false 137a968ae68SRahul Joshi void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const { 138c9070cceSSergei Barannikov assert(IsLaidOut && "Call layout() before emitStringLiteralDef()"); 139fa3d789dSPierre van Houtryve if (!EmitLongStrLiterals) { 140fa3d789dSPierre van Houtryve OS << Decl << " = {\n"; 141c9070cceSSergei Barannikov emit(OS, printChar); 142fa3d789dSPierre van Houtryve OS << " 0\n};\n\n"; 143fa3d789dSPierre van Houtryve return; 144fa3d789dSPierre van Houtryve } 145fa3d789dSPierre van Houtryve 146fa3d789dSPierre van Houtryve OS << "\n#ifdef __GNUC__\n" 147fa3d789dSPierre van Houtryve << "#pragma GCC diagnostic push\n" 148fa3d789dSPierre van Houtryve << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n" 149fa3d789dSPierre van Houtryve << "#endif\n" 150fa3d789dSPierre van Houtryve << Decl << " = {\n"; 151a968ae68SRahul Joshi for (const auto &[Seq, Offset] : Seqs) { 152a968ae68SRahul Joshi OS << " /* " << Offset << " */ \""; 153a968ae68SRahul Joshi OS.write_escaped(Seq); 154c9070cceSSergei Barannikov if (Terminator) 155c9070cceSSergei Barannikov OS.write_escaped(StringRef(&*Terminator, 1)); 156c9070cceSSergei Barannikov OS << "\"\n"; 157fa3d789dSPierre van Houtryve } 158fa3d789dSPierre van Houtryve OS << "};\n" 159fa3d789dSPierre van Houtryve << "#ifdef __GNUC__\n" 160fa3d789dSPierre van Houtryve << "#pragma GCC diagnostic pop\n" 161fa3d789dSPierre van Houtryve << "#endif\n\n"; 162fa3d789dSPierre van Houtryve } 163fa3d789dSPierre van Houtryve 164fa3d789dSPierre van Houtryve /// emit - Print out the table as the body of an array initializer. 165fa3d789dSPierre van Houtryve /// Use the Print function to print elements. 166c9070cceSSergei Barannikov void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const { 167c9070cceSSergei Barannikov assert(IsLaidOut && "Call layout() before emit()"); 168a968ae68SRahul Joshi for (const auto &[Seq, Offset] : Seqs) { 169a968ae68SRahul Joshi OS << " /* " << Offset << " */ "; 170a968ae68SRahul Joshi for (const ElemT &Element : Seq) { 171a968ae68SRahul Joshi Print(OS, Element); 172fa3d789dSPierre van Houtryve OS << ", "; 173fa3d789dSPierre van Houtryve } 174c9070cceSSergei Barannikov if (Terminator) { 175c9070cceSSergei Barannikov Print(OS, *Terminator); 176c9070cceSSergei Barannikov OS << ','; 177c9070cceSSergei Barannikov } 178c9070cceSSergei Barannikov OS << '\n'; 179c9070cceSSergei Barannikov } 180c9070cceSSergei Barannikov 181c9070cceSSergei Barannikov // Print a dummy element if the array would be empty otherwise. 182c9070cceSSergei Barannikov if (!Entries) { 183c9070cceSSergei Barannikov OS << " /* dummy */ "; 184c9070cceSSergei Barannikov Print(OS, ElemT()); 185c9070cceSSergei Barannikov OS << '\n'; 186fa3d789dSPierre van Houtryve } 187fa3d789dSPierre van Houtryve } 188fa3d789dSPierre van Houtryve }; 189fa3d789dSPierre van Houtryve 190fa3d789dSPierre van Houtryve } // end namespace llvm 191fa3d789dSPierre van Houtryve 1928a61bfcfSRahul Joshi #endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H 193