xref: /llvm-project/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h (revision 4e8c9d28132039a98feb97cec2759cddeb37d934)
1fa3d789dSPierre van Houtryve //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
2fa3d789dSPierre van Houtryve //
3fa3d789dSPierre van Houtryve // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fa3d789dSPierre van Houtryve // See https://llvm.org/LICENSE.txt for license information.
5fa3d789dSPierre van Houtryve // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fa3d789dSPierre van Houtryve //
7fa3d789dSPierre van Houtryve //===----------------------------------------------------------------------===//
8fa3d789dSPierre van Houtryve //
9c9070cceSSergei Barannikov // SequenceToOffsetTable can be used to emit a number of sequences as one big
10c9070cceSSergei Barannikov // array. Uses the same memory when a sequence is a suffix of another.
11fa3d789dSPierre van Houtryve //
12fa3d789dSPierre van Houtryve //===----------------------------------------------------------------------===//
13fa3d789dSPierre van Houtryve 
148a61bfcfSRahul Joshi #ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
158a61bfcfSRahul Joshi #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
16fa3d789dSPierre van Houtryve 
17a968ae68SRahul Joshi #include "llvm/ADT/StringExtras.h"
18fa3d789dSPierre van Houtryve #include "llvm/Support/CommandLine.h"
19fa3d789dSPierre van Houtryve #include "llvm/Support/raw_ostream.h"
20fa3d789dSPierre van Houtryve #include <algorithm>
21fa3d789dSPierre van Houtryve #include <cassert>
22fa3d789dSPierre van Houtryve #include <functional>
23fa3d789dSPierre van Houtryve #include <map>
24fa3d789dSPierre van Houtryve 
25fa3d789dSPierre van Houtryve namespace llvm {
26a968ae68SRahul Joshi extern cl::opt<bool> EmitLongStrLiterals;
27fa3d789dSPierre van Houtryve 
28a968ae68SRahul Joshi inline void printChar(raw_ostream &OS, char C) {
29fa3d789dSPierre van Houtryve   unsigned char UC(C);
30a968ae68SRahul Joshi   if (isAlnum(UC) || isPunct(UC)) {
31fa3d789dSPierre van Houtryve     OS << '\'';
32fa3d789dSPierre van Houtryve     if (C == '\\' || C == '\'')
33fa3d789dSPierre van Houtryve       OS << '\\';
34fa3d789dSPierre van Houtryve     OS << C << '\'';
35fa3d789dSPierre van Houtryve   } else {
36fa3d789dSPierre van Houtryve     OS << unsigned(UC);
37fa3d789dSPierre van Houtryve   }
38fa3d789dSPierre van Houtryve }
39fa3d789dSPierre van Houtryve 
40fa3d789dSPierre van Houtryve /// SequenceToOffsetTable - Collect a number of terminated sequences of T.
41fa3d789dSPierre van Houtryve /// Compute the layout of a table that contains all the sequences, possibly by
42fa3d789dSPierre van Houtryve /// reusing entries.
43fa3d789dSPierre van Houtryve ///
44fa3d789dSPierre van Houtryve /// @tparam SeqT The sequence container. (vector or string).
45fa3d789dSPierre van Houtryve /// @tparam Less A stable comparator for SeqT elements.
46fa3d789dSPierre van Houtryve template <typename SeqT, typename Less = std::less<typename SeqT::value_type>>
47fa3d789dSPierre van Houtryve class SequenceToOffsetTable {
48fa3d789dSPierre van Houtryve   typedef typename SeqT::value_type ElemT;
49fa3d789dSPierre van Houtryve 
50fa3d789dSPierre van Houtryve   // Define a comparator for SeqT that sorts a suffix immediately before a
51fa3d789dSPierre van Houtryve   // sequence with that suffix.
52fa3d789dSPierre van Houtryve   struct SeqLess {
53fa3d789dSPierre van Houtryve     Less L;
54fa3d789dSPierre van Houtryve     bool operator()(const SeqT &A, const SeqT &B) const {
55fa3d789dSPierre van Houtryve       return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(),
56fa3d789dSPierre van Houtryve                                           B.rend(), L);
57fa3d789dSPierre van Houtryve     }
58fa3d789dSPierre van Houtryve   };
59fa3d789dSPierre van Houtryve 
60fa3d789dSPierre van Houtryve   // Keep sequences ordered according to SeqLess so suffixes are easy to find.
61fa3d789dSPierre van Houtryve   // Map each sequence to its offset in the table.
62fa3d789dSPierre van Houtryve   typedef std::map<SeqT, unsigned, SeqLess> SeqMap;
63fa3d789dSPierre van Houtryve 
64fa3d789dSPierre van Houtryve   // Sequences added so far, with suffixes removed.
65fa3d789dSPierre van Houtryve   SeqMap Seqs;
66fa3d789dSPierre van Houtryve 
67c9070cceSSergei Barannikov   // Terminator element to be appended to each added sequence.
68c9070cceSSergei Barannikov   std::optional<ElemT> Terminator;
69c9070cceSSergei Barannikov 
70c9070cceSSergei Barannikov   // True if `layout` method was called.
71c9070cceSSergei Barannikov   bool IsLaidOut = false;
72c9070cceSSergei Barannikov 
73fa3d789dSPierre van Houtryve   // Entries in the final table, or 0 before layout was called.
74c9070cceSSergei Barannikov   unsigned Entries = 0;
75fa3d789dSPierre van Houtryve 
76fa3d789dSPierre van Houtryve   // isSuffix - Returns true if A is a suffix of B.
77fa3d789dSPierre van Houtryve   static bool isSuffix(const SeqT &A, const SeqT &B) {
78fa3d789dSPierre van Houtryve     return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());
79fa3d789dSPierre van Houtryve   }
80fa3d789dSPierre van Houtryve 
81fa3d789dSPierre van Houtryve public:
82c9070cceSSergei Barannikov   explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT())
83c9070cceSSergei Barannikov       : Terminator(Terminator) {}
84fa3d789dSPierre van Houtryve 
85fa3d789dSPierre van Houtryve   /// add - Add a sequence to the table.
86fa3d789dSPierre van Houtryve   /// This must be called before layout().
87fa3d789dSPierre van Houtryve   void add(const SeqT &Seq) {
88c9070cceSSergei Barannikov     assert(!IsLaidOut && "Cannot call add() after layout()");
89fa3d789dSPierre van Houtryve     typename SeqMap::iterator I = Seqs.lower_bound(Seq);
90fa3d789dSPierre van Houtryve 
91fa3d789dSPierre van Houtryve     // If SeqMap contains a sequence that has Seq as a suffix, I will be
92fa3d789dSPierre van Houtryve     // pointing to it.
93fa3d789dSPierre van Houtryve     if (I != Seqs.end() && isSuffix(Seq, I->first))
94fa3d789dSPierre van Houtryve       return;
95fa3d789dSPierre van Houtryve 
96*4e8c9d28SJay Foad     I = Seqs.insert(I, {Seq, 0u});
97fa3d789dSPierre van Houtryve 
98fa3d789dSPierre van Houtryve     // The entry before I may be a suffix of Seq that can now be erased.
99fa3d789dSPierre van Houtryve     if (I != Seqs.begin() && isSuffix((--I)->first, Seq))
100fa3d789dSPierre van Houtryve       Seqs.erase(I);
101fa3d789dSPierre van Houtryve   }
102fa3d789dSPierre van Houtryve 
103fa3d789dSPierre van Houtryve   bool empty() const { return Seqs.empty(); }
104fa3d789dSPierre van Houtryve 
105fa3d789dSPierre van Houtryve   unsigned size() const {
106c9070cceSSergei Barannikov     assert(IsLaidOut && "Call layout() before size()");
107fa3d789dSPierre van Houtryve     return Entries;
108fa3d789dSPierre van Houtryve   }
109fa3d789dSPierre van Houtryve 
110fa3d789dSPierre van Houtryve   /// layout - Computes the final table layout.
111fa3d789dSPierre van Houtryve   void layout() {
112c9070cceSSergei Barannikov     assert(!IsLaidOut && "Can only call layout() once");
113c9070cceSSergei Barannikov     IsLaidOut = true;
114c9070cceSSergei Barannikov 
115fa3d789dSPierre van Houtryve     // Lay out the table in Seqs iteration order.
116fa3d789dSPierre van Houtryve     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
117fa3d789dSPierre van Houtryve          ++I) {
118fa3d789dSPierre van Houtryve       I->second = Entries;
119fa3d789dSPierre van Houtryve       // Include space for a terminator.
120c9070cceSSergei Barannikov       Entries += I->first.size() + Terminator.has_value();
121fa3d789dSPierre van Houtryve     }
122fa3d789dSPierre van Houtryve   }
123fa3d789dSPierre van Houtryve 
124fa3d789dSPierre van Houtryve   /// get - Returns the offset of Seq in the final table.
125fa3d789dSPierre van Houtryve   unsigned get(const SeqT &Seq) const {
126c9070cceSSergei Barannikov     assert(IsLaidOut && "Call layout() before get()");
127fa3d789dSPierre van Houtryve     typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
128fa3d789dSPierre van Houtryve     assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
129fa3d789dSPierre van Houtryve            "get() called with sequence that wasn't added first");
130fa3d789dSPierre van Houtryve     return I->second + (I->first.size() - Seq.size());
131fa3d789dSPierre van Houtryve   }
132fa3d789dSPierre van Houtryve 
133fa3d789dSPierre van Houtryve   /// `emitStringLiteralDef` - Print out the table as the body of an array
134fa3d789dSPierre van Houtryve   /// initializer, where each element is a C string literal terminated by
135fa3d789dSPierre van Houtryve   /// `\0`. Falls back to emitting a comma-separated integer list if
136fa3d789dSPierre van Houtryve   /// `EmitLongStrLiterals` is false
137a968ae68SRahul Joshi   void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
138c9070cceSSergei Barannikov     assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
139fa3d789dSPierre van Houtryve     if (!EmitLongStrLiterals) {
140fa3d789dSPierre van Houtryve       OS << Decl << " = {\n";
141c9070cceSSergei Barannikov       emit(OS, printChar);
142fa3d789dSPierre van Houtryve       OS << "  0\n};\n\n";
143fa3d789dSPierre van Houtryve       return;
144fa3d789dSPierre van Houtryve     }
145fa3d789dSPierre van Houtryve 
146fa3d789dSPierre van Houtryve     OS << "\n#ifdef __GNUC__\n"
147fa3d789dSPierre van Houtryve        << "#pragma GCC diagnostic push\n"
148fa3d789dSPierre van Houtryve        << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
149fa3d789dSPierre van Houtryve        << "#endif\n"
150fa3d789dSPierre van Houtryve        << Decl << " = {\n";
151a968ae68SRahul Joshi     for (const auto &[Seq, Offset] : Seqs) {
152a968ae68SRahul Joshi       OS << "  /* " << Offset << " */ \"";
153a968ae68SRahul Joshi       OS.write_escaped(Seq);
154c9070cceSSergei Barannikov       if (Terminator)
155c9070cceSSergei Barannikov         OS.write_escaped(StringRef(&*Terminator, 1));
156c9070cceSSergei Barannikov       OS << "\"\n";
157fa3d789dSPierre van Houtryve     }
158fa3d789dSPierre van Houtryve     OS << "};\n"
159fa3d789dSPierre van Houtryve        << "#ifdef __GNUC__\n"
160fa3d789dSPierre van Houtryve        << "#pragma GCC diagnostic pop\n"
161fa3d789dSPierre van Houtryve        << "#endif\n\n";
162fa3d789dSPierre van Houtryve   }
163fa3d789dSPierre van Houtryve 
164fa3d789dSPierre van Houtryve   /// emit - Print out the table as the body of an array initializer.
165fa3d789dSPierre van Houtryve   /// Use the Print function to print elements.
166c9070cceSSergei Barannikov   void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
167c9070cceSSergei Barannikov     assert(IsLaidOut && "Call layout() before emit()");
168a968ae68SRahul Joshi     for (const auto &[Seq, Offset] : Seqs) {
169a968ae68SRahul Joshi       OS << "  /* " << Offset << " */ ";
170a968ae68SRahul Joshi       for (const ElemT &Element : Seq) {
171a968ae68SRahul Joshi         Print(OS, Element);
172fa3d789dSPierre van Houtryve         OS << ", ";
173fa3d789dSPierre van Houtryve       }
174c9070cceSSergei Barannikov       if (Terminator) {
175c9070cceSSergei Barannikov         Print(OS, *Terminator);
176c9070cceSSergei Barannikov         OS << ',';
177c9070cceSSergei Barannikov       }
178c9070cceSSergei Barannikov       OS << '\n';
179c9070cceSSergei Barannikov     }
180c9070cceSSergei Barannikov 
181c9070cceSSergei Barannikov     // Print a dummy element if the array would be empty otherwise.
182c9070cceSSergei Barannikov     if (!Entries) {
183c9070cceSSergei Barannikov       OS << "  /* dummy */ ";
184c9070cceSSergei Barannikov       Print(OS, ElemT());
185c9070cceSSergei Barannikov       OS << '\n';
186fa3d789dSPierre van Houtryve     }
187fa3d789dSPierre van Houtryve   }
188fa3d789dSPierre van Houtryve };
189fa3d789dSPierre van Houtryve 
190fa3d789dSPierre van Houtryve } // end namespace llvm
191fa3d789dSPierre van Houtryve 
1928a61bfcfSRahul Joshi #endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
193