xref: /llvm-project/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h (revision 4e8c9d28132039a98feb97cec2759cddeb37d934)
1 //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // SequenceToOffsetTable can be used to emit a number of sequences as one big
10 // array. Uses the same memory when a sequence is a suffix of another.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
15 #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
16 
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <algorithm>
21 #include <cassert>
22 #include <functional>
23 #include <map>
24 
25 namespace llvm {
26 extern cl::opt<bool> EmitLongStrLiterals;
27 
28 inline void printChar(raw_ostream &OS, char C) {
29   unsigned char UC(C);
30   if (isAlnum(UC) || isPunct(UC)) {
31     OS << '\'';
32     if (C == '\\' || C == '\'')
33       OS << '\\';
34     OS << C << '\'';
35   } else {
36     OS << unsigned(UC);
37   }
38 }
39 
40 /// SequenceToOffsetTable - Collect a number of terminated sequences of T.
41 /// Compute the layout of a table that contains all the sequences, possibly by
42 /// reusing entries.
43 ///
44 /// @tparam SeqT The sequence container. (vector or string).
45 /// @tparam Less A stable comparator for SeqT elements.
46 template <typename SeqT, typename Less = std::less<typename SeqT::value_type>>
47 class SequenceToOffsetTable {
48   typedef typename SeqT::value_type ElemT;
49 
50   // Define a comparator for SeqT that sorts a suffix immediately before a
51   // sequence with that suffix.
52   struct SeqLess {
53     Less L;
54     bool operator()(const SeqT &A, const SeqT &B) const {
55       return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(),
56                                           B.rend(), L);
57     }
58   };
59 
60   // Keep sequences ordered according to SeqLess so suffixes are easy to find.
61   // Map each sequence to its offset in the table.
62   typedef std::map<SeqT, unsigned, SeqLess> SeqMap;
63 
64   // Sequences added so far, with suffixes removed.
65   SeqMap Seqs;
66 
67   // Terminator element to be appended to each added sequence.
68   std::optional<ElemT> Terminator;
69 
70   // True if `layout` method was called.
71   bool IsLaidOut = false;
72 
73   // Entries in the final table, or 0 before layout was called.
74   unsigned Entries = 0;
75 
76   // isSuffix - Returns true if A is a suffix of B.
77   static bool isSuffix(const SeqT &A, const SeqT &B) {
78     return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());
79   }
80 
81 public:
82   explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT())
83       : Terminator(Terminator) {}
84 
85   /// add - Add a sequence to the table.
86   /// This must be called before layout().
87   void add(const SeqT &Seq) {
88     assert(!IsLaidOut && "Cannot call add() after layout()");
89     typename SeqMap::iterator I = Seqs.lower_bound(Seq);
90 
91     // If SeqMap contains a sequence that has Seq as a suffix, I will be
92     // pointing to it.
93     if (I != Seqs.end() && isSuffix(Seq, I->first))
94       return;
95 
96     I = Seqs.insert(I, {Seq, 0u});
97 
98     // The entry before I may be a suffix of Seq that can now be erased.
99     if (I != Seqs.begin() && isSuffix((--I)->first, Seq))
100       Seqs.erase(I);
101   }
102 
103   bool empty() const { return Seqs.empty(); }
104 
105   unsigned size() const {
106     assert(IsLaidOut && "Call layout() before size()");
107     return Entries;
108   }
109 
110   /// layout - Computes the final table layout.
111   void layout() {
112     assert(!IsLaidOut && "Can only call layout() once");
113     IsLaidOut = true;
114 
115     // Lay out the table in Seqs iteration order.
116     for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
117          ++I) {
118       I->second = Entries;
119       // Include space for a terminator.
120       Entries += I->first.size() + Terminator.has_value();
121     }
122   }
123 
124   /// get - Returns the offset of Seq in the final table.
125   unsigned get(const SeqT &Seq) const {
126     assert(IsLaidOut && "Call layout() before get()");
127     typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
128     assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
129            "get() called with sequence that wasn't added first");
130     return I->second + (I->first.size() - Seq.size());
131   }
132 
133   /// `emitStringLiteralDef` - Print out the table as the body of an array
134   /// initializer, where each element is a C string literal terminated by
135   /// `\0`. Falls back to emitting a comma-separated integer list if
136   /// `EmitLongStrLiterals` is false
137   void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
138     assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
139     if (!EmitLongStrLiterals) {
140       OS << Decl << " = {\n";
141       emit(OS, printChar);
142       OS << "  0\n};\n\n";
143       return;
144     }
145 
146     OS << "\n#ifdef __GNUC__\n"
147        << "#pragma GCC diagnostic push\n"
148        << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
149        << "#endif\n"
150        << Decl << " = {\n";
151     for (const auto &[Seq, Offset] : Seqs) {
152       OS << "  /* " << Offset << " */ \"";
153       OS.write_escaped(Seq);
154       if (Terminator)
155         OS.write_escaped(StringRef(&*Terminator, 1));
156       OS << "\"\n";
157     }
158     OS << "};\n"
159        << "#ifdef __GNUC__\n"
160        << "#pragma GCC diagnostic pop\n"
161        << "#endif\n\n";
162   }
163 
164   /// emit - Print out the table as the body of an array initializer.
165   /// Use the Print function to print elements.
166   void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
167     assert(IsLaidOut && "Call layout() before emit()");
168     for (const auto &[Seq, Offset] : Seqs) {
169       OS << "  /* " << Offset << " */ ";
170       for (const ElemT &Element : Seq) {
171         Print(OS, Element);
172         OS << ", ";
173       }
174       if (Terminator) {
175         Print(OS, *Terminator);
176         OS << ',';
177       }
178       OS << '\n';
179     }
180 
181     // Print a dummy element if the array would be empty otherwise.
182     if (!Entries) {
183       OS << "  /* dummy */ ";
184       Print(OS, ElemT());
185       OS << '\n';
186     }
187   }
188 };
189 
190 } // end namespace llvm
191 
192 #endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
193