xref: /llvm-project/llvm/lib/Analysis/TrainingLogger.cpp (revision c5ff6f72342e0a4b0ba2ec9f603bedca86721e80)
1 //===- TrainingLogger.cpp - mlgo feature/reward logging -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logging infrastructure for extracting features and
10 // rewards for mlgo policy training.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/TensorSpec.h"
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_API)
16 
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/Utils/TrainingLogger.h"
19 #include "llvm/Support/Base64.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/JSON.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 #include "google/protobuf/struct.pb.h"
28 #include "google/protobuf/text_format.h"
29 #include "tensorflow/core/example/example.pb.h"
30 #include <cassert>
31 #include <numeric>
32 
33 using namespace llvm;
34 
35 using google::protobuf::Message;
36 using google::protobuf::TextFormat;
37 
// If set, the protobuf output is emitted as human-readable text instead of
// the binary wire format.
static cl::opt<bool>
    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
                     cl::desc("Output textual (human-readable) protobuf."));

// If set, use the dependency-free streaming SimpleLoggerDataImpl backend
// instead of the tensorflow::SequenceExample protobuf backend.
static cl::opt<bool>
    UseSimpleLogger("tfutils-use-simplelogger", cl::init(false), cl::Hidden,
                    cl::desc("Output simple (non-protobuf) log."));
45 
46 namespace {
47 
48 void serialize(const Message &SE, std::string *OutStr) {
49   if (ProtobufTextMode) {
50     TextFormat::PrintToString(SE, OutStr);
51   } else {
52     *OutStr = SE.SerializeAsString();
53   }
54 }
55 } // namespace
56 
57 namespace llvm {
58 
// Abstract base for the two log serialization strategies (streaming simple
// log vs. tensorflow::SequenceExample protobuf). Holds the specs describing
// the logged tensors and dispatches storage/flush operations.
class LoggerDataImpl {
protected:
  const std::vector<TensorSpec> LoggedFeatureSpecs;
  const TensorSpec RewardSpec;
  const bool IncludeReward;
  LoggerDataImpl(const std::vector<TensorSpec> &LoggedSpecs,
                 const TensorSpec &RewardSpec, bool IncludeReward)
      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
        IncludeReward(IncludeReward) {}
  // Record one reward value, passed as Size raw bytes.
  virtual void logRewardImpl(const char *Value, size_t Size) = 0;

public:
  // flush the logged info to a stream and clear the log contents.
  virtual void flush(std::string *Str) = 0;
  // Allocate storage for the next tensor of feature FeatureID and return a
  // writable buffer sized for that tensor.
  virtual char *addNewTensor(size_t FeatureID) = 0;
  // Number of complete observations logged so far.
  virtual size_t getNrRecords() const = 0;
  virtual ~LoggerDataImpl() = default;

  // Type-safe entry point: forwards the raw bytes of Value to logRewardImpl.
  template <typename T> void logReward(T Value) {
    logRewardImpl(reinterpret_cast<const char *>(&Value), sizeof(T));
  }
};
81 
82 // The design goals of the simple logger are:
83 // - no dependencies that llvm doesn't already have.
84 // - support streaming, so that we don't need to buffer data during compilation
85 // - 0-decoding tensor values. Tensor values are potentially very large buffers
86 // of scalars. Because of their potentially large size, avoiding
87 // serialization/deserialization overhead is preferred.
88 //
89 // The simple logger produces an output of the form (each line item on its line)
90 // - header: a json object describing the data that will follow.
91 // - context: e.g. function name, for regalloc, or "default" for module-wide
92 // optimizations like the inliner. This is the context to which the subsequent
93 // data corresponds.
94 // - observation number.
95 // - tensor values - raw bytes of the tensors, in the order given in the header.
96 // The values are in succession, i.e. no separator is found between successive
97 // tensor values. At the end, there is a new line character.
98 // - [score] - this is optional, and is present if it was present in the header.
99 // Currently, for final rewards, we output "0" scores after each observation,
100 // except for the last one.
101 // <repeat>
102 // The file should be read as binary, but the reason we use newlines is mostly
103 // ease of debugging: the log can be opened in a text editor and, while tensor
104 // values are inscrutable, at least the sequence of data can be easily observed.
105 // Of course, the buffer of tensor values could contain '\n' bytes. A reader
106 // should use the header information to know how much data to read for the
107 // tensor values, and not use line information for that.
108 //
109 // An example reader, used for test, is available at
110 // Analysis/models/log_reader.py
111 //
112 // Example:
113 // {"features":[list of TensorSpecs], "score":<a tensor spec>}
114 // {"context": "aFunction"}
115 // {"observation": 0}
116 // <bytes>
117 // {"outcome": 0}
118 // <bytes for the tensor corresponding to the "score" spec in the header>
119 // {"observation": 1}
120 // ...
121 // {"context": "anotherFunction"}
122 // {"observation": 0}
123 // ...
124 //
class SimpleLoggerDataImpl : public LoggerDataImpl {
  // Raw tensor buffers, in logging order: entry I holds feature
  // (I % LoggedFeatureSpecs.size()) of observation
  // (I / LoggedFeatureSpecs.size()).
  std::vector<std::unique_ptr<char[]>> FeatureStorage;
  // One raw reward buffer per observation (populated only if IncludeReward).
  std::vector<std::unique_ptr<char[]>> RewardStorage;

  // Emit the one-line json header ({"features": [...], "score": <spec>})
  // describing the data that follows, terminated by a newline.
  raw_ostream &dumpHeader(raw_ostream &OS) const {
    json::OStream JOS(OS);
    JOS.object([&]() {
      JOS.attributeArray("features", [&]() {
        for (const auto &TS : LoggedFeatureSpecs)
          TS.toJSON(JOS);
      });
      if (IncludeReward) {
        JOS.attributeBegin("score");
        RewardSpec.toJSON(JOS);
        JOS.attributeEnd();
      }
    });
    OS << "\n";
    return OS;
  }

  // Emit the one-line {"context": Name} record.
  raw_ostream &startContext(raw_ostream &OS, StringRef Name) const {
    json::OStream JOS(OS);
    JOS.object([&]() { JOS.attribute("context", Name); });
    OS << "\n";
    return OS;
  }

  // Emit the one-line {"observation": Nr} record.
  raw_ostream &startObservation(raw_ostream &OS, size_t Nr) const {
    json::OStream JOS(OS);
    JOS.object([&]() { JOS.attribute("observation", Nr); });
    OS << "\n";
    return OS;
  }

  // Terminate the observation's tensor-bytes line and, if rewards are logged,
  // emit the {"outcome": N} line followed by the raw reward bytes for that
  // observation. Always ends with a newline.
  raw_ostream &writeOutcome(raw_ostream &OS,
                            size_t CurrentObservationID) const {
    if (IncludeReward) {
      OS << "\n";
      json::OStream JOS(OS);
      JOS.object([&]() { JOS.attribute("outcome", CurrentObservationID); });
      OS << "\n";
      OS.write(RewardStorage[CurrentObservationID].get(),
               RewardSpec.getTotalTensorBufferSize());
    }
    OS << "\n";
    return OS;
  }
  // String flushing is not supported by the streaming implementation; use
  // the raw_ostream overload below instead.
  void flush(std::string *Str) override {
    llvm_unreachable("Use the ostream implementation");
  }

  // Append a buffer sized for FeatureID's tensor and return it for the
  // caller to fill.
  char *addNewTensor(size_t FeatureID) override {
    return FeatureStorage
        .emplace_back(
            new char[LoggedFeatureSpecs[FeatureID].getTotalTensorBufferSize()])
        .get();
  }

  // Observations logged so far; assumes features are always logged in
  // complete per-observation groups.
  size_t getNrRecords() const override {
    assert(FeatureStorage.size() % LoggedFeatureSpecs.size() == 0);
    return FeatureStorage.size() / LoggedFeatureSpecs.size();
  }

  // Copy the reward bytes into a freshly allocated per-observation buffer.
  void logRewardImpl(const char *Value, size_t Size) override {
    std::memcpy(RewardStorage.emplace_back(new char[Size]).get(), Value, Size);
  }

public:
  SimpleLoggerDataImpl(const std::vector<TensorSpec> &LoggedSpecs,
                       const TensorSpec &RewardSpec, bool IncludeReward)
      : LoggerDataImpl(LoggedSpecs, RewardSpec, IncludeReward) {}

  // Replay the stored tensors to OS in the format described in the comment
  // above the class: optional header, context line, then per observation an
  // {"observation": N} line, the raw tensor bytes back to back, and the
  // outcome/reward record. Note: unlike the protobuf implementation, this
  // does not clear the stored data (the method is const).
  raw_ostream &flush(raw_ostream &OS, bool WithHeader = true,
                     StringRef Context = "default") const {
    if (WithHeader)
      dumpHeader(OS);
    startContext(OS, Context);
    size_t CurrentObservationID = 0;
    for (size_t I = 0; I < FeatureStorage.size(); ++I) {
      size_t TensorID = I % LoggedFeatureSpecs.size();
      if (TensorID == 0) {
        // First tensor of a new observation.
        CurrentObservationID = I / LoggedFeatureSpecs.size();
        startObservation(OS, CurrentObservationID);
      }
      OS.write(FeatureStorage[I].get(),
               LoggedFeatureSpecs[TensorID].getTotalTensorBufferSize());
      if (TensorID == LoggedFeatureSpecs.size() - 1) {
        // Last tensor of the observation: close it out.
        writeOutcome(OS, CurrentObservationID);
      }
    }
    return OS;
  }
};
219 
// Protobuf-backed log implementation: accumulates tensors into
// tensorflow::FeatureList protos and serializes everything as a
// tensorflow::SequenceExample on flush.
class TFSequenceExampleLoggerDataImpl : public LoggerDataImpl {
  // One FeatureList per logged feature, parallel to LoggedFeatureSpecs.
  std::vector<tensorflow::FeatureList> FeatureLists;
  // Reward values, one Feature per observation (used when IncludeReward).
  tensorflow::FeatureList Reward;

  // Debug check: every feature list in SE (and the reward list, if present)
  // must contain exactly NrRecords entries.
  bool isSelfConsistent(const tensorflow::SequenceExample &SE,
                        size_t NrRecords) const {
    bool Ret = true;
    for (const auto &TSpecs : LoggedFeatureSpecs) {
      const auto &Name = TSpecs.name();
      const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
      if (NrRecords != static_cast<size_t>(FL.size())) {
        dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
               << NrRecords << " got " << FL.size() << "\n";
        Ret = false;
      }
    }
    if (IncludeReward && static_cast<size_t>(SE.feature_lists()
                                                 .feature_list()
                                                 .at(RewardSpec.name())
                                                 .feature()
                                                 .size()) != NrRecords) {
      dbgs() << "[TF-UTILS]: reward is missing records.\n";
      Ret = false;
    }
    return Ret;
  }

  // Move the accumulated lists (keyed by spec name) into SE, leaving the
  // members in a valid but moved-from state.
  void transferLog(tensorflow::SequenceExample &SE) {
    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
    if (IncludeReward)
      (*FL)[RewardSpec.name()] = std::move(Reward);
    assert(FeatureLists.size() == LoggedFeatureSpecs.size());
    for (size_t I = 0; I < FeatureLists.size(); ++I) {
      const auto &LFS = LoggedFeatureSpecs[I];
      (*FL)[LFS.name()] = std::move(FeatureLists[I]);
    }
  }

public:
  TFSequenceExampleLoggerDataImpl(const std::vector<TensorSpec> &LoggedSpecs,
                                  const TensorSpec &RewardSpec,
                                  bool IncludeReward)
      : LoggerDataImpl(LoggedSpecs, RewardSpec, IncludeReward),
        FeatureLists(LoggedFeatureSpecs.size()) {}

  // flush the logged info to a stream and clear the log contents.
  void flush(std::string *Str) override {
    size_t NrRecords = getNrRecords();
    (void)NrRecords; // Only referenced by the assert below.
    tensorflow::SequenceExample SE;
    transferLog(SE);
    assert(isSelfConsistent(SE, NrRecords));
    serialize(SE, Str);
  }

  // Append a new feature proto for FeatureID, pre-sized to the spec's
  // element count, and return its raw data buffer. Note that both int32 and
  // int64 tensors are stored in the proto's int64_list representation.
  char *addNewTensor(size_t FeatureID) override {
    const auto &Spec = LoggedFeatureSpecs[FeatureID];
    if (Spec.isElementType<float>()) {
      auto *RF = FeatureLists[FeatureID]
                     .add_feature()
                     ->mutable_float_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0.0);
      return reinterpret_cast<char *>(RF->mutable_data());
    } else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) {
      auto *RF = FeatureLists[FeatureID]
                     .add_feature()
                     ->mutable_int64_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0);
      return reinterpret_cast<char *>(RF->mutable_data());
    }
    llvm_unreachable("Unsupported tensor type.");
  }

  // Decode the raw reward bytes according to RewardSpec's element type and
  // append the value to the reward list. Size is implied by the spec.
  void logRewardImpl(const char *Value, size_t Size) override {
    assert(IncludeReward);
    if (RewardSpec.isElementType<float>())
      Reward.add_feature()->mutable_float_list()->add_value(
          *reinterpret_cast<const float *>(Value));
    else if (RewardSpec.isElementType<int32_t>())
      Reward.add_feature()->mutable_int64_list()->add_value(
          *reinterpret_cast<const int32_t *>(Value));
    else if (RewardSpec.isElementType<int64_t>())
      Reward.add_feature()->mutable_int64_list()->add_value(
          *reinterpret_cast<const int64_t *>(Value));
    else
      llvm_unreachable("Unsupported tensor type.");
  }

  // Each observation appends one Feature to every list, so the first list's
  // length is the record count.
  size_t getNrRecords() const override {
    return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
  }
};
314 } // namespace llvm
315 
316 Logger::Logger(const std::vector<TensorSpec> &FeatureSpecs,
317                const TensorSpec &RewardSpec, bool IncludeReward)
318     : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
319       IncludeReward(IncludeReward) {
320   if (UseSimpleLogger)
321     LoggerData = std::make_unique<SimpleLoggerDataImpl>(
322         FeatureSpecs, RewardSpec, IncludeReward);
323   else
324     LoggerData = std::make_unique<TFSequenceExampleLoggerDataImpl>(
325         FeatureSpecs, RewardSpec, IncludeReward);
326 }
327 
328 Logger::~Logger() {}
329 
// Define Logger::log{Float,Int32,Int64}Reward, each forwarding one typed
// reward value to the implementation's logReward.
#define LOG_REWARD(NAME, TYPE)                                                 \
  void Logger::log##NAME##Reward(TYPE Value) {                                 \
    assert(IncludeReward);                                                     \
    LoggerData->logReward(Value);                                              \
  }

LOG_REWARD(Float, float)
LOG_REWARD(Int32, int32_t)
LOG_REWARD(Int64, int64_t)
#undef LOG_REWARD
340 
// Define Logger::log{Float,Int32,Int64}FinalReward. A final reward applies
// to the last observation only, so a 0 reward is logged for each of the
// first getNrRecords()-1 observations and Value for the last one.
#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
    assert(RewardSpec.isElementType<TYPE>());                                  \
    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
      log##NAME##Reward(0);                                                    \
    log##NAME##Reward(Value);                                                  \
  }

LOG_FINAL_REWARD(Float, float)
LOG_FINAL_REWARD(Int32, int32_t)
LOG_FINAL_REWARD(Int64, int64_t)
#undef LOG_FINAL_REWARD
353 
354 void Logger::logFloatValue(size_t FeatureID, const float *Value) {
355   assert(FeatureSpecs[FeatureID].isElementType<float>());
356   logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
357 }
358 
359 void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
360   assert(FeatureSpecs[FeatureID].isElementType<int64_t>());
361   logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
362 }
363 
364 void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
365   assert(FeatureSpecs[FeatureID].isElementType<int32_t>());
366   logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
367 }
368 
369 void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
370   const auto &Spec = FeatureSpecs[FeatureID];
371   char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
372   if (Spec.isElementType<int32_t>())
373     for (size_t I = 0; I < Spec.getElementCount(); ++I)
374       (reinterpret_cast<int64_t *>(Buff))[I] =
375           static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
376   else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
377     std::memcpy(Buff, RawData,
378                 Spec.getElementCount() * Spec.getElementByteSize());
379   else
380     llvm_unreachable("Unsupported tensor type");
381 }
382 
383 char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
384   return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
385 }
386 
387 void Logger::flush(std::string *Str) { LoggerData->flush(Str); }
388 
389 void Logger::flush(raw_ostream &OS) {
390   if (UseSimpleLogger) {
391     reinterpret_cast<SimpleLoggerDataImpl *>(LoggerData.get())->flush(OS);
392   } else {
393     std::string Buff;
394     LoggerData->flush(&Buff);
395     OS << Buff;
396   }
397 }
398 
399 void Logger::flushLogs(raw_ostream &OS,
400                        const StringMap<std::unique_ptr<Logger>> &Loggers) {
401   if (UseSimpleLogger) {
402     bool IsFirst = true;
403     for (const auto &NamedLogger : Loggers) {
404       auto *Impl = NamedLogger.second->LoggerData.get();
405       reinterpret_cast<const SimpleLoggerDataImpl *>(Impl)->flush(
406           OS, IsFirst, NamedLogger.first());
407       IsFirst = false;
408     }
409   } else {
410     google::protobuf::Struct Msg;
411     for (const auto &NamedLogger : Loggers) {
412       tensorflow::SequenceExample SE;
413       const auto &Logger = NamedLogger.second;
414       std::string Unencoded;
415       if (Logger->LoggerData->getNrRecords() > 0)
416         Logger->flush(&Unencoded);
417 
418       (*Msg.mutable_fields())[NamedLogger.first().str()]
419           .mutable_string_value()
420           ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded));
421     }
422 
423     std::string OutStr;
424     serialize(Msg, &OutStr);
425     OS << OutStr;
426   }
427 }
428 #endif // defined(LLVM_HAVE_TF_API)
429