//===- TFUtils.cpp - tensorflow evaluation utilities ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"

#include <cassert>
#include <cstring>
#include <functional>
#include <memory>
#include <numeric>

using namespace llvm;

namespace {

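// RAII handles over the TF C API's manually-managed objects: each alias pairs
// a TF_* type with its matching TF_Delete* function as the deleter.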
using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
using TFSessionOptionsPtr =
    std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

struct TFInitializer {
  TFInitializer() {
    assert(!IsInitialized && "TFInitializer should only be initialized once");
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
    IsInitialized = true;
  }
  bool IsInitialized = false;
};

llvm::ManagedStatic<TFInitializer> TFLibInitializer;

bool ensureInitTF() { return TFLibInitializer->IsInitialized; }

TFGraphPtr createTFGraph() {
  return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
}

TFStatusPtr createTFStatus() {
  return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
}

TFSessionOptionsPtr createTFSessionOptions() {
  return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}

/// Write the values of one tensor as a list.
template <typename T>
void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
                       size_t ElemCount) {
  OutFile << "[";
  const T *TypedData = reinterpret_cast<const T *>(TensorData);
  ListSeparator LS;
  for (size_t I = 0; I < ElemCount; ++I)
    OutFile << LS << TypedData[I];
  OutFile << "]";
}

/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
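///
/// As a sketch (with a hypothetical tensor named "foo", two int64 elements
/// per tensor, logged for two steps), the emitted text has the form:
///   feature_list: {
///     key: "foo" value: {
///       feature: { int64_list: { value: [1, 2] } }
///       feature: { int64_list: { value: [3, 4] } }
///     }
///   }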
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
                                   const LoggedFeatureSpec &LoggedSpec,
                                   const char *TensorData, size_t TensorCount,
                                   bool FinalReward = false) {
  const char *FieldName = "<invalid>";
  std::function<void(const char *)> ValueWriter;
  const auto &Spec = LoggedSpec.Spec;
  // The 'Feature' protobuf only has 3 possible fields: float_list,
  // int64_list, or bytes_list, so we capture int32 values as int64. We don't
  // support any other types.
  if (Spec.isElementType<int64_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<int32_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<float>()) {
    FieldName = "float_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
    };
  } else {
    llvm_unreachable("Unsupported tensor type.");
  }

  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\""
          << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
          << "\" ";
  OutFile << "value: {\n";
  size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();

  auto WriteFeatureProto = [&](const char *P) {
    OutFile << "      feature: { " << FieldName << ": { value: ";
    ValueWriter(P);
    OutFile << " } }\n";
  };

  const char *CurrentTensor = TensorData;
  static int64_t Zero = 0;
  // Write all but the last value. If this is the final reward, don't increment
  // the CurrentTensor, and just write 0.
  for (size_t I = 0; I < TensorCount - 1; ++I) {
    if (FinalReward)
      WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
    else {
      WriteFeatureProto(CurrentTensor);
      CurrentTensor += TensorByteSize;
    }
  }

  WriteFeatureProto(CurrentTensor);

  OutFile << "    }\n";
  OutFile << "  }\n";
}
} // namespace

namespace llvm {
class EvaluationResultImpl {
public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize) {}

  ~EvaluationResultImpl() {
    for (auto *P : Output)
      if (P)
        TF_DeleteTensor(P);
  }

  EvaluationResultImpl(const EvaluationResultImpl &) = delete;
  EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
  std::vector<TF_Tensor *> &getOutput() { return Output; }

private:
  const size_t OutputSize;
  std::vector<TF_Tensor *> Output;
};

size_t TensorSpec::getElementByteSize() const {
  return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
}

TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
                       const std::vector<int64_t> &Shape)
    : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
                                   std::multiplies<int64_t>())) {}

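/// Construct a TensorSpec from a JSON dictionary of the form
///   {"name": <string>, "port": <int>, "type": <string>, "shape": <int array>}
/// where "type" is the spelling of one of the TFUTILS_SUPPORTED_TYPES. For
/// example (hypothetical values):
///   {"name": "x", "port": 0, "type": "int64_t", "shape": [2]}
/// Returns None, and emits an error on the context, if parsing fails.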
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value) {
  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << Value;
    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
    return None;
  };
  // FIXME: accept a Path as a parameter, and use it for error reporting.
  json::Path::Root Root("tensor_spec");
  json::ObjectMapper Mapper(Value, Root);
  if (!Mapper)
    return EmitError("Value is not a dict");

  std::string TensorName;
  int TensorPort = -1;
  std::string TensorType;
  std::vector<int64_t> TensorShape;

  if (!Mapper.map<std::string>("name", TensorName))
    return EmitError("'name' property not present or not a string");
  if (!Mapper.map<std::string>("type", TensorType))
    return EmitError("'type' property not present or not a string");
  if (!Mapper.map<int>("port", TensorPort))
    return EmitError("'port' property not present or not an int");
  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
    return EmitError("'shape' property not present or not an int array");

#define PARSE_TYPE(T, E)                                                       \
  if (TensorType == #T)                                                        \
    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
  TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
#undef PARSE_TYPE
  return None;
}

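/// Load the output specs from <ModelPath>/output_spec.json, or from
/// SpecFileOverride if one is given. The file must contain a JSON array of
///   {"tensor_spec": <TensorSpec>, "logging_name": <string>}
/// dictionaries, and the first entry must describe the decision tensor, with
/// logging_name equal to ExpectedDecisionName.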
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride) {
  SmallVector<char, 128> OutputSpecsPath;
  StringRef FileName = SpecFileOverride;
  if (FileName.empty()) {
    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
  }

  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
  if (!BufferOrError) {
    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
                  BufferOrError.getError().message());
    return None;
  }
  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
  if (!ParsedJSONValues) {
    Ctx.emitError("Could not parse specs file: " + FileName);
    return None;
  }
  auto ValuesArray = ParsedJSONValues->getAsArray();
  if (!ValuesArray) {
    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
                  "logging_name:<name>} dictionaries");
    return None;
  }
  std::vector<LoggedFeatureSpec> Ret;
  for (const auto &Value : *ValuesArray)
    if (const auto *Obj = Value.getAsObject())
      if (const auto *SpecPart = Obj->get("tensor_spec"))
        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
          if (auto LoggingName = Obj->getString("logging_name")) {
            if (!TensorSpec->isElementType<int64_t>() &&
                !TensorSpec->isElementType<int32_t>() &&
                !TensorSpec->isElementType<float>()) {
              Ctx.emitError(
                  "Only int64, int32, and float tensors are supported. "
                  "Found unsupported type for tensor named " +
                  TensorSpec->name());
              return None;
            }
            Ret.push_back({*TensorSpec, LoggingName->str()});
          }

  if (ValuesArray->size() != Ret.size()) {
    Ctx.emitError(
        "Unable to parse output spec. It should be a json file containing an "
        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
        "with a json object describing a TensorSpec; and a 'logging_name' key, "
        "which is a string to use as name when logging this tensor in the "
        "training log.");
    return None;
  }
  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
    Ctx.emitError("The first output spec must describe the decision tensor, "
                  "and must have the logging_name " +
                  StringRef(ExpectedDecisionName));
    return None;
  }
  return Ret;
}

class TFModelEvaluatorImpl {
public:
  TFModelEvaluatorImpl(StringRef SavedModelPath,
                       const std::vector<TensorSpec> &InputSpecs,
                       function_ref<TensorSpec(size_t)> GetOutputSpecs,
                       size_t OutputSpecsSize, const char *Tags);

  bool isValid() const { return IsValid; }
  size_t OutputSize() const { return OutputFeed.size(); }

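  // Run the session over the pre-populated input tensors, writing the raw
  // output tensors into Output. The nullptr/0 arguments to TF_SessionRun are
  // (in order) the run options, the target operations and their count, and
  // the run metadata, none of which we use.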
  void evaluate(TF_Tensor **Output, TF_Status *Status) {
    TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
                  Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
                  nullptr, 0, nullptr, Status);
  }

  void initInput(size_t Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
  const std::vector<TF_Tensor *> &getInput() const { return Input; }

  ~TFModelEvaluatorImpl();

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match, by index, the corresponding InputFeed
  /// value. We set up the tensors once and just mutate their scalars before
  /// each evaluation. The input tensors keep their value after an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output tensor vector must match by index the corresponding element in
  /// the OutputFeed.
  std::vector<TF_Output> OutputFeed;

  void invalidate() { IsValid = false; }

  bool IsValid = true;

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndInvalidate(const TF_Output &Output,
                                const TensorSpec &OutputSpec);
};
} // namespace llvm

TFModelEvaluatorImpl::TFModelEvaluatorImpl(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags = "serve")
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
      OutputFeed(OutputSpecsSize) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();

  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    invalidate();
  }
  for (size_t I = 0; I < InputSpecs.size(); ++I) {
    auto &InputSpec = InputSpecs[I];
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
        InputSpec.port()};
    if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
      return;
    initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
              InputSpec.shape());
  }
  for (size_t I = 0; I < OutputSpecsSize; ++I) {
    auto OutputSpec = GetOutputSpecs(I);
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
        OutputSpec.port()};
    if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
      return;
  }
}

TFModelEvaluator::TFModelEvaluator(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags)
    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
                                    OutputSpecsSize, Tags)) {
  if (!Impl->isValid())
    Impl.reset();
}

TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                   const std::vector<TensorSpec> &InputSpecs,
                                   const std::vector<TensorSpec> &OutputSpecs,
                                   const char *Tags)
    : TFModelEvaluator(
          SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
          OutputSpecs.size(), Tags) {}
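
// A minimal usage sketch; the model path, tensor names, and shapes below are
// hypothetical, and getTensor/getTensorValue are the typed accessors declared
// in TFUtils.h:
//   TFModelEvaluator Evaluator(
//       "path/to/saved_model",
//       {TensorSpec::createSpec<int64_t>("input", {1})},
//       {TensorSpec::createSpec<float>("output", {1})});
//   if (Evaluator.isValid()) {
//     *Evaluator.getTensor<int64_t>(0) = 42;
//     if (auto Result = Evaluator.evaluate())
//       float F = *Result->getTensorValue<float>(0);
//   }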

TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
  for (auto *T : Input) {
    TF_DeleteTensor(T);
  }
  if (Session == nullptr)
    return;
  auto Status = createTFStatus();
  TF_DeleteSession(Session, Status.get());
  Session = nullptr;
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
    errs() << "Could not delete TF session";
}

bool TFModelEvaluatorImpl::checkReportAndInvalidate(
    const TF_Output &Output, const TensorSpec &OutputSpec) {
  if (Output.oper)
    return true;
  errs() << "Could not find TF_Output named: " + OutputSpec.name();
  IsValid = false;
  return IsValid;
}

Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  std::unique_ptr<EvaluationResultImpl> Ret =
      std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
  auto Status = createTFStatus();
  Impl->evaluate(Ret->getOutput().data(), Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    Impl.reset();
    return None;
  }
  return EvaluationResult(std::move(Ret));
}

void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
                                     const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;

  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
}

void *TFModelEvaluator::getUntypedInput(size_t Index) {
  return TF_TensorData(Impl->getInput()[Index]);
}

TFModelEvaluator::EvaluationResult::EvaluationResult(
    std::unique_ptr<EvaluationResultImpl> Impl)
    : Impl(std::move(Impl)) {}

TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
    : Impl(std::move(Other.Impl)) {}

TFModelEvaluator::EvaluationResult &
TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
  Impl = std::move(Other.Impl);
  return *this;
}

void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
  return TF_TensorData(Impl->getOutput()[Index]);
}

const void *
TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
  return TF_TensorData(Impl->getOutput()[Index]);
}

#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
  template <> int TensorSpec::getDataType<T>() { return E; }

TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)

#undef TFUTILS_GETDATATYPE_IMPL

TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}

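// Print the log as a text-format protobuf in the shape of a
// tensorflow.SequenceExample: one feature_list per logged tensor, followed,
// if IncludeReward is set, by the reward feature_list.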
void Logger::print(raw_ostream &OS) {
  if (RawLogData.empty())
    return;
  if (RawLogData[0].empty())
    return;
  size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
                       FeatureSpecs[0].Spec.getElementByteSize();
  size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
  if (NumberOfRecords == 0)
    return;
  size_t RewardSize =
      RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
  size_t NumberOfRewards = RawLogData.back().size() / RewardSize;

  OS << "feature_lists: {\n";
  for (size_t I = 0; I < FeatureSpecs.size(); ++I)
    writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
                                  NumberOfRecords);

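  // If only one reward was logged for the whole trajectory, it is the final
  // reward: writeRawTensorsAsFeatureLists then writes zeros for all but the
  // last step (see FinalReward above).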
  if (IncludeReward)
    writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
                                  RawLogData.back().data(), NumberOfRecords,
                                  NumberOfRewards == 1);

  OS << "}\n";
}
#endif // defined(LLVM_HAVE_TF_API)