//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
#define LLVM_ANALYSIS_UTILS_TFUTILS_H

#include "llvm/Config/llvm-config.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/JSON.h"

#include <memory>
#include <vector>

namespace llvm {

/// Load a SavedModel, find the given inputs and outputs, and set up storage
/// for input tensors. The user is responsible for correctly dimensioning the
/// input tensors and setting their values before calling evaluate().
/// To initialize:
/// - construct the object
/// - initialize the input tensors using initInput. Indices must correspond to
///   indices in the InputNames used at construction.
/// To use:
/// - set input values by using getInput to get each input tensor, and then
///   setting internal scalars, for all dimensions (tensors are row-major:
///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
/// - call evaluate. The input tensors' values are not consumed after this, and
///   may still be read.
/// - use the outputs in the output vector
/// A usage sketch follows the TFModelEvaluator class definition below.
class TFModelEvaluatorImpl;
class EvaluationResultImpl;

/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
/// for supported types), its name and port (see "TensorFlow: Large-Scale
/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
///
/// TensorSpec is used to set up a TFModelEvaluator by describing the expected
/// inputs and outputs.
class TensorSpec final {
public:
  template <typename T>
  static TensorSpec createSpec(const std::string &Name,
                               const std::vector<int64_t> &Shape,
                               int Port = 0) {
    return TensorSpec(Name, Port, getDataType<T>(), Shape);
  }

  const std::string &name() const { return Name; }
  int port() const { return Port; }
  int typeIndex() const { return TypeIndex; }
  const std::vector<int64_t> &shape() const { return Shape; }

  bool operator==(const TensorSpec &Other) const {
    return Name == Other.Name && Port == Other.Port &&
           TypeIndex == Other.TypeIndex && Shape == Other.Shape;
  }

  bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }

  /// Get the number of elements in a tensor with this shape.
  size_t getElementCount() const { return ElementCount; }
  /// Get the size, in bytes, of one element.
  size_t getElementByteSize() const;

  template <typename T> bool isElementType() const {
    return getDataType<T>() == TypeIndex;
  }

private:
  TensorSpec(const std::string &Name, int Port, int TypeIndex,
             const std::vector<int64_t> &Shape);

  template <typename T> static int getDataType() {
    llvm_unreachable("Undefined tensor type");
  }

  std::string Name;
  int Port = 0;
  int TypeIndex = 0;
  std::vector<int64_t> Shape;
  size_t ElementCount = 0;
};
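
// Example (an illustrative sketch): creating the spec of a 2x3 row-major
// float tensor. The tensor name "input_feature" is hypothetical.
//
//   TensorSpec Spec = TensorSpec::createSpec<float>("input_feature", {2, 3});
//   assert(Spec.getElementCount() == 6);
//   assert(Spec.isElementType<float>() && !Spec.isElementType<double>());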

/// Construct a TensorSpec from a JSON dictionary of the form:
/// { "name": <string>,
///   "port": <int>,
///   "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
///   "shape": <array of ints> }
/// For the "type" field, see the C++ primitive types used in
/// TFUTILS_SUPPORTED_TYPES.
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value);

struct LoggedFeatureSpec {
  TensorSpec Spec;
  Optional<std::string> LoggingName;
};

/// Load the output specs. If SpecFileOverride is not empty, that path is used.
/// Otherwise, the file is assumed to be called 'output_spec.json' and to be
/// found under ModelPath (the model directory).
/// The first output tensor name must match ExpectedDecisionName.
/// In case of error, the return is None and the error is logged.
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride = StringRef());

/// Logging utility - given an ordered specification of features, and assuming
/// a scalar reward, allow logging feature values and rewards, and then print
/// as tf.train.SequenceExample text protobuf.
/// The assumption is that, for an event to be logged (i.e. a set of feature
/// values and a reward), the user calls the log* API for each feature exactly
/// once, providing the index matching the position in the feature spec list
/// provided at construction:
/// event 0:
///   logTensorValue(0, ...)
///   logTensorValue(1, ...)
///   ...
///   logReward(...)
/// event 1:
///   logTensorValue(0, ...)
///   logTensorValue(1, ...)
///   ...
///   logReward(...)
///
/// At the end, call print to generate the protobuf.
class Logger final {
public:
  /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
  /// be called, and the reward feature won't be printed out.
  Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
         const TensorSpec &RewardSpec, bool IncludeReward)
      : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
        RawLogData(FeatureSpecs.size() + IncludeReward),
        IncludeReward(IncludeReward) {}

  template <typename T> void logReward(T Value) {
    assert(IncludeReward);
    logTensorValue(RawLogData.size() - 1, &Value);
  }

  template <typename T> void logFinalReward(T Value) {
    assert(RawLogData.back().empty());
    logReward(Value);
  }

  template <typename T>
  void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
    const char *Start = reinterpret_cast<const char *>(Value);
    const char *End = Start + sizeof(T) * Size;
    RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
  }

  void print(raw_ostream &OS);

private:
  std::vector<LoggedFeatureSpec> FeatureSpecs;
  TensorSpec RewardSpec;
  /// RawLogData has one entry per feature, plus one more for the reward.
  /// Each feature's values are then stored in a vector, in succession.
  /// This means the ith event is stored at [*][i]
  std::vector<std::vector<char>> RawLogData;
  const bool IncludeReward;
};
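
// Example (a minimal sketch): logging two events, each with one scalar int64
// feature and a float reward, then printing the SequenceExample. The feature
// and reward names are hypothetical.
//
//   std::vector<LoggedFeatureSpec> Specs{
//       {TensorSpec::createSpec<int64_t>("feature0", {1}), None}};
//   Logger L(Specs, TensorSpec::createSpec<float>("reward", {1}),
//            /*IncludeReward=*/true);
//   int64_t F = 10;
//   L.logTensorValue(0, &F); // event 0: feature 0...
//   L.logReward(1.0f);       // ...then its reward
//   F = 20;
//   L.logTensorValue(0, &F); // event 1
//   L.logReward(0.5f);
//   L.print(outs());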

class TFModelEvaluator final {
public:
  /// The result of a model evaluation. Handles the lifetime of the output
  /// tensors, which means that their values need to be used before
  /// the EvaluationResult's dtor is called.
  class EvaluationResult {
  public:
    EvaluationResult(const EvaluationResult &) = delete;
    EvaluationResult &operator=(const EvaluationResult &Other) = delete;

    EvaluationResult(EvaluationResult &&Other);
    EvaluationResult &operator=(EvaluationResult &&Other);

    ~EvaluationResult();

    /// Get a (const) pointer to the first element of the tensor at Index.
    template <typename T> T *getTensorValue(size_t Index) {
      return static_cast<T *>(getUntypedTensorValue(Index));
    }

    template <typename T> const T *getTensorValue(size_t Index) const {
      return static_cast<const T *>(getUntypedTensorValue(Index));
    }

    /// Get a (const) pointer to the untyped data of the tensor.
    void *getUntypedTensorValue(size_t Index);
    const void *getUntypedTensorValue(size_t Index) const;

  private:
    friend class TFModelEvaluator;
    EvaluationResult(std::unique_ptr<EvaluationResultImpl> Impl);
    std::unique_ptr<EvaluationResultImpl> Impl;
  };

  TFModelEvaluator(StringRef SavedModelPath,
                   const std::vector<TensorSpec> &InputSpecs,
                   const std::vector<TensorSpec> &OutputSpecs,
                   const char *Tags = "serve");
  TFModelEvaluator(StringRef SavedModelPath,
                   const std::vector<TensorSpec> &InputSpecs,
                   function_ref<TensorSpec(size_t)> GetOutputSpecs,
                   size_t OutputSpecsSize, const char *Tags = "serve");

  ~TFModelEvaluator();
  TFModelEvaluator(const TFModelEvaluator &) = delete;
  TFModelEvaluator(TFModelEvaluator &&) = delete;

  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
  /// fails or the model is invalid, or an EvaluationResult otherwise. The
  /// inputs are assumed to have been already provided via getInput(). When
  /// returning None, it also invalidates this object.
  Optional<EvaluationResult> evaluate();

  /// Provides access to the input vector.
  template <typename T> T *getInput(size_t Index) {
    return static_cast<T *>(getUntypedInput(Index));
  }

  /// Returns true if the tensorflow model was loaded successfully, false
  /// otherwise.
  bool isValid() const { return !!Impl; }

private:
  void *getUntypedInput(size_t Index);
  std::unique_ptr<TFModelEvaluatorImpl> Impl;
};
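
// Example (a minimal sketch of the usage described at the top of this file):
// evaluating a model with one scalar int64 input and output. The model path
// and tensor names are hypothetical.
//
//   TFModelEvaluator Evaluator(
//       "path/to/saved_model",
//       {TensorSpec::createSpec<int64_t>("input", {1})},
//       {TensorSpec::createSpec<int64_t>("output", {1})});
//   if (Evaluator.isValid()) {
//     *Evaluator.getInput<int64_t>(0) = 42;
//     if (auto Result = Evaluator.evaluate()) {
//       int64_t Decision = *Result->getTensorValue<int64_t>(0);
//       (void)Decision;
//     }
//   }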

/// List of supported types, as a pair:
/// - C++ type
/// - enum name (implementation-specific)
#define TFUTILS_SUPPORTED_TYPES(M)                                            \
  M(float, TF_FLOAT)                                                          \
  M(double, TF_DOUBLE)                                                        \
  M(int8_t, TF_INT8)                                                          \
  M(uint8_t, TF_UINT8)                                                        \
  M(int16_t, TF_INT16)                                                        \
  M(uint16_t, TF_UINT16)                                                      \
  M(int32_t, TF_INT32)                                                        \
  M(uint32_t, TF_UINT32)                                                      \
  M(int64_t, TF_INT64)                                                        \
  M(uint64_t, TF_UINT64)

#define TFUTILS_GETDATATYPE_DEF(T, E)                                         \
  template <> int TensorSpec::getDataType<T>();

TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF)

#undef TFUTILS_GETDATATYPE_DEF
} // namespace llvm

#endif // LLVM_HAVE_TF_API
#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H