//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using the TensorFlow C APIs, allowing
// the loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};
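// Illustrative only (the values below are made up): a successful inline where
// the model disagreed with the default policy and the native code shrank
// would populate an InlineEvent roughly like so:
//   InlineEvent Event;
//   Event.DefaultDecision = 0; // the default policy would not have inlined
//   Event.AdvisedDecision = 1; // the model advised inlining
//   Event.Effect = true;       // the inline actually happened
//   Event.Reward = -120;       // native size estimate decreased by 120 units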
/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  std::vector<bool> Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these 2 to clearly out-of-bounds values, to make sure we do set them
  /// later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};
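// A minimal usage sketch, assuming no model under training (MUTR == nullptr),
// showing the intended call sequence; in this file, the advisor defined below
// is what actually drives these calls:
//   TrainingLogger TL("/tmp/inlining.log", /*MUTR=*/nullptr);
//   TL.logInlineEvent(Event, Advisor.getModelRunner()); // once per decision
//   TL.print(); // at the end, write everything out to LogFileName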
/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes (see the
/// flag sketch after this class for how each mode is selected):
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};
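// How the three modes above map to the flags declared at the top of this file
// (sketch; getDevelopmentModeAdvisor at the bottom of this file implements
// the selection):
//   1) -training-log=<path>                             log the default policy
//   2) -training-log=<path>
//      -ml-inliner-model-under-training=<savedmodel>    log the ML policy
//   3) -ml-inliner-model-under-training=<savedmodel>    inference only, no log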
/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}
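// Resulting column layout of FT (sketch), with N = NumberOfFeatures and
// K = OutputCount (K > 1 only when a model under training is provided):
//   columns [0, N)       the inlining features, as int64 tensors
//   columns [N, N+K-1)   the model's extra logged outputs; the first output
//                        (the decision) is skipped here and logged separately
//   DefaultDecisionPos == N+K-1, DecisionPos == N+K
// The reward is passed to Logger as a dedicated spec rather than as a column.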
/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}
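// The mode matrix implemented by getAdviceFromModel below (sketch):
//   inference, no logging -> defer to MLInlineAdvisor (pure validation,
//                            mode 3 above)
//   inference + logging   -> the recommendation comes from the model; the
//                            default decision is logged alongside it (mode 2)
//   logging only          -> the recommendation is the default advice (mode 1)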
std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  ModelUnderTrainingRunner *MUTRPtr = nullptr;
  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else {
    std::unique_ptr<ModelUnderTrainingRunner> MUTR;
    if (auto MaybeOutputSpecs = loadOutputSpecs(
            Ctx, DecisionName, TFModelUnderTrainingPath, TFOutputSpecOverride))
      MUTR = std::make_unique<ModelUnderTrainingRunner>(
          Ctx, TFModelUnderTrainingPath, getInputFeatures(), *MaybeOutputSpecs);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    MUTRPtr = MUTR.get();
    Runner = std::move(MUTR);
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)