xref: /llvm-project/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp (revision 00f692b94f9aa08ede4aaba6f2aafe17857599c4)
1 //===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the ML eviction advisor and reward injection pass
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AllocationOrder.h"
14 #include "RegAllocEvictionAdvisor.h"
15 #include "RegAllocGreedy.h"
16 #include "llvm/Analysis/InteractiveModelRunner.h"
17 #include "llvm/Analysis/MLModelRunner.h"
18 #include "llvm/Analysis/TensorSpec.h"
19 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
20 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
21 #include "llvm/Analysis/NoInferenceModelRunner.h"
22 #include "llvm/Analysis/Utils/TrainingLogger.h"
23 #endif
24 #include "MLRegAllocEvictAdvisor.h"
25 #include "llvm/Analysis/ReleaseModeModelRunner.h"
26 #include "llvm/CodeGen/CalcSpillWeights.h"
27 #include "llvm/CodeGen/LiveRegMatrix.h"
28 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/CodeGen/RegisterClassInfo.h"
34 #include "llvm/CodeGen/VirtRegMap.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/PassRegistry.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/ErrorHandling.h"
41 
42 #include <array>
43 #include <bitset>
44 #include <memory>
45 #include <unordered_map>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "ml-regalloc"
50 
51 // Generated header in release (AOT) mode
52 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
53 #include "RegAllocEvictModel.h"
54 using CompiledModelType = RegAllocEvictModel;
55 #else
56 using CompiledModelType = NoopSavedModelImpl;
57 #endif
58 
59 static cl::opt<std::string> InteractiveChannelBaseName(
60     "regalloc-evict-interactive-channel-base", cl::Hidden,
61     cl::desc(
62         "Base file path for the interactive mode. The incoming filename should "
63         "have the name <regalloc-evict-interactive-channel-base>.in, while the "
64         "outgoing name should be "
65         "<regalloc-evict-interactive-channel-base>.out"));
66 
67 static cl::opt<unsigned> MaxEvictionCount(
68     "mlregalloc-max-eviction-count", cl::Hidden,
69     cl::desc("The maximum number of times a live range can be "
70              "evicted before preventing it from being evicted"),
71     cl::init(100));
72 
73 // Options that only make sense in development mode
74 #ifdef LLVM_HAVE_TFLITE
75 #include "RegAllocScore.h"
76 #include "llvm/Analysis/Utils/TFUtils.h"
77 
78 static cl::opt<std::string> TrainingLog(
79     "regalloc-training-log", cl::Hidden,
80     cl::desc("Training log for the register allocator eviction model"));
81 
82 static cl::opt<std::string> ModelUnderTraining(
83     "regalloc-model", cl::Hidden,
84     cl::desc("The model being trained for register allocation eviction"));
85 
86 static cl::opt<bool> EnableDevelopmentFeatures(
87     "regalloc-enable-development-features", cl::Hidden,
88     cl::desc("Whether or not to enable features under development for the ML "
89              "regalloc advisor"));
90 
91 #else
92 static const bool EnableDevelopmentFeatures = false;
93 #endif // #ifdef LLVM_HAVE_TFLITE
94 
95 /// The score injection pass.
96 /// This pass calculates the score for a function and inserts it in the log, but
97 /// this happens only in development mode. It's a no-op otherwise.
98 namespace llvm {
99 extern cl::opt<unsigned> EvictInterferenceCutoff;
100 
101 class RegAllocScoring : public MachineFunctionPass {
102 public:
103   static char ID;
104 
105   RegAllocScoring() : MachineFunctionPass(ID) {
106     initializeRegAllocScoringPass(*PassRegistry::getPassRegistry());
107   }
108 
109   ~RegAllocScoring() override = default;
110 
111   StringRef getPassName() const override {
112     return "Register Allocation Pass Scoring";
113   }
114 
115   /// RegAllocReward analysis usage.
116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.setPreservesAll();
118     AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
119     AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
120     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
121     MachineFunctionPass::getAnalysisUsage(AU);
122   }
123 
124   /// Performs this pass
125   bool runOnMachineFunction(MachineFunction &) override;
126 };
127 
128 char RegAllocScoring::ID = 0;
129 FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
130 
131 } // namespace llvm
132 
133 INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
134                 "Register Allocation Scoring Pass", false, false)
135 
136 // ===================================
137 // Common ML Advisor declarations
138 // ===================================
139 namespace {
140 // The model can only accept a specified number of opcodes and will error it if
141 // fed an opcode it hasn't seen before. This constant sets the current cutoff.
142 static const int OpcodeValueCutoff = 17716;
143 
144 // Most features are as described above, so we'll reuse this vector in defining
145 // them.
146 static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
147 
148 // --------------
149 // Features table
150 // --------------
151 // For each interfering live range (incl. the candidate) we collect a number of
152 // features. However, because the features are of different types (and because
153 // of ML best practices), we organize the tensors per feature, not per
154 // candidate. Each such tensor has a scalar value corresponding to the
155 // interferring live range at that position, in the order in AllocationOrder.
156 // The last position corresponds to the virt reg seeking allocation.
157 // Exception to all that is the progression feature, which is just a scalar (see
158 // its documentation for details).
159 // Note on naming: the "_by_max" are normalized using the largest value of that
160 // tensor, as observed in the current decision making stage (i.e. for the
161 // current call to the advisor's tryFindEvictionCandidate)
162 //
163 // The feature list format: type, name, shape, documentation.
164 // Note: we can really just use int64 and float, hence the modeling of some
165 // bools as int64 values.
166 #define RA_EVICT_FEATURES_LIST(M)                                              \
167   M(int64_t, mask, PerLiveRangeShape,                                          \
168     "boolean values, 0 for unavailable candidates (i.e. if a position is 0, "  \
169     "it "                                                                      \
170     "can't be evicted)")                                                       \
171   M(int64_t, is_free, PerLiveRangeShape,                                       \
172     "boolean values, 1 if this phys reg is actually free (no interferences)")  \
173   M(float, nr_urgent, PerLiveRangeShape,                                       \
174     "number of 'urgent' intervals, normalized. Urgent are those that are OK "  \
175     "to break cascades")                                                       \
176   M(float, nr_broken_hints, PerLiveRangeShape,                                 \
177     "if this position were evicted, how many broken hints would there be")     \
178   M(int64_t, is_hint, PerLiveRangeShape,                                       \
179     "is this a preferred phys reg for the candidate")                          \
180   M(int64_t, is_local, PerLiveRangeShape,                                      \
181     "is this live range local to a basic block")                               \
182   M(float, nr_rematerializable, PerLiveRangeShape,                             \
183     "nr rematerializable ranges")                                              \
184   M(float, nr_defs_and_uses, PerLiveRangeShape,                                \
185     "bb freq - weighed nr defs and uses")                                      \
186   M(float, weighed_reads_by_max, PerLiveRangeShape,                            \
187     "bb freq - weighed nr of reads, normalized")                               \
188   M(float, weighed_writes_by_max, PerLiveRangeShape,                           \
189     "bb feq - weighed nr of writes, normalized")                               \
190   M(float, weighed_read_writes_by_max, PerLiveRangeShape,                      \
191     "bb freq - weighed nr of uses that are both read and writes, normalized")  \
192   M(float, weighed_indvars_by_max, PerLiveRangeShape,                          \
193     "bb freq - weighed nr of uses that are indvars, normalized")               \
194   M(float, hint_weights_by_max, PerLiveRangeShape,                             \
195     "bb freq - weighed nr of uses that are hints, normalized")                 \
196   M(float, start_bb_freq_by_max, PerLiveRangeShape,                            \
197     "the freq in the start block, normalized")                                 \
198   M(float, end_bb_freq_by_max, PerLiveRangeShape,                              \
199     "freq of end block, normalized")                                           \
200   M(float, hottest_bb_freq_by_max, PerLiveRangeShape,                          \
201     "hottest BB freq, normalized")                                             \
202   M(float, liverange_size, PerLiveRangeShape,                                  \
203     "size (instr index diff) of the LR")                                       \
204   M(float, use_def_density, PerLiveRangeShape,                                 \
205     "the max weight, as computed by the manual heuristic")                     \
206   M(int64_t, max_stage, PerLiveRangeShape,                                     \
207     "largest stage of an interval in this LR")                                 \
208   M(int64_t, min_stage, PerLiveRangeShape,                                     \
209     "lowest stage of an interval in this LR")                                  \
210   M(float, progress, {1}, "ratio of current queue size to initial size")
211 
212 #ifdef LLVM_HAVE_TFLITE
213 #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)                                  \
214   M(int64_t, instructions, InstructionsShape,                                  \
215     "Opcodes of the instructions covered by the eviction problem")
216 
217 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)                                  \
218   M(int64_t, instructions_mapping, InstructionsMappingShape,                   \
219     "A binary matrix mapping LRs to instruction opcodes")                      \
220   M(float, mbb_frequencies, MBBFrequencyShape,                                 \
221     "A vector of machine basic block frequencies")                             \
222   M(int64_t, mbb_mapping, InstructionsShape,                                   \
223     "A vector of indices mapping instructions to MBBs")
224 #else
225 #define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M)
226 #define RA_EVICT_REST_DEVELOPMENT_FEATURES(M)
227 #endif
228 
229 // The model learns to pick one of the mask == 1 interferences. This is the
230 // name of the output tensor. The contract with the model is that the output
231 // will be guaranteed to be to a mask == 1 position. Using a macro here to
232 // avoid 'not used' warnings (and keep cond compilation to a minimum)
233 #define DecisionName "index_to_evict"
234 static const TensorSpec DecisionSpec =
235     TensorSpec::createSpec<int64_t>(DecisionName, {1});
236 
237 // Named features index.
238 enum FeatureIDs {
239 #define _FEATURE_IDX_SIMPLE(_, name, __, ___) name
240 #define _FEATURE_IDX(A, B, C, D) _FEATURE_IDX_SIMPLE(A, B, C, D),
241   RA_EVICT_FEATURES_LIST(_FEATURE_IDX) FeatureCount,
242 #ifdef LLVM_HAVE_TFLITE
243   RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX_SIMPLE) = FeatureCount,
244 #else
245   RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_FEATURE_IDX)
246 #endif // #ifdef LLVM_HAVE_TFLITE
247   RA_EVICT_REST_DEVELOPMENT_FEATURES(_FEATURE_IDX) FeaturesWithDevelopmentCount
248 #undef _FEATURE_IDX
249 #undef _FEATURE_IDX_SIMPLE
250 };
251 
252 // The ML advisor will typically have a sparse input to the evaluator, because
253 // various phys regs won't be available. It's easier (maintenance-wise) to
254 // bulk-reset the state of the evaluator each time we are about to use it
255 // again.
256 template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) {
257   size_t Ret = sizeof(T);
258   for (const auto V : Shape)
259     Ret *= V;
260   return Ret;
261 }
262 
263 void resetInputs(MLModelRunner &Runner) {
264 #define _RESET(TYPE, NAME, SHAPE, __)                                          \
265   std::memset(Runner.getTensorUntyped(FeatureIDs::NAME), 0,                    \
266               getTotalSize<TYPE>(SHAPE));
267   RA_EVICT_FEATURES_LIST(_RESET)
268   if (EnableDevelopmentFeatures) {
269     RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_RESET)
270     RA_EVICT_REST_DEVELOPMENT_FEATURES(_RESET)
271 #undef _RESET
272   }
273 }
274 
275 // Per-live interval components that get aggregated into the feature values
276 // that will be passed to the evaluator.
277 struct LIFeatureComponents {
278   double R = 0;
279   double W = 0;
280   double RW = 0;
281   double IndVarUpdates = 0;
282   double HintWeights = 0.0;
283   int64_t NumDefsAndUses = 0;
284   float HottestBlockFreq = 0.0;
285   bool IsRemat = false;
286 };
287 
288 using CandidateRegList =
289     std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
290 using FeaturesListNormalizer =
291     llvm::SmallVector<float, FeatureIDs::FeatureCount>;
292 
293 /// The ML evictor (commonalities between release and development mode)
294 class MLEvictAdvisor : public RegAllocEvictionAdvisor {
295 public:
296   MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
297                  MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
298                  const MachineLoopInfo &Loops);
299 
300 protected:
301   const RegAllocEvictionAdvisor &getDefaultAdvisor() const {
302     return static_cast<const RegAllocEvictionAdvisor &>(DefaultAdvisor);
303   }
304 
305   // The assumption is that if the Runner could not be constructed, we emit-ed
306   // error, and we shouldn't be asking for it here.
307   const MLModelRunner &getRunner() const { return *Runner; }
308 
309   /// This just calls Evaluate on the Runner, but in the development mode
310   /// case, if we're just capturing the log of the default advisor, it needs
311   /// to call the latter instead, so we need to pass all the necessary
312   /// parameters for it. In the development case, it will also log.
313   virtual int64_t
314   tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
315                                    const AllocationOrder &Order,
316                                    unsigned OrderLimit, uint8_t CostPerUseLimit,
317                                    const SmallVirtRegSet &FixedRegisters) const;
318 
319   /// Load the features of the given VirtReg (allocated or not) at column Pos,
320   /// but if  that can't be evicted, return false instead.
321   bool
322   loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
323                            bool IsHint, const SmallVirtRegSet &FixedRegisters,
324                            llvm::SmallVectorImpl<float> &Largest, size_t Pos,
325                            SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
326 
327 private:
328   static float getInitialQueueSize(const MachineFunction &MF);
329 
330   MCRegister tryFindEvictionCandidate(
331       const LiveInterval &VirtReg, const AllocationOrder &Order,
332       uint8_t CostPerUseLimit,
333       const SmallVirtRegSet &FixedRegisters) const override;
334 
335   void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
336                        llvm::SmallVectorImpl<float> &Largest, size_t Pos,
337                        int64_t IsHint, int64_t LocalIntfsCount, float NumUrgent,
338                        SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const;
339 
340   // Point-in-time: we didn't learn this, so we always delegate to the
341   // default.
342   bool canEvictHintInterference(
343       const LiveInterval &VirtReg, MCRegister PhysReg,
344       const SmallVirtRegSet &FixedRegisters) const override {
345     return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
346                                                         FixedRegisters);
347   }
348 
349   const LIFeatureComponents &
350   getLIFeatureComponents(const LiveInterval &LI) const;
351 
352   // Hold on to a default advisor for:
353   // 1) the implementation of canEvictHintInterference, because we didn't
354   // learn that nuance yet; 2) for bootstrapping (logging) in the development
355   // mode case.
356   const DefaultEvictionAdvisor DefaultAdvisor;
357   MLModelRunner *const Runner;
358   const MachineBlockFrequencyInfo &MBFI;
359   const MachineLoopInfo &Loops;
360 
361   // Indices of those features we don't want to normalize.
362   // This could be static and shared, but its initialization is non-trivial.
363   std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
364   const float InitialQSize;
365 
366   using RegID = unsigned;
367   mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
368 
369   mutable std::unordered_map<unsigned, unsigned> VirtRegEvictionCounts;
370 
371   void onEviction(Register RegBeingEvicted) const {
372     // If we cannot find the virtual register in the map, we just assume it has
373     // not been evicted before and thus has a value of zero (which is what the
374     // subscript operator returns by default).
375     ++VirtRegEvictionCounts[RegBeingEvicted.id()];
376   }
377 
378   unsigned getEvictionCount(Register Reg) const {
379     auto EvictionCountIt = VirtRegEvictionCounts.find(Reg.id());
380     if (EvictionCountIt != VirtRegEvictionCounts.end())
381       return EvictionCountIt->second;
382     return 0;
383   }
384 };
385 
386 #define _DECL_FEATURES(type, name, shape, _)                                   \
387   TensorSpec::createSpec<type>(#name, shape),
388 
389 // ===================================
390 // Release (AOT) - specifics
391 // ===================================
392 class ReleaseModeEvictionAdvisorAnalysis final
393     : public RegAllocEvictionAdvisorAnalysis {
394 public:
395   ReleaseModeEvictionAdvisorAnalysis()
396       : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {
397     if (EnableDevelopmentFeatures) {
398       InputFeatures = {RA_EVICT_FEATURES_LIST(
399           _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
400                            RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
401     } else {
402       InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
403     }
404   }
405   // support for isa<> and dyn_cast.
406   static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
407     return R->getAdvisorMode() == AdvisorMode::Release;
408   }
409 
410 private:
411   std::vector<TensorSpec> InputFeatures;
412 
413   void getAnalysisUsage(AnalysisUsage &AU) const override {
414     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
415     AU.addRequired<MachineLoopInfoWrapperPass>();
416     RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
417   }
418 
419   std::unique_ptr<RegAllocEvictionAdvisor>
420   getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
421     if (!Runner) {
422       if (InteractiveChannelBaseName.empty())
423         Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
424             MF.getFunction().getContext(), InputFeatures, DecisionName);
425       else
426         Runner = std::make_unique<InteractiveModelRunner>(
427             MF.getFunction().getContext(), InputFeatures, DecisionSpec,
428             InteractiveChannelBaseName + ".out",
429             InteractiveChannelBaseName + ".in");
430     }
431     return std::make_unique<MLEvictAdvisor>(
432         MF, RA, Runner.get(),
433         getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
434         getAnalysis<MachineLoopInfoWrapperPass>().getLI());
435   }
436   std::unique_ptr<MLModelRunner> Runner;
437 };
438 
439 // ===================================
440 // Development mode-specifics
441 // ===================================
442 //
443 // Features we log
444 #ifdef LLVM_HAVE_TFLITE
445 static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
446 
447 // Features we bind on the model. The tensor names have a prefix, and we also
448 // need to include some tensors that are expected to be present by the
449 // training algo.
450 // TODO: can we just get rid of these?
451 #define _DECL_TRAIN_FEATURES(type, name, shape, _)                             \
452   TensorSpec::createSpec<type>(std::string("action_") + #name, shape),
453 
454 class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
455 public:
456   DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
457                               MLModelRunner *Runner,
458                               const MachineBlockFrequencyInfo &MBFI,
459                               const MachineLoopInfo &Loops, Logger *Log)
460       : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}
461 
462 private:
463   int64_t tryFindEvictionCandidatePosition(
464       const LiveInterval &VirtReg, const AllocationOrder &Order,
465       unsigned OrderLimit, uint8_t CostPerUseLimit,
466       const SmallVirtRegSet &FixedRegisters) const override;
467 
468   Logger *const Log;
469 };
470 
471 class DevelopmentModeEvictionAdvisorAnalysis final
472     : public RegAllocEvictionAdvisorAnalysis {
473 public:
474   DevelopmentModeEvictionAdvisorAnalysis()
475       : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
476     if (EnableDevelopmentFeatures) {
477       InputFeatures = {RA_EVICT_FEATURES_LIST(
478           _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
479                            RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
480       TrainingInputFeatures = {
481           RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
482               RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
483                   RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
484                       TensorSpec::createSpec<float>("action_discount", {1}),
485           TensorSpec::createSpec<int32_t>("action_step_type", {1}),
486           TensorSpec::createSpec<float>("action_reward", {1})};
487     } else {
488       InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
489       TrainingInputFeatures = {
490           RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
491               TensorSpec::createSpec<float>("action_discount", {1}),
492           TensorSpec::createSpec<int32_t>("action_step_type", {1}),
493           TensorSpec::createSpec<float>("action_reward", {1})};
494     }
495   }
496   // support for isa<> and dyn_cast.
497   static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
498     return R->getAdvisorMode() == AdvisorMode::Development;
499   }
500 
501   void logRewardIfNeeded(const MachineFunction &MF,
502                          llvm::function_ref<float()> GetReward) override {
503     if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
504       return;
505     // The function pass manager would run all the function passes for a
506     // function, so we assume the last context belongs to this function. If
507     // this invariant ever changes, we can implement at that time switching
508     // contexts. At this point, it'd be an error
509     if (Log->currentContext() != MF.getName()) {
510       MF.getFunction().getContext().emitError(
511           "The training log context shouldn't have had changed.");
512     }
513     if (Log->hasObservationInProgress())
514       Log->logReward<float>(GetReward());
515   }
516 
517 private:
518   std::vector<TensorSpec> InputFeatures;
519   std::vector<TensorSpec> TrainingInputFeatures;
520 
521   void getAnalysisUsage(AnalysisUsage &AU) const override {
522     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
523     AU.addRequired<MachineLoopInfoWrapperPass>();
524     RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
525   }
526 
527   bool doInitialization(Module &M) override {
528     LLVMContext &Ctx = M.getContext();
529     if (ModelUnderTraining.empty() && TrainingLog.empty()) {
530       Ctx.emitError("Regalloc development mode should be requested with at "
531                     "least logging enabled and/or a training model");
532       return false;
533     }
534     if (ModelUnderTraining.empty())
535       Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
536     else
537       Runner = ModelUnderTrainingRunner::createAndEnsureValid(
538           Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
539     if (!Runner) {
540       Ctx.emitError("Regalloc: could not set up the model runner");
541       return false;
542     }
543     if (TrainingLog.empty())
544       return false;
545     std::error_code EC;
546     auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
547     if (EC) {
548       M.getContext().emitError(EC.message() + ":" + TrainingLog);
549       return false;
550     }
551     std::vector<TensorSpec> LFS = InputFeatures;
552     if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
553       append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
554     // We always log the output; in particular, if we're not evaluating, we
555     // don't have an output spec json file. That's why we handle the
556     // 'normal' output separately.
557     LFS.push_back(DecisionSpec);
558 
559     Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
560                                    /*IncludeReward*/ true);
561     return false;
562   }
563 
564   std::unique_ptr<RegAllocEvictionAdvisor>
565   getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
566     if (!Runner)
567       return nullptr;
568     if (Log)
569       Log->switchContext(MF.getName());
570     return std::make_unique<DevelopmentModeEvictAdvisor>(
571         MF, RA, Runner.get(),
572         getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
573         getAnalysis<MachineLoopInfoWrapperPass>().getLI(), Log.get());
574   }
575 
576   std::unique_ptr<MLModelRunner> Runner;
577   std::unique_ptr<Logger> Log;
578 };
579 
580 #endif // #ifdef LLVM_HAVE_TFLITE
581 } // namespace
582 
583 float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
584   auto &MRI = MF.getRegInfo();
585   unsigned NumUsedRegs = 0;
586   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
587     Register Reg = Register::index2VirtReg(I);
588     if (!MRI.reg_nodbg_empty(Reg))
589       ++NumUsedRegs;
590   }
591   return static_cast<float>(NumUsedRegs);
592 }
593 
594 MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
595                                MLModelRunner *Runner,
596                                const MachineBlockFrequencyInfo &MBFI,
597                                const MachineLoopInfo &Loops)
598     : RegAllocEvictionAdvisor(MF, RA), DefaultAdvisor(MF, RA),
599       Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
600       InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
601   assert(this->Runner);
602   Runner->switchContext(MF.getName());
603   DoNotNormalize.set(FeatureIDs::mask);
604   DoNotNormalize.set(FeatureIDs::is_free);
605   DoNotNormalize.set(FeatureIDs::is_hint);
606   DoNotNormalize.set(FeatureIDs::is_local);
607   DoNotNormalize.set(FeatureIDs::min_stage);
608   DoNotNormalize.set(FeatureIDs::max_stage);
609   DoNotNormalize.set(FeatureIDs::progress);
610 }
611 
612 int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
613     const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
614     const SmallVirtRegSet &) const {
615   int64_t Ret = Runner->evaluate<int64_t>();
616   assert(Ret >= 0);
617   assert(Ret <= CandidateVirtRegPos);
618   return Ret;
619 }
620 
621 bool MLEvictAdvisor::loadInterferenceFeatures(
622     const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
623     const SmallVirtRegSet &FixedRegisters,
624     llvm::SmallVectorImpl<float> &Largest, size_t Pos,
625     llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
626   // It is only possible to evict virtual register interference.
627   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) {
628     // leave unavailable
629     return false;
630   }
631 
632   const bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
633   int64_t LocalIntfs = 0;
634   float NumUrgent = 0.0f;
635 
636   // The cascade tracking is the same as in the default advisor
637   unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
638 
639   SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
640   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
641     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
642     // Different from the default heuristic, we don't make any assumptions
643     // about what having more than 10 results in the query may mean.
644     const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
645     if (IFIntervals.empty() && InterferingIntervals.empty())
646       continue;
647     if (IFIntervals.size() >= EvictInterferenceCutoff)
648       return false;
649     InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
650     for (const LiveInterval *Intf : reverse(IFIntervals)) {
651       assert(Intf->reg().isVirtual() &&
652              "Only expecting virtual register interference from query");
653       // This is the same set of legality checks as in the default case: don't
654       // try to evict fixed regs or 'done' ones. Also don't break cascades,
655       // except in the urgent case, with the same nuances used in the default
656       // heuristic.
657       // We could try sharing this between the advisors, but it may end up
658       // more complex than it is right now.
659       if (FixedRegisters.count(Intf->reg()))
660         return false;
661       if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
662         return false;
663       bool Urgent =
664           !VirtReg.isSpillable() &&
665           (Intf->isSpillable() ||
666            RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
667                RegClassInfo.getNumAllocatableRegs(
668                    MRI->getRegClass(Intf->reg())));
669 
670       unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
671       // There is a potential that the model could be adversarial and
672       // continually evict live ranges over and over again, leading to a
673       // large amount of compile time being spent in regalloc. If we hit the
674       // threshold, prevent the range from being evicted. We still let the
675       // range through if it is urgent as we are required to produce an
676       // eviction if the candidate is not spillable.
677       if (getEvictionCount(Intf->reg()) > MaxEvictionCount && !Urgent)
678         return false;
679 
680       // Only evict older cascades or live ranges without a cascade.
681       if (Cascade <= IntfCascade) {
682         if (!Urgent)
683           return false;
684         ++NumUrgent;
685       }
686 
687       LocalIntfs += (IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
688                      (!EnableLocalReassign || !canReassign(*Intf, PhysReg)));
689     }
690   }
691   // OK, so if we made it this far, this LR is an eviction candidate, load its
692   // features.
693   extractFeatures(InterferingIntervals, Largest, Pos, IsHint, LocalIntfs,
694                   NumUrgent, LRPosInfo);
695   return true;
696 }
697 
698 MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
699     const LiveInterval &VirtReg, const AllocationOrder &Order,
700     uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
701   auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
702   if (!MaybeOrderLimit)
703     return MCRegister::NoRegister;
704   unsigned OrderLimit = *MaybeOrderLimit;
705 
706   // The heuristic sets initial costs such as, if CostPerUseLimit is
707   // max<uint8_t>, then any of the costs of the legally-evictable intervals
708   // would be lower. When that happens, one of those will be selected.
709   // Therefore, we allow the candidate be selected, unless the candidate is
710   // unspillable, in which case it would be incorrect to not find a register
711   // for it.
712   const bool MustFindEviction =
713       (!VirtReg.isSpillable() && CostPerUseLimit == static_cast<uint8_t>(~0u));
714   // Number of available candidates - if 0, no need to continue.
715   size_t Available = 0;
716   // Make sure we don't have leftover partial state from an attempt where we
717   // had no available candidates and bailed out early.
718   resetInputs(*Runner);
719 
720   // Track the index->register mapping because AllocationOrder doesn't do that
721   // and we'd have to scan it.
722   // Also track their mask, to write asserts/debug.
723   CandidateRegList Regs;
724   Regs.fill({0, false});
725 
726   // Track the largest value of features seen during this eviction session. We
727   // only normalize (some of) the float features, but it's just simpler to
728   // dimension 'Largest' to all the features, especially since we have the
729   // 'DoNotNormalize' list.
730   FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0);
731 
732   // Same overal idea as in the default eviction policy - we visit the values
733   // of AllocationOrder one at a time. If it's not legally available, we mask
734   // off the corresponding feature column (==do nothing because we already
735   // reset all the features to 0) Use Pos to capture the column we load
736   // features at - in AllocationOrder order.
737   size_t Pos = 0;
738   SmallVector<LRStartEndInfo, NumberOfInterferences> LRPosInfo;
739   for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
740        ++I, ++Pos) {
741     MCRegister PhysReg = *I;
742     assert(!Regs[Pos].second);
743     assert(PhysReg);
744     if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
745       continue;
746     }
747     if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
748                                  Largest, Pos, LRPosInfo)) {
749       ++Available;
750       Regs[Pos] = std::make_pair(PhysReg, true);
751     }
752   }
753   if (Available == 0) {
754     // Nothing to decide, nothing to learn.
755     assert(!MustFindEviction);
756     return MCRegister::NoRegister;
757   }
758   const size_t ValidPosLimit = Pos;
759   // If we must find eviction, the candidate should be masked out of the
760   // decision making process.
761   Regs[CandidateVirtRegPos].second = !MustFindEviction;
762   if (!MustFindEviction)
763     extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
764                     CandidateVirtRegPos, /*IsHint*/ 0,
765                     /*LocalIntfsCount*/ 0,
766                     /*NumUrgent*/ 0.0, LRPosInfo);
767   assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
768                                "nothing to allocate initially.");
769 #ifdef LLVM_HAVE_TFLITE
770   if (EnableDevelopmentFeatures) {
771     extractInstructionFeatures(
772         LRPosInfo, Runner,
773         [this](SlotIndex InputIndex) -> int {
774           auto *CurrentMachineInstruction =
775               LIS->getInstructionFromIndex(InputIndex);
776           if (!CurrentMachineInstruction) {
777             return -1;
778           }
779           return CurrentMachineInstruction->getOpcode();
780         },
781         [this](SlotIndex InputIndex) -> float {
782           auto *CurrentMachineInstruction =
783               LIS->getInstructionFromIndex(InputIndex);
784           return MBFI.getBlockFreqRelativeToEntryBlock(
785               CurrentMachineInstruction->getParent());
786         },
787         [this](SlotIndex InputIndex) -> MachineBasicBlock * {
788           auto *CurrentMachineInstruction =
789               LIS->getInstructionFromIndex(InputIndex);
790           return CurrentMachineInstruction->getParent();
791         },
792         FeatureIDs::instructions, FeatureIDs::instructions_mapping,
793         FeatureIDs::mbb_frequencies, FeatureIDs::mbb_mapping,
794         LIS->getSlotIndexes()->getLastIndex());
795   }
796 #endif // #ifdef LLVM_HAVE_TFLITE
797   // Normalize the features.
798   for (auto &V : Largest)
799     V = V ? V : 1.0;
800   for (size_t FeatureIndex = 0; FeatureIndex < FeatureIDs::FeatureCount;
801        ++FeatureIndex) {
802     if (DoNotNormalize.test(FeatureIndex))
803       continue;
804     for (size_t Pos = 0; Pos < NumberOfInterferences; ++Pos) {
805       Runner->getTensor<float>(FeatureIndex)[Pos] /= Largest[FeatureIndex];
806     }
807   }
808   *Runner->getTensor<float>(FeatureIDs::progress) =
809       static_cast<float>(RA.getQueueSize()) / InitialQSize;
810 
811   // Get a decision.
812   size_t CandidatePos = tryFindEvictionCandidatePosition(
813       VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
814   // The contract with the ML side is that CandidatePos is mask == 1 (i.e.
815   // Regs[CandidatePos].second)
816   assert(Regs[CandidatePos].second);
817   if (CandidatePos == CandidateVirtRegPos) {
818     assert(!MustFindEviction);
819     return MCRegister::NoRegister;
820   }
821   assert(CandidatePos < ValidPosLimit);
822   (void)ValidPosLimit;
823 
824   // Update information about how many times the virtual registers being
825   // evicted have been evicted so that we can prevent the model from evicting
826   // the same ranges continually and eating compile time.
827   if (CandidatePos == CandidateVirtRegPos) {
828     onEviction(VirtReg.reg());
829   } else {
830     for (MCRegUnit Unit : TRI->regunits(Regs[CandidatePos].first)) {
831       LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
832       const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
833       for (const LiveInterval *Intf : reverse(IFIntervals)) {
834         onEviction(Intf->reg());
835       }
836     }
837   }
838 
839   return Regs[CandidatePos].first;
840 }
841 
842 const LIFeatureComponents &
843 MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
844   RegID ID = LI.reg().id();
845   LIFeatureComponents Empty;
846   auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
847   LIFeatureComponents &Ret = I.first->getSecond();
848   if (!I.second)
849     return Ret;
850 
851   SmallPtrSet<MachineInstr *, 8> Visited;
852   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
853 
854   for (MachineRegisterInfo::reg_instr_nodbg_iterator
855            I = MRI->reg_instr_nodbg_begin(LI.reg()),
856            E = MRI->reg_instr_nodbg_end();
857        I != E;) {
858     MachineInstr *MI = &*(I++);
859 
860     ++Ret.NumDefsAndUses;
861     if (!Visited.insert(MI).second)
862       continue;
863 
864     if (MI->isIdentityCopy() || MI->isImplicitDef())
865       continue;
866 
867     bool Reads, Writes;
868     std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
869 
870     float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
871     Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);
872 
873     Ret.R += (Reads && !Writes) * Freq;
874     Ret.W += (!Reads && Writes) * Freq;
875     Ret.RW += (Reads && Writes) * Freq;
876 
877     auto *MBB = MI->getParent();
878     auto *Loop = Loops.getLoopFor(MBB);
879     bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
880 
881     if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
882       Ret.IndVarUpdates += Freq;
883 
884     if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
885       Ret.HintWeights += Freq;
886   }
887   Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
888       LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
889   return Ret;
890 }
891 
892 // Overall, this currently mimics what we do for weight calculation, but instead
893 // of accummulating the various features, we keep them separate.
894 void MLEvictAdvisor::extractFeatures(
895     const SmallVectorImpl<const LiveInterval *> &Intervals,
896     llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
897     int64_t LocalIntfsCount, float NumUrgent,
898     SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const {
899   int64_t NumDefsAndUses = 0;
900   int64_t NumBrokenHints = 0;
901   double R = 0.0;
902   double W = 0.0;
903   double RW = 0.0;
904   double IndVarUpdates = 0.0;
905   double HintWeights = 0.0;
906   float StartBBFreq = 0.0;
907   float EndBBFreq = 0.0;
908   float HottestBlockFreq = 0.0;
909   int32_t NumRematerializable = 0;
910   float TotalWeight = 0.0;
911 
912   SlotIndex EndSI = LIS->getSlotIndexes()->getZeroIndex();
913   SlotIndex StartSI = LIS->getSlotIndexes()->getLastIndex();
914   int64_t MaxStage = 0;
915   int64_t MinStage =
916       Intervals.empty() ? 0 : std::numeric_limits<int64_t>::max();
917 
918   for (const auto *L : Intervals) {
919     const LiveInterval &LI = *L;
920     MaxStage = std::max<int64_t>(
921         MaxStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
922     MinStage = std::min<int64_t>(
923         MinStage, static_cast<int64_t>(RA.getExtraInfo().getStage(LI)));
924 
925     TotalWeight = std::max(TotalWeight, LI.weight());
926 
927     if (LI.beginIndex() < StartSI)
928       StartSI = LI.beginIndex();
929 
930     if (LI.endIndex() > EndSI)
931       EndSI = LI.endIndex();
932     const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
933     NumBrokenHints += VRM->hasPreferredPhys(LI.reg());
934 
935     NumDefsAndUses += LIFC.NumDefsAndUses;
936     HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
937     R += LIFC.R;
938     W += LIFC.W;
939     RW += LIFC.RW;
940 
941     IndVarUpdates += LIFC.IndVarUpdates;
942 
943     HintWeights += LIFC.HintWeights;
944     NumRematerializable += LIFC.IsRemat;
945 
946     if (EnableDevelopmentFeatures) {
947       for (auto CurrentSegment : LI) {
948         LRPosInfo.push_back(
949             LRStartEndInfo{CurrentSegment.start, CurrentSegment.end, Pos});
950       }
951     }
952   }
953   size_t Size = 0;
954   if (!Intervals.empty()) {
955     StartBBFreq =
956         MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(StartSI));
957     if (EndSI >= LIS->getSlotIndexes()->getLastIndex())
958       EndSI = LIS->getSlotIndexes()->getLastIndex().getPrevIndex();
959     EndBBFreq =
960         MBFI.getBlockFreqRelativeToEntryBlock(LIS->getMBBFromIndex(EndSI));
961     Size = StartSI.distance(EndSI);
962   }
963   // Set the features at the column 'Pos'.
964 #define SET(ID, TYPE, VAL)                                                     \
965   do {                                                                         \
966     Runner->getTensor<TYPE>(FeatureIDs::ID)[Pos] = static_cast<TYPE>(VAL);     \
967     if (!DoNotNormalize.test(FeatureIDs::ID))                                  \
968       Largest[FeatureIDs::ID] =                                                \
969           std::max(Largest[FeatureIDs::ID], static_cast<float>(VAL));          \
970   } while (false)
971   SET(mask, int64_t, 1);
972   SET(is_free, int64_t, Intervals.empty());
973   SET(nr_urgent, float, NumUrgent);
974   SET(nr_broken_hints, float, NumBrokenHints);
975   SET(is_hint, int64_t, IsHint);
976   SET(is_local, int64_t, LocalIntfsCount);
977   SET(nr_rematerializable, float, NumRematerializable);
978   SET(nr_defs_and_uses, float, NumDefsAndUses);
979   SET(weighed_reads_by_max, float, R);
980   SET(weighed_writes_by_max, float, W);
981   SET(weighed_read_writes_by_max, float, RW);
982   SET(weighed_indvars_by_max, float, IndVarUpdates);
983   SET(hint_weights_by_max, float, HintWeights);
984   SET(start_bb_freq_by_max, float, StartBBFreq);
985   SET(end_bb_freq_by_max, float, EndBBFreq);
986   SET(hottest_bb_freq_by_max, float, HottestBlockFreq);
987   SET(liverange_size, float, Size);
988   SET(use_def_density, float, TotalWeight);
989   SET(max_stage, int64_t, MaxStage);
990   SET(min_stage, int64_t, MinStage);
991 #undef SET
992 }
993 
994 void llvm::extractInstructionFeatures(
995     SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
996     function_ref<int(SlotIndex)> GetOpcode,
997     function_ref<float(SlotIndex)> GetMBBFreq,
998     function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
999     const int InstructionsIndex, const int InstructionsMappingIndex,
1000     const int MBBFreqIndex, const int MBBMappingIndex,
1001     const SlotIndex LastIndex) {
1002   // This function extracts instruction based features relevant to the eviction
1003   // problem currently being solved. This function ends up extracting two
1004   // tensors.
1005   // 1 - A vector of size max instruction count. It contains the opcodes of the
1006   // instructions spanned by all the intervals in the current instance of the
1007   // eviction problem.
1008   // 2 - A binary mapping matrix of size (LR count * max
1009   // instruction count) which maps where the LRs are live to the actual opcodes
1010   // for which they are live.
1011   // 3 - A vector of size max supported MBB count storing MBB frequencies,
1012   // encompassing all of the MBBs covered by the eviction problem.
1013   // 4 - A vector of size max instruction count of indices to members of the MBB
1014   // frequency vector, mapping each instruction to its associated MBB.
1015 
1016   // Start off by sorting the segments based on the beginning slot index.
1017   std::sort(
1018       LRPosInfo.begin(), LRPosInfo.end(),
1019       [](LRStartEndInfo A, LRStartEndInfo B) { return A.Begin < B.Begin; });
1020   size_t InstructionIndex = 0;
1021   size_t CurrentSegmentIndex = 0;
1022   SlotIndex CurrentIndex = LRPosInfo[0].Begin;
1023   std::map<MachineBasicBlock *, size_t> VisitedMBBs;
1024   size_t CurrentMBBIndex = 0;
1025   // This loop processes all the segments sequentially by starting at the
1026   // beginning slot index of the first segment, iterating through all the slot
1027   // indices before the end slot index of that segment (while checking for
1028   // overlaps with segments that start at greater slot indices). After hitting
1029   // that end index, the current segment being processed gets bumped until they
1030   // are all processed or the max instruction count is hit, where everything is
1031   // just truncated.
1032   while (true) {
1033     // If the index that we are currently at is within the current segment and
1034     // we haven't hit the max instruction count, continue processing the current
1035     // segment.
1036     while (CurrentIndex <= LRPosInfo[CurrentSegmentIndex].End &&
1037            InstructionIndex < ModelMaxSupportedInstructionCount) {
1038       int CurrentOpcode = GetOpcode(CurrentIndex);
1039       // If the current machine instruction is null, skip it
1040       if (CurrentOpcode == -1) {
1041         // If we're currently at the last index in the SlotIndex analysis,
1042         // we can't go any further, so return from the function
1043         if (CurrentIndex >= LastIndex) {
1044           return;
1045         }
1046         CurrentIndex = CurrentIndex.getNextIndex();
1047         continue;
1048       }
1049       MachineBasicBlock *CurrentMBBReference = GetMBBReference(CurrentIndex);
1050       if (VisitedMBBs.count(CurrentMBBReference) == 0) {
1051         VisitedMBBs[CurrentMBBReference] = CurrentMBBIndex;
1052         ++CurrentMBBIndex;
1053       }
1054       extractMBBFrequency(CurrentIndex, InstructionIndex, VisitedMBBs,
1055                           GetMBBFreq, CurrentMBBReference, RegallocRunner,
1056                           MBBFreqIndex, MBBMappingIndex);
1057       // Current code assumes we're not going to get any disjointed segments
1058       assert(LRPosInfo[CurrentSegmentIndex].Begin <= CurrentIndex);
1059       RegallocRunner->getTensor<int64_t>(InstructionsIndex)[InstructionIndex] =
1060           CurrentOpcode < OpcodeValueCutoff ? CurrentOpcode : 0;
1061       // set value in the binary mapping matrix for the current instruction
1062       auto CurrentSegmentPosition = LRPosInfo[CurrentSegmentIndex].Pos;
1063       RegallocRunner->getTensor<int64_t>(
1064           InstructionsMappingIndex)[CurrentSegmentPosition *
1065                                         ModelMaxSupportedInstructionCount +
1066                                     InstructionIndex] = 1;
1067       // All of the segments are sorted based on the beginning slot index, but
1068       // this doesn't mean that the beginning slot index of the next segment is
1069       // after the end segment of the one being currently processed. This while
1070       // loop checks for overlapping segments and modifies the portion of the
1071       // column in the mapping matrix for the currently processed instruction
1072       // for the LR it is checking. Also make sure that the beginning of the
1073       // current segment we're checking for overlap in is less than the current
1074       // index, otherwise we're done checking overlaps.
1075       size_t OverlapCheckCurrentSegment = CurrentSegmentIndex + 1;
1076       while (OverlapCheckCurrentSegment < LRPosInfo.size() &&
1077              LRPosInfo[OverlapCheckCurrentSegment].Begin <= CurrentIndex) {
1078         auto OverlapCurrentSegmentPosition =
1079             LRPosInfo[OverlapCheckCurrentSegment].Pos;
1080         if (LRPosInfo[OverlapCheckCurrentSegment].End >= CurrentIndex) {
1081           RegallocRunner->getTensor<int64_t>(
1082               InstructionsMappingIndex)[OverlapCurrentSegmentPosition *
1083                                             ModelMaxSupportedInstructionCount +
1084                                         InstructionIndex] = 1;
1085         }
1086         ++OverlapCheckCurrentSegment;
1087       }
1088       ++InstructionIndex;
1089       if (CurrentIndex >= LastIndex) {
1090         return;
1091       }
1092       CurrentIndex = CurrentIndex.getNextIndex();
1093     }
1094     // if we've just finished processing through the last segment or if we've
1095     // hit the maximum number of instructions, break out of the loop.
1096     if (CurrentSegmentIndex == LRPosInfo.size() - 1 ||
1097         InstructionIndex >= ModelMaxSupportedInstructionCount) {
1098       break;
1099     }
1100     // If the segments are not overlapping, we need to move to the beginning
1101     // index of the next segment to avoid having instructions not attached to
1102     // any register.
1103     if (LRPosInfo[CurrentSegmentIndex + 1].Begin >
1104         LRPosInfo[CurrentSegmentIndex].End) {
1105       CurrentIndex = LRPosInfo[CurrentSegmentIndex + 1].Begin;
1106     }
1107     ++CurrentSegmentIndex;
1108   }
1109 }
1110 
1111 void llvm::extractMBBFrequency(
1112     const SlotIndex CurrentIndex, const size_t CurrentInstructionIndex,
1113     std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
1114     function_ref<float(SlotIndex)> GetMBBFreq,
1115     MachineBasicBlock *CurrentMBBReference, MLModelRunner *RegallocRunner,
1116     const int MBBFreqIndex, const int MBBMappingIndex) {
1117   size_t CurrentMBBIndex = VisitedMBBs[CurrentMBBReference];
1118   float CurrentMBBFreq = GetMBBFreq(CurrentIndex);
1119   if (CurrentMBBIndex < ModelMaxSupportedMBBCount) {
1120     RegallocRunner->getTensor<float>(MBBFreqIndex)[CurrentMBBIndex] =
1121         CurrentMBBFreq;
1122     RegallocRunner->getTensor<int64_t>(
1123         MBBMappingIndex)[CurrentInstructionIndex] = CurrentMBBIndex;
1124   }
1125 }
1126 
1127 // Development mode-specific implementations
1128 #ifdef LLVM_HAVE_TFLITE
1129 
1130 RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
1131   return new DevelopmentModeEvictionAdvisorAnalysis();
1132 }
1133 
1134 int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
1135     const LiveInterval &VirtReg, const AllocationOrder &Order,
1136     unsigned OrderLimit, uint8_t CostPerUseLimit,
1137     const SmallVirtRegSet &FixedRegisters) const {
1138   int64_t Ret = 0;
1139   if (isa<ModelUnderTrainingRunner>(getRunner())) {
1140     Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
1141         VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
1142   } else {
1143     MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
1144         VirtReg, Order, CostPerUseLimit, FixedRegisters);
1145     // Find the index of the selected PhysReg. We need it for logging,
1146     // otherwise this is wasted cycles (but so would starting development mode
1147     // without a model nor logging)
1148     if (!PhysReg)
1149       Ret = CandidateVirtRegPos;
1150     else
1151       for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
1152            I != E; ++I, ++Ret)
1153         if (*I == PhysReg)
1154           break;
1155   }
1156   if (TrainingLog.empty())
1157     return Ret;
1158   // TODO(mtrofin): when we support optional rewards, this can go away. In the
1159   // meantime, we log the "pretend" reward (0) for the previous observation
1160   // before starting a new one.
1161   if (Log->hasObservationInProgress())
1162     Log->logReward<float>(0.0);
1163 
1164   Log->startObservation();
1165   size_t CurrentFeature = 0;
1166   size_t FeatureCount = EnableDevelopmentFeatures
1167                             ? FeatureIDs::FeaturesWithDevelopmentCount
1168                             : FeatureIDs::FeatureCount;
1169   for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
1170     Log->logTensorValue(CurrentFeature,
1171                         reinterpret_cast<const char *>(
1172                             getRunner().getTensorUntyped(CurrentFeature)));
1173   }
1174   if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
1175     for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
1176          ++I, ++CurrentFeature)
1177       Log->logTensorValue(
1178           CurrentFeature,
1179           reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
1180   // The output is right after the features and the extra outputs
1181   Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
1182   Log->endObservation();
1183   return Ret;
1184 }
1185 
1186 bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
1187   std::optional<float> CachedReward;
1188   auto GetReward = [&]() {
1189     if (!CachedReward)
1190       CachedReward = static_cast<float>(
1191           calculateRegAllocScore(
1192               MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI())
1193               .getScore());
1194     return *CachedReward;
1195   };
1196 
1197   getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
1198                                                                    GetReward);
1199   getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,
1200                                                                    GetReward);
1201   return false;
1202 }
1203 #endif // #ifdef LLVM_HAVE_TFLITE
1204 
1205 RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
1206   return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
1207                  !InteractiveChannelBaseName.empty()
1208              ? new ReleaseModeEvictionAdvisorAnalysis()
1209              : nullptr;
1210 }
1211 
1212 // In all cases except development mode, we don't need scoring.
1213 #if !defined(LLVM_HAVE_TFLITE)
1214 bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { return false; }
1215 #endif
1216