//===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that splits the constant pool up into 'islands'
// which are scattered throughout the function.  This is required due to the
// limited PC-relative displacements that ARM has.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-cp-islands"

#define ARM_CP_ISLANDS_OPT_NAME \
  "ARM constant island placement and branch shortening pass"
STATISTIC(NumCPEs,       "Number of constpool entries");
STATISTIC(NumSplit,      "Number of uncond branches inserted");
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");
STATISTIC(NumUBrFixed,   "Number of uncond branches fixed");
STATISTIC(NumTBs,        "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
STATISTIC(NumLEInserted, "Number of LE backwards branches inserted");

static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
          cl::desc("Adjust basic block layout to better use TB[BH]"));

static cl::opt<unsigned>
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
          cl::desc("The maximum number of iterations for convergence"));

static cl::opt<bool> SynthesizeThumb1TBB(
    "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
    cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
             "equivalent to the TBB/TBH instructions"));

namespace {

  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
  /// requires constant pool entries to be scattered among the instructions
  /// inside a function.  To do this, it completely ignores the normal LLVM
  /// constant pool; instead, it places constants wherever it feels like with
  /// special instructions.
  ///
  /// The terminology used in this pass includes:
  ///   Islands - Clumps of constants placed in the function.
  ///   Water   - Potential places where an island could be formed.
  ///   CPE     - A constant pool entry that has been placed somewhere, which
  ///             tracks a list of users.
  class ARMConstantIslands : public MachineFunctionPass {
    std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;

    /// WaterList - A sorted list of basic blocks where islands could be placed
    /// (i.e. blocks that don't fall through to the following block, due
    /// to a return, unreachable, or unconditional branch).
    std::vector<MachineBasicBlock*> WaterList;

    /// NewWaterList - The subset of WaterList that was created since the
    /// previous iteration by inserting unconditional branches.
    SmallSet<MachineBasicBlock*, 4> NewWaterList;

    using water_iterator = std::vector<MachineBasicBlock *>::iterator;

    /// CPUser - One user of a constant pool, keeping the machine instruction
    /// pointer, the constant pool being referenced, and the max displacement
    /// allowed from the instruction to the CP.  The HighWaterMark records the
    /// highest basic block where a new CPEntry can be placed.  To ensure this
    /// pass terminates, the CP entries are initially placed at the end of the
    /// function and then move monotonically to lower addresses.  The
    /// exception to this rule is when the current CP entry for a particular
    /// CPUser is out of range, but there is another CP entry for the same
    /// constant value in range.  We want to use the existing in-range CP
    /// entry, but if it later moves out of range, the search for new water
    /// should resume where it left off.  The HighWaterMark is used to record
    /// that point.
    struct CPUser {
      MachineInstr *MI;
      MachineInstr *CPEMI;
      MachineBasicBlock *HighWaterMark;
      unsigned MaxDisp;
      bool NegOk;
      bool IsSoImm;
      bool KnownAlignment = false;

      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
             bool neg, bool soimm)
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
        HighWaterMark = CPEMI->getParent();
      }

      /// getMaxDisp - Returns the maximum displacement supported by MI.
      /// Correct for unknown alignment.
      /// Conservatively subtract 2 bytes to handle weird alignment effects.
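      /// For example, a tLDRpci user starts with MaxDisp = 1020 bytes;
      /// getMaxDisp() then returns 1018 when the alignment of MI is known
      /// and 1016 when it is not.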
      unsigned getMaxDisp() const {
        return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
      }
    };

    /// CPUsers - Keep track of all of the machine instructions that use various
    /// constant pools and their max displacement.
    std::vector<CPUser> CPUsers;

    /// CPEntry - One per constant pool entry, keeping the machine instruction
    /// pointer, the constpool index, and the number of CPUsers which
    /// reference this entry.
    struct CPEntry {
      MachineInstr *CPEMI;
      unsigned CPI;
      unsigned RefCount;

      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
    };

    /// CPEntries - Keep track of all of the constant pool entry machine
    /// instructions. For each original constpool index (i.e. those that existed
    /// upon entry to this pass), it keeps a vector of entries.  Original
    /// elements are cloned as we go along; the clones are put in the vector of
    /// the original element, but have distinct CPIs.
    ///
    /// The first half of CPEntries contains generic constants, the second half
    /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
    /// which vector it will be in here.
    std::vector<std::vector<CPEntry>> CPEntries;

    /// Maps a JT index to the offset in CPEntries containing copies of that
    /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
    DenseMap<int, int> JumpTableEntryIndices;

    /// Maps a JT index to the LEA that actually uses the index to calculate its
    /// base address.
    DenseMap<int, int> JumpTableUserIndices;

    /// Maps a MachineBasicBlock to the number of jump table entries.
    DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount;

    /// ImmBranch - One per immediate branch, keeping the machine instruction
    /// pointer, conditional or unconditional, the max displacement,
    /// and (if isCond is true) the corresponding unconditional branch
    /// opcode.
    struct ImmBranch {
      MachineInstr *MI;
      unsigned MaxDisp : 31;
      bool isCond : 1;
      unsigned UncondBr;

      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
    };

    /// ImmBranches - Keep track of all the immediate branch instructions.
    std::vector<ImmBranch> ImmBranches;

    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
    SmallVector<MachineInstr*, 4> PushPopMIs;

    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
    SmallVector<MachineInstr*, 4> T2JumpTables;

    MachineFunction *MF;
    MachineConstantPool *MCP;
    const ARMBaseInstrInfo *TII;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    MachineDominatorTree *DT = nullptr;
    bool isThumb;
    bool isThumb1;
    bool isThumb2;
    bool isPositionIndependentOrROPI;

  public:
    static char ID;

    ARMConstantIslands() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override {
      return ARM_CP_ISLANDS_OPT_NAME;
    }

  private:
    void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
    void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
    bool BBHasFallthrough(MachineBasicBlock *MBB);
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
    Align getCPEAlign(const MachineInstr *CPEMI);
    void scanFunctionJumpTables();
    void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
    void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
    bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
    unsigned getCombinedIndex(const MachineInstr *CPEMI);
    int findInRangeCPEntry(CPUser &U, unsigned UserOffset);
    bool findAvailableWater(CPUser &U, unsigned UserOffset,
                            water_iterator &WaterIter, bool CloserWater);
    void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
                        MachineBasicBlock *&NewMBB);
    bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
    void removeDeadCPEMI(MachineInstr *CPEMI);
    bool removeUnusedCPEntries();
    bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                          MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                          bool DoDump = false);
    bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                        CPUser &U, unsigned &Growth);
    bool fixupImmediateBr(ImmBranch &Br);
    bool fixupConditionalBr(ImmBranch &Br);
    bool fixupUnconditionalBr(ImmBranch &Br);
    bool optimizeThumb2Instructions();
    bool optimizeThumb2Branches();
    bool reorderThumb2JumpTables();
    bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
                              unsigned &DeadSize, bool &CanDeleteLEA,
                              bool &BaseRegKill);
    bool optimizeThumb2JumpTables();
    MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
                                                  MachineBasicBlock *BB,
                                                  MachineBasicBlock *JTBB);

    unsigned getUserOffset(CPUser&) const;
    void dumpBBs();
    void verify();

    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         const CPUser &U) {
      return isOffsetInRange(UserOffset, TrialOffset,
                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
    }
  };

} // end anonymous namespace

char ARMConstantIslands::ID = 0;

/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  assert(is_sorted(*MF, [&BBInfo](const MachineBasicBlock &LHS,
                                  const MachineBasicBlock &RHS) {
    return BBInfo[LHS.getNumber()].postOffset() <
           BBInfo[RHS.getNumber()].postOffset();
  }));
  LLVM_DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
  for (CPUser &U : CPUsers) {
    unsigned UserOffset = getUserOffset(U);
    // Verify offset using the real max displacement without the safety
    // adjustment.
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
                         /* DoDump = */ true)) {
      LLVM_DEBUG(dbgs() << "OK\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Out of range.\n");
    dumpBBs();
    LLVM_DEBUG(MF->dump());
    llvm_unreachable("Constant pool entry out of range!");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
  LLVM_DEBUG({
    BBInfoVector &BBInfo = BBUtils->getBBInfo();
    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
      const BasicBlockInfo &BBI = BBInfo[J];
      dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
             << " kb=" << unsigned(BBI.KnownBits)
             << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign)
             << format(" size=%#x\n", BBInfo[J].Size);
    }
  });
}
#endif

// Align blocks where the previous block does not fall through. This may add
// extra NOPs but they will not be executed. It uses the PrefLoopAlignment as a
// measure of how much to align, and only runs at CodeGenOptLevel::Aggressive.
static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
  if (MF->getTarget().getOptLevel() != CodeGenOptLevel::Aggressive ||
      MF->getFunction().hasOptSize())
    return false;

  auto *TLI = STI->getTargetLowering();
  const Align Alignment = TLI->getPrefLoopAlignment();
  if (Alignment < 4)
    return false;

  bool Changed = false;
  bool PrevCanFallthrough = true;
  for (auto &MBB : *MF) {
    if (!PrevCanFallthrough) {
      Changed = true;
      MBB.setAlignment(Alignment);
    }

    PrevCanFallthrough = MBB.canFallThrough();

    // For LOBs, the ARMLowOverheadLoops pass may remove the unconditional
    // branch later in the pipeline.
    if (STI->hasLOB()) {
      for (const auto &MI : reverse(MBB.terminators())) {
        if (MI.getOpcode() == ARM::t2B &&
            MI.getOperand(0).getMBB() == MBB.getNextNode())
          continue;
        if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
            MI.getOpcode() == ARM::t2LoopEndDec) {
          PrevCanFallthrough = true;
          break;
        }
        // Any other terminator - nothing to do.
        break;
      }
    }
  }

  return Changed;
}

bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MCP = mf.getConstantPool();
  BBUtils = std::make_unique<ARMBasicBlockUtils>(mf);

  LLVM_DEBUG(dbgs() << "***** ARMConstantIslands: "
                    << MCP->getConstants().size() << " CP entries, aligned to "
                    << MCP->getConstantPoolAlign().value() << " bytes *****\n");

  STI = &MF->getSubtarget<ARMSubtarget>();
  TII = STI->getInstrInfo();
  isPositionIndependentOrROPI =
      STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
  AFI = MF->getInfo<ARMFunctionInfo>();
  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  isThumb = AFI->isThumbFunction();
  isThumb1 = AFI->isThumb1OnlyFunction();
  isThumb2 = AFI->isThumb2Function();

  bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
  // TBB generation code in this constant island pass has not been adapted to
  // deal with speculation barriers.
  if (STI->hardenSlsRetBr())
    GenerateTBB = false;

  // Renumber all of the machine basic blocks in the function, guaranteeing that
  // the numbers agree with the position of the block in the function.
  MF->RenumberBlocks();
  DT->updateBlockNumbers();

  // Try to reorder and otherwise adjust the block layout to make good use
  // of the TB[BH] instructions.
  bool MadeChange = false;
  if (GenerateTBB && AdjustJumpTableBlocks) {
    scanFunctionJumpTables();
    MadeChange |= reorderThumb2JumpTables();
    // Data is out of date, so clear it. It'll be re-computed later.
    T2JumpTables.clear();
    // Blocks may have shifted around. Keep the numbering up to date.
    MF->RenumberBlocks();
    DT->updateBlockNumbers();
  }

  // Align any non-fallthrough blocks.
  MadeChange |= AlignBlocks(MF, STI);

  // Perform the initial placement of the constant pool entries.  To start with,
  // we put them all at the end of the function.
  std::vector<MachineInstr*> CPEMIs;
  if (!MCP->isEmpty())
    doInitialConstPlacement(CPEMIs);

  if (MF->getJumpTableInfo())
    doInitialJumpTablePlacement(CPEMIs);

  // The next UID to take is the first unused one.
  AFI->initPICLabelUId(CPEMIs.size());

  // Do the initial scan of the function, building up information about the
  // sizes of each block, the location of all the water, and finding all of the
  // constant pool users.
  initializeFunctionInfo(CPEMIs);
  CPEMIs.clear();
  LLVM_DEBUG(dumpBBs());

  // Functions with jump tables need an alignment of 4 because they use the ADR
  // instruction, which aligns the PC to 4 bytes before adding an offset.
  if (!T2JumpTables.empty())
    MF->ensureAlignment(Align(4));

  // Remove dead constant pool entries.
  MadeChange |= removeUnusedCPEntries();

  // Iteratively place constant pool entries and fix up branches until there
  // is no change.
  unsigned NoCPIters = 0, NoBRIters = 0;
  while (true) {
    LLVM_DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
    bool CPChange = false;
    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
      // For most inputs, it converges in no more than 5 iterations.
      // If it hasn't converged after 10, the input may have a huge BB or many
      // CPEs. In this case, we will try different heuristics.
      CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
    if (CPChange && ++NoCPIters > CPMaxIteration)
      report_fatal_error("Constant Island pass failed to converge!");
    LLVM_DEBUG(dumpBBs());

    // Clear NewWaterList now.  If we split a block for branches, it should
    // appear as "new water" for the next iteration of constant pool placement.
    NewWaterList.clear();

    LLVM_DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
      BRChange |= fixupImmediateBr(ImmBranches[i]);
    if (BRChange && ++NoBRIters > 30)
      report_fatal_error("Branch Fix Up pass failed to converge!");
    LLVM_DEBUG(dumpBBs());

    if (!CPChange && !BRChange)
      break;
    MadeChange = true;
  }

  // Shrink 32-bit Thumb2 load and store instructions.
  if (isThumb2 && !STI->prefers32BitThumb())
    MadeChange |= optimizeThumb2Instructions();

  // Shrink 32-bit branch instructions.
  if (isThumb && STI->hasV8MBaselineOps())
    MadeChange |= optimizeThumb2Branches();

  // Optimize jump tables using TBB / TBH.
  if (GenerateTBB && !STI->genExecuteOnly())
    MadeChange |= optimizeThumb2JumpTables();

  // After a while, this might be made debug-only, but it is not expensive.
  verify();

  // Save the mapping between original and cloned constpool entries.
  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
    for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
      const CPEntry &CPE = CPEntries[i][j];
      if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
        AFI->recordCPEClone(i, CPE.CPI);
    }
  }

  LLVM_DEBUG(dbgs() << '\n'; dumpBBs());

  BBUtils->clear();
  WaterList.clear();
  CPUsers.clear();
  CPEntries.clear();
  JumpTableEntryIndices.clear();
  JumpTableUserIndices.clear();
  BlockJumpTableRefCount.clear();
  ImmBranches.clear();
  PushPopMIs.clear();
  T2JumpTables.clear();

  return MadeChange;
}

/// Perform the initial placement of the regular constant pool entries.
/// To start with, we put them all at the end of the function.
void
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
  // Create the basic block to hold the CPEs.
  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
  MF->push_back(BB);

  // MachineConstantPool measures alignment in bytes.
  const Align MaxAlign = MCP->getConstantPoolAlign();
  const unsigned MaxLogAlign = Log2(MaxAlign);

  // Mark the basic block as required by the const-pool.
  BB->setAlignment(MaxAlign);

  // The function needs to be as aligned as the basic blocks. The linker may
  // move functions around based on their alignment.
  // Special case: halfword literals still need word alignment on the function.
  Align FuncAlign = MaxAlign;
  if (MaxAlign == 2)
    FuncAlign = Align(4);
  MF->ensureAlignment(FuncAlign);

  // Order the entries in BB by descending alignment.  That ensures correct
  // alignment of all entries as long as BB is sufficiently aligned.  Keep
  // track of the insertion point for each alignment.  We are going to bucket
  // sort the entries as they are created.
  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxLogAlign + 1,
                                                       BB->end());

  // Add all of the constants from the constant pool to the end block, use an
  // identity mapping of CPIs to CPEs.
  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

  const DataLayout &TD = MF->getDataLayout();
  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
    unsigned Size = CPs[i].getSizeInBytes(TD);
    Align Alignment = CPs[i].getAlign();
    // Verify that all constant pool entries are a multiple of their alignment.
    // If not, we would have to pad them out so that instructions stay aligned.
    assert(isAligned(Alignment, Size) &&
           "CP Entry not multiple of its alignment!");

    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
    unsigned LogAlign = Log2(Alignment);
    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
    MachineInstr *CPEMI =
      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
        .addImm(i).addConstantPoolIndex(i).addImm(Size);
    CPEMIs.push_back(CPEMI);

    // Ensure that future entries with higher alignment get inserted before
    // CPEMI. This is bucket sort with iterators.
    for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = CPEMI;

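    // For example, if a 4-byte-aligned entry is created first and an
    // 8-byte-aligned entry second, InsPoint[3] was redirected to the first
    // entry when it was created, so the second is inserted before it and the
    // block stays sorted by descending alignment.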
    // Add a new CPEntry, but no corresponding CPUser yet.
    CPEntries.emplace_back(1, CPEntry(CPEMI, i));
    ++NumCPEs;
    LLVM_DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
                      << Size << ", align = " << Alignment.value() << '\n');
  }
  LLVM_DEBUG(BB->dump());
}

/// Do initial placement of the jump tables. Because Thumb2's TBB and TBH
/// instructions can be made more efficient if the jump table immediately
/// follows the instruction, it's best to place them immediately next to their
/// jumps to begin with. In almost all cases they'll never be moved from that
/// position.
void ARMConstantIslands::doInitialJumpTablePlacement(
    std::vector<MachineInstr *> &CPEMIs) {
  unsigned i = CPEntries.size();
  auto MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

  // Only inline jump tables are placed in the function.
  if (MJTI->getEntryKind() != MachineJumpTableInfo::EK_Inline)
    return;

  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    auto MI = MBB.getLastNonDebugInstr();
    // Look past potential SpeculationBarriers at end of BB.
    while (MI != MBB.end() &&
           (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) ||
            MI->isDebugInstr()))
      --MI;

    if (MI == MBB.end())
      continue;

    unsigned JTOpcode;
    switch (MI->getOpcode()) {
    default:
      continue;
    case ARM::BR_JTadd:
    case ARM::BR_JTr:
    case ARM::tBR_JTr:
    case ARM::BR_JTm_i12:
    case ARM::BR_JTm_rs:
      // These instructions are emitted only in ARM or Thumb1 modes which do not
      // support PACBTI. Hence we don't add BTI instructions in the destination
      // blocks.
      assert(!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement() &&
             "Branch protection must not be enabled for Arm or Thumb1 modes");
      JTOpcode = ARM::JUMPTABLE_ADDRS;
      break;
    case ARM::t2BR_JT:
      JTOpcode = ARM::JUMPTABLE_INSTS;
      break;
    case ARM::tTBB_JT:
    case ARM::t2TBB_JT:
      JTOpcode = ARM::JUMPTABLE_TBB;
      break;
    case ARM::tTBH_JT:
    case ARM::t2TBH_JT:
      JTOpcode = ARM::JUMPTABLE_TBH;
      break;
    }

    unsigned NumOps = MI->getDesc().getNumOperands();
    MachineOperand JTOp =
      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOp.getIndex();
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
                                  DebugLoc(), TII->get(JTOpcode))
                              .addImm(i++)
                              .addJumpTableIndex(JTI)
                              .addImm(Size);
    CPEMIs.push_back(CPEMI);
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
    if (!LastCorrectlyNumberedBB)
      LastCorrectlyNumberedBB = &MBB;
  }

  // If we did anything then we need to renumber the subsequent blocks.
  if (LastCorrectlyNumberedBB) {
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
    DT->updateBlockNumbers();
  }
}

/// BBHasFallthrough - Return true if the specified basic block can fall
/// through into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
  // Get the next machine basic block in the function.
  MachineFunction::iterator MBBI = MBB->getIterator();
  // Can't fall off end of function.
  if (std::next(MBBI) == MBB->getParent()->end())
    return false;

  MachineBasicBlock *NextBB = &*std::next(MBBI);
  if (!MBB->isSuccessor(NextBB))
    return false;

  // Try to analyze the end of the block. A potential fallthrough may already
  // have an unconditional branch for whatever reason.
  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 4> Cond;
  bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
  return TooDifficult || FBB == nullptr;
}

/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
/// look up the corresponding CPEntry.
ARMConstantIslands::CPEntry *
ARMConstantIslands::findConstPoolEntry(unsigned CPI,
                                       const MachineInstr *CPEMI) {
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  // Number of entries per constpool index should be small, just do a
  // linear search.
  for (CPEntry &CPE : CPEs)
    if (CPE.CPEMI == CPEMI)
      return &CPE;
  return nullptr;
}

/// getCPEAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI.
Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
  switch (CPEMI->getOpcode()) {
  case ARM::CONSTPOOL_ENTRY:
    break;
  case ARM::JUMPTABLE_TBB:
    return isThumb1 ? Align(4) : Align(1);
  case ARM::JUMPTABLE_TBH:
    return isThumb1 ? Align(4) : Align(2);
  case ARM::JUMPTABLE_INSTS:
    return Align(2);
  case ARM::JUMPTABLE_ADDRS:
    return Align(4);
  default:
    llvm_unreachable("unknown constpool entry kind");
  }

  unsigned CPI = getCombinedIndex(CPEMI);
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
  return MCP->getConstants()[CPI].getAlign();
}

// Exception landing pads, blocks that have their address taken, and function
// entry blocks will always be (potential) indirect jump targets, regardless of
// whether or not they are referenced by jump tables.
static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) {
  return MBB.isEHPad() || MBB.hasAddressTaken() ||
         &MBB == &MBB.getParent()->front();
}

/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
void ARMConstantIslands::scanFunctionJumpTables() {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &I : MBB)
      if (I.isBranch() &&
          (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
        T2JumpTables.push_back(&I);
  }

  if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
    return;

  if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo())
    for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
      for (const MachineBasicBlock *MBB : JTE.MBBs) {
        if (isAlwaysIndirectTarget(*MBB))
          // Set the reference count essentially to infinity; it will never
          // reach zero and the BTI instruction will never be removed.
          BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max();
        else
          ++BlockJumpTableRefCount[MBB];
      }
}

/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {

  BBUtils->computeAllBlockSizes();
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  // The known bits of the entry block offset are determined by the function
  // alignment.
  BBInfo.front().KnownBits = Log2(MF->getAlignment());

  // Compute block offsets and known bits.
  BBUtils->adjustBBOffsetsAfter(&MF->front());

  // We only care about jump table instructions when jump tables are inline.
  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  bool InlineJumpTables =
      MJTI && MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline;

  // Now go back through the instructions and build up our data structures.
  for (MachineBasicBlock &MBB : *MF) {
    // If this block doesn't fall through into the next MBB, then this is
    // 'water' where a constant pool island could be placed.
    if (!BBHasFallthrough(&MBB))
      WaterList.push_back(&MBB);

    for (MachineInstr &I : MBB) {
      if (I.isDebugInstr())
        continue;

      unsigned Opc = I.getOpcode();
      if (I.isBranch()) {
        bool isCond = false;
        unsigned Bits = 0;
        unsigned Scale = 1;
        int UOpc = Opc;
        switch (Opc) {
        default:
          continue;  // Ignore other branches.
        case ARM::t2BR_JT:
        case ARM::tBR_JTr:
          if (InlineJumpTables)
            T2JumpTables.push_back(&I);
          continue;   // Does not get an entry in ImmBranches.
        case ARM::Bcc:
          isCond = true;
          UOpc = ARM::B;
          [[fallthrough]];
        case ARM::B:
          Bits = 24;
          Scale = 4;
          break;
        case ARM::tBcc:
          isCond = true;
          UOpc = ARM::tB;
          Bits = 8;
          Scale = 2;
          break;
        case ARM::tB:
          Bits = 11;
          Scale = 2;
          break;
        case ARM::t2Bcc:
          isCond = true;
          UOpc = ARM::t2B;
          Bits = 20;
          Scale = 2;
          break;
        case ARM::t2B:
          Bits = 24;
          Scale = 2;
          break;
        }

        // Record this immediate branch.
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
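        // For example, ARM::tBcc has Bits == 8 and Scale == 2, giving
        // MaxOffs = ((1 << 7) - 1) * 2 = 254 bytes of conditional branch
        // range before it must be fixed up.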
        ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc));
      }

      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
        PushPopMIs.push_back(&I);

      if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
          Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
          Opc == ARM::JUMPTABLE_TBH)
        continue;

      // Scan the instructions for constant pool operands.
      for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
        if (I.getOperand(op).isCPI() ||
            (I.getOperand(op).isJTI() && InlineJumpTables)) {
          // We found one.  The addressing mode tells us the max displacement
          // from the PC that this instruction permits.

          // Basic size info comes from the TSFlags field.
          unsigned Bits = 0;
          unsigned Scale = 1;
          bool NegOk = false;
          bool IsSoImm = false;

          switch (Opc) {
          default:
            llvm_unreachable("Unknown addressing mode for CP reference!");

          // Taking the address of a CP entry.
          case ARM::LEApcrel:
          case ARM::LEApcrelJT: {
              // This takes a SoImm, which is an 8-bit rotated immediate.
              // We'll pretend the maximum offset is 255 * 4. Since each
              // instruction is 4 bytes wide, this is always correct. We'll
              // check for other displacements that fit in a SoImm as well.
              Bits = 8;
              NegOk = true;
              IsSoImm = true;
              unsigned CPI = I.getOperand(op).getIndex();
              assert(CPI < CPEMIs.size());
              MachineInstr *CPEMI = CPEMIs[CPI];
              const Align CPEAlign = getCPEAlign(CPEMI);
              const unsigned LogCPEAlign = Log2(CPEAlign);
              if (LogCPEAlign >= 2)
                Scale = 4;
              else
                // For constants with less than 4-byte alignment,
                // we'll pretend the maximum offset is 255 * 1.
                Scale = 1;
            }
            break;
          case ARM::t2LEApcrel:
          case ARM::t2LEApcrelJT:
            Bits = 12;
            NegOk = true;
            break;
          case ARM::tLEApcrel:
          case ARM::tLEApcrelJT:
            Bits = 8;
            Scale = 4;
            break;

          case ARM::LDRBi12:
          case ARM::LDRi12:
          case ARM::LDRcp:
          case ARM::t2LDRpci:
          case ARM::t2LDRHpci:
          case ARM::t2LDRSHpci:
          case ARM::t2LDRBpci:
          case ARM::t2LDRSBpci:
            Bits = 12;  // +-offset_12
            NegOk = true;
            break;

          case ARM::tLDRpci:
            Bits = 8;
            Scale = 4;  // +(offset_8*4)
            break;

          case ARM::VLDRD:
          case ARM::VLDRS:
            Bits = 8;
            Scale = 4;  // +-(offset_8*4)
            NegOk = true;
            break;
          case ARM::VLDRH:
            Bits = 8;
            Scale = 2;  // +-(offset_8*2)
            NegOk = true;
            break;
          }

          // Remember that this is a user of a CP entry.
          unsigned CPI = I.getOperand(op).getIndex();
          if (I.getOperand(op).isJTI()) {
            JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
            CPI = JumpTableEntryIndices[CPI];
          }

          MachineInstr *CPEMI = CPEMIs[CPI];
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
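          // For example, tLDRpci has Bits == 8 and Scale == 4, so MaxOffs is
          // 255 * 4 = 1020; VLDRH has Bits == 8 and Scale == 2, so 510.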
          CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm));

          // Increment corresponding CPEntry reference count.
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
          assert(CPE && "Cannot find a corresponding CPEntry!");
          CPE->RefCount++;

          // Instructions can only use one CP entry, so don't bother scanning
          // the rest of the operands.
          break;
        }
    }
  }
}

/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
                              const MachineBasicBlock *RHS) {
  return LHS->getNumber() < RHS->getNumber();
}

/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers.  Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
  // Renumber the MBBs to keep them consecutive.
  NewBB->getParent()->RenumberBlocks(NewBB);
  DT->updateBlockNumbers();

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add NewBB as having
  // available water after it.
  water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
  WaterList.insert(IP, NewBB);
}

/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch.  Update data structures and renumber blocks to
/// account for this change, and return the newly created block.
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();

  // Collect liveness information at MI.
  LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo());
  LRs.addLiveOuts(*OrigBB);
  auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse();
  for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd))
    LRs.stepBackward(LiveMI);

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
  MF->insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
  if (!isThumb)
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
  else
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
        .addMBB(NewBB)
        .add(predOps(ARMCC::AL));
  ++NumSplit;

  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
  NewBB->transferSuccessors(OrigBB);

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update live-in information in the new block.
  MachineRegisterInfo &MRI = MF->getRegInfo();
  for (MCPhysReg L : LRs)
    if (!MRI.isReserved(L))
      NewBB->addLiveIn(L);

  // Update internal data structures to account for the newly inserted MBB.
  // This is almost the same as updateForInsertedWaterBlock, except that
  // the Water goes after OrigBB, not NewBB.
  MF->RenumberBlocks(NewBB);
  DT->updateBlockNumbers();

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add OrigBB as having
  // available water after it (but not if it's already there, which happens
  // when splitting before a conditional branch that is followed by an
  // unconditional branch - in that case we want to insert NewBB).
  water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
  MachineBasicBlock *WaterBB = *IP;
  if (WaterBB == OrigBB)
    WaterList.insert(std::next(IP), NewBB);
  else
    WaterList.insert(IP, OrigBB);
  NewWaterList.insert(OrigBB);

  // Figure out how large the OrigBB is.  As the first half of the original
  // block, it cannot contain a tablejump.  The size includes
  // the new jump we added.  (It should be possible to do this without
  // recounting everything, but it's very confusing, and this is rarely
  // executed.)
  BBUtils->computeBlockSize(OrigBB);

  // Figure out how large the NewBB is.  As the second half of the original
  // block, it may contain a tablejump.
  BBUtils->computeBlockSize(NewBB);

  // All BBOffsets following these blocks must be modified.
  BBUtils->adjustBBOffsetsAfter(OrigBB);

  return NewBB;
}

/// getUserOffset - Compute the offset of U.MI as seen by the hardware
/// displacement computation.  Update U.KnownAlignment to match its current
/// basic block location.
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
  unsigned UserOffset = BBUtils->getOffsetOf(U.MI);

  SmallVectorImpl<BasicBlockInfo> &BBInfo = BBUtils->getBBInfo();
  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
  unsigned KnownBits = BBI.internalKnownBits();

  // The value read from PC is offset from the actual instruction address.
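  // For example, an ARM-mode user at offset 0x100 reads PC as 0x108, and a
  // Thumb-mode user at the same offset reads 0x104.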
  UserOffset += (isThumb ? 4 : 8);

  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
  // Make sure U.getMaxDisp() returns a constrained range.
  U.KnownAlignment = (KnownBits >= 2);

  // On Thumb, offsets == 2 mod 4 are rounded down by the hardware for
  // purposes of the displacement computation; compensate for that here.
  // For unknown alignments, getMaxDisp() constrains the range instead.
  if (isThumb && U.KnownAlignment)
    UserOffset &= ~3u;

  return UserOffset;
}

/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
/// UserOffset is computed by getUserOffset above to include PC adjustments. If
/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
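/// For example, with UserOffset == 0x200, TrialOffset == 0x600, and
/// MaxDisp == 1020, the entry is out of range: 0x600 - 0x200 == 1024 > 1020.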
bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
                                         unsigned TrialOffset, unsigned MaxDisp,
                                         bool NegativeOK, bool IsSoImm) {
  if (UserOffset <= TrialOffset) {
    // User before the Trial.
    if (TrialOffset - UserOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  } else if (NegativeOK) {
    if (UserOffset - TrialOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  }
  return false;
}

/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
///
/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
                                        MachineBasicBlock *Water, CPUser &U,
                                        unsigned &Growth) {
  BBInfoVector &BBInfo = BBUtils->getBBInfo();
  const Align CPEAlign = getCPEAlign(U.CPEMI);
  const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign);
  unsigned NextBlockOffset;
  Align NextBlockAlignment;
  MachineFunction::const_iterator NextBlock = Water->getIterator();
  if (++NextBlock == MF->end()) {
    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
  } else {
    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
    NextBlockAlignment = NextBlock->getAlignment();
  }
  unsigned Size = U.CPEMI->getOperand(2).getImm();
  unsigned CPEEnd = CPEOffset + Size;

  // The CPE may be able to hide in the alignment padding before the next
  // block. It may also cause more padding to be required if it is more aligned
  // than the next block.
  if (CPEEnd > NextBlockOffset) {
    Growth = CPEEnd - NextBlockOffset;
    // Compute the padding that would go at the end of the CPE to align the next
    // block.
    Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);

    // If the CPE is to be inserted before the instruction, that will raise
    // the offset of the instruction. Also account for unknown alignment padding
    // in blocks between CPE and the user.
    if (CPEOffset < UserOffset)
      UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign));
  } else
    // CPE fits in existing padding.
    Growth = 0;

  return isOffsetInRange(UserOffset, CPEOffset, U);
}

/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                                      MachineInstr *CPEMI, unsigned MaxDisp,
                                      bool NegOk, bool DoDump) {
  unsigned CPEOffset = BBUtils->getOffsetOf(CPEMI);

  if (DoDump) {
    LLVM_DEBUG({
      BBInfoVector &BBInfo = BBUtils->getBBInfo();
      unsigned Block = MI->getParent()->getNumber();
      const BasicBlockInfo &BBI = BBInfo[Block];
      dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
             << " max delta=" << MaxDisp
             << format(" insn address=%#x", UserOffset) << " in "
             << printMBBReference(*MI->getParent()) << ": "
             << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
             << format("CPE address=%#x offset=%+d: ", CPEOffset,
                       int(CPEOffset - UserOffset));
    });
  }

  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}

#ifndef NDEBUG
/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
/// unconditionally branches to its only successor.
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  MachineBasicBlock *Succ = *MBB->succ_begin();
  MachineBasicBlock *Pred = *MBB->pred_begin();
  MachineInstr *PredMI = &Pred->back();
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
      || PredMI->getOpcode() == ARM::t2B)
    return PredMI->getOperand(0).getMBB() == Succ;
  return false;
}
#endif // NDEBUG

/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount.  If the refcount
/// becomes 0 remove the entry and instruction.  Returns true if we removed
/// the entry, false if we didn't.
bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
                                                    MachineInstr *CPEMI) {
  // Find the old entry. Eliminate it if it is no longer used.
  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
  assert(CPE && "Unexpected!");
  if (--CPE->RefCount == 0) {
    removeDeadCPEMI(CPEMI);
    CPE->CPEMI = nullptr;
    --NumCPEs;
    return true;
  }
  return false;
}

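/// getCombinedIndex - Map a CPEMI to its index in CPEntries: for a
/// CONSTPOOL_ENTRY this is simply the constant pool index, while a jump table
/// uses the slot recorded in JumpTableEntryIndices (e.g. with three original
/// constants, the first jump table's entries live at combined index 3).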
unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
  if (CPEMI->getOperand(1).isCPI())
    return CPEMI->getOperand(1).getIndex();

  return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
}

/// findInRangeCPEntry - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is in range, and if so,
/// change the data structures so the user references the clone.  Returns:
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
int ARMConstantIslands::findInRangeCPEntry(CPUser &U, unsigned UserOffset) {
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;

  // Check to see if the CPE is already in-range.
  if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
                       true)) {
    LLVM_DEBUG(dbgs() << "In range\n");
    return 1;
  }

  // No.  Look for previously created clones of the CPE that are in range.
  unsigned CPI = getCombinedIndex(CPEMI);
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  for (CPEntry &CPE : CPEs) {
    // We already tried this one.
    if (CPE.CPEMI == CPEMI)
      continue;
    // Removing CPEs can leave empty entries; skip those.
    if (CPE.CPEMI == nullptr)
      continue;
    if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getMaxDisp(),
                         U.NegOk)) {
      LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI
                        << "\n");
      // Point the CPUser node to the replacement.
      U.CPEMI = CPE.CPEMI;
      // Change the CPI in the instruction operand to refer to the clone.
      for (MachineOperand &MO : UserMI->operands())
        if (MO.isCPI()) {
          MO.setIndex(CPE.CPI);
          break;
        }
      // Adjust the refcount of the clone...
      CPE.RefCount++;
      // ...and the original.  If we didn't remove the old entry, none of the
      // addresses changed, so we don't need another pass.
      return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
    }
  }
  return 0;
}

/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
/// the specified unconditional branch instruction.
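/// For tB that is ((1 << 10) - 1) * 2 = 2046 bytes; for t2B it is
/// ((1 << 23) - 1) * 2 = 16777214 bytes, and for ARM-mode B (the default
/// case) it is ((1 << 23) - 1) * 4 = 33554428 bytes.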
static inline unsigned getUnconditionalBrDisp(int Opc) {
  switch (Opc) {
  case ARM::tB:
    return ((1<<10)-1)*2;
  case ARM::t2B:
    return ((1<<23)-1)*2;
  default:
    break;
  }

  return ((1<<23)-1)*4;
}

1298 /// findAvailableWater - Look for an existing entry in the WaterList in which
1299 /// we can place the CPE referenced from U so it's within range of U's MI.
1300 /// Returns true if found, false if not.  If it returns true, WaterIter
1301 /// is set to the WaterList entry.  For Thumb, prefer water that will not
1302 /// introduce padding to water that will.  To ensure that this pass
1303 /// terminates, the CPE location for a particular CPUser is only allowed to
1304 /// move to a lower address, so search backward from the end of the list and
1305 /// prefer the first water that is in range.
1306 bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
1307                                             water_iterator &WaterIter,
1308                                             bool CloserWater) {
1309   if (WaterList.empty())
1310     return false;
1311 
1312   unsigned BestGrowth = ~0u;
1313   // The nearest water without splitting the UserBB is right after it.
1314   // If the distance is still large (we have a big BB), then we need to split it
1315   // if we don't converge after certain iterations. This helps the following
1316   // situation to converge:
1317   //   BB0:
1318   //      Big BB
1319   //   BB1:
1320   //      Constant Pool
1321   // When a CP access is out of range, BB0 may be used as water. However,
1322   // inserting islands between BB0 and BB1 makes other accesses out of range.
1323   MachineBasicBlock *UserBB = U.MI->getParent();
1324   BBInfoVector &BBInfo = BBUtils->getBBInfo();
1325   const Align CPEAlign = getCPEAlign(U.CPEMI);
1326   unsigned MinNoSplitDisp =
1327       BBInfo[UserBB->getNumber()].postOffset(CPEAlign) - UserOffset;
1328   if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
1329     return false;
1330   for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
1331        --IP) {
1332     MachineBasicBlock* WaterBB = *IP;
1333     // Check if water is in range and is either at a lower address than the
1334     // current "high water mark" or a new water block that was created since
1335     // the previous iteration by inserting an unconditional branch.  In the
1336     // latter case, we want to allow resetting the high water mark back to
1337     // this new water since we haven't seen it before.  Inserting branches
1338     // should be relatively uncommon and when it does happen, we want to be
1339     // sure to take advantage of it for all the CPEs near that block, so that
1340     // we don't insert more branches than necessary.
1341     // When CloserWater is true, we try to find the lowest address after (or
1342   // equal to) the user MI's BB, regardless of padding growth.
1343     unsigned Growth;
1344     if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
1345         (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
1346          NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
1347         Growth < BestGrowth) {
1348       // This is the least amount of required padding seen so far.
1349       BestGrowth = Growth;
1350       WaterIter = IP;
1351       LLVM_DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB)
1352                         << " Growth=" << Growth << '\n');
1353 
1354       if (CloserWater && WaterBB == U.MI->getParent())
1355         return true;
1356       // Keep looking unless it is perfect and we're not looking for the lowest
1357       // possible address.
1358       if (!CloserWater && BestGrowth == 0)
1359         return true;
1360     }
1361     if (IP == B)
1362       break;
1363   }
1364   return BestGrowth != ~0u;
1365 }
1366 
1367 /// createNewWater - No existing WaterList entry will work for
1368 /// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
1369 /// block is used if in range, and the conditional branch munged so control
1370 /// flow is correct.  Otherwise the block is split to create a hole with an
1371 /// unconditional branch around it.  In either case NewMBB is set to a
1372 /// block following which the new island can be inserted (the WaterList
1373 /// is not adjusted).
1374 void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
1375                                         unsigned UserOffset,
1376                                         MachineBasicBlock *&NewMBB) {
1377   CPUser &U = CPUsers[CPUserIndex];
1378   MachineInstr *UserMI = U.MI;
1379   MachineInstr *CPEMI  = U.CPEMI;
1380   const Align CPEAlign = getCPEAlign(CPEMI);
1381   MachineBasicBlock *UserMBB = UserMI->getParent();
1382   BBInfoVector &BBInfo = BBUtils->getBBInfo();
1383   const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
1384 
1385   // If the block does not end in an unconditional branch already, and if the
1386   // end of the block is within range, make new water there.  (The addition
1387   // below is for the unconditional branch we will be adding: 4 bytes on ARM +
1388   // Thumb2, 2 on Thumb1.)
1389   if (BBHasFallthrough(UserMBB)) {
1390     // Size of branch to insert.
1391     unsigned Delta = isThumb1 ? 2 : 4;
1392     // Compute the offset where the CPE will begin.
1393     unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta;
1394 
1395     if (isOffsetInRange(UserOffset, CPEOffset, U)) {
1396       LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
1397                         << format(", expected CPE offset %#x\n", CPEOffset));
1398       NewMBB = &*++UserMBB->getIterator();
1399       // Add an unconditional branch from UserMBB to fallthrough block.  Record
1400       // it for branch lengthening; this new branch will not get out of range,
1401       // but if the preceding conditional branch is out of range, the targets
1402       // will be exchanged, and the altered branch may be out of range, so the
1403       // machinery has to know about it.
1404       int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
1405       if (!isThumb)
1406         BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
1407       else
1408         BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
1409             .addMBB(NewMBB)
1410             .add(predOps(ARMCC::AL));
1411       unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
1412       ImmBranches.push_back(ImmBranch(&UserMBB->back(),
1413                                       MaxDisp, false, UncondBr));
1414       BBUtils->computeBlockSize(UserMBB);
1415       BBUtils->adjustBBOffsetsAfter(UserMBB);
1416       return;
1417     }
1418   }
1419 
1420   // What a big block.  Find a place within the block to split it.  This is a
1421   // little tricky on Thumb1 since instructions are 2 bytes and constant pool
1422   // entries are 4 bytes: if instruction I references island CPE, and
1423   // instruction I+1 references CPE', it will not work well to put CPE as far
1424   // forward as possible, since then CPE' cannot immediately follow it (that
1425   // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
1426   // need to create a new island.  So, we make a first guess, then walk through
1427   // the instructions between the one currently being looked at and the
1428   // possible insertion point, and make sure any other instructions that
1429   // reference CPEs will be able to use the same island area; if not, we back
1430   // up the insertion point.
1431 
1432   // Try to split the block so it's fully aligned.  Compute the latest split
1433   // point where we can add a 4-byte branch instruction, and then align to
1434   // Align, which is the largest possible alignment in the function.
1435   const Align Align = MF->getAlignment();
1436   assert(Align >= CPEAlign && "Over-aligned constant pool entry");
1437   unsigned KnownBits = UserBBI.internalKnownBits();
1438   unsigned UPad = UnknownPadding(Align, KnownBits);
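  // Start from the farthest offset the user can still reach, less the padding
  // that unknown alignment bits might later force us to insert.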
1439   unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
1440   LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
1441                               BaseInsertOffset));
1442 
1443   // The 4 in the following is for the unconditional branch we'll be inserting
1444   // (allows for long branch on Thumb1).  Alignment of the island is handled
1445   // inside isOffsetInRange.
1446   BaseInsertOffset -= 4;
1447 
1448   LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
1449                     << " la=" << Log2(Align) << " kb=" << KnownBits
1450                     << " up=" << UPad << '\n');
1451 
1452   // This could point off the end of the block if we've already got constant
1453   // pool entries following this block; only the last one is in the water list.
1454   // Back past any possible branches (allow for a conditional and a maximally
1455   // long unconditional).
1456   if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
1457     // Ensure BaseInsertOffset is larger than the offset of the instruction
1458     // following UserMI so that the loop which searches for the split point
1459     // iterates at least once.
1460     BaseInsertOffset =
1461         std::max(UserBBI.postOffset() - UPad - 8,
1462                  UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
1463     // If the CP reference (i.e., UserOffset) falls within the first four
1464     // instructions after an IT, the recalculated BaseInsertOffset could land
1465     // in the middle of an IT block. If it does, change the BaseInsertOffset
1466     // to just after the IT block. The CP entry is still in range because of
1467     // the following reasons.
1468     //   1. The initial BaseInsertOffset computed is (UserOffset +
1469     //   U.getMaxDisp() - UPad).
1470     //   2. An IT block is at most 4 instructions plus the "it" itself (4*4 +
1471     //   2 = 18 bytes).
1472     //   3. All the relevant instructions support much larger maximum
1473     //   displacements.
1474     MachineBasicBlock::iterator I = UserMI;
1475     ++I;
1476     Register PredReg;
1477     for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
1478          I->getOpcode() != ARM::t2IT &&
1479          getITInstrPredicate(*I, PredReg) != ARMCC::AL;
1480          Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) {
1481       BaseInsertOffset =
1482           std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1);
1483       assert(I != UserMBB->end() && "Fell off end of block");
1484     }
1485     LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
1486   }
1487   unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
1488     CPEMI->getOperand(2).getImm();
1489   MachineBasicBlock::iterator MI = UserMI;
1490   ++MI;
1491   unsigned CPUIndex = CPUserIndex+1;
1492   unsigned NumCPUsers = CPUsers.size();
1493   MachineInstr *LastIT = nullptr;
1494   for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
1495        Offset < BaseInsertOffset;
1496        Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
1497     assert(MI != UserMBB->end() && "Fell off end of block");
1498     if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
1499       CPUser &U = CPUsers[CPUIndex];
1500       if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
1501         // Shift the insertion point by one unit of alignment so it is within reach.
1502         BaseInsertOffset -= Align.value();
1503         EndInsertOffset -= Align.value();
1504       }
1505       // This is overly conservative, as we don't account for CPEMIs being
1506       // reused within the block, but it doesn't matter much.  Also assume CPEs
1507       // are added in order with alignment padding.  We may eventually be able
1508       // to pack the aligned CPEs better.
1509       EndInsertOffset += U.CPEMI->getOperand(2).getImm();
1510       CPUIndex++;
1511     }
1512 
1513     // Remember the last IT instruction.
1514     if (MI->getOpcode() == ARM::t2IT)
1515       LastIT = &*MI;
1516   }
1517 
1518   --MI;
1519 
1520   // Avoid splitting an IT block.
1521   if (LastIT) {
1522     Register PredReg;
1523     ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
1524     if (CC != ARMCC::AL)
1525       MI = LastIT;
1526   }
1527 
1528   // Avoid splitting a MOVW+MOVT pair with a relocation on Windows.
1529   // On Windows, this instruction pair is covered by a single
1530   // IMAGE_REL_ARM_MOV32T relocation which spans both instructions. If a
1531   // constant island is injected in between them, the relocation will clobber
1532   // the island contents and fail to update the MOVT instruction.
1533   // (These instructions are bundled up until right before the ConstantIslands
1534   // pass.)
1535   if (STI->isTargetWindows() && isThumb && MI->getOpcode() == ARM::t2MOVTi16 &&
1536       (MI->getOperand(2).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
1537           ARMII::MO_HI16) {
1538     --MI;
1539     assert(MI->getOpcode() == ARM::t2MOVi16 &&
1540            (MI->getOperand(1).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
1541                ARMII::MO_LO16);
1542   }
1543 
1544   // We really must not split an IT block.
1545 #ifndef NDEBUG
1546   Register PredReg;
1547   assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
1548 #endif
1549   NewMBB = splitBlockBeforeInstr(&*MI);
1550 }
1551 
1552 /// handleConstantPoolUser - Analyze the specified user, checking to see if it
1553 /// is out-of-range.  If so, pick up the constant pool value and move it some
1554 /// place in-range.  Return true if we changed any addresses (thus must run
1555 /// another pass of branch lengthening), false otherwise.
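/// The steps are: (1) reuse the existing entry or an in-range clone if
/// possible; (2) otherwise find existing water, or create new water by
/// splitting a block; (3) place a fresh island there, clone the CPE into it,
/// and retarget the user instruction.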
1556 bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
1557                                                 bool CloserWater) {
1558   CPUser &U = CPUsers[CPUserIndex];
1559   MachineInstr *UserMI = U.MI;
1560   MachineInstr *CPEMI  = U.CPEMI;
1561   unsigned CPI = getCombinedIndex(CPEMI);
1562   unsigned Size = CPEMI->getOperand(2).getImm();
1563   // Compute this only once, it's expensive.
1564   unsigned UserOffset = getUserOffset(U);
1565 
1566   // See if the current entry is within range, or there is a clone of it
1567   // in range.
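  // findInRangeCPEntry returns 0 if no usable entry exists, 1 if one is
  // usable without changing any addresses, and 2 if a clone was reused and
  // the old entry removed (addresses changed, so another pass is required).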
1568   int result = findInRangeCPEntry(U, UserOffset);
1569   if (result==1) return false;
1570   else if (result==2) return true;
1571 
1572   // No existing clone of this CPE is within range.
1573   // We will be generating a new clone.  Get a UID for it.
1574   unsigned ID = AFI->createPICLabelUId();
1575 
1576   // Look for water where we can place this CPE.
1577   MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
1578   MachineBasicBlock *NewMBB;
1579   water_iterator IP;
1580   if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
1581     LLVM_DEBUG(dbgs() << "Found water in range\n");
1582     MachineBasicBlock *WaterBB = *IP;
1583 
1584     // If the original WaterList entry was "new water" on this iteration,
1585     // propagate that to the new island.  This is just keeping NewWaterList
1586     // updated to match the WaterList, which will be updated below.
1587     if (NewWaterList.erase(WaterBB))
1588       NewWaterList.insert(NewIsland);
1589 
1590     // The new CPE goes before the following block (NewMBB).
1591     NewMBB = &*++WaterBB->getIterator();
1592   } else {
1593     // No water found.
1594     LLVM_DEBUG(dbgs() << "No water found\n");
1595     createNewWater(CPUserIndex, UserOffset, NewMBB);
1596 
1597     // splitBlockBeforeInstr adds to WaterList, which is important when it is
1598     // called while handling branches so that the water will be seen on the
1599     // next iteration for constant pools, but in this context, we don't want
1600     // it.  Check for this so it will be removed from the WaterList.
1601     // Also remove any entry from NewWaterList.
1602     MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
1603     IP = find(WaterList, WaterBB);
1604     if (IP != WaterList.end())
1605       NewWaterList.erase(WaterBB);
1606 
1607     // We are adding new water.  Update NewWaterList.
1608     NewWaterList.insert(NewIsland);
1609   }
1610   // Always align the new block because CP entries can be smaller than 4
1611   // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may
1612   // be an already aligned constant pool block.
1613   const Align Alignment = isThumb ? Align(2) : Align(4);
1614   if (NewMBB->getAlignment() < Alignment)
1615     NewMBB->setAlignment(Alignment);
1616 
1617   // Remove the original WaterList entry; we want subsequent insertions in
1618   // this vicinity to go after the one we're about to insert.  This
1619   // considerably reduces the number of times we have to move the same CPE
1620   // more than once and is also important to ensure the algorithm terminates.
1621   if (IP != WaterList.end())
1622     WaterList.erase(IP);
1623 
1624   // Okay, we know we can put an island before NewMBB now, do it!
1625   MF->insert(NewMBB->getIterator(), NewIsland);
1626 
1627   // Update internal data structures to account for the newly inserted MBB.
1628   updateForInsertedWaterBlock(NewIsland);
1629 
1630   // Now that we have an island to add the CPE to, clone the original CPE and
1631   // add it to the island.
1632   U.HighWaterMark = NewIsland;
1633   U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
1634                 .addImm(ID)
1635                 .add(CPEMI->getOperand(1))
1636                 .addImm(Size);
1637   CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
1638   ++NumCPEs;
1639 
1640   // Decrement the old entry, and remove it if refcount becomes 0.
1641   decrementCPEReferenceCount(CPI, CPEMI);
1642 
1643   // Mark the basic block as aligned as required by the const-pool entry.
1644   NewIsland->setAlignment(getCPEAlign(U.CPEMI));
1645 
1646   // Increase the size of the island block to account for the new entry.
1647   BBUtils->adjustBBSize(NewIsland, Size);
1648   BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());
1649 
1650   // Finally, change the CPI in the instruction operand to be ID.
1651   for (MachineOperand &MO : UserMI->operands())
1652     if (MO.isCPI()) {
1653       MO.setIndex(ID);
1654       break;
1655     }
1656 
1657   LLVM_DEBUG(
1658       dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
1659              << format(" offset=%#x\n",
1660                        BBUtils->getBBInfo()[NewIsland->getNumber()].Offset));
1661 
1662   return true;
1663 }
1664 
1665 /// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
1666 /// sizes and offsets of impacted basic blocks.
1667 void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
1668   MachineBasicBlock *CPEBB = CPEMI->getParent();
1669   unsigned Size = CPEMI->getOperand(2).getImm();
1670   CPEMI->eraseFromParent();
1671   BBInfoVector &BBInfo = BBUtils->getBBInfo();
1672   BBUtils->adjustBBSize(CPEBB, -Size);
1673   // All succeeding offsets have the current size value added in, fix this.
1674   if (CPEBB->empty()) {
1675     BBInfo[CPEBB->getNumber()].Size = 0;
1676 
1677     // This block no longer needs to be aligned.
1678     CPEBB->setAlignment(Align(1));
1679   } else {
1680     // Entries are sorted by descending alignment, so realign from the front.
1681     CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin()));
1682   }
1683 
1684   BBUtils->adjustBBOffsetsAfter(CPEBB);
1685   // An island has only one predecessor BB and one successor BB. Check if
1686   // this BB's predecessor jumps directly to this BB's successor. This
1687   // shouldn't happen currently.
1688   assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
1689   // FIXME: remove the empty blocks after all the work is done?
1690 }
1691 
1692 /// removeUnusedCPEntries - Remove constant pool entries whose refcounts
1693 /// are zero.
1694 bool ARMConstantIslands::removeUnusedCPEntries() {
1695   bool MadeChange = false;
1696   for (std::vector<CPEntry> &CPEs : CPEntries) {
1697     for (CPEntry &CPE : CPEs) {
1698       if (CPE.RefCount == 0 && CPE.CPEMI) {
1699         removeDeadCPEMI(CPE.CPEMI);
1700         CPE.CPEMI = nullptr;
1701         MadeChange = true;
1702       }
1703     }
1704   }
1705   return MadeChange;
1706 }
1707 
1709 /// fixupImmediateBr - Fix up an immediate branch whose destination is too far
1710 /// away to fit in its displacement field.
1711 bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
1712   MachineInstr *MI = Br.MI;
1713   MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
1714 
1715   // Check to see if the DestBB is already in-range.
1716   if (BBUtils->isBBInRange(MI, DestBB, Br.MaxDisp))
1717     return false;
1718 
1719   if (!Br.isCond)
1720     return fixupUnconditionalBr(Br);
1721   return fixupConditionalBr(Br);
1722 }
1723 
1724 /// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
1725 /// too far away to fit in its displacement field. If the LR register has been
1726 /// spilled in the epilogue, then we can use BL to implement a far jump.
1727 /// Otherwise, we report a fatal error: the function size was underestimated.
1728 bool
1729 ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
1730   MachineInstr *MI = Br.MI;
1731   MachineBasicBlock *MBB = MI->getParent();
1732   if (!isThumb1)
1733     llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
1734 
1735   if (!AFI->isLRSpilled())
1736     report_fatal_error("underestimated function size");
1737 
1738   // Use BL to implement far jump.
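  // tBfar expands to a BL-style branch whose 22-bit signed halfword offset
  // provides the (1 << 21) * 2 = 4MB displacement recorded below.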
1739   Br.MaxDisp = (1 << 21) * 2;
1740   MI->setDesc(TII->get(ARM::tBfar));
1741   BBInfoVector &BBInfo = BBUtils->getBBInfo();
1742   BBInfo[MBB->getNumber()].Size += 2;
1743   BBUtils->adjustBBOffsetsAfter(MBB);
1744   ++NumUBrFixed;
1745 
1746   LLVM_DEBUG(dbgs() << "  Changed B to long jump " << *MI);
1747 
1748   return true;
1749 }
1750 
1751 /// fixupConditionalBr - Fix up a conditional branch whose destination is too
1752 /// far away to fit in its displacement field. It is converted to an inverse
1753 /// conditional branch + an unconditional branch to the destination.
1754 bool
1755 ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
1756   MachineInstr *MI = Br.MI;
1757   MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
1758 
1759   // Add an unconditional branch to the destination and invert the branch
1760   // condition to jump over it:
1761   // blt L1
1762   // =>
1763   // bge L2
1764   // b   L1
1765   // L2:
1766   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
1767   CC = ARMCC::getOppositeCondition(CC);
1768   Register CCReg = MI->getOperand(2).getReg();
1769 
1770   // If the branch is at the end of its MBB and that has a fall-through block,
1771   // direct the updated conditional branch to the fall-through block. Otherwise,
1772   // split the MBB before the next instruction.
1773   MachineBasicBlock *MBB = MI->getParent();
1774   MachineInstr *BMI = &MBB->back();
1775   bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
1776 
1777   ++NumCBrFixed;
1778   if (BMI != MI) {
1779     if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
1780         BMI->getOpcode() == Br.UncondBr) {
1781       // Last MI in the BB is an unconditional branch. Can we simply invert the
1782       // condition and swap destinations:
1783       // beq L1
1784       // b   L2
1785       // =>
1786       // bne L2
1787       // b   L1
1788       MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
1789       if (BBUtils->isBBInRange(MI, NewDest, Br.MaxDisp)) {
1790         LLVM_DEBUG(
1791             dbgs() << "  Invert Bcc condition and swap its destination with "
1792                    << *BMI);
1793         BMI->getOperand(0).setMBB(DestBB);
1794         MI->getOperand(0).setMBB(NewDest);
1795         MI->getOperand(1).setImm(CC);
1796         return true;
1797       }
1798     }
1799   }
1800 
1801   if (NeedSplit) {
1802     splitBlockBeforeInstr(MI);
1803     // No need for the branch to the next block. We're adding an unconditional
1804     // branch to the destination.
1805     int delta = TII->getInstSizeInBytes(MBB->back());
1806     BBUtils->adjustBBSize(MBB, -delta);
1807     MBB->back().eraseFromParent();
1808 
1809     // The conditional successor will be swapped between the BBs after this, so
1810     // update CFG.
1811     MBB->addSuccessor(DestBB);
1812     std::next(MBB->getIterator())->removeSuccessor(DestBB);
1813 
1814     // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
1815   }
1816   MachineBasicBlock *NextBB = &*++MBB->getIterator();
1817 
1818   LLVM_DEBUG(dbgs() << "  Insert B to " << printMBBReference(*DestBB)
1819                     << " also invert condition and change dest. to "
1820                     << printMBBReference(*NextBB) << "\n");
1821 
1822   // Insert a new conditional branch and a new unconditional branch.
1823   // Also update the ImmBranch and add a new entry for the new branch.
1824   BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
1825     .addMBB(NextBB).addImm(CC).addReg(CCReg);
1826   Br.MI = &MBB->back();
1827   BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
1828   if (isThumb)
1829     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
1830         .addMBB(DestBB)
1831         .add(predOps(ARMCC::AL));
1832   else
1833     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
1834   BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
1835   unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
1836   ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
1837 
1838   // Remove the old conditional branch.  It may or may not still be in MBB.
1839   BBUtils->adjustBBSize(MI->getParent(), -TII->getInstSizeInBytes(*MI));
1840   MI->eraseFromParent();
1841   BBUtils->adjustBBOffsetsAfter(MBB);
1842   return true;
1843 }
1844 
1845 bool ARMConstantIslands::optimizeThumb2Instructions() {
1846   bool MadeChange = false;
1847 
1848   // Shrink ADR and LDR from constantpool.
1849   for (CPUser &U : CPUsers) {
1850     unsigned Opcode = U.MI->getOpcode();
1851     unsigned NewOpc = 0;
1852     unsigned Scale = 1;
1853     unsigned Bits = 0;
1854     switch (Opcode) {
1855     default: break;
1856     case ARM::t2LEApcrel:
1857       if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1858         NewOpc = ARM::tLEApcrel;
1859         Bits = 8;
1860         Scale = 4;
1861       }
1862       break;
1863     case ARM::t2LDRpci:
1864       if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1865         NewOpc = ARM::tLDRpci;
1866         Bits = 8;
1867         Scale = 4;
1868       }
1869       break;
1870     }
1871 
1872     if (!NewOpc)
1873       continue;
1874 
1875     unsigned UserOffset = getUserOffset(U);
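    // With Bits=8 and Scale=4 (the tLDRpci/tLEApcrel cases above), the range
    // below is ((1<<8)-1)*4 = 1020 bytes of pc-relative displacement.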
1876     unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
1877 
1878     // Be conservative with inline asm.
1879     if (!U.KnownAlignment)
1880       MaxOffs -= 2;
1881 
1882     // FIXME: Check if offset is multiple of scale if scale is not 4.
1883     if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
1884       LLVM_DEBUG(dbgs() << "Shrink: " << *U.MI);
1885       U.MI->setDesc(TII->get(NewOpc));
1886       MachineBasicBlock *MBB = U.MI->getParent();
1887       BBUtils->adjustBBSize(MBB, -2);
1888       BBUtils->adjustBBOffsetsAfter(MBB);
1889       ++NumT2CPShrunk;
1890       MadeChange = true;
1891     }
1892   }
1893 
1894   return MadeChange;
1895 }
1896 
1898 bool ARMConstantIslands::optimizeThumb2Branches() {
1900   auto TryShrinkBranch = [this](ImmBranch &Br) {
1901     unsigned Opcode = Br.MI->getOpcode();
1902     unsigned NewOpc = 0;
1903     unsigned Scale = 1;
1904     unsigned Bits = 0;
1905     switch (Opcode) {
1906     default: break;
1907     case ARM::t2B:
1908       NewOpc = ARM::tB;
1909       Bits = 11;
1910       Scale = 2;
1911       break;
1912     case ARM::t2Bcc:
1913       NewOpc = ARM::tBcc;
1914       Bits = 8;
1915       Scale = 2;
1916       break;
1917     }
1918     if (NewOpc) {
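      // Maximum forward displacement of the narrow encoding, e.g. tB with
      // Bits=11: ((1<<10)-1)*2 = 2046 bytes; tBcc with Bits=8: 254 bytes.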
1919       unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
1920       MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1921       if (BBUtils->isBBInRange(Br.MI, DestBB, MaxOffs)) {
1922         LLVM_DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
1923         Br.MI->setDesc(TII->get(NewOpc));
1924         MachineBasicBlock *MBB = Br.MI->getParent();
1925         BBUtils->adjustBBSize(MBB, -2);
1926         BBUtils->adjustBBOffsetsAfter(MBB);
1927         ++NumT2BrShrunk;
1928         return true;
1929       }
1930     }
1931     return false;
1932   };
1933 
1934   struct ImmCompare {
1935     MachineInstr* MI = nullptr;
1936     unsigned NewOpc = 0;
1937   };
1938 
1939   auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp,
1940                               MachineBasicBlock *DestBB) {
1941     ImmCmp.MI = nullptr;
1942     ImmCmp.NewOpc = 0;
1943 
1944     // If the conditional branch doesn't kill CPSR, then CPSR can be live out,
1945     // so this transformation is not safe.
1946     if (!Br.MI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
1947       return false;
1948 
1949     Register PredReg;
1950     unsigned NewOpc = 0;
1951     ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
1952     if (Pred == ARMCC::EQ)
1953       NewOpc = ARM::tCBZ;
1954     else if (Pred == ARMCC::NE)
1955       NewOpc = ARM::tCBNZ;
1956     else
1957       return false;
1958 
1959     // Check if the distance is within 126. Subtract 2 from the starting
1960     // offset because the cmp will be eliminated.
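    // (tCBZ/tCBNZ encode a 6-bit unsigned halfword offset, so they can only
    // branch forward, by at most 126 bytes.)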
1961     unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
1962     BBInfoVector &BBInfo = BBUtils->getBBInfo();
1963     unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
1964     if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
1965       return false;
1966 
1967     // Search backwards to find a tCMPi8
1968     auto *TRI = STI->getRegisterInfo();
1969     MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
1970     if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
1971       return false;
1972 
1973     ImmCmp.MI = CmpMI;
1974     ImmCmp.NewOpc = NewOpc;
1975     return true;
1976   };
1977 
1978   auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) {
1979     if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() ||
1980         STI->hasMinSize())
1981       return false;
1982 
1983     MachineBasicBlock *MBB = Br.MI->getParent();
1984     MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1985     if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) ||
1986         !BBUtils->isBBInRange(Br.MI, DestBB, 4094))
1987       return false;
1988 
1989     if (!DT->dominates(DestBB, MBB))
1990       return false;
1991 
1992     // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite
1993     // target of Br. So now we need to reverse the condition.
1994     Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? ARM::tCBNZ : ARM::tCBZ;
1995 
1996     MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(),
1997                                       TII->get(ARM::t2LE));
1998     // Swapped a t2Bcc for a t2LE, so no need to update the size of the block.
1999     MIB.add(Br.MI->getOperand(0));
2000     Br.MI->eraseFromParent();
2001     Br.MI = MIB;
2002     ++NumLEInserted;
2003     return true;
2004   };
2005 
2006   bool MadeChange = false;
2007 
2008   // The order in which branches appear in ImmBranches is approximately their
2009   // order within the function body. By visiting later branches first, we reduce
2010   // the distance between earlier forward branches and their targets, making it
2011   // more likely that the cbn?z optimization, which can only apply to forward
2012   // branches, will succeed.
2013   for (ImmBranch &Br : reverse(ImmBranches)) {
2014     MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
2015     MachineBasicBlock *MBB = Br.MI->getParent();
2016     MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ?
2017       MBB->getFallThrough() :
2018       MBB->back().getOperand(0).getMBB();
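    // ExitBB is the successor on the not-taken path: the fallthrough block
    // when Br terminates the MBB, otherwise the target of the trailing
    // unconditional branch.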
2019 
2020     ImmCompare Cmp;
2021     if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) {
2022       DestBB = ExitBB;
2023       MadeChange = true;
2024     } else {
2025       FindCmpForCBZ(Br, Cmp, DestBB);
2026       MadeChange |= TryShrinkBranch(Br);
2027     }
2028 
2029     unsigned Opcode = Br.MI->getOpcode();
2030     if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc)
2031       continue;
2032 
2033     Register Reg = Cmp.MI->getOperand(0).getReg();
2034 
2035     // Check for kill flags on Reg. If they are present, remove them and set
2036     // kill on the new CBZ.
2037     auto *TRI = STI->getRegisterInfo();
2038     MachineBasicBlock::iterator KillMI = Br.MI;
2039     bool RegKilled = false;
2040     do {
2041       --KillMI;
2042       if (KillMI->killsRegister(Reg, TRI)) {
2043         KillMI->clearRegisterKills(Reg, TRI);
2044         RegKilled = true;
2045         break;
2046       }
2047     } while (KillMI != Cmp.MI);
2048 
2049     // Create the new CBZ/CBNZ
2050     LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI);
2051     MachineInstr *NewBR =
2052         BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc))
2053             .addReg(Reg, getKillRegState(RegKilled) |
2054                              getRegState(Cmp.MI->getOperand(0)))
2055             .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
2056 
2057     Cmp.MI->eraseFromParent();
2058 
2059     if (Br.MI->getOpcode() == ARM::tBcc) {
2060       Br.MI->eraseFromParent();
2061       Br.MI = NewBR;
2062       BBUtils->adjustBBSize(MBB, -2);
2063     } else if (MBB->back().getOpcode() != ARM::t2LE) {
2064       // An LE has been generated, but it's not the terminator - that is an
2065       // unconditional branch. However, the roles have now been reversed: the
2066       // CBN?Z is now the conditional branch and the LE the unconditional one,
2067       // so the redundant unconditional branch at the end of the block can be
2068       // removed.
2069       MachineInstr *LastMI = &MBB->back();
2070       BBUtils->adjustBBSize(MBB, -LastMI->getDesc().getSize());
2071       LastMI->eraseFromParent();
2072     }
2073     BBUtils->adjustBBOffsetsAfter(MBB);
2074     ++NumCBZ;
2075     MadeChange = true;
2076   }
2077 
2078   return MadeChange;
2079 }
2080 
2081 static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
2082                               unsigned BaseReg) {
2083   if (I.getOpcode() != ARM::t2ADDrs)
2084     return false;
2085 
2086   if (I.getOperand(0).getReg() != EntryReg)
2087     return false;
2088 
2089   if (I.getOperand(1).getReg() != BaseReg)
2090     return false;
2091 
2092   // FIXME: what about CC and IdxReg?
2093   return true;
2094 }
2095 
2096 /// While trying to form a TBB/TBH instruction, we may (if the table
2097 /// doesn't immediately follow the BR_JT) need access to the start of the
2098 /// jump-table. We know one instruction that produces such a register; this
2099 /// function works out whether that definition can be preserved to the BR_JT,
2100 /// possibly by removing an intervening addition (which is usually needed to
2101 /// calculate the actual entry to jump to).
2102 bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
2103                                               MachineInstr *LEAMI,
2104                                               unsigned &DeadSize,
2105                                               bool &CanDeleteLEA,
2106                                               bool &BaseRegKill) {
2107   if (JumpMI->getParent() != LEAMI->getParent())
2108     return false;
2109 
2110   // Now we hope that we have at least these instructions in the basic block:
2111   //     BaseReg = t2LEA ...
2112   //     [...]
2113   //     EntryReg = t2ADDrs BaseReg, ...
2114   //     [...]
2115   //     t2BR_JT EntryReg
2116   //
2117   // We have to be very conservative about what we recognise here though. The
2118   // main perturbing factors to watch out for are:
2119   //    + Spills at any point in the chain: not direct problems but we would
2120   //      expect a blocking Def of the spilled register so in practice what we
2121   //      can do is limited.
2122   //    + EntryReg == BaseReg: this is the one situation we should allow a Def
2123   //      of BaseReg, but only if the t2ADDrs can be removed.
2124   //    + Some instruction other than t2ADDrs computing the entry. Not seen in
2125   //      the wild, but we should be careful.
2126   Register EntryReg = JumpMI->getOperand(0).getReg();
2127   Register BaseReg = LEAMI->getOperand(0).getReg();
2128 
2129   CanDeleteLEA = true;
2130   BaseRegKill = false;
2131   MachineInstr *RemovableAdd = nullptr;
2132   MachineBasicBlock::iterator I(LEAMI);
2133   for (++I; &*I != JumpMI; ++I) {
2134     if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
2135       RemovableAdd = &*I;
2136       break;
2137     }
2138 
2139     for (const MachineOperand &MO : I->operands()) {
2140       if (!MO.isReg() || !MO.getReg())
2141         continue;
2142       if (MO.isDef() && MO.getReg() == BaseReg)
2143         return false;
2144       if (MO.isUse() && MO.getReg() == BaseReg) {
2145         BaseRegKill = BaseRegKill || MO.isKill();
2146         CanDeleteLEA = false;
2147       }
2148     }
2149   }
2150 
2151   if (!RemovableAdd)
2152     return true;
2153 
2154   // Check the add really is removable, and that nothing else in the block
2155   // clobbers BaseReg.
2156   for (++I; &*I != JumpMI; ++I) {
2157     for (const MachineOperand &MO : I->operands()) {
2158       if (!MO.isReg() || !MO.getReg())
2159         continue;
2160       if (MO.isDef() && MO.getReg() == BaseReg)
2161         return false;
2162       if (MO.isUse() && MO.getReg() == EntryReg)
2163         RemovableAdd = nullptr;
2164     }
2165   }
2166 
2167   if (RemovableAdd) {
2168     RemovableAdd->eraseFromParent();
2169     DeadSize += isThumb2 ? 4 : 2;
2170   } else if (BaseReg == EntryReg) {
2171     // The add wasn't removable, but clobbered the base for the TBB. So we can't
2172     // preserve it.
2173     return false;
2174   }
2175 
2176   // We reached the end of the block without seeing another definition of
2177   // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
2178   // used in the TBB/TBH if necessary.
2179   return true;
2180 }
2181 
2182 /// Returns whether CPEMI is the first instruction in the block
2183 /// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
2184 /// we can switch the first register to PC and usually remove the address
2185 /// calculation that preceded it.
2186 static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
2187   MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
2188   MachineFunction *MF = MBB->getParent();
2189   ++MBB;
2190 
2191   return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI;
2192 }
2193 
2194 static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
2195                                          MachineInstr *JumpMI,
2196                                          unsigned &DeadSize) {
2197   // Remove a dead add between the LEA and JT, which used to compute EntryReg,
2198   // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
2199   // and is not clobbered / used.
2200   MachineInstr *RemovableAdd = nullptr;
2201   Register EntryReg = JumpMI->getOperand(0).getReg();
2202 
2203   // Find the last ADD to set EntryReg
2204   MachineBasicBlock::iterator I(LEAMI);
2205   for (++I; &*I != JumpMI; ++I) {
2206     if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg)
2207       RemovableAdd = &*I;
2208   }
2209 
2210   if (!RemovableAdd)
2211     return;
2212 
2213   // Ensure EntryReg is not clobbered or used.
2214   MachineBasicBlock::iterator J(RemovableAdd);
2215   for (++J; &*J != JumpMI; ++J) {
2216     for (const MachineOperand &MO : J->operands()) {
2217       if (!MO.isReg() || !MO.getReg())
2218         continue;
2219       if (MO.isDef() && MO.getReg() == EntryReg)
2220         return;
2221       if (MO.isUse() && MO.getReg() == EntryReg)
2222         return;
2223     }
2224   }
2225 
2226   LLVM_DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd);
2227   RemovableAdd->eraseFromParent();
2228   DeadSize += 4;
2229 }
2230 
2231 /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
2232 /// jumptables when it's possible.
2233 bool ARMConstantIslands::optimizeThumb2JumpTables() {
2234   bool MadeChange = false;
2235 
2236   // FIXME: After the tables are shrunk, can we get rid of some of the
2237   // constantpool tables?
2238   MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
2239   if (!MJTI) return false;
2240 
2241   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
2242   for (MachineInstr *MI : T2JumpTables) {
2243     const MCInstrDesc &MCID = MI->getDesc();
2244     unsigned NumOps = MCID.getNumOperands();
2245     unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
2246     MachineOperand JTOP = MI->getOperand(JTOpIdx);
2247     unsigned JTI = JTOP.getIndex();
2248     assert(JTI < JT.size());
2249 
2250     bool ByteOk = true;
2251     bool HalfWordOk = true;
2252     unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
2253     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
2254     BBInfoVector &BBInfo = BBUtils->getBBInfo();
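    // TBB entries are byte offsets scaled by 2, so every target must lie
    // within ((1<<8)-1)*2 = 510 bytes after the table; TBH entries are
    // halfword offsets scaled by 2, allowing ((1<<16)-1)*2 = 131070 bytes.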
2255     for (MachineBasicBlock *MBB : JTBBs) {
2256       unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
2257       // Negative offset is not ok. FIXME: We should change BB layout to make
2258       // sure all the branches are forward.
2259       if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
2260         ByteOk = false;
2261       unsigned TBHLimit = ((1<<16)-1)*2;
2262       if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
2263         HalfWordOk = false;
2264       if (!ByteOk && !HalfWordOk)
2265         break;
2266     }
2267 
2268     if (!ByteOk && !HalfWordOk)
2269       continue;
2270 
2271     CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
2272     MachineBasicBlock *MBB = MI->getParent();
2273     if (!MI->getOperand(0).isKill()) // FIXME: needed now?
2274       continue;
2275 
2276     unsigned DeadSize = 0;
2277     bool CanDeleteLEA = false;
2278     bool BaseRegKill = false;
2279 
2280     unsigned IdxReg = ~0U;
2281     bool IdxRegKill = true;
2282     if (isThumb2) {
2283       IdxReg = MI->getOperand(1).getReg();
2284       IdxRegKill = MI->getOperand(1).isKill();
2285 
2286       bool PreservedBaseReg =
2287         preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
2288       if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
2289         continue;
2290     } else {
2291       // We're in Thumb1 mode, so we must have something like:
2292       //   %idx = tLSLri %idx, 2
2293       //   %base = tLEApcrelJT
2294       //   %t = tLDRr %base, %idx
2295       Register BaseReg = User.MI->getOperand(0).getReg();
2296 
2297       MachineBasicBlock *UserMBB = User.MI->getParent();
2298       MachineBasicBlock::iterator Shift = User.MI->getIterator();
2299       if (Shift == UserMBB->begin())
2300         continue;
2301 
2302       Shift = prev_nodbg(Shift, UserMBB->begin());
2303       if (Shift->getOpcode() != ARM::tLSLri ||
2304           Shift->getOperand(3).getImm() != 2 ||
2305           !Shift->getOperand(2).isKill())
2306         continue;
2307       IdxReg = Shift->getOperand(2).getReg();
2308       Register ShiftedIdxReg = Shift->getOperand(0).getReg();
2309 
2310       // It's important that IdxReg is live until the actual TBB/TBH. Most of
2311       // the range is checked later, but the LEA might still clobber it and not
2312       // actually get removed.
2313       if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
2314         continue;
2315 
2316       MachineInstr *Load = User.MI->getNextNode();
2317       if (Load->getOpcode() != ARM::tLDRr)
2318         continue;
2319       if (Load->getOperand(1).getReg() != BaseReg ||
2320           Load->getOperand(2).getReg() != ShiftedIdxReg ||
2321           !Load->getOperand(2).isKill())
2322         continue;
2323 
2324       // If we're in PIC mode, there should be another ADD following.
2325       auto *TRI = STI->getRegisterInfo();
2326 
2327       // %base cannot be redefined after the load, as the redefinition would
2328       // appear before the TBB/TBH like:
2329       //      %base =
2330       //      %base =
2331       //      tBB %base, %idx
2332       if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
2333         continue;
2334 
2335       if (isPositionIndependentOrROPI) {
2336         MachineInstr *Add = Load->getNextNode();
2337         if (Add->getOpcode() != ARM::tADDrr ||
2338             Add->getOperand(2).getReg() != BaseReg ||
2339             Add->getOperand(3).getReg() != Load->getOperand(0).getReg() ||
2340             !Add->getOperand(3).isKill())
2341           continue;
2342         if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
2343           continue;
2344         if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
2345           // IdxReg gets redefined in the middle of the sequence.
2346           continue;
2347         Add->eraseFromParent();
2348         DeadSize += 2;
2349       } else {
2350         if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
2351           continue;
2352         if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
2353           // IdxReg gets redefined in the middle of the sequence.
2354           continue;
2355       }
2356 
2357       // Now safe to delete the load and lsl. The LEA will be removed later.
2358       CanDeleteLEA = true;
2359       Shift->eraseFromParent();
2360       Load->eraseFromParent();
2361       DeadSize += 4;
2362     }
2363 
2364     LLVM_DEBUG(dbgs() << "Shrink JT: " << *MI);
2365     MachineInstr *CPEMI = User.CPEMI;
2366     unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
2367     if (!isThumb2)
2368       Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;
2369 
2370     MachineBasicBlock::iterator MI_JT = MI;
2371     MachineInstr *NewJTMI =
2372         BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
2373             .addReg(User.MI->getOperand(0).getReg(),
2374                     getKillRegState(BaseRegKill))
2375             .addReg(IdxReg, getKillRegState(IdxRegKill))
2376             .addJumpTableIndex(JTI, JTOP.getTargetFlags())
2377             .addImm(CPEMI->getOperand(0).getImm());
2378     LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": " << *NewJTMI);
2379 
2380     unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
2381     CPEMI->setDesc(TII->get(JTOpc));
2382 
2383     if (jumpTableFollowsTB(MI, User.CPEMI)) {
2384       NewJTMI->getOperand(0).setReg(ARM::PC);
2385       NewJTMI->getOperand(0).setIsKill(false);
2386 
2387       if (CanDeleteLEA) {
2388         if (isThumb2)
2389           RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);
2390 
2391         User.MI->eraseFromParent();
2392         DeadSize += isThumb2 ? 4 : 2;
2393 
2394         // The LEA was eliminated, the TBB instruction becomes the only new user
2395         // of the jump table.
2396         User.MI = NewJTMI;
2397         User.MaxDisp = 4;
2398         User.NegOk = false;
2399         User.IsSoImm = false;
2400         User.KnownAlignment = false;
2401       } else {
2402         // The LEA couldn't be eliminated, so we must add another CPUser to
2403         // record the TBB or TBH use.
2404         int CPEntryIdx = JumpTableEntryIndices[JTI];
2405         auto &CPEs = CPEntries[CPEntryIdx];
2406         auto Entry =
2407             find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; });
2408         ++Entry->RefCount;
2409         CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
2410       }
2411     }
2412 
2413     unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI);
2414     unsigned OrigSize = TII->getInstSizeInBytes(*MI);
2415     MI->eraseFromParent();
2416 
2417     int Delta = OrigSize - NewSize + DeadSize;
2418     BBInfo[MBB->getNumber()].Size -= Delta;
2419     BBUtils->adjustBBOffsetsAfter(MBB);
2420 
2421     ++NumTBs;
2422     MadeChange = true;
2423   }
2424 
2425   return MadeChange;
2426 }
2427 
2428 /// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
2429 /// jump tables always branch forwards, since that's what tbb and tbh need.
2430 bool ARMConstantIslands::reorderThumb2JumpTables() {
2431   bool MadeChange = false;
2432 
2433   MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
2434   if (!MJTI) return false;
2435 
2436   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
2437   for (MachineInstr *MI : T2JumpTables) {
2438     const MCInstrDesc &MCID = MI->getDesc();
2439     unsigned NumOps = MCID.getNumOperands();
2440     unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
2441     MachineOperand JTOP = MI->getOperand(JTOpIdx);
2442     unsigned JTI = JTOP.getIndex();
2443     assert(JTI < JT.size());
2444 
2445     // We prefer if target blocks for the jump table come after the jump
2446     // instruction so we can use TB[BH]. Loop through the target blocks
2447     // and try to adjust them so that this holds.
2448     int JTNumber = MI->getParent()->getNumber();
2449     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
2450     for (MachineBasicBlock *MBB : JTBBs) {
2451       int DTNumber = MBB->getNumber();
2452 
2453       if (DTNumber < JTNumber) {
2454         // The destination precedes the switch. Try to move the block forward
2455         // so we have a positive offset.
2456         MachineBasicBlock *NewBB =
2457             adjustJTTargetBlockForward(JTI, MBB, MI->getParent());
2458         if (NewBB)
2459           MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB);
2460         MadeChange = true;
2461       }
2462     }
2463   }
2464 
2465   return MadeChange;
2466 }
2467 
2468 MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
2469     unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
2470   // If the destination block is terminated by an unconditional branch,
2471   // try to move it; otherwise, create a new block following the jump
2472   // table that branches back to the actual target. This is a very simple
2473   // heuristic. FIXME: We can definitely improve it.
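  // For example, if the jump table lives in BB#3 and targets BB#1, we either
  // move BB#1 to sit after BB#3 (when the surrounding terminators are
  // analyzable) or insert a new block after BB#3 that branches back to BB#1.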
2474   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2475   SmallVector<MachineOperand, 4> Cond;
2476   SmallVector<MachineOperand, 4> CondPrior;
2477   MachineFunction::iterator BBi = BB->getIterator();
2478   MachineFunction::iterator OldPrior = std::prev(BBi);
2479   MachineFunction::iterator OldNext = std::next(BBi);
2480 
2481   // If the block terminator isn't analyzable, don't try to move the block
2482   bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);
2483 
2484   // If the block ends in an unconditional branch, move it. The prior block
2485   // has to have an analyzable terminator for us to move this one. Be paranoid
2486   // and make sure we're not trying to move the entry block of the function.
2487   if (!B && Cond.empty() && BB != &MF->front() &&
2488       !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
2489     BB->moveAfter(JTBB);
2490     OldPrior->updateTerminator(BB);
2491     BB->updateTerminator(OldNext != MF->end() ? &*OldNext : nullptr);
2492     // Update numbering to account for the block being moved.
2493     MF->RenumberBlocks();
2494     DT->updateBlockNumbers();
2495     ++NumJTMoved;
2496     return nullptr;
2497   }
2498 
2499   // Create a new MBB for the code after the jump BB.
2500   MachineBasicBlock *NewBB =
2501     MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
2502   MachineFunction::iterator MBBI = ++JTBB->getIterator();
2503   MF->insert(MBBI, NewBB);
2504 
2505   // Copy live-in information to new block.
2506   for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins())
2507     NewBB->addLiveIn(RegMaskPair);
2508 
2509   // Add an unconditional branch from NewBB to BB.
2510   // There doesn't seem to be meaningful DebugInfo available; this doesn't
2511   // correspond directly to anything in the source.
2512   if (isThumb2)
2513     BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
2514         .addMBB(BB)
2515         .add(predOps(ARMCC::AL));
2516   else
2517     BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
2518         .addMBB(BB)
2519         .add(predOps(ARMCC::AL));
2520 
2521   // Update internal data structures to account for the newly inserted MBB.
2522   MF->RenumberBlocks(NewBB);
2523   DT->updateBlockNumbers();
2524 
2525   // Update the CFG.
2526   NewBB->addSuccessor(BB);
2527   JTBB->replaceSuccessor(BB, NewBB);
2528 
2529   ++NumJTInserted;
2530   return NewBB;
2531 }
2532 
2533 /// createARMConstantIslandPass - returns an instance of the constpool
2534 /// island pass.
2535 FunctionPass *llvm::createARMConstantIslandPass() {
2536   return new ARMConstantIslands();
2537 }
2538 
2539 INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
2540                 false, false)
2541