//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
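///
/// A typical transformation (illustrative, ARM mode, ascending addresses):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldmia r0, {r1, r2}
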
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold
      // it into a load / store.
      return true;
  }

  return false;
}

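// Decode the byte offset encoded in a load / store's addressing-mode operand.
// For example, an AM3 offset field with the 'sub' opcode and magnitude 8
// decodes to -8, while AM5 stores its offset in words, so a magnitude of 2
// with the 'add' opcode decodes to +8 bytes.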
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

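// Scale factor that converts a word offset into the immediate-offset units
// used by the given Thumb1 opcode: tLDRi immediates are word-scaled (one
// word = 1 unit), tLDRHi are halfword-scaled (one word = 2 units) and
// tLDRBi are byte-scaled (one word = 4 units).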
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
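/// For example (illustrative), after forming "tLDMIA r0!, {r1, r2}" the base
/// r0 has advanced by 8 bytes, so a later "tLDRi r3, [r0, #2]" (offset 8)
/// becomes "tLDRi r3, [r0, #0]"; an access whose updated offset would be
/// negative instead gets a "SUBS r0, #8" inserted before it to reset r0.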
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
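/// For example (illustrative), with Base = r4, Offset = 0 and
/// Regs = {r0, r1, r2}, this emits a single "ldmia r4, {r0, r1, r2}" in
/// place of three individual ldr instructions.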
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase && Offset >= 8) {
        const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
                                            .getSubtarget<ARMSubtarget>();
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !Subtarget.hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
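/// For example, when a merged VLDRS carries an implicit def of a D
/// super-register, a read of that D register between the load's original
/// position and the merge point must be marked <undef> once the load moves.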
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

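// A matching decrement is a SUB of Base by exactly Bytes, with the same
// predicate and no live CPSR def; e.g. an ARM "sub r0, r0, #8" matches
// Base = r0, Bytes = 8. The symmetric isMatchingIncrement below handles ADD.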
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
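///
/// ldr r1, [r0]
/// r0 := r0 + 4
/// =>
/// ldr r1, [r0], #4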
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1575
1576 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1577 /// ops of the same base and incrementing offset into LDM / STM ops.
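/// E.g. this turns
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// into
///   ldm r4, {r0, r1, r2}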
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which would violate the write-after-write (WAW) dependence.
      // The same applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();
        // Try folding preceding/trailing base inc/dec into those loads/stores
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getSubtargetImpl()->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getSubtargetImpl()->getInstrInfo();
  TRI = TM.getSubtargetImpl()->getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
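/// E.g. hoisting "ldr r1, [r4, #4]" past an unrelated instruction so it sits
/// next to "ldr r0, [r4]" gives the post-RA pass a chance to form an ldm or
/// an ldrd.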

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getSubtarget().getDataLayout();
  TII = Fn.getSubtarget().getInstrInfo();
  TRI = Fn.getSubtarget().getRegisterInfo();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
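  // Heuristic: moving at most four memory-op registers is assumed cheap;
  // beyond that, only allow the move if the extra registers made live across
  // the moved range number at most twice the memory-op registers themselves.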
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
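  // On v6+ the i64 ABI alignment from DataLayout is typically 8 bytes as
  // well, so e.g. a pair known only to be 4-byte aligned is rejected.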
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
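    // E.g. an offset of -4 is encoded as (sub, 4) in the addrmode3 immediate.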
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
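  //    E.g. offsets 0, 4, 12 leave a gap at 8, so only the first two ops can
  //    be moved together.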
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase in register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
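          // ARM-mode LDRD/STRD need an even/odd consecutive register pair
          // (e.g. r4/r5); these hints steer the allocator toward one.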
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}