//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

namespace {
  /// Post-register-allocation pass that combines loads / stores to form
  /// ldm / stm instructions.
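  ///
  /// The pass walks each basic block bottom-up, queues loads / stores that
  /// share a base register and predicate, forms merge candidates from each
  /// queue, and finally rewrites profitable candidates into multiple-register
  /// transfers.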
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with the same base register, sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction that is latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction that is earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
    };
    BumpPtrAllocator Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *MergeOps(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator InsertBefore, int Offset,
                           unsigned Base, bool BaseKill, unsigned Opcode,
                           ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
                           ArrayRef<std::pair<unsigned, bool>> Regs);
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
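  // If control can flow into a successor block, the written-back base is
  // still observable there, so a compensating SUB is inserted here as well.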
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards, so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the merge is not possible.
MachineInstr *
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator InsertBefore, int Offset,
                          unsigned Base, bool BaseKill, unsigned Opcode,
                          ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
                          ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
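  // Pick the submode that lets the first merged access land on the queue's
  // starting offset; otherwise a new base register has to be materialized.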
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr.
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

/// Call MergeOps and update the bookkeeping (kill flags, implicit
/// super-register defs) accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  // Determine list of registers and list of implicit super-register defs.
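  // Kill flags are recorded so they can be cleared on earlier instructions if
  // the merged instruction ends up reading the values later than the
  // originals did.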
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine the earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (KilledRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
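/// MemOps is sorted by ascending offset, so each candidate is a maximal run
/// of entries with consecutive offsets (and, for VFP, consecutively numbered
/// registers).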
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  unsigned Limit;
  switch (Opcode) {
  default:
    Limit = UINT_MAX;
    break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;

    // Merge additional instructions fulfilling LDM/STM constraints.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP)
        break;
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      // Register numbers must be in ascending order.
      if (RegNum <= PRegNum)
        break;
      // For VFP / NEON load/store multiples, the registers must be consecutive
      // and within the limit on the number of registers per instruction.
      if (!isNotVFP && RegNum != PRegNum+1)
        break;
      // On Swift we don't want vldm/vstm to start with an odd register number
      // because Q register unaligned vldm/vstm need more uops.
      if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
        break;

      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
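  // A following increment/decrement folds into the post-indexed form, just as
  // a preceding one folded into the pre-indexed form above.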
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
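  // (The first register must be even-numbered, and the second must be the
  // register immediately after it.)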
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue())
      continue;

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through the list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->Instrs.size() > 1) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        MergeBaseUpdateLSMultiple(Merged);
        Changed = true;
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ?
                         ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) &&
             "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.Reset();
  return Modified;
}

namespace {
  /// Pre-register-allocation pass that moves load / store instructions from
  /// consecutive locations closer together, to make it more likely that they
  /// will be combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
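  // Walk every instruction between the two memory ops: give up on anything
  // that could clobber the base register or alias one of the transfers, and
  // collect the other registers read or written so the pressure heuristic at
  // the end can veto the move.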
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
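  // t2LDRDi8 / t2STRDi8 encode a signed 8-bit immediate scaled by 4, while
  // ARM-mode LDRD / STRD use addrmode3, i.e. an 8-bit magnitude with a
  // separate add/sub bit, which is what ARM_AM::getAM3Opc packs below.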
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ?
          FirstOp : LastOp;
        while (InsertPos != MBB->end() &&
               (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
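        // Step past the barrier so the next scan region starts behind it;
        // memory operations are never rescheduled across calls or
        // terminators.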
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
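
// Note: this factory is presumably wired up by the ARM target's pass
// configuration, which is expected to add the pre-allocation variant before
// register allocation and ARMLoadStoreOpt after it.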