//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

namespace {
  /// Post-register allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
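    // Note on positions (derived from how the fields above are used):
    // Position counts instructions from the end of the basic block, so a
    // larger Position means an earlier instruction. LatestMIIdx therefore
    // indexes the entry with the smallest Position, and InsertPos is that
    // entry's Position -- the point where the merged LDM/STM is inserted.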
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
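// For illustration: "LDRD r0, r1, [r2, #-8]" stores magnitude 8 plus the
// 'sub' AddrOpc in its AM3 offset field, so the function above returns -8.
// "VLDRS s0, [r1, #16]" uses AM5, which stores the offset in words (4 here),
// so the result is scaled back up to 16 bytes.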
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
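// Usage note (derived from the switch above): a zero return means "no
// suitable non-writeback opcode exists". For example,
// getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) yields ARM::LDMIA,
// while getLoadStoreMultipleOpcode(ARM::VLDRS, ARM_AM::db) yields 0 because
// only VLDMSDB_UPD is available; callers check for 0 before merging.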
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
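// A note on the arithmetic above (an interpretation of the formula, not
// stated in the original): the register list of an LDM/STM is variadic, and
// the MCInstrDesc operand count includes the fixed operands plus a single
// slot for the list, so the number of transferred registers is
// (actual operands - fixed operands + 1). E.g. "LDMIA r0, {r1, r2, r3}"
// transfers 3 * 4 = 12 bytes.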
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
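// Worked example for the fixup above (illustrative): after merging
//   ldr r1, [r0] ; ldr r2, [r0, #4]
// into "ldm r0!, {r1, r2}", r0 has been advanced by 8 bytes. A later
// "ldr r3, [r0, #8]" is rewritten to "ldr r3, [r0, #0]"; a use that cannot
// absorb the adjustment instead gets a preceding "subs r0, #8".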
/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal
/// schedule direction). Computes backwards so multiple queries in the same
/// block must come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// the newly created instruction, or nullptr if the merge could not be done.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
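  // How the starting offset selects the addressing mode (derived from the
  // checks below): with n registers, IA places the first word at [Base],
  // IB at [Base, #4], DA at [Base, #-4*(n-1)] and DB at [Base, #-4*n].
  // Any other starting offset requires materializing a new base first.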
  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
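// Shape of the result below (illustrative): given Regs = {r2, r3}, Base = r0
// and Offset = 8, CreateLoadStoreDouble emits
//   t2LDRDi8 r2, r3, r0, 8, <pred>      (for loads)
// or the corresponding t2STRDi8 with kill flags on the stored registers.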
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Try to merge the instructions of \p Cand into a single LDM/STM/LDRD/STRD
/// instruction and fix up the surrounding code on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
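// Boundary cases for the check below (derived from the encoding): the 8-bit
// immediate of t2LDRDi8/t2STRDi8 is scaled by 4, so byte offsets must be
// multiples of 4 with magnitude below 1024. For example, 1020 is valid,
// 1024 is not (out of range), and -6 is not (misaligned).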
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, don't merge to a vldm/vstm starting with an odd register
    // number, as that needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

      // See if the current load/store may be part of a multi load/store.
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Cannot load from SP.
        if (Reg == ARM::SP)
          PartOfLSMulti = false;
        // Register numbers must be in ascending order.
        else if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
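// Example of the grouping performed above (illustrative): given
//   ldr r1, [r0] ; ldr r2, [r0, #4] ; ldr r5, [r0, #8]
// all three instructions form one candidate with CanMergeToLSMulti set
// (contiguous offsets, ascending register encodings), whereas
// CanMergeToLSDouble only survives for a two-instruction candidate.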
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the
/// instruction is not such an update or if the CPSR flags it generates are
/// possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}
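// For example (illustrative), with Reg = r0 and a matching predicate,
// "SUBri r0, r0, #16" is reported as -16 (provided its CPSR def is dead),
// and "tSUBspi sp, sp, #2" as -8 because its immediate is scaled by 4.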
/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
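// The transformations above, in assembly terms (illustrative):
//   add r0, r0, #4 ; ldr r1, [r0]    =>  ldr r1, [r0, #4]!   (pre-indexed)
//   ldr r1, [r0]   ; add r0, r0, #4  =>  ldr r1, [r0], #4    (post-indexed)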
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
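// Example of the fixup above (illustrative): "LDRD r1, r2, [r0]" is not
// encodable on ARM because the first register must be even-numbered. With
// ascending registers and a zero offset it becomes "LDMIA r0, {r1, r2}";
// otherwise it is split into two LDRs at [r0] and [r0, #4].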
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for the liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
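      // Only the increment-after form has a return variant (LDMIA_RET /
      // t2LDMIA_RET), so the assert below expects any pop-return reaching
      // this point to already use the IA update opcode.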
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre-register-allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
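  // Scan the instructions strictly between the first and last memory op in
  // the group: anything that may write the base register, reorder with the
  // memory accesses, or act as a scheduling barrier makes the move unsafe,
  // while other registers referenced in between count as added register
  // pressure.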
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy \p Op0 and \p Op1 memory operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
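  // (Illustrative: with Scale == 4 for t2LDRDi8/t2STRDi8 the limit below is
  // (1 << 8) * 4 = 1024, so only word-aligned offsets within +/-1020 are
  // accepted; ARM-mode LDRD/STRD keeps Scale == 1 and encodes the magnitude
  // with a separate add/sub flag via ARM_AM::getAM3Opc.)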
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
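        // Loads are moved up to the location of the first load in the group;
        // stores are moved down to the last store. Skip past the ops being
        // moved (and any debug values) so the insertion point lands just
        // after them.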
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
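// Typical usage (illustrative; the actual pass scheduling lives in
// ARMTargetMachine.cpp): the pre-RA variant runs before register allocation
// and the post-RA variant after it, e.g.
//   addPass(createARMLoadStoreOptimizationPass(/* PreAlloc = */ true));
//   addPass(createARMLoadStoreOptimizationPass());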