1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace {
  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function state, set up in runOnMachineFunction.
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Lazily-computed liveness; valid only while LiveRegsValid is true and
    // only at LiveRegPos (see moveLiveRegsBefore).
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    // Subtarget mode flags; at most one of these is true.
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    // Candidates are bump-allocated per function; the vectors below hold
    // non-owning pointers into the allocator.
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// Returns true if \p MI has a non-dead def of CPSR (i.e. it sets the
/// condition flags in a way later code may observe).
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// Decode the immediate offset (in bytes) of a single load/store \p MI.
/// Handles plain-immediate encodings (t2/ARM i12/i8, Thumb1 word loads which
/// are scaled by 4) as well as AM3 (LDRD/STRD) and AM5 (VFP) encodings,
/// applying the add/sub direction for the latter two.
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  // The offset immediate is the third-from-last operand (it is followed by
  // the two predicate operands).
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  // AM3/AM5 pack magnitude and direction into the immediate; AM5 offsets are
  // in words.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

/// Base-address operand of a single load/store (operand 1 by convention for
/// the opcodes this pass handles).
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

/// Transferred-value operand of a single load/store (operand 0: destination
/// for loads, source for stores).
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

/// Map a single load/store opcode plus addressing submode to the
/// corresponding load/store-multiple opcode. Returns 0 for combinations that
/// only exist in writeback form (VFP with db mode) -- callers must check.
/// Also bumps the matching statistics counter.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

/// Return the addressing submode (ia/da/db/ib) encoded in a load/store
/// multiple opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

/// True for Thumb1 32-bit (word) immediate loads.
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

/// True for Thumb2 32-bit immediate loads.
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

/// True for any integer 32-bit immediate load (ARM, Thumb1 or Thumb2).
static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}

/// True for Thumb1 32-bit (word) immediate stores.
static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

/// True for Thumb2 32-bit immediate stores.
static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

/// True for any integer 32-bit immediate store (ARM, Thumb1 or Thumb2).
static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

/// True for any single (non-multiple) load this pass handles: integer word
/// loads plus the single-precision/double-precision VFP loads.
static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

/// Number of immediate-offset units that make up one word (4 bytes) for the
/// given Thumb1 memory opcode: word accesses encode the offset in words
/// (scale 1), halfword accesses in halfwords (scale 2), byte accesses in
/// bytes (scale 4). Used to rescale a word offset into \p Opc's units.
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Number of bytes transferred by \p MI: 4 or 8 for single loads/stores, and
/// reglist-length times the element size for load/store multiples (the
/// variadic register operands beyond the fixed operands form the reglist).
/// Returns 0 for opcodes this pass does not recognize.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  // After a Thumb1 LDM/STM with writeback, Base has been advanced by
  // WordOffset words (WordOffset * 4 bytes). Walk forward from MBBI and
  // either fold that delta into each user's immediate, or insert a
  // "SUBS Base, #WordOffset*4" to restore the old value.
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // SUB absorbs the delta by adding it; ADD by subtracting it.
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 526 if (!RegClassInfoValid) { 527 RegClassInfo.runOnMachineFunction(*MF); 528 RegClassInfoValid = true; 529 } 530 531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 532 if (!LiveRegs.contains(Reg)) 533 return Reg; 534 return 0; 535 } 536 537 /// Compute live registers just before instruction \p Before (in normal schedule 538 /// direction). Computes backwards so multiple queries in the same block must 539 /// come in reverse order. 540 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 541 MachineBasicBlock::const_iterator Before) { 542 // Initialize if we never queried in this block. 543 if (!LiveRegsValid) { 544 LiveRegs.init(TRI); 545 LiveRegs.addLiveOuts(&MBB, true); 546 LiveRegPos = MBB.end(); 547 LiveRegsValid = true; 548 } 549 // Move backward just before the "Before" position. 550 while (LiveRegPos != Before) { 551 --LiveRegPos; 552 LiveRegs.stepBackward(*LiveRegPos); 553 } 554 } 555 556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 557 unsigned Reg) { 558 for (const std::pair<unsigned, bool> &R : Regs) 559 if (R.first == Reg) 560 return true; 561 return false; 562 } 563 564 /// Create and insert a LDM or STM with Base as base register and registers in 565 /// Regs as the register operands that would be loaded / stored. It returns 566 /// true if the transformation is done. 567 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 568 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 569 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 570 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 571 unsigned NumRegs = Regs.size(); 572 assert(NumRegs > 1); 573 574 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 575 // Compute liveness information for that register to make the decision. 
576 bool SafeToClobberCPSR = !isThumb1 || 577 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 578 MachineBasicBlock::LQR_Dead); 579 580 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 581 582 // Exception: If the base register is in the input reglist, Thumb1 LDM is 583 // non-writeback. 584 // It's also not possible to merge an STR of the base register in Thumb1. 585 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 586 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 587 if (Opcode == ARM::tLDRi) { 588 Writeback = false; 589 } else if (Opcode == ARM::tSTRi) { 590 return nullptr; 591 } 592 } 593 594 ARM_AM::AMSubMode Mode = ARM_AM::ia; 595 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 596 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 597 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 598 599 if (Offset == 4 && haveIBAndDA) { 600 Mode = ARM_AM::ib; 601 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 602 Mode = ARM_AM::da; 603 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 604 // VLDM/VSTM do not support DB mode without also updating the base reg. 605 Mode = ARM_AM::db; 606 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 607 // Check if this is a supported opcode before inserting instructions to 608 // calculate a new base register. 609 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 610 611 // If starting offset isn't zero, insert a MI to materialize a new base. 612 // But only do so if it is cost effective, i.e. merging more than two 613 // loads / stores. 614 if (NumRegs <= 2) 615 return nullptr; 616 617 // On Thumb1, it's not worth materializing a new base register without 618 // clobbering the CPSR (i.e. not using ADDS/SUBS). 
619 if (!SafeToClobberCPSR) 620 return nullptr; 621 622 unsigned NewBase; 623 if (isi32Load(Opcode)) { 624 // If it is a load, then just use one of the destination register to 625 // use as the new base. 626 NewBase = Regs[NumRegs-1].first; 627 } else { 628 // Find a free register that we can use as scratch register. 629 moveLiveRegsBefore(MBB, InsertBefore); 630 // The merged instruction does not exist yet but will use several Regs if 631 // it is a Store. 632 if (!isLoadSingle(Opcode)) 633 for (const std::pair<unsigned, bool> &R : Regs) 634 LiveRegs.addReg(R.first); 635 636 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 637 if (NewBase == 0) 638 return nullptr; 639 } 640 641 int BaseOpc = 642 isThumb2 ? ARM::t2ADDri : 643 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 644 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 645 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 646 647 if (Offset < 0) { 648 Offset = - Offset; 649 BaseOpc = 650 isThumb2 ? ARM::t2SUBri : 651 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 652 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 653 } 654 655 if (!TL->isLegalAddImmediate(Offset)) 656 // FIXME: Try add with register operand? 657 return nullptr; // Probably not worth it then. 658 659 // We can only append a kill flag to the add/sub input if the value is not 660 // used in the register list of the stm as well. 661 bool KillOldBase = BaseKill && 662 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 663 664 if (isThumb1) { 665 // Thumb1: depending on immediate size, use either 666 // ADDS NewBase, Base, #imm3 667 // or 668 // MOV NewBase, Base 669 // ADDS NewBase, #imm8. 670 if (Base != NewBase && 671 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 672 // Need to insert a MOV to the new base first. 
673 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 674 !STI->hasV6Ops()) { 675 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 676 if (Pred != ARMCC::AL) 677 return nullptr; 678 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 679 .addReg(Base, getKillRegState(KillOldBase)); 680 } else 681 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 682 .addReg(Base, getKillRegState(KillOldBase)) 683 .addImm(Pred).addReg(PredReg); 684 685 // The following ADDS/SUBS becomes an update. 686 Base = NewBase; 687 KillOldBase = true; 688 } 689 if (BaseOpc == ARM::tADDrSPi) { 690 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 691 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 692 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 693 .addImm(Pred).addReg(PredReg); 694 } else 695 AddDefaultT1CC( 696 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 697 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 698 .addImm(Pred).addReg(PredReg); 699 } else { 700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 702 .addImm(Pred).addReg(PredReg).addReg(0); 703 } 704 Base = NewBase; 705 BaseKill = true; // New base is always killed straight away. 706 } 707 708 bool isDef = isLoadSingle(Opcode); 709 710 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 711 // base register writeback. 712 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 713 if (!Opcode) 714 return nullptr; 715 716 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 717 // - There is no writeback (LDM of base register), 718 // - the base register is killed by the merged instruction, 719 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 720 // to reset the base register. 721 // Otherwise, don't merge. 
722 // It's safe to return here since the code to materialize a new base register 723 // above is also conditional on SafeToClobberCPSR. 724 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 725 return nullptr; 726 727 MachineInstrBuilder MIB; 728 729 if (Writeback) { 730 if (Opcode == ARM::tLDMIA) 731 // Update tLDMIA with writeback if necessary. 732 Opcode = ARM::tLDMIA_UPD; 733 734 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 735 736 // Thumb1: we might need to set base writeback when building the MI. 737 MIB.addReg(Base, getDefRegState(true)) 738 .addReg(Base, getKillRegState(BaseKill)); 739 740 // The base isn't dead after a merged instruction with writeback. 741 // Insert a sub instruction after the newly formed instruction to reset. 742 if (!BaseKill) 743 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 744 745 } else { 746 // No writeback, simply build the MachineInstr. 747 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 748 MIB.addReg(Base, getKillRegState(BaseKill)); 749 } 750 751 MIB.addImm(Pred).addReg(PredReg); 752 753 for (const std::pair<unsigned, bool> &R : Regs) 754 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 755 756 return MIB.getInstr(); 757 } 758 759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 763 bool IsLoad = isi32Load(Opcode); 764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 765 unsigned LoadStoreOpcode = IsLoad ? 
ARM::t2LDRDi8 : ARM::t2STRDi8; 766 767 assert(Regs.size() == 2); 768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 769 TII->get(LoadStoreOpcode)); 770 if (IsLoad) { 771 MIB.addReg(Regs[0].first, RegState::Define) 772 .addReg(Regs[1].first, RegState::Define); 773 } else { 774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 775 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 776 } 777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 778 return MIB.getInstr(); 779 } 780 781 /// Call MergeOps and update MemOps and merges accordingly on success. 782 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 783 const MachineInstr *First = Cand.Instrs.front(); 784 unsigned Opcode = First->getOpcode(); 785 bool IsLoad = isLoadSingle(Opcode); 786 SmallVector<std::pair<unsigned, bool>, 8> Regs; 787 SmallVector<unsigned, 4> ImpDefs; 788 DenseSet<unsigned> KilledRegs; 789 // Determine list of registers and list of implicit super-register defs. 790 for (const MachineInstr *MI : Cand.Instrs) { 791 const MachineOperand &MO = getLoadStoreRegOp(*MI); 792 unsigned Reg = MO.getReg(); 793 bool IsKill = MO.isKill(); 794 if (IsKill) 795 KilledRegs.insert(Reg); 796 Regs.push_back(std::make_pair(Reg, IsKill)); 797 798 if (IsLoad) { 799 // Collect any implicit defs of super-registers, after merging we can't 800 // be sure anymore that we properly preserved these live ranges and must 801 // removed these implicit operands. 802 for (const MachineOperand &MO : MI->implicit_operands()) { 803 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 804 continue; 805 assert(MO.isImplicit()); 806 unsigned DefReg = MO.getReg(); 807 808 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 809 continue; 810 // We can ignore cases where the super-reg is read and written. 811 if (MI->readsRegister(DefReg)) 812 continue; 813 ImpDefs.push_back(DefReg); 814 } 815 } 816 } 817 818 // Attempt the merge. 
819 typedef MachineBasicBlock::iterator iterator; 820 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 821 iterator InsertBefore = std::next(iterator(LatestMI)); 822 MachineBasicBlock &MBB = *LatestMI->getParent(); 823 unsigned Offset = getMemoryOpOffset(First); 824 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 825 bool BaseKill = LatestMI->killsRegister(Base); 826 unsigned PredReg = 0; 827 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); 828 DebugLoc DL = First->getDebugLoc(); 829 MachineInstr *Merged = nullptr; 830 if (Cand.CanMergeToLSDouble) 831 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 832 Opcode, Pred, PredReg, DL, Regs); 833 if (!Merged && Cand.CanMergeToLSMulti) 834 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 835 Opcode, Pred, PredReg, DL, Regs); 836 if (!Merged) 837 return nullptr; 838 839 // Determine earliest instruction that will get removed. We then keep an 840 // iterator just above it so the following erases don't invalidated it. 841 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 842 bool EarliestAtBegin = false; 843 if (EarliestI == MBB.begin()) { 844 EarliestAtBegin = true; 845 } else { 846 EarliestI = std::prev(EarliestI); 847 } 848 849 // Remove instructions which have been merged. 850 for (MachineInstr *MI : Cand.Instrs) 851 MBB.erase(MI); 852 853 // Determine range between the earliest removed instruction and the new one. 854 if (EarliestAtBegin) 855 EarliestI = MBB.begin(); 856 else 857 EarliestI = std::next(EarliestI); 858 auto FixupRange = make_range(EarliestI, iterator(Merged)); 859 860 if (isLoadSingle(Opcode)) { 861 // If the previous loads defined a super-reg, then we have to mark earlier 862 // operands undef; Replicate the super-reg def on the merged instruction. 
    // Walk the instructions between the earliest erased op and the merged op:
    // any operand still referring to one of the previously implicit-defined
    // super-regs must be downgraded to an undef read (or dropped if redefined).
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0; // Super-reg redefined in between; stop tracking it.
        }
      }
    }

    // Replicate the surviving super-reg defs on the merged load.
    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (KilledRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

/// Returns true if \p Offset is representable by a t2LDRDi8/t2STRDi8:
/// a multiple of 4 with magnitude below 1024 (8-bit immediate scaled by 4).
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);
  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  // NOTE(review): Limit is computed here but not consulted in the loop below;
  // presumably the register count is bounded elsewhere - TODO confirm.
  unsigned Limit;
  switch (Opcode) {
  default:
    Limit = UINT_MAX;
    break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On swift vldm/vstm starting with an odd register number as that needs
    // more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Offsets must be strictly consecutive (stride == access size).
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

      // See if the current load/store may be part of a multi load/store.
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Cannot load from SP
        if (Reg == ARM::SP)
          PartOfLSMulti = false;
        // Register numbers must be in ascending order.
        else if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // A double covers at most two ops, hence Count <= 1 before adding this.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

/// Map a load/store-multiple opcode to its base-updating (writeback) form
/// for the given addressing submode. Aborts on unsupported combinations
/// (e.g. Thumb2 LDM/STM only exist in IA and DB submodes).
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  // Thumb1 tADDi8/tSUBi8 and tADDspi/tSUBspi immediates are in words (x4).
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale = 4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale = 1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  // Must be "Reg = Reg +/- imm" under the exact same predicate.
  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
/// On success returns the inc/dec instruction and sets \p Offset to the
/// signed byte amount; otherwise returns MBB.end() with \p Offset == 0.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  // Note: may stop at BeginMBBI even if it is itself a debug value; in that
  // case isIncrementOrDecrement simply returns 0 (unrecognized opcode).
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for a increment or decrement of \p Reg after \p MBBI.
/// On success returns the inc/dec instruction and sets \p Offset to the
/// signed byte amount; otherwise returns MBB.end() with \p Offset == 0.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // Prefer folding a preceding decrement by flipping the submode (ia->db,
  // ib->da); otherwise look for a matching increment/decrement afterwards.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Return the pre-indexed (writeback-before-access) opcode for a single
/// load/store. VLDR/VSTR have no indexed forms, so they map to one-register
/// VLDM/VSTM writeback variants chosen by \p Mode.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Return the post-indexed (writeback-after-access) opcode for a single
/// load/store; same VLDR/VSTR caveat as the pre-indexed variant above.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing immediate offset allows turning the access into a
  // pre-/post-indexed form.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // Try a preceding add/sub first (pre-indexed), then a trailing one
  // (post-indexed). AM5 (VLDR/VSTR) has no "add" pre-indexed equivalent here.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed form still encodes an AM2 shifter operand.
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Try to fold a preceding/trailing add/sub of the base register into a
/// t2LDRDi8/t2STRDi8, producing the pre-/post-indexed t2LDRD/t2STRD form.
/// Only a +/-8 byte update (the size of the register pair) can be folded.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // Loads define the writeback base after the pair; stores define it first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
      .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
      .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
    .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  // Whitelist of opcodes this pass knows how to merge; operand 1 must be a
  // plain register base (no frame index etc.).
  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// Emit a single immediate-offset LDR (isDef) or STR before \p MBBI with the
/// given register/base flags and predicate.
/// NOTE(review): the DL, OffKill and isT2 parameters are not used in this
/// body (the debug location is taken from MBBI) - presumably kept for
/// interface symmetry with the call sites; TODO confirm.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// Rewrite an LDRD/STRD whose register pair is illegal (non-consecutive or
/// errata-affected) into an equivalent LDM/STM or a pair of single
/// loads/stores. Returns true and erases the original on success; MBBI is
/// updated to the following instruction.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Load the odd (non-overlapping) half first so the base survives.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  // NOTE(review): CurrSize and CurrPredReg are tracked but never compared
  // when extending a chain (see the opcode/base/pred check below).
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Walk the block bottom-up; Position counts from the end of the block.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        // r4 := ldr [r0, #8]
        // r4 := ldr [r0, #4]
        // or
        // r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }


    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      MemOps.clear();
    }
  }
  // Flush the chain that reached the top of the block.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // The multi-merge failed; fall back to folding writeback into the
        // individual loads/stores.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restore the value of LR
/// into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
1802 if (isThumb1) return false; 1803 if (MBB.empty()) return false; 1804 1805 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1806 if (MBBI != MBB.begin() && 1807 (MBBI->getOpcode() == ARM::BX_RET || 1808 MBBI->getOpcode() == ARM::tBX_RET || 1809 MBBI->getOpcode() == ARM::MOVPCLR)) { 1810 MachineInstr *PrevMI = std::prev(MBBI); 1811 unsigned Opcode = PrevMI->getOpcode(); 1812 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1813 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1814 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1815 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1816 if (MO.getReg() != ARM::LR) 1817 return false; 1818 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1819 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1820 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1821 PrevMI->setDesc(TII->get(NewOpc)); 1822 MO.setReg(ARM::PC); 1823 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1824 MBB.erase(MBBI); 1825 return true; 1826 } 1827 } 1828 return false; 1829 } 1830 1831 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1832 MF = &Fn; 1833 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1834 TL = STI->getTargetLowering(); 1835 AFI = Fn.getInfo<ARMFunctionInfo>(); 1836 TII = STI->getInstrInfo(); 1837 TRI = STI->getRegisterInfo(); 1838 MRI = &Fn.getRegInfo(); 1839 RegClassInfoValid = false; 1840 isThumb2 = AFI->isThumb2Function(); 1841 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1842 1843 bool Modified = false; 1844 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1845 ++MFI) { 1846 MachineBasicBlock &MBB = *MFI; 1847 Modified |= LoadStoreMultipleOpti(MBB); 1848 if (STI->hasV5TOps()) 1849 Modified |= MergeReturnIntoLDM(MBB); 1850 } 1851 1852 Allocator.DestroyAll(); 1853 return Modified; 1854 } 1855 1856 namespace { 1857 /// Pre- register allocation pass that move load / stores from 
/// consecutive locations close to make it more likely they will be combined
/// later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Cached per-function state, set up in runOnMachineFunction().
  const DataLayout *TD;          // Used for ABI alignment of i64.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM pre- register allocation load / store optimization pass";
  }

private:
  /// Check whether \p Op0 and \p Op1 can be combined into one LDRD/STRD and,
  /// if so, compute the operands (opcode, registers, encoded offset,
  /// predicate) of the paired instruction.
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  /// Move the loads / stores in \p Ops (which all use base register \p Base)
  /// next to each other so a later pass can merge them.
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  /// Scan \p MBB, bucket loads / stores by base register, and reschedule
  /// each bucket.
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// Return true if it is safe and profitable to move the loads / stores in
/// \p MemOps so they become adjacent; scans the instructions strictly between
/// \p I and \p E. \p MemRegs holds the data registers of the moved ops and is
/// used to estimate the register-pressure impact.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    // Instructions that are part of the move set (and debug values) are
    // skipped: they will be relocated together.
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // A redefinition of the base register in between makes the reordering
      // illegal.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      // Any other register touched here will have its live range stretched
      // across the moved ops; count it as added pressure.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate the combined array from the function's allocator so its
  // lifetime matches the instruction's.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Map the single-register opcode to its paired counterpart. Scale is the
  // granule of the target addressing mode's immediate (t2 doubleword uses a
  // word-scaled imm8).
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2 doubleword: signed, word-scaled 8-bit immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM addrmode3 encodes sign separately from an 8-bit magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // The pair needs two distinct data registers.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    // Duplicate base+offset pairs were filtered out by the caller.
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by descending offset, so scanning from the back walks the
    // smallest offset upward; count how long a compatible, gap-free run is.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      // Track the earliest/latest op by original program position. Note this
      // bookkeeping runs before the compatibility checks below, so the op
      // that terminates the run also updates FirstOp/LastOp.
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // All ops in a run must map to the same load/store-multiple opcode.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // The run must be gap-free: same transfer size and exactly adjacent
      // offsets.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      // NOTE(review): the run found above occupies the *back* of Ops, but
      // this collects Ops[0..NumMove-1] from the front; the two sets coincide
      // only when NumMove == Ops.size() — verify intent.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads are hoisted
        // to the first op of the run, stores sunk to the last; then skip over
        // run members and debug values to find the actual insertion point.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          // Constrain the (still virtual) data registers to the class the
          // paired instruction requires.
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // Could not form a doubleword pair; just move the run's
          // instructions next to each other.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Per-region state: program position of each instruction, and loads /
  // stores bucketed by base register (bases also kept in discovery order).
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        // Resume scanning after the barrier on the next outer iteration.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unconditional loads / stores are rescheduled.
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access to the same base+offset ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // Same duplicate check as for loads, on the store buckets.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // If the scan stopped at a barrier / duplicate, reset the buckets and
    // start a fresh region after it.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}