1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 62 63 namespace { 64 /// Post- register allocation pass the combine load / store instructions to 65 /// form ldm / stm instructions. 66 struct ARMLoadStoreOpt : public MachineFunctionPass { 67 static char ID; 68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 69 70 const MachineFunction *MF; 71 const TargetInstrInfo *TII; 72 const TargetRegisterInfo *TRI; 73 const MachineRegisterInfo *MRI; 74 const ARMSubtarget *STI; 75 const TargetLowering *TL; 76 ARMFunctionInfo *AFI; 77 LivePhysRegs LiveRegs; 78 RegisterClassInfo RegClassInfo; 79 MachineBasicBlock::const_iterator LiveRegPos; 80 bool LiveRegsValid; 81 bool RegClassInfoValid; 82 bool isThumb1, isThumb2; 83 84 bool runOnMachineFunction(MachineFunction &Fn) override; 85 86 const char *getPassName() const override { 87 return "ARM load / store optimization pass"; 88 } 89 90 private: 91 /// A set of load/store MachineInstrs with same base register sorted by 92 /// offset. 93 struct MemOpQueueEntry { 94 MachineInstr *MI; 95 int Offset; ///< Load/Store offset. 96 unsigned Position; ///< Position as counted from end of basic block. 97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) 98 : MI(MI), Offset(Offset), Position(Position) {} 99 }; 100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 101 102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get 103 /// merged into a LDM/STM. 104 struct MergeCandidate { 105 /// List of instructions ordered by load/store offset. 106 SmallVector<MachineInstr*, 4> Instrs; 107 /// Index in Instrs of the instruction being latest in the schedule. 108 unsigned LatestMIIdx; 109 /// Index in Instrs of the instruction being earliest in the schedule. 110 unsigned EarliestMIIdx; 111 /// Index into the basic block where the merged instruction will be 112 /// inserted. (See MemOpQueueEntry.Position) 113 unsigned InsertPos; 114 /// Whether the instructions can be merged into a ldm/stm instruction. 115 bool CanMergeToLSMulti; 116 /// Whether the instructions can be merged into a ldrd/strd instruction. 117 bool CanMergeToLSDouble; 118 }; 119 BumpPtrAllocator Allocator; 120 SmallVector<const MergeCandidate*,4> Candidates; 121 122 void moveLiveRegsBefore(const MachineBasicBlock &MBB, 123 MachineBasicBlock::const_iterator Before); 124 unsigned findFreeReg(const TargetRegisterClass &RegClass); 125 void UpdateBaseRegUses(MachineBasicBlock &MBB, 126 MachineBasicBlock::iterator MBBI, 127 DebugLoc DL, unsigned Base, unsigned WordOffset, 128 ARMCC::CondCodes Pred, unsigned PredReg); 129 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, 130 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 131 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 132 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); 133 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, 134 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 135 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 136 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; 137 void FormCandidates(const MemOpQueue &MemOps); 138 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); 139 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 140 MachineBasicBlock::iterator &MBBI); 141 bool MergeBaseUpdateLoadStore(MachineInstr *MI); 142 bool MergeBaseUpdateLSMultiple(MachineInstr *MI); 143 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 144 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 145 }; 146 char ARMLoadStoreOpt::ID = 0; 147 } 148 149 static bool definesCPSR(const MachineInstr *MI) { 150 for (const auto &MO : MI->operands()) { 151 if (!MO.isReg()) 152 continue; 153 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 154 // If the instruction has live CPSR def, then it's not safe to fold it 155 // into load / store. 156 return true; 157 } 158 159 return false; 160 } 161 162 static int getMemoryOpOffset(const MachineInstr *MI) { 163 unsigned Opcode = MI->getOpcode(); 164 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 165 unsigned NumOperands = MI->getDesc().getNumOperands(); 166 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 167 168 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 169 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 170 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 171 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 172 return OffField; 173 174 // Thumb1 immediate offsets are scaled by 4 175 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 176 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 177 return OffField * 4; 178 179 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 180 : ARM_AM::getAM5Offset(OffField) * 4; 181 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 182 : ARM_AM::getAM5Op(OffField); 183 184 if (Op == ARM_AM::sub) 185 return -Offset; 186 187 return Offset; 188 } 189 190 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { 191 return MI.getOperand(1); 192 } 193 194 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { 195 return MI.getOperand(0); 196 } 197 198 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 199 switch (Opcode) { 200 default: llvm_unreachable("Unhandled opcode!"); 201 case ARM::LDRi12: 202 ++NumLDMGened; 203 switch (Mode) { 204 default: llvm_unreachable("Unhandled submode!"); 205 case ARM_AM::ia: return ARM::LDMIA; 206 case ARM_AM::da: return ARM::LDMDA; 207 case ARM_AM::db: return ARM::LDMDB; 208 case ARM_AM::ib: return ARM::LDMIB; 209 } 210 case ARM::STRi12: 211 ++NumSTMGened; 212 switch (Mode) { 213 default: llvm_unreachable("Unhandled submode!"); 214 case ARM_AM::ia: return ARM::STMIA; 215 case ARM_AM::da: return ARM::STMDA; 216 case ARM_AM::db: return ARM::STMDB; 217 case ARM_AM::ib: return ARM::STMIB; 218 } 219 case ARM::tLDRi: 220 case ARM::tLDRspi: 221 // tLDMIA is writeback-only - unless the base register is in the input 222 // reglist. 223 ++NumLDMGened; 224 switch (Mode) { 225 default: llvm_unreachable("Unhandled submode!"); 226 case ARM_AM::ia: return ARM::tLDMIA; 227 } 228 case ARM::tSTRi: 229 case ARM::tSTRspi: 230 // There is no non-writeback tSTMIA either. 231 ++NumSTMGened; 232 switch (Mode) { 233 default: llvm_unreachable("Unhandled submode!"); 234 case ARM_AM::ia: return ARM::tSTMIA_UPD; 235 } 236 case ARM::t2LDRi8: 237 case ARM::t2LDRi12: 238 ++NumLDMGened; 239 switch (Mode) { 240 default: llvm_unreachable("Unhandled submode!"); 241 case ARM_AM::ia: return ARM::t2LDMIA; 242 case ARM_AM::db: return ARM::t2LDMDB; 243 } 244 case ARM::t2STRi8: 245 case ARM::t2STRi12: 246 ++NumSTMGened; 247 switch (Mode) { 248 default: llvm_unreachable("Unhandled submode!"); 249 case ARM_AM::ia: return ARM::t2STMIA; 250 case ARM_AM::db: return ARM::t2STMDB; 251 } 252 case ARM::VLDRS: 253 ++NumVLDMGened; 254 switch (Mode) { 255 default: llvm_unreachable("Unhandled submode!"); 256 case ARM_AM::ia: return ARM::VLDMSIA; 257 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 258 } 259 case ARM::VSTRS: 260 ++NumVSTMGened; 261 switch (Mode) { 262 default: llvm_unreachable("Unhandled submode!"); 263 case ARM_AM::ia: return ARM::VSTMSIA; 264 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 265 } 266 case ARM::VLDRD: 267 ++NumVLDMGened; 268 switch (Mode) { 269 default: llvm_unreachable("Unhandled submode!"); 270 case ARM_AM::ia: return ARM::VLDMDIA; 271 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 272 } 273 case ARM::VSTRD: 274 ++NumVSTMGened; 275 switch (Mode) { 276 default: llvm_unreachable("Unhandled submode!"); 277 case ARM_AM::ia: return ARM::VSTMDIA; 278 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 279 } 280 } 281 } 282 283 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 284 switch (Opcode) { 285 default: llvm_unreachable("Unhandled opcode!"); 286 case ARM::LDMIA_RET: 287 case ARM::LDMIA: 288 case ARM::LDMIA_UPD: 289 case ARM::STMIA: 290 case ARM::STMIA_UPD: 291 case ARM::tLDMIA: 292 case ARM::tLDMIA_UPD: 293 case ARM::tSTMIA_UPD: 294 case ARM::t2LDMIA_RET: 295 case ARM::t2LDMIA: 296 case ARM::t2LDMIA_UPD: 297 case ARM::t2STMIA: 298 case ARM::t2STMIA_UPD: 299 case ARM::VLDMSIA: 300 case ARM::VLDMSIA_UPD: 301 case ARM::VSTMSIA: 302 case ARM::VSTMSIA_UPD: 303 case ARM::VLDMDIA: 304 case ARM::VLDMDIA_UPD: 305 case ARM::VSTMDIA: 306 case ARM::VSTMDIA_UPD: 307 return ARM_AM::ia; 308 309 case ARM::LDMDA: 310 case ARM::LDMDA_UPD: 311 case ARM::STMDA: 312 case ARM::STMDA_UPD: 313 return ARM_AM::da; 314 315 case ARM::LDMDB: 316 case ARM::LDMDB_UPD: 317 case ARM::STMDB: 318 case ARM::STMDB_UPD: 319 case ARM::t2LDMDB: 320 case ARM::t2LDMDB_UPD: 321 case ARM::t2STMDB: 322 case ARM::t2STMDB_UPD: 323 case ARM::VLDMSDB_UPD: 324 case ARM::VSTMSDB_UPD: 325 case ARM::VLDMDDB_UPD: 326 case ARM::VSTMDDB_UPD: 327 return ARM_AM::db; 328 329 case ARM::LDMIB: 330 case ARM::LDMIB_UPD: 331 case ARM::STMIB: 332 case ARM::STMIB_UPD: 333 return ARM_AM::ib; 334 } 335 } 336 337 static bool isT1i32Load(unsigned Opc) { 338 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 339 } 340 341 static bool isT2i32Load(unsigned Opc) { 342 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 343 } 344 345 static bool isi32Load(unsigned Opc) { 346 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 347 } 348 349 static bool isT1i32Store(unsigned Opc) { 350 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 351 } 352 353 static bool isT2i32Store(unsigned Opc) { 354 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 355 } 356 357 static bool isi32Store(unsigned Opc) { 358 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 359 } 360 361 static bool isLoadSingle(unsigned Opc) { 362 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 363 } 364 365 static unsigned getImmScale(unsigned Opc) { 366 switch (Opc) { 367 default: llvm_unreachable("Unhandled opcode!"); 368 case ARM::tLDRi: 369 case ARM::tSTRi: 370 case ARM::tLDRspi: 371 case ARM::tSTRspi: 372 return 1; 373 case ARM::tLDRHi: 374 case ARM::tSTRHi: 375 return 2; 376 case ARM::tLDRBi: 377 case ARM::tSTRBi: 378 return 4; 379 } 380 } 381 382 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { 383 switch (MI->getOpcode()) { 384 default: return 0; 385 case ARM::LDRi12: 386 case ARM::STRi12: 387 case ARM::tLDRi: 388 case ARM::tSTRi: 389 case ARM::tLDRspi: 390 case ARM::tSTRspi: 391 case ARM::t2LDRi8: 392 case ARM::t2LDRi12: 393 case ARM::t2STRi8: 394 case ARM::t2STRi12: 395 case ARM::VLDRS: 396 case ARM::VSTRS: 397 return 4; 398 case ARM::VLDRD: 399 case ARM::VSTRD: 400 return 8; 401 case ARM::LDMIA: 402 case ARM::LDMDA: 403 case ARM::LDMDB: 404 case ARM::LDMIB: 405 case ARM::STMIA: 406 case ARM::STMDA: 407 case ARM::STMDB: 408 case ARM::STMIB: 409 case ARM::tLDMIA: 410 case ARM::tLDMIA_UPD: 411 case ARM::tSTMIA_UPD: 412 case ARM::t2LDMIA: 413 case ARM::t2LDMDB: 414 case ARM::t2STMIA: 415 case ARM::t2STMDB: 416 case ARM::VLDMSIA: 417 case ARM::VSTMSIA: 418 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 419 case ARM::VLDMDIA: 420 case ARM::VSTMDIA: 421 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 422 } 423 } 424 425 /// Update future uses of the base register with the offset introduced 426 /// due to writeback. This function only works on Thumb1. 427 void 428 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 429 MachineBasicBlock::iterator MBBI, 430 DebugLoc DL, unsigned Base, 431 unsigned WordOffset, 432 ARMCC::CondCodes Pred, unsigned PredReg) { 433 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 434 // Start updating any instructions with immediate offsets. Insert a SUB before 435 // the first non-updateable instruction (if any). 436 for (; MBBI != MBB.end(); ++MBBI) { 437 bool InsertSub = false; 438 unsigned Opc = MBBI->getOpcode(); 439 440 if (MBBI->readsRegister(Base)) { 441 int Offset; 442 bool IsLoad = 443 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 444 bool IsStore = 445 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 446 447 if (IsLoad || IsStore) { 448 // Loads and stores with immediate offsets can be updated, but only if 449 // the new offset isn't negative. 450 // The MachineOperand containing the offset immediate is the last one 451 // before predicates. 452 MachineOperand &MO = 453 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 454 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 455 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 456 457 // If storing the base register, it needs to be reset first. 458 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); 459 460 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 461 MO.setImm(Offset); 462 else 463 InsertSub = true; 464 465 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 466 !definesCPSR(MBBI)) { 467 // SUBS/ADDS using this register, with a dead def of the CPSR. 468 // Merge it with the update; if the merged offset is too large, 469 // insert a new sub instead. 470 MachineOperand &MO = 471 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 472 Offset = (Opc == ARM::tSUBi8) ? 473 MO.getImm() + WordOffset * 4 : 474 MO.getImm() - WordOffset * 4 ; 475 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 476 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 477 // Offset == 0. 478 MO.setImm(Offset); 479 // The base register has now been reset, so exit early. 480 return; 481 } else { 482 InsertSub = true; 483 } 484 485 } else { 486 // Can't update the instruction. 487 InsertSub = true; 488 } 489 490 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 491 // Since SUBS sets the condition flags, we can't place the base reset 492 // after an instruction that has a live CPSR def. 493 // The base register might also contain an argument for a function call. 494 InsertSub = true; 495 } 496 497 if (InsertSub) { 498 // An instruction above couldn't be updated, so insert a sub. 499 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 500 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 501 return; 502 } 503 504 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 505 // Register got killed. Stop updating. 506 return; 507 } 508 509 // End of block was reached. 510 if (MBB.succ_size() > 0) { 511 // FIXME: Because of a bug, live registers are sometimes missing from 512 // the successor blocks' live-in sets. This means we can't trust that 513 // information and *always* have to reset at the end of a block. 514 // See PR21029. 515 if (MBBI != MBB.end()) --MBBI; 516 AddDefaultT1CC( 517 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) 518 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 519 } 520 } 521 522 /// Return the first register of class \p RegClass that is not in \p Regs. 523 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 524 if (!RegClassInfoValid) { 525 RegClassInfo.runOnMachineFunction(*MF); 526 RegClassInfoValid = true; 527 } 528 529 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 530 if (!LiveRegs.contains(Reg)) 531 return Reg; 532 return 0; 533 } 534 535 /// Compute live registers just before instruction \p Before (in normal schedule 536 /// direction). Computes backwards so multiple queries in the same block must 537 /// come in reverse order. 538 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 539 MachineBasicBlock::const_iterator Before) { 540 // Initialize if we never queried in this block. 541 if (!LiveRegsValid) { 542 LiveRegs.init(TRI); 543 LiveRegs.addLiveOuts(&MBB, true); 544 LiveRegPos = MBB.end(); 545 LiveRegsValid = true; 546 } 547 // Move backward just before the "Before" position. 548 while (LiveRegPos != Before) { 549 --LiveRegPos; 550 LiveRegs.stepBackward(*LiveRegPos); 551 } 552 } 553 554 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 555 unsigned Reg) { 556 for (const std::pair<unsigned, bool> &R : Regs) 557 if (R.first == Reg) 558 return true; 559 return false; 560 } 561 562 /// Create and insert a LDM or STM with Base as base register and registers in 563 /// Regs as the register operands that would be loaded / stored. It returns 564 /// true if the transformation is done. 565 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 566 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 567 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 568 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 569 unsigned NumRegs = Regs.size(); 570 assert(NumRegs > 1); 571 572 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 573 // Compute liveness information for that register to make the decision. 574 bool SafeToClobberCPSR = !isThumb1 || 575 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 576 MachineBasicBlock::LQR_Dead); 577 578 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 579 580 // Exception: If the base register is in the input reglist, Thumb1 LDM is 581 // non-writeback. 582 // It's also not possible to merge an STR of the base register in Thumb1. 583 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 584 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 585 if (Opcode == ARM::tLDRi) { 586 Writeback = false; 587 } else if (Opcode == ARM::tSTRi) { 588 return nullptr; 589 } 590 } 591 592 ARM_AM::AMSubMode Mode = ARM_AM::ia; 593 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 594 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 595 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 596 597 if (Offset == 4 && haveIBAndDA) { 598 Mode = ARM_AM::ib; 599 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 600 Mode = ARM_AM::da; 601 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 602 // VLDM/VSTM do not support DB mode without also updating the base reg. 603 Mode = ARM_AM::db; 604 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 605 // Check if this is a supported opcode before inserting instructions to 606 // calculate a new base register. 607 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 608 609 // If starting offset isn't zero, insert a MI to materialize a new base. 610 // But only do so if it is cost effective, i.e. merging more than two 611 // loads / stores. 612 if (NumRegs <= 2) 613 return nullptr; 614 615 // On Thumb1, it's not worth materializing a new base register without 616 // clobbering the CPSR (i.e. not using ADDS/SUBS). 617 if (!SafeToClobberCPSR) 618 return nullptr; 619 620 unsigned NewBase; 621 if (isi32Load(Opcode)) { 622 // If it is a load, then just use one of the destination register to 623 // use as the new base. 624 NewBase = Regs[NumRegs-1].first; 625 } else { 626 // Find a free register that we can use as scratch register. 627 moveLiveRegsBefore(MBB, InsertBefore); 628 // The merged instruction does not exist yet but will use several Regs if 629 // it is a Store. 630 if (!isLoadSingle(Opcode)) 631 for (const std::pair<unsigned, bool> &R : Regs) 632 LiveRegs.addReg(R.first); 633 634 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 635 if (NewBase == 0) 636 return nullptr; 637 } 638 639 int BaseOpc = 640 isThumb2 ? ARM::t2ADDri : 641 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 642 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 643 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 644 645 if (Offset < 0) { 646 Offset = - Offset; 647 BaseOpc = 648 isThumb2 ? ARM::t2SUBri : 649 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 650 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 651 } 652 653 if (!TL->isLegalAddImmediate(Offset)) 654 // FIXME: Try add with register operand? 655 return nullptr; // Probably not worth it then. 656 657 // We can only append a kill flag to the add/sub input if the value is not 658 // used in the register list of the stm as well. 659 bool KillOldBase = BaseKill && 660 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 661 662 if (isThumb1) { 663 // Thumb1: depending on immediate size, use either 664 // ADDS NewBase, Base, #imm3 665 // or 666 // MOV NewBase, Base 667 // ADDS NewBase, #imm8. 668 if (Base != NewBase && 669 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 670 // Need to insert a MOV to the new base first. 671 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 672 !STI->hasV6Ops()) { 673 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 674 if (Pred != ARMCC::AL) 675 return nullptr; 676 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 677 .addReg(Base, getKillRegState(KillOldBase)); 678 } else 679 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 680 .addReg(Base, getKillRegState(KillOldBase)) 681 .addImm(Pred).addReg(PredReg); 682 683 // The following ADDS/SUBS becomes an update. 684 Base = NewBase; 685 KillOldBase = true; 686 } 687 if (BaseOpc == ARM::tADDrSPi) { 688 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 689 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 690 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 691 .addImm(Pred).addReg(PredReg); 692 } else 693 AddDefaultT1CC( 694 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 695 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 696 .addImm(Pred).addReg(PredReg); 697 } else { 698 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 699 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 700 .addImm(Pred).addReg(PredReg).addReg(0); 701 } 702 Base = NewBase; 703 BaseKill = true; // New base is always killed straight away. 704 } 705 706 bool isDef = isLoadSingle(Opcode); 707 708 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 709 // base register writeback. 710 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 711 if (!Opcode) 712 return nullptr; 713 714 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 715 // - There is no writeback (LDM of base register), 716 // - the base register is killed by the merged instruction, 717 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 718 // to reset the base register. 719 // Otherwise, don't merge. 720 // It's safe to return here since the code to materialize a new base register 721 // above is also conditional on SafeToClobberCPSR. 722 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 723 return nullptr; 724 725 MachineInstrBuilder MIB; 726 727 if (Writeback) { 728 if (Opcode == ARM::tLDMIA) 729 // Update tLDMIA with writeback if necessary. 730 Opcode = ARM::tLDMIA_UPD; 731 732 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 733 734 // Thumb1: we might need to set base writeback when building the MI. 735 MIB.addReg(Base, getDefRegState(true)) 736 .addReg(Base, getKillRegState(BaseKill)); 737 738 // The base isn't dead after a merged instruction with writeback. 739 // Insert a sub instruction after the newly formed instruction to reset. 740 if (!BaseKill) 741 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 742 743 } else { 744 // No writeback, simply build the MachineInstr. 745 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 746 MIB.addReg(Base, getKillRegState(BaseKill)); 747 } 748 749 MIB.addImm(Pred).addReg(PredReg); 750 751 for (const std::pair<unsigned, bool> &R : Regs) 752 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 753 754 return MIB.getInstr(); 755 } 756 757 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 758 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 759 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 760 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 761 bool IsLoad = isi32Load(Opcode); 762 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 763 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; 764 765 assert(Regs.size() == 2); 766 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 767 TII->get(LoadStoreOpcode)); 768 if (IsLoad) { 769 MIB.addReg(Regs[0].first, RegState::Define) 770 .addReg(Regs[1].first, RegState::Define); 771 } else { 772 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 773 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 774 } 775 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 776 return MIB.getInstr(); 777 } 778 779 /// Call MergeOps and update MemOps and merges accordingly on success. 780 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 781 const MachineInstr *First = Cand.Instrs.front(); 782 unsigned Opcode = First->getOpcode(); 783 bool IsLoad = isLoadSingle(Opcode); 784 SmallVector<std::pair<unsigned, bool>, 8> Regs; 785 SmallVector<unsigned, 4> ImpDefs; 786 DenseSet<unsigned> KilledRegs; 787 // Determine list of registers and list of implicit super-register defs. 788 for (const MachineInstr *MI : Cand.Instrs) { 789 const MachineOperand &MO = getLoadStoreRegOp(*MI); 790 unsigned Reg = MO.getReg(); 791 bool IsKill = MO.isKill(); 792 if (IsKill) 793 KilledRegs.insert(Reg); 794 Regs.push_back(std::make_pair(Reg, IsKill)); 795 796 if (IsLoad) { 797 // Collect any implicit defs of super-registers, after merging we can't 798 // be sure anymore that we properly preserved these live ranges and must 799 // removed these implicit operands. 800 for (const MachineOperand &MO : MI->implicit_operands()) { 801 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 802 continue; 803 assert(MO.isImplicit()); 804 unsigned DefReg = MO.getReg(); 805 806 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 807 continue; 808 // We can ignore cases where the super-reg is read and written. 809 if (MI->readsRegister(DefReg)) 810 continue; 811 ImpDefs.push_back(DefReg); 812 } 813 } 814 } 815 816 // Attempt the merge. 817 typedef MachineBasicBlock::iterator iterator; 818 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 819 iterator InsertBefore = std::next(iterator(LatestMI)); 820 MachineBasicBlock &MBB = *LatestMI->getParent(); 821 unsigned Offset = getMemoryOpOffset(First); 822 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 823 bool BaseKill = LatestMI->killsRegister(Base); 824 unsigned PredReg = 0; 825 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); 826 DebugLoc DL = First->getDebugLoc(); 827 MachineInstr *Merged = nullptr; 828 if (Cand.CanMergeToLSDouble) 829 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 830 Opcode, Pred, PredReg, DL, Regs); 831 if (!Merged && Cand.CanMergeToLSMulti) 832 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 833 Opcode, Pred, PredReg, DL, Regs); 834 if (!Merged) 835 return nullptr; 836 837 // Determine earliest instruction that will get removed. We then keep an 838 // iterator just above it so the following erases don't invalidated it. 839 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 840 bool EarliestAtBegin = false; 841 if (EarliestI == MBB.begin()) { 842 EarliestAtBegin = true; 843 } else { 844 EarliestI = std::prev(EarliestI); 845 } 846 847 // Remove instructions which have been merged. 848 for (MachineInstr *MI : Cand.Instrs) 849 MBB.erase(MI); 850 851 // Determine range between the earliest removed instruction and the new one. 852 if (EarliestAtBegin) 853 EarliestI = MBB.begin(); 854 else 855 EarliestI = std::next(EarliestI); 856 auto FixupRange = make_range(EarliestI, iterator(Merged)); 857 858 if (isLoadSingle(Opcode)) { 859 // If the previous loads defined a super-reg, then we have to mark earlier 860 // operands undef; Replicate the super-reg def on the merged instruction. 861 for (MachineInstr &MI : FixupRange) { 862 for (unsigned &ImpDefReg : ImpDefs) { 863 for (MachineOperand &MO : MI.implicit_operands()) { 864 if (!MO.isReg() || MO.getReg() != ImpDefReg) 865 continue; 866 if (MO.readsReg()) 867 MO.setIsUndef(); 868 else if (MO.isDef()) 869 ImpDefReg = 0; 870 } 871 } 872 } 873 874 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); 875 for (unsigned ImpDef : ImpDefs) 876 MIB.addReg(ImpDef, RegState::ImplicitDefine); 877 } else { 878 // Remove kill flags: We are possibly storing the values later now. 879 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); 880 for (MachineInstr &MI : FixupRange) { 881 for (MachineOperand &MO : MI.uses()) { 882 if (!MO.isReg() || !MO.isKill()) 883 continue; 884 if (KilledRegs.count(MO.getReg())) 885 MO.setIsKill(false); 886 } 887 } 888 assert(ImpDefs.empty()); 889 } 890 891 return Merged; 892 } 893 894 static bool isValidLSDoubleOffset(int Offset) { 895 unsigned Value = abs(Offset); 896 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally 897 // multiplied by 4. 898 return (Value % 4) == 0 && Value < 1024; 899 } 900 901 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries. 902 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { 903 const MachineInstr *FirstMI = MemOps[0].MI; 904 unsigned Opcode = FirstMI->getOpcode(); 905 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 906 unsigned Size = getLSMultipleTransferSize(FirstMI); 907 // vldm / vstm limit are 32 for S variants, 16 for D variants. 908 unsigned Limit; 909 switch (Opcode) { 910 default: 911 Limit = UINT_MAX; 912 break; 913 case ARM::VSTRS: 914 Limit = 32; 915 break; 916 case ARM::VSTRD: 917 Limit = 16; 918 break; 919 case ARM::VLDRD: 920 Limit = 16; 921 break; 922 case ARM::VLDRS: 923 Limit = 32; 924 break; 925 } 926 927 unsigned SIndex = 0; 928 unsigned EIndex = MemOps.size(); 929 do { 930 // Look at the first instruction. 931 const MachineInstr *MI = MemOps[SIndex].MI; 932 int Offset = MemOps[SIndex].Offset; 933 const MachineOperand &PMO = getLoadStoreRegOp(*MI); 934 unsigned PReg = PMO.getReg(); 935 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 936 unsigned Latest = SIndex; 937 unsigned Earliest = SIndex; 938 unsigned Count = 1; 939 bool CanMergeToLSDouble = 940 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); 941 // ARM errata 602117: LDRD with base in list may result in incorrect base 942 // register when interrupted or faulted. 943 if (STI->isCortexM3() && isi32Load(Opcode) && 944 PReg == getLoadStoreBaseOp(*MI).getReg()) 945 CanMergeToLSDouble = false; 946 947 bool CanMergeToLSMulti = true; 948 // On swift vldm/vstm starting with an odd register number as that needs 949 // more uops than single vldrs. 950 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) 951 CanMergeToLSMulti = false; 952 953 // Merge following instructions where possible. 954 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { 955 int NewOffset = MemOps[I].Offset; 956 if (NewOffset != Offset + (int)Size) 957 break; 958 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); 959 unsigned Reg = MO.getReg(); 960 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 961 962 // See if the current load/store may be part of a multi load/store. 963 bool PartOfLSMulti = CanMergeToLSMulti; 964 if (PartOfLSMulti) { 965 // Cannot load from SP 966 if (Reg == ARM::SP) 967 PartOfLSMulti = false; 968 // Register numbers must be in ascending order. 969 else if (RegNum <= PRegNum) 970 PartOfLSMulti = false; 971 // For VFP / NEON load/store multiples, the registers must be 972 // consecutive and within the limit on the number of registers per 973 // instruction. 974 else if (!isNotVFP && RegNum != PRegNum+1) 975 PartOfLSMulti = false; 976 } 977 // See if the current load/store may be part of a double load/store. 978 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; 979 980 if (!PartOfLSMulti && !PartOfLSDouble) 981 break; 982 CanMergeToLSMulti &= PartOfLSMulti; 983 CanMergeToLSDouble &= PartOfLSDouble; 984 // Track MemOp with latest and earliest position (Positions are 985 // counted in reverse). 986 unsigned Position = MemOps[I].Position; 987 if (Position < MemOps[Latest].Position) 988 Latest = I; 989 else if (Position > MemOps[Earliest].Position) 990 Earliest = I; 991 // Prepare for next MemOp. 992 Offset += Size; 993 PRegNum = RegNum; 994 } 995 996 // Form a candidate from the Ops collected so far. 997 MergeCandidate *Candidate = new(Allocator) MergeCandidate; 998 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) 999 Candidate->Instrs.push_back(MemOps[C].MI); 1000 Candidate->LatestMIIdx = Latest - SIndex; 1001 Candidate->EarliestMIIdx = Earliest - SIndex; 1002 Candidate->InsertPos = MemOps[Latest].Position; 1003 if (Count == 1) 1004 CanMergeToLSMulti = CanMergeToLSDouble = false; 1005 Candidate->CanMergeToLSMulti = CanMergeToLSMulti; 1006 Candidate->CanMergeToLSDouble = CanMergeToLSDouble; 1007 Candidates.push_back(Candidate); 1008 // Continue after the chain. 1009 SIndex += Count; 1010 } while (SIndex < EIndex); 1011 } 1012 1013 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 1014 unsigned Bytes, unsigned Limit, 1015 ARMCC::CondCodes Pred, unsigned PredReg) { 1016 unsigned MyPredReg = 0; 1017 if (!MI) 1018 return false; 1019 1020 bool CheckCPSRDef = false; 1021 switch (MI->getOpcode()) { 1022 default: return false; 1023 case ARM::tSUBi8: 1024 case ARM::t2SUBri: 1025 case ARM::SUBri: 1026 CheckCPSRDef = true; 1027 break; 1028 case ARM::tSUBspi: 1029 break; 1030 } 1031 1032 // Make sure the offset fits in 8 bits. 1033 if (Bytes == 0 || (Limit && Bytes >= Limit)) 1034 return false; 1035 1036 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 1037 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME 1038 if (!(MI->getOperand(0).getReg() == Base && 1039 MI->getOperand(1).getReg() == Base && 1040 (MI->getOperand(2).getImm() * Scale) == Bytes && 1041 getInstrPredicate(MI, MyPredReg) == Pred && 1042 MyPredReg == PredReg)) 1043 return false; 1044 1045 return CheckCPSRDef ? !definesCPSR(MI) : true; 1046 } 1047 1048 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 1049 unsigned Bytes, unsigned Limit, 1050 ARMCC::CondCodes Pred, unsigned PredReg) { 1051 unsigned MyPredReg = 0; 1052 if (!MI) 1053 return false; 1054 1055 bool CheckCPSRDef = false; 1056 switch (MI->getOpcode()) { 1057 default: return false; 1058 case ARM::tADDi8: 1059 case ARM::t2ADDri: 1060 case ARM::ADDri: 1061 CheckCPSRDef = true; 1062 break; 1063 case ARM::tADDspi: 1064 break; 1065 } 1066 1067 if (Bytes == 0 || (Limit && Bytes >= Limit)) 1068 // Make sure the offset fits in 8 bits. 1069 return false; 1070 1071 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 1072 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 1073 if (!(MI->getOperand(0).getReg() == Base && 1074 MI->getOperand(1).getReg() == Base && 1075 (MI->getOperand(2).getImm() * Scale) == Bytes && 1076 getInstrPredicate(MI, MyPredReg) == Pred && 1077 MyPredReg == PredReg)) 1078 return false; 1079 1080 return CheckCPSRDef ? !definesCPSR(MI) : true; 1081 } 1082 1083 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1084 ARM_AM::AMSubMode Mode) { 1085 switch (Opc) { 1086 default: llvm_unreachable("Unhandled opcode!"); 1087 case ARM::LDMIA: 1088 case ARM::LDMDA: 1089 case ARM::LDMDB: 1090 case ARM::LDMIB: 1091 switch (Mode) { 1092 default: llvm_unreachable("Unhandled submode!"); 1093 case ARM_AM::ia: return ARM::LDMIA_UPD; 1094 case ARM_AM::ib: return ARM::LDMIB_UPD; 1095 case ARM_AM::da: return ARM::LDMDA_UPD; 1096 case ARM_AM::db: return ARM::LDMDB_UPD; 1097 } 1098 case ARM::STMIA: 1099 case ARM::STMDA: 1100 case ARM::STMDB: 1101 case ARM::STMIB: 1102 switch (Mode) { 1103 default: llvm_unreachable("Unhandled submode!"); 1104 case ARM_AM::ia: return ARM::STMIA_UPD; 1105 case ARM_AM::ib: return ARM::STMIB_UPD; 1106 case ARM_AM::da: return ARM::STMDA_UPD; 1107 case ARM_AM::db: return ARM::STMDB_UPD; 1108 } 1109 case ARM::t2LDMIA: 1110 case ARM::t2LDMDB: 1111 switch (Mode) { 1112 default: llvm_unreachable("Unhandled submode!"); 1113 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1114 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1115 } 1116 case ARM::t2STMIA: 1117 case ARM::t2STMDB: 1118 switch (Mode) { 1119 default: llvm_unreachable("Unhandled submode!"); 1120 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1121 case ARM_AM::db: return ARM::t2STMDB_UPD; 1122 } 1123 case ARM::VLDMSIA: 1124 switch (Mode) { 1125 default: llvm_unreachable("Unhandled submode!"); 1126 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1127 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1128 } 1129 case ARM::VLDMDIA: 1130 switch (Mode) { 1131 default: llvm_unreachable("Unhandled submode!"); 1132 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1133 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1134 } 1135 case ARM::VSTMSIA: 1136 switch (Mode) { 1137 default: llvm_unreachable("Unhandled submode!"); 1138 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1139 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1140 } 1141 case ARM::VSTMDIA: 1142 switch (Mode) { 1143 default: llvm_unreachable("Unhandled submode!"); 1144 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1145 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1146 } 1147 } 1148 } 1149 1150 /// Fold proceeding/trailing inc/dec of base register into the 1151 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1152 /// 1153 /// stmia rn, <ra, rb, rc> 1154 /// rn := rn + 4 * 3; 1155 /// => 1156 /// stmia rn!, <ra, rb, rc> 1157 /// 1158 /// rn := rn - 4 * 3; 1159 /// ldmia rn, <ra, rb, rc> 1160 /// => 1161 /// ldmdb rn!, <ra, rb, rc> 1162 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { 1163 // Thumb1 is already using updating loads/stores. 1164 if (isThumb1) return false; 1165 1166 const MachineOperand &BaseOP = MI->getOperand(0); 1167 unsigned Base = BaseOP.getReg(); 1168 bool BaseKill = BaseOP.isKill(); 1169 unsigned Bytes = getLSMultipleTransferSize(MI); 1170 unsigned PredReg = 0; 1171 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1172 unsigned Opcode = MI->getOpcode(); 1173 DebugLoc DL = MI->getDebugLoc(); 1174 1175 // Can't use an updating ld/st if the base register is also a dest 1176 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1177 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1178 if (MI->getOperand(i).getReg() == Base) 1179 return false; 1180 1181 bool DoMerge = false; 1182 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1183 1184 // Try merging with the previous instruction. 1185 MachineBasicBlock &MBB = *MI->getParent(); 1186 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1187 MachineBasicBlock::iterator MBBI(MI); 1188 if (MBBI != BeginMBBI) { 1189 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1190 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1191 --PrevMBBI; 1192 if (Mode == ARM_AM::ia && 1193 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1194 Mode = ARM_AM::db; 1195 DoMerge = true; 1196 } else if (Mode == ARM_AM::ib && 1197 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1198 Mode = ARM_AM::da; 1199 DoMerge = true; 1200 } 1201 if (DoMerge) 1202 MBB.erase(PrevMBBI); 1203 } 1204 1205 // Try merging with the next instruction. 1206 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1207 if (!DoMerge && MBBI != EndMBBI) { 1208 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1209 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1210 ++NextMBBI; 1211 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 1212 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1213 DoMerge = true; 1214 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 1215 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1216 DoMerge = true; 1217 } 1218 if (DoMerge) 1219 MBB.erase(NextMBBI); 1220 } 1221 1222 if (!DoMerge) 1223 return false; 1224 1225 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1226 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1227 .addReg(Base, getDefRegState(true)) // WB base register 1228 .addReg(Base, getKillRegState(BaseKill)) 1229 .addImm(Pred).addReg(PredReg); 1230 1231 // Transfer the rest of operands. 1232 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1233 MIB.addOperand(MI->getOperand(OpNum)); 1234 1235 // Transfer memoperands. 1236 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1237 1238 MBB.erase(MBBI); 1239 return true; 1240 } 1241 1242 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1243 ARM_AM::AddrOpc Mode) { 1244 switch (Opc) { 1245 case ARM::LDRi12: 1246 return ARM::LDR_PRE_IMM; 1247 case ARM::STRi12: 1248 return ARM::STR_PRE_IMM; 1249 case ARM::VLDRS: 1250 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1251 case ARM::VLDRD: 1252 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1253 case ARM::VSTRS: 1254 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1255 case ARM::VSTRD: 1256 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1257 case ARM::t2LDRi8: 1258 case ARM::t2LDRi12: 1259 return ARM::t2LDR_PRE; 1260 case ARM::t2STRi8: 1261 case ARM::t2STRi12: 1262 return ARM::t2STR_PRE; 1263 default: llvm_unreachable("Unhandled opcode!"); 1264 } 1265 } 1266 1267 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1268 ARM_AM::AddrOpc Mode) { 1269 switch (Opc) { 1270 case ARM::LDRi12: 1271 return ARM::LDR_POST_IMM; 1272 case ARM::STRi12: 1273 return ARM::STR_POST_IMM; 1274 case ARM::VLDRS: 1275 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1276 case ARM::VLDRD: 1277 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1278 case ARM::VSTRS: 1279 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1280 case ARM::VSTRD: 1281 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1282 case ARM::t2LDRi8: 1283 case ARM::t2LDRi12: 1284 return ARM::t2LDR_POST; 1285 case ARM::t2STRi8: 1286 case ARM::t2STRi12: 1287 return ARM::t2STR_POST; 1288 default: llvm_unreachable("Unhandled opcode!"); 1289 } 1290 } 1291 1292 /// Fold proceeding/trailing inc/dec of base register into the 1293 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1294 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { 1295 // Thumb1 doesn't have updating LDR/STR. 1296 // FIXME: Use LDM/STM with single register instead. 1297 if (isThumb1) return false; 1298 1299 unsigned Base = getLoadStoreBaseOp(*MI).getReg(); 1300 bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); 1301 unsigned Bytes = getLSMultipleTransferSize(MI); 1302 unsigned Opcode = MI->getOpcode(); 1303 DebugLoc DL = MI->getDebugLoc(); 1304 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1305 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1306 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1307 if (isi32Load(Opcode) || isi32Store(Opcode)) 1308 if (MI->getOperand(2).getImm() != 0) 1309 return false; 1310 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1311 return false; 1312 1313 bool isLd = isLoadSingle(Opcode); 1314 // Can't do the merge if the destination register is the same as the would-be 1315 // writeback register. 1316 if (MI->getOperand(0).getReg() == Base) 1317 return false; 1318 1319 unsigned PredReg = 0; 1320 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1321 bool DoMerge = false; 1322 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1323 unsigned NewOpc = 0; 1324 // AM2 - 12 bits, thumb2 - 8 bits. 1325 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 1326 1327 // Try merging with the previous instruction. 1328 MachineBasicBlock &MBB = *MI->getParent(); 1329 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1330 MachineBasicBlock::iterator MBBI(MI); 1331 if (MBBI != BeginMBBI) { 1332 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1333 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1334 --PrevMBBI; 1335 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1336 DoMerge = true; 1337 AddSub = ARM_AM::sub; 1338 } else if (!isAM5 && 1339 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1340 DoMerge = true; 1341 } 1342 if (DoMerge) { 1343 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 1344 MBB.erase(PrevMBBI); 1345 } 1346 } 1347 1348 // Try merging with the next instruction. 1349 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1350 if (!DoMerge && MBBI != EndMBBI) { 1351 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1352 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1353 ++NextMBBI; 1354 if (!isAM5 && 1355 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1356 DoMerge = true; 1357 AddSub = ARM_AM::sub; 1358 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1359 DoMerge = true; 1360 } 1361 if (DoMerge) { 1362 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 1363 MBB.erase(NextMBBI); 1364 } 1365 } 1366 1367 if (!DoMerge) 1368 return false; 1369 1370 if (isAM5) { 1371 // VLDM[SD]_UPD, VSTM[SD]_UPD 1372 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1373 // updating load/store-multiple instructions can be used with only one 1374 // register.) 1375 MachineOperand &MO = MI->getOperand(0); 1376 BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) 1377 .addReg(Base, getDefRegState(true)) // WB base register 1378 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1379 .addImm(Pred).addReg(PredReg) 1380 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1381 getKillRegState(MO.isKill()))); 1382 } else if (isLd) { 1383 if (isAM2) { 1384 // LDR_PRE, LDR_POST 1385 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1386 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1387 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1388 .addReg(Base, RegState::Define) 1389 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1390 } else { 1391 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1392 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1393 .addReg(Base, RegState::Define) 1394 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1395 } 1396 } else { 1397 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1398 // t2LDR_PRE, t2LDR_POST 1399 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) 1400 .addReg(Base, RegState::Define) 1401 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1402 } 1403 } else { 1404 MachineOperand &MO = MI->getOperand(0); 1405 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1406 // the vestigal zero-reg offset register. When that's fixed, this clause 1407 // can be removed entirely. 1408 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1409 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1410 // STR_PRE, STR_POST 1411 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1412 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1413 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1414 } else { 1415 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1416 // t2STR_PRE, t2STR_POST 1417 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) 1418 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1419 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1420 } 1421 } 1422 MBB.erase(MBBI); 1423 1424 return true; 1425 } 1426 1427 /// Returns true if instruction is a memory operation that this pass is capable 1428 /// of operating on. 1429 static bool isMemoryOp(const MachineInstr *MI) { 1430 // When no memory operands are present, conservatively assume unaligned, 1431 // volatile, unfoldable. 1432 if (!MI->hasOneMemOperand()) 1433 return false; 1434 1435 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1436 1437 // Don't touch volatile memory accesses - we may be changing their order. 1438 if (MMO->isVolatile()) 1439 return false; 1440 1441 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1442 // not. 1443 if (MMO->getAlignment() < 4) 1444 return false; 1445 1446 // str <undef> could probably be eliminated entirely, but for now we just want 1447 // to avoid making a mess of it. 1448 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1449 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1450 MI->getOperand(0).isUndef()) 1451 return false; 1452 1453 // Likewise don't mess with references to undefined addresses. 1454 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1455 MI->getOperand(1).isUndef()) 1456 return false; 1457 1458 unsigned Opcode = MI->getOpcode(); 1459 switch (Opcode) { 1460 default: break; 1461 case ARM::VLDRS: 1462 case ARM::VSTRS: 1463 return MI->getOperand(1).isReg(); 1464 case ARM::VLDRD: 1465 case ARM::VSTRD: 1466 return MI->getOperand(1).isReg(); 1467 case ARM::LDRi12: 1468 case ARM::STRi12: 1469 case ARM::tLDRi: 1470 case ARM::tSTRi: 1471 case ARM::tLDRspi: 1472 case ARM::tSTRspi: 1473 case ARM::t2LDRi8: 1474 case ARM::t2LDRi12: 1475 case ARM::t2STRi8: 1476 case ARM::t2STRi12: 1477 return MI->getOperand(1).isReg(); 1478 } 1479 return false; 1480 } 1481 1482 static void InsertLDR_STR(MachineBasicBlock &MBB, 1483 MachineBasicBlock::iterator &MBBI, 1484 int Offset, bool isDef, 1485 DebugLoc DL, unsigned NewOpc, 1486 unsigned Reg, bool RegDeadKill, bool RegUndef, 1487 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1488 bool OffKill, bool OffUndef, 1489 ARMCC::CondCodes Pred, unsigned PredReg, 1490 const TargetInstrInfo *TII, bool isT2) { 1491 if (isDef) { 1492 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1493 TII->get(NewOpc)) 1494 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1495 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1496 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1497 } else { 1498 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1499 TII->get(NewOpc)) 1500 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1501 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1502 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1503 } 1504 } 1505 1506 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1507 MachineBasicBlock::iterator &MBBI) { 1508 MachineInstr *MI = &*MBBI; 1509 unsigned Opcode = MI->getOpcode(); 1510 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1511 return false; 1512 1513 const MachineOperand &BaseOp = MI->getOperand(2); 1514 unsigned BaseReg = BaseOp.getReg(); 1515 unsigned EvenReg = MI->getOperand(0).getReg(); 1516 unsigned OddReg = MI->getOperand(1).getReg(); 1517 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1518 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1519 1520 // ARM errata 602117: LDRD with base in list may result in incorrect base 1521 // register when interrupted or faulted. 1522 bool Errata602117 = EvenReg == BaseReg && 1523 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1524 // ARM LDRD/STRD needs consecutive registers. 1525 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1526 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1527 1528 if (!Errata602117 && !NonConsecutiveRegs) 1529 return false; 1530 1531 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1532 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1533 bool EvenDeadKill = isLd ? 1534 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1535 bool EvenUndef = MI->getOperand(0).isUndef(); 1536 bool OddDeadKill = isLd ? 1537 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1538 bool OddUndef = MI->getOperand(1).isUndef(); 1539 bool BaseKill = BaseOp.isKill(); 1540 bool BaseUndef = BaseOp.isUndef(); 1541 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1542 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1543 int OffImm = getMemoryOpOffset(MI); 1544 unsigned PredReg = 0; 1545 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1546 1547 if (OddRegNum > EvenRegNum && OffImm == 0) { 1548 // Ascending register numbers and no offset. It's safe to change it to a 1549 // ldm or stm. 1550 unsigned NewOpc = (isLd) 1551 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1552 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1553 if (isLd) { 1554 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1555 .addReg(BaseReg, getKillRegState(BaseKill)) 1556 .addImm(Pred).addReg(PredReg) 1557 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1558 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1559 ++NumLDRD2LDM; 1560 } else { 1561 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1562 .addReg(BaseReg, getKillRegState(BaseKill)) 1563 .addImm(Pred).addReg(PredReg) 1564 .addReg(EvenReg, 1565 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1566 .addReg(OddReg, 1567 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1568 ++NumSTRD2STM; 1569 } 1570 } else { 1571 // Split into two instructions. 1572 unsigned NewOpc = (isLd) 1573 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1574 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1575 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1576 // so adjust and use t2LDRi12 here for that. 1577 unsigned NewOpc2 = (isLd) 1578 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1579 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1580 DebugLoc dl = MBBI->getDebugLoc(); 1581 // If this is a load and base register is killed, it may have been 1582 // re-defed by the load, make sure the first load does not clobber it. 1583 if (isLd && 1584 (BaseKill || OffKill) && 1585 (TRI->regsOverlap(EvenReg, BaseReg))) { 1586 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1587 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1588 OddReg, OddDeadKill, false, 1589 BaseReg, false, BaseUndef, false, OffUndef, 1590 Pred, PredReg, TII, isT2); 1591 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1592 EvenReg, EvenDeadKill, false, 1593 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1594 Pred, PredReg, TII, isT2); 1595 } else { 1596 if (OddReg == EvenReg && EvenDeadKill) { 1597 // If the two source operands are the same, the kill marker is 1598 // probably on the first one. e.g. 1599 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1600 EvenDeadKill = false; 1601 OddDeadKill = true; 1602 } 1603 // Never kill the base register in the first instruction. 1604 if (EvenReg == BaseReg) 1605 EvenDeadKill = false; 1606 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1607 EvenReg, EvenDeadKill, EvenUndef, 1608 BaseReg, false, BaseUndef, false, OffUndef, 1609 Pred, PredReg, TII, isT2); 1610 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1611 OddReg, OddDeadKill, OddUndef, 1612 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1613 Pred, PredReg, TII, isT2); 1614 } 1615 if (isLd) 1616 ++NumLDRD2LDR; 1617 else 1618 ++NumSTRD2STR; 1619 } 1620 1621 MBBI = MBB.erase(MBBI); 1622 return true; 1623 } 1624 1625 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1626 /// incrementing offset into LDM / STM ops. 1627 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1628 MemOpQueue MemOps; 1629 unsigned CurrBase = 0; 1630 unsigned CurrOpc = ~0u; 1631 unsigned CurrSize = 0; 1632 ARMCC::CondCodes CurrPred = ARMCC::AL; 1633 unsigned CurrPredReg = 0; 1634 unsigned Position = 0; 1635 assert(Candidates.size() == 0); 1636 LiveRegsValid = false; 1637 1638 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1639 I = MBBI) { 1640 // The instruction in front of the iterator is the one we look at. 1641 MBBI = std::prev(I); 1642 if (FixInvalidRegPairOp(MBB, MBBI)) 1643 continue; 1644 ++Position; 1645 1646 if (isMemoryOp(MBBI)) { 1647 unsigned Opcode = MBBI->getOpcode(); 1648 unsigned Size = getLSMultipleTransferSize(MBBI); 1649 const MachineOperand &MO = MBBI->getOperand(0); 1650 unsigned Reg = MO.getReg(); 1651 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1652 unsigned PredReg = 0; 1653 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1654 int Offset = getMemoryOpOffset(MBBI); 1655 if (CurrBase == 0) { 1656 // Start of a new chain. 1657 CurrBase = Base; 1658 CurrOpc = Opcode; 1659 CurrSize = Size; 1660 CurrPred = Pred; 1661 CurrPredReg = PredReg; 1662 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1663 continue; 1664 } 1665 // Note: No need to match PredReg in the next if. 1666 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1667 // Watch out for: 1668 // r4 := ldr [r0, #8] 1669 // r4 := ldr [r0, #4] 1670 // or 1671 // r0 := ldr [r0] 1672 // If a load overrides the base register or a register loaded by 1673 // another load in our chain, we cannot take this instruction. 1674 bool Overlap = false; 1675 if (isLoadSingle(Opcode)) { 1676 Overlap = (Base == Reg); 1677 if (!Overlap) { 1678 for (const MemOpQueueEntry &E : MemOps) { 1679 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1680 Overlap = true; 1681 break; 1682 } 1683 } 1684 } 1685 } 1686 1687 if (!Overlap) { 1688 // Check offset and sort memory operation into the current chain. 1689 if (Offset > MemOps.back().Offset) { 1690 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1691 continue; 1692 } else { 1693 MemOpQueue::iterator MI, ME; 1694 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1695 if (Offset < MI->Offset) { 1696 // Found a place to insert. 1697 break; 1698 } 1699 if (Offset == MI->Offset) { 1700 // Collision, abort. 1701 MI = ME; 1702 break; 1703 } 1704 } 1705 if (MI != MemOps.end()) { 1706 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); 1707 continue; 1708 } 1709 } 1710 } 1711 } 1712 1713 // Don't advance the iterator; The op will start a new chain next. 1714 MBBI = I; 1715 --Position; 1716 // Fallthrough to look into existing chain. 1717 } else if (MBBI->isDebugValue()) 1718 continue; 1719 1720 // If we are here then the chain is broken; Extract candidates for a merge. 1721 if (MemOps.size() > 0) { 1722 FormCandidates(MemOps); 1723 // Reset for the next chain. 1724 CurrBase = 0; 1725 CurrOpc = ~0u; 1726 CurrSize = 0; 1727 CurrPred = ARMCC::AL; 1728 CurrPredReg = 0; 1729 MemOps.clear(); 1730 } 1731 } 1732 if (MemOps.size() > 0) 1733 FormCandidates(MemOps); 1734 1735 // Sort candidates so they get processed from end to begin of the basic 1736 // block later; This is necessary for liveness calculation. 1737 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1738 return M0->InsertPos < M1->InsertPos; 1739 }; 1740 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1741 1742 // Go through list of candidates and merge. 1743 bool Changed = false; 1744 for (const MergeCandidate *Candidate : Candidates) { 1745 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1746 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1747 // Merge preceding/trailing base inc/dec into the merged op. 1748 if (Merged) { 1749 Changed = true; 1750 unsigned Opcode = Merged->getOpcode(); 1751 if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8) 1752 MergeBaseUpdateLSMultiple(Merged); 1753 } else { 1754 for (MachineInstr *MI : Candidate->Instrs) { 1755 if (MergeBaseUpdateLoadStore(MI)) 1756 Changed = true; 1757 } 1758 } 1759 } else { 1760 assert(Candidate->Instrs.size() == 1); 1761 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1762 Changed = true; 1763 } 1764 } 1765 Candidates.clear(); 1766 1767 return Changed; 1768 } 1769 1770 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1771 /// into the preceding stack restore so it directly restore the value of LR 1772 /// into pc. 1773 /// ldmfd sp!, {..., lr} 1774 /// bx lr 1775 /// or 1776 /// ldmfd sp!, {..., lr} 1777 /// mov pc, lr 1778 /// => 1779 /// ldmfd sp!, {..., pc} 1780 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1781 // Thumb1 LDM doesn't allow high registers. 1782 if (isThumb1) return false; 1783 if (MBB.empty()) return false; 1784 1785 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1786 if (MBBI != MBB.begin() && 1787 (MBBI->getOpcode() == ARM::BX_RET || 1788 MBBI->getOpcode() == ARM::tBX_RET || 1789 MBBI->getOpcode() == ARM::MOVPCLR)) { 1790 MachineInstr *PrevMI = std::prev(MBBI); 1791 unsigned Opcode = PrevMI->getOpcode(); 1792 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1793 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1794 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1795 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1796 if (MO.getReg() != ARM::LR) 1797 return false; 1798 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1799 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1800 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1801 PrevMI->setDesc(TII->get(NewOpc)); 1802 MO.setReg(ARM::PC); 1803 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1804 MBB.erase(MBBI); 1805 return true; 1806 } 1807 } 1808 return false; 1809 } 1810 1811 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1812 MF = &Fn; 1813 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1814 TL = STI->getTargetLowering(); 1815 AFI = Fn.getInfo<ARMFunctionInfo>(); 1816 TII = STI->getInstrInfo(); 1817 TRI = STI->getRegisterInfo(); 1818 MRI = &Fn.getRegInfo(); 1819 RegClassInfoValid = false; 1820 isThumb2 = AFI->isThumb2Function(); 1821 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1822 1823 bool Modified = false; 1824 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1825 ++MFI) { 1826 MachineBasicBlock &MBB = *MFI; 1827 Modified |= LoadStoreMultipleOpti(MBB); 1828 if (STI->hasV5TOps()) 1829 Modified |= MergeReturnIntoLDM(MBB); 1830 } 1831 1832 Allocator.Reset(); 1833 return Modified; 1834 } 1835 1836 namespace { 1837 /// Pre- register allocation pass that move load / stores from consecutive 1838 /// locations close to make it more likely they will be combined later. 1839 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1840 static char ID; 1841 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1842 1843 const DataLayout *TD; 1844 const TargetInstrInfo *TII; 1845 const TargetRegisterInfo *TRI; 1846 const ARMSubtarget *STI; 1847 MachineRegisterInfo *MRI; 1848 MachineFunction *MF; 1849 1850 bool runOnMachineFunction(MachineFunction &Fn) override; 1851 1852 const char *getPassName() const override { 1853 return "ARM pre- register allocation load / store optimization pass"; 1854 } 1855 1856 private: 1857 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1858 unsigned &NewOpc, unsigned &EvenReg, 1859 unsigned &OddReg, unsigned &BaseReg, 1860 int &Offset, 1861 unsigned &PredReg, ARMCC::CondCodes &Pred, 1862 bool &isT2); 1863 bool RescheduleOps(MachineBasicBlock *MBB, 1864 SmallVectorImpl<MachineInstr *> &Ops, 1865 unsigned Base, bool isLd, 1866 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1867 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1868 }; 1869 char ARMPreAllocLoadStoreOpt::ID = 0; 1870 } 1871 1872 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1873 TD = Fn.getTarget().getDataLayout(); 1874 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1875 TII = STI->getInstrInfo(); 1876 TRI = STI->getRegisterInfo(); 1877 MRI = &Fn.getRegInfo(); 1878 MF = &Fn; 1879 1880 bool Modified = false; 1881 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1882 ++MFI) 1883 Modified |= RescheduleLoadStoreInstrs(MFI); 1884 1885 return Modified; 1886 } 1887 1888 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1889 MachineBasicBlock::iterator I, 1890 MachineBasicBlock::iterator E, 1891 SmallPtrSetImpl<MachineInstr*> &MemOps, 1892 SmallSet<unsigned, 4> &MemRegs, 1893 const TargetRegisterInfo *TRI) { 1894 // Are there stores / loads / calls between them? 1895 // FIXME: This is overly conservative. We should make use of alias information 1896 // some day. 1897 SmallSet<unsigned, 4> AddedRegPressure; 1898 while (++I != E) { 1899 if (I->isDebugValue() || MemOps.count(&*I)) 1900 continue; 1901 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1902 return false; 1903 if (isLd && I->mayStore()) 1904 return false; 1905 if (!isLd) { 1906 if (I->mayLoad()) 1907 return false; 1908 // It's not safe to move the first 'str' down. 1909 // str r1, [r0] 1910 // strh r5, [r0] 1911 // str r4, [r0, #+4] 1912 if (I->mayStore()) 1913 return false; 1914 } 1915 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1916 MachineOperand &MO = I->getOperand(j); 1917 if (!MO.isReg()) 1918 continue; 1919 unsigned Reg = MO.getReg(); 1920 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1921 return false; 1922 if (Reg != Base && !MemRegs.count(Reg)) 1923 AddedRegPressure.insert(Reg); 1924 } 1925 } 1926 1927 // Estimate register pressure increase due to the transformation. 1928 if (MemRegs.size() <= 4) 1929 // Ok if we are moving small number of instructions. 1930 return true; 1931 return AddedRegPressure.size() <= MemRegs.size() * 2; 1932 } 1933 1934 1935 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. 1936 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1937 MachineInstr *Op1) { 1938 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1939 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1940 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1941 1942 MachineFunction *MF = MI->getParent()->getParent(); 1943 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1944 MachineSDNode::mmo_iterator MemEnd = 1945 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1946 MemEnd = 1947 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1948 MI->setMemRefs(MemBegin, MemEnd); 1949 } 1950 1951 bool 1952 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1953 DebugLoc &dl, unsigned &NewOpc, 1954 unsigned &FirstReg, 1955 unsigned &SecondReg, 1956 unsigned &BaseReg, int &Offset, 1957 unsigned &PredReg, 1958 ARMCC::CondCodes &Pred, 1959 bool &isT2) { 1960 // Make sure we're allowed to generate LDRD/STRD. 1961 if (!STI->hasV5TEOps()) 1962 return false; 1963 1964 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1965 unsigned Scale = 1; 1966 unsigned Opcode = Op0->getOpcode(); 1967 if (Opcode == ARM::LDRi12) { 1968 NewOpc = ARM::LDRD; 1969 } else if (Opcode == ARM::STRi12) { 1970 NewOpc = ARM::STRD; 1971 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1972 NewOpc = ARM::t2LDRDi8; 1973 Scale = 4; 1974 isT2 = true; 1975 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1976 NewOpc = ARM::t2STRDi8; 1977 Scale = 4; 1978 isT2 = true; 1979 } else { 1980 return false; 1981 } 1982 1983 // Make sure the base address satisfies i64 ld / st alignment requirement. 1984 // At the moment, we ignore the memoryoperand's value. 1985 // If we want to use AliasAnalysis, we should check it accordingly. 1986 if (!Op0->hasOneMemOperand() || 1987 (*Op0->memoperands_begin())->isVolatile()) 1988 return false; 1989 1990 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1991 const Function *Func = MF->getFunction(); 1992 unsigned ReqAlign = STI->hasV6Ops() 1993 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1994 : 8; // Pre-v6 need 8-byte align 1995 if (Align < ReqAlign) 1996 return false; 1997 1998 // Then make sure the immediate offset fits. 1999 int OffImm = getMemoryOpOffset(Op0); 2000 if (isT2) { 2001 int Limit = (1 << 8) * Scale; 2002 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2003 return false; 2004 Offset = OffImm; 2005 } else { 2006 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2007 if (OffImm < 0) { 2008 AddSub = ARM_AM::sub; 2009 OffImm = - OffImm; 2010 } 2011 int Limit = (1 << 8) * Scale; 2012 if (OffImm >= Limit || (OffImm & (Scale-1))) 2013 return false; 2014 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2015 } 2016 FirstReg = Op0->getOperand(0).getReg(); 2017 SecondReg = Op1->getOperand(0).getReg(); 2018 if (FirstReg == SecondReg) 2019 return false; 2020 BaseReg = Op0->getOperand(1).getReg(); 2021 Pred = getInstrPredicate(Op0, PredReg); 2022 dl = Op0->getDebugLoc(); 2023 return true; 2024 } 2025 2026 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2027 SmallVectorImpl<MachineInstr *> &Ops, 2028 unsigned Base, bool isLd, 2029 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2030 bool RetVal = false; 2031 2032 // Sort by offset (in reverse order). 2033 std::sort(Ops.begin(), Ops.end(), 2034 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2035 int LOffset = getMemoryOpOffset(LHS); 2036 int ROffset = getMemoryOpOffset(RHS); 2037 assert(LHS == RHS || LOffset != ROffset); 2038 return LOffset > ROffset; 2039 }); 2040 2041 // The loads / stores of the same base are in order. Scan them from first to 2042 // last and check for the following: 2043 // 1. Any def of base. 2044 // 2. Any gaps. 2045 while (Ops.size() > 1) { 2046 unsigned FirstLoc = ~0U; 2047 unsigned LastLoc = 0; 2048 MachineInstr *FirstOp = nullptr; 2049 MachineInstr *LastOp = nullptr; 2050 int LastOffset = 0; 2051 unsigned LastOpcode = 0; 2052 unsigned LastBytes = 0; 2053 unsigned NumMove = 0; 2054 for (int i = Ops.size() - 1; i >= 0; --i) { 2055 MachineInstr *Op = Ops[i]; 2056 unsigned Loc = MI2LocMap[Op]; 2057 if (Loc <= FirstLoc) { 2058 FirstLoc = Loc; 2059 FirstOp = Op; 2060 } 2061 if (Loc >= LastLoc) { 2062 LastLoc = Loc; 2063 LastOp = Op; 2064 } 2065 2066 unsigned LSMOpcode 2067 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2068 if (LastOpcode && LSMOpcode != LastOpcode) 2069 break; 2070 2071 int Offset = getMemoryOpOffset(Op); 2072 unsigned Bytes = getLSMultipleTransferSize(Op); 2073 if (LastBytes) { 2074 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2075 break; 2076 } 2077 LastOffset = Offset; 2078 LastBytes = Bytes; 2079 LastOpcode = LSMOpcode; 2080 if (++NumMove == 8) // FIXME: Tune this limit. 2081 break; 2082 } 2083 2084 if (NumMove <= 1) 2085 Ops.pop_back(); 2086 else { 2087 SmallPtrSet<MachineInstr*, 4> MemOps; 2088 SmallSet<unsigned, 4> MemRegs; 2089 for (int i = NumMove-1; i >= 0; --i) { 2090 MemOps.insert(Ops[i]); 2091 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2092 } 2093 2094 // Be conservative, if the instructions are too far apart, don't 2095 // move them. We want to limit the increase of register pressure. 2096 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2097 if (DoMove) 2098 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2099 MemOps, MemRegs, TRI); 2100 if (!DoMove) { 2101 for (unsigned i = 0; i != NumMove; ++i) 2102 Ops.pop_back(); 2103 } else { 2104 // This is the new location for the loads / stores. 2105 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2106 while (InsertPos != MBB->end() 2107 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2108 ++InsertPos; 2109 2110 // If we are moving a pair of loads / stores, see if it makes sense 2111 // to try to allocate a pair of registers that can form register pairs. 2112 MachineInstr *Op0 = Ops.back(); 2113 MachineInstr *Op1 = Ops[Ops.size()-2]; 2114 unsigned FirstReg = 0, SecondReg = 0; 2115 unsigned BaseReg = 0, PredReg = 0; 2116 ARMCC::CondCodes Pred = ARMCC::AL; 2117 bool isT2 = false; 2118 unsigned NewOpc = 0; 2119 int Offset = 0; 2120 DebugLoc dl; 2121 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2122 FirstReg, SecondReg, BaseReg, 2123 Offset, PredReg, Pred, isT2)) { 2124 Ops.pop_back(); 2125 Ops.pop_back(); 2126 2127 const MCInstrDesc &MCID = TII->get(NewOpc); 2128 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2129 MRI->constrainRegClass(FirstReg, TRC); 2130 MRI->constrainRegClass(SecondReg, TRC); 2131 2132 // Form the pair instruction. 2133 if (isLd) { 2134 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2135 .addReg(FirstReg, RegState::Define) 2136 .addReg(SecondReg, RegState::Define) 2137 .addReg(BaseReg); 2138 // FIXME: We're converting from LDRi12 to an insn that still 2139 // uses addrmode2, so we need an explicit offset reg. It should 2140 // always by reg0 since we're transforming LDRi12s. 2141 if (!isT2) 2142 MIB.addReg(0); 2143 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2144 concatenateMemOperands(MIB, Op0, Op1); 2145 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2146 ++NumLDRDFormed; 2147 } else { 2148 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2149 .addReg(FirstReg) 2150 .addReg(SecondReg) 2151 .addReg(BaseReg); 2152 // FIXME: We're converting from LDRi12 to an insn that still 2153 // uses addrmode2, so we need an explicit offset reg. It should 2154 // always by reg0 since we're transforming STRi12s. 2155 if (!isT2) 2156 MIB.addReg(0); 2157 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2158 concatenateMemOperands(MIB, Op0, Op1); 2159 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2160 ++NumSTRDFormed; 2161 } 2162 MBB->erase(Op0); 2163 MBB->erase(Op1); 2164 2165 if (!isT2) { 2166 // Add register allocation hints to form register pairs. 2167 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2168 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2169 } 2170 } else { 2171 for (unsigned i = 0; i != NumMove; ++i) { 2172 MachineInstr *Op = Ops.back(); 2173 Ops.pop_back(); 2174 MBB->splice(InsertPos, MBB, Op); 2175 } 2176 } 2177 2178 NumLdStMoved += NumMove; 2179 RetVal = true; 2180 } 2181 } 2182 } 2183 2184 return RetVal; 2185 } 2186 2187 bool 2188 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2189 bool RetVal = false; 2190 2191 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2192 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2193 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2194 SmallVector<unsigned, 4> LdBases; 2195 SmallVector<unsigned, 4> StBases; 2196 2197 unsigned Loc = 0; 2198 MachineBasicBlock::iterator MBBI = MBB->begin(); 2199 MachineBasicBlock::iterator E = MBB->end(); 2200 while (MBBI != E) { 2201 for (; MBBI != E; ++MBBI) { 2202 MachineInstr *MI = MBBI; 2203 if (MI->isCall() || MI->isTerminator()) { 2204 // Stop at barriers. 2205 ++MBBI; 2206 break; 2207 } 2208 2209 if (!MI->isDebugValue()) 2210 MI2LocMap[MI] = ++Loc; 2211 2212 if (!isMemoryOp(MI)) 2213 continue; 2214 unsigned PredReg = 0; 2215 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2216 continue; 2217 2218 int Opc = MI->getOpcode(); 2219 bool isLd = isLoadSingle(Opc); 2220 unsigned Base = MI->getOperand(1).getReg(); 2221 int Offset = getMemoryOpOffset(MI); 2222 2223 bool StopHere = false; 2224 if (isLd) { 2225 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2226 Base2LdsMap.find(Base); 2227 if (BI != Base2LdsMap.end()) { 2228 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2229 if (Offset == getMemoryOpOffset(BI->second[i])) { 2230 StopHere = true; 2231 break; 2232 } 2233 } 2234 if (!StopHere) 2235 BI->second.push_back(MI); 2236 } else { 2237 Base2LdsMap[Base].push_back(MI); 2238 LdBases.push_back(Base); 2239 } 2240 } else { 2241 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2242 Base2StsMap.find(Base); 2243 if (BI != Base2StsMap.end()) { 2244 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2245 if (Offset == getMemoryOpOffset(BI->second[i])) { 2246 StopHere = true; 2247 break; 2248 } 2249 } 2250 if (!StopHere) 2251 BI->second.push_back(MI); 2252 } else { 2253 Base2StsMap[Base].push_back(MI); 2254 StBases.push_back(Base); 2255 } 2256 } 2257 2258 if (StopHere) { 2259 // Found a duplicate (a base+offset combination that's seen earlier). 2260 // Backtrack. 2261 --Loc; 2262 break; 2263 } 2264 } 2265 2266 // Re-schedule loads. 2267 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2268 unsigned Base = LdBases[i]; 2269 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2270 if (Lds.size() > 1) 2271 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2272 } 2273 2274 // Re-schedule stores. 2275 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2276 unsigned Base = StBases[i]; 2277 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2278 if (Sts.size() > 1) 2279 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2280 } 2281 2282 if (MBBI != E) { 2283 Base2LdsMap.clear(); 2284 Base2StsMap.clear(); 2285 LdBases.clear(); 2286 StBases.clear(); 2287 } 2288 } 2289 2290 return RetVal; 2291 } 2292 2293 2294 /// Returns an instance of the load / store optimization pass. 2295 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2296 if (PreAlloc) 2297 return new ARMPreAllocLoadStoreOpt(); 2298 return new ARMLoadStoreOpt(); 2299 } 2300