//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace {
  /// Post-register-allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        unsigned Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      unsigned Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
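  // Illustrative example (hypothetical registers, added for exposition):
  // after a merged "tLDMIA r0!, {r1, r2}" the base r0 has been advanced by
  // WordOffset = 2 words (8 bytes), so a following "tLDRi r3, [r0, #2]"
  // (word-scaled immediate) is rewritten to "tLDRi r3, [r0, #0]"; if the
  // adjusted offset would be negative, a SUBS resetting the base is
  // inserted instead.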
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
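/// For illustration (hypothetical registers):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// =>
///   ldmia r0, {r1, r2, r3}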
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          unsigned Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (const std::pair<unsigned, bool> &R : Regs)
      if (Base == R.first) {
        assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.
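    // Illustration of this path (hypothetical registers): merging
    //   ldr r1, [r0, #8]; ldr r2, [r0, #12]; ldr r3, [r0, #16]
    // first materializes "add r3, r0, #8" - for loads the last destination
    // register can serve as the new base - and then forms
    //   ldmia r3, {r1, r2, r3}.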

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB = MIB.addReg(R.first, getDefRegState(isDef)
                     | getKillRegState(R.second));

  // Add implicit defs for super-registers.
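  // (For example - illustrative only - if an original VLDRS carried an
  // implicit def of d0 because it wrote one of its lanes, s0 or s1, then
  // the merged VLDM must carry that implicit def as well.)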
  for (unsigned ImpDef : ImpDefs)
    MIB.addReg(ImpDef, RegState::ImplicitDefine);

  return true;
}

/// Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

/// Call MergeOps and update MemOps and merges accordingly on success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     unsigned Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                                     SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
      if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
        continue;
      unsigned DefReg = MO.getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}

/// Merge a number of load / store instructions into one or more load / store
/// multiple instructions.
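/// (Illustrative constraint, hypothetical registers: "ldr r2, [r0]" followed
/// by "ldr r1, [r0, #4]" cannot become "ldmia r0, {r2, r1}", since LDM
/// register lists must be in ascending register order; such a chain is split
/// and merged piecewise instead.)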
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, unsigned Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ?
                   4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default:
llvm_unreachable("Unhandled submode!"); 1054 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1055 case ARM_AM::db: return ARM::t2STMDB_UPD; 1056 } 1057 case ARM::VLDMSIA: 1058 switch (Mode) { 1059 default: llvm_unreachable("Unhandled submode!"); 1060 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1061 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1062 } 1063 case ARM::VLDMDIA: 1064 switch (Mode) { 1065 default: llvm_unreachable("Unhandled submode!"); 1066 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1067 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1068 } 1069 case ARM::VSTMSIA: 1070 switch (Mode) { 1071 default: llvm_unreachable("Unhandled submode!"); 1072 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1073 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1074 } 1075 case ARM::VSTMDIA: 1076 switch (Mode) { 1077 default: llvm_unreachable("Unhandled submode!"); 1078 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1079 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1080 } 1081 } 1082 } 1083 1084 /// Fold proceeding/trailing inc/dec of base register into the 1085 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1086 /// 1087 /// stmia rn, <ra, rb, rc> 1088 /// rn := rn + 4 * 3; 1089 /// => 1090 /// stmia rn!, <ra, rb, rc> 1091 /// 1092 /// rn := rn - 4 * 3; 1093 /// ldmia rn, <ra, rb, rc> 1094 /// => 1095 /// ldmdb rn!, <ra, rb, rc> 1096 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1097 MachineBasicBlock::iterator MBBI, 1098 bool &Advance, 1099 MachineBasicBlock::iterator &I) { 1100 // Thumb1 is already using updating loads/stores. 1101 if (isThumb1) return false; 1102 1103 MachineInstr *MI = MBBI; 1104 unsigned Base = MI->getOperand(0).getReg(); 1105 bool BaseKill = MI->getOperand(0).isKill(); 1106 unsigned Bytes = getLSMultipleTransferSize(MI); 1107 unsigned PredReg = 0; 1108 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1109 unsigned Opcode = MI->getOpcode(); 1110 DebugLoc dl = MI->getDebugLoc(); 1111 1112 // Can't use an updating ld/st if the base register is also a dest 1113 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1114 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1115 if (MI->getOperand(i).getReg() == Base) 1116 return false; 1117 1118 bool DoMerge = false; 1119 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 1120 1121 // Try merging with the previous instruction. 1122 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1123 if (MBBI != BeginMBBI) { 1124 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1125 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1126 --PrevMBBI; 1127 if (Mode == ARM_AM::ia && 1128 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1129 Mode = ARM_AM::db; 1130 DoMerge = true; 1131 } else if (Mode == ARM_AM::ib && 1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1133 Mode = ARM_AM::da; 1134 DoMerge = true; 1135 } 1136 if (DoMerge) 1137 MBB.erase(PrevMBBI); 1138 } 1139 1140 // Try merging with the next instruction. 
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
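  // (Illustrative, hypothetical registers:
  //   ldr r1, [r0]; add r0, r0, #4  =>  ldr r1, [r0], #4   (post-indexed)
  //   add r0, r0, #4; ldr r1, [r0]  =>  ldr r1, [r0, #4]!  (pre-indexed))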
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ?
                     -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Returns true if the instruction is a memory operation that this pass is
/// capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// Advance register scavenger to just before the earliest memory op that is
/// being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ?
           ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
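/// For illustration (hypothetical registers):
///   str r1, [r0]
///   str r2, [r0, #4]
///   str r3, [r0, #8]
/// =>
///   stmia r0, {r1, r2, r3}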
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      unsigned Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();

      // Watch out for:
      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge the inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance,
                                         MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = ~0u;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and
/// "mov pc, lr") into the preceding stack restore so it directly restores
/// the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ?
                         ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

namespace {
  /// Pre-register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
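  // Conservative scan between the two memory ops: bail out on calls,
  // terminators, unmodeled side effects, conflicting loads / stores, or any
  // redef of the base register, and record every other register used across
  // the gap as a rough estimate of the added register pressure.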
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of \p Op0 and \p Op1 into a new array assigned
/// to \p MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
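  // (t2LDRDi8 / t2STRDi8 take a signed 8-bit offset scaled by 4, while
  // ARM-mode LDRD / STRD encode the offset via addrmode3 as an add/sub flag
  // plus an 8-bit magnitude, hence the two cases below.)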
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ?
          FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register
        // pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
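        // Calls can clobber memory and terminators end the region we may
        // reschedule over, so reschedule what has been collected so far and
        // resume scanning after the barrier.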
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
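
// A minimal sketch of how a target can schedule these two passes (this
// mirrors what ARMPassConfig does; the hook names assume the standard
// TargetPassConfig interface, and the authoritative wiring lives in
// ARMTargetMachine.cpp):
//
//   void ARMPassConfig::addPreRegAlloc() {
//     // Run the pre-RA pass so LDRD/STRD candidates end up adjacent.
//     addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//   }
//   void ARMPassConfig::addPreSched2() {
//     // Run the post-RA pass to actually form LDM/STM and LDRD/STRD.
//     addPass(createARMLoadStoreOptimizationPass());
//   }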