//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
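///
/// For example (an illustrative sketch; the actual registers and offsets
/// depend on the instructions found in the block):
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}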

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
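  // (computeRegisterLiveness inspects a limited window of instructions - here
  // up to 15 - around the given point; LQR_Dead means CPSR is known to be
  // dead there, so clobbering it with ADDS/SUBS is safe.)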
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase && Offset >= 8) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
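///
/// For example (an illustrative sketch): a VLDRS that loads %S0 may carry an
/// implicit def of the containing %D0 super-register; a later instruction in
/// the range that reads %D0 is a use of that imp-def and must be found here.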
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
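  // (These caps come from the instruction encodings: the VFP register file
  // has 32 S registers, and a single VLDM/VSTM may transfer at most 16
  // D registers.)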

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
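  // For example (an illustrative sketch; the registers depend on the input):
  //   sub r0, r0, #4
  //   ldr r1, [r0]
  // =>
  //   ldr r1, [r0, #-4]!    (pre-indexed, via getPreIndexedLoadStoreOpcode)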
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
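/// (The scavenger must have an up-to-date liveness model at the insertion
/// point before FindUnusedReg is queried; see the call in
/// LoadStoreMultipleOpti below.)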
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
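/// (A summary of the code below: walk the block collecting loads/stores that
/// share a base register, opcode, and predicate into MemOps; when the chain
/// is broken - clobbered base, offset collision, or end of block - hand the
/// queue to MergeLDR_STR, then try to fold any neighbouring base-register
/// inc/dec into the result.)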
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
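        // For example (a hypothetical sequence, for illustration only):
        //   ldmia r0, {r4, r5}
        //   add   r0, r0, #8
        // =>
        //   ldmia r0!, {r4, r5}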
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into the pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
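  // For example (hypothetical): a load cannot be hoisted across
  //   str r1, [r2]
  // because, without alias information, [r2] may overlap the loaded address.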
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
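  // (For Thumb2, t2LDRDi8 / t2STRDi8 take an 8-bit immediate scaled by 4,
  // i.e. a multiple of 4 in [-1020, +1020]; ARM-mode LDRD / STRD use
  // addrmode3, whose offset magnitude must fit in 8 bits.)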
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
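        // Loads are hoisted to the position of the first op in the group;
        // stores are sunk to the last. Skip past the ops being moved and
        // any debug values to find the real insertion point.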
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
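      // Calls and terminators end the current scheduling region; the
      // per-base maps are rebuilt below and scanning resumes on the far
      // side of the barrier.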
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}