1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterScavenging.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Target/TargetInstrInfo.h" 43 #include "llvm/Target/TargetMachine.h" 44 #include "llvm/Target/TargetRegisterInfo.h" 45 using namespace llvm; 46 47 #define DEBUG_TYPE "arm-ldst-opt" 48 49 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 50 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 60 61 namespace { 62 /// Post- register allocation pass the combine load / store instructions to 63 /// form ldm / stm instructions. 64 struct ARMLoadStoreOpt : public MachineFunctionPass { 65 static char ID; 66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 67 68 const TargetInstrInfo *TII; 69 const TargetRegisterInfo *TRI; 70 const ARMSubtarget *STI; 71 const TargetLowering *TL; 72 ARMFunctionInfo *AFI; 73 RegScavenger *RS; 74 bool isThumb1, isThumb2; 75 76 bool runOnMachineFunction(MachineFunction &Fn) override; 77 78 const char *getPassName() const override { 79 return "ARM load / store optimization pass"; 80 } 81 82 private: 83 struct MemOpQueueEntry { 84 int Offset; 85 unsigned Reg; 86 bool isKill; 87 unsigned Position; 88 MachineBasicBlock::iterator MBBI; 89 bool Merged; 90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 91 MachineBasicBlock::iterator i) 92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 93 }; 94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 95 typedef MemOpQueue::iterator MemOpQueueIter; 96 97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 98 const MemOpQueue &MemOps, unsigned DefReg, 99 unsigned RangeBegin, unsigned RangeEnd); 100 void UpdateBaseRegUses(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MBBI, 102 DebugLoc dl, unsigned Base, unsigned WordOffset, 103 ARMCC::CondCodes Pred, unsigned PredReg); 104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 107 DebugLoc dl, 108 ArrayRef<std::pair<unsigned, bool> > Regs, 109 ArrayRef<unsigned> ImpDefs); 110 void MergeOpsUpdate(MachineBasicBlock &MBB, 111 MemOpQueue &MemOps, 112 unsigned memOpsBegin, 113 unsigned memOpsEnd, 114 unsigned insertAfter, 115 int Offset, 116 unsigned Base, 117 bool BaseKill, 118 unsigned Opcode, 119 ARMCC::CondCodes Pred, 120 unsigned PredReg, 121 unsigned Scratch, 122 DebugLoc dl, 123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 125 unsigned Opcode, unsigned Size, 126 ARMCC::CondCodes Pred, unsigned PredReg, 127 unsigned Scratch, MemOpQueue &MemOps, 128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 131 MachineBasicBlock::iterator &MBBI); 132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 133 MachineBasicBlock::iterator MBBI, 134 const TargetInstrInfo *TII, 135 bool &Advance, 136 MachineBasicBlock::iterator &I); 137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 138 MachineBasicBlock::iterator MBBI, 139 bool &Advance, 140 MachineBasicBlock::iterator &I); 141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 143 }; 144 char ARMLoadStoreOpt::ID = 0; 145 } 146 147 static bool definesCPSR(const MachineInstr *MI) { 148 for (const auto &MO : MI->operands()) { 149 if (!MO.isReg()) 150 continue; 151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 152 // If the instruction has live CPSR def, then it's not safe to fold it 153 // into load / store. 154 return true; 155 } 156 157 return false; 158 } 159 160 static int getMemoryOpOffset(const MachineInstr *MI) { 161 unsigned Opcode = MI->getOpcode(); 162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 163 unsigned NumOperands = MI->getDesc().getNumOperands(); 164 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 165 166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 170 return OffField; 171 172 // Thumb1 immediate offsets are scaled by 4 173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 174 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 175 return OffField * 4; 176 177 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 178 : ARM_AM::getAM5Offset(OffField) * 4; 179 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 180 : ARM_AM::getAM5Op(OffField); 181 182 if (Op == ARM_AM::sub) 183 return -Offset; 184 185 return Offset; 186 } 187 188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 189 switch (Opcode) { 190 default: llvm_unreachable("Unhandled opcode!"); 191 case ARM::LDRi12: 192 ++NumLDMGened; 193 switch (Mode) { 194 default: llvm_unreachable("Unhandled submode!"); 195 case ARM_AM::ia: return ARM::LDMIA; 196 case ARM_AM::da: return ARM::LDMDA; 197 case ARM_AM::db: return ARM::LDMDB; 198 case ARM_AM::ib: return ARM::LDMIB; 199 } 200 case ARM::STRi12: 201 ++NumSTMGened; 202 switch (Mode) { 203 default: llvm_unreachable("Unhandled submode!"); 204 case ARM_AM::ia: return ARM::STMIA; 205 case ARM_AM::da: return ARM::STMDA; 206 case ARM_AM::db: return ARM::STMDB; 207 case ARM_AM::ib: return ARM::STMIB; 208 } 209 case ARM::tLDRi: 210 case ARM::tLDRspi: 211 // tLDMIA is writeback-only - unless the base register is in the input 212 // reglist. 213 ++NumLDMGened; 214 switch (Mode) { 215 default: llvm_unreachable("Unhandled submode!"); 216 case ARM_AM::ia: return ARM::tLDMIA; 217 } 218 case ARM::tSTRi: 219 case ARM::tSTRspi: 220 // There is no non-writeback tSTMIA either. 221 ++NumSTMGened; 222 switch (Mode) { 223 default: llvm_unreachable("Unhandled submode!"); 224 case ARM_AM::ia: return ARM::tSTMIA_UPD; 225 } 226 case ARM::t2LDRi8: 227 case ARM::t2LDRi12: 228 ++NumLDMGened; 229 switch (Mode) { 230 default: llvm_unreachable("Unhandled submode!"); 231 case ARM_AM::ia: return ARM::t2LDMIA; 232 case ARM_AM::db: return ARM::t2LDMDB; 233 } 234 case ARM::t2STRi8: 235 case ARM::t2STRi12: 236 ++NumSTMGened; 237 switch (Mode) { 238 default: llvm_unreachable("Unhandled submode!"); 239 case ARM_AM::ia: return ARM::t2STMIA; 240 case ARM_AM::db: return ARM::t2STMDB; 241 } 242 case ARM::VLDRS: 243 ++NumVLDMGened; 244 switch (Mode) { 245 default: llvm_unreachable("Unhandled submode!"); 246 case ARM_AM::ia: return ARM::VLDMSIA; 247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 248 } 249 case ARM::VSTRS: 250 ++NumVSTMGened; 251 switch (Mode) { 252 default: llvm_unreachable("Unhandled submode!"); 253 case ARM_AM::ia: return ARM::VSTMSIA; 254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 255 } 256 case ARM::VLDRD: 257 ++NumVLDMGened; 258 switch (Mode) { 259 default: llvm_unreachable("Unhandled submode!"); 260 case ARM_AM::ia: return ARM::VLDMDIA; 261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 262 } 263 case ARM::VSTRD: 264 ++NumVSTMGened; 265 switch (Mode) { 266 default: llvm_unreachable("Unhandled submode!"); 267 case ARM_AM::ia: return ARM::VSTMDIA; 268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 269 } 270 } 271 } 272 273 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 274 switch (Opcode) { 275 default: llvm_unreachable("Unhandled opcode!"); 276 case ARM::LDMIA_RET: 277 case ARM::LDMIA: 278 case ARM::LDMIA_UPD: 279 case ARM::STMIA: 280 case ARM::STMIA_UPD: 281 case ARM::tLDMIA: 282 case ARM::tLDMIA_UPD: 283 case ARM::tSTMIA_UPD: 284 case ARM::t2LDMIA_RET: 285 case ARM::t2LDMIA: 286 case ARM::t2LDMIA_UPD: 287 case ARM::t2STMIA: 288 case ARM::t2STMIA_UPD: 289 case ARM::VLDMSIA: 290 case ARM::VLDMSIA_UPD: 291 case ARM::VSTMSIA: 292 case ARM::VSTMSIA_UPD: 293 case ARM::VLDMDIA: 294 case ARM::VLDMDIA_UPD: 295 case ARM::VSTMDIA: 296 case ARM::VSTMDIA_UPD: 297 return ARM_AM::ia; 298 299 case ARM::LDMDA: 300 case ARM::LDMDA_UPD: 301 case ARM::STMDA: 302 case ARM::STMDA_UPD: 303 return ARM_AM::da; 304 305 case ARM::LDMDB: 306 case ARM::LDMDB_UPD: 307 case ARM::STMDB: 308 case ARM::STMDB_UPD: 309 case ARM::t2LDMDB: 310 case ARM::t2LDMDB_UPD: 311 case ARM::t2STMDB: 312 case ARM::t2STMDB_UPD: 313 case ARM::VLDMSDB_UPD: 314 case ARM::VSTMSDB_UPD: 315 case ARM::VLDMDDB_UPD: 316 case ARM::VSTMDDB_UPD: 317 return ARM_AM::db; 318 319 case ARM::LDMIB: 320 case ARM::LDMIB_UPD: 321 case ARM::STMIB: 322 case ARM::STMIB_UPD: 323 return ARM_AM::ib; 324 } 325 } 326 327 static bool isT1i32Load(unsigned Opc) { 328 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 329 } 330 331 static bool isT2i32Load(unsigned Opc) { 332 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 333 } 334 335 static bool isi32Load(unsigned Opc) { 336 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 337 } 338 339 static bool isT1i32Store(unsigned Opc) { 340 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 341 } 342 343 static bool isT2i32Store(unsigned Opc) { 344 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 345 } 346 347 static bool isi32Store(unsigned Opc) { 348 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 349 } 350 351 static unsigned getImmScale(unsigned Opc) { 352 switch (Opc) { 353 default: llvm_unreachable("Unhandled opcode!"); 354 case ARM::tLDRi: 355 case ARM::tSTRi: 356 case ARM::tLDRspi: 357 case ARM::tSTRspi: 358 return 1; 359 case ARM::tLDRHi: 360 case ARM::tSTRHi: 361 return 2; 362 case ARM::tLDRBi: 363 case ARM::tSTRBi: 364 return 4; 365 } 366 } 367 368 /// Update future uses of the base register with the offset introduced 369 /// due to writeback. This function only works on Thumb1. 370 void 371 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 372 MachineBasicBlock::iterator MBBI, 373 DebugLoc dl, unsigned Base, 374 unsigned WordOffset, 375 ARMCC::CondCodes Pred, unsigned PredReg) { 376 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 377 // Start updating any instructions with immediate offsets. Insert a SUB before 378 // the first non-updateable instruction (if any). 379 for (; MBBI != MBB.end(); ++MBBI) { 380 bool InsertSub = false; 381 unsigned Opc = MBBI->getOpcode(); 382 383 if (MBBI->readsRegister(Base)) { 384 int Offset; 385 bool IsLoad = 386 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 387 bool IsStore = 388 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 389 390 if (IsLoad || IsStore) { 391 // Loads and stores with immediate offsets can be updated, but only if 392 // the new offset isn't negative. 393 // The MachineOperand containing the offset immediate is the last one 394 // before predicates. 395 MachineOperand &MO = 396 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 397 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 398 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 399 400 // If storing the base register, it needs to be reset first. 401 unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); 402 403 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 404 MO.setImm(Offset); 405 else 406 InsertSub = true; 407 408 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 409 !definesCPSR(MBBI)) { 410 // SUBS/ADDS using this register, with a dead def of the CPSR. 411 // Merge it with the update; if the merged offset is too large, 412 // insert a new sub instead. 413 MachineOperand &MO = 414 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 415 Offset = (Opc == ARM::tSUBi8) ? 416 MO.getImm() + WordOffset * 4 : 417 MO.getImm() - WordOffset * 4 ; 418 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 419 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 420 // Offset == 0. 421 MO.setImm(Offset); 422 // The base register has now been reset, so exit early. 423 return; 424 } else { 425 InsertSub = true; 426 } 427 428 } else { 429 // Can't update the instruction. 430 InsertSub = true; 431 } 432 433 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 434 // Since SUBS sets the condition flags, we can't place the base reset 435 // after an instruction that has a live CPSR def. 436 // The base register might also contain an argument for a function call. 437 InsertSub = true; 438 } 439 440 if (InsertSub) { 441 // An instruction above couldn't be updated, so insert a sub. 442 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 443 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 444 return; 445 } 446 447 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 448 // Register got killed. Stop updating. 449 return; 450 } 451 452 // End of block was reached. 453 if (MBB.succ_size() > 0) { 454 // FIXME: Because of a bug, live registers are sometimes missing from 455 // the successor blocks' live-in sets. This means we can't trust that 456 // information and *always* have to reset at the end of a block. 457 // See PR21029. 458 if (MBBI != MBB.end()) --MBBI; 459 AddDefaultT1CC( 460 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 461 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 462 } 463 } 464 465 /// Create and insert a LDM or STM with Base as base register and registers in 466 /// Regs as the register operands that would be loaded / stored. It returns 467 /// true if the transformation is done. 468 bool 469 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 470 MachineBasicBlock::iterator MBBI, 471 int Offset, unsigned Base, bool BaseKill, 472 unsigned Opcode, ARMCC::CondCodes Pred, 473 unsigned PredReg, unsigned Scratch, DebugLoc dl, 474 ArrayRef<std::pair<unsigned, bool> > Regs, 475 ArrayRef<unsigned> ImpDefs) { 476 // Only a single register to load / store. Don't bother. 477 unsigned NumRegs = Regs.size(); 478 if (NumRegs <= 1) 479 return false; 480 481 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 482 // Compute liveness information for that register to make the decision. 483 bool SafeToClobberCPSR = !isThumb1 || 484 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 485 MachineBasicBlock::LQR_Dead); 486 487 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 488 489 // Exception: If the base register is in the input reglist, Thumb1 LDM is 490 // non-writeback. 491 // It's also not possible to merge an STR of the base register in Thumb1. 492 if (isThumb1) 493 for (const std::pair<unsigned, bool> &R : Regs) 494 if (Base == R.first) { 495 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 496 if (Opcode == ARM::tLDRi) { 497 Writeback = false; 498 break; 499 } else if (Opcode == ARM::tSTRi) { 500 return false; 501 } 502 } 503 504 ARM_AM::AMSubMode Mode = ARM_AM::ia; 505 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 506 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 507 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 508 509 if (Offset == 4 && haveIBAndDA) { 510 Mode = ARM_AM::ib; 511 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 512 Mode = ARM_AM::da; 513 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 514 // VLDM/VSTM do not support DB mode without also updating the base reg. 515 Mode = ARM_AM::db; 516 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 517 // Check if this is a supported opcode before inserting instructions to 518 // calculate a new base register. 519 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 520 521 // If starting offset isn't zero, insert a MI to materialize a new base. 522 // But only do so if it is cost effective, i.e. merging more than two 523 // loads / stores. 524 if (NumRegs <= 2) 525 return false; 526 527 // On Thumb1, it's not worth materializing a new base register without 528 // clobbering the CPSR (i.e. not using ADDS/SUBS). 529 if (!SafeToClobberCPSR) 530 return false; 531 532 unsigned NewBase; 533 if (isi32Load(Opcode)) { 534 // If it is a load, then just use one of the destination register to 535 // use as the new base. 536 NewBase = Regs[NumRegs-1].first; 537 } else { 538 // Use the scratch register to use as a new base. 539 NewBase = Scratch; 540 if (NewBase == 0) 541 return false; 542 } 543 544 int BaseOpc = 545 isThumb2 ? ARM::t2ADDri : 546 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 547 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 548 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 549 550 if (Offset < 0) { 551 Offset = - Offset; 552 BaseOpc = 553 isThumb2 ? ARM::t2SUBri : 554 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 555 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 556 } 557 558 if (!TL->isLegalAddImmediate(Offset)) 559 // FIXME: Try add with register operand? 560 return false; // Probably not worth it then. 561 562 if (isThumb1) { 563 // Thumb1: depending on immediate size, use either 564 // ADDS NewBase, Base, #imm3 565 // or 566 // MOV NewBase, Base 567 // ADDS NewBase, #imm8. 568 if (Base != NewBase && 569 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 570 // Need to insert a MOV to the new base first. 571 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 572 !STI->hasV6Ops()) { 573 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 574 if (Pred != ARMCC::AL) 575 return false; 576 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) 577 .addReg(Base, getKillRegState(BaseKill)); 578 } else 579 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 580 .addReg(Base, getKillRegState(BaseKill)) 581 .addImm(Pred).addReg(PredReg); 582 583 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 584 Base = NewBase; 585 BaseKill = false; 586 } 587 if (BaseOpc == ARM::tADDrSPi) { 588 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 589 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 590 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) 591 .addImm(Pred).addReg(PredReg); 592 } else 593 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 594 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 595 .addImm(Pred).addReg(PredReg); 596 } else { 597 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 598 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 599 .addImm(Pred).addReg(PredReg).addReg(0); 600 } 601 Base = NewBase; 602 BaseKill = true; // New base is always killed straight away. 603 } 604 605 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 606 Opcode == ARM::VLDRD); 607 608 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 609 // base register writeback. 610 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 611 if (!Opcode) return false; 612 613 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 614 // - There is no writeback (LDM of base register), 615 // - the base register is killed by the merged instruction, 616 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 617 // to reset the base register. 618 // Otherwise, don't merge. 619 // It's safe to return here since the code to materialize a new base register 620 // above is also conditional on SafeToClobberCPSR. 621 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 622 return false; 623 624 MachineInstrBuilder MIB; 625 626 if (Writeback) { 627 if (Opcode == ARM::tLDMIA) 628 // Update tLDMIA with writeback if necessary. 629 Opcode = ARM::tLDMIA_UPD; 630 631 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 632 633 // Thumb1: we might need to set base writeback when building the MI. 634 MIB.addReg(Base, getDefRegState(true)) 635 .addReg(Base, getKillRegState(BaseKill)); 636 637 // The base isn't dead after a merged instruction with writeback. 638 // Insert a sub instruction after the newly formed instruction to reset. 639 if (!BaseKill) 640 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); 641 642 } else { 643 // No writeback, simply build the MachineInstr. 644 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 645 MIB.addReg(Base, getKillRegState(BaseKill)); 646 } 647 648 MIB.addImm(Pred).addReg(PredReg); 649 650 for (const std::pair<unsigned, bool> &R : Regs) 651 MIB = MIB.addReg(R.first, getDefRegState(isDef) 652 | getKillRegState(R.second)); 653 654 // Add implicit defs for super-registers. 655 for (unsigned ImpDef : ImpDefs) 656 MIB.addReg(ImpDef, RegState::ImplicitDefine); 657 658 return true; 659 } 660 661 /// Find all instructions using a given imp-def within a range. 662 /// 663 /// We are trying to combine a range of instructions, one of which (located at 664 /// position RangeBegin) implicitly defines a register. The final LDM/STM will 665 /// be placed at RangeEnd, and so any uses of this definition between RangeStart 666 /// and RangeEnd must be modified to use an undefined value. 667 /// 668 /// The live range continues until we find a second definition or one of the 669 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 670 /// we must consider all uses and decide which are relevant in a second pass. 671 void ARMLoadStoreOpt::findUsesOfImpDef( 672 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 673 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 674 std::map<unsigned, MachineOperand *> Uses; 675 unsigned LastLivePos = RangeEnd; 676 677 // First we find all uses of this register with Position between RangeBegin 678 // and RangeEnd, any or all of these could be uses of a definition at 679 // RangeBegin. We also record the latest position a definition at RangeBegin 680 // would be considered live. 681 for (unsigned i = 0; i < MemOps.size(); ++i) { 682 MachineInstr &MI = *MemOps[i].MBBI; 683 unsigned MIPosition = MemOps[i].Position; 684 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 685 continue; 686 687 // If this instruction defines the register, then any later use will be of 688 // that definition rather than ours. 689 if (MI.definesRegister(DefReg)) 690 LastLivePos = std::min(LastLivePos, MIPosition); 691 692 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 693 if (!UseOp) 694 continue; 695 696 // If this instruction kills the register then (assuming liveness is 697 // correct when we start) we don't need to think about anything after here. 698 if (UseOp->isKill()) 699 LastLivePos = std::min(LastLivePos, MIPosition); 700 701 Uses[MIPosition] = UseOp; 702 } 703 704 // Now we traverse the list of all uses, and append the ones that actually use 705 // our definition to the requested list. 706 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 707 E = Uses.end(); 708 I != E; ++I) { 709 // List is sorted by position so once we've found one out of range there 710 // will be no more to consider. 711 if (I->first > LastLivePos) 712 break; 713 UsesOfImpDefs.push_back(I->second); 714 } 715 } 716 717 /// Call MergeOps and update MemOps and merges accordingly on success. 718 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 719 MemOpQueue &memOps, 720 unsigned memOpsBegin, unsigned memOpsEnd, 721 unsigned insertAfter, int Offset, 722 unsigned Base, bool BaseKill, 723 unsigned Opcode, 724 ARMCC::CondCodes Pred, unsigned PredReg, 725 unsigned Scratch, 726 DebugLoc dl, 727 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 728 // First calculate which of the registers should be killed by the merged 729 // instruction. 730 const unsigned insertPos = memOps[insertAfter].Position; 731 SmallSet<unsigned, 4> KilledRegs; 732 DenseMap<unsigned, unsigned> Killer; 733 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 734 if (i == memOpsBegin) { 735 i = memOpsEnd; 736 if (i == e) 737 break; 738 } 739 if (memOps[i].Position < insertPos && memOps[i].isKill) { 740 unsigned Reg = memOps[i].Reg; 741 KilledRegs.insert(Reg); 742 Killer[Reg] = i; 743 } 744 } 745 746 SmallVector<std::pair<unsigned, bool>, 8> Regs; 747 SmallVector<unsigned, 8> ImpDefs; 748 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 749 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 750 unsigned Reg = memOps[i].Reg; 751 // If we are inserting the merged operation after an operation that 752 // uses the same register, make sure to transfer any kill flag. 753 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 754 Regs.push_back(std::make_pair(Reg, isKill)); 755 756 // Collect any implicit defs of super-registers. They must be preserved. 757 for (const MachineOperand &MO : memOps[i].MBBI->operands()) { 758 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) 759 continue; 760 unsigned DefReg = MO.getReg(); 761 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 762 ImpDefs.push_back(DefReg); 763 764 // There may be other uses of the definition between this instruction and 765 // the eventual LDM/STM position. These should be marked undef if the 766 // merge takes place. 767 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 768 insertPos); 769 } 770 } 771 772 // Try to do the merge. 773 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 774 ++Loc; 775 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 776 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 777 return; 778 779 // Merge succeeded, update records. 780 Merges.push_back(std::prev(Loc)); 781 782 // In gathering loads together, we may have moved the imp-def of a register 783 // past one of its uses. This is OK, since we know better than the rest of 784 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 785 // affected uses. 786 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 787 E = UsesOfImpDefs.end(); 788 I != E; ++I) 789 (*I)->setIsUndef(); 790 791 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 792 // Remove kill flags from any memops that come before insertPos. 793 if (Regs[i-memOpsBegin].second) { 794 unsigned Reg = Regs[i-memOpsBegin].first; 795 if (KilledRegs.count(Reg)) { 796 unsigned j = Killer[Reg]; 797 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 798 assert(Idx >= 0 && "Cannot find killing operand"); 799 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 800 memOps[j].isKill = false; 801 } 802 memOps[i].isKill = true; 803 } 804 MBB.erase(memOps[i].MBBI); 805 // Update this memop to refer to the merged instruction. 806 // We may need to move kill flags again. 807 memOps[i].Merged = true; 808 memOps[i].MBBI = Merges.back(); 809 memOps[i].Position = insertPos; 810 } 811 812 // Update memOps offsets, since they may have been modified by MergeOps. 813 for (auto &MemOp : memOps) { 814 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); 815 } 816 } 817 818 /// Merge a number of load / store instructions into one or more load / store 819 /// multiple instructions. 820 void 821 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 822 unsigned Base, unsigned Opcode, unsigned Size, 823 ARMCC::CondCodes Pred, unsigned PredReg, 824 unsigned Scratch, MemOpQueue &MemOps, 825 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 826 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 827 int Offset = MemOps[SIndex].Offset; 828 int SOffset = Offset; 829 unsigned insertAfter = SIndex; 830 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 831 DebugLoc dl = Loc->getDebugLoc(); 832 const MachineOperand &PMO = Loc->getOperand(0); 833 unsigned PReg = PMO.getReg(); 834 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 835 unsigned Count = 1; 836 unsigned Limit = ~0U; 837 bool BaseKill = false; 838 // vldm / vstm limit are 32 for S variants, 16 for D variants. 839 840 switch (Opcode) { 841 default: break; 842 case ARM::VSTRS: 843 Limit = 32; 844 break; 845 case ARM::VSTRD: 846 Limit = 16; 847 break; 848 case ARM::VLDRD: 849 Limit = 16; 850 break; 851 case ARM::VLDRS: 852 Limit = 32; 853 break; 854 } 855 856 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 857 int NewOffset = MemOps[i].Offset; 858 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 859 unsigned Reg = MO.getReg(); 860 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 861 // Register numbers must be in ascending order. For VFP / NEON load and 862 // store multiples, the registers must also be consecutive and within the 863 // limit on the number of registers per instruction. 864 if (Reg != ARM::SP && 865 NewOffset == Offset + (int)Size && 866 ((isNotVFP && RegNum > PRegNum) || 867 ((Count < Limit) && RegNum == PRegNum+1)) && 868 // On Swift we don't want vldm/vstm to start with a odd register num 869 // because Q register unaligned vldm/vstm need more uops. 870 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { 871 Offset += Size; 872 PRegNum = RegNum; 873 ++Count; 874 } else { 875 // Can't merge this in. Try merge the earlier ones first. 876 // We need to compute BaseKill here because the MemOps may have been 877 // reordered. 878 BaseKill = Loc->killsRegister(Base); 879 880 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, 881 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 882 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 883 MemOps, Merges); 884 return; 885 } 886 887 if (MemOps[i].Position > MemOps[insertAfter].Position) { 888 insertAfter = i; 889 Loc = MemOps[i].MBBI; 890 } 891 } 892 893 BaseKill = Loc->killsRegister(Base); 894 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 895 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 896 } 897 898 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 899 unsigned Bytes, unsigned Limit, 900 ARMCC::CondCodes Pred, unsigned PredReg) { 901 unsigned MyPredReg = 0; 902 if (!MI) 903 return false; 904 905 bool CheckCPSRDef = false; 906 switch (MI->getOpcode()) { 907 default: return false; 908 case ARM::tSUBi8: 909 case ARM::t2SUBri: 910 case ARM::SUBri: 911 CheckCPSRDef = true; 912 break; 913 case ARM::tSUBspi: 914 break; 915 } 916 917 // Make sure the offset fits in 8 bits. 918 if (Bytes == 0 || (Limit && Bytes >= Limit)) 919 return false; 920 921 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 922 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME 923 if (!(MI->getOperand(0).getReg() == Base && 924 MI->getOperand(1).getReg() == Base && 925 (MI->getOperand(2).getImm() * Scale) == Bytes && 926 getInstrPredicate(MI, MyPredReg) == Pred && 927 MyPredReg == PredReg)) 928 return false; 929 930 return CheckCPSRDef ? !definesCPSR(MI) : true; 931 } 932 933 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 934 unsigned Bytes, unsigned Limit, 935 ARMCC::CondCodes Pred, unsigned PredReg) { 936 unsigned MyPredReg = 0; 937 if (!MI) 938 return false; 939 940 bool CheckCPSRDef = false; 941 switch (MI->getOpcode()) { 942 default: return false; 943 case ARM::tADDi8: 944 case ARM::t2ADDri: 945 case ARM::ADDri: 946 CheckCPSRDef = true; 947 break; 948 case ARM::tADDspi: 949 break; 950 } 951 952 if (Bytes == 0 || (Limit && Bytes >= Limit)) 953 // Make sure the offset fits in 8 bits. 954 return false; 955 956 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 957 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 958 if (!(MI->getOperand(0).getReg() == Base && 959 MI->getOperand(1).getReg() == Base && 960 (MI->getOperand(2).getImm() * Scale) == Bytes && 961 getInstrPredicate(MI, MyPredReg) == Pred && 962 MyPredReg == PredReg)) 963 return false; 964 965 return CheckCPSRDef ? !definesCPSR(MI) : true; 966 } 967 968 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 969 switch (MI->getOpcode()) { 970 default: return 0; 971 case ARM::LDRi12: 972 case ARM::STRi12: 973 case ARM::tLDRi: 974 case ARM::tSTRi: 975 case ARM::tLDRspi: 976 case ARM::tSTRspi: 977 case ARM::t2LDRi8: 978 case ARM::t2LDRi12: 979 case ARM::t2STRi8: 980 case ARM::t2STRi12: 981 case ARM::VLDRS: 982 case ARM::VSTRS: 983 return 4; 984 case ARM::VLDRD: 985 case ARM::VSTRD: 986 return 8; 987 case ARM::LDMIA: 988 case ARM::LDMDA: 989 case ARM::LDMDB: 990 case ARM::LDMIB: 991 case ARM::STMIA: 992 case ARM::STMDA: 993 case ARM::STMDB: 994 case ARM::STMIB: 995 case ARM::tLDMIA: 996 case ARM::tLDMIA_UPD: 997 case ARM::tSTMIA_UPD: 998 case ARM::t2LDMIA: 999 case ARM::t2LDMDB: 1000 case ARM::t2STMIA: 1001 case ARM::t2STMDB: 1002 case ARM::VLDMSIA: 1003 case ARM::VSTMSIA: 1004 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 1005 case ARM::VLDMDIA: 1006 case ARM::VSTMDIA: 1007 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 1008 } 1009 } 1010 1011 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1012 ARM_AM::AMSubMode Mode) { 1013 switch (Opc) { 1014 default: llvm_unreachable("Unhandled opcode!"); 1015 case ARM::LDMIA: 1016 case ARM::LDMDA: 1017 case ARM::LDMDB: 1018 case ARM::LDMIB: 1019 switch (Mode) { 1020 default: llvm_unreachable("Unhandled submode!"); 1021 case ARM_AM::ia: return ARM::LDMIA_UPD; 1022 case ARM_AM::ib: return ARM::LDMIB_UPD; 1023 case ARM_AM::da: return ARM::LDMDA_UPD; 1024 case ARM_AM::db: return ARM::LDMDB_UPD; 1025 } 1026 case ARM::STMIA: 1027 case ARM::STMDA: 1028 case ARM::STMDB: 1029 case ARM::STMIB: 1030 switch (Mode) { 1031 default: llvm_unreachable("Unhandled submode!"); 1032 case ARM_AM::ia: return ARM::STMIA_UPD; 1033 case ARM_AM::ib: return ARM::STMIB_UPD; 1034 case ARM_AM::da: return ARM::STMDA_UPD; 1035 case ARM_AM::db: return ARM::STMDB_UPD; 1036 } 1037 case ARM::t2LDMIA: 1038 case ARM::t2LDMDB: 1039 switch (Mode) { 1040 default: llvm_unreachable("Unhandled submode!"); 1041 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1042 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1043 } 1044 case ARM::t2STMIA: 1045 case ARM::t2STMDB: 1046 switch (Mode) { 1047 default: llvm_unreachable("Unhandled submode!"); 1048 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1049 case ARM_AM::db: return ARM::t2STMDB_UPD; 1050 } 1051 case ARM::VLDMSIA: 1052 switch (Mode) { 1053 default: llvm_unreachable("Unhandled submode!"); 1054 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1055 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1056 } 1057 case ARM::VLDMDIA: 1058 switch (Mode) { 1059 default: llvm_unreachable("Unhandled submode!"); 1060 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1061 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1062 } 1063 case ARM::VSTMSIA: 1064 switch (Mode) { 1065 default: llvm_unreachable("Unhandled submode!"); 1066 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1067 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1068 } 1069 case ARM::VSTMDIA: 1070 switch (Mode) { 1071 default: llvm_unreachable("Unhandled submode!"); 1072 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1073 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1074 } 1075 } 1076 } 1077 1078 /// Fold proceeding/trailing inc/dec of base register into the 1079 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1080 /// 1081 /// stmia rn, <ra, rb, rc> 1082 /// rn := rn + 4 * 3; 1083 /// => 1084 /// stmia rn!, <ra, rb, rc> 1085 /// 1086 /// rn := rn - 4 * 3; 1087 /// ldmia rn, <ra, rb, rc> 1088 /// => 1089 /// ldmdb rn!, <ra, rb, rc> 1090 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1091 MachineBasicBlock::iterator MBBI, 1092 bool &Advance, 1093 MachineBasicBlock::iterator &I) { 1094 // Thumb1 is already using updating loads/stores. 1095 if (isThumb1) return false; 1096 1097 MachineInstr *MI = MBBI; 1098 unsigned Base = MI->getOperand(0).getReg(); 1099 bool BaseKill = MI->getOperand(0).isKill(); 1100 unsigned Bytes = getLSMultipleTransferSize(MI); 1101 unsigned PredReg = 0; 1102 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1103 unsigned Opcode = MI->getOpcode(); 1104 DebugLoc dl = MI->getDebugLoc(); 1105 1106 // Can't use an updating ld/st if the base register is also a dest 1107 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1108 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1109 if (MI->getOperand(i).getReg() == Base) 1110 return false; 1111 1112 bool DoMerge = false; 1113 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); 1114 1115 // Try merging with the previous instruction. 1116 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1117 if (MBBI != BeginMBBI) { 1118 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1119 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1120 --PrevMBBI; 1121 if (Mode == ARM_AM::ia && 1122 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1123 Mode = ARM_AM::db; 1124 DoMerge = true; 1125 } else if (Mode == ARM_AM::ib && 1126 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1127 Mode = ARM_AM::da; 1128 DoMerge = true; 1129 } 1130 if (DoMerge) 1131 MBB.erase(PrevMBBI); 1132 } 1133 1134 // Try merging with the next instruction. 1135 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1136 if (!DoMerge && MBBI != EndMBBI) { 1137 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1138 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1139 ++NextMBBI; 1140 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 1141 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1142 DoMerge = true; 1143 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 1144 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1145 DoMerge = true; 1146 } 1147 if (DoMerge) { 1148 if (NextMBBI == I) { 1149 Advance = true; 1150 ++I; 1151 } 1152 MBB.erase(NextMBBI); 1153 } 1154 } 1155 1156 if (!DoMerge) 1157 return false; 1158 1159 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1160 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1161 .addReg(Base, getDefRegState(true)) // WB base register 1162 .addReg(Base, getKillRegState(BaseKill)) 1163 .addImm(Pred).addReg(PredReg); 1164 1165 // Transfer the rest of operands. 1166 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1167 MIB.addOperand(MI->getOperand(OpNum)); 1168 1169 // Transfer memoperands. 1170 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1171 1172 MBB.erase(MBBI); 1173 return true; 1174 } 1175 1176 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1177 ARM_AM::AddrOpc Mode) { 1178 switch (Opc) { 1179 case ARM::LDRi12: 1180 return ARM::LDR_PRE_IMM; 1181 case ARM::STRi12: 1182 return ARM::STR_PRE_IMM; 1183 case ARM::VLDRS: 1184 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1185 case ARM::VLDRD: 1186 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1187 case ARM::VSTRS: 1188 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1189 case ARM::VSTRD: 1190 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1191 case ARM::t2LDRi8: 1192 case ARM::t2LDRi12: 1193 return ARM::t2LDR_PRE; 1194 case ARM::t2STRi8: 1195 case ARM::t2STRi12: 1196 return ARM::t2STR_PRE; 1197 default: llvm_unreachable("Unhandled opcode!"); 1198 } 1199 } 1200 1201 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1202 ARM_AM::AddrOpc Mode) { 1203 switch (Opc) { 1204 case ARM::LDRi12: 1205 return ARM::LDR_POST_IMM; 1206 case ARM::STRi12: 1207 return ARM::STR_POST_IMM; 1208 case ARM::VLDRS: 1209 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1210 case ARM::VLDRD: 1211 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1212 case ARM::VSTRS: 1213 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1214 case ARM::VSTRD: 1215 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1216 case ARM::t2LDRi8: 1217 case ARM::t2LDRi12: 1218 return ARM::t2LDR_POST; 1219 case ARM::t2STRi8: 1220 case ARM::t2STRi12: 1221 return ARM::t2STR_POST; 1222 default: llvm_unreachable("Unhandled opcode!"); 1223 } 1224 } 1225 1226 /// Fold proceeding/trailing inc/dec of base register into the 1227 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1228 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 1229 MachineBasicBlock::iterator MBBI, 1230 const TargetInstrInfo *TII, 1231 bool &Advance, 1232 MachineBasicBlock::iterator &I) { 1233 // Thumb1 doesn't have updating LDR/STR. 1234 // FIXME: Use LDM/STM with single register instead. 1235 if (isThumb1) return false; 1236 1237 MachineInstr *MI = MBBI; 1238 unsigned Base = MI->getOperand(1).getReg(); 1239 bool BaseKill = MI->getOperand(1).isKill(); 1240 unsigned Bytes = getLSMultipleTransferSize(MI); 1241 unsigned Opcode = MI->getOpcode(); 1242 DebugLoc dl = MI->getDebugLoc(); 1243 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1244 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1245 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1246 if (isi32Load(Opcode) || isi32Store(Opcode)) 1247 if (MI->getOperand(2).getImm() != 0) 1248 return false; 1249 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1250 return false; 1251 1252 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 1253 // Can't do the merge if the destination register is the same as the would-be 1254 // writeback register. 1255 if (MI->getOperand(0).getReg() == Base) 1256 return false; 1257 1258 unsigned PredReg = 0; 1259 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1260 bool DoMerge = false; 1261 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1262 unsigned NewOpc = 0; 1263 // AM2 - 12 bits, thumb2 - 8 bits. 1264 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 1265 1266 // Try merging with the previous instruction. 1267 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1268 if (MBBI != BeginMBBI) { 1269 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1270 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1271 --PrevMBBI; 1272 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1273 DoMerge = true; 1274 AddSub = ARM_AM::sub; 1275 } else if (!isAM5 && 1276 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1277 DoMerge = true; 1278 } 1279 if (DoMerge) { 1280 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 1281 MBB.erase(PrevMBBI); 1282 } 1283 } 1284 1285 // Try merging with the next instruction. 1286 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1287 if (!DoMerge && MBBI != EndMBBI) { 1288 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1289 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1290 ++NextMBBI; 1291 if (!isAM5 && 1292 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1293 DoMerge = true; 1294 AddSub = ARM_AM::sub; 1295 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1296 DoMerge = true; 1297 } 1298 if (DoMerge) { 1299 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 1300 if (NextMBBI == I) { 1301 Advance = true; 1302 ++I; 1303 } 1304 MBB.erase(NextMBBI); 1305 } 1306 } 1307 1308 if (!DoMerge) 1309 return false; 1310 1311 if (isAM5) { 1312 // VLDM[SD]_UPD, VSTM[SD]_UPD 1313 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1314 // updating load/store-multiple instructions can be used with only one 1315 // register.) 1316 MachineOperand &MO = MI->getOperand(0); 1317 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1318 .addReg(Base, getDefRegState(true)) // WB base register 1319 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1320 .addImm(Pred).addReg(PredReg) 1321 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1322 getKillRegState(MO.isKill()))); 1323 } else if (isLd) { 1324 if (isAM2) { 1325 // LDR_PRE, LDR_POST 1326 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1327 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1328 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1329 .addReg(Base, RegState::Define) 1330 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1331 } else { 1332 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1333 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1334 .addReg(Base, RegState::Define) 1335 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1336 } 1337 } else { 1338 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1339 // t2LDR_PRE, t2LDR_POST 1340 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1341 .addReg(Base, RegState::Define) 1342 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1343 } 1344 } else { 1345 MachineOperand &MO = MI->getOperand(0); 1346 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1347 // the vestigal zero-reg offset register. When that's fixed, this clause 1348 // can be removed entirely. 1349 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1350 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1351 // STR_PRE, STR_POST 1352 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1353 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1354 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1355 } else { 1356 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1357 // t2STR_PRE, t2STR_POST 1358 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1359 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1360 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1361 } 1362 } 1363 MBB.erase(MBBI); 1364 1365 return true; 1366 } 1367 1368 /// Returns true if instruction is a memory operation that this pass is capable 1369 /// of operating on. 1370 static bool isMemoryOp(const MachineInstr *MI) { 1371 // When no memory operands are present, conservatively assume unaligned, 1372 // volatile, unfoldable. 1373 if (!MI->hasOneMemOperand()) 1374 return false; 1375 1376 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1377 1378 // Don't touch volatile memory accesses - we may be changing their order. 1379 if (MMO->isVolatile()) 1380 return false; 1381 1382 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1383 // not. 1384 if (MMO->getAlignment() < 4) 1385 return false; 1386 1387 // str <undef> could probably be eliminated entirely, but for now we just want 1388 // to avoid making a mess of it. 1389 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1390 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1391 MI->getOperand(0).isUndef()) 1392 return false; 1393 1394 // Likewise don't mess with references to undefined addresses. 1395 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1396 MI->getOperand(1).isUndef()) 1397 return false; 1398 1399 unsigned Opcode = MI->getOpcode(); 1400 switch (Opcode) { 1401 default: break; 1402 case ARM::VLDRS: 1403 case ARM::VSTRS: 1404 return MI->getOperand(1).isReg(); 1405 case ARM::VLDRD: 1406 case ARM::VSTRD: 1407 return MI->getOperand(1).isReg(); 1408 case ARM::LDRi12: 1409 case ARM::STRi12: 1410 case ARM::tLDRi: 1411 case ARM::tSTRi: 1412 case ARM::tLDRspi: 1413 case ARM::tSTRspi: 1414 case ARM::t2LDRi8: 1415 case ARM::t2LDRi12: 1416 case ARM::t2STRi8: 1417 case ARM::t2STRi12: 1418 return MI->getOperand(1).isReg(); 1419 } 1420 return false; 1421 } 1422 1423 /// Advance register scavenger to just before the earliest memory op that is 1424 /// being merged. 1425 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 1426 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 1427 unsigned Position = MemOps[0].Position; 1428 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 1429 if (MemOps[i].Position < Position) { 1430 Position = MemOps[i].Position; 1431 Loc = MemOps[i].MBBI; 1432 } 1433 } 1434 1435 if (Loc != MBB.begin()) 1436 RS->forward(std::prev(Loc)); 1437 } 1438 1439 static void InsertLDR_STR(MachineBasicBlock &MBB, 1440 MachineBasicBlock::iterator &MBBI, 1441 int Offset, bool isDef, 1442 DebugLoc dl, unsigned NewOpc, 1443 unsigned Reg, bool RegDeadKill, bool RegUndef, 1444 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1445 bool OffKill, bool OffUndef, 1446 ARMCC::CondCodes Pred, unsigned PredReg, 1447 const TargetInstrInfo *TII, bool isT2) { 1448 if (isDef) { 1449 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1450 TII->get(NewOpc)) 1451 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1452 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1453 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1454 } else { 1455 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1456 TII->get(NewOpc)) 1457 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1458 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1459 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1460 } 1461 } 1462 1463 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1464 MachineBasicBlock::iterator &MBBI) { 1465 MachineInstr *MI = &*MBBI; 1466 unsigned Opcode = MI->getOpcode(); 1467 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) 1468 return false; 1469 1470 const MachineOperand &BaseOp = MI->getOperand(2); 1471 unsigned BaseReg = BaseOp.getReg(); 1472 unsigned EvenReg = MI->getOperand(0).getReg(); 1473 unsigned OddReg = MI->getOperand(1).getReg(); 1474 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1475 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1476 1477 // ARM errata 602117: LDRD with base in list may result in incorrect base 1478 // register when interrupted or faulted. 1479 bool Errata602117 = EvenReg == BaseReg && 1480 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3(); 1481 // ARM LDRD/STRD needs consecutive registers. 1482 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) && 1483 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum); 1484 1485 if (!Errata602117 && !NonConsecutiveRegs) 1486 return false; 1487 1488 MachineBasicBlock::iterator NewBBI = MBBI; 1489 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1490 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1491 bool EvenDeadKill = isLd ? 1492 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1493 bool EvenUndef = MI->getOperand(0).isUndef(); 1494 bool OddDeadKill = isLd ? 1495 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1496 bool OddUndef = MI->getOperand(1).isUndef(); 1497 bool BaseKill = BaseOp.isKill(); 1498 bool BaseUndef = BaseOp.isUndef(); 1499 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1500 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1501 int OffImm = getMemoryOpOffset(MI); 1502 unsigned PredReg = 0; 1503 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1504 1505 if (OddRegNum > EvenRegNum && OffImm == 0) { 1506 // Ascending register numbers and no offset. It's safe to change it to a 1507 // ldm or stm. 1508 unsigned NewOpc = (isLd) 1509 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1510 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1511 if (isLd) { 1512 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1513 .addReg(BaseReg, getKillRegState(BaseKill)) 1514 .addImm(Pred).addReg(PredReg) 1515 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1516 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1517 ++NumLDRD2LDM; 1518 } else { 1519 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1520 .addReg(BaseReg, getKillRegState(BaseKill)) 1521 .addImm(Pred).addReg(PredReg) 1522 .addReg(EvenReg, 1523 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1524 .addReg(OddReg, 1525 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1526 ++NumSTRD2STM; 1527 } 1528 NewBBI = std::prev(MBBI); 1529 } else { 1530 // Split into two instructions. 1531 unsigned NewOpc = (isLd) 1532 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1533 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1534 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1535 // so adjust and use t2LDRi12 here for that. 1536 unsigned NewOpc2 = (isLd) 1537 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1538 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1539 DebugLoc dl = MBBI->getDebugLoc(); 1540 // If this is a load and base register is killed, it may have been 1541 // re-defed by the load, make sure the first load does not clobber it. 1542 if (isLd && 1543 (BaseKill || OffKill) && 1544 (TRI->regsOverlap(EvenReg, BaseReg))) { 1545 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1546 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1547 OddReg, OddDeadKill, false, 1548 BaseReg, false, BaseUndef, false, OffUndef, 1549 Pred, PredReg, TII, isT2); 1550 NewBBI = std::prev(MBBI); 1551 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1552 EvenReg, EvenDeadKill, false, 1553 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1554 Pred, PredReg, TII, isT2); 1555 } else { 1556 if (OddReg == EvenReg && EvenDeadKill) { 1557 // If the two source operands are the same, the kill marker is 1558 // probably on the first one. e.g. 1559 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1560 EvenDeadKill = false; 1561 OddDeadKill = true; 1562 } 1563 // Never kill the base register in the first instruction. 1564 if (EvenReg == BaseReg) 1565 EvenDeadKill = false; 1566 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1567 EvenReg, EvenDeadKill, EvenUndef, 1568 BaseReg, false, BaseUndef, false, OffUndef, 1569 Pred, PredReg, TII, isT2); 1570 NewBBI = std::prev(MBBI); 1571 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1572 OddReg, OddDeadKill, OddUndef, 1573 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1574 Pred, PredReg, TII, isT2); 1575 } 1576 if (isLd) 1577 ++NumLDRD2LDR; 1578 else 1579 ++NumSTRD2STR; 1580 } 1581 1582 MBB.erase(MI); 1583 MBBI = NewBBI; 1584 return true; 1585 } 1586 1587 /// An optimization pass to turn multiple LDR / STR ops of the same base and 1588 /// incrementing offset into LDM / STM ops. 1589 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1590 unsigned NumMerges = 0; 1591 unsigned NumMemOps = 0; 1592 MemOpQueue MemOps; 1593 unsigned CurrBase = 0; 1594 unsigned CurrOpc = ~0u; 1595 unsigned CurrSize = 0; 1596 ARMCC::CondCodes CurrPred = ARMCC::AL; 1597 unsigned CurrPredReg = 0; 1598 unsigned Position = 0; 1599 SmallVector<MachineBasicBlock::iterator,4> Merges; 1600 1601 RS->enterBasicBlock(&MBB); 1602 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1603 while (MBBI != E) { 1604 if (FixInvalidRegPairOp(MBB, MBBI)) 1605 continue; 1606 1607 bool Advance = false; 1608 bool TryMerge = false; 1609 1610 bool isMemOp = isMemoryOp(MBBI); 1611 if (isMemOp) { 1612 unsigned Opcode = MBBI->getOpcode(); 1613 unsigned Size = getLSMultipleTransferSize(MBBI); 1614 const MachineOperand &MO = MBBI->getOperand(0); 1615 unsigned Reg = MO.getReg(); 1616 bool isKill = MO.isDef() ? false : MO.isKill(); 1617 unsigned Base = MBBI->getOperand(1).getReg(); 1618 unsigned PredReg = 0; 1619 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1620 int Offset = getMemoryOpOffset(MBBI); 1621 // Watch out for: 1622 // r4 := ldr [r5] 1623 // r5 := ldr [r5, #4] 1624 // r6 := ldr [r5, #8] 1625 // 1626 // The second ldr has effectively broken the chain even though it 1627 // looks like the later ldr(s) use the same base register. Try to 1628 // merge the ldr's so far, including this one. But don't try to 1629 // combine the following ldr(s). 1630 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); 1631 1632 // Watch out for: 1633 // r4 := ldr [r0, #8] 1634 // r4 := ldr [r0, #4] 1635 // 1636 // The optimization may reorder the second ldr in front of the first 1637 // ldr, which violates write after write(WAW) dependence. The same as 1638 // str. Try to merge inst(s) already in MemOps. 1639 bool Overlap = false; 1640 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { 1641 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { 1642 Overlap = true; 1643 break; 1644 } 1645 } 1646 1647 if (CurrBase == 0 && !Clobber) { 1648 // Start of a new chain. 1649 CurrBase = Base; 1650 CurrOpc = Opcode; 1651 CurrSize = Size; 1652 CurrPred = Pred; 1653 CurrPredReg = PredReg; 1654 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); 1655 ++NumMemOps; 1656 Advance = true; 1657 } else if (!Overlap) { 1658 if (Clobber) { 1659 TryMerge = true; 1660 Advance = true; 1661 } 1662 1663 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1664 // No need to match PredReg. 1665 // Continue adding to the queue. 1666 if (Offset > MemOps.back().Offset) { 1667 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, 1668 Position, MBBI)); 1669 ++NumMemOps; 1670 Advance = true; 1671 } else { 1672 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); 1673 I != E; ++I) { 1674 if (Offset < I->Offset) { 1675 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, 1676 Position, MBBI)); 1677 ++NumMemOps; 1678 Advance = true; 1679 break; 1680 } else if (Offset == I->Offset) { 1681 // Collision! This can't be merged! 1682 break; 1683 } 1684 } 1685 } 1686 } 1687 } 1688 } 1689 1690 if (MBBI->isDebugValue()) { 1691 ++MBBI; 1692 if (MBBI == E) 1693 // Reach the end of the block, try merging the memory instructions. 1694 TryMerge = true; 1695 } else if (Advance) { 1696 ++Position; 1697 ++MBBI; 1698 if (MBBI == E) 1699 // Reach the end of the block, try merging the memory instructions. 1700 TryMerge = true; 1701 } else { 1702 TryMerge = true; 1703 } 1704 1705 if (TryMerge) { 1706 if (NumMemOps > 1) { 1707 // Try to find a free register to use as a new base in case it's needed. 1708 // First advance to the instruction just before the start of the chain. 1709 AdvanceRS(MBB, MemOps); 1710 1711 // Find a scratch register. 1712 unsigned Scratch = 1713 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); 1714 1715 // Process the load / store instructions. 1716 RS->forward(std::prev(MBBI)); 1717 1718 // Merge ops. 1719 Merges.clear(); 1720 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, 1721 CurrPred, CurrPredReg, Scratch, MemOps, Merges); 1722 1723 // Try folding preceding/trailing base inc/dec into the generated 1724 // LDM/STM ops. 1725 for (unsigned i = 0, e = Merges.size(); i < e; ++i) 1726 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) 1727 ++NumMerges; 1728 NumMerges += Merges.size(); 1729 1730 // Try folding preceding/trailing base inc/dec into those load/store 1731 // that were not merged to form LDM/STM ops. 1732 for (unsigned i = 0; i != NumMemOps; ++i) 1733 if (!MemOps[i].Merged) 1734 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) 1735 ++NumMerges; 1736 1737 // RS may be pointing to an instruction that's deleted. 1738 RS->skipTo(std::prev(MBBI)); 1739 } else if (NumMemOps == 1) { 1740 // Try folding preceding/trailing base inc/dec into the single 1741 // load/store. 1742 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { 1743 ++NumMerges; 1744 RS->forward(std::prev(MBBI)); 1745 } 1746 } 1747 1748 CurrBase = 0; 1749 CurrOpc = ~0u; 1750 CurrSize = 0; 1751 CurrPred = ARMCC::AL; 1752 CurrPredReg = 0; 1753 if (NumMemOps) { 1754 MemOps.clear(); 1755 NumMemOps = 0; 1756 } 1757 1758 // If iterator hasn't been advanced and this is not a memory op, skip it. 1759 // It can't start a new chain anyway. 1760 if (!Advance && !isMemOp && MBBI != E) { 1761 ++Position; 1762 ++MBBI; 1763 } 1764 } 1765 } 1766 return NumMerges > 0; 1767 } 1768 1769 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1770 /// into the preceding stack restore so it directly restore the value of LR 1771 /// into pc. 1772 /// ldmfd sp!, {..., lr} 1773 /// bx lr 1774 /// or 1775 /// ldmfd sp!, {..., lr} 1776 /// mov pc, lr 1777 /// => 1778 /// ldmfd sp!, {..., pc} 1779 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1780 // Thumb1 LDM doesn't allow high registers. 1781 if (isThumb1) return false; 1782 if (MBB.empty()) return false; 1783 1784 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1785 if (MBBI != MBB.begin() && 1786 (MBBI->getOpcode() == ARM::BX_RET || 1787 MBBI->getOpcode() == ARM::tBX_RET || 1788 MBBI->getOpcode() == ARM::MOVPCLR)) { 1789 MachineInstr *PrevMI = std::prev(MBBI); 1790 unsigned Opcode = PrevMI->getOpcode(); 1791 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1792 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1793 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1794 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1795 if (MO.getReg() != ARM::LR) 1796 return false; 1797 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1798 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1799 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1800 PrevMI->setDesc(TII->get(NewOpc)); 1801 MO.setReg(ARM::PC); 1802 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1803 MBB.erase(MBBI); 1804 return true; 1805 } 1806 } 1807 return false; 1808 } 1809 1810 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1811 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1812 TL = STI->getTargetLowering(); 1813 AFI = Fn.getInfo<ARMFunctionInfo>(); 1814 TII = STI->getInstrInfo(); 1815 TRI = STI->getRegisterInfo(); 1816 RS = new RegScavenger(); 1817 isThumb2 = AFI->isThumb2Function(); 1818 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1819 1820 bool Modified = false; 1821 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1822 ++MFI) { 1823 MachineBasicBlock &MBB = *MFI; 1824 Modified |= LoadStoreMultipleOpti(MBB); 1825 if (STI->hasV5TOps()) 1826 Modified |= MergeReturnIntoLDM(MBB); 1827 } 1828 1829 delete RS; 1830 return Modified; 1831 } 1832 1833 namespace { 1834 /// Pre- register allocation pass that move load / stores from consecutive 1835 /// locations close to make it more likely they will be combined later. 1836 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1837 static char ID; 1838 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1839 1840 const DataLayout *TD; 1841 const TargetInstrInfo *TII; 1842 const TargetRegisterInfo *TRI; 1843 const ARMSubtarget *STI; 1844 MachineRegisterInfo *MRI; 1845 MachineFunction *MF; 1846 1847 bool runOnMachineFunction(MachineFunction &Fn) override; 1848 1849 const char *getPassName() const override { 1850 return "ARM pre- register allocation load / store optimization pass"; 1851 } 1852 1853 private: 1854 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1855 unsigned &NewOpc, unsigned &EvenReg, 1856 unsigned &OddReg, unsigned &BaseReg, 1857 int &Offset, 1858 unsigned &PredReg, ARMCC::CondCodes &Pred, 1859 bool &isT2); 1860 bool RescheduleOps(MachineBasicBlock *MBB, 1861 SmallVectorImpl<MachineInstr *> &Ops, 1862 unsigned Base, bool isLd, 1863 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1864 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1865 }; 1866 char ARMPreAllocLoadStoreOpt::ID = 0; 1867 } 1868 1869 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1870 TD = Fn.getTarget().getDataLayout(); 1871 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1872 TII = STI->getInstrInfo(); 1873 TRI = STI->getRegisterInfo(); 1874 MRI = &Fn.getRegInfo(); 1875 MF = &Fn; 1876 1877 bool Modified = false; 1878 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1879 ++MFI) 1880 Modified |= RescheduleLoadStoreInstrs(MFI); 1881 1882 return Modified; 1883 } 1884 1885 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1886 MachineBasicBlock::iterator I, 1887 MachineBasicBlock::iterator E, 1888 SmallPtrSetImpl<MachineInstr*> &MemOps, 1889 SmallSet<unsigned, 4> &MemRegs, 1890 const TargetRegisterInfo *TRI) { 1891 // Are there stores / loads / calls between them? 1892 // FIXME: This is overly conservative. We should make use of alias information 1893 // some day. 1894 SmallSet<unsigned, 4> AddedRegPressure; 1895 while (++I != E) { 1896 if (I->isDebugValue() || MemOps.count(&*I)) 1897 continue; 1898 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1899 return false; 1900 if (isLd && I->mayStore()) 1901 return false; 1902 if (!isLd) { 1903 if (I->mayLoad()) 1904 return false; 1905 // It's not safe to move the first 'str' down. 1906 // str r1, [r0] 1907 // strh r5, [r0] 1908 // str r4, [r0, #+4] 1909 if (I->mayStore()) 1910 return false; 1911 } 1912 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1913 MachineOperand &MO = I->getOperand(j); 1914 if (!MO.isReg()) 1915 continue; 1916 unsigned Reg = MO.getReg(); 1917 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1918 return false; 1919 if (Reg != Base && !MemRegs.count(Reg)) 1920 AddedRegPressure.insert(Reg); 1921 } 1922 } 1923 1924 // Estimate register pressure increase due to the transformation. 1925 if (MemRegs.size() <= 4) 1926 // Ok if we are moving small number of instructions. 1927 return true; 1928 return AddedRegPressure.size() <= MemRegs.size() * 2; 1929 } 1930 1931 1932 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. 1933 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1934 MachineInstr *Op1) { 1935 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1936 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1937 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1938 1939 MachineFunction *MF = MI->getParent()->getParent(); 1940 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1941 MachineSDNode::mmo_iterator MemEnd = 1942 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1943 MemEnd = 1944 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1945 MI->setMemRefs(MemBegin, MemEnd); 1946 } 1947 1948 bool 1949 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1950 DebugLoc &dl, unsigned &NewOpc, 1951 unsigned &FirstReg, 1952 unsigned &SecondReg, 1953 unsigned &BaseReg, int &Offset, 1954 unsigned &PredReg, 1955 ARMCC::CondCodes &Pred, 1956 bool &isT2) { 1957 // Make sure we're allowed to generate LDRD/STRD. 1958 if (!STI->hasV5TEOps()) 1959 return false; 1960 1961 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1962 unsigned Scale = 1; 1963 unsigned Opcode = Op0->getOpcode(); 1964 if (Opcode == ARM::LDRi12) { 1965 NewOpc = ARM::LDRD; 1966 } else if (Opcode == ARM::STRi12) { 1967 NewOpc = ARM::STRD; 1968 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1969 NewOpc = ARM::t2LDRDi8; 1970 Scale = 4; 1971 isT2 = true; 1972 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1973 NewOpc = ARM::t2STRDi8; 1974 Scale = 4; 1975 isT2 = true; 1976 } else { 1977 return false; 1978 } 1979 1980 // Make sure the base address satisfies i64 ld / st alignment requirement. 1981 // At the moment, we ignore the memoryoperand's value. 1982 // If we want to use AliasAnalysis, we should check it accordingly. 1983 if (!Op0->hasOneMemOperand() || 1984 (*Op0->memoperands_begin())->isVolatile()) 1985 return false; 1986 1987 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1988 const Function *Func = MF->getFunction(); 1989 unsigned ReqAlign = STI->hasV6Ops() 1990 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1991 : 8; // Pre-v6 need 8-byte align 1992 if (Align < ReqAlign) 1993 return false; 1994 1995 // Then make sure the immediate offset fits. 1996 int OffImm = getMemoryOpOffset(Op0); 1997 if (isT2) { 1998 int Limit = (1 << 8) * Scale; 1999 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2000 return false; 2001 Offset = OffImm; 2002 } else { 2003 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2004 if (OffImm < 0) { 2005 AddSub = ARM_AM::sub; 2006 OffImm = - OffImm; 2007 } 2008 int Limit = (1 << 8) * Scale; 2009 if (OffImm >= Limit || (OffImm & (Scale-1))) 2010 return false; 2011 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2012 } 2013 FirstReg = Op0->getOperand(0).getReg(); 2014 SecondReg = Op1->getOperand(0).getReg(); 2015 if (FirstReg == SecondReg) 2016 return false; 2017 BaseReg = Op0->getOperand(1).getReg(); 2018 Pred = getInstrPredicate(Op0, PredReg); 2019 dl = Op0->getDebugLoc(); 2020 return true; 2021 } 2022 2023 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2024 SmallVectorImpl<MachineInstr *> &Ops, 2025 unsigned Base, bool isLd, 2026 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2027 bool RetVal = false; 2028 2029 // Sort by offset (in reverse order). 2030 std::sort(Ops.begin(), Ops.end(), 2031 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2032 int LOffset = getMemoryOpOffset(LHS); 2033 int ROffset = getMemoryOpOffset(RHS); 2034 assert(LHS == RHS || LOffset != ROffset); 2035 return LOffset > ROffset; 2036 }); 2037 2038 // The loads / stores of the same base are in order. Scan them from first to 2039 // last and check for the following: 2040 // 1. Any def of base. 2041 // 2. Any gaps. 2042 while (Ops.size() > 1) { 2043 unsigned FirstLoc = ~0U; 2044 unsigned LastLoc = 0; 2045 MachineInstr *FirstOp = nullptr; 2046 MachineInstr *LastOp = nullptr; 2047 int LastOffset = 0; 2048 unsigned LastOpcode = 0; 2049 unsigned LastBytes = 0; 2050 unsigned NumMove = 0; 2051 for (int i = Ops.size() - 1; i >= 0; --i) { 2052 MachineInstr *Op = Ops[i]; 2053 unsigned Loc = MI2LocMap[Op]; 2054 if (Loc <= FirstLoc) { 2055 FirstLoc = Loc; 2056 FirstOp = Op; 2057 } 2058 if (Loc >= LastLoc) { 2059 LastLoc = Loc; 2060 LastOp = Op; 2061 } 2062 2063 unsigned LSMOpcode 2064 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2065 if (LastOpcode && LSMOpcode != LastOpcode) 2066 break; 2067 2068 int Offset = getMemoryOpOffset(Op); 2069 unsigned Bytes = getLSMultipleTransferSize(Op); 2070 if (LastBytes) { 2071 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2072 break; 2073 } 2074 LastOffset = Offset; 2075 LastBytes = Bytes; 2076 LastOpcode = LSMOpcode; 2077 if (++NumMove == 8) // FIXME: Tune this limit. 2078 break; 2079 } 2080 2081 if (NumMove <= 1) 2082 Ops.pop_back(); 2083 else { 2084 SmallPtrSet<MachineInstr*, 4> MemOps; 2085 SmallSet<unsigned, 4> MemRegs; 2086 for (int i = NumMove-1; i >= 0; --i) { 2087 MemOps.insert(Ops[i]); 2088 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2089 } 2090 2091 // Be conservative, if the instructions are too far apart, don't 2092 // move them. We want to limit the increase of register pressure. 2093 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2094 if (DoMove) 2095 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2096 MemOps, MemRegs, TRI); 2097 if (!DoMove) { 2098 for (unsigned i = 0; i != NumMove; ++i) 2099 Ops.pop_back(); 2100 } else { 2101 // This is the new location for the loads / stores. 2102 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2103 while (InsertPos != MBB->end() 2104 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2105 ++InsertPos; 2106 2107 // If we are moving a pair of loads / stores, see if it makes sense 2108 // to try to allocate a pair of registers that can form register pairs. 2109 MachineInstr *Op0 = Ops.back(); 2110 MachineInstr *Op1 = Ops[Ops.size()-2]; 2111 unsigned FirstReg = 0, SecondReg = 0; 2112 unsigned BaseReg = 0, PredReg = 0; 2113 ARMCC::CondCodes Pred = ARMCC::AL; 2114 bool isT2 = false; 2115 unsigned NewOpc = 0; 2116 int Offset = 0; 2117 DebugLoc dl; 2118 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2119 FirstReg, SecondReg, BaseReg, 2120 Offset, PredReg, Pred, isT2)) { 2121 Ops.pop_back(); 2122 Ops.pop_back(); 2123 2124 const MCInstrDesc &MCID = TII->get(NewOpc); 2125 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2126 MRI->constrainRegClass(FirstReg, TRC); 2127 MRI->constrainRegClass(SecondReg, TRC); 2128 2129 // Form the pair instruction. 2130 if (isLd) { 2131 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2132 .addReg(FirstReg, RegState::Define) 2133 .addReg(SecondReg, RegState::Define) 2134 .addReg(BaseReg); 2135 // FIXME: We're converting from LDRi12 to an insn that still 2136 // uses addrmode2, so we need an explicit offset reg. It should 2137 // always by reg0 since we're transforming LDRi12s. 2138 if (!isT2) 2139 MIB.addReg(0); 2140 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2141 concatenateMemOperands(MIB, Op0, Op1); 2142 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2143 ++NumLDRDFormed; 2144 } else { 2145 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2146 .addReg(FirstReg) 2147 .addReg(SecondReg) 2148 .addReg(BaseReg); 2149 // FIXME: We're converting from LDRi12 to an insn that still 2150 // uses addrmode2, so we need an explicit offset reg. It should 2151 // always by reg0 since we're transforming STRi12s. 2152 if (!isT2) 2153 MIB.addReg(0); 2154 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2155 concatenateMemOperands(MIB, Op0, Op1); 2156 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2157 ++NumSTRDFormed; 2158 } 2159 MBB->erase(Op0); 2160 MBB->erase(Op1); 2161 2162 if (!isT2) { 2163 // Add register allocation hints to form register pairs. 2164 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); 2165 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); 2166 } 2167 } else { 2168 for (unsigned i = 0; i != NumMove; ++i) { 2169 MachineInstr *Op = Ops.back(); 2170 Ops.pop_back(); 2171 MBB->splice(InsertPos, MBB, Op); 2172 } 2173 } 2174 2175 NumLdStMoved += NumMove; 2176 RetVal = true; 2177 } 2178 } 2179 } 2180 2181 return RetVal; 2182 } 2183 2184 bool 2185 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2186 bool RetVal = false; 2187 2188 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2189 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2190 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2191 SmallVector<unsigned, 4> LdBases; 2192 SmallVector<unsigned, 4> StBases; 2193 2194 unsigned Loc = 0; 2195 MachineBasicBlock::iterator MBBI = MBB->begin(); 2196 MachineBasicBlock::iterator E = MBB->end(); 2197 while (MBBI != E) { 2198 for (; MBBI != E; ++MBBI) { 2199 MachineInstr *MI = MBBI; 2200 if (MI->isCall() || MI->isTerminator()) { 2201 // Stop at barriers. 2202 ++MBBI; 2203 break; 2204 } 2205 2206 if (!MI->isDebugValue()) 2207 MI2LocMap[MI] = ++Loc; 2208 2209 if (!isMemoryOp(MI)) 2210 continue; 2211 unsigned PredReg = 0; 2212 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2213 continue; 2214 2215 int Opc = MI->getOpcode(); 2216 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 2217 unsigned Base = MI->getOperand(1).getReg(); 2218 int Offset = getMemoryOpOffset(MI); 2219 2220 bool StopHere = false; 2221 if (isLd) { 2222 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2223 Base2LdsMap.find(Base); 2224 if (BI != Base2LdsMap.end()) { 2225 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2226 if (Offset == getMemoryOpOffset(BI->second[i])) { 2227 StopHere = true; 2228 break; 2229 } 2230 } 2231 if (!StopHere) 2232 BI->second.push_back(MI); 2233 } else { 2234 Base2LdsMap[Base].push_back(MI); 2235 LdBases.push_back(Base); 2236 } 2237 } else { 2238 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2239 Base2StsMap.find(Base); 2240 if (BI != Base2StsMap.end()) { 2241 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2242 if (Offset == getMemoryOpOffset(BI->second[i])) { 2243 StopHere = true; 2244 break; 2245 } 2246 } 2247 if (!StopHere) 2248 BI->second.push_back(MI); 2249 } else { 2250 Base2StsMap[Base].push_back(MI); 2251 StBases.push_back(Base); 2252 } 2253 } 2254 2255 if (StopHere) { 2256 // Found a duplicate (a base+offset combination that's seen earlier). 2257 // Backtrack. 2258 --Loc; 2259 break; 2260 } 2261 } 2262 2263 // Re-schedule loads. 2264 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2265 unsigned Base = LdBases[i]; 2266 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2267 if (Lds.size() > 1) 2268 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2269 } 2270 2271 // Re-schedule stores. 2272 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2273 unsigned Base = StBases[i]; 2274 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2275 if (Sts.size() > 1) 2276 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2277 } 2278 2279 if (MBBI != E) { 2280 Base2LdsMap.clear(); 2281 Base2StsMap.clear(); 2282 LdBases.clear(); 2283 StBases.clear(); 2284 } 2285 } 2286 2287 return RetVal; 2288 } 2289 2290 2291 /// Returns an instance of the load / store optimization pass. 2292 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2293 if (PreAlloc) 2294 return new ARMPreAllocLoadStoreOpt(); 2295 return new ARMLoadStoreOpt(); 2296 } 2297