1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a pass that performs load / store related peephole 11 // optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterScavenging.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Target/TargetInstrInfo.h" 43 #include "llvm/Target/TargetMachine.h" 44 #include "llvm/Target/TargetRegisterInfo.h" 45 using namespace llvm; 46 47 #define DEBUG_TYPE "arm-ldst-opt" 48 49 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 50 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 60 61 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 62 /// load / store instructions to form ldm / stm instructions. 63 64 namespace { 65 struct ARMLoadStoreOpt : public MachineFunctionPass { 66 static char ID; 67 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 68 69 const TargetInstrInfo *TII; 70 const TargetRegisterInfo *TRI; 71 const ARMSubtarget *STI; 72 const TargetLowering *TL; 73 ARMFunctionInfo *AFI; 74 RegScavenger *RS; 75 bool isThumb1, isThumb2; 76 77 bool runOnMachineFunction(MachineFunction &Fn) override; 78 79 const char *getPassName() const override { 80 return "ARM load / store optimization pass"; 81 } 82 83 private: 84 struct MemOpQueueEntry { 85 int Offset; 86 unsigned Reg; 87 bool isKill; 88 unsigned Position; 89 MachineBasicBlock::iterator MBBI; 90 bool Merged; 91 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 92 MachineBasicBlock::iterator i) 93 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 94 }; 95 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 96 typedef MemOpQueue::iterator MemOpQueueIter; 97 98 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 99 const MemOpQueue &MemOps, unsigned DefReg, 100 unsigned RangeBegin, unsigned RangeEnd); 101 void UpdateBaseRegUses(MachineBasicBlock &MBB, 102 MachineBasicBlock::iterator MBBI, 103 DebugLoc dl, unsigned Base, unsigned WordOffset, 104 ARMCC::CondCodes Pred, unsigned PredReg); 105 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 106 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 107 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 108 DebugLoc dl, 109 ArrayRef<std::pair<unsigned, bool> > Regs, 110 ArrayRef<unsigned> ImpDefs); 111 void MergeOpsUpdate(MachineBasicBlock &MBB, 112 MemOpQueue &MemOps, 113 unsigned memOpsBegin, 114 unsigned memOpsEnd, 115 unsigned insertAfter, 116 int Offset, 117 unsigned Base, 118 bool BaseKill, 119 unsigned Opcode, 120 ARMCC::CondCodes Pred, 121 unsigned PredReg, 122 unsigned Scratch, 123 DebugLoc dl, 124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 125 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 126 unsigned Opcode, unsigned Size, 127 ARMCC::CondCodes Pred, unsigned PredReg, 128 unsigned Scratch, MemOpQueue &MemOps, 129 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 130 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 131 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 132 MachineBasicBlock::iterator &MBBI); 133 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 134 MachineBasicBlock::iterator MBBI, 135 const TargetInstrInfo *TII, 136 bool &Advance, 137 MachineBasicBlock::iterator &I); 138 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 139 MachineBasicBlock::iterator MBBI, 140 bool &Advance, 141 MachineBasicBlock::iterator &I); 142 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 143 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 144 }; 145 char ARMLoadStoreOpt::ID = 0; 146 } 147 148 static bool definesCPSR(const MachineInstr *MI) { 149 for (const auto &MO : MI->operands()) { 150 if (!MO.isReg()) 151 continue; 152 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 153 // If the instruction has live CPSR def, then it's not safe to fold it 154 // into load / store. 155 return true; 156 } 157 158 return false; 159 } 160 161 static int getMemoryOpOffset(const MachineInstr *MI) { 162 unsigned Opcode = MI->getOpcode(); 163 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 164 unsigned NumOperands = MI->getDesc().getNumOperands(); 165 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 166 167 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 168 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 169 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 170 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 171 return OffField; 172 173 // Thumb1 immediate offsets are scaled by 4 174 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 175 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 176 return OffField * 4; 177 178 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 179 : ARM_AM::getAM5Offset(OffField) * 4; 180 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 181 : ARM_AM::getAM5Op(OffField); 182 183 if (Op == ARM_AM::sub) 184 return -Offset; 185 186 return Offset; 187 } 188 189 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 190 switch (Opcode) { 191 default: llvm_unreachable("Unhandled opcode!"); 192 case ARM::LDRi12: 193 ++NumLDMGened; 194 switch (Mode) { 195 default: llvm_unreachable("Unhandled submode!"); 196 case ARM_AM::ia: return ARM::LDMIA; 197 case ARM_AM::da: return ARM::LDMDA; 198 case ARM_AM::db: return ARM::LDMDB; 199 case ARM_AM::ib: return ARM::LDMIB; 200 } 201 case ARM::STRi12: 202 ++NumSTMGened; 203 switch (Mode) { 204 default: llvm_unreachable("Unhandled submode!"); 205 case ARM_AM::ia: return ARM::STMIA; 206 case ARM_AM::da: return ARM::STMDA; 207 case ARM_AM::db: return ARM::STMDB; 208 case ARM_AM::ib: return ARM::STMIB; 209 } 210 case ARM::tLDRi: 211 case ARM::tLDRspi: 212 // tLDMIA is writeback-only - unless the base register is in the input 213 // reglist. 214 ++NumLDMGened; 215 switch (Mode) { 216 default: llvm_unreachable("Unhandled submode!"); 217 case ARM_AM::ia: return ARM::tLDMIA; 218 } 219 case ARM::tSTRi: 220 case ARM::tSTRspi: 221 // There is no non-writeback tSTMIA either. 222 ++NumSTMGened; 223 switch (Mode) { 224 default: llvm_unreachable("Unhandled submode!"); 225 case ARM_AM::ia: return ARM::tSTMIA_UPD; 226 } 227 case ARM::t2LDRi8: 228 case ARM::t2LDRi12: 229 ++NumLDMGened; 230 switch (Mode) { 231 default: llvm_unreachable("Unhandled submode!"); 232 case ARM_AM::ia: return ARM::t2LDMIA; 233 case ARM_AM::db: return ARM::t2LDMDB; 234 } 235 case ARM::t2STRi8: 236 case ARM::t2STRi12: 237 ++NumSTMGened; 238 switch (Mode) { 239 default: llvm_unreachable("Unhandled submode!"); 240 case ARM_AM::ia: return ARM::t2STMIA; 241 case ARM_AM::db: return ARM::t2STMDB; 242 } 243 case ARM::VLDRS: 244 ++NumVLDMGened; 245 switch (Mode) { 246 default: llvm_unreachable("Unhandled submode!"); 247 case ARM_AM::ia: return ARM::VLDMSIA; 248 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 249 } 250 case ARM::VSTRS: 251 ++NumVSTMGened; 252 switch (Mode) { 253 default: llvm_unreachable("Unhandled submode!"); 254 case ARM_AM::ia: return ARM::VSTMSIA; 255 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 256 } 257 case ARM::VLDRD: 258 ++NumVLDMGened; 259 switch (Mode) { 260 default: llvm_unreachable("Unhandled submode!"); 261 case ARM_AM::ia: return ARM::VLDMDIA; 262 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 263 } 264 case ARM::VSTRD: 265 ++NumVSTMGened; 266 switch (Mode) { 267 default: llvm_unreachable("Unhandled submode!"); 268 case ARM_AM::ia: return ARM::VSTMDIA; 269 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 270 } 271 } 272 } 273 274 namespace llvm { 275 namespace ARM_AM { 276 277 AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 278 switch (Opcode) { 279 default: llvm_unreachable("Unhandled opcode!"); 280 case ARM::LDMIA_RET: 281 case ARM::LDMIA: 282 case ARM::LDMIA_UPD: 283 case ARM::STMIA: 284 case ARM::STMIA_UPD: 285 case ARM::tLDMIA: 286 case ARM::tLDMIA_UPD: 287 case ARM::tSTMIA_UPD: 288 case ARM::t2LDMIA_RET: 289 case ARM::t2LDMIA: 290 case ARM::t2LDMIA_UPD: 291 case ARM::t2STMIA: 292 case ARM::t2STMIA_UPD: 293 case ARM::VLDMSIA: 294 case ARM::VLDMSIA_UPD: 295 case ARM::VSTMSIA: 296 case ARM::VSTMSIA_UPD: 297 case ARM::VLDMDIA: 298 case ARM::VLDMDIA_UPD: 299 case ARM::VSTMDIA: 300 case ARM::VSTMDIA_UPD: 301 return ARM_AM::ia; 302 303 case ARM::LDMDA: 304 case ARM::LDMDA_UPD: 305 case ARM::STMDA: 306 case ARM::STMDA_UPD: 307 return ARM_AM::da; 308 309 case ARM::LDMDB: 310 case ARM::LDMDB_UPD: 311 case ARM::STMDB: 312 case ARM::STMDB_UPD: 313 case ARM::t2LDMDB: 314 case ARM::t2LDMDB_UPD: 315 case ARM::t2STMDB: 316 case ARM::t2STMDB_UPD: 317 case ARM::VLDMSDB_UPD: 318 case ARM::VSTMSDB_UPD: 319 case ARM::VLDMDDB_UPD: 320 case ARM::VSTMDDB_UPD: 321 return ARM_AM::db; 322 323 case ARM::LDMIB: 324 case ARM::LDMIB_UPD: 325 case ARM::STMIB: 326 case ARM::STMIB_UPD: 327 return ARM_AM::ib; 328 } 329 } 330 331 } // end namespace ARM_AM 332 } // end namespace llvm 333 334 static bool isT1i32Load(unsigned Opc) { 335 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 336 } 337 338 static bool isT2i32Load(unsigned Opc) { 339 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 340 } 341 342 static bool isi32Load(unsigned Opc) { 343 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 344 } 345 346 static bool isT1i32Store(unsigned Opc) { 347 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 348 } 349 350 static bool isT2i32Store(unsigned Opc) { 351 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 352 } 353 354 static bool isi32Store(unsigned Opc) { 355 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 356 } 357 358 static unsigned getImmScale(unsigned Opc) { 359 switch (Opc) { 360 default: llvm_unreachable("Unhandled opcode!"); 361 case ARM::tLDRi: 362 case ARM::tSTRi: 363 case ARM::tLDRspi: 364 case ARM::tSTRspi: 365 return 1; 366 case ARM::tLDRHi: 367 case ARM::tSTRHi: 368 return 2; 369 case ARM::tLDRBi: 370 case ARM::tSTRBi: 371 return 4; 372 } 373 } 374 375 /// Update future uses of the base register with the offset introduced 376 /// due to writeback. This function only works on Thumb1. 377 void 378 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 379 MachineBasicBlock::iterator MBBI, 380 DebugLoc dl, unsigned Base, 381 unsigned WordOffset, 382 ARMCC::CondCodes Pred, unsigned PredReg) { 383 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 384 // Start updating any instructions with immediate offsets. Insert a SUB before 385 // the first non-updateable instruction (if any). 386 for (; MBBI != MBB.end(); ++MBBI) { 387 bool InsertSub = false; 388 unsigned Opc = MBBI->getOpcode(); 389 390 if (MBBI->readsRegister(Base)) { 391 int Offset; 392 bool IsLoad = 393 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 394 bool IsStore = 395 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 396 397 if (IsLoad || IsStore) { 398 // Loads and stores with immediate offsets can be updated, but only if 399 // the new offset isn't negative. 400 // The MachineOperand containing the offset immediate is the last one 401 // before predicates. 402 MachineOperand &MO = 403 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 404 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 405 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 406 407 // If storing the base register, it needs to be reset first. 408 unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); 409 410 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 411 MO.setImm(Offset); 412 else 413 InsertSub = true; 414 415 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 416 !definesCPSR(MBBI)) { 417 // SUBS/ADDS using this register, with a dead def of the CPSR. 418 // Merge it with the update; if the merged offset is too large, 419 // insert a new sub instead. 420 MachineOperand &MO = 421 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 422 Offset = (Opc == ARM::tSUBi8) ? 423 MO.getImm() + WordOffset * 4 : 424 MO.getImm() - WordOffset * 4 ; 425 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 426 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 427 // Offset == 0. 428 MO.setImm(Offset); 429 // The base register has now been reset, so exit early. 430 return; 431 } else { 432 InsertSub = true; 433 } 434 435 } else { 436 // Can't update the instruction. 437 InsertSub = true; 438 } 439 440 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 441 // Since SUBS sets the condition flags, we can't place the base reset 442 // after an instruction that has a live CPSR def. 443 // The base register might also contain an argument for a function call. 444 InsertSub = true; 445 } 446 447 if (InsertSub) { 448 // An instruction above couldn't be updated, so insert a sub. 449 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 450 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 451 return; 452 } 453 454 if (MBBI->killsRegister(Base)) 455 // Register got killed. Stop updating. 456 return; 457 } 458 459 // End of block was reached. 460 if (MBB.succ_size() > 0) { 461 // FIXME: Because of a bug, live registers are sometimes missing from 462 // the successor blocks' live-in sets. This means we can't trust that 463 // information and *always* have to reset at the end of a block. 464 // See PR21029. 465 if (MBBI != MBB.end()) --MBBI; 466 AddDefaultT1CC( 467 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 468 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 469 } 470 } 471 472 /// MergeOps - Create and insert a LDM or STM with Base as base register and 473 /// registers in Regs as the register operands that would be loaded / stored. 474 /// It returns true if the transformation is done. 475 bool 476 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 477 MachineBasicBlock::iterator MBBI, 478 int Offset, unsigned Base, bool BaseKill, 479 unsigned Opcode, ARMCC::CondCodes Pred, 480 unsigned PredReg, unsigned Scratch, DebugLoc dl, 481 ArrayRef<std::pair<unsigned, bool> > Regs, 482 ArrayRef<unsigned> ImpDefs) { 483 // Only a single register to load / store. Don't bother. 484 unsigned NumRegs = Regs.size(); 485 if (NumRegs <= 1) 486 return false; 487 488 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 489 // Compute liveness information for that register to make the decision. 490 bool SafeToClobberCPSR = !isThumb1 || 491 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 492 MachineBasicBlock::LQR_Dead); 493 494 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 495 496 // Exception: If the base register is in the input reglist, Thumb1 LDM is 497 // non-writeback. 498 // It's also not possible to merge an STR of the base register in Thumb1. 499 if (isThumb1) 500 for (const std::pair<unsigned, bool> &R : Regs) 501 if (Base == R.first) { 502 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 503 if (Opcode == ARM::tLDRi) { 504 Writeback = false; 505 break; 506 } else if (Opcode == ARM::tSTRi) { 507 return false; 508 } 509 } 510 511 ARM_AM::AMSubMode Mode = ARM_AM::ia; 512 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 513 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 514 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 515 516 if (Offset == 4 && haveIBAndDA) { 517 Mode = ARM_AM::ib; 518 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 519 Mode = ARM_AM::da; 520 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 521 // VLDM/VSTM do not support DB mode without also updating the base reg. 522 Mode = ARM_AM::db; 523 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 524 // Check if this is a supported opcode before inserting instructions to 525 // calculate a new base register. 526 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 527 528 // If starting offset isn't zero, insert a MI to materialize a new base. 529 // But only do so if it is cost effective, i.e. merging more than two 530 // loads / stores. 531 if (NumRegs <= 2) 532 return false; 533 534 // On Thumb1, it's not worth materializing a new base register without 535 // clobbering the CPSR (i.e. not using ADDS/SUBS). 536 if (!SafeToClobberCPSR) 537 return false; 538 539 unsigned NewBase; 540 if (isi32Load(Opcode)) { 541 // If it is a load, then just use one of the destination register to 542 // use as the new base. 543 NewBase = Regs[NumRegs-1].first; 544 } else { 545 // Use the scratch register to use as a new base. 546 NewBase = Scratch; 547 if (NewBase == 0) 548 return false; 549 } 550 551 int BaseOpc = 552 isThumb2 ? ARM::t2ADDri : 553 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 554 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 555 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 556 557 if (Offset < 0) { 558 Offset = - Offset; 559 BaseOpc = 560 isThumb2 ? ARM::t2SUBri : 561 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 562 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 563 } 564 565 if (!TL->isLegalAddImmediate(Offset)) 566 // FIXME: Try add with register operand? 567 return false; // Probably not worth it then. 568 569 if (isThumb1) { 570 // Thumb1: depending on immediate size, use either 571 // ADDS NewBase, Base, #imm3 572 // or 573 // MOV NewBase, Base 574 // ADDS NewBase, #imm8. 575 if (Base != NewBase && 576 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 577 // Need to insert a MOV to the new base first. 578 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 579 !STI->hasV6Ops()) { 580 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 581 if (Pred != ARMCC::AL) 582 return false; 583 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) 584 .addReg(Base, getKillRegState(BaseKill)); 585 } else 586 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 587 .addReg(Base, getKillRegState(BaseKill)) 588 .addImm(Pred).addReg(PredReg); 589 590 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 591 Base = NewBase; 592 BaseKill = false; 593 } 594 if (BaseOpc == ARM::tADDrSPi) { 595 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 596 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 597 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) 598 .addImm(Pred).addReg(PredReg); 599 } else 600 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 601 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 602 .addImm(Pred).addReg(PredReg); 603 } else { 604 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 605 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 606 .addImm(Pred).addReg(PredReg).addReg(0); 607 } 608 Base = NewBase; 609 BaseKill = true; // New base is always killed straight away. 610 } 611 612 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 613 Opcode == ARM::VLDRD); 614 615 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 616 // base register writeback. 617 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 618 if (!Opcode) return false; 619 620 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 621 // - There is no writeback (LDM of base register), 622 // - the base register is killed by the merged instruction, 623 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 624 // to reset the base register. 625 // Otherwise, don't merge. 626 // It's safe to return here since the code to materialize a new base register 627 // above is also conditional on SafeToClobberCPSR. 628 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 629 return false; 630 631 MachineInstrBuilder MIB; 632 633 if (Writeback) { 634 if (Opcode == ARM::tLDMIA) 635 // Update tLDMIA with writeback if necessary. 636 Opcode = ARM::tLDMIA_UPD; 637 638 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 639 640 // Thumb1: we might need to set base writeback when building the MI. 641 MIB.addReg(Base, getDefRegState(true)) 642 .addReg(Base, getKillRegState(BaseKill)); 643 644 // The base isn't dead after a merged instruction with writeback. 645 // Insert a sub instruction after the newly formed instruction to reset. 646 if (!BaseKill) 647 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); 648 649 } else { 650 // No writeback, simply build the MachineInstr. 651 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 652 MIB.addReg(Base, getKillRegState(BaseKill)); 653 } 654 655 MIB.addImm(Pred).addReg(PredReg); 656 657 for (const std::pair<unsigned, bool> &R : Regs) 658 MIB = MIB.addReg(R.first, getDefRegState(isDef) 659 | getKillRegState(R.second)); 660 661 // Add implicit defs for super-registers. 662 for (unsigned ImpDef : ImpDefs) 663 MIB.addReg(ImpDef, RegState::ImplicitDefine); 664 665 return true; 666 } 667 668 /// \brief Find all instructions using a given imp-def within a range. 669 /// 670 /// We are trying to combine a range of instructions, one of which (located at 671 /// position RangeBegin) implicitly defines a register. The final LDM/STM will 672 /// be placed at RangeEnd, and so any uses of this definition between RangeStart 673 /// and RangeEnd must be modified to use an undefined value. 674 /// 675 /// The live range continues until we find a second definition or one of the 676 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 677 /// we must consider all uses and decide which are relevant in a second pass. 678 void ARMLoadStoreOpt::findUsesOfImpDef( 679 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 680 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 681 std::map<unsigned, MachineOperand *> Uses; 682 unsigned LastLivePos = RangeEnd; 683 684 // First we find all uses of this register with Position between RangeBegin 685 // and RangeEnd, any or all of these could be uses of a definition at 686 // RangeBegin. We also record the latest position a definition at RangeBegin 687 // would be considered live. 688 for (unsigned i = 0; i < MemOps.size(); ++i) { 689 MachineInstr &MI = *MemOps[i].MBBI; 690 unsigned MIPosition = MemOps[i].Position; 691 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 692 continue; 693 694 // If this instruction defines the register, then any later use will be of 695 // that definition rather than ours. 696 if (MI.definesRegister(DefReg)) 697 LastLivePos = std::min(LastLivePos, MIPosition); 698 699 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 700 if (!UseOp) 701 continue; 702 703 // If this instruction kills the register then (assuming liveness is 704 // correct when we start) we don't need to think about anything after here. 705 if (UseOp->isKill()) 706 LastLivePos = std::min(LastLivePos, MIPosition); 707 708 Uses[MIPosition] = UseOp; 709 } 710 711 // Now we traverse the list of all uses, and append the ones that actually use 712 // our definition to the requested list. 713 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 714 E = Uses.end(); 715 I != E; ++I) { 716 // List is sorted by position so once we've found one out of range there 717 // will be no more to consider. 718 if (I->first > LastLivePos) 719 break; 720 UsesOfImpDefs.push_back(I->second); 721 } 722 } 723 724 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 725 // success. 726 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 727 MemOpQueue &memOps, 728 unsigned memOpsBegin, unsigned memOpsEnd, 729 unsigned insertAfter, int Offset, 730 unsigned Base, bool BaseKill, 731 unsigned Opcode, 732 ARMCC::CondCodes Pred, unsigned PredReg, 733 unsigned Scratch, 734 DebugLoc dl, 735 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 736 // First calculate which of the registers should be killed by the merged 737 // instruction. 738 const unsigned insertPos = memOps[insertAfter].Position; 739 SmallSet<unsigned, 4> KilledRegs; 740 DenseMap<unsigned, unsigned> Killer; 741 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 742 if (i == memOpsBegin) { 743 i = memOpsEnd; 744 if (i == e) 745 break; 746 } 747 if (memOps[i].Position < insertPos && memOps[i].isKill) { 748 unsigned Reg = memOps[i].Reg; 749 KilledRegs.insert(Reg); 750 Killer[Reg] = i; 751 } 752 } 753 754 SmallVector<std::pair<unsigned, bool>, 8> Regs; 755 SmallVector<unsigned, 8> ImpDefs; 756 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 757 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 758 unsigned Reg = memOps[i].Reg; 759 // If we are inserting the merged operation after an operation that 760 // uses the same register, make sure to transfer any kill flag. 761 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 762 Regs.push_back(std::make_pair(Reg, isKill)); 763 764 // Collect any implicit defs of super-registers. They must be preserved. 765 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { 766 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) 767 continue; 768 unsigned DefReg = MO->getReg(); 769 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 770 ImpDefs.push_back(DefReg); 771 772 // There may be other uses of the definition between this instruction and 773 // the eventual LDM/STM position. These should be marked undef if the 774 // merge takes place. 775 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 776 insertPos); 777 } 778 } 779 780 // Try to do the merge. 781 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 782 ++Loc; 783 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 784 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 785 return; 786 787 // Merge succeeded, update records. 788 Merges.push_back(std::prev(Loc)); 789 790 // In gathering loads together, we may have moved the imp-def of a register 791 // past one of its uses. This is OK, since we know better than the rest of 792 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 793 // affected uses. 794 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 795 E = UsesOfImpDefs.end(); 796 I != E; ++I) 797 (*I)->setIsUndef(); 798 799 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 800 // Remove kill flags from any memops that come before insertPos. 801 if (Regs[i-memOpsBegin].second) { 802 unsigned Reg = Regs[i-memOpsBegin].first; 803 if (KilledRegs.count(Reg)) { 804 unsigned j = Killer[Reg]; 805 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 806 assert(Idx >= 0 && "Cannot find killing operand"); 807 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 808 memOps[j].isKill = false; 809 } 810 memOps[i].isKill = true; 811 } 812 MBB.erase(memOps[i].MBBI); 813 // Update this memop to refer to the merged instruction. 814 // We may need to move kill flags again. 815 memOps[i].Merged = true; 816 memOps[i].MBBI = Merges.back(); 817 memOps[i].Position = insertPos; 818 } 819 820 // Update memOps offsets, since they may have been modified by MergeOps. 821 for (auto &MemOp : memOps) { 822 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); 823 } 824 } 825 826 /// MergeLDR_STR - Merge a number of load / store instructions into one or more 827 /// load / store multiple instructions. 828 void 829 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 830 unsigned Base, unsigned Opcode, unsigned Size, 831 ARMCC::CondCodes Pred, unsigned PredReg, 832 unsigned Scratch, MemOpQueue &MemOps, 833 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 834 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 835 int Offset = MemOps[SIndex].Offset; 836 int SOffset = Offset; 837 unsigned insertAfter = SIndex; 838 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 839 DebugLoc dl = Loc->getDebugLoc(); 840 const MachineOperand &PMO = Loc->getOperand(0); 841 unsigned PReg = PMO.getReg(); 842 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 843 unsigned Count = 1; 844 unsigned Limit = ~0U; 845 bool BaseKill = false; 846 // vldm / vstm limit are 32 for S variants, 16 for D variants. 847 848 switch (Opcode) { 849 default: break; 850 case ARM::VSTRS: 851 Limit = 32; 852 break; 853 case ARM::VSTRD: 854 Limit = 16; 855 break; 856 case ARM::VLDRD: 857 Limit = 16; 858 break; 859 case ARM::VLDRS: 860 Limit = 32; 861 break; 862 } 863 864 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 865 int NewOffset = MemOps[i].Offset; 866 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 867 unsigned Reg = MO.getReg(); 868 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 869 // Register numbers must be in ascending order. For VFP / NEON load and 870 // store multiples, the registers must also be consecutive and within the 871 // limit on the number of registers per instruction. 872 if (Reg != ARM::SP && 873 NewOffset == Offset + (int)Size && 874 ((isNotVFP && RegNum > PRegNum) || 875 ((Count < Limit) && RegNum == PRegNum+1)) && 876 // On Swift we don't want vldm/vstm to start with a odd register num 877 // because Q register unaligned vldm/vstm need more uops. 878 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { 879 Offset += Size; 880 PRegNum = RegNum; 881 ++Count; 882 } else { 883 // Can't merge this in. Try merge the earlier ones first. 884 // We need to compute BaseKill here because the MemOps may have been 885 // reordered. 886 BaseKill = Loc->killsRegister(Base); 887 888 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, 889 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 890 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 891 MemOps, Merges); 892 return; 893 } 894 895 if (MemOps[i].Position > MemOps[insertAfter].Position) { 896 insertAfter = i; 897 Loc = MemOps[i].MBBI; 898 } 899 } 900 901 BaseKill = Loc->killsRegister(Base); 902 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 903 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 904 } 905 906 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 907 unsigned Bytes, unsigned Limit, 908 ARMCC::CondCodes Pred, unsigned PredReg) { 909 unsigned MyPredReg = 0; 910 if (!MI) 911 return false; 912 913 bool CheckCPSRDef = false; 914 switch (MI->getOpcode()) { 915 default: return false; 916 case ARM::tSUBi8: 917 case ARM::t2SUBri: 918 case ARM::SUBri: 919 CheckCPSRDef = true; 920 break; 921 case ARM::tSUBspi: 922 break; 923 } 924 925 // Make sure the offset fits in 8 bits. 926 if (Bytes == 0 || (Limit && Bytes >= Limit)) 927 return false; 928 929 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 930 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME 931 if (!(MI->getOperand(0).getReg() == Base && 932 MI->getOperand(1).getReg() == Base && 933 (MI->getOperand(2).getImm() * Scale) == Bytes && 934 getInstrPredicate(MI, MyPredReg) == Pred && 935 MyPredReg == PredReg)) 936 return false; 937 938 return CheckCPSRDef ? !definesCPSR(MI) : true; 939 } 940 941 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 942 unsigned Bytes, unsigned Limit, 943 ARMCC::CondCodes Pred, unsigned PredReg) { 944 unsigned MyPredReg = 0; 945 if (!MI) 946 return false; 947 948 bool CheckCPSRDef = false; 949 switch (MI->getOpcode()) { 950 default: return false; 951 case ARM::tADDi8: 952 case ARM::t2ADDri: 953 case ARM::ADDri: 954 CheckCPSRDef = true; 955 break; 956 case ARM::tADDspi: 957 break; 958 } 959 960 if (Bytes == 0 || (Limit && Bytes >= Limit)) 961 // Make sure the offset fits in 8 bits. 962 return false; 963 964 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 965 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 966 if (!(MI->getOperand(0).getReg() == Base && 967 MI->getOperand(1).getReg() == Base && 968 (MI->getOperand(2).getImm() * Scale) == Bytes && 969 getInstrPredicate(MI, MyPredReg) == Pred && 970 MyPredReg == PredReg)) 971 return false; 972 973 return CheckCPSRDef ? !definesCPSR(MI) : true; 974 } 975 976 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 977 switch (MI->getOpcode()) { 978 default: return 0; 979 case ARM::LDRi12: 980 case ARM::STRi12: 981 case ARM::tLDRi: 982 case ARM::tSTRi: 983 case ARM::tLDRspi: 984 case ARM::tSTRspi: 985 case ARM::t2LDRi8: 986 case ARM::t2LDRi12: 987 case ARM::t2STRi8: 988 case ARM::t2STRi12: 989 case ARM::VLDRS: 990 case ARM::VSTRS: 991 return 4; 992 case ARM::VLDRD: 993 case ARM::VSTRD: 994 return 8; 995 case ARM::LDMIA: 996 case ARM::LDMDA: 997 case ARM::LDMDB: 998 case ARM::LDMIB: 999 case ARM::STMIA: 1000 case ARM::STMDA: 1001 case ARM::STMDB: 1002 case ARM::STMIB: 1003 case ARM::tLDMIA: 1004 case ARM::tLDMIA_UPD: 1005 case ARM::tSTMIA_UPD: 1006 case ARM::t2LDMIA: 1007 case ARM::t2LDMDB: 1008 case ARM::t2STMIA: 1009 case ARM::t2STMDB: 1010 case ARM::VLDMSIA: 1011 case ARM::VSTMSIA: 1012 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 1013 case ARM::VLDMDIA: 1014 case ARM::VSTMDIA: 1015 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 1016 } 1017 } 1018 1019 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1020 ARM_AM::AMSubMode Mode) { 1021 switch (Opc) { 1022 default: llvm_unreachable("Unhandled opcode!"); 1023 case ARM::LDMIA: 1024 case ARM::LDMDA: 1025 case ARM::LDMDB: 1026 case ARM::LDMIB: 1027 switch (Mode) { 1028 default: llvm_unreachable("Unhandled submode!"); 1029 case ARM_AM::ia: return ARM::LDMIA_UPD; 1030 case ARM_AM::ib: return ARM::LDMIB_UPD; 1031 case ARM_AM::da: return ARM::LDMDA_UPD; 1032 case ARM_AM::db: return ARM::LDMDB_UPD; 1033 } 1034 case ARM::STMIA: 1035 case ARM::STMDA: 1036 case ARM::STMDB: 1037 case ARM::STMIB: 1038 switch (Mode) { 1039 default: llvm_unreachable("Unhandled submode!"); 1040 case ARM_AM::ia: return ARM::STMIA_UPD; 1041 case ARM_AM::ib: return ARM::STMIB_UPD; 1042 case ARM_AM::da: return ARM::STMDA_UPD; 1043 case ARM_AM::db: return ARM::STMDB_UPD; 1044 } 1045 case ARM::t2LDMIA: 1046 case ARM::t2LDMDB: 1047 switch (Mode) { 1048 default: llvm_unreachable("Unhandled submode!"); 1049 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1050 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1051 } 1052 case ARM::t2STMIA: 1053 case ARM::t2STMDB: 1054 switch (Mode) { 1055 default: llvm_unreachable("Unhandled submode!"); 1056 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1057 case ARM_AM::db: return ARM::t2STMDB_UPD; 1058 } 1059 case ARM::VLDMSIA: 1060 switch (Mode) { 1061 default: llvm_unreachable("Unhandled submode!"); 1062 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1063 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1064 } 1065 case ARM::VLDMDIA: 1066 switch (Mode) { 1067 default: llvm_unreachable("Unhandled submode!"); 1068 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1069 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1070 } 1071 case ARM::VSTMSIA: 1072 switch (Mode) { 1073 default: llvm_unreachable("Unhandled submode!"); 1074 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1075 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1076 } 1077 case ARM::VSTMDIA: 1078 switch (Mode) { 1079 default: llvm_unreachable("Unhandled submode!"); 1080 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1081 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1082 } 1083 } 1084 } 1085 1086 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 1087 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1088 /// 1089 /// stmia rn, <ra, rb, rc> 1090 /// rn := rn + 4 * 3; 1091 /// => 1092 /// stmia rn!, <ra, rb, rc> 1093 /// 1094 /// rn := rn - 4 * 3; 1095 /// ldmia rn, <ra, rb, rc> 1096 /// => 1097 /// ldmdb rn!, <ra, rb, rc> 1098 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1099 MachineBasicBlock::iterator MBBI, 1100 bool &Advance, 1101 MachineBasicBlock::iterator &I) { 1102 // Thumb1 is already using updating loads/stores. 1103 if (isThumb1) return false; 1104 1105 MachineInstr *MI = MBBI; 1106 unsigned Base = MI->getOperand(0).getReg(); 1107 bool BaseKill = MI->getOperand(0).isKill(); 1108 unsigned Bytes = getLSMultipleTransferSize(MI); 1109 unsigned PredReg = 0; 1110 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1111 unsigned Opcode = MI->getOpcode(); 1112 DebugLoc dl = MI->getDebugLoc(); 1113 1114 // Can't use an updating ld/st if the base register is also a dest 1115 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1116 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1117 if (MI->getOperand(i).getReg() == Base) 1118 return false; 1119 1120 bool DoMerge = false; 1121 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 1122 1123 // Try merging with the previous instruction. 1124 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1125 if (MBBI != BeginMBBI) { 1126 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1127 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1128 --PrevMBBI; 1129 if (Mode == ARM_AM::ia && 1130 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1131 Mode = ARM_AM::db; 1132 DoMerge = true; 1133 } else if (Mode == ARM_AM::ib && 1134 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1135 Mode = ARM_AM::da; 1136 DoMerge = true; 1137 } 1138 if (DoMerge) 1139 MBB.erase(PrevMBBI); 1140 } 1141 1142 // Try merging with the next instruction. 1143 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1144 if (!DoMerge && MBBI != EndMBBI) { 1145 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1146 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1147 ++NextMBBI; 1148 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 1149 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1150 DoMerge = true; 1151 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 1152 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1153 DoMerge = true; 1154 } 1155 if (DoMerge) { 1156 if (NextMBBI == I) { 1157 Advance = true; 1158 ++I; 1159 } 1160 MBB.erase(NextMBBI); 1161 } 1162 } 1163 1164 if (!DoMerge) 1165 return false; 1166 1167 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1168 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1169 .addReg(Base, getDefRegState(true)) // WB base register 1170 .addReg(Base, getKillRegState(BaseKill)) 1171 .addImm(Pred).addReg(PredReg); 1172 1173 // Transfer the rest of operands. 1174 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1175 MIB.addOperand(MI->getOperand(OpNum)); 1176 1177 // Transfer memoperands. 1178 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1179 1180 MBB.erase(MBBI); 1181 return true; 1182 } 1183 1184 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1185 ARM_AM::AddrOpc Mode) { 1186 switch (Opc) { 1187 case ARM::LDRi12: 1188 return ARM::LDR_PRE_IMM; 1189 case ARM::STRi12: 1190 return ARM::STR_PRE_IMM; 1191 case ARM::VLDRS: 1192 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1193 case ARM::VLDRD: 1194 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1195 case ARM::VSTRS: 1196 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1197 case ARM::VSTRD: 1198 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1199 case ARM::t2LDRi8: 1200 case ARM::t2LDRi12: 1201 return ARM::t2LDR_PRE; 1202 case ARM::t2STRi8: 1203 case ARM::t2STRi12: 1204 return ARM::t2STR_PRE; 1205 default: llvm_unreachable("Unhandled opcode!"); 1206 } 1207 } 1208 1209 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1210 ARM_AM::AddrOpc Mode) { 1211 switch (Opc) { 1212 case ARM::LDRi12: 1213 return ARM::LDR_POST_IMM; 1214 case ARM::STRi12: 1215 return ARM::STR_POST_IMM; 1216 case ARM::VLDRS: 1217 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1218 case ARM::VLDRD: 1219 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1220 case ARM::VSTRS: 1221 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1222 case ARM::VSTRD: 1223 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1224 case ARM::t2LDRi8: 1225 case ARM::t2LDRi12: 1226 return ARM::t2LDR_POST; 1227 case ARM::t2STRi8: 1228 case ARM::t2STRi12: 1229 return ARM::t2STR_POST; 1230 default: llvm_unreachable("Unhandled opcode!"); 1231 } 1232 } 1233 1234 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 1235 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1236 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 1237 MachineBasicBlock::iterator MBBI, 1238 const TargetInstrInfo *TII, 1239 bool &Advance, 1240 MachineBasicBlock::iterator &I) { 1241 // Thumb1 doesn't have updating LDR/STR. 1242 // FIXME: Use LDM/STM with single register instead. 1243 if (isThumb1) return false; 1244 1245 MachineInstr *MI = MBBI; 1246 unsigned Base = MI->getOperand(1).getReg(); 1247 bool BaseKill = MI->getOperand(1).isKill(); 1248 unsigned Bytes = getLSMultipleTransferSize(MI); 1249 unsigned Opcode = MI->getOpcode(); 1250 DebugLoc dl = MI->getDebugLoc(); 1251 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1252 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1253 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1254 if (isi32Load(Opcode) || isi32Store(Opcode)) 1255 if (MI->getOperand(2).getImm() != 0) 1256 return false; 1257 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1258 return false; 1259 1260 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 1261 // Can't do the merge if the destination register is the same as the would-be 1262 // writeback register. 1263 if (MI->getOperand(0).getReg() == Base) 1264 return false; 1265 1266 unsigned PredReg = 0; 1267 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1268 bool DoMerge = false; 1269 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1270 unsigned NewOpc = 0; 1271 // AM2 - 12 bits, thumb2 - 8 bits. 1272 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 1273 1274 // Try merging with the previous instruction. 1275 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1276 if (MBBI != BeginMBBI) { 1277 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1278 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1279 --PrevMBBI; 1280 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1281 DoMerge = true; 1282 AddSub = ARM_AM::sub; 1283 } else if (!isAM5 && 1284 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1285 DoMerge = true; 1286 } 1287 if (DoMerge) { 1288 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 1289 MBB.erase(PrevMBBI); 1290 } 1291 } 1292 1293 // Try merging with the next instruction. 1294 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1295 if (!DoMerge && MBBI != EndMBBI) { 1296 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1297 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1298 ++NextMBBI; 1299 if (!isAM5 && 1300 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1301 DoMerge = true; 1302 AddSub = ARM_AM::sub; 1303 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1304 DoMerge = true; 1305 } 1306 if (DoMerge) { 1307 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 1308 if (NextMBBI == I) { 1309 Advance = true; 1310 ++I; 1311 } 1312 MBB.erase(NextMBBI); 1313 } 1314 } 1315 1316 if (!DoMerge) 1317 return false; 1318 1319 if (isAM5) { 1320 // VLDM[SD]_UPD, VSTM[SD]_UPD 1321 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1322 // updating load/store-multiple instructions can be used with only one 1323 // register.) 1324 MachineOperand &MO = MI->getOperand(0); 1325 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1326 .addReg(Base, getDefRegState(true)) // WB base register 1327 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1328 .addImm(Pred).addReg(PredReg) 1329 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1330 getKillRegState(MO.isKill()))); 1331 } else if (isLd) { 1332 if (isAM2) { 1333 // LDR_PRE, LDR_POST 1334 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1335 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1336 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1337 .addReg(Base, RegState::Define) 1338 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1339 } else { 1340 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1341 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1342 .addReg(Base, RegState::Define) 1343 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1344 } 1345 } else { 1346 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1347 // t2LDR_PRE, t2LDR_POST 1348 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1349 .addReg(Base, RegState::Define) 1350 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1351 } 1352 } else { 1353 MachineOperand &MO = MI->getOperand(0); 1354 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1355 // the vestigal zero-reg offset register. When that's fixed, this clause 1356 // can be removed entirely. 1357 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1358 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1359 // STR_PRE, STR_POST 1360 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1361 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1362 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1363 } else { 1364 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1365 // t2STR_PRE, t2STR_POST 1366 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1367 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1368 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1369 } 1370 } 1371 MBB.erase(MBBI); 1372 1373 return true; 1374 } 1375 1376 /// isMemoryOp - Returns true if instruction is a memory operation that this 1377 /// pass is capable of operating on. 1378 static bool isMemoryOp(const MachineInstr *MI) { 1379 // When no memory operands are present, conservatively assume unaligned, 1380 // volatile, unfoldable. 1381 if (!MI->hasOneMemOperand()) 1382 return false; 1383 1384 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1385 1386 // Don't touch volatile memory accesses - we may be changing their order. 1387 if (MMO->isVolatile()) 1388 return false; 1389 1390 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1391 // not. 1392 if (MMO->getAlignment() < 4) 1393 return false; 1394 1395 // str <undef> could probably be eliminated entirely, but for now we just want 1396 // to avoid making a mess of it. 1397 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1398 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1399 MI->getOperand(0).isUndef()) 1400 return false; 1401 1402 // Likewise don't mess with references to undefined addresses. 1403 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1404 MI->getOperand(1).isUndef()) 1405 return false; 1406 1407 unsigned Opcode = MI->getOpcode(); 1408 switch (Opcode) { 1409 default: break; 1410 case ARM::VLDRS: 1411 case ARM::VSTRS: 1412 return MI->getOperand(1).isReg(); 1413 case ARM::VLDRD: 1414 case ARM::VSTRD: 1415 return MI->getOperand(1).isReg(); 1416 case ARM::LDRi12: 1417 case ARM::STRi12: 1418 case ARM::tLDRi: 1419 case ARM::tSTRi: 1420 case ARM::tLDRspi: 1421 case ARM::tSTRspi: 1422 case ARM::t2LDRi8: 1423 case ARM::t2LDRi12: 1424 case ARM::t2STRi8: 1425 case ARM::t2STRi12: 1426 return MI->getOperand(1).isReg(); 1427 } 1428 return false; 1429 } 1430 1431 /// AdvanceRS - Advance register scavenger to just before the earliest memory 1432 /// op that is being merged. 1433 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 1434 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 1435 unsigned Position = MemOps[0].Position; 1436 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 1437 if (MemOps[i].Position < Position) { 1438 Position = MemOps[i].Position; 1439 Loc = MemOps[i].MBBI; 1440 } 1441 } 1442 1443 if (Loc != MBB.begin()) 1444 RS->forward(std::prev(Loc)); 1445 } 1446 1447 static void InsertLDR_STR(MachineBasicBlock &MBB, 1448 MachineBasicBlock::iterator &MBBI, 1449 int Offset, bool isDef, 1450 DebugLoc dl, unsigned NewOpc, 1451 unsigned Reg, bool RegDeadKill, bool RegUndef, 1452 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1453 bool OffKill, bool OffUndef, 1454 ARMCC::CondCodes Pred, unsigned PredReg, 1455 const TargetInstrInfo *TII, bool isT2) { 1456 if (isDef) { 1457 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1458 TII->get(NewOpc)) 1459 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1460 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1461 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1462 } else { 1463 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1464 TII->get(NewOpc)) 1465 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1466 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1467 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1468 } 1469 } 1470 1471 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1472 MachineBasicBlock::iterator &MBBI) { 1473 MachineInstr *MI = &*MBBI; 1474 unsigned Opcode = MI->getOpcode(); 1475 if (Opcode == ARM::LDRD || Opcode == ARM::STRD || 1476 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { 1477 const MachineOperand &BaseOp = MI->getOperand(2); 1478 unsigned BaseReg = BaseOp.getReg(); 1479 unsigned EvenReg = MI->getOperand(0).getReg(); 1480 unsigned OddReg = MI->getOperand(1).getReg(); 1481 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1482 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1483 // ARM errata 602117: LDRD with base in list may result in incorrect base 1484 // register when interrupted or faulted. 1485 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); 1486 if (!Errata602117 && 1487 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) 1488 return false; 1489 1490 MachineBasicBlock::iterator NewBBI = MBBI; 1491 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1492 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1493 bool EvenDeadKill = isLd ? 1494 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1495 bool EvenUndef = MI->getOperand(0).isUndef(); 1496 bool OddDeadKill = isLd ? 1497 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1498 bool OddUndef = MI->getOperand(1).isUndef(); 1499 bool BaseKill = BaseOp.isKill(); 1500 bool BaseUndef = BaseOp.isUndef(); 1501 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1502 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1503 int OffImm = getMemoryOpOffset(MI); 1504 unsigned PredReg = 0; 1505 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1506 1507 if (OddRegNum > EvenRegNum && OffImm == 0) { 1508 // Ascending register numbers and no offset. It's safe to change it to a 1509 // ldm or stm. 1510 unsigned NewOpc = (isLd) 1511 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1512 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1513 if (isLd) { 1514 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1515 .addReg(BaseReg, getKillRegState(BaseKill)) 1516 .addImm(Pred).addReg(PredReg) 1517 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1518 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1519 ++NumLDRD2LDM; 1520 } else { 1521 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1522 .addReg(BaseReg, getKillRegState(BaseKill)) 1523 .addImm(Pred).addReg(PredReg) 1524 .addReg(EvenReg, 1525 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1526 .addReg(OddReg, 1527 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1528 ++NumSTRD2STM; 1529 } 1530 NewBBI = std::prev(MBBI); 1531 } else { 1532 // Split into two instructions. 1533 unsigned NewOpc = (isLd) 1534 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1535 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1536 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1537 // so adjust and use t2LDRi12 here for that. 1538 unsigned NewOpc2 = (isLd) 1539 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1540 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1541 DebugLoc dl = MBBI->getDebugLoc(); 1542 // If this is a load and base register is killed, it may have been 1543 // re-defed by the load, make sure the first load does not clobber it. 1544 if (isLd && 1545 (BaseKill || OffKill) && 1546 (TRI->regsOverlap(EvenReg, BaseReg))) { 1547 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1548 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1549 OddReg, OddDeadKill, false, 1550 BaseReg, false, BaseUndef, false, OffUndef, 1551 Pred, PredReg, TII, isT2); 1552 NewBBI = std::prev(MBBI); 1553 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1554 EvenReg, EvenDeadKill, false, 1555 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1556 Pred, PredReg, TII, isT2); 1557 } else { 1558 if (OddReg == EvenReg && EvenDeadKill) { 1559 // If the two source operands are the same, the kill marker is 1560 // probably on the first one. e.g. 1561 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1562 EvenDeadKill = false; 1563 OddDeadKill = true; 1564 } 1565 // Never kill the base register in the first instruction. 1566 if (EvenReg == BaseReg) 1567 EvenDeadKill = false; 1568 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1569 EvenReg, EvenDeadKill, EvenUndef, 1570 BaseReg, false, BaseUndef, false, OffUndef, 1571 Pred, PredReg, TII, isT2); 1572 NewBBI = std::prev(MBBI); 1573 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1574 OddReg, OddDeadKill, OddUndef, 1575 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1576 Pred, PredReg, TII, isT2); 1577 } 1578 if (isLd) 1579 ++NumLDRD2LDR; 1580 else 1581 ++NumSTRD2STR; 1582 } 1583 1584 MBB.erase(MI); 1585 MBBI = NewBBI; 1586 return true; 1587 } 1588 return false; 1589 } 1590 1591 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 1592 /// ops of the same base and incrementing offset into LDM / STM ops. 1593 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1594 unsigned NumMerges = 0; 1595 unsigned NumMemOps = 0; 1596 MemOpQueue MemOps; 1597 unsigned CurrBase = 0; 1598 unsigned CurrOpc = ~0u; 1599 unsigned CurrSize = 0; 1600 ARMCC::CondCodes CurrPred = ARMCC::AL; 1601 unsigned CurrPredReg = 0; 1602 unsigned Position = 0; 1603 SmallVector<MachineBasicBlock::iterator,4> Merges; 1604 1605 RS->enterBasicBlock(&MBB); 1606 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1607 while (MBBI != E) { 1608 if (FixInvalidRegPairOp(MBB, MBBI)) 1609 continue; 1610 1611 bool Advance = false; 1612 bool TryMerge = false; 1613 1614 bool isMemOp = isMemoryOp(MBBI); 1615 if (isMemOp) { 1616 unsigned Opcode = MBBI->getOpcode(); 1617 unsigned Size = getLSMultipleTransferSize(MBBI); 1618 const MachineOperand &MO = MBBI->getOperand(0); 1619 unsigned Reg = MO.getReg(); 1620 bool isKill = MO.isDef() ? false : MO.isKill(); 1621 unsigned Base = MBBI->getOperand(1).getReg(); 1622 unsigned PredReg = 0; 1623 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1624 int Offset = getMemoryOpOffset(MBBI); 1625 // Watch out for: 1626 // r4 := ldr [r5] 1627 // r5 := ldr [r5, #4] 1628 // r6 := ldr [r5, #8] 1629 // 1630 // The second ldr has effectively broken the chain even though it 1631 // looks like the later ldr(s) use the same base register. Try to 1632 // merge the ldr's so far, including this one. But don't try to 1633 // combine the following ldr(s). 1634 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); 1635 1636 // Watch out for: 1637 // r4 := ldr [r0, #8] 1638 // r4 := ldr [r0, #4] 1639 // 1640 // The optimization may reorder the second ldr in front of the first 1641 // ldr, which violates write after write(WAW) dependence. The same as 1642 // str. Try to merge inst(s) already in MemOps. 1643 bool Overlap = false; 1644 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { 1645 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { 1646 Overlap = true; 1647 break; 1648 } 1649 } 1650 1651 if (CurrBase == 0 && !Clobber) { 1652 // Start of a new chain. 1653 CurrBase = Base; 1654 CurrOpc = Opcode; 1655 CurrSize = Size; 1656 CurrPred = Pred; 1657 CurrPredReg = PredReg; 1658 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); 1659 ++NumMemOps; 1660 Advance = true; 1661 } else if (!Overlap) { 1662 if (Clobber) { 1663 TryMerge = true; 1664 Advance = true; 1665 } 1666 1667 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1668 // No need to match PredReg. 1669 // Continue adding to the queue. 1670 if (Offset > MemOps.back().Offset) { 1671 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, 1672 Position, MBBI)); 1673 ++NumMemOps; 1674 Advance = true; 1675 } else { 1676 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); 1677 I != E; ++I) { 1678 if (Offset < I->Offset) { 1679 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, 1680 Position, MBBI)); 1681 ++NumMemOps; 1682 Advance = true; 1683 break; 1684 } else if (Offset == I->Offset) { 1685 // Collision! This can't be merged! 1686 break; 1687 } 1688 } 1689 } 1690 } 1691 } 1692 } 1693 1694 if (MBBI->isDebugValue()) { 1695 ++MBBI; 1696 if (MBBI == E) 1697 // Reach the end of the block, try merging the memory instructions. 1698 TryMerge = true; 1699 } else if (Advance) { 1700 ++Position; 1701 ++MBBI; 1702 if (MBBI == E) 1703 // Reach the end of the block, try merging the memory instructions. 1704 TryMerge = true; 1705 } else { 1706 TryMerge = true; 1707 } 1708 1709 if (TryMerge) { 1710 if (NumMemOps > 1) { 1711 // Try to find a free register to use as a new base in case it's needed. 1712 // First advance to the instruction just before the start of the chain. 1713 AdvanceRS(MBB, MemOps); 1714 1715 // Find a scratch register. 1716 unsigned Scratch = 1717 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); 1718 1719 // Process the load / store instructions. 1720 RS->forward(std::prev(MBBI)); 1721 1722 // Merge ops. 1723 Merges.clear(); 1724 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, 1725 CurrPred, CurrPredReg, Scratch, MemOps, Merges); 1726 1727 // Try folding preceding/trailing base inc/dec into the generated 1728 // LDM/STM ops. 1729 for (unsigned i = 0, e = Merges.size(); i < e; ++i) 1730 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) 1731 ++NumMerges; 1732 NumMerges += Merges.size(); 1733 1734 // Try folding preceding/trailing base inc/dec into those load/store 1735 // that were not merged to form LDM/STM ops. 1736 for (unsigned i = 0; i != NumMemOps; ++i) 1737 if (!MemOps[i].Merged) 1738 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) 1739 ++NumMerges; 1740 1741 // RS may be pointing to an instruction that's deleted. 1742 RS->skipTo(std::prev(MBBI)); 1743 } else if (NumMemOps == 1) { 1744 // Try folding preceding/trailing base inc/dec into the single 1745 // load/store. 1746 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { 1747 ++NumMerges; 1748 RS->forward(std::prev(MBBI)); 1749 } 1750 } 1751 1752 CurrBase = 0; 1753 CurrOpc = ~0u; 1754 CurrSize = 0; 1755 CurrPred = ARMCC::AL; 1756 CurrPredReg = 0; 1757 if (NumMemOps) { 1758 MemOps.clear(); 1759 NumMemOps = 0; 1760 } 1761 1762 // If iterator hasn't been advanced and this is not a memory op, skip it. 1763 // It can't start a new chain anyway. 1764 if (!Advance && !isMemOp && MBBI != E) { 1765 ++Position; 1766 ++MBBI; 1767 } 1768 } 1769 } 1770 return NumMerges > 0; 1771 } 1772 1773 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops 1774 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it 1775 /// directly restore the value of LR into pc. 1776 /// ldmfd sp!, {..., lr} 1777 /// bx lr 1778 /// or 1779 /// ldmfd sp!, {..., lr} 1780 /// mov pc, lr 1781 /// => 1782 /// ldmfd sp!, {..., pc} 1783 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1784 // Thumb1 LDM doesn't allow high registers. 1785 if (isThumb1) return false; 1786 if (MBB.empty()) return false; 1787 1788 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1789 if (MBBI != MBB.begin() && 1790 (MBBI->getOpcode() == ARM::BX_RET || 1791 MBBI->getOpcode() == ARM::tBX_RET || 1792 MBBI->getOpcode() == ARM::MOVPCLR)) { 1793 MachineInstr *PrevMI = std::prev(MBBI); 1794 unsigned Opcode = PrevMI->getOpcode(); 1795 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1796 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1797 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1798 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1799 if (MO.getReg() != ARM::LR) 1800 return false; 1801 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1802 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1803 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1804 PrevMI->setDesc(TII->get(NewOpc)); 1805 MO.setReg(ARM::PC); 1806 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1807 MBB.erase(MBBI); 1808 return true; 1809 } 1810 } 1811 return false; 1812 } 1813 1814 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1815 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1816 TL = STI->getTargetLowering(); 1817 AFI = Fn.getInfo<ARMFunctionInfo>(); 1818 TII = STI->getInstrInfo(); 1819 TRI = STI->getRegisterInfo(); 1820 RS = new RegScavenger(); 1821 isThumb2 = AFI->isThumb2Function(); 1822 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1823 1824 bool Modified = false; 1825 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1826 ++MFI) { 1827 MachineBasicBlock &MBB = *MFI; 1828 Modified |= LoadStoreMultipleOpti(MBB); 1829 if (STI->hasV5TOps()) 1830 Modified |= MergeReturnIntoLDM(MBB); 1831 } 1832 1833 delete RS; 1834 return Modified; 1835 } 1836 1837 1838 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move 1839 /// load / stores from consecutive locations close to make it more 1840 /// likely they will be combined later. 1841 1842 namespace { 1843 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1844 static char ID; 1845 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1846 1847 const DataLayout *TD; 1848 const TargetInstrInfo *TII; 1849 const TargetRegisterInfo *TRI; 1850 const ARMSubtarget *STI; 1851 MachineRegisterInfo *MRI; 1852 MachineFunction *MF; 1853 1854 bool runOnMachineFunction(MachineFunction &Fn) override; 1855 1856 const char *getPassName() const override { 1857 return "ARM pre- register allocation load / store optimization pass"; 1858 } 1859 1860 private: 1861 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1862 unsigned &NewOpc, unsigned &EvenReg, 1863 unsigned &OddReg, unsigned &BaseReg, 1864 int &Offset, 1865 unsigned &PredReg, ARMCC::CondCodes &Pred, 1866 bool &isT2); 1867 bool RescheduleOps(MachineBasicBlock *MBB, 1868 SmallVectorImpl<MachineInstr *> &Ops, 1869 unsigned Base, bool isLd, 1870 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1871 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1872 }; 1873 char ARMPreAllocLoadStoreOpt::ID = 0; 1874 } 1875 1876 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1877 TD = Fn.getTarget().getDataLayout(); 1878 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1879 TII = STI->getInstrInfo(); 1880 TRI = STI->getRegisterInfo(); 1881 MRI = &Fn.getRegInfo(); 1882 MF = &Fn; 1883 1884 bool Modified = false; 1885 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1886 ++MFI) 1887 Modified |= RescheduleLoadStoreInstrs(MFI); 1888 1889 return Modified; 1890 } 1891 1892 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1893 MachineBasicBlock::iterator I, 1894 MachineBasicBlock::iterator E, 1895 SmallPtrSetImpl<MachineInstr*> &MemOps, 1896 SmallSet<unsigned, 4> &MemRegs, 1897 const TargetRegisterInfo *TRI) { 1898 // Are there stores / loads / calls between them? 1899 // FIXME: This is overly conservative. We should make use of alias information 1900 // some day. 1901 SmallSet<unsigned, 4> AddedRegPressure; 1902 while (++I != E) { 1903 if (I->isDebugValue() || MemOps.count(&*I)) 1904 continue; 1905 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1906 return false; 1907 if (isLd && I->mayStore()) 1908 return false; 1909 if (!isLd) { 1910 if (I->mayLoad()) 1911 return false; 1912 // It's not safe to move the first 'str' down. 1913 // str r1, [r0] 1914 // strh r5, [r0] 1915 // str r4, [r0, #+4] 1916 if (I->mayStore()) 1917 return false; 1918 } 1919 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1920 MachineOperand &MO = I->getOperand(j); 1921 if (!MO.isReg()) 1922 continue; 1923 unsigned Reg = MO.getReg(); 1924 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1925 return false; 1926 if (Reg != Base && !MemRegs.count(Reg)) 1927 AddedRegPressure.insert(Reg); 1928 } 1929 } 1930 1931 // Estimate register pressure increase due to the transformation. 1932 if (MemRegs.size() <= 4) 1933 // Ok if we are moving small number of instructions. 1934 return true; 1935 return AddedRegPressure.size() <= MemRegs.size() * 2; 1936 } 1937 1938 1939 /// Copy Op0 and Op1 operands into a new array assigned to MI. 1940 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1941 MachineInstr *Op1) { 1942 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1943 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1944 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1945 1946 MachineFunction *MF = MI->getParent()->getParent(); 1947 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1948 MachineSDNode::mmo_iterator MemEnd = 1949 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1950 MemEnd = 1951 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1952 MI->setMemRefs(MemBegin, MemEnd); 1953 } 1954 1955 bool 1956 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1957 DebugLoc &dl, 1958 unsigned &NewOpc, unsigned &EvenReg, 1959 unsigned &OddReg, unsigned &BaseReg, 1960 int &Offset, unsigned &PredReg, 1961 ARMCC::CondCodes &Pred, 1962 bool &isT2) { 1963 // Make sure we're allowed to generate LDRD/STRD. 1964 if (!STI->hasV5TEOps()) 1965 return false; 1966 1967 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1968 unsigned Scale = 1; 1969 unsigned Opcode = Op0->getOpcode(); 1970 if (Opcode == ARM::LDRi12) { 1971 NewOpc = ARM::LDRD; 1972 } else if (Opcode == ARM::STRi12) { 1973 NewOpc = ARM::STRD; 1974 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1975 NewOpc = ARM::t2LDRDi8; 1976 Scale = 4; 1977 isT2 = true; 1978 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1979 NewOpc = ARM::t2STRDi8; 1980 Scale = 4; 1981 isT2 = true; 1982 } else { 1983 return false; 1984 } 1985 1986 // Make sure the base address satisfies i64 ld / st alignment requirement. 1987 // At the moment, we ignore the memoryoperand's value. 1988 // If we want to use AliasAnalysis, we should check it accordingly. 1989 if (!Op0->hasOneMemOperand() || 1990 (*Op0->memoperands_begin())->isVolatile()) 1991 return false; 1992 1993 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1994 const Function *Func = MF->getFunction(); 1995 unsigned ReqAlign = STI->hasV6Ops() 1996 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1997 : 8; // Pre-v6 need 8-byte align 1998 if (Align < ReqAlign) 1999 return false; 2000 2001 // Then make sure the immediate offset fits. 2002 int OffImm = getMemoryOpOffset(Op0); 2003 if (isT2) { 2004 int Limit = (1 << 8) * Scale; 2005 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2006 return false; 2007 Offset = OffImm; 2008 } else { 2009 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2010 if (OffImm < 0) { 2011 AddSub = ARM_AM::sub; 2012 OffImm = - OffImm; 2013 } 2014 int Limit = (1 << 8) * Scale; 2015 if (OffImm >= Limit || (OffImm & (Scale-1))) 2016 return false; 2017 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2018 } 2019 EvenReg = Op0->getOperand(0).getReg(); 2020 OddReg = Op1->getOperand(0).getReg(); 2021 if (EvenReg == OddReg) 2022 return false; 2023 BaseReg = Op0->getOperand(1).getReg(); 2024 Pred = getInstrPredicate(Op0, PredReg); 2025 dl = Op0->getDebugLoc(); 2026 return true; 2027 } 2028 2029 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2030 SmallVectorImpl<MachineInstr *> &Ops, 2031 unsigned Base, bool isLd, 2032 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2033 bool RetVal = false; 2034 2035 // Sort by offset (in reverse order). 2036 std::sort(Ops.begin(), Ops.end(), 2037 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2038 int LOffset = getMemoryOpOffset(LHS); 2039 int ROffset = getMemoryOpOffset(RHS); 2040 assert(LHS == RHS || LOffset != ROffset); 2041 return LOffset > ROffset; 2042 }); 2043 2044 // The loads / stores of the same base are in order. Scan them from first to 2045 // last and check for the following: 2046 // 1. Any def of base. 2047 // 2. Any gaps. 2048 while (Ops.size() > 1) { 2049 unsigned FirstLoc = ~0U; 2050 unsigned LastLoc = 0; 2051 MachineInstr *FirstOp = nullptr; 2052 MachineInstr *LastOp = nullptr; 2053 int LastOffset = 0; 2054 unsigned LastOpcode = 0; 2055 unsigned LastBytes = 0; 2056 unsigned NumMove = 0; 2057 for (int i = Ops.size() - 1; i >= 0; --i) { 2058 MachineInstr *Op = Ops[i]; 2059 unsigned Loc = MI2LocMap[Op]; 2060 if (Loc <= FirstLoc) { 2061 FirstLoc = Loc; 2062 FirstOp = Op; 2063 } 2064 if (Loc >= LastLoc) { 2065 LastLoc = Loc; 2066 LastOp = Op; 2067 } 2068 2069 unsigned LSMOpcode 2070 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2071 if (LastOpcode && LSMOpcode != LastOpcode) 2072 break; 2073 2074 int Offset = getMemoryOpOffset(Op); 2075 unsigned Bytes = getLSMultipleTransferSize(Op); 2076 if (LastBytes) { 2077 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2078 break; 2079 } 2080 LastOffset = Offset; 2081 LastBytes = Bytes; 2082 LastOpcode = LSMOpcode; 2083 if (++NumMove == 8) // FIXME: Tune this limit. 2084 break; 2085 } 2086 2087 if (NumMove <= 1) 2088 Ops.pop_back(); 2089 else { 2090 SmallPtrSet<MachineInstr*, 4> MemOps; 2091 SmallSet<unsigned, 4> MemRegs; 2092 for (int i = NumMove-1; i >= 0; --i) { 2093 MemOps.insert(Ops[i]); 2094 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2095 } 2096 2097 // Be conservative, if the instructions are too far apart, don't 2098 // move them. We want to limit the increase of register pressure. 2099 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2100 if (DoMove) 2101 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2102 MemOps, MemRegs, TRI); 2103 if (!DoMove) { 2104 for (unsigned i = 0; i != NumMove; ++i) 2105 Ops.pop_back(); 2106 } else { 2107 // This is the new location for the loads / stores. 2108 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2109 while (InsertPos != MBB->end() 2110 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2111 ++InsertPos; 2112 2113 // If we are moving a pair of loads / stores, see if it makes sense 2114 // to try to allocate a pair of registers that can form register pairs. 2115 MachineInstr *Op0 = Ops.back(); 2116 MachineInstr *Op1 = Ops[Ops.size()-2]; 2117 unsigned EvenReg = 0, OddReg = 0; 2118 unsigned BaseReg = 0, PredReg = 0; 2119 ARMCC::CondCodes Pred = ARMCC::AL; 2120 bool isT2 = false; 2121 unsigned NewOpc = 0; 2122 int Offset = 0; 2123 DebugLoc dl; 2124 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2125 EvenReg, OddReg, BaseReg, 2126 Offset, PredReg, Pred, isT2)) { 2127 Ops.pop_back(); 2128 Ops.pop_back(); 2129 2130 const MCInstrDesc &MCID = TII->get(NewOpc); 2131 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2132 MRI->constrainRegClass(EvenReg, TRC); 2133 MRI->constrainRegClass(OddReg, TRC); 2134 2135 // Form the pair instruction. 2136 if (isLd) { 2137 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2138 .addReg(EvenReg, RegState::Define) 2139 .addReg(OddReg, RegState::Define) 2140 .addReg(BaseReg); 2141 // FIXME: We're converting from LDRi12 to an insn that still 2142 // uses addrmode2, so we need an explicit offset reg. It should 2143 // always by reg0 since we're transforming LDRi12s. 2144 if (!isT2) 2145 MIB.addReg(0); 2146 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2147 concatenateMemOperands(MIB, Op0, Op1); 2148 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2149 ++NumLDRDFormed; 2150 } else { 2151 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2152 .addReg(EvenReg) 2153 .addReg(OddReg) 2154 .addReg(BaseReg); 2155 // FIXME: We're converting from LDRi12 to an insn that still 2156 // uses addrmode2, so we need an explicit offset reg. It should 2157 // always by reg0 since we're transforming STRi12s. 2158 if (!isT2) 2159 MIB.addReg(0); 2160 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2161 concatenateMemOperands(MIB, Op0, Op1); 2162 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2163 ++NumSTRDFormed; 2164 } 2165 MBB->erase(Op0); 2166 MBB->erase(Op1); 2167 2168 // Add register allocation hints to form register pairs. 2169 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 2170 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 2171 } else { 2172 for (unsigned i = 0; i != NumMove; ++i) { 2173 MachineInstr *Op = Ops.back(); 2174 Ops.pop_back(); 2175 MBB->splice(InsertPos, MBB, Op); 2176 } 2177 } 2178 2179 NumLdStMoved += NumMove; 2180 RetVal = true; 2181 } 2182 } 2183 } 2184 2185 return RetVal; 2186 } 2187 2188 bool 2189 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2190 bool RetVal = false; 2191 2192 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2193 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2194 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2195 SmallVector<unsigned, 4> LdBases; 2196 SmallVector<unsigned, 4> StBases; 2197 2198 unsigned Loc = 0; 2199 MachineBasicBlock::iterator MBBI = MBB->begin(); 2200 MachineBasicBlock::iterator E = MBB->end(); 2201 while (MBBI != E) { 2202 for (; MBBI != E; ++MBBI) { 2203 MachineInstr *MI = MBBI; 2204 if (MI->isCall() || MI->isTerminator()) { 2205 // Stop at barriers. 2206 ++MBBI; 2207 break; 2208 } 2209 2210 if (!MI->isDebugValue()) 2211 MI2LocMap[MI] = ++Loc; 2212 2213 if (!isMemoryOp(MI)) 2214 continue; 2215 unsigned PredReg = 0; 2216 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2217 continue; 2218 2219 int Opc = MI->getOpcode(); 2220 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 2221 unsigned Base = MI->getOperand(1).getReg(); 2222 int Offset = getMemoryOpOffset(MI); 2223 2224 bool StopHere = false; 2225 if (isLd) { 2226 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2227 Base2LdsMap.find(Base); 2228 if (BI != Base2LdsMap.end()) { 2229 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2230 if (Offset == getMemoryOpOffset(BI->second[i])) { 2231 StopHere = true; 2232 break; 2233 } 2234 } 2235 if (!StopHere) 2236 BI->second.push_back(MI); 2237 } else { 2238 Base2LdsMap[Base].push_back(MI); 2239 LdBases.push_back(Base); 2240 } 2241 } else { 2242 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2243 Base2StsMap.find(Base); 2244 if (BI != Base2StsMap.end()) { 2245 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2246 if (Offset == getMemoryOpOffset(BI->second[i])) { 2247 StopHere = true; 2248 break; 2249 } 2250 } 2251 if (!StopHere) 2252 BI->second.push_back(MI); 2253 } else { 2254 Base2StsMap[Base].push_back(MI); 2255 StBases.push_back(Base); 2256 } 2257 } 2258 2259 if (StopHere) { 2260 // Found a duplicate (a base+offset combination that's seen earlier). 2261 // Backtrack. 2262 --Loc; 2263 break; 2264 } 2265 } 2266 2267 // Re-schedule loads. 2268 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2269 unsigned Base = LdBases[i]; 2270 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2271 if (Lds.size() > 1) 2272 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2273 } 2274 2275 // Re-schedule stores. 2276 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2277 unsigned Base = StBases[i]; 2278 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2279 if (Sts.size() > 1) 2280 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2281 } 2282 2283 if (MBBI != E) { 2284 Base2LdsMap.clear(); 2285 Base2StsMap.clear(); 2286 LdBases.clear(); 2287 StBases.clear(); 2288 } 2289 } 2290 2291 return RetVal; 2292 } 2293 2294 2295 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store 2296 /// optimization pass. 2297 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2298 if (PreAlloc) 2299 return new ARMPreAllocLoadStoreOpt(); 2300 return new ARMLoadStoreOpt(); 2301 } 2302