//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
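///
/// For example (a sketch of the intended transformation):
///   str r0, [r4]
///   str r1, [r4, #4]      =>      stmia r4, {r0, r1}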

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
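  // For example, if writeback of 3 words was introduced (a sketch):
  //   tLDMIA r4!, {r0, r1, r2}
  //   ...
  //   ldr r3, [r4, #12]   -->   ldr r3, [r4]
  // If an adjusted offset would become negative (Thumb1 immediates are
  // unsigned), a SUBS resetting the base is inserted instead.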
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}

/// MergeOps - Create and insert a LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. Returns true if the transformation is done.
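///
/// The addressing submode is chosen from Offset: 0 -> IA, 4 -> IB,
/// -4*NumRegs -> DB, -4*NumRegs+4 -> DA (IB/DA exist only in ARM mode).
/// Any other offset forces a new base to be materialized, e.g. (a sketch,
/// with r12 standing in for the scratch register):
///   Offset = 8:  add r12, r4, #8 ; stmia r12, {r0, r1, r2}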
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // Thumb v4T doesn't have lo->lo copies, and we can't predicate
          // tMOVSr.
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd; any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // The list is sorted by position so once we've found one out of range
    // there will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - Call MergeOps and, on success, update MemOps and Merges
// accordingly.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                                     SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
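  // (If a register is killed by an unmerged memop that sits before the
  // insertion point, the kill flag has to migrate onto the merged LDM/STM,
  // which becomes the last use of that register.)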
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
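///
/// Register numbers must ascend with the offsets; when an op breaks the
/// order, the run gathered so far is merged and a fresh attempt starts at
/// the breaking op. E.g. (a sketch):
///   ldr r0, [r4]
///   ldr r2, [r4, #4]
///   ldr r1, [r4, #8]   =>   ldmia r4, {r0, r2} ; ldr r1, [r4, #8]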
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
llvm_unreachable("Unhandled submode!"); 1057 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1058 case ARM_AM::db: return ARM::t2STMDB_UPD; 1059 } 1060 case ARM::VLDMSIA: 1061 switch (Mode) { 1062 default: llvm_unreachable("Unhandled submode!"); 1063 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1064 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1065 } 1066 case ARM::VLDMDIA: 1067 switch (Mode) { 1068 default: llvm_unreachable("Unhandled submode!"); 1069 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1070 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1071 } 1072 case ARM::VSTMSIA: 1073 switch (Mode) { 1074 default: llvm_unreachable("Unhandled submode!"); 1075 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1076 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1077 } 1078 case ARM::VSTMDIA: 1079 switch (Mode) { 1080 default: llvm_unreachable("Unhandled submode!"); 1081 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1082 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1083 } 1084 } 1085 } 1086 1087 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 1088 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1089 /// 1090 /// stmia rn, <ra, rb, rc> 1091 /// rn := rn + 4 * 3; 1092 /// => 1093 /// stmia rn!, <ra, rb, rc> 1094 /// 1095 /// rn := rn - 4 * 3; 1096 /// ldmia rn, <ra, rb, rc> 1097 /// => 1098 /// ldmdb rn!, <ra, rb, rc> 1099 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1100 MachineBasicBlock::iterator MBBI, 1101 bool &Advance, 1102 MachineBasicBlock::iterator &I) { 1103 // Thumb1 is already using updating loads/stores. 1104 if (isThumb1) return false; 1105 1106 MachineInstr *MI = MBBI; 1107 unsigned Base = MI->getOperand(0).getReg(); 1108 bool BaseKill = MI->getOperand(0).isKill(); 1109 unsigned Bytes = getLSMultipleTransferSize(MI); 1110 unsigned PredReg = 0; 1111 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1112 int Opcode = MI->getOpcode(); 1113 DebugLoc dl = MI->getDebugLoc(); 1114 1115 // Can't use an updating ld/st if the base register is also a dest 1116 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1117 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1118 if (MI->getOperand(i).getReg() == Base) 1119 return false; 1120 1121 bool DoMerge = false; 1122 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 1123 1124 // Try merging with the previous instruction. 1125 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1126 if (MBBI != BeginMBBI) { 1127 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1128 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1129 --PrevMBBI; 1130 if (Mode == ARM_AM::ia && 1131 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1132 Mode = ARM_AM::db; 1133 DoMerge = true; 1134 } else if (Mode == ARM_AM::ib && 1135 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1136 Mode = ARM_AM::da; 1137 DoMerge = true; 1138 } 1139 if (DoMerge) 1140 MBB.erase(PrevMBBI); 1141 } 1142 1143 // Try merging with the next instruction. 
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
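  // The targeted folds look like (a sketch):
  //   add r1, r1, #4 ; ldr r0, [r1]    =>   ldr r0, [r1, #4]!
  //   ldr r0, [r1]   ; add r1, r1, #4  =>   ldr r0, [r1], #4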
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                                   getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, so make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          //   t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
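///
/// E.g. (a sketch):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]   =>   ldmia r4, {r0, r1, r2}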
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's been deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
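  // For example (an illustrative snippet, not from any particular test):
  // moving the second load up next to the first is unsafe, because the
  // intervening call may write the memory being loaded:
  //   ldr r1, [r0]
  //   bl  foo
  //   ldr r2, [r0, #4]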
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
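  // Summarizing the checks below: in the Thumb2 case the offset must be a
  // multiple of Scale (4) with magnitude below (1 << 8) * Scale, i.e. a
  // multiple of 4 in -1020 .. 1020 (t2LDRDi8 / t2STRDi8 use a scaled 8-bit
  // immediate); in the ARM case the magnitude must fit addrmode3's 8-bit
  // immediate, with the sign carried by the add/sub flag.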
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
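        // Advance past the barrier so that, after the rescheduling below,
        // the next scan region starts on the instruction following it.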
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
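
// Pipeline sketch (hypothetical wiring; the real hooks live in
// ARMTargetMachine / ARMPassConfig and may differ between releases):
//
//   // Before register allocation: move loads / stores from consecutive
//   // locations together and form LDRD / STRD with pairing hints.
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//
//   // After register allocation: merge LDR / STR sequences into LDM / STM
//   // and fold base-register updates.
//   addPass(createARMLoadStoreOptimizationPass());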