1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterScavenging.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/IR/DataLayout.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Function.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Target/TargetInstrInfo.h" 43 #include "llvm/Target/TargetMachine.h" 44 #include "llvm/Target/TargetRegisterInfo.h" 45 using namespace llvm; 46 47 #define DEBUG_TYPE "arm-ldst-opt" 48 49 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 50 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 51 STATISTIC(NumVLDMGened, 
"Number of vldm instructions generated"); 52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 60 61 namespace { 62 /// Post- register allocation pass the combine load / store instructions to 63 /// form ldm / stm instructions. 64 struct ARMLoadStoreOpt : public MachineFunctionPass { 65 static char ID; 66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 67 68 const TargetInstrInfo *TII; 69 const TargetRegisterInfo *TRI; 70 const ARMSubtarget *STI; 71 const TargetLowering *TL; 72 ARMFunctionInfo *AFI; 73 RegScavenger *RS; 74 bool isThumb1, isThumb2; 75 76 bool runOnMachineFunction(MachineFunction &Fn) override; 77 78 const char *getPassName() const override { 79 return "ARM load / store optimization pass"; 80 } 81 82 private: 83 struct MemOpQueueEntry { 84 int Offset; 85 unsigned Reg; 86 bool isKill; 87 unsigned Position; 88 MachineBasicBlock::iterator MBBI; 89 bool Merged; 90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 91 MachineBasicBlock::iterator i) 92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 93 }; 94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 95 typedef MemOpQueue::iterator MemOpQueueIter; 96 97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 98 const MemOpQueue &MemOps, unsigned DefReg, 99 unsigned RangeBegin, unsigned RangeEnd); 100 void UpdateBaseRegUses(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MBBI, 102 DebugLoc dl, unsigned Base, 
unsigned WordOffset, 103 ARMCC::CondCodes Pred, unsigned PredReg); 104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode, 106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 107 DebugLoc dl, 108 ArrayRef<std::pair<unsigned, bool> > Regs, 109 ArrayRef<unsigned> ImpDefs); 110 void MergeOpsUpdate(MachineBasicBlock &MBB, 111 MemOpQueue &MemOps, 112 unsigned memOpsBegin, 113 unsigned memOpsEnd, 114 unsigned insertAfter, 115 int Offset, 116 unsigned Base, 117 bool BaseKill, 118 unsigned Opcode, 119 ARMCC::CondCodes Pred, 120 unsigned PredReg, 121 unsigned Scratch, 122 DebugLoc dl, 123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 125 unsigned Opcode, unsigned Size, 126 ARMCC::CondCodes Pred, unsigned PredReg, 127 unsigned Scratch, MemOpQueue &MemOps, 128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 131 MachineBasicBlock::iterator &MBBI); 132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 133 MachineBasicBlock::iterator MBBI, 134 const TargetInstrInfo *TII, 135 bool &Advance, 136 MachineBasicBlock::iterator &I); 137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 138 MachineBasicBlock::iterator MBBI, 139 bool &Advance, 140 MachineBasicBlock::iterator &I); 141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 143 }; 144 char ARMLoadStoreOpt::ID = 0; 145 } 146 147 static bool definesCPSR(const MachineInstr *MI) { 148 for (const auto &MO : MI->operands()) { 149 if (!MO.isReg()) 150 continue; 151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 152 // If the instruction has live CPSR def, then it's not safe to fold it 153 // into load / store. 
154 return true; 155 } 156 157 return false; 158 } 159 160 static int getMemoryOpOffset(const MachineInstr *MI) { 161 unsigned Opcode = MI->getOpcode(); 162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 163 unsigned NumOperands = MI->getDesc().getNumOperands(); 164 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 165 166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 170 return OffField; 171 172 // Thumb1 immediate offsets are scaled by 4 173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 174 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 175 return OffField * 4; 176 177 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 178 : ARM_AM::getAM5Offset(OffField) * 4; 179 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 180 : ARM_AM::getAM5Op(OffField); 181 182 if (Op == ARM_AM::sub) 183 return -Offset; 184 185 return Offset; 186 } 187 188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { 189 switch (Opcode) { 190 default: llvm_unreachable("Unhandled opcode!"); 191 case ARM::LDRi12: 192 ++NumLDMGened; 193 switch (Mode) { 194 default: llvm_unreachable("Unhandled submode!"); 195 case ARM_AM::ia: return ARM::LDMIA; 196 case ARM_AM::da: return ARM::LDMDA; 197 case ARM_AM::db: return ARM::LDMDB; 198 case ARM_AM::ib: return ARM::LDMIB; 199 } 200 case ARM::STRi12: 201 ++NumSTMGened; 202 switch (Mode) { 203 default: llvm_unreachable("Unhandled submode!"); 204 case ARM_AM::ia: return ARM::STMIA; 205 case ARM_AM::da: return ARM::STMDA; 206 case ARM_AM::db: return ARM::STMDB; 207 case ARM_AM::ib: return ARM::STMIB; 208 } 209 case ARM::tLDRi: 210 case ARM::tLDRspi: 211 // tLDMIA is writeback-only - unless the base register is in the input 212 // reglist. 
213 ++NumLDMGened; 214 switch (Mode) { 215 default: llvm_unreachable("Unhandled submode!"); 216 case ARM_AM::ia: return ARM::tLDMIA; 217 } 218 case ARM::tSTRi: 219 case ARM::tSTRspi: 220 // There is no non-writeback tSTMIA either. 221 ++NumSTMGened; 222 switch (Mode) { 223 default: llvm_unreachable("Unhandled submode!"); 224 case ARM_AM::ia: return ARM::tSTMIA_UPD; 225 } 226 case ARM::t2LDRi8: 227 case ARM::t2LDRi12: 228 ++NumLDMGened; 229 switch (Mode) { 230 default: llvm_unreachable("Unhandled submode!"); 231 case ARM_AM::ia: return ARM::t2LDMIA; 232 case ARM_AM::db: return ARM::t2LDMDB; 233 } 234 case ARM::t2STRi8: 235 case ARM::t2STRi12: 236 ++NumSTMGened; 237 switch (Mode) { 238 default: llvm_unreachable("Unhandled submode!"); 239 case ARM_AM::ia: return ARM::t2STMIA; 240 case ARM_AM::db: return ARM::t2STMDB; 241 } 242 case ARM::VLDRS: 243 ++NumVLDMGened; 244 switch (Mode) { 245 default: llvm_unreachable("Unhandled submode!"); 246 case ARM_AM::ia: return ARM::VLDMSIA; 247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 248 } 249 case ARM::VSTRS: 250 ++NumVSTMGened; 251 switch (Mode) { 252 default: llvm_unreachable("Unhandled submode!"); 253 case ARM_AM::ia: return ARM::VSTMSIA; 254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 255 } 256 case ARM::VLDRD: 257 ++NumVLDMGened; 258 switch (Mode) { 259 default: llvm_unreachable("Unhandled submode!"); 260 case ARM_AM::ia: return ARM::VLDMDIA; 261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 262 } 263 case ARM::VSTRD: 264 ++NumVSTMGened; 265 switch (Mode) { 266 default: llvm_unreachable("Unhandled submode!"); 267 case ARM_AM::ia: return ARM::VSTMDIA; 268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 
269 } 270 } 271 } 272 273 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { 274 switch (Opcode) { 275 default: llvm_unreachable("Unhandled opcode!"); 276 case ARM::LDMIA_RET: 277 case ARM::LDMIA: 278 case ARM::LDMIA_UPD: 279 case ARM::STMIA: 280 case ARM::STMIA_UPD: 281 case ARM::tLDMIA: 282 case ARM::tLDMIA_UPD: 283 case ARM::tSTMIA_UPD: 284 case ARM::t2LDMIA_RET: 285 case ARM::t2LDMIA: 286 case ARM::t2LDMIA_UPD: 287 case ARM::t2STMIA: 288 case ARM::t2STMIA_UPD: 289 case ARM::VLDMSIA: 290 case ARM::VLDMSIA_UPD: 291 case ARM::VSTMSIA: 292 case ARM::VSTMSIA_UPD: 293 case ARM::VLDMDIA: 294 case ARM::VLDMDIA_UPD: 295 case ARM::VSTMDIA: 296 case ARM::VSTMDIA_UPD: 297 return ARM_AM::ia; 298 299 case ARM::LDMDA: 300 case ARM::LDMDA_UPD: 301 case ARM::STMDA: 302 case ARM::STMDA_UPD: 303 return ARM_AM::da; 304 305 case ARM::LDMDB: 306 case ARM::LDMDB_UPD: 307 case ARM::STMDB: 308 case ARM::STMDB_UPD: 309 case ARM::t2LDMDB: 310 case ARM::t2LDMDB_UPD: 311 case ARM::t2STMDB: 312 case ARM::t2STMDB_UPD: 313 case ARM::VLDMSDB_UPD: 314 case ARM::VSTMSDB_UPD: 315 case ARM::VLDMDDB_UPD: 316 case ARM::VSTMDDB_UPD: 317 return ARM_AM::db; 318 319 case ARM::LDMIB: 320 case ARM::LDMIB_UPD: 321 case ARM::STMIB: 322 case ARM::STMIB_UPD: 323 return ARM_AM::ib; 324 } 325 } 326 327 static bool isT1i32Load(unsigned Opc) { 328 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 329 } 330 331 static bool isT2i32Load(unsigned Opc) { 332 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 333 } 334 335 static bool isi32Load(unsigned Opc) { 336 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 337 } 338 339 static bool isT1i32Store(unsigned Opc) { 340 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 341 } 342 343 static bool isT2i32Store(unsigned Opc) { 344 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 345 } 346 347 static bool isi32Store(unsigned Opc) { 348 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 349 } 350 351 static unsigned 
getImmScale(unsigned Opc) { 352 switch (Opc) { 353 default: llvm_unreachable("Unhandled opcode!"); 354 case ARM::tLDRi: 355 case ARM::tSTRi: 356 case ARM::tLDRspi: 357 case ARM::tSTRspi: 358 return 1; 359 case ARM::tLDRHi: 360 case ARM::tSTRHi: 361 return 2; 362 case ARM::tLDRBi: 363 case ARM::tSTRBi: 364 return 4; 365 } 366 } 367 368 /// Update future uses of the base register with the offset introduced 369 /// due to writeback. This function only works on Thumb1. 370 void 371 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 372 MachineBasicBlock::iterator MBBI, 373 DebugLoc dl, unsigned Base, 374 unsigned WordOffset, 375 ARMCC::CondCodes Pred, unsigned PredReg) { 376 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 377 // Start updating any instructions with immediate offsets. Insert a SUB before 378 // the first non-updateable instruction (if any). 379 for (; MBBI != MBB.end(); ++MBBI) { 380 bool InsertSub = false; 381 unsigned Opc = MBBI->getOpcode(); 382 383 if (MBBI->readsRegister(Base)) { 384 int Offset; 385 bool IsLoad = 386 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 387 bool IsStore = 388 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 389 390 if (IsLoad || IsStore) { 391 // Loads and stores with immediate offsets can be updated, but only if 392 // the new offset isn't negative. 393 // The MachineOperand containing the offset immediate is the last one 394 // before predicates. 395 MachineOperand &MO = 396 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 397 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 398 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 399 400 // If storing the base register, it needs to be reset first. 
401 unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); 402 403 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 404 MO.setImm(Offset); 405 else 406 InsertSub = true; 407 408 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 409 !definesCPSR(MBBI)) { 410 // SUBS/ADDS using this register, with a dead def of the CPSR. 411 // Merge it with the update; if the merged offset is too large, 412 // insert a new sub instead. 413 MachineOperand &MO = 414 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 415 Offset = (Opc == ARM::tSUBi8) ? 416 MO.getImm() + WordOffset * 4 : 417 MO.getImm() - WordOffset * 4 ; 418 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 419 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 420 // Offset == 0. 421 MO.setImm(Offset); 422 // The base register has now been reset, so exit early. 423 return; 424 } else { 425 InsertSub = true; 426 } 427 428 } else { 429 // Can't update the instruction. 430 InsertSub = true; 431 } 432 433 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 434 // Since SUBS sets the condition flags, we can't place the base reset 435 // after an instruction that has a live CPSR def. 436 // The base register might also contain an argument for a function call. 437 InsertSub = true; 438 } 439 440 if (InsertSub) { 441 // An instruction above couldn't be updated, so insert a sub. 442 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 443 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 444 return; 445 } 446 447 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base)) 448 // Register got killed. Stop updating. 449 return; 450 } 451 452 // End of block was reached. 453 if (MBB.succ_size() > 0) { 454 // FIXME: Because of a bug, live registers are sometimes missing from 455 // the successor blocks' live-in sets. This means we can't trust that 456 // information and *always* have to reset at the end of a block. 457 // See PR21029. 
458 if (MBBI != MBB.end()) --MBBI; 459 AddDefaultT1CC( 460 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 461 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); 462 } 463 } 464 465 /// Create and insert a LDM or STM with Base as base register and registers in 466 /// Regs as the register operands that would be loaded / stored. It returns 467 /// true if the transformation is done. 468 bool 469 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 470 MachineBasicBlock::iterator MBBI, 471 int Offset, unsigned Base, bool BaseKill, 472 unsigned Opcode, ARMCC::CondCodes Pred, 473 unsigned PredReg, unsigned Scratch, DebugLoc dl, 474 ArrayRef<std::pair<unsigned, bool> > Regs, 475 ArrayRef<unsigned> ImpDefs) { 476 // Only a single register to load / store. Don't bother. 477 unsigned NumRegs = Regs.size(); 478 if (NumRegs <= 1) 479 return false; 480 481 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 482 // Compute liveness information for that register to make the decision. 483 bool SafeToClobberCPSR = !isThumb1 || 484 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 485 MachineBasicBlock::LQR_Dead); 486 487 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 488 489 // Exception: If the base register is in the input reglist, Thumb1 LDM is 490 // non-writeback. 491 // It's also not possible to merge an STR of the base register in Thumb1. 492 if (isThumb1) 493 for (const std::pair<unsigned, bool> &R : Regs) 494 if (Base == R.first) { 495 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 496 if (Opcode == ARM::tLDRi) { 497 Writeback = false; 498 break; 499 } else if (Opcode == ARM::tSTRi) { 500 return false; 501 } 502 } 503 504 ARM_AM::AMSubMode Mode = ARM_AM::ia; 505 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 
506 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 507 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 508 509 if (Offset == 4 && haveIBAndDA) { 510 Mode = ARM_AM::ib; 511 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 512 Mode = ARM_AM::da; 513 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 514 // VLDM/VSTM do not support DB mode without also updating the base reg. 515 Mode = ARM_AM::db; 516 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 517 // Check if this is a supported opcode before inserting instructions to 518 // calculate a new base register. 519 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 520 521 // If starting offset isn't zero, insert a MI to materialize a new base. 522 // But only do so if it is cost effective, i.e. merging more than two 523 // loads / stores. 524 if (NumRegs <= 2) 525 return false; 526 527 // On Thumb1, it's not worth materializing a new base register without 528 // clobbering the CPSR (i.e. not using ADDS/SUBS). 529 if (!SafeToClobberCPSR) 530 return false; 531 532 unsigned NewBase; 533 if (isi32Load(Opcode)) { 534 // If it is a load, then just use one of the destination register to 535 // use as the new base. 536 NewBase = Regs[NumRegs-1].first; 537 } else { 538 // Use the scratch register to use as a new base. 539 NewBase = Scratch; 540 if (NewBase == 0) 541 return false; 542 } 543 544 int BaseOpc = 545 isThumb2 ? ARM::t2ADDri : 546 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 547 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 548 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 549 550 if (Offset < 0) { 551 Offset = - Offset; 552 BaseOpc = 553 isThumb2 ? ARM::t2SUBri : 554 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 555 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 556 } 557 558 if (!TL->isLegalAddImmediate(Offset)) 559 // FIXME: Try add with register operand? 560 return false; // Probably not worth it then. 
561 562 if (isThumb1) { 563 // Thumb1: depending on immediate size, use either 564 // ADDS NewBase, Base, #imm3 565 // or 566 // MOV NewBase, Base 567 // ADDS NewBase, #imm8. 568 if (Base != NewBase && 569 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 570 // Need to insert a MOV to the new base first. 571 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 572 !STI->hasV6Ops()) { 573 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 574 if (Pred != ARMCC::AL) 575 return false; 576 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) 577 .addReg(Base, getKillRegState(BaseKill)); 578 } else 579 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 580 .addReg(Base, getKillRegState(BaseKill)) 581 .addImm(Pred).addReg(PredReg); 582 583 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 584 Base = NewBase; 585 BaseKill = false; 586 } 587 if (BaseOpc == ARM::tADDrSPi) { 588 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 589 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 590 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) 591 .addImm(Pred).addReg(PredReg); 592 } else 593 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 594 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 595 .addImm(Pred).addReg(PredReg); 596 } else { 597 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 598 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 599 .addImm(Pred).addReg(PredReg).addReg(0); 600 } 601 Base = NewBase; 602 BaseKill = true; // New base is always killed straight away. 603 } 604 605 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 606 Opcode == ARM::VLDRD); 607 608 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 609 // base register writeback. 610 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 611 if (!Opcode) return false; 612 613 // Check if a Thumb1 LDM/STM merge is safe. 
This is the case if: 614 // - There is no writeback (LDM of base register), 615 // - the base register is killed by the merged instruction, 616 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 617 // to reset the base register. 618 // Otherwise, don't merge. 619 // It's safe to return here since the code to materialize a new base register 620 // above is also conditional on SafeToClobberCPSR. 621 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 622 return false; 623 624 MachineInstrBuilder MIB; 625 626 if (Writeback) { 627 if (Opcode == ARM::tLDMIA) 628 // Update tLDMIA with writeback if necessary. 629 Opcode = ARM::tLDMIA_UPD; 630 631 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 632 633 // Thumb1: we might need to set base writeback when building the MI. 634 MIB.addReg(Base, getDefRegState(true)) 635 .addReg(Base, getKillRegState(BaseKill)); 636 637 // The base isn't dead after a merged instruction with writeback. 638 // Insert a sub instruction after the newly formed instruction to reset. 639 if (!BaseKill) 640 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); 641 642 } else { 643 // No writeback, simply build the MachineInstr. 644 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 645 MIB.addReg(Base, getKillRegState(BaseKill)); 646 } 647 648 MIB.addImm(Pred).addReg(PredReg); 649 650 for (const std::pair<unsigned, bool> &R : Regs) 651 MIB = MIB.addReg(R.first, getDefRegState(isDef) 652 | getKillRegState(R.second)); 653 654 // Add implicit defs for super-registers. 655 for (unsigned ImpDef : ImpDefs) 656 MIB.addReg(ImpDef, RegState::ImplicitDefine); 657 658 return true; 659 } 660 661 /// Find all instructions using a given imp-def within a range. 662 /// 663 /// We are trying to combine a range of instructions, one of which (located at 664 /// position RangeBegin) implicitly defines a register. 
The final LDM/STM will 665 /// be placed at RangeEnd, and so any uses of this definition between RangeStart 666 /// and RangeEnd must be modified to use an undefined value. 667 /// 668 /// The live range continues until we find a second definition or one of the 669 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 670 /// we must consider all uses and decide which are relevant in a second pass. 671 void ARMLoadStoreOpt::findUsesOfImpDef( 672 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 673 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 674 std::map<unsigned, MachineOperand *> Uses; 675 unsigned LastLivePos = RangeEnd; 676 677 // First we find all uses of this register with Position between RangeBegin 678 // and RangeEnd, any or all of these could be uses of a definition at 679 // RangeBegin. We also record the latest position a definition at RangeBegin 680 // would be considered live. 681 for (unsigned i = 0; i < MemOps.size(); ++i) { 682 MachineInstr &MI = *MemOps[i].MBBI; 683 unsigned MIPosition = MemOps[i].Position; 684 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 685 continue; 686 687 // If this instruction defines the register, then any later use will be of 688 // that definition rather than ours. 689 if (MI.definesRegister(DefReg)) 690 LastLivePos = std::min(LastLivePos, MIPosition); 691 692 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 693 if (!UseOp) 694 continue; 695 696 // If this instruction kills the register then (assuming liveness is 697 // correct when we start) we don't need to think about anything after here. 698 if (UseOp->isKill()) 699 LastLivePos = std::min(LastLivePos, MIPosition); 700 701 Uses[MIPosition] = UseOp; 702 } 703 704 // Now we traverse the list of all uses, and append the ones that actually use 705 // our definition to the requested list. 
706 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 707 E = Uses.end(); 708 I != E; ++I) { 709 // List is sorted by position so once we've found one out of range there 710 // will be no more to consider. 711 if (I->first > LastLivePos) 712 break; 713 UsesOfImpDefs.push_back(I->second); 714 } 715 } 716 717 /// Call MergeOps and update MemOps and merges accordingly on success. 718 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 719 MemOpQueue &memOps, 720 unsigned memOpsBegin, unsigned memOpsEnd, 721 unsigned insertAfter, int Offset, 722 unsigned Base, bool BaseKill, 723 unsigned Opcode, 724 ARMCC::CondCodes Pred, unsigned PredReg, 725 unsigned Scratch, 726 DebugLoc dl, 727 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 728 // First calculate which of the registers should be killed by the merged 729 // instruction. 730 const unsigned insertPos = memOps[insertAfter].Position; 731 SmallSet<unsigned, 4> KilledRegs; 732 DenseMap<unsigned, unsigned> Killer; 733 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 734 if (i == memOpsBegin) { 735 i = memOpsEnd; 736 if (i == e) 737 break; 738 } 739 if (memOps[i].Position < insertPos && memOps[i].isKill) { 740 unsigned Reg = memOps[i].Reg; 741 KilledRegs.insert(Reg); 742 Killer[Reg] = i; 743 } 744 } 745 746 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 747 MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0); 748 if (TransferOp.isUse() && TransferOp.getReg() == Base) 749 BaseKill = false; 750 } 751 752 SmallVector<std::pair<unsigned, bool>, 8> Regs; 753 SmallVector<unsigned, 8> ImpDefs; 754 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 755 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 756 unsigned Reg = memOps[i].Reg; 757 // If we are inserting the merged operation after an operation that 758 // uses the same register, make sure to transfer any kill flag. 
759 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 760 Regs.push_back(std::make_pair(Reg, isKill)); 761 762 // Collect any implicit defs of super-registers. They must be preserved. 763 for (const MachineOperand &MO : memOps[i].MBBI->operands()) { 764 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) 765 continue; 766 unsigned DefReg = MO.getReg(); 767 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 768 ImpDefs.push_back(DefReg); 769 770 // There may be other uses of the definition between this instruction and 771 // the eventual LDM/STM position. These should be marked undef if the 772 // merge takes place. 773 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 774 insertPos); 775 } 776 } 777 778 // Try to do the merge. 779 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 780 ++Loc; 781 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 782 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 783 return; 784 785 // Merge succeeded, update records. 786 Merges.push_back(std::prev(Loc)); 787 788 // In gathering loads together, we may have moved the imp-def of a register 789 // past one of its uses. This is OK, since we know better than the rest of 790 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 791 // affected uses. 792 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 793 E = UsesOfImpDefs.end(); 794 I != E; ++I) 795 (*I)->setIsUndef(); 796 797 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 798 // Remove kill flags from any memops that come before insertPos. 
799 if (Regs[i-memOpsBegin].second) { 800 unsigned Reg = Regs[i-memOpsBegin].first; 801 if (KilledRegs.count(Reg)) { 802 unsigned j = Killer[Reg]; 803 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 804 assert(Idx >= 0 && "Cannot find killing operand"); 805 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 806 memOps[j].isKill = false; 807 } 808 memOps[i].isKill = true; 809 } 810 MBB.erase(memOps[i].MBBI); 811 // Update this memop to refer to the merged instruction. 812 // We may need to move kill flags again. 813 memOps[i].Merged = true; 814 memOps[i].MBBI = Merges.back(); 815 memOps[i].Position = insertPos; 816 } 817 818 // Update memOps offsets, since they may have been modified by MergeOps. 819 for (auto &MemOp : memOps) { 820 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); 821 } 822 } 823 824 /// Merge a number of load / store instructions into one or more load / store 825 /// multiple instructions. 826 void 827 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 828 unsigned Base, unsigned Opcode, unsigned Size, 829 ARMCC::CondCodes Pred, unsigned PredReg, 830 unsigned Scratch, MemOpQueue &MemOps, 831 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 832 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 833 int Offset = MemOps[SIndex].Offset; 834 int SOffset = Offset; 835 unsigned insertAfter = SIndex; 836 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 837 DebugLoc dl = Loc->getDebugLoc(); 838 const MachineOperand &PMO = Loc->getOperand(0); 839 unsigned PReg = PMO.getReg(); 840 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 841 unsigned Count = 1; 842 unsigned Limit = ~0U; 843 bool BaseKill = false; 844 // vldm / vstm limit are 32 for S variants, 16 for D variants. 
  // Per-instruction register-count limits for VFP load/store multiples
  // (16 D-registers or 32 S-registers per VLDM/VSTM).
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  // Greedily extend the run starting at SIndex while each next memop
  // continues the contiguous offset range with a mergeable register.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with a odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Recurse to merge the rest of the queue starting at this entry.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // Track the memop latest in program order; the merged instruction is
    // inserted after it.
    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  // The entire remaining run was mergeable.
  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

/// Return true if \p MI is "Base := Base - Bytes" (a SUB of the given size
/// into the same register) under predicate \p Pred / \p PredReg, i.e. a
/// decrement that can be folded into a neighboring load/store as a base
/// writeback. A non-zero \p Limit bounds the acceptable byte count.
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    // These encodings have flag-setting variants; reject those below.
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  // A flag-setting SUB (defines CPSR) cannot be deleted/folded away.
  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// Return true if \p MI is "Base := Base + Bytes" under the same predicate;
/// the increment counterpart of isMatchingDecrement.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    // These encodings have flag-setting variants; reject those below.
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// Return the number of bytes transferred by the load/store \p MI, or 0 for
/// opcodes this pass does not handle.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Variadic register list: the operand count beyond the fixed operands
    // (plus one) is the register count; 4 bytes per 32-bit register.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    // Variadic D-register list: 8 bytes per 64-bit register.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Map a non-writeback load/store-multiple opcode to its base-updating
/// (writeback, "!") form for the requested addressing submode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    // Thumb2 only has IA and DB submodes.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    // A preceding decrement flips an increment submode to the matching
    // decrement submode (the base now starts Bytes lower).
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // Keep the caller's iterator valid if it points at the erased add/sub.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Map an immediate-offset load/store opcode to its pre-indexed (writeback)
/// form. VLDR/VSTR have no writeback form, so single-register updating
/// VLDM/VSTM opcodes are used instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Map an immediate-offset load/store opcode to its post-indexed (writeback)
/// form; see getPreIndexedLoadStoreOpcode for the VLDR/VSTR caveat.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ?
                        ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing immediate offset can become a pre/post-indexed form.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // Keep the caller's iterator valid if it points at the erased add/sub.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // AM2 forms encode sign/amount/shift in a single immediate.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  // Only the immediate-offset forms below are handled; require a register
  // base operand in each case.
  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// Advance register scavenger to just before the earliest memory op that is
/// being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. earliest in the
  // block (the queue is offset-ordered, not program-ordered).
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

/// Emit a single load (\p isDef true) or store with opcode \p NewOpc before
/// \p MBBI, carrying the given register/base/offset operands and the
/// kill/dead/undef flags supplied by the caller.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// Rewrite an LDRD/STRD whose register pair is illegal (odd-first or
/// non-consecutive) or that hits ARM erratum 602117 on Cortex-M3, into an
/// equivalent LDM/STM or a pair of LDR/STRs. Returns true and repositions
/// \p MBBI if the instruction was replaced.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  MachineBasicBlock::iterator NewBBI = MBBI;
  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  // For loads a matched flag means "dead" (result unused); for stores it
  // means "kill" (last use of the source).
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
    NewBBI = std::prev(MBBI);
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Load the non-overlapping (odd) half first so the base survives.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      NewBBI = std::prev(MBBI);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      NewBBI = std::prev(MBBI);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBB.erase(MI);
  MBBI = NewBBI;
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State of the chain currently being collected: base register, opcode,
  // per-op size, and predicate must all match for an op to join.
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      unsigned Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Insert keeping the queue sorted by offset; a duplicate offset
            // is a WAW collision and the op is not queued.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      // Reset chain state before scanning for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restore the value of LR
/// into pc.
1778 /// ldmfd sp!, {..., lr} 1779 /// bx lr 1780 /// or 1781 /// ldmfd sp!, {..., lr} 1782 /// mov pc, lr 1783 /// => 1784 /// ldmfd sp!, {..., pc} 1785 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1786 // Thumb1 LDM doesn't allow high registers. 1787 if (isThumb1) return false; 1788 if (MBB.empty()) return false; 1789 1790 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1791 if (MBBI != MBB.begin() && 1792 (MBBI->getOpcode() == ARM::BX_RET || 1793 MBBI->getOpcode() == ARM::tBX_RET || 1794 MBBI->getOpcode() == ARM::MOVPCLR)) { 1795 MachineInstr *PrevMI = std::prev(MBBI); 1796 unsigned Opcode = PrevMI->getOpcode(); 1797 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1798 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1799 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1800 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1801 if (MO.getReg() != ARM::LR) 1802 return false; 1803 unsigned NewOpc = (isThumb2 ? 
ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1804 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1805 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1806 PrevMI->setDesc(TII->get(NewOpc)); 1807 MO.setReg(ARM::PC); 1808 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1809 MBB.erase(MBBI); 1810 return true; 1811 } 1812 } 1813 return false; 1814 } 1815 1816 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1817 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1818 TL = STI->getTargetLowering(); 1819 AFI = Fn.getInfo<ARMFunctionInfo>(); 1820 TII = STI->getInstrInfo(); 1821 TRI = STI->getRegisterInfo(); 1822 RS = new RegScavenger(); 1823 isThumb2 = AFI->isThumb2Function(); 1824 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1825 1826 bool Modified = false; 1827 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1828 ++MFI) { 1829 MachineBasicBlock &MBB = *MFI; 1830 Modified |= LoadStoreMultipleOpti(MBB); 1831 if (STI->hasV5TOps()) 1832 Modified |= MergeReturnIntoLDM(MBB); 1833 } 1834 1835 delete RS; 1836 return Modified; 1837 } 1838 1839 namespace { 1840 /// Pre- register allocation pass that move load / stores from consecutive 1841 /// locations close to make it more likely they will be combined later. 
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // NOTE(review): the definition below names these params FirstReg /
    // SecondReg; the declaration here says EvenReg / OddReg. Same positions,
    // but the names should be unified.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// Return true if it is safe (no conflicting memory/base-register accesses in
/// (I, E)) and profitable (bounded register-pressure increase) to move the
/// loads/stores in \p MemOps next to each other.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any redefinition of the base register in between blocks the move.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
1989 if (!Op0->hasOneMemOperand() || 1990 (*Op0->memoperands_begin())->isVolatile()) 1991 return false; 1992 1993 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1994 const Function *Func = MF->getFunction(); 1995 unsigned ReqAlign = STI->hasV6Ops() 1996 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 1997 : 8; // Pre-v6 need 8-byte align 1998 if (Align < ReqAlign) 1999 return false; 2000 2001 // Then make sure the immediate offset fits. 2002 int OffImm = getMemoryOpOffset(Op0); 2003 if (isT2) { 2004 int Limit = (1 << 8) * Scale; 2005 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2006 return false; 2007 Offset = OffImm; 2008 } else { 2009 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2010 if (OffImm < 0) { 2011 AddSub = ARM_AM::sub; 2012 OffImm = - OffImm; 2013 } 2014 int Limit = (1 << 8) * Scale; 2015 if (OffImm >= Limit || (OffImm & (Scale-1))) 2016 return false; 2017 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2018 } 2019 FirstReg = Op0->getOperand(0).getReg(); 2020 SecondReg = Op1->getOperand(0).getReg(); 2021 if (FirstReg == SecondReg) 2022 return false; 2023 BaseReg = Op0->getOperand(1).getReg(); 2024 Pred = getInstrPredicate(Op0, PredReg); 2025 dl = Op0->getDebugLoc(); 2026 return true; 2027 } 2028 2029 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2030 SmallVectorImpl<MachineInstr *> &Ops, 2031 unsigned Base, bool isLd, 2032 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2033 bool RetVal = false; 2034 2035 // Sort by offset (in reverse order). 2036 std::sort(Ops.begin(), Ops.end(), 2037 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2038 int LOffset = getMemoryOpOffset(LHS); 2039 int ROffset = getMemoryOpOffset(RHS); 2040 assert(LHS == RHS || LOffset != ROffset); 2041 return LOffset > ROffset; 2042 }); 2043 2044 // The loads / stores of the same base are in order. Scan them from first to 2045 // last and check for the following: 2046 // 1. Any def of base. 2047 // 2. Any gaps. 
  // Ops is sorted by descending offset, so Ops.back() is the lowest address.
  // Repeatedly peel a maximal run of contiguous same-size accesses off the
  // back of Ops and try to move them together.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk from the lowest offset (back of Ops) upward, tracking the
    // earliest (FirstOp) and latest (LastOp) instruction by block position.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // All candidates must map to the same load/store-multiple opcode.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Offsets must be contiguous (previous offset + access size) and all
      // accesses the same width; otherwise stop growing the run.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      // Nothing mergeable at the back; discard it and retry.
      Ops.pop_back();
    else {
      // Collect the instructions and data registers of the run so the
      // safety check can ignore them / track live registers.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Give up on this run; drop its members from Ops.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads gather at
        // the earliest member's position, stores at the latest; skip over
        // run members and debug values already sitting there.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        // Op0 is the lower-address access (back of the descending-sorted Ops).
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Keep the transfer registers in a class the paired opcode accepts.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The originals are now represented by the paired instruction.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing; just splice the run's members to the insert point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// Scan \p MBB between scheduling barriers (calls / terminators), bucket
/// unpredicated i32/VFP loads and stores by base register, and invoke
/// RescheduleOps on every bucket with more than one member.
/// \returns true if any instruction was moved or merged.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Position of each instruction within the current barrier-free region.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  // Base register -> candidate loads / stores (kept separately).
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  // Insertion-ordered list of base registers seen, for deterministic output.
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      // Only plain, unpredicated memory ops are reschedulable.
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access to the same base+offset ends the region: moving
          // across it could reorder aliasing accesses.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // Same duplicate base+offset check as for loads.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start a fresh set of buckets for the next barrier-free region.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}