1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
29
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
32
33 #define DEBUG_TYPE FIXUPLEA_NAME
34
35 STATISTIC(NumLEAs, "Number of LEA instructions created");
36
37 namespace {
38 class FixupLEAPass : public MachineFunctionPass {
39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40
41 /// Given a machine register, look for the instruction
42 /// which writes it in the current basic block. If found,
43 /// try to replace it with an equivalent LEA instruction.
44 /// If replacement succeeds, then also process the newly created
45 /// instruction.
46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47 MachineBasicBlock &MBB);
48
49 /// Given a memory access or LEA instruction
50 /// whose address mode uses a base and/or index register, look for
51 /// an opportunity to replace the instruction which sets the base or index
52 /// register with an equivalent LEA instruction.
53 void processInstruction(MachineBasicBlock::iterator &I,
54 MachineBasicBlock &MBB);
55
56 /// Given a LEA instruction which is unprofitable
57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59 MachineBasicBlock &MBB);
60
61 /// Given a LEA instruction which is unprofitable
62 /// on SNB+ try to replace it with other instructions.
63 /// According to Intel's Optimization Reference Manual:
64 /// " For LEA instructions with three source operands and some specific
65 /// situations, instruction latency has increased to 3 cycles, and must
66 /// dispatch via port 1:
67 /// - LEA that has all three source operands: base, index, and offset
68 /// - LEA that uses base and index registers where the base is EBP, RBP,
69 /// or R13
70 /// - LEA that uses RIP relative addressing mode
71 /// - LEA that uses 16-bit addressing mode "
72 /// This function currently handles the first 2 cases only.
73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74 MachineBasicBlock &MBB, bool OptIncDec);
75
76 /// Look for LEAs that are really two address LEAs that we might be able to
77 /// turn into regular ADD instructions.
78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79 MachineBasicBlock &MBB, bool OptIncDec,
80 bool UseLEAForSP) const;
81
82 /// Look for and transform the sequence
83 /// lea (reg1, reg2), reg3
84 /// sub reg3, reg4
85 /// to
86 /// sub reg1, reg4
87 /// sub reg2, reg4
88 /// It can also optimize the sequence lea/add similarly.
89 bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90
91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92 /// the dest register of LEA instruction I.
93 MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94 MachineBasicBlock &MBB) const;
95
96 /// Check instructions between LeaI and AluI (exclusively).
97 /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98 /// Set AluDestRef to true if the dest register of AluI is used or defined.
99 /// *KilledBase is set to the killed base register usage.
100 /// *KilledIndex is set to the killed index register usage.
101 void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102 MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103 bool &AluDestRef, MachineOperand **KilledBase,
104 MachineOperand **KilledIndex) const;
105
106 /// Determine if an instruction references a machine register
107 /// and, if so, whether it reads or writes the register.
108 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109
110 /// Step backwards through a basic block, looking
111 /// for an instruction which writes a register within
112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113 MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114 MachineBasicBlock::iterator &I,
115 MachineBasicBlock &MBB);
116
117 /// if an instruction can be converted to an
118 /// equivalent LEA, insert the new instruction into the basic block
119 /// and return a pointer to it. Otherwise, return zero.
120 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121 MachineBasicBlock::iterator &MBBI) const;
122
123 public:
124 static char ID;
125
getPassName() const126 StringRef getPassName() const override { return FIXUPLEA_DESC; }
127
FixupLEAPass()128 FixupLEAPass() : MachineFunctionPass(ID) { }
129
130 /// Loop over all of the basic blocks,
131 /// replacing instructions by equivalent LEA instructions
132 /// if needed and when possible.
133 bool runOnMachineFunction(MachineFunction &MF) override;
134
135 // This pass runs after regalloc and doesn't support VReg operands.
getRequiredProperties() const136 MachineFunctionProperties getRequiredProperties() const override {
137 return MachineFunctionProperties().set(
138 MachineFunctionProperties::Property::NoVRegs);
139 }
140
getAnalysisUsage(AnalysisUsage & AU) const141 void getAnalysisUsage(AnalysisUsage &AU) const override {
142 AU.addRequired<ProfileSummaryInfoWrapperPass>();
143 AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144 MachineFunctionPass::getAnalysisUsage(AU);
145 }
146
147 private:
148 TargetSchedModel TSM;
149 const X86InstrInfo *TII = nullptr;
150 const X86RegisterInfo *TRI = nullptr;
151 };
152 }
153
154 char FixupLEAPass::ID = 0;
155
INITIALIZE_PASS(FixupLEAPass,FIXUPLEA_NAME,FIXUPLEA_DESC,false,false)156 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157
158 MachineInstr *
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160 MachineBasicBlock::iterator &MBBI) const {
161 MachineInstr &MI = *MBBI;
162 switch (MI.getOpcode()) {
163 case X86::MOV32rr:
164 case X86::MOV64rr: {
165 const MachineOperand &Src = MI.getOperand(1);
166 const MachineOperand &Dest = MI.getOperand(0);
167 MachineInstr *NewMI =
168 BuildMI(MBB, MBBI, MI.getDebugLoc(),
169 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170 : X86::LEA64r))
171 .add(Dest)
172 .add(Src)
173 .addImm(1)
174 .addReg(0)
175 .addImm(0)
176 .addReg(0);
177 return NewMI;
178 }
179 }
180
181 if (!MI.isConvertibleTo3Addr())
182 return nullptr;
183
184 switch (MI.getOpcode()) {
185 default:
186 // Only convert instructions that we've verified are safe.
187 return nullptr;
188 case X86::ADD64ri32:
189 case X86::ADD64ri8:
190 case X86::ADD64ri32_DB:
191 case X86::ADD64ri8_DB:
192 case X86::ADD32ri:
193 case X86::ADD32ri8:
194 case X86::ADD32ri_DB:
195 case X86::ADD32ri8_DB:
196 if (!MI.getOperand(2).isImm()) {
197 // convertToThreeAddress will call getImm()
198 // which requires isImm() to be true
199 return nullptr;
200 }
201 break;
202 case X86::SHL64ri:
203 case X86::SHL32ri:
204 case X86::INC64r:
205 case X86::INC32r:
206 case X86::DEC64r:
207 case X86::DEC32r:
208 case X86::ADD64rr:
209 case X86::ADD64rr_DB:
210 case X86::ADD32rr:
211 case X86::ADD32rr_DB:
212 // These instructions are all fine to convert.
213 break;
214 }
215 return TII->convertToThreeAddress(MI, nullptr, nullptr);
216 }
217
createX86FixupLEAs()218 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
219
isLEA(unsigned Opcode)220 static bool isLEA(unsigned Opcode) {
221 return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
222 Opcode == X86::LEA64_32r;
223 }
224
runOnMachineFunction(MachineFunction & MF)225 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
226 if (skipFunction(MF.getFunction()))
227 return false;
228
229 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
230 bool IsSlowLEA = ST.slowLEA();
231 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
232 bool LEAUsesAG = ST.leaUsesAG();
233
234 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
235 bool UseLEAForSP = ST.useLeaForSP();
236
237 TSM.init(&ST);
238 TII = ST.getInstrInfo();
239 TRI = ST.getRegisterInfo();
240 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
241 auto *MBFI = (PSI && PSI->hasProfileSummary())
242 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
243 : nullptr;
244
245 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
246 for (MachineBasicBlock &MBB : MF) {
247 // First pass. Try to remove or optimize existing LEAs.
248 bool OptIncDecPerBB =
249 OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
250 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
251 if (!isLEA(I->getOpcode()))
252 continue;
253
254 if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
255 continue;
256
257 if (IsSlowLEA)
258 processInstructionForSlowLEA(I, MBB);
259 else if (IsSlow3OpsLEA)
260 processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
261 }
262
263 // Second pass for creating LEAs. This may reverse some of the
264 // transformations above.
265 if (LEAUsesAG) {
266 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
267 processInstruction(I, MBB);
268 }
269 }
270
271 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
272
273 return true;
274 }
275
276 FixupLEAPass::RegUsageState
usesRegister(MachineOperand & p,MachineBasicBlock::iterator I)277 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
278 RegUsageState RegUsage = RU_NotUsed;
279 MachineInstr &MI = *I;
280
281 for (const MachineOperand &MO : MI.operands()) {
282 if (MO.isReg() && MO.getReg() == p.getReg()) {
283 if (MO.isDef())
284 return RU_Write;
285 RegUsage = RU_Read;
286 }
287 }
288 return RegUsage;
289 }
290
291 /// getPreviousInstr - Given a reference to an instruction in a basic
292 /// block, return a reference to the previous instruction in the block,
293 /// wrapping around to the last instruction of the block if the block
294 /// branches to itself.
getPreviousInstr(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)295 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
296 MachineBasicBlock &MBB) {
297 if (I == MBB.begin()) {
298 if (MBB.isPredecessor(&MBB)) {
299 I = --MBB.end();
300 return true;
301 } else
302 return false;
303 }
304 --I;
305 return true;
306 }
307
308 MachineBasicBlock::iterator
searchBackwards(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)309 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
310 MachineBasicBlock &MBB) {
311 int InstrDistance = 1;
312 MachineBasicBlock::iterator CurInst;
313 static const int INSTR_DISTANCE_THRESHOLD = 5;
314
315 CurInst = I;
316 bool Found;
317 Found = getPreviousInstr(CurInst, MBB);
318 while (Found && I != CurInst) {
319 if (CurInst->isCall() || CurInst->isInlineAsm())
320 break;
321 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
322 break; // too far back to make a difference
323 if (usesRegister(p, CurInst) == RU_Write) {
324 return CurInst;
325 }
326 InstrDistance += TSM.computeInstrLatency(&*CurInst);
327 Found = getPreviousInstr(CurInst, MBB);
328 }
329 return MachineBasicBlock::iterator();
330 }
331
isInefficientLEAReg(unsigned Reg)332 static inline bool isInefficientLEAReg(unsigned Reg) {
333 return Reg == X86::EBP || Reg == X86::RBP ||
334 Reg == X86::R13D || Reg == X86::R13;
335 }
336
337 /// Returns true if this LEA uses base an index registers, and the base register
338 /// is known to be inefficient for the subtarget.
339 // TODO: use a variant scheduling class to model the latency profile
340 // of LEA instructions, and implement this logic as a scheduling predicate.
hasInefficientLEABaseReg(const MachineOperand & Base,const MachineOperand & Index)341 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
342 const MachineOperand &Index) {
343 return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
344 Index.getReg() != X86::NoRegister;
345 }
346
hasLEAOffset(const MachineOperand & Offset)347 static inline bool hasLEAOffset(const MachineOperand &Offset) {
348 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
349 }
350
getADDrrFromLEA(unsigned LEAOpcode)351 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
352 switch (LEAOpcode) {
353 default:
354 llvm_unreachable("Unexpected LEA instruction");
355 case X86::LEA32r:
356 case X86::LEA64_32r:
357 return X86::ADD32rr;
358 case X86::LEA64r:
359 return X86::ADD64rr;
360 }
361 }
362
getSUBrrFromLEA(unsigned LEAOpcode)363 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
364 switch (LEAOpcode) {
365 default:
366 llvm_unreachable("Unexpected LEA instruction");
367 case X86::LEA32r:
368 case X86::LEA64_32r:
369 return X86::SUB32rr;
370 case X86::LEA64r:
371 return X86::SUB64rr;
372 }
373 }
374
getADDriFromLEA(unsigned LEAOpcode,const MachineOperand & Offset)375 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
376 const MachineOperand &Offset) {
377 bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
378 switch (LEAOpcode) {
379 default:
380 llvm_unreachable("Unexpected LEA instruction");
381 case X86::LEA32r:
382 case X86::LEA64_32r:
383 return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
384 case X86::LEA64r:
385 return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
386 }
387 }
388
getINCDECFromLEA(unsigned LEAOpcode,bool IsINC)389 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
390 switch (LEAOpcode) {
391 default:
392 llvm_unreachable("Unexpected LEA instruction");
393 case X86::LEA32r:
394 case X86::LEA64_32r:
395 return IsINC ? X86::INC32r : X86::DEC32r;
396 case X86::LEA64r:
397 return IsINC ? X86::INC64r : X86::DEC64r;
398 }
399 }
400
401 MachineBasicBlock::iterator
searchALUInst(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB) const402 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
403 MachineBasicBlock &MBB) const {
404 const int InstrDistanceThreshold = 5;
405 int InstrDistance = 1;
406 MachineBasicBlock::iterator CurInst = std::next(I);
407
408 unsigned LEAOpcode = I->getOpcode();
409 unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
410 unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
411 Register DestReg = I->getOperand(0).getReg();
412
413 while (CurInst != MBB.end()) {
414 if (CurInst->isCall() || CurInst->isInlineAsm())
415 break;
416 if (InstrDistance > InstrDistanceThreshold)
417 break;
418
419 // Check if the lea dest register is used in an add/sub instruction only.
420 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
421 MachineOperand &Opnd = CurInst->getOperand(I);
422 if (Opnd.isReg()) {
423 if (Opnd.getReg() == DestReg) {
424 if (Opnd.isDef() || !Opnd.isKill())
425 return MachineBasicBlock::iterator();
426
427 unsigned AluOpcode = CurInst->getOpcode();
428 if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
429 return MachineBasicBlock::iterator();
430
431 MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
432 MachineOperand AluDest = CurInst->getOperand(0);
433 if (Opnd2.getReg() != AluDest.getReg())
434 return MachineBasicBlock::iterator();
435
436 // X - (Y + Z) may generate different flags than (X - Y) - Z when
437 // there is overflow. So we can't change the alu instruction if the
438 // flags register is live.
439 if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
440 return MachineBasicBlock::iterator();
441
442 return CurInst;
443 }
444 if (TRI->regsOverlap(DestReg, Opnd.getReg()))
445 return MachineBasicBlock::iterator();
446 }
447 }
448
449 InstrDistance++;
450 ++CurInst;
451 }
452 return MachineBasicBlock::iterator();
453 }
454
checkRegUsage(MachineBasicBlock::iterator & LeaI,MachineBasicBlock::iterator & AluI,bool & BaseIndexDef,bool & AluDestRef,MachineOperand ** KilledBase,MachineOperand ** KilledIndex) const455 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
456 MachineBasicBlock::iterator &AluI,
457 bool &BaseIndexDef, bool &AluDestRef,
458 MachineOperand **KilledBase,
459 MachineOperand **KilledIndex) const {
460 BaseIndexDef = AluDestRef = false;
461 *KilledBase = *KilledIndex = nullptr;
462 Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
463 Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
464 Register AluDestReg = AluI->getOperand(0).getReg();
465
466 MachineBasicBlock::iterator CurInst = std::next(LeaI);
467 while (CurInst != AluI) {
468 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
469 MachineOperand &Opnd = CurInst->getOperand(I);
470 if (!Opnd.isReg())
471 continue;
472 Register Reg = Opnd.getReg();
473 if (TRI->regsOverlap(Reg, AluDestReg))
474 AluDestRef = true;
475 if (TRI->regsOverlap(Reg, BaseReg)) {
476 if (Opnd.isDef())
477 BaseIndexDef = true;
478 else if (Opnd.isKill())
479 *KilledBase = &Opnd;
480 }
481 if (TRI->regsOverlap(Reg, IndexReg)) {
482 if (Opnd.isDef())
483 BaseIndexDef = true;
484 else if (Opnd.isKill())
485 *KilledIndex = &Opnd;
486 }
487 }
488 ++CurInst;
489 }
490 }
491
optLEAALU(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB) const492 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
493 MachineBasicBlock &MBB) const {
494 // Look for an add/sub instruction which uses the result of lea.
495 MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
496 if (AluI == MachineBasicBlock::iterator())
497 return false;
498
499 // Check if there are any related register usage between lea and alu.
500 bool BaseIndexDef, AluDestRef;
501 MachineOperand *KilledBase, *KilledIndex;
502 checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
503
504 MachineBasicBlock::iterator InsertPos = AluI;
505 if (BaseIndexDef) {
506 if (AluDestRef)
507 return false;
508 InsertPos = I;
509 KilledBase = KilledIndex = nullptr;
510 }
511
512 // Check if there are same registers.
513 Register AluDestReg = AluI->getOperand(0).getReg();
514 Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
515 Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
516 if (I->getOpcode() == X86::LEA64_32r) {
517 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
518 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
519 }
520 if (AluDestReg == IndexReg) {
521 if (BaseReg == IndexReg)
522 return false;
523 std::swap(BaseReg, IndexReg);
524 std::swap(KilledBase, KilledIndex);
525 }
526 if (BaseReg == IndexReg)
527 KilledBase = nullptr;
528
529 // Now it's safe to change instructions.
530 MachineInstr *NewMI1, *NewMI2;
531 unsigned NewOpcode = AluI->getOpcode();
532 NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
533 AluDestReg)
534 .addReg(AluDestReg, RegState::Kill)
535 .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
536 NewMI1->addRegisterDead(X86::EFLAGS, TRI);
537 NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
538 AluDestReg)
539 .addReg(AluDestReg, RegState::Kill)
540 .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
541 NewMI2->addRegisterDead(X86::EFLAGS, TRI);
542
543 // Clear the old Kill flags.
544 if (KilledBase)
545 KilledBase->setIsKill(false);
546 if (KilledIndex)
547 KilledIndex->setIsKill(false);
548
549 MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
550 MBB.erase(I);
551 MBB.erase(AluI);
552 I = NewMI1;
553 return true;
554 }
555
optTwoAddrLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec,bool UseLEAForSP) const556 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
557 MachineBasicBlock &MBB, bool OptIncDec,
558 bool UseLEAForSP) const {
559 MachineInstr &MI = *I;
560
561 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
562 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
563 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
564 const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
565 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
566
567 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
568 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
569 MachineBasicBlock::LQR_Dead)
570 return false;
571
572 Register DestReg = MI.getOperand(0).getReg();
573 Register BaseReg = Base.getReg();
574 Register IndexReg = Index.getReg();
575
576 // Don't change stack adjustment LEAs.
577 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
578 return false;
579
580 // LEA64_32 has 64-bit operands but 32-bit result.
581 if (MI.getOpcode() == X86::LEA64_32r) {
582 if (BaseReg != 0)
583 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
584 if (IndexReg != 0)
585 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
586 }
587
588 MachineInstr *NewMI = nullptr;
589
590 // Case 1.
591 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
592 // which can be turned into add %reg2, %reg1
593 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
594 (DestReg == BaseReg || DestReg == IndexReg)) {
595 unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
596 if (DestReg != BaseReg)
597 std::swap(BaseReg, IndexReg);
598
599 if (MI.getOpcode() == X86::LEA64_32r) {
600 // TODO: Do we need the super register implicit use?
601 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
602 .addReg(BaseReg).addReg(IndexReg)
603 .addReg(Base.getReg(), RegState::Implicit)
604 .addReg(Index.getReg(), RegState::Implicit);
605 } else {
606 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
607 .addReg(BaseReg).addReg(IndexReg);
608 }
609 } else if (DestReg == BaseReg && IndexReg == 0) {
610 // Case 2.
611 // This is an LEA with only a base register and a displacement,
612 // We can use ADDri or INC/DEC.
613
614 // Does this LEA have one these forms:
615 // lea %reg, 1(%reg)
616 // lea %reg, -1(%reg)
617 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
618 bool IsINC = Disp.getImm() == 1;
619 unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
620
621 if (MI.getOpcode() == X86::LEA64_32r) {
622 // TODO: Do we need the super register implicit use?
623 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
624 .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
625 } else {
626 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
627 .addReg(BaseReg);
628 }
629 } else {
630 unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
631 if (MI.getOpcode() == X86::LEA64_32r) {
632 // TODO: Do we need the super register implicit use?
633 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
634 .addReg(BaseReg).addImm(Disp.getImm())
635 .addReg(Base.getReg(), RegState::Implicit);
636 } else {
637 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
638 .addReg(BaseReg).addImm(Disp.getImm());
639 }
640 }
641 } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
642 // Case 3.
643 // Look for and transform the sequence
644 // lea (reg1, reg2), reg3
645 // sub reg3, reg4
646 return optLEAALU(I, MBB);
647 } else
648 return false;
649
650 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
651 MBB.erase(I);
652 I = NewMI;
653 return true;
654 }
655
processInstruction(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)656 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
657 MachineBasicBlock &MBB) {
658 // Process a load, store, or LEA instruction.
659 MachineInstr &MI = *I;
660 const MCInstrDesc &Desc = MI.getDesc();
661 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
662 if (AddrOffset >= 0) {
663 AddrOffset += X86II::getOperandBias(Desc);
664 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
665 if (p.isReg() && p.getReg() != X86::ESP) {
666 seekLEAFixup(p, I, MBB);
667 }
668 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
669 if (q.isReg() && q.getReg() != X86::ESP) {
670 seekLEAFixup(q, I, MBB);
671 }
672 }
673 }
674
seekLEAFixup(MachineOperand & p,MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)675 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
676 MachineBasicBlock::iterator &I,
677 MachineBasicBlock &MBB) {
678 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
679 if (MBI != MachineBasicBlock::iterator()) {
680 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
681 if (NewMI) {
682 ++NumLEAs;
683 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
684 // now to replace with an equivalent LEA...
685 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
686 MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
687 MBB.erase(MBI);
688 MachineBasicBlock::iterator J =
689 static_cast<MachineBasicBlock::iterator>(NewMI);
690 processInstruction(J, MBB);
691 }
692 }
693 }
694
processInstructionForSlowLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB)695 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
696 MachineBasicBlock &MBB) {
697 MachineInstr &MI = *I;
698 const unsigned Opcode = MI.getOpcode();
699
700 const MachineOperand &Dst = MI.getOperand(0);
701 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
702 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
703 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
704 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
705 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
706
707 if (Segment.getReg() != 0 || !Offset.isImm() ||
708 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
709 MachineBasicBlock::LQR_Dead)
710 return;
711 const Register DstR = Dst.getReg();
712 const Register SrcR1 = Base.getReg();
713 const Register SrcR2 = Index.getReg();
714 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
715 return;
716 if (Scale.getImm() > 1)
717 return;
718 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
719 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
720 MachineInstr *NewMI = nullptr;
721 // Make ADD instruction for two registers writing to LEA's destination
722 if (SrcR1 != 0 && SrcR2 != 0) {
723 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
724 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
725 NewMI =
726 BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
727 LLVM_DEBUG(NewMI->dump(););
728 }
729 // Make ADD instruction for immediate
730 if (Offset.getImm() != 0) {
731 const MCInstrDesc &ADDri =
732 TII->get(getADDriFromLEA(Opcode, Offset));
733 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
734 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
735 .add(SrcR)
736 .addImm(Offset.getImm());
737 LLVM_DEBUG(NewMI->dump(););
738 }
739 if (NewMI) {
740 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
741 MBB.erase(I);
742 I = NewMI;
743 }
744 }
745
processInstrForSlow3OpLEA(MachineBasicBlock::iterator & I,MachineBasicBlock & MBB,bool OptIncDec)746 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
747 MachineBasicBlock &MBB,
748 bool OptIncDec) {
749 MachineInstr &MI = *I;
750 const unsigned LEAOpcode = MI.getOpcode();
751
752 const MachineOperand &Dest = MI.getOperand(0);
753 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
754 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
755 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
756 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
757 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
758
759 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
760 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
761 MachineBasicBlock::LQR_Dead ||
762 Segment.getReg() != X86::NoRegister)
763 return;
764
765 Register DestReg = Dest.getReg();
766 Register BaseReg = Base.getReg();
767 Register IndexReg = Index.getReg();
768
769 if (MI.getOpcode() == X86::LEA64_32r) {
770 if (BaseReg != 0)
771 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
772 if (IndexReg != 0)
773 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
774 }
775
776 bool IsScale1 = Scale.getImm() == 1;
777 bool IsInefficientBase = isInefficientLEAReg(BaseReg);
778 bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
779
780 // Skip these cases since it takes more than 2 instructions
781 // to replace the LEA instruction.
782 if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
783 return;
784
785 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
786 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
787
788 MachineInstr *NewMI = nullptr;
789
790 // First try to replace LEA with one or two (for the 3-op LEA case)
791 // add instructions:
792 // 1.lea (%base,%index,1), %base => add %index,%base
793 // 2.lea (%base,%index,1), %index => add %base,%index
794 if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
795 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
796 if (DestReg != BaseReg)
797 std::swap(BaseReg, IndexReg);
798
799 if (MI.getOpcode() == X86::LEA64_32r) {
800 // TODO: Do we need the super register implicit use?
801 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
802 .addReg(BaseReg)
803 .addReg(IndexReg)
804 .addReg(Base.getReg(), RegState::Implicit)
805 .addReg(Index.getReg(), RegState::Implicit);
806 } else {
807 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
808 .addReg(BaseReg)
809 .addReg(IndexReg);
810 }
811 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
812 // If the base is inefficient try switching the index and base operands,
813 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
814 // lea offset(%base,%index,scale),%dst =>
815 // lea (%base,%index,scale); add offset,%dst
816 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
817 .add(Dest)
818 .add(IsInefficientBase ? Index : Base)
819 .add(Scale)
820 .add(IsInefficientBase ? Base : Index)
821 .addImm(0)
822 .add(Segment);
823 LLVM_DEBUG(NewMI->dump(););
824 }
825
826 // If either replacement succeeded above, add the offset if needed, then
827 // replace the instruction.
828 if (NewMI) {
829 // Create ADD instruction for the Offset in case of 3-Ops LEA.
830 if (hasLEAOffset(Offset)) {
831 if (OptIncDec && Offset.isImm() &&
832 (Offset.getImm() == 1 || Offset.getImm() == -1)) {
833 unsigned NewOpc =
834 getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
835 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
836 .addReg(DestReg);
837 LLVM_DEBUG(NewMI->dump(););
838 } else {
839 unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
840 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
841 .addReg(DestReg)
842 .add(Offset);
843 LLVM_DEBUG(NewMI->dump(););
844 }
845 }
846
847 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
848 MBB.erase(I);
849 I = NewMI;
850 return;
851 }
852
853 // Handle the rest of the cases with inefficient base register:
854 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
855 assert(IsInefficientBase && "efficient base should be handled already!");
856
857 // FIXME: Handle LEA64_32r.
858 if (LEAOpcode == X86::LEA64_32r)
859 return;
860
861 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
862 if (IsScale1 && !hasLEAOffset(Offset)) {
863 bool BIK = Base.isKill() && BaseReg != IndexReg;
864 TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
865 LLVM_DEBUG(MI.getPrevNode()->dump(););
866
867 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
868 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
869 .addReg(DestReg)
870 .add(Index);
871 LLVM_DEBUG(NewMI->dump(););
872
873 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
874 MBB.erase(I);
875 I = NewMI;
876 return;
877 }
878
879 // lea offset(%base,%index,scale), %dst =>
880 // lea offset( ,%index,scale), %dst; add %base,%dst
881 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
882 .add(Dest)
883 .addReg(0)
884 .add(Scale)
885 .add(Index)
886 .add(Offset)
887 .add(Segment);
888 LLVM_DEBUG(NewMI->dump(););
889
890 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
891 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
892 .addReg(DestReg)
893 .add(Base);
894 LLVM_DEBUG(NewMI->dump(););
895
896 MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
897 MBB.erase(I);
898 I = NewMI;
899 }
900