1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
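  // Wrapper for an address operand as FastISel builds it: either a register
  // base or a frame-index base, plus an optional offset register (possibly
  // sign-/zero-extended and shifted), an immediate offset, and a global value.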
80   class Address {
81   public:
82     using BaseKind = enum {
83       RegBase,
84       FrameIndexBase
85     };
86 
87   private:
88     BaseKind Kind = RegBase;
89     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90     union {
91       unsigned Reg;
92       int FI;
93     } Base;
94     unsigned OffsetReg = 0;
95     unsigned Shift = 0;
96     int64_t Offset = 0;
97     const GlobalValue *GV = nullptr;
98 
99   public:
100     Address() { Base.Reg = 0; }
101 
102     void setKind(BaseKind K) { Kind = K; }
103     BaseKind getKind() const { return Kind; }
104     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106     bool isRegBase() const { return Kind == RegBase; }
107     bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109     void setReg(unsigned Reg) {
110       assert(isRegBase() && "Invalid base register access!");
111       Base.Reg = Reg;
112     }
113 
114     unsigned getReg() const {
115       assert(isRegBase() && "Invalid base register access!");
116       return Base.Reg;
117     }
118 
119     void setOffsetReg(unsigned Reg) {
120       OffsetReg = Reg;
121     }
122 
123     unsigned getOffsetReg() const {
124       return OffsetReg;
125     }
126 
127     void setFI(unsigned FI) {
128       assert(isFIBase() && "Invalid base frame index access!");
129       Base.FI = FI;
130     }
131 
132     unsigned getFI() const {
133       assert(isFIBase() && "Invalid base frame index access!");
134       return Base.FI;
135     }
136 
137     void setOffset(int64_t O) { Offset = O; }
138     int64_t getOffset() { return Offset; }
139     void setShift(unsigned S) { Shift = S; }
140     unsigned getShift() { return Shift; }
141 
142     void setGlobalValue(const GlobalValue *G) { GV = G; }
143     const GlobalValue *getGlobalValue() { return GV; }
144   };
145 
146   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147   /// make the right decision when generating code for different targets.
148   const AArch64Subtarget *Subtarget;
149   LLVMContext *Context;
150 
151   bool fastLowerArguments() override;
152   bool fastLowerCall(CallLoweringInfo &CLI) override;
153   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156   // Selection routines.
157   bool selectAddSub(const Instruction *I);
158   bool selectLogicalOp(const Instruction *I);
159   bool selectLoad(const Instruction *I);
160   bool selectStore(const Instruction *I);
161   bool selectBranch(const Instruction *I);
162   bool selectIndirectBr(const Instruction *I);
163   bool selectCmp(const Instruction *I);
164   bool selectSelect(const Instruction *I);
165   bool selectFPExt(const Instruction *I);
166   bool selectFPTrunc(const Instruction *I);
167   bool selectFPToInt(const Instruction *I, bool Signed);
168   bool selectIntToFP(const Instruction *I, bool Signed);
169   bool selectRem(const Instruction *I, unsigned ISDOpcode);
170   bool selectRet(const Instruction *I);
171   bool selectTrunc(const Instruction *I);
172   bool selectIntExt(const Instruction *I);
173   bool selectMul(const Instruction *I);
174   bool selectShift(const Instruction *I);
175   bool selectBitCast(const Instruction *I);
176   bool selectFRem(const Instruction *I);
177   bool selectSDiv(const Instruction *I);
178   bool selectGetElementPtr(const Instruction *I);
179   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181   // Utility helper routines.
182   bool isTypeLegal(Type *Ty, MVT &VT);
183   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184   bool isValueAvailable(const Value *V) const;
185   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186   bool computeCallAddress(const Value *V, Address &Addr);
187   bool simplifyAddress(Address &Addr, MVT VT);
188   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189                             MachineMemOperand::Flags Flags,
190                             unsigned ScaleFactor, MachineMemOperand *MMO);
191   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193                           unsigned Alignment);
194   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195                          const Value *Cond);
196   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197   bool optimizeSelect(const SelectInst *SI);
198   unsigned getRegForGEPIndex(const Value *Idx);
199 
200   // Emit helper routines.
201   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202                       const Value *RHS, bool SetFlags = false,
203                       bool WantResult = true,  bool IsZExt = false);
204   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205                          unsigned RHSReg, bool SetFlags = false,
206                          bool WantResult = true);
207   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208                          uint64_t Imm, bool SetFlags = false,
209                          bool WantResult = true);
210   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
212                          uint64_t ShiftImm, bool SetFlags = false,
213                          bool WantResult = true);
214   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
215                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
216                          uint64_t ShiftImm, bool SetFlags = false,
217                          bool WantResult = true);
218 
219   // Emit functions.
220   bool emitCompareAndBranch(const BranchInst *BI);
221   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
222   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
223   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
224   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
225   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
226                     MachineMemOperand *MMO = nullptr);
227   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
228                  MachineMemOperand *MMO = nullptr);
229   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
230                         MachineMemOperand *MMO = nullptr);
231   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
232   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
233   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
234                    bool SetFlags = false, bool WantResult = true,
235                    bool IsZExt = false);
236   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
237   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
238                    bool SetFlags = false, bool WantResult = true,
239                    bool IsZExt = false);
240   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
241                        bool WantResult = true);
242   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
244                        bool WantResult = true);
245   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
246                          const Value *RHS);
247   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
248                             uint64_t Imm);
249   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250                             unsigned RHSReg, uint64_t ShiftImm);
251   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
252   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
253   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
256   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
257                       bool IsZExt = true);
258   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260                       bool IsZExt = true);
261   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263                       bool IsZExt = false);
264 
265   unsigned materializeInt(const ConstantInt *CI, MVT VT);
266   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
267   unsigned materializeGV(const GlobalValue *GV);
268 
269   // Call handling routines.
270 private:
271   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
272   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
273                        unsigned &NumBytes);
274   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
275 
276 public:
277   // Backend specific FastISel code.
278   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
279   unsigned fastMaterializeConstant(const Constant *C) override;
280   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
281 
282   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
283                            const TargetLibraryInfo *LibInfo)
284       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
285     Subtarget =
286         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
287     Context = &FuncInfo.Fn->getContext();
288   }
289 
290   bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300          "Unexpected integer extend instruction.");
301   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302          "Unexpected value type.");
303   bool IsZExt = isa<ZExtInst>(I);
304 
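  // An extend of a single-use load is folded into the load itself, and an
  // argument that already carries a matching zeroext/signext attribute arrives
  // extended per the ABI, so in both cases the extend costs no extra instruction.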
305   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306     if (LI->hasOneUse())
307       return true;
308 
309   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311       return true;
312 
313   return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319   switch (VT.SimpleTy) {
320   default:
321     return 0;    // invalid
322   case MVT::i1:  // fall-through
323   case MVT::i8:
324     return 1;
325   case MVT::i16:
326     return 2;
327   case MVT::i32: // fall-through
328   case MVT::f32:
329     return 4;
330   case MVT::i64: // fall-through
331   case MVT::f64:
332     return 8;
333   }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337   if (CC == CallingConv::WebKit_JS)
338     return CC_AArch64_WebKit_JS;
339   if (CC == CallingConv::GHC)
340     return CC_AArch64_GHC;
341   if (CC == CallingConv::CFGuard_Check)
342     return CC_AArch64_Win64_CFGuard_Check;
343   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348          "Alloca should always return a pointer.");
349 
350   // Don't handle dynamic allocas.
351   if (!FuncInfo.StaticAllocaMap.count(AI))
352     return 0;
353 
354   DenseMap<const AllocaInst *, int>::iterator SI =
355       FuncInfo.StaticAllocaMap.find(AI);
356 
357   if (SI != FuncInfo.StaticAllocaMap.end()) {
358     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
360             ResultReg)
361         .addFrameIndex(SI->second)
362         .addImm(0)
363         .addImm(0);
364     return ResultReg;
365   }
366 
367   return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371   if (VT > MVT::i64)
372     return 0;
373 
374   if (!CI->isZero())
375     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377   // Create a copy from the zero register to materialize a "0" value.
378   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379                                                    : &AArch64::GPR32RegClass;
380   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381   unsigned ResultReg = createResultReg(RC);
382   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
383           ResultReg).addReg(ZeroReg, getKillRegState(true));
384   return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388   // Positive zero (+0.0) has to be materialized with a fmov from the zero
389   // register, because the immediate version of fmov cannot encode zero.
390   if (CFP->isNullValue())
391     return fastMaterializeFloatZero(CFP);
392 
393   if (VT != MVT::f32 && VT != MVT::f64)
394     return 0;
395 
396   const APFloat Val = CFP->getValueAPF();
397   bool Is64Bit = (VT == MVT::f64);
398   // This checks to see if we can use FMOV instructions to materialize
399   // a constant, otherwise we have to materialize via the constant pool.
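  // For example, 1.0, 0.5, and 3.0 fit the 8-bit FMOV immediate encoding,
  // while a value such as 0.1 does not and takes one of the paths below.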
400   int Imm =
401       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402   if (Imm != -1) {
403     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405   }
406 
407   // For the large code model materialize the FP constant in code.
408   if (TM.getCodeModel() == CodeModel::Large) {
409     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410     const TargetRegisterClass *RC = Is64Bit ?
411         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413     unsigned TmpReg = createResultReg(RC);
414     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
415         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
419             TII.get(TargetOpcode::COPY), ResultReg)
420         .addReg(TmpReg, getKillRegState(true));
421 
422     return ResultReg;
423   }
424 
425   // Materialize via constant pool.  MachineConstantPool wants an explicit
426   // alignment.
427   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
436   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
437       .addReg(ADRPReg)
438       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439   return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443   // We can't handle thread-local variables quickly yet.
444   if (GV->isThreadLocal())
445     return 0;
446 
447   // MachO still uses GOT for large code-model accesses, but ELF requires
448   // movz/movk sequences, which FastISel doesn't handle yet.
449   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450     return 0;
451 
452   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455   if (!DestEVT.isSimple())
456     return 0;
457 
458   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459   unsigned ResultReg;
460 
461   if (OpFlags & AArch64II::MO_GOT) {
462     // ADRP + LDRX
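    // ADRP materializes the 4 KiB page address of the GOT entry; the LDR then
    // loads the actual pointer using the low 12 bits (:lo12:) as page offset.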
463     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
464             ADRPReg)
465         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467     unsigned LdrOpc;
468     if (Subtarget->isTargetILP32()) {
469       ResultReg = createResultReg(&AArch64::GPR32RegClass);
470       LdrOpc = AArch64::LDRWui;
471     } else {
472       ResultReg = createResultReg(&AArch64::GPR64RegClass);
473       LdrOpc = AArch64::LDRXui;
474     }
475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
476             ResultReg)
477       .addReg(ADRPReg)
478       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479                         AArch64II::MO_NC | OpFlags);
480     if (!Subtarget->isTargetILP32())
481       return ResultReg;
482 
483     // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
484     // so we must extend the result on ILP32.
485     unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
486     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
487             TII.get(TargetOpcode::SUBREG_TO_REG))
488         .addDef(Result64)
489         .addImm(0)
490         .addReg(ResultReg, RegState::Kill)
491         .addImm(AArch64::sub_32);
492     return Result64;
493   } else {
494     // ADRP + ADDX
495     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
496             ADRPReg)
497         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
501             ResultReg)
502         .addReg(ADRPReg)
503         .addGlobalAddress(GV, 0,
504                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
505         .addImm(0);
506   }
507   return ResultReg;
508 }
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513   // Only handle simple types.
514   if (!CEVT.isSimple())
515     return 0;
516   MVT VT = CEVT.getSimpleVT();
517   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518   // 'null' pointers need somewhat special treatment.
519   if (isa<ConstantPointerNull>(C)) {
520     assert(VT == MVT::i64 && "Expected 64-bit pointers");
521     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522   }
523 
524   if (const auto *CI = dyn_cast<ConstantInt>(C))
525     return materializeInt(CI, VT);
526   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527     return materializeFP(CFP, VT);
528   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529     return materializeGV(GV);
530 
531   return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535   assert(CFP->isNullValue() &&
536          "Floating-point constant is not a positive zero.");
537   MVT VT;
538   if (!isTypeLegal(CFP->getType(), VT))
539     return 0;
540 
541   if (VT != MVT::f32 && VT != MVT::f64)
542     return 0;
543 
544   bool Is64Bit = (VT == MVT::f64);
545   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552   if (const auto *MI = dyn_cast<MulOperator>(I)) {
553     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554       if (C->getValue().isPowerOf2())
555         return true;
556     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557       if (C->getValue().isPowerOf2())
558         return true;
559   }
560   return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566   const User *U = nullptr;
567   unsigned Opcode = Instruction::UserOp1;
568   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569     // Don't walk into other basic blocks unless the object is an alloca from
570     // another block, otherwise it may not have a virtual register assigned.
571     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573       Opcode = I->getOpcode();
574       U = I;
575     }
576   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577     Opcode = C->getOpcode();
578     U = C;
579   }
580 
581   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582     if (Ty->getAddressSpace() > 255)
583       // Fast instruction selection doesn't support the special
584       // address spaces.
585       return false;
586 
587   switch (Opcode) {
588   default:
589     break;
590   case Instruction::BitCast:
591     // Look through bitcasts.
592     return computeAddress(U->getOperand(0), Addr, Ty);
593 
594   case Instruction::IntToPtr:
595     // Look past no-op inttoptrs.
596     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597         TLI.getPointerTy(DL))
598       return computeAddress(U->getOperand(0), Addr, Ty);
599     break;
600 
601   case Instruction::PtrToInt:
602     // Look past no-op ptrtoints.
603     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604       return computeAddress(U->getOperand(0), Addr, Ty);
605     break;
606 
607   case Instruction::GetElementPtr: {
608     Address SavedAddr = Addr;
609     uint64_t TmpOffset = Addr.getOffset();
610 
611     // Iterate through the GEP folding the constants into offsets where
612     // we can.
613     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614          GTI != E; ++GTI) {
615       const Value *Op = GTI.getOperand();
616       if (StructType *STy = GTI.getStructTypeOrNull()) {
617         const StructLayout *SL = DL.getStructLayout(STy);
618         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619         TmpOffset += SL->getElementOffset(Idx);
620       } else {
621         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622         while (true) {
623           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624             // Constant-offset addressing.
625             TmpOffset += CI->getSExtValue() * S;
626             break;
627           }
628           if (canFoldAddIntoGEP(U, Op)) {
629             // A compatible add with a constant operand. Fold the constant.
630             ConstantInt *CI =
631                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632             TmpOffset += CI->getSExtValue() * S;
633             // Iterate on the other operand.
634             Op = cast<AddOperator>(Op)->getOperand(0);
635             continue;
636           }
637           // Unsupported
638           goto unsupported_gep;
639         }
640       }
641     }
642 
643     // Try to grab the base operand now.
644     Addr.setOffset(TmpOffset);
645     if (computeAddress(U->getOperand(0), Addr, Ty))
646       return true;
647 
648     // We failed, restore everything and try the other options.
649     Addr = SavedAddr;
650 
651   unsupported_gep:
652     break;
653   }
654   case Instruction::Alloca: {
655     const AllocaInst *AI = cast<AllocaInst>(Obj);
656     DenseMap<const AllocaInst *, int>::iterator SI =
657         FuncInfo.StaticAllocaMap.find(AI);
658     if (SI != FuncInfo.StaticAllocaMap.end()) {
659       Addr.setKind(Address::FrameIndexBase);
660       Addr.setFI(SI->second);
661       return true;
662     }
663     break;
664   }
665   case Instruction::Add: {
666     // Adds of constants are common and easy enough.
667     const Value *LHS = U->getOperand(0);
668     const Value *RHS = U->getOperand(1);
669 
670     if (isa<ConstantInt>(LHS))
671       std::swap(LHS, RHS);
672 
673     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675       return computeAddress(LHS, Addr, Ty);
676     }
677 
678     Address Backup = Addr;
679     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680       return true;
681     Addr = Backup;
682 
683     break;
684   }
685   case Instruction::Sub: {
686     // Subs of constants are common and easy enough.
687     const Value *LHS = U->getOperand(0);
688     const Value *RHS = U->getOperand(1);
689 
690     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692       return computeAddress(LHS, Addr, Ty);
693     }
694     break;
695   }
696   case Instruction::Shl: {
697     if (Addr.getOffsetReg())
698       break;
699 
700     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701     if (!CI)
702       break;
703 
704     unsigned Val = CI->getZExtValue();
705     if (Val < 1 || Val > 3)
706       break;
707 
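    // The register-offset addressing mode can only scale by the access size,
    // so the shift amount must equal log2 of the number of bytes accessed.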
708     uint64_t NumBytes = 0;
709     if (Ty && Ty->isSized()) {
710       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711       NumBytes = NumBits / 8;
712       if (!isPowerOf2_64(NumBits))
713         NumBytes = 0;
714     }
715 
716     if (NumBytes != (1ULL << Val))
717       break;
718 
719     Addr.setShift(Val);
720     Addr.setExtendType(AArch64_AM::LSL);
721 
722     const Value *Src = U->getOperand(0);
723     if (const auto *I = dyn_cast<Instruction>(Src)) {
724       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725         // Fold the zext or sext when it won't become a noop.
726         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727           if (!isIntExtFree(ZE) &&
728               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729             Addr.setExtendType(AArch64_AM::UXTW);
730             Src = ZE->getOperand(0);
731           }
732         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733           if (!isIntExtFree(SE) &&
734               SE->getOperand(0)->getType()->isIntegerTy(32)) {
735             Addr.setExtendType(AArch64_AM::SXTW);
736             Src = SE->getOperand(0);
737           }
738         }
739       }
740     }
741 
742     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743       if (AI->getOpcode() == Instruction::And) {
744         const Value *LHS = AI->getOperand(0);
745         const Value *RHS = AI->getOperand(1);
746 
747         if (const auto *C = dyn_cast<ConstantInt>(LHS))
748           if (C->getValue() == 0xffffffff)
749             std::swap(LHS, RHS);
750 
751         if (const auto *C = dyn_cast<ConstantInt>(RHS))
752           if (C->getValue() == 0xffffffff) {
753             Addr.setExtendType(AArch64_AM::UXTW);
754             unsigned Reg = getRegForValue(LHS);
755             if (!Reg)
756               return false;
757             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758             Addr.setOffsetReg(Reg);
759             return true;
760           }
761       }
762 
763     unsigned Reg = getRegForValue(Src);
764     if (!Reg)
765       return false;
766     Addr.setOffsetReg(Reg);
767     return true;
768   }
769   case Instruction::Mul: {
770     if (Addr.getOffsetReg())
771       break;
772 
773     if (!isMulPowOf2(U))
774       break;
775 
776     const Value *LHS = U->getOperand(0);
777     const Value *RHS = U->getOperand(1);
778 
779     // Canonicalize power-of-2 value to the RHS.
780     if (const auto *C = dyn_cast<ConstantInt>(LHS))
781       if (C->getValue().isPowerOf2())
782         std::swap(LHS, RHS);
783 
784     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
785     const auto *C = cast<ConstantInt>(RHS);
786     unsigned Val = C->getValue().logBase2();
787     if (Val < 1 || Val > 3)
788       break;
789 
790     uint64_t NumBytes = 0;
791     if (Ty && Ty->isSized()) {
792       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793       NumBytes = NumBits / 8;
794       if (!isPowerOf2_64(NumBits))
795         NumBytes = 0;
796     }
797 
798     if (NumBytes != (1ULL << Val))
799       break;
800 
801     Addr.setShift(Val);
802     Addr.setExtendType(AArch64_AM::LSL);
803 
804     const Value *Src = LHS;
805     if (const auto *I = dyn_cast<Instruction>(Src)) {
806       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807         // Fold the zext or sext when it won't become a noop.
808         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809           if (!isIntExtFree(ZE) &&
810               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811             Addr.setExtendType(AArch64_AM::UXTW);
812             Src = ZE->getOperand(0);
813           }
814         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815           if (!isIntExtFree(SE) &&
816               SE->getOperand(0)->getType()->isIntegerTy(32)) {
817             Addr.setExtendType(AArch64_AM::SXTW);
818             Src = SE->getOperand(0);
819           }
820         }
821       }
822     }
823 
824     unsigned Reg = getRegForValue(Src);
825     if (!Reg)
826       return false;
827     Addr.setOffsetReg(Reg);
828     return true;
829   }
830   case Instruction::And: {
831     if (Addr.getOffsetReg())
832       break;
833 
834     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835       break;
836 
837     const Value *LHS = U->getOperand(0);
838     const Value *RHS = U->getOperand(1);
839 
840     if (const auto *C = dyn_cast<ConstantInt>(LHS))
841       if (C->getValue() == 0xffffffff)
842         std::swap(LHS, RHS);
843 
844     if (const auto *C = dyn_cast<ConstantInt>(RHS))
845       if (C->getValue() == 0xffffffff) {
846         Addr.setShift(0);
847         Addr.setExtendType(AArch64_AM::LSL);
848         Addr.setExtendType(AArch64_AM::UXTW);
849 
850         unsigned Reg = getRegForValue(LHS);
851         if (!Reg)
852           return false;
853         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854         Addr.setOffsetReg(Reg);
855         return true;
856       }
857     break;
858   }
859   case Instruction::SExt:
860   case Instruction::ZExt: {
861     if (!Addr.getReg() || Addr.getOffsetReg())
862       break;
863 
864     const Value *Src = nullptr;
865     // Fold the zext or sext when it won't become a noop.
866     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868         Addr.setExtendType(AArch64_AM::UXTW);
869         Src = ZE->getOperand(0);
870       }
871     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873         Addr.setExtendType(AArch64_AM::SXTW);
874         Src = SE->getOperand(0);
875       }
876     }
877 
878     if (!Src)
879       break;
880 
881     Addr.setShift(0);
882     unsigned Reg = getRegForValue(Src);
883     if (!Reg)
884       return false;
885     Addr.setOffsetReg(Reg);
886     return true;
887   }
888   } // end switch
889 
890   if (Addr.isRegBase() && !Addr.getReg()) {
891     unsigned Reg = getRegForValue(Obj);
892     if (!Reg)
893       return false;
894     Addr.setReg(Reg);
895     return true;
896   }
897 
898   if (!Addr.getOffsetReg()) {
899     unsigned Reg = getRegForValue(Obj);
900     if (!Reg)
901       return false;
902     Addr.setOffsetReg(Reg);
903     return true;
904   }
905 
906   return false;
907 }
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910   const User *U = nullptr;
911   unsigned Opcode = Instruction::UserOp1;
912   bool InMBB = true;
913 
914   if (const auto *I = dyn_cast<Instruction>(V)) {
915     Opcode = I->getOpcode();
916     U = I;
917     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919     Opcode = C->getOpcode();
920     U = C;
921   }
922 
923   switch (Opcode) {
924   default: break;
925   case Instruction::BitCast:
926     // Look past bitcasts if its operand is in the same BB.
927     if (InMBB)
928       return computeCallAddress(U->getOperand(0), Addr);
929     break;
930   case Instruction::IntToPtr:
931     // Look past no-op inttoptrs if its operand is in the same BB.
932     if (InMBB &&
933         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934             TLI.getPointerTy(DL))
935       return computeCallAddress(U->getOperand(0), Addr);
936     break;
937   case Instruction::PtrToInt:
938     // Look past no-op ptrtoints if its operand is in the same BB.
939     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940       return computeCallAddress(U->getOperand(0), Addr);
941     break;
942   }
943 
944   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945     Addr.setGlobalValue(GV);
946     return true;
947   }
948 
949   // If all else fails, try to materialize the value in a register.
950   if (!Addr.getGlobalValue()) {
951     Addr.setReg(getRegForValue(V));
952     return Addr.getReg() != 0;
953   }
954 
955   return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959   EVT evt = TLI.getValueType(DL, Ty, true);
960 
961   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962     return false;
963 
964   // Only handle simple types.
965   if (evt == MVT::Other || !evt.isSimple())
966     return false;
967   VT = evt.getSimpleVT();
968 
969   // This is a legal type, but it's not something we handle in fast-isel.
970   if (VT == MVT::f128)
971     return false;
972 
973   // Handle all other legal types, i.e. a register that will directly hold this
974   // value.
975   return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value types such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983   if (Ty->isVectorTy() && !IsVectorAllowed)
984     return false;
985 
986   if (isTypeLegal(Ty, VT))
987     return true;
988 
989   // If this is a type that can be sign- or zero-extended to a basic operation,
990   // go ahead and accept it now.
991   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992     return true;
993 
994   return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998   if (!isa<Instruction>(V))
999     return true;
1000 
1001   const auto *I = cast<Instruction>(V);
1002   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006   if (Subtarget->isTargetILP32())
1007     return false;
1008 
1009   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010   if (!ScaleFactor)
1011     return false;
1012 
1013   bool ImmediateOffsetNeedsLowering = false;
1014   bool RegisterOffsetNeedsLowering = false;
1015   int64_t Offset = Addr.getOffset();
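  // Negative or unaligned offsets must fit the 9-bit signed immediate of the
  // unscaled LDUR/STUR forms; aligned non-negative offsets use the 12-bit
  // unsigned immediate that is implicitly scaled by the access size.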
1016   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017     ImmediateOffsetNeedsLowering = true;
1018   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019            !isUInt<12>(Offset / ScaleFactor))
1020     ImmediateOffsetNeedsLowering = true;
1021 
1022   // Cannot encode an offset register and an immediate offset in the same
1023   // instruction. Fold the immediate offset into the load/store instruction and
1024   // emit an additional add to take care of the offset register.
1025   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026     RegisterOffsetNeedsLowering = true;
1027 
1028   // Cannot encode zero register as base.
1029   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030     RegisterOffsetNeedsLowering = true;
1031 
1032   // If this is a stack pointer and the offset needs to be simplified then put
1033   // the alloca address into a register, set the base type back to register and
1034   // continue. This should almost never happen.
1035   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036   {
1037     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1039             ResultReg)
1040       .addFrameIndex(Addr.getFI())
1041       .addImm(0)
1042       .addImm(0);
1043     Addr.setKind(Address::RegBase);
1044     Addr.setReg(ResultReg);
1045   }
1046 
1047   if (RegisterOffsetNeedsLowering) {
1048     unsigned ResultReg = 0;
1049     if (Addr.getReg()) {
1050       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051           Addr.getExtendType() == AArch64_AM::UXTW   )
1052         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053                                   Addr.getOffsetReg(), Addr.getExtendType(),
1054                                   Addr.getShift());
1055       else
1056         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1058                                   Addr.getShift());
1059     } else {
1060       if (Addr.getExtendType() == AArch64_AM::UXTW)
1061         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062                                Addr.getShift(), /*IsZExt=*/true);
1063       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065                                Addr.getShift(), /*IsZExt=*/false);
1066       else
1067         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068                                Addr.getShift());
1069     }
1070     if (!ResultReg)
1071       return false;
1072 
1073     Addr.setReg(ResultReg);
1074     Addr.setOffsetReg(0);
1075     Addr.setShift(0);
1076     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077   }
1078 
1079   // Since the offset is too large for the load/store instruction get the
1080   // reg+offset into a register.
1081   if (ImmediateOffsetNeedsLowering) {
1082     unsigned ResultReg;
1083     if (Addr.getReg())
1084       // Try to fold the immediate into the add instruction.
1085       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086     else
1087       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089     if (!ResultReg)
1090       return false;
1091     Addr.setReg(ResultReg);
1092     Addr.setOffset(0);
1093   }
1094   return true;
1095 }
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098                                            const MachineInstrBuilder &MIB,
1099                                            MachineMemOperand::Flags Flags,
1100                                            unsigned ScaleFactor,
1101                                            MachineMemOperand *MMO) {
1102   int64_t Offset = Addr.getOffset() / ScaleFactor;
1103   // Frame base works a bit differently. Handle it separately.
1104   if (Addr.isFIBase()) {
1105     int FI = Addr.getFI();
1106     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1107     // and alignment should be based on the VT.
1108     MMO = FuncInfo.MF->getMachineMemOperand(
1109         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111     // Now add the rest of the operands.
1112     MIB.addFrameIndex(FI).addImm(Offset);
1113   } else {
1114     assert(Addr.isRegBase() && "Unexpected address kind.");
1115     const MCInstrDesc &II = MIB->getDesc();
1116     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117     Addr.setReg(
1118       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119     Addr.setOffsetReg(
1120       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121     if (Addr.getOffsetReg()) {
1122       assert(Addr.getOffset() == 0 && "Unexpected offset");
1123       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124                       Addr.getExtendType() == AArch64_AM::SXTX;
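      // The register-offset forms take two extra immediates: whether the
      // offset register is sign-extended and whether it is shifted by
      // log2 of the access size.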
1125       MIB.addReg(Addr.getReg());
1126       MIB.addReg(Addr.getOffsetReg());
1127       MIB.addImm(IsSigned);
1128       MIB.addImm(Addr.getShift() != 0);
1129     } else
1130       MIB.addReg(Addr.getReg()).addImm(Offset);
1131   }
1132 
1133   if (MMO)
1134     MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138                                      const Value *RHS, bool SetFlags,
1139                                      bool WantResult,  bool IsZExt) {
1140   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1141   bool NeedExtend = false;
1142   switch (RetVT.SimpleTy) {
1143   default:
1144     return 0;
1145   case MVT::i1:
1146     NeedExtend = true;
1147     break;
1148   case MVT::i8:
1149     NeedExtend = true;
1150     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151     break;
1152   case MVT::i16:
1153     NeedExtend = true;
1154     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155     break;
1156   case MVT::i32:  // fall-through
1157   case MVT::i64:
1158     break;
1159   }
1160   MVT SrcVT = RetVT;
1161   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
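  // i1/i8/i16 operands are widened first, so the arithmetic itself is always
  // performed in a 32-bit or 64-bit register.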
1162 
1163   // Canonicalize immediates to the RHS first.
1164   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165     std::swap(LHS, RHS);
1166 
1167   // Canonicalize mul by power of 2 to the RHS.
1168   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169     if (isMulPowOf2(LHS))
1170       std::swap(LHS, RHS);
1171 
1172   // Canonicalize shift immediate to the RHS.
1173   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175       if (isa<ConstantInt>(SI->getOperand(1)))
1176         if (SI->getOpcode() == Instruction::Shl  ||
1177             SI->getOpcode() == Instruction::LShr ||
1178             SI->getOpcode() == Instruction::AShr   )
1179           std::swap(LHS, RHS);
1180 
1181   unsigned LHSReg = getRegForValue(LHS);
1182   if (!LHSReg)
1183     return 0;
1184 
1185   if (NeedExtend)
1186     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188   unsigned ResultReg = 0;
1189   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191     if (C->isNegative())
1192       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193                                 WantResult);
1194     else
1195       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196                                 WantResult);
1197   } else if (const auto *C = dyn_cast<Constant>(RHS))
1198     if (C->isNullValue())
1199       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201   if (ResultReg)
1202     return ResultReg;
1203 
1204   // Only extend the RHS within the instruction if there is a valid extend type.
1205   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206       isValueAvailable(RHS)) {
1207     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1211           if (!RHSReg)
1212             return 0;
1213           return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214                                C->getZExtValue(), SetFlags, WantResult);
1215         }
1216     unsigned RHSReg = getRegForValue(RHS);
1217     if (!RHSReg)
1218       return 0;
1219     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220                          SetFlags, WantResult);
1221   }
1222 
1223   // Check if the mul can be folded into the instruction.
1224   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225     if (isMulPowOf2(RHS)) {
1226       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230         if (C->getValue().isPowerOf2())
1231           std::swap(MulLHS, MulRHS);
1232 
1233       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235       unsigned RHSReg = getRegForValue(MulLHS);
1236       if (!RHSReg)
1237         return 0;
1238       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239                                 ShiftVal, SetFlags, WantResult);
1240       if (ResultReg)
1241         return ResultReg;
1242     }
1243   }
1244 
1245   // Check if the shift can be folded into the instruction.
1246   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1249         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1250         switch (SI->getOpcode()) {
1251         default: break;
1252         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1253         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255         }
1256         uint64_t ShiftVal = C->getZExtValue();
1257         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1259           if (!RHSReg)
1260             return 0;
1261           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262                                     ShiftVal, SetFlags, WantResult);
1263           if (ResultReg)
1264             return ResultReg;
1265         }
1266       }
1267     }
1268   }
1269 
1270   unsigned RHSReg = getRegForValue(RHS);
1271   if (!RHSReg)
1272     return 0;
1273 
1274   if (NeedExtend)
1275     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281                                         unsigned RHSReg, bool SetFlags,
1282                                         bool WantResult) {
1283   assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287     return 0;
1288 
1289   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290     return 0;
1291 
1292   static const unsigned OpcTable[2][2][2] = {
1293     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1294       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1295     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1297   };
1298   bool Is64Bit = RetVT == MVT::i64;
1299   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300   const TargetRegisterClass *RC =
1301       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302   unsigned ResultReg;
1303   if (WantResult)
1304     ResultReg = createResultReg(RC);
1305   else
1306     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308   const MCInstrDesc &II = TII.get(Opc);
1309   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312       .addReg(LHSReg)
1313       .addReg(RHSReg);
1314   return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318                                         uint64_t Imm, bool SetFlags,
1319                                         bool WantResult) {
1320   assert(LHSReg && "Invalid register number.");
1321 
1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323     return 0;
1324 
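  // ADD/SUB (immediate) encodes a 12-bit unsigned value, optionally shifted
  // left by 12; e.g. 0x123000 becomes imm=0x123 with LSL #12, while anything
  // else must be materialized into a register first.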
1325   unsigned ShiftImm;
1326   if (isUInt<12>(Imm))
1327     ShiftImm = 0;
1328   else if ((Imm & 0xfff000) == Imm) {
1329     ShiftImm = 12;
1330     Imm >>= 12;
1331   } else
1332     return 0;
1333 
1334   static const unsigned OpcTable[2][2][2] = {
1335     { { AArch64::SUBWri,  AArch64::SUBXri  },
1336       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1337     { { AArch64::SUBSWri, AArch64::SUBSXri },
1338       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1339   };
1340   bool Is64Bit = RetVT == MVT::i64;
1341   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342   const TargetRegisterClass *RC;
1343   if (SetFlags)
1344     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345   else
1346     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347   unsigned ResultReg;
1348   if (WantResult)
1349     ResultReg = createResultReg(RC);
1350   else
1351     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353   const MCInstrDesc &II = TII.get(Opc);
1354   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356       .addReg(LHSReg)
1357       .addImm(Imm)
1358       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359   return ResultReg;
1360 }
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363                                         unsigned RHSReg,
1364                                         AArch64_AM::ShiftExtendType ShiftType,
1365                                         uint64_t ShiftImm, bool SetFlags,
1366                                         bool WantResult) {
1367   assert(LHSReg && RHSReg && "Invalid register number.");
1368   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372     return 0;
1373 
1374   // Don't deal with undefined shifts.
1375   if (ShiftImm >= RetVT.getSizeInBits())
1376     return 0;
1377 
1378   static const unsigned OpcTable[2][2][2] = {
1379     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1380       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1381     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1383   };
1384   bool Is64Bit = RetVT == MVT::i64;
1385   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386   const TargetRegisterClass *RC =
1387       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388   unsigned ResultReg;
1389   if (WantResult)
1390     ResultReg = createResultReg(RC);
1391   else
1392     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394   const MCInstrDesc &II = TII.get(Opc);
1395   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1398       .addReg(LHSReg)
1399       .addReg(RHSReg)
1400       .addImm(getShifterImm(ShiftType, ShiftImm));
1401   return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405                                         unsigned RHSReg,
1406                                         AArch64_AM::ShiftExtendType ExtType,
1407                                         uint64_t ShiftImm, bool SetFlags,
1408                                         bool WantResult) {
1409   assert(LHSReg && RHSReg && "Invalid register number.");
1410   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414     return 0;
1415 
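       // The extended-register form only permits a small left shift of the
       // extended operand; reject anything larger.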
1416   if (ShiftImm >= 4)
1417     return 0;
1418 
1419   static const unsigned OpcTable[2][2][2] = {
1420     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1421       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1422     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1424   };
1425   bool Is64Bit = RetVT == MVT::i64;
1426   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427   const TargetRegisterClass *RC = nullptr;
1428   if (SetFlags)
1429     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430   else
1431     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432   unsigned ResultReg;
1433   if (WantResult)
1434     ResultReg = createResultReg(RC);
1435   else
1436     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438   const MCInstrDesc &II = TII.get(Opc);
1439   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442       .addReg(LHSReg)
1443       .addReg(RHSReg)
1444       .addImm(getArithExtendImm(ExtType, ShiftImm));
1445   return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449   Type *Ty = LHS->getType();
1450   EVT EVT = TLI.getValueType(DL, Ty, true);
1451   if (!EVT.isSimple())
1452     return false;
1453   MVT VT = EVT.getSimpleVT();
1454 
1455   switch (VT.SimpleTy) {
1456   default:
1457     return false;
1458   case MVT::i1:
1459   case MVT::i8:
1460   case MVT::i16:
1461   case MVT::i32:
1462   case MVT::i64:
1463     return emitICmp(VT, LHS, RHS, IsZExt);
1464   case MVT::f32:
1465   case MVT::f64:
1466     return emitFCmp(VT, LHS, RHS);
1467   }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471                                bool IsZExt) {
1472   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473                  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483     return false;
1484 
1485   // Check to see if the 2nd operand is a constant that we can encode directly
1486   // in the compare.
1487   bool UseImm = false;
1488   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489     if (CFP->isZero() && !CFP->isNegative())
1490       UseImm = true;
1491 
1492   unsigned LHSReg = getRegForValue(LHS);
1493   if (!LHSReg)
1494     return false;
1495 
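       // FCMP with the #0.0 immediate form avoids materializing the RHS.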
1496   if (UseImm) {
1497     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1499         .addReg(LHSReg);
1500     return true;
1501   }
1502 
1503   unsigned RHSReg = getRegForValue(RHS);
1504   if (!RHSReg)
1505     return false;
1506 
1507   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1509       .addReg(LHSReg)
1510       .addReg(RHSReg);
1511   return true;
1512 }
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515                                   bool SetFlags, bool WantResult, bool IsZExt) {
1516   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517                     IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526   unsigned ResultReg;
1527   if (Imm < 0)
1528     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529   else
1530     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532   if (ResultReg)
1533     return ResultReg;
1534 
1535   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536   if (!CReg)
1537     return 0;
1538 
1539   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540   return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544                                   bool SetFlags, bool WantResult, bool IsZExt) {
1545   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546                     IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550                                       unsigned RHSReg, bool WantResult) {
1551   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552                        /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556                                       unsigned RHSReg,
1557                                       AArch64_AM::ShiftExtendType ShiftType,
1558                                       uint64_t ShiftImm, bool WantResult) {
1559   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560                        ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564                                         const Value *LHS, const Value *RHS) {
1565   // Canonicalize immediates to the RHS first.
1566   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567     std::swap(LHS, RHS);
1568 
1569   // Canonicalize mul by power-of-2 to the RHS.
1570   if (LHS->hasOneUse() && isValueAvailable(LHS))
1571     if (isMulPowOf2(LHS))
1572       std::swap(LHS, RHS);
1573 
1574   // Canonicalize shift immediate to the RHS.
1575   if (LHS->hasOneUse() && isValueAvailable(LHS))
1576     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577       if (isa<ConstantInt>(SI->getOperand(1)))
1578         std::swap(LHS, RHS);
1579 
1580   unsigned LHSReg = getRegForValue(LHS);
1581   if (!LHSReg)
1582     return 0;
1583 
1584   unsigned ResultReg = 0;
1585   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586     uint64_t Imm = C->getZExtValue();
1587     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588   }
1589   if (ResultReg)
1590     return ResultReg;
1591 
1592   // Check if the mul can be folded into the instruction.
1593   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594     if (isMulPowOf2(RHS)) {
1595       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599         if (C->getValue().isPowerOf2())
1600           std::swap(MulLHS, MulRHS);
1601 
1602       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605       unsigned RHSReg = getRegForValue(MulLHS);
1606       if (!RHSReg)
1607         return 0;
1608       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609       if (ResultReg)
1610         return ResultReg;
1611     }
1612   }
1613 
1614   // Check if the shift can be folded into the instruction.
1615   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618         uint64_t ShiftVal = C->getZExtValue();
1619         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1620         if (!RHSReg)
1621           return 0;
1622         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623         if (ResultReg)
1624           return ResultReg;
1625       }
1626   }
1627 
1628   unsigned RHSReg = getRegForValue(RHS);
1629   if (!RHSReg)
1630     return 0;
1631 
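       // Perform the operation in at least 32 bits; i8/i16 results are masked
       // back down to their width below.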
1632   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637   }
1638   return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642                                            unsigned LHSReg, uint64_t Imm) {
1643   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644                 "ISD nodes are not consecutive!");
1645   static const unsigned OpcTable[3][2] = {
1646     { AArch64::ANDWri, AArch64::ANDXri },
1647     { AArch64::ORRWri, AArch64::ORRXri },
1648     { AArch64::EORWri, AArch64::EORXri }
1649   };
1650   const TargetRegisterClass *RC;
1651   unsigned Opc;
1652   unsigned RegSize;
1653   switch (RetVT.SimpleTy) {
1654   default:
1655     return 0;
1656   case MVT::i1:
1657   case MVT::i8:
1658   case MVT::i16:
1659   case MVT::i32: {
1660     unsigned Idx = ISDOpc - ISD::AND;
1661     Opc = OpcTable[Idx][0];
1662     RC = &AArch64::GPR32spRegClass;
1663     RegSize = 32;
1664     break;
1665   }
1666   case MVT::i64:
1667     Opc = OpcTable[ISDOpc - ISD::AND][1];
1668     RC = &AArch64::GPR64spRegClass;
1669     RegSize = 64;
1670     break;
1671   }
1672 
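       // Only bitmask immediates can be encoded by the AND/ORR/EOR immediate
       // forms; reject anything that cannot be represented.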
1673   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674     return 0;
1675 
1676   unsigned ResultReg =
1677       fastEmitInst_ri(Opc, RC, LHSReg,
1678                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682   }
1683   return ResultReg;
1684 }
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687                                            unsigned LHSReg, unsigned RHSReg,
1688                                            uint64_t ShiftImm) {
1689   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690                 "ISD nodes are not consecutive!");
1691   static const unsigned OpcTable[3][2] = {
1692     { AArch64::ANDWrs, AArch64::ANDXrs },
1693     { AArch64::ORRWrs, AArch64::ORRXrs },
1694     { AArch64::EORWrs, AArch64::EORXrs }
1695   };
1696 
1697   // Don't deal with undefined shifts.
1698   if (ShiftImm >= RetVT.getSizeInBits())
1699     return 0;
1700 
1701   const TargetRegisterClass *RC;
1702   unsigned Opc;
1703   switch (RetVT.SimpleTy) {
1704   default:
1705     return 0;
1706   case MVT::i1:
1707   case MVT::i8:
1708   case MVT::i16:
1709   case MVT::i32:
1710     Opc = OpcTable[ISDOpc - ISD::AND][0];
1711     RC = &AArch64::GPR32RegClass;
1712     break;
1713   case MVT::i64:
1714     Opc = OpcTable[ISDOpc - ISD::AND][1];
1715     RC = &AArch64::GPR64RegClass;
1716     break;
1717   }
1718   unsigned ResultReg =
1719       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1720                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1721   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724   }
1725   return ResultReg;
1726 }
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729                                      uint64_t Imm) {
1730   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734                                    bool WantZExt, MachineMemOperand *MMO) {
1735   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736     return 0;
1737 
1738   // Simplify this down to something we can handle.
1739   if (!simplifyAddress(Addr, VT))
1740     return 0;
1741 
1742   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743   if (!ScaleFactor)
1744     llvm_unreachable("Unexpected value type.");
1745 
1746   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1748   bool UseScaled = true;
1749   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750     UseScaled = false;
1751     ScaleFactor = 1;
1752   }
1753 
1754   static const unsigned GPOpcTable[2][8][4] = {
1755     // Sign-extend.
1756     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1757         AArch64::LDURXi  },
1758       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1759         AArch64::LDURXi  },
1760       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1761         AArch64::LDRXui  },
1762       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1763         AArch64::LDRXui  },
1764       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765         AArch64::LDRXroX },
1766       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767         AArch64::LDRXroX },
1768       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769         AArch64::LDRXroW },
1770       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771         AArch64::LDRXroW }
1772     },
1773     // Zero-extend.
1774     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1775         AArch64::LDURXi  },
1776       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1777         AArch64::LDURXi  },
1778       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1779         AArch64::LDRXui  },
1780       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1781         AArch64::LDRXui  },
1782       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1783         AArch64::LDRXroX },
1784       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1785         AArch64::LDRXroX },
1786       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1787         AArch64::LDRXroW },
1788       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1789         AArch64::LDRXroW }
1790     }
1791   };
1792 
1793   static const unsigned FPOpcTable[4][2] = {
1794     { AArch64::LDURSi,  AArch64::LDURDi  },
1795     { AArch64::LDRSui,  AArch64::LDRDui  },
1796     { AArch64::LDRSroX, AArch64::LDRDroX },
1797     { AArch64::LDRSroW, AArch64::LDRDroW }
1798   };
1799 
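       // Idx selects the addressing form: 0 = unscaled immediate, 1 = scaled
       // unsigned immediate, 2 = register offset, 3 = extended (W) register
       // offset. The GP table has a 32-bit and a 64-bit destination row per
       // form, hence the 2 * Idx + IsRet64Bit indexing below.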
1800   unsigned Opc;
1801   const TargetRegisterClass *RC;
1802   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803                       Addr.getOffsetReg();
1804   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806       Addr.getExtendType() == AArch64_AM::SXTW)
1807     Idx++;
1808 
1809   bool IsRet64Bit = RetVT == MVT::i64;
1810   switch (VT.SimpleTy) {
1811   default:
1812     llvm_unreachable("Unexpected value type.");
1813   case MVT::i1: // Intentional fall-through.
1814   case MVT::i8:
1815     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816     RC = (IsRet64Bit && !WantZExt) ?
1817              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818     break;
1819   case MVT::i16:
1820     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821     RC = (IsRet64Bit && !WantZExt) ?
1822              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823     break;
1824   case MVT::i32:
1825     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826     RC = (IsRet64Bit && !WantZExt) ?
1827              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828     break;
1829   case MVT::i64:
1830     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831     RC = &AArch64::GPR64RegClass;
1832     break;
1833   case MVT::f32:
1834     Opc = FPOpcTable[Idx][0];
1835     RC = &AArch64::FPR32RegClass;
1836     break;
1837   case MVT::f64:
1838     Opc = FPOpcTable[Idx][1];
1839     RC = &AArch64::FPR64RegClass;
1840     break;
1841   }
1842 
1843   // Create the base instruction, then add the operands.
1844   unsigned ResultReg = createResultReg(RC);
1845   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1846                                     TII.get(Opc), ResultReg);
1847   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849   // Loading an i1 requires special handling.
1850   if (VT == MVT::i1) {
1851     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852     assert(ANDReg && "Unexpected AND instruction emission failure.");
1853     ResultReg = ANDReg;
1854   }
1855 
1856   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857   // the 32bit reg to a 64bit reg.
1858   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862         .addImm(0)
1863         .addReg(ResultReg, getKillRegState(true))
1864         .addImm(AArch64::sub_32);
1865     ResultReg = Reg64;
1866   }
1867   return ResultReg;
1868 }
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871   MVT VT;
1872   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873     return false;
1874 
1875   if (VT.isVector())
1876     return selectOperator(I, I->getOpcode());
1877 
1878   unsigned ResultReg;
1879   switch (I->getOpcode()) {
1880   default:
1881     llvm_unreachable("Unexpected instruction.");
1882   case Instruction::Add:
1883     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884     break;
1885   case Instruction::Sub:
1886     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887     break;
1888   }
1889   if (!ResultReg)
1890     return false;
1891 
1892   updateValueMap(I, ResultReg);
1893   return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897   MVT VT;
1898   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899     return false;
1900 
1901   if (VT.isVector())
1902     return selectOperator(I, I->getOpcode());
1903 
1904   unsigned ResultReg;
1905   switch (I->getOpcode()) {
1906   default:
1907     llvm_unreachable("Unexpected instruction.");
1908   case Instruction::And:
1909     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910     break;
1911   case Instruction::Or:
1912     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913     break;
1914   case Instruction::Xor:
1915     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916     break;
1917   }
1918   if (!ResultReg)
1919     return false;
1920 
1921   updateValueMap(I, ResultReg);
1922   return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
1926   MVT VT;
1927   // Verify we have a legal type before going any further.  Currently, we handle
1928   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931       cast<LoadInst>(I)->isAtomic())
1932     return false;
1933 
1934   const Value *SV = I->getOperand(0);
1935   if (TLI.supportSwiftError()) {
1936     // Swifterror values can come from either a function parameter with
1937     // swifterror attribute or an alloca with swifterror attribute.
1938     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939       if (Arg->hasSwiftErrorAttr())
1940         return false;
1941     }
1942 
1943     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944       if (Alloca->isSwiftError())
1945         return false;
1946     }
1947   }
1948 
1949   // See if we can handle this address.
1950   Address Addr;
1951   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952     return false;
1953 
1954   // Fold the following sign-/zero-extend into the load instruction.
1955   bool WantZExt = true;
1956   MVT RetVT = VT;
1957   const Value *IntExtVal = nullptr;
1958   if (I->hasOneUse()) {
1959     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960       if (isTypeSupported(ZE->getType(), RetVT))
1961         IntExtVal = ZE;
1962       else
1963         RetVT = VT;
1964     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965       if (isTypeSupported(SE->getType(), RetVT))
1966         IntExtVal = SE;
1967       else
1968         RetVT = VT;
1969       WantZExt = false;
1970     }
1971   }
1972 
1973   unsigned ResultReg =
1974       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975   if (!ResultReg)
1976     return false;
1977 
1978   // There are a few different cases we have to handle, because the load or the
1979   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980   // SelectionDAG. There is also an ordering issue when both instructions are in
1981   // different basic blocks.
1982   // 1.) The load instruction is selected by FastISel, but the integer extend
1983   //     not. This usually happens when the integer extend is in a different
1984   //     basic block and SelectionDAG took over for that basic block.
1985   // 2.) The load instruction is selected before the integer extend. This only
1986   //     happens when the integer extend is in a different basic block.
1987   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988   //     by FastISel. This happens if there are instructions between the load
1989   //     and the integer extend that couldn't be selected by FastISel.
1990   if (IntExtVal) {
1991     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993     // it when it selects the integer extend.
1994     unsigned Reg = lookUpRegForValue(IntExtVal);
1995     auto *MI = MRI.getUniqueVRegDef(Reg);
1996     if (!MI) {
1997       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998         if (WantZExt) {
1999           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2000           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001           ResultReg = std::prev(I)->getOperand(0).getReg();
2002           removeDeadCode(I, std::next(I));
2003         } else
2004           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005                                                  AArch64::sub_32);
2006       }
2007       updateValueMap(I, ResultReg);
2008       return true;
2009     }
2010 
2011     // The integer extend has already been emitted - delete all the instructions
2012     // that have been emitted by the integer extend lowering code and use the
2013     // result from the load instruction directly.
2014     while (MI) {
2015       Reg = 0;
2016       for (auto &Opnd : MI->uses()) {
2017         if (Opnd.isReg()) {
2018           Reg = Opnd.getReg();
2019           break;
2020         }
2021       }
2022       MachineBasicBlock::iterator I(MI);
2023       removeDeadCode(I, std::next(I));
2024       MI = nullptr;
2025       if (Reg)
2026         MI = MRI.getUniqueVRegDef(Reg);
2027     }
2028     updateValueMap(IntExtVal, ResultReg);
2029     return true;
2030   }
2031 
2032   updateValueMap(I, ResultReg);
2033   return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037                                        unsigned AddrReg,
2038                                        MachineMemOperand *MMO) {
2039   unsigned Opc;
2040   switch (VT.SimpleTy) {
2041   default: return false;
2042   case MVT::i8:  Opc = AArch64::STLRB; break;
2043   case MVT::i16: Opc = AArch64::STLRH; break;
2044   case MVT::i32: Opc = AArch64::STLRW; break;
2045   case MVT::i64: Opc = AArch64::STLRX; break;
2046   }
2047 
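       // STLR only supports base-register addressing, so the source and address
       // registers are used as-is with no offset operand.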
2048   const MCInstrDesc &II = TII.get(Opc);
2049   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2052       .addReg(SrcReg)
2053       .addReg(AddrReg)
2054       .addMemOperand(MMO);
2055   return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059                                 MachineMemOperand *MMO) {
2060   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061     return false;
2062 
2063   // Simplify this down to something we can handle.
2064   if (!simplifyAddress(Addr, VT))
2065     return false;
2066 
2067   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068   if (!ScaleFactor)
2069     llvm_unreachable("Unexpected value type.");
2070 
2071   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073   bool UseScaled = true;
2074   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075     UseScaled = false;
2076     ScaleFactor = 1;
2077   }
2078 
2079   static const unsigned OpcTable[4][6] = {
2080     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2081       AArch64::STURSi,   AArch64::STURDi },
2082     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2083       AArch64::STRSui,   AArch64::STRDui },
2084     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085       AArch64::STRSroX,  AArch64::STRDroX },
2086     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087       AArch64::STRSroW,  AArch64::STRDroW }
2088   };
2089 
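       // Rows of OpcTable: 0 = unscaled (STUR), 1 = scaled unsigned immediate,
       // 2 = register offset, 3 = extended (W) register offset; the columns are
       // i8, i16, i32, i64, f32 and f64.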
2090   unsigned Opc;
2091   bool VTIsi1 = false;
2092   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093                       Addr.getOffsetReg();
2094   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096       Addr.getExtendType() == AArch64_AM::SXTW)
2097     Idx++;
2098 
2099   switch (VT.SimpleTy) {
2100   default: llvm_unreachable("Unexpected value type.");
2101   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2102   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2103   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108   }
2109 
2110   // Storing an i1 requires special handling.
2111   if (VTIsi1 && SrcReg != AArch64::WZR) {
2112     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113     assert(ANDReg && "Unexpected AND instruction emission failure.");
2114     SrcReg = ANDReg;
2115   }
2116   // Create the base instruction, then add the operands.
2117   const MCInstrDesc &II = TII.get(Opc);
2118   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119   MachineInstrBuilder MIB =
2120       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2121   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123   return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127   MVT VT;
2128   const Value *Op0 = I->getOperand(0);
2129   // Verify we have a legal type before going any further.  Currently, we handle
2130   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133     return false;
2134 
2135   const Value *PtrV = I->getOperand(1);
2136   if (TLI.supportSwiftError()) {
2137     // Swifterror values can come from either a function parameter with
2138     // swifterror attribute or an alloca with swifterror attribute.
2139     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140       if (Arg->hasSwiftErrorAttr())
2141         return false;
2142     }
2143 
2144     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145       if (Alloca->isSwiftError())
2146         return false;
2147     }
2148   }
2149 
2150   // Get the value to be stored into a register. Use the zero register directly
2151   // when possible to avoid an unnecessary copy and a wasted register.
2152   unsigned SrcReg = 0;
2153   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154     if (CI->isZero())
2155       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157     if (CF->isZero() && !CF->isNegative()) {
2158       VT = MVT::getIntegerVT(VT.getSizeInBits());
2159       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160     }
2161   }
2162 
2163   if (!SrcReg)
2164     SrcReg = getRegForValue(Op0);
2165 
2166   if (!SrcReg)
2167     return false;
2168 
2169   auto *SI = cast<StoreInst>(I);
2170 
2171   // Try to emit a STLR for seq_cst/release.
2172   if (SI->isAtomic()) {
2173     AtomicOrdering Ord = SI->getOrdering();
2174     // The non-atomic instructions are sufficient for relaxed stores.
2175     if (isReleaseOrStronger(Ord)) {
2176       // The STLR addressing mode only supports a base reg; pass that directly.
2177       unsigned AddrReg = getRegForValue(PtrV);
2178       return emitStoreRelease(VT, SrcReg, AddrReg,
2179                               createMachineMemOperandFor(I));
2180     }
2181   }
2182 
2183   // See if we can handle this address.
2184   Address Addr;
2185   if (!computeAddress(PtrV, Addr, Op0->getType()))
2186     return false;
2187 
2188   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189     return false;
2190   return true;
2191 }
2192 
2193 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2194   switch (Pred) {
2195   case CmpInst::FCMP_ONE:
2196   case CmpInst::FCMP_UEQ:
2197   default:
2198     // AL is our "false" for now. The other two need more compares.
2199     return AArch64CC::AL;
2200   case CmpInst::ICMP_EQ:
2201   case CmpInst::FCMP_OEQ:
2202     return AArch64CC::EQ;
2203   case CmpInst::ICMP_SGT:
2204   case CmpInst::FCMP_OGT:
2205     return AArch64CC::GT;
2206   case CmpInst::ICMP_SGE:
2207   case CmpInst::FCMP_OGE:
2208     return AArch64CC::GE;
2209   case CmpInst::ICMP_UGT:
2210   case CmpInst::FCMP_UGT:
2211     return AArch64CC::HI;
2212   case CmpInst::FCMP_OLT:
2213     return AArch64CC::MI;
2214   case CmpInst::ICMP_ULE:
2215   case CmpInst::FCMP_OLE:
2216     return AArch64CC::LS;
2217   case CmpInst::FCMP_ORD:
2218     return AArch64CC::VC;
2219   case CmpInst::FCMP_UNO:
2220     return AArch64CC::VS;
2221   case CmpInst::FCMP_UGE:
2222     return AArch64CC::PL;
2223   case CmpInst::ICMP_SLT:
2224   case CmpInst::FCMP_ULT:
2225     return AArch64CC::LT;
2226   case CmpInst::ICMP_SLE:
2227   case CmpInst::FCMP_ULE:
2228     return AArch64CC::LE;
2229   case CmpInst::FCMP_UNE:
2230   case CmpInst::ICMP_NE:
2231     return AArch64CC::NE;
2232   case CmpInst::ICMP_UGE:
2233     return AArch64CC::HS;
2234   case CmpInst::ICMP_ULT:
2235     return AArch64CC::LO;
2236   }
2237 }
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242   // will not be produced, as they are conditional branch instructions that do
2243   // not set flags.
2244   if (FuncInfo.MF->getFunction().hasFnAttribute(
2245           Attribute::SpeculativeLoadHardening))
2246     return false;
2247 
2248   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252   const Value *LHS = CI->getOperand(0);
2253   const Value *RHS = CI->getOperand(1);
2254 
2255   MVT VT;
2256   if (!isTypeSupported(LHS->getType(), VT))
2257     return false;
2258 
2259   unsigned BW = VT.getSizeInBits();
2260   if (BW > 64)
2261     return false;
2262 
2263   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266   // Try to take advantage of fallthrough opportunities.
2267   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268     std::swap(TBB, FBB);
2269     Predicate = CmpInst::getInversePredicate(Predicate);
2270   }
2271 
2272   int TestBit = -1;
2273   bool IsCmpNE;
2274   switch (Predicate) {
2275   default:
2276     return false;
2277   case CmpInst::ICMP_EQ:
2278   case CmpInst::ICMP_NE:
2279     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280       std::swap(LHS, RHS);
2281 
2282     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283       return false;
2284 
2285     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287         const Value *AndLHS = AI->getOperand(0);
2288         const Value *AndRHS = AI->getOperand(1);
2289 
2290         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291           if (C->getValue().isPowerOf2())
2292             std::swap(AndLHS, AndRHS);
2293 
2294         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295           if (C->getValue().isPowerOf2()) {
2296             TestBit = C->getValue().logBase2();
2297             LHS = AndLHS;
2298           }
2299       }
2300 
2301     if (VT == MVT::i1)
2302       TestBit = 0;
2303 
2304     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305     break;
2306   case CmpInst::ICMP_SLT:
2307   case CmpInst::ICMP_SGE:
2308     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309       return false;
2310 
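         // x < 0 and x >= 0 reduce to a test of the sign bit.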
2311     TestBit = BW - 1;
2312     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313     break;
2314   case CmpInst::ICMP_SGT:
2315   case CmpInst::ICMP_SLE:
2316     if (!isa<ConstantInt>(RHS))
2317       return false;
2318 
2319     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320       return false;
2321 
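         // x > -1 and x <= -1 likewise reduce to a test of the sign bit.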
2322     TestBit = BW - 1;
2323     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324     break;
2325   } // end switch
2326 
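       // Pick CBZ/CBNZ when comparing the whole register against zero and
       // TBZ/TBNZ when only a single bit needs to be tested.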
2327   static const unsigned OpcTable[2][2][2] = {
2328     { {AArch64::CBZW,  AArch64::CBZX },
2329       {AArch64::CBNZW, AArch64::CBNZX} },
2330     { {AArch64::TBZW,  AArch64::TBZX },
2331       {AArch64::TBNZW, AArch64::TBNZX} }
2332   };
2333 
2334   bool IsBitTest = TestBit != -1;
2335   bool Is64Bit = BW == 64;
2336   if (TestBit < 32 && TestBit >= 0)
2337     Is64Bit = false;
2338 
2339   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340   const MCInstrDesc &II = TII.get(Opc);
2341 
2342   unsigned SrcReg = getRegForValue(LHS);
2343   if (!SrcReg)
2344     return false;
2345 
2346   if (BW == 64 && !Is64Bit)
2347     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
2349   if ((BW < 32) && !IsBitTest)
2350     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352   // Emit the combined compare and branch instruction.
2353   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2354   MachineInstrBuilder MIB =
2355       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2356           .addReg(SrcReg);
2357   if (IsBitTest)
2358     MIB.addImm(TestBit);
2359   MIB.addMBB(TBB);
2360 
2361   finishCondBranch(BI->getParent(), TBB, FBB);
2362   return true;
2363 }
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
2366   const BranchInst *BI = cast<BranchInst>(I);
2367   if (BI->isUnconditional()) {
2368     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369     fastEmitBranch(MSucc, BI->getDebugLoc());
2370     return true;
2371   }
2372 
2373   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2377     if (CI->hasOneUse() && isValueAvailable(CI)) {
2378       // Try to optimize or fold the cmp.
2379       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380       switch (Predicate) {
2381       default:
2382         break;
2383       case CmpInst::FCMP_FALSE:
2384         fastEmitBranch(FBB, DbgLoc);
2385         return true;
2386       case CmpInst::FCMP_TRUE:
2387         fastEmitBranch(TBB, DbgLoc);
2388         return true;
2389       }
2390 
2391       // Try to emit a combined compare-and-branch first.
2392       if (emitCompareAndBranch(BI))
2393         return true;
2394 
2395       // Try to take advantage of fallthrough opportunities.
2396       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397         std::swap(TBB, FBB);
2398         Predicate = CmpInst::getInversePredicate(Predicate);
2399       }
2400 
2401       // Emit the cmp.
2402       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403         return false;
2404 
2405       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406       // instruction.
2407       AArch64CC::CondCode CC = getCompareCC(Predicate);
2408       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2409       switch (Predicate) {
2410       default:
2411         break;
2412       case CmpInst::FCMP_UEQ:
2413         ExtraCC = AArch64CC::EQ;
2414         CC = AArch64CC::VS;
2415         break;
2416       case CmpInst::FCMP_ONE:
2417         ExtraCC = AArch64CC::MI;
2418         CC = AArch64CC::GT;
2419         break;
2420       }
2421       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424       if (ExtraCC != AArch64CC::AL) {
2425         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2426             .addImm(ExtraCC)
2427             .addMBB(TBB);
2428       }
2429 
2430       // Emit the branch.
2431       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2432           .addImm(CC)
2433           .addMBB(TBB);
2434 
2435       finishCondBranch(BI->getParent(), TBB, FBB);
2436       return true;
2437     }
2438   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2439     uint64_t Imm = CI->getZExtValue();
2440     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2442         .addMBB(Target);
2443 
2444     // Obtain the branch probability and add the target to the successor list.
2445     if (FuncInfo.BPI) {
2446       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447           BI->getParent(), Target->getBasicBlock());
2448       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449     } else
2450       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451     return true;
2452   } else {
2453     AArch64CC::CondCode CC = AArch64CC::NE;
2454     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455       // Fake-request the condition; otherwise the intrinsic might be completely
2456       // optimized away.
2457       unsigned CondReg = getRegForValue(BI->getCondition());
2458       if (!CondReg)
2459         return false;
2460 
2461       // Emit the branch.
2462       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2463         .addImm(CC)
2464         .addMBB(TBB);
2465 
2466       finishCondBranch(BI->getParent(), TBB, FBB);
2467       return true;
2468     }
2469   }
2470 
2471   unsigned CondReg = getRegForValue(BI->getCondition());
2472   if (CondReg == 0)
2473     return false;
2474 
2475   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2476   unsigned Opcode = AArch64::TBNZW;
2477   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478     std::swap(TBB, FBB);
2479     Opcode = AArch64::TBZW;
2480   }
2481 
2482   const MCInstrDesc &II = TII.get(Opcode);
2483   unsigned ConstrainedCondReg
2484     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2486       .addReg(ConstrainedCondReg)
2487       .addImm(0)
2488       .addMBB(TBB);
2489 
2490   finishCondBranch(BI->getParent(), TBB, FBB);
2491   return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2497   if (AddrReg == 0)
2498     return false;
2499 
2500   // Emit the indirect branch.
2501   const MCInstrDesc &II = TII.get(AArch64::BR);
2502   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2503   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2504 
2505   // Make sure the CFG is up-to-date.
2506   for (auto *Succ : BI->successors())
2507     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509   return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513   const CmpInst *CI = cast<CmpInst>(I);
2514 
2515   // Vectors of i1 are weird: bail out.
2516   if (CI->getType()->isVectorTy())
2517     return false;
2518 
2519   // Try to optimize or fold the cmp.
2520   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521   unsigned ResultReg = 0;
2522   switch (Predicate) {
2523   default:
2524     break;
2525   case CmpInst::FCMP_FALSE:
2526     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2528             TII.get(TargetOpcode::COPY), ResultReg)
2529         .addReg(AArch64::WZR, getKillRegState(true));
2530     break;
2531   case CmpInst::FCMP_TRUE:
2532     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533     break;
2534   }
2535 
2536   if (ResultReg) {
2537     updateValueMap(I, ResultReg);
2538     return true;
2539   }
2540 
2541   // Emit the cmp.
2542   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543     return false;
2544 
2545   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548   // condition codes are inverted, because they are used by CSINC.
2549   static unsigned CondCodeTable[2][2] = {
2550     { AArch64CC::NE, AArch64CC::VC },
2551     { AArch64CC::PL, AArch64CC::LE }
2552   };
2553   unsigned *CondCodes = nullptr;
2554   switch (Predicate) {
2555   default:
2556     break;
2557   case CmpInst::FCMP_UEQ:
2558     CondCodes = &CondCodeTable[0][0];
2559     break;
2560   case CmpInst::FCMP_ONE:
2561     CondCodes = &CondCodeTable[1][0];
2562     break;
2563   }
2564 
2565   if (CondCodes) {
2566     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2568             TmpReg1)
2569         .addReg(AArch64::WZR, getKillRegState(true))
2570         .addReg(AArch64::WZR, getKillRegState(true))
2571         .addImm(CondCodes[0]);
2572     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2573             ResultReg)
2574         .addReg(TmpReg1, getKillRegState(true))
2575         .addReg(AArch64::WZR, getKillRegState(true))
2576         .addImm(CondCodes[1]);
2577 
2578     updateValueMap(I, ResultReg);
2579     return true;
2580   }
2581 
2582   // Now set a register based on the comparison.
2583   AArch64CC::CondCode CC = getCompareCC(Predicate);
2584   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
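       // CSINC Wd, WZR, WZR, invCC yields 0 when invCC holds and 1 otherwise,
       // i.e. 1 exactly when the original condition CC is true.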
2586   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587           ResultReg)
2588       .addReg(AArch64::WZR, getKillRegState(true))
2589       .addReg(AArch64::WZR, getKillRegState(true))
2590       .addImm(invertedCC);
2591 
2592   updateValueMap(I, ResultReg);
2593   return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2599   if (!SI->getType()->isIntegerTy(1))
2600     return false;
2601 
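       // The i1 selects below map onto bitwise ops:
       //   select c, 1, f -> ORR c, f       select c, 0, f -> BIC f, c
       //   select c, t, 1 -> ORR ~c, t      select c, t, 0 -> AND c, t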
2602   const Value *Src1Val, *Src2Val;
2603   unsigned Opc = 0;
2604   bool NeedExtraOp = false;
2605   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606     if (CI->isOne()) {
2607       Src1Val = SI->getCondition();
2608       Src2Val = SI->getFalseValue();
2609       Opc = AArch64::ORRWrr;
2610     } else {
2611       assert(CI->isZero());
2612       Src1Val = SI->getFalseValue();
2613       Src2Val = SI->getCondition();
2614       Opc = AArch64::BICWrr;
2615     }
2616   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617     if (CI->isOne()) {
2618       Src1Val = SI->getCondition();
2619       Src2Val = SI->getTrueValue();
2620       Opc = AArch64::ORRWrr;
2621       NeedExtraOp = true;
2622     } else {
2623       assert(CI->isZero());
2624       Src1Val = SI->getCondition();
2625       Src2Val = SI->getTrueValue();
2626       Opc = AArch64::ANDWrr;
2627     }
2628   }
2629 
2630   if (!Opc)
2631     return false;
2632 
2633   unsigned Src1Reg = getRegForValue(Src1Val);
2634   if (!Src1Reg)
2635     return false;
2636 
2637   unsigned Src2Reg = getRegForValue(Src2Val);
2638   if (!Src2Reg)
2639     return false;
2640 
2641   if (NeedExtraOp)
2642     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645                                        Src2Reg);
2646   updateValueMap(SI, ResultReg);
2647   return true;
2648 }
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652   MVT VT;
2653   if (!isTypeSupported(I->getType(), VT))
2654     return false;
2655 
2656   unsigned Opc;
2657   const TargetRegisterClass *RC;
2658   switch (VT.SimpleTy) {
2659   default:
2660     return false;
2661   case MVT::i1:
2662   case MVT::i8:
2663   case MVT::i16:
2664   case MVT::i32:
2665     Opc = AArch64::CSELWr;
2666     RC = &AArch64::GPR32RegClass;
2667     break;
2668   case MVT::i64:
2669     Opc = AArch64::CSELXr;
2670     RC = &AArch64::GPR64RegClass;
2671     break;
2672   case MVT::f32:
2673     Opc = AArch64::FCSELSrrr;
2674     RC = &AArch64::FPR32RegClass;
2675     break;
2676   case MVT::f64:
2677     Opc = AArch64::FCSELDrrr;
2678     RC = &AArch64::FPR64RegClass;
2679     break;
2680   }
2681 
2682   const SelectInst *SI = cast<SelectInst>(I);
2683   const Value *Cond = SI->getCondition();
2684   AArch64CC::CondCode CC = AArch64CC::NE;
2685   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2686 
2687   if (optimizeSelect(SI))
2688     return true;
2689 
2690   // Try to pick up the flags, so we don't have to emit another compare.
2691   if (foldXALUIntrinsic(CC, I, Cond)) {
2692     // Fake-request the condition to force emission of the XALU intrinsic.
2693     unsigned CondReg = getRegForValue(Cond);
2694     if (!CondReg)
2695       return false;
2696   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697              isValueAvailable(Cond)) {
2698     const auto *Cmp = cast<CmpInst>(Cond);
2699     // Try to optimize or fold the cmp.
2700     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701     const Value *FoldSelect = nullptr;
2702     switch (Predicate) {
2703     default:
2704       break;
2705     case CmpInst::FCMP_FALSE:
2706       FoldSelect = SI->getFalseValue();
2707       break;
2708     case CmpInst::FCMP_TRUE:
2709       FoldSelect = SI->getTrueValue();
2710       break;
2711     }
2712 
2713     if (FoldSelect) {
2714       unsigned SrcReg = getRegForValue(FoldSelect);
2715       if (!SrcReg)
2716         return false;
2717 
2718       updateValueMap(I, SrcReg);
2719       return true;
2720     }
2721 
2722     // Emit the cmp.
2723     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724       return false;
2725 
2726     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727     CC = getCompareCC(Predicate);
2728     switch (Predicate) {
2729     default:
2730       break;
2731     case CmpInst::FCMP_UEQ:
2732       ExtraCC = AArch64CC::EQ;
2733       CC = AArch64CC::VS;
2734       break;
2735     case CmpInst::FCMP_ONE:
2736       ExtraCC = AArch64CC::MI;
2737       CC = AArch64CC::GT;
2738       break;
2739     }
2740     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741   } else {
2742     unsigned CondReg = getRegForValue(Cond);
2743     if (!CondReg)
2744       return false;
2745 
2746     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747     CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749     // Emit a TST instruction (ANDS wzr, reg, #imm).
2750     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2751             AArch64::WZR)
2752         .addReg(CondReg)
2753         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2754   }
2755 
2756   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2757   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759   if (!Src1Reg || !Src2Reg)
2760     return false;
2761 
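       // For FCMP_UEQ/FCMP_ONE chain two conditional selects: the first select
       // handles ExtraCC and its result becomes the false operand of the second.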
2762   if (ExtraCC != AArch64CC::AL)
2763     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766   updateValueMap(I, ResultReg);
2767   return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771   Value *V = I->getOperand(0);
2772   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773     return false;
2774 
2775   unsigned Op = getRegForValue(V);
2776   if (Op == 0)
2777     return false;
2778 
2779   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2781           ResultReg).addReg(Op);
2782   updateValueMap(I, ResultReg);
2783   return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787   Value *V = I->getOperand(0);
2788   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789     return false;
2790 
2791   unsigned Op = getRegForValue(V);
2792   if (Op == 0)
2793     return false;
2794 
2795   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2797           ResultReg).addReg(Op);
2798   updateValueMap(I, ResultReg);
2799   return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804   MVT DestVT;
2805   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806     return false;
2807 
2808   unsigned SrcReg = getRegForValue(I->getOperand(0));
2809   if (SrcReg == 0)
2810     return false;
2811 
2812   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814     return false;
2815 
2816   unsigned Opc;
2817   if (SrcVT == MVT::f64) {
2818     if (Signed)
2819       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820     else
2821       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822   } else {
2823     if (Signed)
2824       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825     else
2826       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827   }
2828   unsigned ResultReg = createResultReg(
2829       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2831       .addReg(SrcReg);
2832   updateValueMap(I, ResultReg);
2833   return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837   MVT DestVT;
2838   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839     return false;
2840   // Let regular ISEL handle FP16
2841   if (DestVT == MVT::f16)
2842     return false;
2843 
2844   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845          "Unexpected value type.");
2846 
2847   unsigned SrcReg = getRegForValue(I->getOperand(0));
2848   if (!SrcReg)
2849     return false;
2850 
2851   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853   // Handle sign-extension.
2854   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855     SrcReg =
2856         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857     if (!SrcReg)
2858       return false;
2859   }
2860 
2861   unsigned Opc;
2862   if (SrcVT == MVT::i64) {
2863     if (Signed)
2864       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865     else
2866       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867   } else {
2868     if (Signed)
2869       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870     else
2871       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872   }
2873 
2874   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875   updateValueMap(I, ResultReg);
2876   return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880   if (!FuncInfo.CanLowerReturn)
2881     return false;
2882 
2883   const Function *F = FuncInfo.Fn;
2884   if (F->isVarArg())
2885     return false;
2886 
2887   CallingConv::ID CC = F->getCallingConv();
2888   if (CC != CallingConv::C && CC != CallingConv::Swift)
2889     return false;
2890 
2891   if (Subtarget->hasCustomCallingConv())
2892     return false;
2893 
2894   // Only handle simple cases of up to 8 GPR and FPR each.
2895   unsigned GPRCnt = 0;
2896   unsigned FPRCnt = 0;
2897   for (auto const &Arg : F->args()) {
2898     if (Arg.hasAttribute(Attribute::ByVal) ||
2899         Arg.hasAttribute(Attribute::InReg) ||
2900         Arg.hasAttribute(Attribute::StructRet) ||
2901         Arg.hasAttribute(Attribute::SwiftSelf) ||
2902         Arg.hasAttribute(Attribute::SwiftAsync) ||
2903         Arg.hasAttribute(Attribute::SwiftError) ||
2904         Arg.hasAttribute(Attribute::Nest))
2905       return false;
2906 
2907     Type *ArgTy = Arg.getType();
2908     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2909       return false;
2910 
2911     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2912     if (!ArgVT.isSimple())
2913       return false;
2914 
2915     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2916     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2917       return false;
2918 
2919     if (VT.isVector() &&
2920         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2921       return false;
2922 
2923     if (VT >= MVT::i1 && VT <= MVT::i64)
2924       ++GPRCnt;
2925     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2926              VT.is128BitVector())
2927       ++FPRCnt;
2928     else
2929       return false;
2930 
2931     if (GPRCnt > 8 || FPRCnt > 8)
2932       return false;
2933   }
2934 
2935   static const MCPhysReg Registers[6][8] = {
2936     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2937       AArch64::W5, AArch64::W6, AArch64::W7 },
2938     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2939       AArch64::X5, AArch64::X6, AArch64::X7 },
2940     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2941       AArch64::H5, AArch64::H6, AArch64::H7 },
2942     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2943       AArch64::S5, AArch64::S6, AArch64::S7 },
2944     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2945       AArch64::D5, AArch64::D6, AArch64::D7 },
2946     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2947       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2948   };
2949 
2950   unsigned GPRIdx = 0;
2951   unsigned FPRIdx = 0;
2952   for (auto const &Arg : F->args()) {
2953     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2954     unsigned SrcReg;
2955     const TargetRegisterClass *RC;
2956     if (VT >= MVT::i1 && VT <= MVT::i32) {
2957       SrcReg = Registers[0][GPRIdx++];
2958       RC = &AArch64::GPR32RegClass;
2959       VT = MVT::i32;
2960     } else if (VT == MVT::i64) {
2961       SrcReg = Registers[1][GPRIdx++];
2962       RC = &AArch64::GPR64RegClass;
2963     } else if (VT == MVT::f16) {
2964       SrcReg = Registers[2][FPRIdx++];
2965       RC = &AArch64::FPR16RegClass;
2966     } else if (VT == MVT::f32) {
2967       SrcReg = Registers[3][FPRIdx++];
2968       RC = &AArch64::FPR32RegClass;
2969     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2970       SrcReg = Registers[4][FPRIdx++];
2971       RC = &AArch64::FPR64RegClass;
2972     } else if (VT.is128BitVector()) {
2973       SrcReg = Registers[5][FPRIdx++];
2974       RC = &AArch64::FPR128RegClass;
2975     } else
2976       llvm_unreachable("Unexpected value type.");
2977 
2978     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2979     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2980     // Without this, EmitLiveInCopies may eliminate the livein if its only
2981     // use is a bitcast (which isn't turned into an instruction).
2982     unsigned ResultReg = createResultReg(RC);
2983     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2984             TII.get(TargetOpcode::COPY), ResultReg)
2985         .addReg(DstReg, getKillRegState(true));
2986     updateValueMap(&Arg, ResultReg);
2987   }
2988   return true;
2989 }
2990 
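// Lower the outgoing arguments of a call: emit CALLSEQ_START, promote narrow
// integers as required by the calling convention, copy register arguments
// into their assigned physical registers, and store stack arguments at their
// SP-relative offsets (with big-endian padding for values smaller than 8
// bytes).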
2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2992                                       SmallVectorImpl<MVT> &OutVTs,
2993                                       unsigned &NumBytes) {
2994   CallingConv::ID CC = CLI.CallConv;
2995   SmallVector<CCValAssign, 16> ArgLocs;
2996   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2997   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2998 
2999   // Get a count of how many bytes are to be pushed on the stack.
3000   NumBytes = CCInfo.getNextStackOffset();
3001 
3002   // Issue CALLSEQ_START
3003   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3004   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3005     .addImm(NumBytes).addImm(0);
3006 
3007   // Process the args.
3008   for (CCValAssign &VA : ArgLocs) {
3009     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3010     MVT ArgVT = OutVTs[VA.getValNo()];
3011 
3012     unsigned ArgReg = getRegForValue(ArgVal);
3013     if (!ArgReg)
3014       return false;
3015 
3016     // Handle arg promotion: SExt, ZExt, AExt.
3017     switch (VA.getLocInfo()) {
3018     case CCValAssign::Full:
3019       break;
3020     case CCValAssign::SExt: {
3021       MVT DestVT = VA.getLocVT();
3022       MVT SrcVT = ArgVT;
3023       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3024       if (!ArgReg)
3025         return false;
3026       break;
3027     }
3028     case CCValAssign::AExt:
3029     // Intentional fall-through.
3030     case CCValAssign::ZExt: {
3031       MVT DestVT = VA.getLocVT();
3032       MVT SrcVT = ArgVT;
3033       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3034       if (!ArgReg)
3035         return false;
3036       break;
3037     }
3038     default:
3039       llvm_unreachable("Unknown arg promotion!");
3040     }
3041 
3042     // Now copy/store arg to correct locations.
3043     if (VA.isRegLoc() && !VA.needsCustom()) {
3044       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3045               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3046       CLI.OutRegs.push_back(VA.getLocReg());
3047     } else if (VA.needsCustom()) {
3048       // FIXME: Handle custom args.
3049       return false;
3050     } else {
3051       assert(VA.isMemLoc() && "Assuming store on stack.");
3052 
3053       // Don't emit stores for undef values.
3054       if (isa<UndefValue>(ArgVal))
3055         continue;
3056 
3057       // Need to store on the stack.
3058       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3059 
3060       unsigned BEAlign = 0;
3061       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3062         BEAlign = 8 - ArgSize;
3063 
3064       Address Addr;
3065       Addr.setKind(Address::RegBase);
3066       Addr.setReg(AArch64::SP);
3067       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3068 
3069       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3070       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3071           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3072           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3073 
3074       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3075         return false;
3076     }
3077   }
3078   return true;
3079 }
3080 
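// Finish a lowered call: emit CALLSEQ_END and, for non-void calls, copy the
// single supported return value out of its physical register.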
3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3082                                  unsigned NumBytes) {
3083   CallingConv::ID CC = CLI.CallConv;
3084 
3085   // Issue CALLSEQ_END
3086   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3087   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3088     .addImm(NumBytes).addImm(0);
3089 
3090   // Now the return value.
3091   if (RetVT != MVT::isVoid) {
3092     SmallVector<CCValAssign, 16> RVLocs;
3093     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3094     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3095 
3096     // Only handle a single return value.
3097     if (RVLocs.size() != 1)
3098       return false;
3099 
3100     // Copy all of the result registers out of their specified physreg.
3101     MVT CopyVT = RVLocs[0].getValVT();
3102 
3103     // TODO: Handle big-endian results
3104     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3105       return false;
3106 
3107     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3108     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3109             TII.get(TargetOpcode::COPY), ResultReg)
3110         .addReg(RVLocs[0].getLocReg());
3111     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3112 
3113     CLI.ResultReg = ResultReg;
3114     CLI.NumResultRegs = 1;
3115   }
3116 
3117   return true;
3118 }
3119 
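// Fast-path call lowering. Tail calls, varargs, ILP32, vector arguments and
// the large code model on non-MachO targets are all punted to SelectionDAG.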
3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3121   CallingConv::ID CC  = CLI.CallConv;
3122   bool IsTailCall     = CLI.IsTailCall;
3123   bool IsVarArg       = CLI.IsVarArg;
3124   const Value *Callee = CLI.Callee;
3125   MCSymbol *Symbol = CLI.Symbol;
3126 
3127   if (!Callee && !Symbol)
3128     return false;
3129 
3130   // Allow SelectionDAG isel to handle tail calls.
3131   if (IsTailCall)
3132     return false;
3133 
3134   // FIXME: we could and should support this, but for now correctness at -O0 is
3135   // more important.
3136   if (Subtarget->isTargetILP32())
3137     return false;
3138 
3139   CodeModel::Model CM = TM.getCodeModel();
3140   // Only support the small-addressing and large code models.
3141   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3142     return false;
3143 
3144   // FIXME: Add large code model support for ELF.
3145   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3146     return false;
3147 
3148   // Let SDISel handle vararg functions.
3149   if (IsVarArg)
3150     return false;
3151 
3152   // FIXME: Only handle *simple* calls for now.
3153   MVT RetVT;
3154   if (CLI.RetTy->isVoidTy())
3155     RetVT = MVT::isVoid;
3156   else if (!isTypeLegal(CLI.RetTy, RetVT))
3157     return false;
3158 
3159   for (auto Flag : CLI.OutFlags)
3160     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3161         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3162       return false;
3163 
3164   // Set up the argument vectors.
3165   SmallVector<MVT, 16> OutVTs;
3166   OutVTs.reserve(CLI.OutVals.size());
3167 
3168   for (auto *Val : CLI.OutVals) {
3169     MVT VT;
3170     if (!isTypeLegal(Val->getType(), VT) &&
3171         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3172       return false;
3173 
3174     // We don't handle vector parameters yet.
3175     if (VT.isVector() || VT.getSizeInBits() > 64)
3176       return false;
3177 
3178     OutVTs.push_back(VT);
3179   }
3180 
3181   Address Addr;
3182   if (Callee && !computeCallAddress(Callee, Addr))
3183     return false;
3184 
3185   // The weak function target may be zero; in that case we must use indirect
3186   // addressing via a stub on Windows, as it may be out of range for a
3187   // PC-relative jump.
3188   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3189       Addr.getGlobalValue()->hasExternalWeakLinkage())
3190     return false;
3191 
3192   // Handle the arguments now that we've gotten them.
3193   unsigned NumBytes;
3194   if (!processCallArgs(CLI, OutVTs, NumBytes))
3195     return false;
3196 
3197   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3198   if (RegInfo->isAnyArgRegReserved(*MF))
3199     RegInfo->emitReservedArgRegCallError(*MF);
3200 
3201   // Issue the call.
3202   MachineInstrBuilder MIB;
3203   if (Subtarget->useSmallAddressing()) {
3204     const MCInstrDesc &II =
3205         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3206     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3207     if (Symbol)
3208       MIB.addSym(Symbol, 0);
3209     else if (Addr.getGlobalValue())
3210       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3211     else if (Addr.getReg()) {
3212       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3213       MIB.addReg(Reg);
3214     } else
3215       return false;
3216   } else {
3217     unsigned CallReg = 0;
3218     if (Symbol) {
3219       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3220       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3221               ADRPReg)
3222           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3223 
3224       CallReg = createResultReg(&AArch64::GPR64RegClass);
3225       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3226               TII.get(AArch64::LDRXui), CallReg)
3227           .addReg(ADRPReg)
3228           .addSym(Symbol,
3229                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3230     } else if (Addr.getGlobalValue())
3231       CallReg = materializeGV(Addr.getGlobalValue());
3232     else if (Addr.getReg())
3233       CallReg = Addr.getReg();
3234 
3235     if (!CallReg)
3236       return false;
3237 
3238     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3239     CallReg = constrainOperandRegClass(II, CallReg, 0);
3240     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3241   }
3242 
3243   // Add implicit physical register uses to the call.
3244   for (auto Reg : CLI.OutRegs)
3245     MIB.addReg(Reg, RegState::Implicit);
3246 
3247   // Add a register mask with the call-preserved registers.
3248   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3249   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3250 
3251   CLI.Call = MIB;
3252 
3253   // Finish off the call including any return values.
3254   return finishCall(CLI, RetVT, NumBytes);
3255 }
3256 
3257 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3258   if (Alignment)
3259     return Len / Alignment <= 4;
3260   else
3261     return Len < 32;
3262 }
3263 
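// Illustrative example: a 12-byte copy with 8-byte (or unknown) alignment is
// expanded into one i64 load/store pair followed by one i32 load/store pair.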
3264 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3265                                          uint64_t Len, unsigned Alignment) {
3266   // Make sure we don't bloat code by inlining very large memcpy's.
3267   if (!isMemCpySmall(Len, Alignment))
3268     return false;
3269 
3270   int64_t UnscaledOffset = 0;
3271   Address OrigDest = Dest;
3272   Address OrigSrc = Src;
3273 
3274   while (Len) {
3275     MVT VT;
3276     if (!Alignment || Alignment >= 8) {
3277       if (Len >= 8)
3278         VT = MVT::i64;
3279       else if (Len >= 4)
3280         VT = MVT::i32;
3281       else if (Len >= 2)
3282         VT = MVT::i16;
3283       else {
3284         VT = MVT::i8;
3285       }
3286     } else {
3287       // Bound based on alignment.
3288       if (Len >= 4 && Alignment == 4)
3289         VT = MVT::i32;
3290       else if (Len >= 2 && Alignment == 2)
3291         VT = MVT::i16;
3292       else {
3293         VT = MVT::i8;
3294       }
3295     }
3296 
3297     unsigned ResultReg = emitLoad(VT, VT, Src);
3298     if (!ResultReg)
3299       return false;
3300 
3301     if (!emitStore(VT, ResultReg, Dest))
3302       return false;
3303 
3304     int64_t Size = VT.getSizeInBits() / 8;
3305     Len -= Size;
3306     UnscaledOffset += Size;
3307 
3308     // We need to recompute the unscaled offset for each iteration.
3309     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3310     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3311   }
3312 
3313   return true;
3314 }
3315 
3316 /// Check if it is possible to fold the condition from the XALU intrinsic
3317 /// into the user. The condition code will only be updated on success.
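/// Illustrative example: for
///   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %o = extractvalue { i32, i1 } %s, 1
/// a user of %o can test the "vs" condition set by the overflow computation
/// directly, provided only extractvalues of the intrinsic sit in between.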
3318 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3319                                         const Instruction *I,
3320                                         const Value *Cond) {
3321   if (!isa<ExtractValueInst>(Cond))
3322     return false;
3323 
3324   const auto *EV = cast<ExtractValueInst>(Cond);
3325   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3326     return false;
3327 
3328   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3329   MVT RetVT;
3330   const Function *Callee = II->getCalledFunction();
3331   Type *RetTy =
3332   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3333   if (!isTypeLegal(RetTy, RetVT))
3334     return false;
3335 
3336   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3337     return false;
3338 
3339   const Value *LHS = II->getArgOperand(0);
3340   const Value *RHS = II->getArgOperand(1);
3341 
3342   // Canonicalize immediate to the RHS.
3343   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3344     std::swap(LHS, RHS);
3345 
3346   // Simplify multiplies.
3347   Intrinsic::ID IID = II->getIntrinsicID();
3348   switch (IID) {
3349   default:
3350     break;
3351   case Intrinsic::smul_with_overflow:
3352     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3353       if (C->getValue() == 2)
3354         IID = Intrinsic::sadd_with_overflow;
3355     break;
3356   case Intrinsic::umul_with_overflow:
3357     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3358       if (C->getValue() == 2)
3359         IID = Intrinsic::uadd_with_overflow;
3360     break;
3361   }
3362 
3363   AArch64CC::CondCode TmpCC;
3364   switch (IID) {
3365   default:
3366     return false;
3367   case Intrinsic::sadd_with_overflow:
3368   case Intrinsic::ssub_with_overflow:
3369     TmpCC = AArch64CC::VS;
3370     break;
3371   case Intrinsic::uadd_with_overflow:
3372     TmpCC = AArch64CC::HS;
3373     break;
3374   case Intrinsic::usub_with_overflow:
3375     TmpCC = AArch64CC::LO;
3376     break;
3377   case Intrinsic::smul_with_overflow:
3378   case Intrinsic::umul_with_overflow:
3379     TmpCC = AArch64CC::NE;
3380     break;
3381   }
3382 
3383   // Check if both instructions are in the same basic block.
3384   if (!isValueAvailable(II))
3385     return false;
3386 
3387   // Make sure nothing is in the way
3388   BasicBlock::const_iterator Start(I);
3389   BasicBlock::const_iterator End(II);
3390   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3391     // We only expect extractvalue instructions between the intrinsic and the
3392     // instruction to be selected.
3393     if (!isa<ExtractValueInst>(Itr))
3394       return false;
3395 
3396     // Check that the extractvalue operand comes from the intrinsic.
3397     const auto *EVI = cast<ExtractValueInst>(Itr);
3398     if (EVI->getAggregateOperand() != II)
3399       return false;
3400   }
3401 
3402   CC = TmpCC;
3403   return true;
3404 }
3405 
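// Directly lower a small set of intrinsics: frameaddress, sponentry,
// memcpy/memmove/memset (as libcalls or small inline copies), sin/cos/pow
// libcalls, fabs, sqrt, trap/debugtrap and the *.with.overflow family.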
3406 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3407   // FIXME: Handle more intrinsics.
3408   switch (II->getIntrinsicID()) {
3409   default: return false;
3410   case Intrinsic::frameaddress: {
3411     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3412     MFI.setFrameAddressIsTaken(true);
3413 
3414     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3415     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3416     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3417     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3418             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3419     // Recursively load frame address
3420     // ldr x0, [fp]
3421     // ldr x0, [x0]
3422     // ldr x0, [x0]
3423     // ...
3424     unsigned DestReg;
3425     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3426     while (Depth--) {
3427       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3428                                 SrcReg, 0);
3429       assert(DestReg && "Unexpected LDR instruction emission failure.");
3430       SrcReg = DestReg;
3431     }
3432 
3433     updateValueMap(II, SrcReg);
3434     return true;
3435   }
3436   case Intrinsic::sponentry: {
3437     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438 
3439     // SP = FP + Fixed Object + 16
3440     int FI = MFI.CreateFixedObject(4, 0, false);
3441     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3442     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3443             TII.get(AArch64::ADDXri), ResultReg)
3444             .addFrameIndex(FI)
3445             .addImm(0)
3446             .addImm(0);
3447 
3448     updateValueMap(II, ResultReg);
3449     return true;
3450   }
3451   case Intrinsic::memcpy:
3452   case Intrinsic::memmove: {
3453     const auto *MTI = cast<MemTransferInst>(II);
3454     // Don't handle volatile.
3455     if (MTI->isVolatile())
3456       return false;
3457 
3458     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3459     // we would emit dead code because we don't currently handle memmoves.
3460     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3461     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3462       // Small memcpy's are common enough that we want to do them without a call
3463       // if possible.
3464       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3465       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3466                                     MTI->getSourceAlignment());
3467       if (isMemCpySmall(Len, Alignment)) {
3468         Address Dest, Src;
3469         if (!computeAddress(MTI->getRawDest(), Dest) ||
3470             !computeAddress(MTI->getRawSource(), Src))
3471           return false;
3472         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3473           return true;
3474       }
3475     }
3476 
3477     if (!MTI->getLength()->getType()->isIntegerTy(64))
3478       return false;
3479 
3480     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3481       // Fast instruction selection doesn't support the special
3482       // address spaces.
3483       return false;
3484 
3485     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3486     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3487   }
3488   case Intrinsic::memset: {
3489     const MemSetInst *MSI = cast<MemSetInst>(II);
3490     // Don't handle volatile.
3491     if (MSI->isVolatile())
3492       return false;
3493 
3494     if (!MSI->getLength()->getType()->isIntegerTy(64))
3495       return false;
3496 
3497     if (MSI->getDestAddressSpace() > 255)
3498       // Fast instruction selection doesn't support the special
3499       // address spaces.
3500       return false;
3501 
3502     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3503   }
3504   case Intrinsic::sin:
3505   case Intrinsic::cos:
3506   case Intrinsic::pow: {
3507     MVT RetVT;
3508     if (!isTypeLegal(II->getType(), RetVT))
3509       return false;
3510 
3511     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3512       return false;
3513 
3514     static const RTLIB::Libcall LibCallTable[3][2] = {
3515       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3516       { RTLIB::COS_F32, RTLIB::COS_F64 },
3517       { RTLIB::POW_F32, RTLIB::POW_F64 }
3518     };
3519     RTLIB::Libcall LC;
3520     bool Is64Bit = RetVT == MVT::f64;
3521     switch (II->getIntrinsicID()) {
3522     default:
3523       llvm_unreachable("Unexpected intrinsic.");
3524     case Intrinsic::sin:
3525       LC = LibCallTable[0][Is64Bit];
3526       break;
3527     case Intrinsic::cos:
3528       LC = LibCallTable[1][Is64Bit];
3529       break;
3530     case Intrinsic::pow:
3531       LC = LibCallTable[2][Is64Bit];
3532       break;
3533     }
3534 
3535     ArgListTy Args;
3536     Args.reserve(II->getNumArgOperands());
3537 
3538     // Populate the argument list.
3539     for (auto &Arg : II->arg_operands()) {
3540       ArgListEntry Entry;
3541       Entry.Val = Arg;
3542       Entry.Ty = Arg->getType();
3543       Args.push_back(Entry);
3544     }
3545 
3546     CallLoweringInfo CLI;
3547     MCContext &Ctx = MF->getContext();
3548     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3549                   TLI.getLibcallName(LC), std::move(Args));
3550     if (!lowerCallTo(CLI))
3551       return false;
3552     updateValueMap(II, CLI.ResultReg);
3553     return true;
3554   }
3555   case Intrinsic::fabs: {
3556     MVT VT;
3557     if (!isTypeLegal(II->getType(), VT))
3558       return false;
3559 
3560     unsigned Opc;
3561     switch (VT.SimpleTy) {
3562     default:
3563       return false;
3564     case MVT::f32:
3565       Opc = AArch64::FABSSr;
3566       break;
3567     case MVT::f64:
3568       Opc = AArch64::FABSDr;
3569       break;
3570     }
3571     unsigned SrcReg = getRegForValue(II->getOperand(0));
3572     if (!SrcReg)
3573       return false;
3574     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3575     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3576       .addReg(SrcReg);
3577     updateValueMap(II, ResultReg);
3578     return true;
3579   }
3580   case Intrinsic::trap:
3581     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3582         .addImm(1);
3583     return true;
3584   case Intrinsic::debugtrap:
3585     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3586         .addImm(0xF000);
3587     return true;
3588 
3589   case Intrinsic::sqrt: {
3590     Type *RetTy = II->getCalledFunction()->getReturnType();
3591 
3592     MVT VT;
3593     if (!isTypeLegal(RetTy, VT))
3594       return false;
3595 
3596     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3597     if (!Op0Reg)
3598       return false;
3599 
3600     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3601     if (!ResultReg)
3602       return false;
3603 
3604     updateValueMap(II, ResultReg);
3605     return true;
3606   }
3607   case Intrinsic::sadd_with_overflow:
3608   case Intrinsic::uadd_with_overflow:
3609   case Intrinsic::ssub_with_overflow:
3610   case Intrinsic::usub_with_overflow:
3611   case Intrinsic::smul_with_overflow:
3612   case Intrinsic::umul_with_overflow: {
3613     // This implements the basic lowering of the xalu with overflow intrinsics.
3614     const Function *Callee = II->getCalledFunction();
3615     auto *Ty = cast<StructType>(Callee->getReturnType());
3616     Type *RetTy = Ty->getTypeAtIndex(0U);
3617 
3618     MVT VT;
3619     if (!isTypeLegal(RetTy, VT))
3620       return false;
3621 
3622     if (VT != MVT::i32 && VT != MVT::i64)
3623       return false;
3624 
3625     const Value *LHS = II->getArgOperand(0);
3626     const Value *RHS = II->getArgOperand(1);
3627     // Canonicalize immediate to the RHS.
3628     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3629       std::swap(LHS, RHS);
3630 
3631     // Simplify multiplies.
3632     Intrinsic::ID IID = II->getIntrinsicID();
3633     switch (IID) {
3634     default:
3635       break;
3636     case Intrinsic::smul_with_overflow:
3637       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3638         if (C->getValue() == 2) {
3639           IID = Intrinsic::sadd_with_overflow;
3640           RHS = LHS;
3641         }
3642       break;
3643     case Intrinsic::umul_with_overflow:
3644       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3645         if (C->getValue() == 2) {
3646           IID = Intrinsic::uadd_with_overflow;
3647           RHS = LHS;
3648         }
3649       break;
3650     }
3651 
3652     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3653     AArch64CC::CondCode CC = AArch64CC::Invalid;
3654     switch (IID) {
3655     default: llvm_unreachable("Unexpected intrinsic!");
3656     case Intrinsic::sadd_with_overflow:
3657       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3658       CC = AArch64CC::VS;
3659       break;
3660     case Intrinsic::uadd_with_overflow:
3661       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3662       CC = AArch64CC::HS;
3663       break;
3664     case Intrinsic::ssub_with_overflow:
3665       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3666       CC = AArch64CC::VS;
3667       break;
3668     case Intrinsic::usub_with_overflow:
3669       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3670       CC = AArch64CC::LO;
3671       break;
3672     case Intrinsic::smul_with_overflow: {
3673       CC = AArch64CC::NE;
3674       unsigned LHSReg = getRegForValue(LHS);
3675       if (!LHSReg)
3676         return false;
3677 
3678       unsigned RHSReg = getRegForValue(RHS);
3679       if (!RHSReg)
3680         return false;
3681 
3682       if (VT == MVT::i32) {
3683         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3684         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 32);
3685         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3686         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, AArch64::sub_32);
3687         emitSubs_rs(VT, ShiftReg, MulReg, AArch64_AM::ASR, 31,
3688                     /*WantResult=*/false);
3689       } else {
3690         assert(VT == MVT::i64 && "Unexpected value type.");
3691         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3692         // reused in the next instruction.
3693         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3694         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3695         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3696                     /*WantResult=*/false);
3697       }
3698       break;
3699     }
3700     case Intrinsic::umul_with_overflow: {
3701       CC = AArch64CC::NE;
3702       unsigned LHSReg = getRegForValue(LHS);
3703       if (!LHSReg)
3704         return false;
3705 
3706       unsigned RHSReg = getRegForValue(RHS);
3707       if (!RHSReg)
3708         return false;
3709 
3710       if (VT == MVT::i32) {
3711         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3712         emitSubs_rs(MVT::i64, AArch64::XZR, MulReg, AArch64_AM::LSR, 32,
3713                     /*WantResult=*/false);
3714         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3715       } else {
3716         assert(VT == MVT::i64 && "Unexpected value type.");
3717         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3718         // reused in the next instruction.
3719         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3720         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3721         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3722       }
3723       break;
3724     }
3725     }
3726 
3727     if (MulReg) {
3728       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3729       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3730               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3731     }
3732 
3733     if (!ResultReg1)
3734       return false;
3735 
3736     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3737                                   AArch64::WZR, AArch64::WZR,
3738                                   getInvertedCondCode(CC));
3739     (void)ResultReg2;
3740     assert((ResultReg1 + 1) == ResultReg2 &&
3741            "Nonconsecutive result registers.");
3742     updateValueMap(II, ResultReg1, 2);
3743     return true;
3744   }
3745   }
3746   return false;
3747 }
3748 
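// Lower a return instruction. Only a single register return value is
// handled; narrow integers (i1/i8/i16) are extended to the location type
// before the copy into the return register.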
3749 bool AArch64FastISel::selectRet(const Instruction *I) {
3750   const ReturnInst *Ret = cast<ReturnInst>(I);
3751   const Function &F = *I->getParent()->getParent();
3752 
3753   if (!FuncInfo.CanLowerReturn)
3754     return false;
3755 
3756   if (F.isVarArg())
3757     return false;
3758 
3759   if (TLI.supportSwiftError() &&
3760       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3761     return false;
3762 
3763   if (TLI.supportSplitCSR(FuncInfo.MF))
3764     return false;
3765 
3766   // Build a list of return value registers.
3767   SmallVector<unsigned, 4> RetRegs;
3768 
3769   if (Ret->getNumOperands() > 0) {
3770     CallingConv::ID CC = F.getCallingConv();
3771     SmallVector<ISD::OutputArg, 4> Outs;
3772     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3773 
3774     // Analyze operands of the call, assigning locations to each operand.
3775     SmallVector<CCValAssign, 16> ValLocs;
3776     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3777     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3778                                                      : RetCC_AArch64_AAPCS;
3779     CCInfo.AnalyzeReturn(Outs, RetCC);
3780 
3781     // Only handle a single return value for now.
3782     if (ValLocs.size() != 1)
3783       return false;
3784 
3785     CCValAssign &VA = ValLocs[0];
3786     const Value *RV = Ret->getOperand(0);
3787 
3788     // Don't bother handling odd stuff for now.
3789     if ((VA.getLocInfo() != CCValAssign::Full) &&
3790         (VA.getLocInfo() != CCValAssign::BCvt))
3791       return false;
3792 
3793     // Only handle register returns for now.
3794     if (!VA.isRegLoc())
3795       return false;
3796 
3797     unsigned Reg = getRegForValue(RV);
3798     if (Reg == 0)
3799       return false;
3800 
3801     unsigned SrcReg = Reg + VA.getValNo();
3802     Register DestReg = VA.getLocReg();
3803     // Avoid a cross-class copy. This is very unlikely.
3804     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3805       return false;
3806 
3807     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3808     if (!RVEVT.isSimple())
3809       return false;
3810 
3811     // Vectors (of > 1 lane) in big endian need tricky handling.
3812     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3813         !Subtarget->isLittleEndian())
3814       return false;
3815 
3816     MVT RVVT = RVEVT.getSimpleVT();
3817     if (RVVT == MVT::f128)
3818       return false;
3819 
3820     MVT DestVT = VA.getValVT();
3821     // Special handling for extended integers.
3822     if (RVVT != DestVT) {
3823       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3824         return false;
3825 
3826       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3827         return false;
3828 
3829       bool IsZExt = Outs[0].Flags.isZExt();
3830       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3831       if (SrcReg == 0)
3832         return false;
3833     }
3834 
3835     // "Callee" (i.e. the value producer) zero-extends pointers at the
3836     // function boundary.
3837     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3838       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3839 
3840     // Make the copy.
3841     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3842             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3843 
3844     // Add register to return instruction.
3845     RetRegs.push_back(VA.getLocReg());
3846   }
3847 
3848   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3849                                     TII.get(AArch64::RET_ReallyLR));
3850   for (unsigned RetReg : RetRegs)
3851     MIB.addReg(RetReg, RegState::Implicit);
3852   return true;
3853 }
3854 
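// Illustrative example: "trunc i64 %x to i8" extracts the sub_32 register and
// ANDs it with 0xff, while truncation from i32 or narrower only needs a COPY
// since the high bits are undefined.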
3855 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3856   Type *DestTy = I->getType();
3857   Value *Op = I->getOperand(0);
3858   Type *SrcTy = Op->getType();
3859 
3860   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3861   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3862   if (!SrcEVT.isSimple())
3863     return false;
3864   if (!DestEVT.isSimple())
3865     return false;
3866 
3867   MVT SrcVT = SrcEVT.getSimpleVT();
3868   MVT DestVT = DestEVT.getSimpleVT();
3869 
3870   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3871       SrcVT != MVT::i8)
3872     return false;
3873   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3874       DestVT != MVT::i1)
3875     return false;
3876 
3877   unsigned SrcReg = getRegForValue(Op);
3878   if (!SrcReg)
3879     return false;
3880 
3881   // If we're truncating from i64 to a smaller non-legal type then generate an
3882   // AND. Otherwise, we know the high bits are undefined and a truncate only
3883   // generate a COPY. We cannot mark the source register also as result
3884   // register, because this can incorrectly transfer the kill flag onto the
3885   // source register.
3886   unsigned ResultReg;
3887   if (SrcVT == MVT::i64) {
3888     uint64_t Mask = 0;
3889     switch (DestVT.SimpleTy) {
3890     default:
3891       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3892       return false;
3893     case MVT::i1:
3894       Mask = 0x1;
3895       break;
3896     case MVT::i8:
3897       Mask = 0xff;
3898       break;
3899     case MVT::i16:
3900       Mask = 0xffff;
3901       break;
3902     }
3903     // Issue an extract_subreg to get the lower 32-bits.
3904     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3905                                                 AArch64::sub_32);
3906     // Create the AND instruction which performs the actual truncation.
3907     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3908     assert(ResultReg && "Unexpected AND instruction emission failure.");
3909   } else {
3910     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3911     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3912             TII.get(TargetOpcode::COPY), ResultReg)
3913         .addReg(SrcReg);
3914   }
3915 
3916   updateValueMap(I, ResultReg);
3917   return true;
3918 }
3919 
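// Extend an i1 value: zero-extension is an AND with 1 (plus SUBREG_TO_REG
// when the destination is i64); sign-extension uses SBFM, and i1 -> i64
// sign-extension is currently not handled.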
3920 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3921   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3922           DestVT == MVT::i64) &&
3923          "Unexpected value type.");
3924   // Handle i8 and i16 as i32.
3925   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3926     DestVT = MVT::i32;
3927 
3928   if (IsZExt) {
3929     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3930     assert(ResultReg && "Unexpected AND instruction emission failure.");
3931     if (DestVT == MVT::i64) {
3932       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3933       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3934       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3935       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3936               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3937           .addImm(0)
3938           .addReg(ResultReg)
3939           .addImm(AArch64::sub_32);
3940       ResultReg = Reg64;
3941     }
3942     return ResultReg;
3943   } else {
3944     if (DestVT == MVT::i64) {
3945       // FIXME: We're SExt i1 to i64.
3946       return 0;
3947     }
3948     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3949                             0, 0);
3950   }
3951 }
3952 
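// Plain multiplies are emitted as multiply-add with the zero register,
// i.e. MADD Wd, Wn, Wm, WZR (or the X-register form for i64).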
3953 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3954   unsigned Opc, ZReg;
3955   switch (RetVT.SimpleTy) {
3956   default: return 0;
3957   case MVT::i8:
3958   case MVT::i16:
3959   case MVT::i32:
3960     RetVT = MVT::i32;
3961     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3962   case MVT::i64:
3963     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3964   }
3965 
3966   const TargetRegisterClass *RC =
3967       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3968   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3969 }
3970 
3971 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3972   if (RetVT != MVT::i64)
3973     return 0;
3974 
3975   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3976                           Op0, Op1, AArch64::XZR);
3977 }
3978 
3979 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3980   if (RetVT != MVT::i64)
3981     return 0;
3982 
3983   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3984                           Op0, Op1, AArch64::XZR);
3985 }
3986 
3987 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
3988                                      unsigned Op1Reg) {
3989   unsigned Opc = 0;
3990   bool NeedTrunc = false;
3991   uint64_t Mask = 0;
3992   switch (RetVT.SimpleTy) {
3993   default: return 0;
3994   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3995   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3996   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3997   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3998   }
3999 
4000   const TargetRegisterClass *RC =
4001       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4002   if (NeedTrunc)
4003     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4004 
4005   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4006   if (NeedTrunc)
4007     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4008   return ResultReg;
4009 }
4010 
4011 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4012                                      uint64_t Shift, bool IsZExt) {
4013   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4014          "Unexpected source/return type pair.");
4015   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4016           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4017          "Unexpected source value type.");
4018   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4019           RetVT == MVT::i64) && "Unexpected return value type.");
4020 
4021   bool Is64Bit = (RetVT == MVT::i64);
4022   unsigned RegSize = Is64Bit ? 64 : 32;
4023   unsigned DstBits = RetVT.getSizeInBits();
4024   unsigned SrcBits = SrcVT.getSizeInBits();
4025   const TargetRegisterClass *RC =
4026       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4027 
4028   // Just emit a copy for "zero" shifts.
4029   if (Shift == 0) {
4030     if (RetVT == SrcVT) {
4031       unsigned ResultReg = createResultReg(RC);
4032       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4033               TII.get(TargetOpcode::COPY), ResultReg)
4034           .addReg(Op0);
4035       return ResultReg;
4036     } else
4037       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4038   }
4039 
4040   // Don't deal with undefined shifts.
4041   if (Shift >= DstBits)
4042     return 0;
4043 
4044   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4045   // {S|U}BFM Wd, Wn, #r, #s
4046   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4047 
4048   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4049   // %2 = shl i16 %1, 4
4050   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4051   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4052   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4053   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4054 
4055   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4056   // %2 = shl i16 %1, 8
4057   // Wd<32+7-24,32-24> = Wn<7:0>
4058   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4059   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4060   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4061 
4062   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4063   // %2 = shl i16 %1, 12
4064   // Wd<32+3-20,32-20> = Wn<3:0>
4065   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4066   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4067   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4068 
4069   unsigned ImmR = RegSize - Shift;
4070   // Limit the width to the length of the source type.
4071   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4072   static const unsigned OpcTable[2][2] = {
4073     {AArch64::SBFMWri, AArch64::SBFMXri},
4074     {AArch64::UBFMWri, AArch64::UBFMXri}
4075   };
4076   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4077   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4078     Register TmpReg = MRI.createVirtualRegister(RC);
4079     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4080             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4081         .addImm(0)
4082         .addReg(Op0)
4083         .addImm(AArch64::sub_32);
4084     Op0 = TmpReg;
4085   }
4086   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4087 }
4088 
4089 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4090                                      unsigned Op1Reg) {
4091   unsigned Opc = 0;
4092   bool NeedTrunc = false;
4093   uint64_t Mask = 0;
4094   switch (RetVT.SimpleTy) {
4095   default: return 0;
4096   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4097   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4098   case MVT::i32: Opc = AArch64::LSRVWr; break;
4099   case MVT::i64: Opc = AArch64::LSRVXr; break;
4100   }
4101 
4102   const TargetRegisterClass *RC =
4103       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4104   if (NeedTrunc) {
4105     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4106     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4107   }
4108   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4109   if (NeedTrunc)
4110     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4111   return ResultReg;
4112 }
4113 
4114 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4115                                      uint64_t Shift, bool IsZExt) {
4116   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4117          "Unexpected source/return type pair.");
4118   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4119           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4120          "Unexpected source value type.");
4121   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4122           RetVT == MVT::i64) && "Unexpected return value type.");
4123 
4124   bool Is64Bit = (RetVT == MVT::i64);
4125   unsigned RegSize = Is64Bit ? 64 : 32;
4126   unsigned DstBits = RetVT.getSizeInBits();
4127   unsigned SrcBits = SrcVT.getSizeInBits();
4128   const TargetRegisterClass *RC =
4129       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4130 
4131   // Just emit a copy for "zero" shifts.
4132   if (Shift == 0) {
4133     if (RetVT == SrcVT) {
4134       unsigned ResultReg = createResultReg(RC);
4135       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4136               TII.get(TargetOpcode::COPY), ResultReg)
4137       .addReg(Op0);
4138       return ResultReg;
4139     } else
4140       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4141   }
4142 
4143   // Don't deal with undefined shifts.
4144   if (Shift >= DstBits)
4145     return 0;
4146 
4147   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4148   // {S|U}BFM Wd, Wn, #r, #s
4149   // Wd<s-r:0> = Wn<s:r> when r <= s
4150 
4151   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4152   // %2 = lshr i16 %1, 4
4153   // Wd<7-4:0> = Wn<7:4>
4154   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4155   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4156   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4157 
4158   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4159   // %2 = lshr i16 %1, 8
4160   // Wd<7-7,0> = Wn<7:7>
4161   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4162   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4163   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4164 
4165   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4166   // %2 = lshr i16 %1, 12
4167   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4168   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4169   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4170   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4171 
4172   if (Shift >= SrcBits && IsZExt)
4173     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4174 
4175   // It is not possible to fold a sign-extend into the LShr instruction. In this
4176   // case emit a sign-extend.
4177   if (!IsZExt) {
4178     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4179     if (!Op0)
4180       return 0;
4181     SrcVT = RetVT;
4182     SrcBits = SrcVT.getSizeInBits();
4183     IsZExt = true;
4184   }
4185 
4186   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4187   unsigned ImmS = SrcBits - 1;
4188   static const unsigned OpcTable[2][2] = {
4189     {AArch64::SBFMWri, AArch64::SBFMXri},
4190     {AArch64::UBFMWri, AArch64::UBFMXri}
4191   };
4192   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4193   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4194     Register TmpReg = MRI.createVirtualRegister(RC);
4195     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4196             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4197         .addImm(0)
4198         .addReg(Op0)
4199         .addImm(AArch64::sub_32);
4200     Op0 = TmpReg;
4201   }
4202   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4203 }
4204 
4205 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4206                                      unsigned Op1Reg) {
4207   unsigned Opc = 0;
4208   bool NeedTrunc = false;
4209   uint64_t Mask = 0;
4210   switch (RetVT.SimpleTy) {
4211   default: return 0;
4212   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4213   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4214   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4215   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4216   }
4217 
4218   const TargetRegisterClass *RC =
4219       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4220   if (NeedTrunc) {
4221     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4222     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4223   }
4224   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4225   if (NeedTrunc)
4226     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4227   return ResultReg;
4228 }
4229 
4230 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4231                                      uint64_t Shift, bool IsZExt) {
4232   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4233          "Unexpected source/return type pair.");
4234   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4235           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4236          "Unexpected source value type.");
4237   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4238           RetVT == MVT::i64) && "Unexpected return value type.");
4239 
4240   bool Is64Bit = (RetVT == MVT::i64);
4241   unsigned RegSize = Is64Bit ? 64 : 32;
4242   unsigned DstBits = RetVT.getSizeInBits();
4243   unsigned SrcBits = SrcVT.getSizeInBits();
4244   const TargetRegisterClass *RC =
4245       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4246 
4247   // Just emit a copy for "zero" shifts.
4248   if (Shift == 0) {
4249     if (RetVT == SrcVT) {
4250       unsigned ResultReg = createResultReg(RC);
4251       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4252               TII.get(TargetOpcode::COPY), ResultReg)
4253       .addReg(Op0);
4254       return ResultReg;
4255     } else
4256       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4257   }
4258 
4259   // Don't deal with undefined shifts.
4260   if (Shift >= DstBits)
4261     return 0;
4262 
4263   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4264   // {S|U}BFM Wd, Wn, #r, #s
4265   // Wd<s-r:0> = Wn<s:r> when r <= s
4266 
4267   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4268   // %2 = ashr i16 %1, 4
4269   // Wd<7-4:0> = Wn<7:4>
4270   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4271   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4272   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4273 
4274   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4275   // %2 = ashr i16 %1, 8
4276   // Wd<7-7,0> = Wn<7:7>
4277   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4278   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4279   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4280 
4281   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4282   // %2 = ashr i16 %1, 12
4283   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4284   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4285   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4286   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4287 
4288   if (Shift >= SrcBits && IsZExt)
4289     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4290 
4291   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4292   unsigned ImmS = SrcBits - 1;
4293   static const unsigned OpcTable[2][2] = {
4294     {AArch64::SBFMWri, AArch64::SBFMXri},
4295     {AArch64::UBFMWri, AArch64::UBFMXri}
4296   };
4297   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4298   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4299     Register TmpReg = MRI.createVirtualRegister(RC);
4300     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4301             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4302         .addImm(0)
4303         .addReg(Op0)
4304         .addImm(AArch64::sub_32);
4305     Op0 = TmpReg;
4306   }
4307   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4308 }
4309 
4310 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4311                                      bool IsZExt) {
4312   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4313 
4314   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4315   // DestVT are odd things, so test to make sure that they are both types we can
4316   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4317   // bail out to SelectionDAG.
4318   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4319        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4320       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4321        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4322     return 0;
4323 
4324   unsigned Opc;
4325   unsigned Imm = 0;
4326 
4327   switch (SrcVT.SimpleTy) {
4328   default:
4329     return 0;
4330   case MVT::i1:
4331     return emiti1Ext(SrcReg, DestVT, IsZExt);
4332   case MVT::i8:
4333     if (DestVT == MVT::i64)
4334       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4335     else
4336       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4337     Imm = 7;
4338     break;
4339   case MVT::i16:
4340     if (DestVT == MVT::i64)
4341       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4342     else
4343       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4344     Imm = 15;
4345     break;
4346   case MVT::i32:
4347     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4348     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4349     Imm = 31;
4350     break;
4351   }
4352 
4353   // Handle i8 and i16 as i32.
4354   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4355     DestVT = MVT::i32;
4356   else if (DestVT == MVT::i64) {
4357     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4358     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4359             TII.get(AArch64::SUBREG_TO_REG), Src64)
4360         .addImm(0)
4361         .addReg(SrcReg)
4362         .addImm(AArch64::sub_32);
4363     SrcReg = Src64;
4364   }
4365 
4366   const TargetRegisterClass *RC =
4367       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4368   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4369 }
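// Illustrative examples for emitIntExt (a sketch, not from the original source):
//   zext i8  -> i32 : UBFMWri Wd, Wn, #0, #7   (alias: uxtb wd, wn)
//   sext i16 -> i64 : SUBREG_TO_REG to widen the 32-bit source register, then
//                     SBFMXri Xd, Xn, #0, #15  (alias: sxth xd, wn)
// i1 sources are handled separately by emiti1Ext.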
4370 
4371 static bool isZExtLoad(const MachineInstr *LI) {
4372   switch (LI->getOpcode()) {
4373   default:
4374     return false;
4375   case AArch64::LDURBBi:
4376   case AArch64::LDURHHi:
4377   case AArch64::LDURWi:
4378   case AArch64::LDRBBui:
4379   case AArch64::LDRHHui:
4380   case AArch64::LDRWui:
4381   case AArch64::LDRBBroX:
4382   case AArch64::LDRHHroX:
4383   case AArch64::LDRWroX:
4384   case AArch64::LDRBBroW:
4385   case AArch64::LDRHHroW:
4386   case AArch64::LDRWroW:
4387     return true;
4388   }
4389 }
4390 
4391 static bool isSExtLoad(const MachineInstr *LI) {
4392   switch (LI->getOpcode()) {
4393   default:
4394     return false;
4395   case AArch64::LDURSBWi:
4396   case AArch64::LDURSHWi:
4397   case AArch64::LDURSBXi:
4398   case AArch64::LDURSHXi:
4399   case AArch64::LDURSWi:
4400   case AArch64::LDRSBWui:
4401   case AArch64::LDRSHWui:
4402   case AArch64::LDRSBXui:
4403   case AArch64::LDRSHXui:
4404   case AArch64::LDRSWui:
4405   case AArch64::LDRSBWroX:
4406   case AArch64::LDRSHWroX:
4407   case AArch64::LDRSBXroX:
4408   case AArch64::LDRSHXroX:
4409   case AArch64::LDRSWroX:
4410   case AArch64::LDRSBWroW:
4411   case AArch64::LDRSHWroW:
4412   case AArch64::LDRSBXroW:
4413   case AArch64::LDRSHXroW:
4414   case AArch64::LDRSWroW:
4415     return true;
4416   }
4417 }
4418 
4419 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4420                                          MVT SrcVT) {
4421   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4422   if (!LI || !LI->hasOneUse())
4423     return false;
4424 
4425   // Check if the load instruction has already been selected.
4426   unsigned Reg = lookUpRegForValue(LI);
4427   if (!Reg)
4428     return false;
4429 
4430   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4431   if (!MI)
4432     return false;
4433 
4434   // Check if the correct load instruction has been emitted - SelectionDAG might
4435   // have emitted a zero-extending load, but we need a sign-extending load.
4436   bool IsZExt = isa<ZExtInst>(I);
4437   const auto *LoadMI = MI;
4438   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4439       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4440     Register LoadReg = MI->getOperand(1).getReg();
4441     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4442     assert(LoadMI && "Expected valid instruction");
4443   }
4444   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4445     return false;
4446 
4447   // Nothing to be done.
4448   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4449     updateValueMap(I, Reg);
4450     return true;
4451   }
4452 
4453   if (IsZExt) {
4454     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4455     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4456             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4457         .addImm(0)
4458         .addReg(Reg, getKillRegState(true))
4459         .addImm(AArch64::sub_32);
4460     Reg = Reg64;
4461   } else {
4462     assert((MI->getOpcode() == TargetOpcode::COPY &&
4463             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4464            "Expected copy instruction");
4465     Reg = MI->getOperand(1).getReg();
4466     MachineBasicBlock::iterator I(MI);
4467     removeDeadCode(I, std::next(I));
4468   }
4469   updateValueMap(I, Reg);
4470   return true;
4471 }
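// Illustrative example (assumption-based sketch): for
//   %v = load i8, i8* %p
//   %e = zext i8 %v to i64
// the byte load selected for %v already zero-extends into its 32-bit result
// register, so the extend above only needs a SUBREG_TO_REG re-declaring that
// register as the low half of a 64-bit value; in the sign-extending case the
// 64-bit register defined by the load is reused and the intervening sub_32
// COPY is removed as dead code.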
4472 
4473 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4474   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4475          "Unexpected integer extend instruction.");
4476   MVT RetVT;
4477   MVT SrcVT;
4478   if (!isTypeSupported(I->getType(), RetVT))
4479     return false;
4480 
4481   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4482     return false;
4483 
4484   // Try to optimize already sign-/zero-extended values from load instructions.
4485   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4486     return true;
4487 
4488   unsigned SrcReg = getRegForValue(I->getOperand(0));
4489   if (!SrcReg)
4490     return false;
4491 
4492   // Try to optimize already sign-/zero-extended values from function arguments.
4493   bool IsZExt = isa<ZExtInst>(I);
4494   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4495     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4496       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4497         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4498         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4499                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4500             .addImm(0)
4501             .addReg(SrcReg)
4502             .addImm(AArch64::sub_32);
4503         SrcReg = ResultReg;
4504       }
4505 
4506       updateValueMap(I, SrcReg);
4507       return true;
4508     }
4509   }
4510 
4511   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4512   if (!ResultReg)
4513     return false;
4514 
4515   updateValueMap(I, ResultReg);
4516   return true;
4517 }
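// Illustrative example (not from the original source): when the operand is an
// argument that already carries the matching extension attribute, e.g.
//   define i32 @f(i8 zeroext %a) { %e = zext i8 %a to i32 ... }
// no extend instruction is emitted at all; the argument register is reused
// directly (plus a SUBREG_TO_REG when the result type is i64).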
4518 
4519 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4520   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4521   if (!DestEVT.isSimple())
4522     return false;
4523 
4524   MVT DestVT = DestEVT.getSimpleVT();
4525   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4526     return false;
4527 
4528   unsigned DivOpc;
4529   bool Is64bit = (DestVT == MVT::i64);
4530   switch (ISDOpcode) {
4531   default:
4532     return false;
4533   case ISD::SREM:
4534     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4535     break;
4536   case ISD::UREM:
4537     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4538     break;
4539   }
4540   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4541   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4542   if (!Src0Reg)
4543     return false;
4544 
4545   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4546   if (!Src1Reg)
4547     return false;
4548 
4549   const TargetRegisterClass *RC =
4550       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4551   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4552   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4553   // The remainder is computed as numerator - (quotient * denominator) using the
4554   // MSUB instruction.
4555   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4556   updateValueMap(I, ResultReg);
4557   return true;
4558 }
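// Illustrative example (register names only for exposition):
//   %r = srem i32 %a, %b        ; %a in w0, %b in w1
// is selected as
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0         ; w0 = w0 - (w8 * w1)
// with udiv used instead for urem.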
4559 
4560 bool AArch64FastISel::selectMul(const Instruction *I) {
4561   MVT VT;
4562   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4563     return false;
4564 
4565   if (VT.isVector())
4566     return selectBinaryOp(I, ISD::MUL);
4567 
4568   const Value *Src0 = I->getOperand(0);
4569   const Value *Src1 = I->getOperand(1);
4570   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4571     if (C->getValue().isPowerOf2())
4572       std::swap(Src0, Src1);
4573 
4574   // Try to simplify to a shift instruction.
4575   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4576     if (C->getValue().isPowerOf2()) {
4577       uint64_t ShiftVal = C->getValue().logBase2();
4578       MVT SrcVT = VT;
4579       bool IsZExt = true;
4580       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4581         if (!isIntExtFree(ZExt)) {
4582           MVT VT;
4583           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4584             SrcVT = VT;
4585             IsZExt = true;
4586             Src0 = ZExt->getOperand(0);
4587           }
4588         }
4589       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4590         if (!isIntExtFree(SExt)) {
4591           MVT VT;
4592           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4593             SrcVT = VT;
4594             IsZExt = false;
4595             Src0 = SExt->getOperand(0);
4596           }
4597         }
4598       }
4599 
4600       unsigned Src0Reg = getRegForValue(Src0);
4601       if (!Src0Reg)
4602         return false;
4603 
4604       unsigned ResultReg =
4605           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4606 
4607       if (ResultReg) {
4608         updateValueMap(I, ResultReg);
4609         return true;
4610       }
4611     }
4612 
4613   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4614   if (!Src0Reg)
4615     return false;
4616 
4617   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4618   if (!Src1Reg)
4619     return false;
4620 
4621   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4622 
4623   if (!ResultReg)
4624     return false;
4625 
4626   updateValueMap(I, ResultReg);
4627   return true;
4628 }
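// Illustrative example (sketch): a multiply by a power of two such as
//   %m = mul i32 %a, 8
// is rewritten as a left shift, emitLSL_ri(..., /*Shift=*/3), which encodes as
// a single UBFM (alias lsl w0, w0, #3); when the variable operand is itself a
// free zext/sext, the extension is folded into the same bitfield move.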
4629 
4630 bool AArch64FastISel::selectShift(const Instruction *I) {
4631   MVT RetVT;
4632   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4633     return false;
4634 
4635   if (RetVT.isVector())
4636     return selectOperator(I, I->getOpcode());
4637 
4638   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4639     unsigned ResultReg = 0;
4640     uint64_t ShiftVal = C->getZExtValue();
4641     MVT SrcVT = RetVT;
4642     bool IsZExt = I->getOpcode() != Instruction::AShr;
4643     const Value *Op0 = I->getOperand(0);
4644     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4645       if (!isIntExtFree(ZExt)) {
4646         MVT TmpVT;
4647         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4648           SrcVT = TmpVT;
4649           IsZExt = true;
4650           Op0 = ZExt->getOperand(0);
4651         }
4652       }
4653     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4654       if (!isIntExtFree(SExt)) {
4655         MVT TmpVT;
4656         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4657           SrcVT = TmpVT;
4658           IsZExt = false;
4659           Op0 = SExt->getOperand(0);
4660         }
4661       }
4662     }
4663 
4664     unsigned Op0Reg = getRegForValue(Op0);
4665     if (!Op0Reg)
4666       return false;
4667 
4668     switch (I->getOpcode()) {
4669     default: llvm_unreachable("Unexpected instruction.");
4670     case Instruction::Shl:
4671       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4672       break;
4673     case Instruction::AShr:
4674       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4675       break;
4676     case Instruction::LShr:
4677       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4678       break;
4679     }
4680     if (!ResultReg)
4681       return false;
4682 
4683     updateValueMap(I, ResultReg);
4684     return true;
4685   }
4686 
4687   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4688   if (!Op0Reg)
4689     return false;
4690 
4691   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4692   if (!Op1Reg)
4693     return false;
4694 
4695   unsigned ResultReg = 0;
4696   switch (I->getOpcode()) {
4697   default: llvm_unreachable("Unexpected instruction.");
4698   case Instruction::Shl:
4699     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4700     break;
4701   case Instruction::AShr:
4702     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4703     break;
4704   case Instruction::LShr:
4705     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4706     break;
4707   }
4708 
4709   if (!ResultReg)
4710     return false;
4711 
4712   updateValueMap(I, ResultReg);
4713   return true;
4714 }
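// Illustrative example (sketch, based on the constant-shift path above):
//   %1 = zext i8 %a to i32
//   %2 = shl i32 %1, 4
// folds extension and shift into one bitfield move, roughly
//   ubfiz w0, w0, #4, #8
// while variable shift amounts go through the *_rr helpers (LSLV/ASRV/LSRV).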
4715 
4716 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4717   MVT RetVT, SrcVT;
4718 
4719   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4720     return false;
4721   if (!isTypeLegal(I->getType(), RetVT))
4722     return false;
4723 
4724   unsigned Opc;
4725   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4726     Opc = AArch64::FMOVWSr;
4727   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4728     Opc = AArch64::FMOVXDr;
4729   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4730     Opc = AArch64::FMOVSWr;
4731   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4732     Opc = AArch64::FMOVDXr;
4733   else
4734     return false;
4735 
4736   const TargetRegisterClass *RC = nullptr;
4737   switch (RetVT.SimpleTy) {
4738   default: llvm_unreachable("Unexpected value type.");
4739   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4740   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4741   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4742   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4743   }
4744   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4745   if (!Op0Reg)
4746     return false;
4747 
4748   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4749   if (!ResultReg)
4750     return false;
4751 
4752   updateValueMap(I, ResultReg);
4753   return true;
4754 }
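// Illustrative example (not part of the original source): a scalar bitcast
// between an integer and a floating-point register, e.g.
//   %f = bitcast i32 %x to float
// becomes a single register-file move, here FMOVWSr (fmov s0, w0); the other
// three opcodes in the table cover the i64/f64 and reverse directions.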
4755 
4756 bool AArch64FastISel::selectFRem(const Instruction *I) {
4757   MVT RetVT;
4758   if (!isTypeLegal(I->getType(), RetVT))
4759     return false;
4760 
4761   RTLIB::Libcall LC;
4762   switch (RetVT.SimpleTy) {
4763   default:
4764     return false;
4765   case MVT::f32:
4766     LC = RTLIB::REM_F32;
4767     break;
4768   case MVT::f64:
4769     LC = RTLIB::REM_F64;
4770     break;
4771   }
4772 
4773   ArgListTy Args;
4774   Args.reserve(I->getNumOperands());
4775 
4776   // Populate the argument list.
4777   for (auto &Arg : I->operands()) {
4778     ArgListEntry Entry;
4779     Entry.Val = Arg;
4780     Entry.Ty = Arg->getType();
4781     Args.push_back(Entry);
4782   }
4783 
4784   CallLoweringInfo CLI;
4785   MCContext &Ctx = MF->getContext();
4786   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4787                 TLI.getLibcallName(LC), std::move(Args));
4788   if (!lowerCallTo(CLI))
4789     return false;
4790   updateValueMap(I, CLI.ResultReg);
4791   return true;
4792 }
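// Illustrative example (sketch): frem has no AArch64 instruction, so
//   %r = frem double %a, %b
// is lowered as a runtime call to the RTLIB::REM_F64 routine (fmod), with
// fmodf used for the f32 case; arguments and the result follow the normal
// calling convention via lowerCallTo.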
4793 
4794 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4795   MVT VT;
4796   if (!isTypeLegal(I->getType(), VT))
4797     return false;
4798 
4799   if (!isa<ConstantInt>(I->getOperand(1)))
4800     return selectBinaryOp(I, ISD::SDIV);
4801 
4802   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4803   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4804       !(C.isPowerOf2() || (-C).isPowerOf2()))
4805     return selectBinaryOp(I, ISD::SDIV);
4806 
4807   unsigned Lg2 = C.countTrailingZeros();
4808   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4809   if (!Src0Reg)
4810     return false;
4811 
4812   if (cast<BinaryOperator>(I)->isExact()) {
4813     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4814     if (!ResultReg)
4815       return false;
4816     updateValueMap(I, ResultReg);
4817     return true;
4818   }
4819 
4820   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4821   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4822   if (!AddReg)
4823     return false;
4824 
4825   // (Src0 < 0) ? Pow2 - 1 : 0;
4826   if (!emitICmp_ri(VT, Src0Reg, 0))
4827     return false;
4828 
4829   unsigned SelectOpc;
4830   const TargetRegisterClass *RC;
4831   if (VT == MVT::i64) {
4832     SelectOpc = AArch64::CSELXr;
4833     RC = &AArch64::GPR64RegClass;
4834   } else {
4835     SelectOpc = AArch64::CSELWr;
4836     RC = &AArch64::GPR32RegClass;
4837   }
4838   unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4839                                         AArch64CC::LT);
4840   if (!SelectReg)
4841     return false;
4842 
4843   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4844   // negate the result.
4845   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4846   unsigned ResultReg;
4847   if (C.isNegative())
4848     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4849                               AArch64_AM::ASR, Lg2);
4850   else
4851     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4852 
4853   if (!ResultReg)
4854     return false;
4855 
4856   updateValueMap(I, ResultReg);
4857   return true;
4858 }
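// Illustrative example (register names only for exposition): a non-exact
//   %q = sdiv i32 %x, 4        ; %x in w0
// is selected roughly as
//   add  w8, w0, #3            ; bias by (4 - 1) for negative dividends
//   cmp  w0, #0
//   csel w8, w8, w0, lt
//   asr  w0, w8, #2
// and for a divisor of -4 the final shift is folded into a negate
// (sub w0, wzr, w8, asr #2).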
4859 
4860 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4861 /// have to duplicate it for AArch64, because otherwise we would fail during the
4862 /// sign-extend emission.
4863 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4864   unsigned IdxN = getRegForValue(Idx);
4865   if (IdxN == 0)
4866     // Unhandled operand. Halt "fast" selection and bail.
4867     return 0;
4868 
4869   // If the index is smaller or larger than intptr_t, truncate or extend it.
4870   MVT PtrVT = TLI.getPointerTy(DL);
4871   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4872   if (IdxVT.bitsLT(PtrVT)) {
4873     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4874   } else if (IdxVT.bitsGT(PtrVT))
4875     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4876   return IdxN;
4877 }
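// Illustrative note (sketch): an index narrower than the 64-bit pointer type,
// e.g. the i32 %i in "getelementptr i32, i32* %p, i32 %i", is sign-extended
// here via emitIntExt before it is scaled and added by the GEP lowering below.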
4878 
4879 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4880 /// duplicate it for AArch64, because otherwise we would bail out even for
4881 /// simple cases. This is because the standard fastEmit functions don't cover
4882 /// MUL at all and ADD is lowered very inefficiently.
4883 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4884   if (Subtarget->isTargetILP32())
4885     return false;
4886 
4887   unsigned N = getRegForValue(I->getOperand(0));
4888   if (!N)
4889     return false;
4890 
4891   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4892   // into a single N = N + TotalOffset.
4893   uint64_t TotalOffs = 0;
4894   MVT VT = TLI.getPointerTy(DL);
4895   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4896        GTI != E; ++GTI) {
4897     const Value *Idx = GTI.getOperand();
4898     if (auto *StTy = GTI.getStructTypeOrNull()) {
4899       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4900       // N = N + Offset
4901       if (Field)
4902         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4903     } else {
4904       Type *Ty = GTI.getIndexedType();
4905 
4906       // If this is a constant subscript, handle it quickly.
4907       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4908         if (CI->isZero())
4909           continue;
4910         // N = N + Offset
4911         TotalOffs +=
4912             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4913         continue;
4914       }
4915       if (TotalOffs) {
4916         N = emitAdd_ri_(VT, N, TotalOffs);
4917         if (!N)
4918           return false;
4919         TotalOffs = 0;
4920       }
4921 
4922       // N = N + Idx * ElementSize;
4923       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4924       unsigned IdxN = getRegForGEPIndex(Idx);
4925       if (!IdxN)
4926         return false;
4927 
4928       if (ElementSize != 1) {
4929         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4930         if (!C)
4931           return false;
4932         IdxN = emitMul_rr(VT, IdxN, C);
4933         if (!IdxN)
4934           return false;
4935       }
4936       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4937       if (!N)
4938         return false;
4939     }
4940   }
4941   if (TotalOffs) {
4942     N = emitAdd_ri_(VT, N, TotalOffs);
4943     if (!N)
4944       return false;
4945   }
4946   updateValueMap(I, N);
4947   return true;
4948 }
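// Illustrative example (layout figures assume the usual AArch64 DataLayout):
//   %p = getelementptr { i32, i64 }, { i32, i64 }* %b, i64 1, i32 1
// has only constant indices, so TotalOffs = 16 + 8 and a single
//   add x0, x0, #24
// is emitted; a variable index instead costs a constant materialization and
// an emitMul_rr to scale by the element size, followed by an add.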
4949 
4950 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4951   assert(TM.getOptLevel() == CodeGenOpt::None &&
4952          "cmpxchg survived AtomicExpand at optlevel > -O0");
4953 
4954   auto *RetPairTy = cast<StructType>(I->getType());
4955   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4956   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4957          "cmpxchg has a non-i1 status result");
4958 
4959   MVT VT;
4960   if (!isTypeLegal(RetTy, VT))
4961     return false;
4962 
4963   const TargetRegisterClass *ResRC;
4964   unsigned Opc, CmpOpc;
4965   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4966   // extractvalue selection doesn't support that.
4967   if (VT == MVT::i32) {
4968     Opc = AArch64::CMP_SWAP_32;
4969     CmpOpc = AArch64::SUBSWrs;
4970     ResRC = &AArch64::GPR32RegClass;
4971   } else if (VT == MVT::i64) {
4972     Opc = AArch64::CMP_SWAP_64;
4973     CmpOpc = AArch64::SUBSXrs;
4974     ResRC = &AArch64::GPR64RegClass;
4975   } else {
4976     return false;
4977   }
4978 
4979   const MCInstrDesc &II = TII.get(Opc);
4980 
4981   const unsigned AddrReg = constrainOperandRegClass(
4982       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4983   const unsigned DesiredReg = constrainOperandRegClass(
4984       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4985   const unsigned NewReg = constrainOperandRegClass(
4986       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
4987 
4988   const unsigned ResultReg1 = createResultReg(ResRC);
4989   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
4990   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
4991 
4992   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
4993   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4994       .addDef(ResultReg1)
4995       .addDef(ScratchReg)
4996       .addUse(AddrReg)
4997       .addUse(DesiredReg)
4998       .addUse(NewReg);
4999 
5000   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5001       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5002       .addUse(ResultReg1)
5003       .addUse(DesiredReg)
5004       .addImm(0);
5005 
5006   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5007       .addDef(ResultReg2)
5008       .addUse(AArch64::WZR)
5009       .addUse(AArch64::WZR)
5010       .addImm(AArch64CC::NE);
5011 
5012   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5013   updateValueMap(I, ResultReg1, 2);
5014   return true;
5015 }
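// Illustrative example (sketch): for
//   %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
// the CMP_SWAP_32 pseudo (expanded after ISel into an LDAXR/STLXR loop)
// produces the loaded value, then
//   subs wzr, <loaded>, <expected>
//   csinc <success>, wzr, wzr, ne     ; i.e. cset <success>, eq
// materializes the i1 status field of the result pair.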
5016 
5017 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5018   switch (I->getOpcode()) {
5019   default:
5020     break;
5021   case Instruction::Add:
5022   case Instruction::Sub:
5023     return selectAddSub(I);
5024   case Instruction::Mul:
5025     return selectMul(I);
5026   case Instruction::SDiv:
5027     return selectSDiv(I);
5028   case Instruction::SRem:
5029     if (!selectBinaryOp(I, ISD::SREM))
5030       return selectRem(I, ISD::SREM);
5031     return true;
5032   case Instruction::URem:
5033     if (!selectBinaryOp(I, ISD::UREM))
5034       return selectRem(I, ISD::UREM);
5035     return true;
5036   case Instruction::Shl:
5037   case Instruction::LShr:
5038   case Instruction::AShr:
5039     return selectShift(I);
5040   case Instruction::And:
5041   case Instruction::Or:
5042   case Instruction::Xor:
5043     return selectLogicalOp(I);
5044   case Instruction::Br:
5045     return selectBranch(I);
5046   case Instruction::IndirectBr:
5047     return selectIndirectBr(I);
5048   case Instruction::BitCast:
5049     if (!FastISel::selectBitCast(I))
5050       return selectBitCast(I);
5051     return true;
5052   case Instruction::FPToSI:
5053     if (!selectCast(I, ISD::FP_TO_SINT))
5054       return selectFPToInt(I, /*Signed=*/true);
5055     return true;
5056   case Instruction::FPToUI:
5057     return selectFPToInt(I, /*Signed=*/false);
5058   case Instruction::ZExt:
5059   case Instruction::SExt:
5060     return selectIntExt(I);
5061   case Instruction::Trunc:
5062     if (!selectCast(I, ISD::TRUNCATE))
5063       return selectTrunc(I);
5064     return true;
5065   case Instruction::FPExt:
5066     return selectFPExt(I);
5067   case Instruction::FPTrunc:
5068     return selectFPTrunc(I);
5069   case Instruction::SIToFP:
5070     if (!selectCast(I, ISD::SINT_TO_FP))
5071       return selectIntToFP(I, /*Signed=*/true);
5072     return true;
5073   case Instruction::UIToFP:
5074     return selectIntToFP(I, /*Signed=*/false);
5075   case Instruction::Load:
5076     return selectLoad(I);
5077   case Instruction::Store:
5078     return selectStore(I);
5079   case Instruction::FCmp:
5080   case Instruction::ICmp:
5081     return selectCmp(I);
5082   case Instruction::Select:
5083     return selectSelect(I);
5084   case Instruction::Ret:
5085     return selectRet(I);
5086   case Instruction::FRem:
5087     return selectFRem(I);
5088   case Instruction::GetElementPtr:
5089     return selectGetElementPtr(I);
5090   case Instruction::AtomicCmpXchg:
5091     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5092   }
5093 
5094   // Fall back to target-independent instruction selection.
5095   return selectOperator(I, I->getOpcode());
5096 }
5097 
5098 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5099                                         const TargetLibraryInfo *LibInfo) {
5100   return new AArch64FastISel(FuncInfo, LibInfo);
5101 }
5102