//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
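  // These inspect IR values and addresses before any machine instructions are
  // built; they fail conservatively so selection can fall back to SelectionDAG.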
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
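  // Each emit* routine returns the register holding the result (or a bool for
  // the compare/store variants) and returns 0/false when the operands cannot
  // be encoded, letting the caller try another strategy.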
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
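  // Materialization hooks called by the target-independent FastISel when a
  // constant, global or static alloca address is needed in a register.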
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  if (Subtarget->isTargetWindows())
    return CC_AArch64_Win64PCS;
  return CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
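  // In the cases handled below, the access is an ADRP of the page followed by
  // either an add of the low 12 bits or a GOT page-offset load, e.g. roughly
  //   adrp x0, g ; add x0, x0, :lo12:g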
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
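    // e.g. for 'getelementptr i32, ptr %p, i64 4' the constant index simply
    // adds 16 bytes to the running offset.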
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
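        // e.g. an index produced by 'zext i32 %i to i64' can instead become a
        // UXTW-extended register offset, avoiding a separate extend.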
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
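    // For a store, operand 0 of the instruction is the value being stored, so
    // the address operands start one slot later than they do for a load.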
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
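  // A multiply by a power of two on the RHS maps onto the shifted-register
  // form, e.g. 'a + (b * 8)' can be emitted as 'add Xd, Xn, Xm, lsl #3'.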
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr, AArch64::SUBXrr },
      { AArch64::ADDWrr, AArch64::ADDXrr } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri, AArch64::SUBXri },
      { AArch64::ADDWri, AArch64::ADDXri } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs, AArch64::SUBXrs },
      { AArch64::ADDWrs, AArch64::ADDXrs } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx, AArch64::SUBXrx },
      { AArch64::ADDWrx, AArch64::ADDXrx } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
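    // The *ri forms compare against an implicit +0.0, so the RHS constant
    // needs no register.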
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
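  // As for add/sub above, a power-of-two multiply becomes a shifted-register
  // operand, e.g. 'x & (y * 4)' can use 'and Wd, Wn, Wm, lsl #2'.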
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
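  // (Shifting by the bit width or more is poison in IR, and the shifted
  // register forms cannot encode such amounts anyway.)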
1719 if (ShiftImm >= RetVT.getSizeInBits()) 1720 return 0; 1721 1722 const TargetRegisterClass *RC; 1723 unsigned Opc; 1724 switch (RetVT.SimpleTy) { 1725 default: 1726 return 0; 1727 case MVT::i1: 1728 case MVT::i8: 1729 case MVT::i16: 1730 case MVT::i32: 1731 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1732 RC = &AArch64::GPR32RegClass; 1733 break; 1734 case MVT::i64: 1735 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1736 RC = &AArch64::GPR64RegClass; 1737 break; 1738 } 1739 Register ResultReg = 1740 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1741 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1742 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1743 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1744 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1745 } 1746 return ResultReg; 1747 } 1748 1749 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1750 uint64_t Imm) { 1751 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1752 } 1753 1754 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1755 bool WantZExt, MachineMemOperand *MMO) { 1756 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1757 return 0; 1758 1759 // Simplify this down to something we can handle. 1760 if (!simplifyAddress(Addr, VT)) 1761 return 0; 1762 1763 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1764 if (!ScaleFactor) 1765 llvm_unreachable("Unexpected value type."); 1766 1767 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1768 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1769 bool UseScaled = true; 1770 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1771 UseScaled = false; 1772 ScaleFactor = 1; 1773 } 1774 1775 static const unsigned GPOpcTable[2][8][4] = { 1776 // Sign-extend. 1777 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1778 AArch64::LDURXi }, 1779 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1780 AArch64::LDURXi }, 1781 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1782 AArch64::LDRXui }, 1783 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1784 AArch64::LDRXui }, 1785 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1786 AArch64::LDRXroX }, 1787 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1788 AArch64::LDRXroX }, 1789 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1790 AArch64::LDRXroW }, 1791 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1792 AArch64::LDRXroW } 1793 }, 1794 // Zero-extend. 
1795 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1796 AArch64::LDURXi }, 1797 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1798 AArch64::LDURXi }, 1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1800 AArch64::LDRXui }, 1801 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1802 AArch64::LDRXui }, 1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1804 AArch64::LDRXroX }, 1805 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1806 AArch64::LDRXroX }, 1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1808 AArch64::LDRXroW }, 1809 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1810 AArch64::LDRXroW } 1811 } 1812 }; 1813 1814 static const unsigned FPOpcTable[4][2] = { 1815 { AArch64::LDURSi, AArch64::LDURDi }, 1816 { AArch64::LDRSui, AArch64::LDRDui }, 1817 { AArch64::LDRSroX, AArch64::LDRDroX }, 1818 { AArch64::LDRSroW, AArch64::LDRDroW } 1819 }; 1820 1821 unsigned Opc; 1822 const TargetRegisterClass *RC; 1823 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1824 Addr.getOffsetReg(); 1825 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1826 if (Addr.getExtendType() == AArch64_AM::UXTW || 1827 Addr.getExtendType() == AArch64_AM::SXTW) 1828 Idx++; 1829 1830 bool IsRet64Bit = RetVT == MVT::i64; 1831 switch (VT.SimpleTy) { 1832 default: 1833 llvm_unreachable("Unexpected value type."); 1834 case MVT::i1: // Intentional fall-through. 1835 case MVT::i8: 1836 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1837 RC = (IsRet64Bit && !WantZExt) ? 1838 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1839 break; 1840 case MVT::i16: 1841 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1842 RC = (IsRet64Bit && !WantZExt) ? 1843 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1844 break; 1845 case MVT::i32: 1846 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1847 RC = (IsRet64Bit && !WantZExt) ? 1848 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1849 break; 1850 case MVT::i64: 1851 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1852 RC = &AArch64::GPR64RegClass; 1853 break; 1854 case MVT::f32: 1855 Opc = FPOpcTable[Idx][0]; 1856 RC = &AArch64::FPR32RegClass; 1857 break; 1858 case MVT::f64: 1859 Opc = FPOpcTable[Idx][1]; 1860 RC = &AArch64::FPR64RegClass; 1861 break; 1862 } 1863 1864 // Create the base instruction, then add the operands. 1865 Register ResultReg = createResultReg(RC); 1866 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1867 TII.get(Opc), ResultReg); 1868 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1869 1870 // Loading an i1 requires special handling. 1871 if (VT == MVT::i1) { 1872 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1873 assert(ANDReg && "Unexpected AND instruction emission failure."); 1874 ResultReg = ANDReg; 1875 } 1876 1877 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1878 // the 32bit reg to a 64bit reg. 
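// Note that the SUBREG_TO_REG emitted below is only an annotation for the
// register allocator: a 32-bit register write already zeroes the upper 32
// bits on AArch64, so no additional instruction is needed for the
// zero-extension to 64 bits.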
1879 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1880 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1881 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1882 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1883 .addImm(0) 1884 .addReg(ResultReg, getKillRegState(true)) 1885 .addImm(AArch64::sub_32); 1886 ResultReg = Reg64; 1887 } 1888 return ResultReg; 1889 } 1890 1891 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1892 MVT VT; 1893 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1894 return false; 1895 1896 if (VT.isVector()) 1897 return selectOperator(I, I->getOpcode()); 1898 1899 unsigned ResultReg; 1900 switch (I->getOpcode()) { 1901 default: 1902 llvm_unreachable("Unexpected instruction."); 1903 case Instruction::Add: 1904 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1905 break; 1906 case Instruction::Sub: 1907 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1908 break; 1909 } 1910 if (!ResultReg) 1911 return false; 1912 1913 updateValueMap(I, ResultReg); 1914 return true; 1915 } 1916 1917 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1918 MVT VT; 1919 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1920 return false; 1921 1922 if (VT.isVector()) 1923 return selectOperator(I, I->getOpcode()); 1924 1925 unsigned ResultReg; 1926 switch (I->getOpcode()) { 1927 default: 1928 llvm_unreachable("Unexpected instruction."); 1929 case Instruction::And: 1930 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1931 break; 1932 case Instruction::Or: 1933 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1934 break; 1935 case Instruction::Xor: 1936 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1937 break; 1938 } 1939 if (!ResultReg) 1940 return false; 1941 1942 updateValueMap(I, ResultReg); 1943 return true; 1944 } 1945 1946 bool AArch64FastISel::selectLoad(const Instruction *I) { 1947 MVT VT; 1948 // Verify we have a legal type before going any further. Currently, we handle 1949 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1950 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1951 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1952 cast<LoadInst>(I)->isAtomic()) 1953 return false; 1954 1955 const Value *SV = I->getOperand(0); 1956 if (TLI.supportSwiftError()) { 1957 // Swifterror values can come from either a function parameter with 1958 // swifterror attribute or an alloca with swifterror attribute. 1959 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1960 if (Arg->hasSwiftErrorAttr()) 1961 return false; 1962 } 1963 1964 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1965 if (Alloca->isSwiftError()) 1966 return false; 1967 } 1968 } 1969 1970 // See if we can handle this address. 1971 Address Addr; 1972 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1973 return false; 1974 1975 // Fold the following sign-/zero-extend into the load instruction. 
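// Illustrative example: for
//   %v = load i8, ptr %p
//   %e = zext i8 %v to i32
// with the extend being the only user, the whole pattern becomes a single
// "ldrb w_e, [x_p]" instead of a load followed by a separate masking
// operation for the extend.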
1976 bool WantZExt = true; 1977 MVT RetVT = VT; 1978 const Value *IntExtVal = nullptr; 1979 if (I->hasOneUse()) { 1980 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1981 if (isTypeSupported(ZE->getType(), RetVT)) 1982 IntExtVal = ZE; 1983 else 1984 RetVT = VT; 1985 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1986 if (isTypeSupported(SE->getType(), RetVT)) 1987 IntExtVal = SE; 1988 else 1989 RetVT = VT; 1990 WantZExt = false; 1991 } 1992 } 1993 1994 unsigned ResultReg = 1995 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1996 if (!ResultReg) 1997 return false; 1998 1999 // There are a few different cases we have to handle, because the load or the 2000 // sign-/zero-extend might not be selected by FastISel if we fall-back to 2001 // SelectionDAG. There is also an ordering issue when both instructions are in 2002 // different basic blocks. 2003 // 1.) The load instruction is selected by FastISel, but the integer extend 2004 // not. This usually happens when the integer extend is in a different 2005 // basic block and SelectionDAG took over for that basic block. 2006 // 2.) The load instruction is selected before the integer extend. This only 2007 // happens when the integer extend is in a different basic block. 2008 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2009 // by FastISel. This happens if there are instructions between the load 2010 // and the integer extend that couldn't be selected by FastISel. 2011 if (IntExtVal) { 2012 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2013 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2014 // it when it selects the integer extend. 2015 Register Reg = lookUpRegForValue(IntExtVal); 2016 auto *MI = MRI.getUniqueVRegDef(Reg); 2017 if (!MI) { 2018 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2019 if (WantZExt) { 2020 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2021 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2022 ResultReg = std::prev(I)->getOperand(0).getReg(); 2023 removeDeadCode(I, std::next(I)); 2024 } else 2025 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2026 AArch64::sub_32); 2027 } 2028 updateValueMap(I, ResultReg); 2029 return true; 2030 } 2031 2032 // The integer extend has already been emitted - delete all the instructions 2033 // that have been emitted by the integer extend lowering code and use the 2034 // result from the load instruction directly. 
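// Walk and delete the chain of instructions that were emitted for the extend:
// each step removes the current instruction and follows the first register it
// reads back to its (now dead) defining instruction, until the chain ends.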
2035 while (MI) { 2036 Reg = 0; 2037 for (auto &Opnd : MI->uses()) { 2038 if (Opnd.isReg()) { 2039 Reg = Opnd.getReg(); 2040 break; 2041 } 2042 } 2043 MachineBasicBlock::iterator I(MI); 2044 removeDeadCode(I, std::next(I)); 2045 MI = nullptr; 2046 if (Reg) 2047 MI = MRI.getUniqueVRegDef(Reg); 2048 } 2049 updateValueMap(IntExtVal, ResultReg); 2050 return true; 2051 } 2052 2053 updateValueMap(I, ResultReg); 2054 return true; 2055 } 2056 2057 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2058 unsigned AddrReg, 2059 MachineMemOperand *MMO) { 2060 unsigned Opc; 2061 switch (VT.SimpleTy) { 2062 default: return false; 2063 case MVT::i8: Opc = AArch64::STLRB; break; 2064 case MVT::i16: Opc = AArch64::STLRH; break; 2065 case MVT::i32: Opc = AArch64::STLRW; break; 2066 case MVT::i64: Opc = AArch64::STLRX; break; 2067 } 2068 2069 const MCInstrDesc &II = TII.get(Opc); 2070 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2071 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2072 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2073 .addReg(SrcReg) 2074 .addReg(AddrReg) 2075 .addMemOperand(MMO); 2076 return true; 2077 } 2078 2079 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2080 MachineMemOperand *MMO) { 2081 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2082 return false; 2083 2084 // Simplify this down to something we can handle. 2085 if (!simplifyAddress(Addr, VT)) 2086 return false; 2087 2088 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2089 if (!ScaleFactor) 2090 llvm_unreachable("Unexpected value type."); 2091 2092 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2093 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2094 bool UseScaled = true; 2095 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2096 UseScaled = false; 2097 ScaleFactor = 1; 2098 } 2099 2100 static const unsigned OpcTable[4][6] = { 2101 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2102 AArch64::STURSi, AArch64::STURDi }, 2103 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2104 AArch64::STRSui, AArch64::STRDui }, 2105 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2106 AArch64::STRSroX, AArch64::STRDroX }, 2107 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2108 AArch64::STRSroW, AArch64::STRDroW } 2109 }; 2110 2111 unsigned Opc; 2112 bool VTIsi1 = false; 2113 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2114 Addr.getOffsetReg(); 2115 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2116 if (Addr.getExtendType() == AArch64_AM::UXTW || 2117 Addr.getExtendType() == AArch64_AM::SXTW) 2118 Idx++; 2119 2120 switch (VT.SimpleTy) { 2121 default: llvm_unreachable("Unexpected value type."); 2122 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2123 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2124 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2125 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2126 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2127 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2128 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2129 } 2130 2131 // Storing an i1 requires special handling. 2132 if (VTIsi1 && SrcReg != AArch64::WZR) { 2133 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2134 assert(ANDReg && "Unexpected AND instruction emission failure."); 2135 SrcReg = ANDReg; 2136 } 2137 // Create the base instruction, then add the operands. 
2138 const MCInstrDesc &II = TII.get(Opc); 2139 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2140 MachineInstrBuilder MIB = 2141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2142 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2143 2144 return true; 2145 } 2146 2147 bool AArch64FastISel::selectStore(const Instruction *I) { 2148 MVT VT; 2149 const Value *Op0 = I->getOperand(0); 2150 // Verify we have a legal type before going any further. Currently, we handle 2151 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2152 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2153 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2154 return false; 2155 2156 const Value *PtrV = I->getOperand(1); 2157 if (TLI.supportSwiftError()) { 2158 // Swifterror values can come from either a function parameter with 2159 // swifterror attribute or an alloca with swifterror attribute. 2160 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2161 if (Arg->hasSwiftErrorAttr()) 2162 return false; 2163 } 2164 2165 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2166 if (Alloca->isSwiftError()) 2167 return false; 2168 } 2169 } 2170 2171 // Get the value to be stored into a register. Use the zero register directly 2172 // when possible to avoid an unnecessary copy and a wasted register. 2173 unsigned SrcReg = 0; 2174 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2175 if (CI->isZero()) 2176 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2177 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2178 if (CF->isZero() && !CF->isNegative()) { 2179 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2180 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2181 } 2182 } 2183 2184 if (!SrcReg) 2185 SrcReg = getRegForValue(Op0); 2186 2187 if (!SrcReg) 2188 return false; 2189 2190 auto *SI = cast<StoreInst>(I); 2191 2192 // Try to emit a STLR for seq_cst/release. 2193 if (SI->isAtomic()) { 2194 AtomicOrdering Ord = SI->getOrdering(); 2195 // The non-atomic instructions are sufficient for relaxed stores. 2196 if (isReleaseOrStronger(Ord)) { 2197 // The STLR addressing mode only supports a base reg; pass that directly. 2198 Register AddrReg = getRegForValue(PtrV); 2199 return emitStoreRelease(VT, SrcReg, AddrReg, 2200 createMachineMemOperandFor(I)); 2201 } 2202 } 2203 2204 // See if we can handle this address. 2205 Address Addr; 2206 if (!computeAddress(PtrV, Addr, Op0->getType())) 2207 return false; 2208 2209 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2210 return false; 2211 return true; 2212 } 2213 2214 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2215 switch (Pred) { 2216 case CmpInst::FCMP_ONE: 2217 case CmpInst::FCMP_UEQ: 2218 default: 2219 // AL is our "false" for now. The other two need more compares. 
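// (FCMP_UEQ and FCMP_ONE are handled by the callers with a second condition
// code; see selectCmp, selectBranch and selectSelect.)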
2220 return AArch64CC::AL; 2221 case CmpInst::ICMP_EQ: 2222 case CmpInst::FCMP_OEQ: 2223 return AArch64CC::EQ; 2224 case CmpInst::ICMP_SGT: 2225 case CmpInst::FCMP_OGT: 2226 return AArch64CC::GT; 2227 case CmpInst::ICMP_SGE: 2228 case CmpInst::FCMP_OGE: 2229 return AArch64CC::GE; 2230 case CmpInst::ICMP_UGT: 2231 case CmpInst::FCMP_UGT: 2232 return AArch64CC::HI; 2233 case CmpInst::FCMP_OLT: 2234 return AArch64CC::MI; 2235 case CmpInst::ICMP_ULE: 2236 case CmpInst::FCMP_OLE: 2237 return AArch64CC::LS; 2238 case CmpInst::FCMP_ORD: 2239 return AArch64CC::VC; 2240 case CmpInst::FCMP_UNO: 2241 return AArch64CC::VS; 2242 case CmpInst::FCMP_UGE: 2243 return AArch64CC::PL; 2244 case CmpInst::ICMP_SLT: 2245 case CmpInst::FCMP_ULT: 2246 return AArch64CC::LT; 2247 case CmpInst::ICMP_SLE: 2248 case CmpInst::FCMP_ULE: 2249 return AArch64CC::LE; 2250 case CmpInst::FCMP_UNE: 2251 case CmpInst::ICMP_NE: 2252 return AArch64CC::NE; 2253 case CmpInst::ICMP_UGE: 2254 return AArch64CC::HS; 2255 case CmpInst::ICMP_ULT: 2256 return AArch64CC::LO; 2257 } 2258 } 2259 2260 /// Try to emit a combined compare-and-branch instruction. 2261 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2262 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2263 // will not be produced, as they are conditional branch instructions that do 2264 // not set flags. 2265 if (FuncInfo.MF->getFunction().hasFnAttribute( 2266 Attribute::SpeculativeLoadHardening)) 2267 return false; 2268 2269 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2270 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2271 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2272 2273 const Value *LHS = CI->getOperand(0); 2274 const Value *RHS = CI->getOperand(1); 2275 2276 MVT VT; 2277 if (!isTypeSupported(LHS->getType(), VT)) 2278 return false; 2279 2280 unsigned BW = VT.getSizeInBits(); 2281 if (BW > 64) 2282 return false; 2283 2284 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0)); 2285 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1)); 2286 2287 // Try to take advantage of fallthrough opportunities. 
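// If the true block is the layout successor, branch on the inverted predicate
// to the false block and fall through instead, e.g. prefer
//   cbz  w0, <false-bb>    ; fall through to <true-bb>
// over a cbnz to the true block followed by an unconditional branch.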
2288 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2289 std::swap(TBB, FBB); 2290 Predicate = CmpInst::getInversePredicate(Predicate); 2291 } 2292 2293 int TestBit = -1; 2294 bool IsCmpNE; 2295 switch (Predicate) { 2296 default: 2297 return false; 2298 case CmpInst::ICMP_EQ: 2299 case CmpInst::ICMP_NE: 2300 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2301 std::swap(LHS, RHS); 2302 2303 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2304 return false; 2305 2306 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2307 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2308 const Value *AndLHS = AI->getOperand(0); 2309 const Value *AndRHS = AI->getOperand(1); 2310 2311 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2312 if (C->getValue().isPowerOf2()) 2313 std::swap(AndLHS, AndRHS); 2314 2315 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2316 if (C->getValue().isPowerOf2()) { 2317 TestBit = C->getValue().logBase2(); 2318 LHS = AndLHS; 2319 } 2320 } 2321 2322 if (VT == MVT::i1) 2323 TestBit = 0; 2324 2325 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2326 break; 2327 case CmpInst::ICMP_SLT: 2328 case CmpInst::ICMP_SGE: 2329 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2330 return false; 2331 2332 TestBit = BW - 1; 2333 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2334 break; 2335 case CmpInst::ICMP_SGT: 2336 case CmpInst::ICMP_SLE: 2337 if (!isa<ConstantInt>(RHS)) 2338 return false; 2339 2340 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2341 return false; 2342 2343 TestBit = BW - 1; 2344 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2345 break; 2346 } // end switch 2347 2348 static const unsigned OpcTable[2][2][2] = { 2349 { {AArch64::CBZW, AArch64::CBZX }, 2350 {AArch64::CBNZW, AArch64::CBNZX} }, 2351 { {AArch64::TBZW, AArch64::TBZX }, 2352 {AArch64::TBNZW, AArch64::TBNZX} } 2353 }; 2354 2355 bool IsBitTest = TestBit != -1; 2356 bool Is64Bit = BW == 64; 2357 if (TestBit < 32 && TestBit >= 0) 2358 Is64Bit = false; 2359 2360 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2361 const MCInstrDesc &II = TII.get(Opc); 2362 2363 Register SrcReg = getRegForValue(LHS); 2364 if (!SrcReg) 2365 return false; 2366 2367 if (BW == 64 && !Is64Bit) 2368 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2369 2370 if ((BW < 32) && !IsBitTest) 2371 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2372 2373 // Emit the combined compare and branch instruction. 2374 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2375 MachineInstrBuilder MIB = 2376 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2377 .addReg(SrcReg); 2378 if (IsBitTest) 2379 MIB.addImm(TestBit); 2380 MIB.addMBB(TBB); 2381 2382 finishCondBranch(BI->getParent(), TBB, FBB); 2383 return true; 2384 } 2385 2386 bool AArch64FastISel::selectBranch(const Instruction *I) { 2387 const BranchInst *BI = cast<BranchInst>(I); 2388 if (BI->isUnconditional()) { 2389 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0)); 2390 fastEmitBranch(MSucc, BI->getDebugLoc()); 2391 return true; 2392 } 2393 2394 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0)); 2395 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1)); 2396 2397 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2398 if (CI->hasOneUse() && isValueAvailable(CI)) { 2399 // Try to optimize or fold the cmp. 
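// An always-false or always-true compare reduces the conditional branch to an
// unconditional one; the FCMP_FALSE/FCMP_TRUE cases just below handle that.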
2400 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2401 switch (Predicate) { 2402 default: 2403 break; 2404 case CmpInst::FCMP_FALSE: 2405 fastEmitBranch(FBB, MIMD.getDL()); 2406 return true; 2407 case CmpInst::FCMP_TRUE: 2408 fastEmitBranch(TBB, MIMD.getDL()); 2409 return true; 2410 } 2411 2412 // Try to emit a combined compare-and-branch first. 2413 if (emitCompareAndBranch(BI)) 2414 return true; 2415 2416 // Try to take advantage of fallthrough opportunities. 2417 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2418 std::swap(TBB, FBB); 2419 Predicate = CmpInst::getInversePredicate(Predicate); 2420 } 2421 2422 // Emit the cmp. 2423 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2424 return false; 2425 2426 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2427 // instruction. 2428 AArch64CC::CondCode CC = getCompareCC(Predicate); 2429 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2430 switch (Predicate) { 2431 default: 2432 break; 2433 case CmpInst::FCMP_UEQ: 2434 ExtraCC = AArch64CC::EQ; 2435 CC = AArch64CC::VS; 2436 break; 2437 case CmpInst::FCMP_ONE: 2438 ExtraCC = AArch64CC::MI; 2439 CC = AArch64CC::GT; 2440 break; 2441 } 2442 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2443 2444 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2445 if (ExtraCC != AArch64CC::AL) { 2446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2447 .addImm(ExtraCC) 2448 .addMBB(TBB); 2449 } 2450 2451 // Emit the branch. 2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2453 .addImm(CC) 2454 .addMBB(TBB); 2455 2456 finishCondBranch(BI->getParent(), TBB, FBB); 2457 return true; 2458 } 2459 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2460 uint64_t Imm = CI->getZExtValue(); 2461 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2463 .addMBB(Target); 2464 2465 // Obtain the branch probability and add the target to the successor list. 2466 if (FuncInfo.BPI) { 2467 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2468 BI->getParent(), Target->getBasicBlock()); 2469 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2470 } else 2471 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2472 return true; 2473 } else { 2474 AArch64CC::CondCode CC = AArch64CC::NE; 2475 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2476 // Fake request the condition, otherwise the intrinsic might be completely 2477 // optimized away. 2478 Register CondReg = getRegForValue(BI->getCondition()); 2479 if (!CondReg) 2480 return false; 2481 2482 // Emit the branch. 2483 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2484 .addImm(CC) 2485 .addMBB(TBB); 2486 2487 finishCondBranch(BI->getParent(), TBB, FBB); 2488 return true; 2489 } 2490 } 2491 2492 Register CondReg = getRegForValue(BI->getCondition()); 2493 if (CondReg == 0) 2494 return false; 2495 2496 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
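// For example, a plain i1 condition %c becomes "tbnz w_c, #0, <true-bb>", or
// a tbz to the false block when the true block is the fallthrough.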
2497 unsigned Opcode = AArch64::TBNZW; 2498 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2499 std::swap(TBB, FBB); 2500 Opcode = AArch64::TBZW; 2501 } 2502 2503 const MCInstrDesc &II = TII.get(Opcode); 2504 Register ConstrainedCondReg 2505 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2506 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2507 .addReg(ConstrainedCondReg) 2508 .addImm(0) 2509 .addMBB(TBB); 2510 2511 finishCondBranch(BI->getParent(), TBB, FBB); 2512 return true; 2513 } 2514 2515 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2516 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2517 Register AddrReg = getRegForValue(BI->getOperand(0)); 2518 if (AddrReg == 0) 2519 return false; 2520 2521 // Authenticated indirectbr is not implemented yet. 2522 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos")) 2523 return false; 2524 2525 // Emit the indirect branch. 2526 const MCInstrDesc &II = TII.get(AArch64::BR); 2527 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2529 2530 // Make sure the CFG is up-to-date. 2531 for (const auto *Succ : BI->successors()) 2532 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ)); 2533 2534 return true; 2535 } 2536 2537 bool AArch64FastISel::selectCmp(const Instruction *I) { 2538 const CmpInst *CI = cast<CmpInst>(I); 2539 2540 // Vectors of i1 are weird: bail out. 2541 if (CI->getType()->isVectorTy()) 2542 return false; 2543 2544 // Try to optimize or fold the cmp. 2545 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2546 unsigned ResultReg = 0; 2547 switch (Predicate) { 2548 default: 2549 break; 2550 case CmpInst::FCMP_FALSE: 2551 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2553 TII.get(TargetOpcode::COPY), ResultReg) 2554 .addReg(AArch64::WZR, getKillRegState(true)); 2555 break; 2556 case CmpInst::FCMP_TRUE: 2557 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2558 break; 2559 } 2560 2561 if (ResultReg) { 2562 updateValueMap(I, ResultReg); 2563 return true; 2564 } 2565 2566 // Emit the cmp. 2567 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2568 return false; 2569 2570 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2571 2572 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2573 // condition codes are inverted, because they are used by CSINC. 2574 static unsigned CondCodeTable[2][2] = { 2575 { AArch64CC::NE, AArch64CC::VC }, 2576 { AArch64CC::PL, AArch64CC::LE } 2577 }; 2578 unsigned *CondCodes = nullptr; 2579 switch (Predicate) { 2580 default: 2581 break; 2582 case CmpInst::FCMP_UEQ: 2583 CondCodes = &CondCodeTable[0][0]; 2584 break; 2585 case CmpInst::FCMP_ONE: 2586 CondCodes = &CondCodeTable[1][0]; 2587 break; 2588 } 2589 2590 if (CondCodes) { 2591 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2593 TmpReg1) 2594 .addReg(AArch64::WZR, getKillRegState(true)) 2595 .addReg(AArch64::WZR, getKillRegState(true)) 2596 .addImm(CondCodes[0]); 2597 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2598 ResultReg) 2599 .addReg(TmpReg1, getKillRegState(true)) 2600 .addReg(AArch64::WZR, getKillRegState(true)) 2601 .addImm(CondCodes[1]); 2602 2603 updateValueMap(I, ResultReg); 2604 return true; 2605 } 2606 2607 // Now set a register based on the comparison. 
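// Materializing a boolean uses the CSET idiom; CSET for the original
// condition is an alias for "csinc wd, wzr, wzr, <inverted cond>", which is
// why the condition code is inverted before being fed to CSINC below.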
2608 AArch64CC::CondCode CC = getCompareCC(Predicate); 2609 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2610 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2612 ResultReg) 2613 .addReg(AArch64::WZR, getKillRegState(true)) 2614 .addReg(AArch64::WZR, getKillRegState(true)) 2615 .addImm(invertedCC); 2616 2617 updateValueMap(I, ResultReg); 2618 return true; 2619 } 2620 2621 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2622 /// value. 2623 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2624 if (!SI->getType()->isIntegerTy(1)) 2625 return false; 2626 2627 const Value *Src1Val, *Src2Val; 2628 unsigned Opc = 0; 2629 bool NeedExtraOp = false; 2630 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2631 if (CI->isOne()) { 2632 Src1Val = SI->getCondition(); 2633 Src2Val = SI->getFalseValue(); 2634 Opc = AArch64::ORRWrr; 2635 } else { 2636 assert(CI->isZero()); 2637 Src1Val = SI->getFalseValue(); 2638 Src2Val = SI->getCondition(); 2639 Opc = AArch64::BICWrr; 2640 } 2641 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2642 if (CI->isOne()) { 2643 Src1Val = SI->getCondition(); 2644 Src2Val = SI->getTrueValue(); 2645 Opc = AArch64::ORRWrr; 2646 NeedExtraOp = true; 2647 } else { 2648 assert(CI->isZero()); 2649 Src1Val = SI->getCondition(); 2650 Src2Val = SI->getTrueValue(); 2651 Opc = AArch64::ANDWrr; 2652 } 2653 } 2654 2655 if (!Opc) 2656 return false; 2657 2658 Register Src1Reg = getRegForValue(Src1Val); 2659 if (!Src1Reg) 2660 return false; 2661 2662 Register Src2Reg = getRegForValue(Src2Val); 2663 if (!Src2Reg) 2664 return false; 2665 2666 if (NeedExtraOp) 2667 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2668 2669 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2670 Src2Reg); 2671 updateValueMap(SI, ResultReg); 2672 return true; 2673 } 2674 2675 bool AArch64FastISel::selectSelect(const Instruction *I) { 2676 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2677 MVT VT; 2678 if (!isTypeSupported(I->getType(), VT)) 2679 return false; 2680 2681 unsigned Opc; 2682 const TargetRegisterClass *RC; 2683 switch (VT.SimpleTy) { 2684 default: 2685 return false; 2686 case MVT::i1: 2687 case MVT::i8: 2688 case MVT::i16: 2689 case MVT::i32: 2690 Opc = AArch64::CSELWr; 2691 RC = &AArch64::GPR32RegClass; 2692 break; 2693 case MVT::i64: 2694 Opc = AArch64::CSELXr; 2695 RC = &AArch64::GPR64RegClass; 2696 break; 2697 case MVT::f32: 2698 Opc = AArch64::FCSELSrrr; 2699 RC = &AArch64::FPR32RegClass; 2700 break; 2701 case MVT::f64: 2702 Opc = AArch64::FCSELDrrr; 2703 RC = &AArch64::FPR64RegClass; 2704 break; 2705 } 2706 2707 const SelectInst *SI = cast<SelectInst>(I); 2708 const Value *Cond = SI->getCondition(); 2709 AArch64CC::CondCode CC = AArch64CC::NE; 2710 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2711 2712 if (optimizeSelect(SI)) 2713 return true; 2714 2715 // Try to pickup the flags, so we don't have to emit another compare. 2716 if (foldXALUIntrinsic(CC, I, Cond)) { 2717 // Fake request the condition to force emission of the XALU intrinsic. 2718 Register CondReg = getRegForValue(Cond); 2719 if (!CondReg) 2720 return false; 2721 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2722 isValueAvailable(Cond)) { 2723 const auto *Cmp = cast<CmpInst>(Cond); 2724 // Try to optimize or fold the cmp. 
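// An always-false or always-true compare lets the whole select collapse to
// its false or true operand respectively; that case is handled just below.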
2725 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2726 const Value *FoldSelect = nullptr; 2727 switch (Predicate) { 2728 default: 2729 break; 2730 case CmpInst::FCMP_FALSE: 2731 FoldSelect = SI->getFalseValue(); 2732 break; 2733 case CmpInst::FCMP_TRUE: 2734 FoldSelect = SI->getTrueValue(); 2735 break; 2736 } 2737 2738 if (FoldSelect) { 2739 Register SrcReg = getRegForValue(FoldSelect); 2740 if (!SrcReg) 2741 return false; 2742 2743 updateValueMap(I, SrcReg); 2744 return true; 2745 } 2746 2747 // Emit the cmp. 2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2749 return false; 2750 2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2752 CC = getCompareCC(Predicate); 2753 switch (Predicate) { 2754 default: 2755 break; 2756 case CmpInst::FCMP_UEQ: 2757 ExtraCC = AArch64CC::EQ; 2758 CC = AArch64CC::VS; 2759 break; 2760 case CmpInst::FCMP_ONE: 2761 ExtraCC = AArch64CC::MI; 2762 CC = AArch64CC::GT; 2763 break; 2764 } 2765 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2766 } else { 2767 Register CondReg = getRegForValue(Cond); 2768 if (!CondReg) 2769 return false; 2770 2771 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2772 CondReg = constrainOperandRegClass(II, CondReg, 1); 2773 2774 // Emit a TST instruction (ANDS wzr, reg, #imm). 2775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2776 AArch64::WZR) 2777 .addReg(CondReg) 2778 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2779 } 2780 2781 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2782 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2783 2784 if (!Src1Reg || !Src2Reg) 2785 return false; 2786 2787 if (ExtraCC != AArch64CC::AL) 2788 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2789 2790 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2791 updateValueMap(I, ResultReg); 2792 return true; 2793 } 2794 2795 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2796 Value *V = I->getOperand(0); 2797 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2798 return false; 2799 2800 Register Op = getRegForValue(V); 2801 if (Op == 0) 2802 return false; 2803 2804 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2805 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2806 ResultReg).addReg(Op); 2807 updateValueMap(I, ResultReg); 2808 return true; 2809 } 2810 2811 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2812 Value *V = I->getOperand(0); 2813 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2814 return false; 2815 2816 Register Op = getRegForValue(V); 2817 if (Op == 0) 2818 return false; 2819 2820 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2822 ResultReg).addReg(Op); 2823 updateValueMap(I, ResultReg); 2824 return true; 2825 } 2826 2827 // FPToUI and FPToSI 2828 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2829 MVT DestVT; 2830 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2831 return false; 2832 2833 Register SrcReg = getRegForValue(I->getOperand(0)); 2834 if (SrcReg == 0) 2835 return false; 2836 2837 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2838 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) 2839 return false; 2840 2841 unsigned Opc; 2842 if (SrcVT == MVT::f64) { 2843 if (Signed) 2844 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2845 else 2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2847 } else { 2848 if (Signed) 2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2850 else 2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2852 } 2853 Register ResultReg = createResultReg( 2854 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2856 .addReg(SrcReg); 2857 updateValueMap(I, ResultReg); 2858 return true; 2859 } 2860 2861 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2862 MVT DestVT; 2863 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2864 return false; 2865 // Let regular ISEL handle FP16 2866 if (DestVT == MVT::f16 || DestVT == MVT::bf16) 2867 return false; 2868 2869 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2870 "Unexpected value type."); 2871 2872 Register SrcReg = getRegForValue(I->getOperand(0)); 2873 if (!SrcReg) 2874 return false; 2875 2876 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2877 2878 // Handle sign-extension. 2879 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2880 SrcReg = 2881 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2882 if (!SrcReg) 2883 return false; 2884 } 2885 2886 unsigned Opc; 2887 if (SrcVT == MVT::i64) { 2888 if (Signed) 2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2890 else 2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2892 } else { 2893 if (Signed) 2894 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2895 else 2896 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2897 } 2898 2899 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2900 updateValueMap(I, ResultReg); 2901 return true; 2902 } 2903 2904 bool AArch64FastISel::fastLowerArguments() { 2905 if (!FuncInfo.CanLowerReturn) 2906 return false; 2907 2908 const Function *F = FuncInfo.Fn; 2909 if (F->isVarArg()) 2910 return false; 2911 2912 CallingConv::ID CC = F->getCallingConv(); 2913 if (CC != CallingConv::C && CC != CallingConv::Swift) 2914 return false; 2915 2916 if (Subtarget->hasCustomCallingConv()) 2917 return false; 2918 2919 // Only handle simple cases of up to 8 GPR and FPR each. 
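// This mirrors the AAPCS64 register-argument limits (x0-x7 for integer
// arguments, v0-v7 for floating-point/vector arguments); anything that would
// have to be passed on the stack is left to SelectionDAG.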
2920 unsigned GPRCnt = 0; 2921 unsigned FPRCnt = 0; 2922 for (auto const &Arg : F->args()) { 2923 if (Arg.hasAttribute(Attribute::ByVal) || 2924 Arg.hasAttribute(Attribute::InReg) || 2925 Arg.hasAttribute(Attribute::StructRet) || 2926 Arg.hasAttribute(Attribute::SwiftSelf) || 2927 Arg.hasAttribute(Attribute::SwiftAsync) || 2928 Arg.hasAttribute(Attribute::SwiftError) || 2929 Arg.hasAttribute(Attribute::Nest)) 2930 return false; 2931 2932 Type *ArgTy = Arg.getType(); 2933 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2934 return false; 2935 2936 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2937 if (!ArgVT.isSimple()) 2938 return false; 2939 2940 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2941 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2942 return false; 2943 2944 if (VT.isVector() && 2945 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2946 return false; 2947 2948 if (VT >= MVT::i1 && VT <= MVT::i64) 2949 ++GPRCnt; 2950 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2951 VT.is128BitVector()) 2952 ++FPRCnt; 2953 else 2954 return false; 2955 2956 if (GPRCnt > 8 || FPRCnt > 8) 2957 return false; 2958 } 2959 2960 static const MCPhysReg Registers[6][8] = { 2961 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2962 AArch64::W5, AArch64::W6, AArch64::W7 }, 2963 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2964 AArch64::X5, AArch64::X6, AArch64::X7 }, 2965 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2966 AArch64::H5, AArch64::H6, AArch64::H7 }, 2967 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2968 AArch64::S5, AArch64::S6, AArch64::S7 }, 2969 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2970 AArch64::D5, AArch64::D6, AArch64::D7 }, 2971 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2972 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2973 }; 2974 2975 unsigned GPRIdx = 0; 2976 unsigned FPRIdx = 0; 2977 for (auto const &Arg : F->args()) { 2978 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2979 unsigned SrcReg; 2980 const TargetRegisterClass *RC; 2981 if (VT >= MVT::i1 && VT <= MVT::i32) { 2982 SrcReg = Registers[0][GPRIdx++]; 2983 RC = &AArch64::GPR32RegClass; 2984 VT = MVT::i32; 2985 } else if (VT == MVT::i64) { 2986 SrcReg = Registers[1][GPRIdx++]; 2987 RC = &AArch64::GPR64RegClass; 2988 } else if (VT == MVT::f16 || VT == MVT::bf16) { 2989 SrcReg = Registers[2][FPRIdx++]; 2990 RC = &AArch64::FPR16RegClass; 2991 } else if (VT == MVT::f32) { 2992 SrcReg = Registers[3][FPRIdx++]; 2993 RC = &AArch64::FPR32RegClass; 2994 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2995 SrcReg = Registers[4][FPRIdx++]; 2996 RC = &AArch64::FPR64RegClass; 2997 } else if (VT.is128BitVector()) { 2998 SrcReg = Registers[5][FPRIdx++]; 2999 RC = &AArch64::FPR128RegClass; 3000 } else 3001 llvm_unreachable("Unexpected value type."); 3002 3003 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3004 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3005 // Without this, EmitLiveInCopies may eliminate the livein if its only 3006 // use is a bitcast (which isn't turned into an instruction). 
3007 Register ResultReg = createResultReg(RC); 3008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3009 TII.get(TargetOpcode::COPY), ResultReg) 3010 .addReg(DstReg, getKillRegState(true)); 3011 updateValueMap(&Arg, ResultReg); 3012 } 3013 return true; 3014 } 3015 3016 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3017 SmallVectorImpl<MVT> &OutVTs, 3018 unsigned &NumBytes) { 3019 CallingConv::ID CC = CLI.CallConv; 3020 SmallVector<CCValAssign, 16> ArgLocs; 3021 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3022 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3023 3024 // Get a count of how many bytes are to be pushed on the stack. 3025 NumBytes = CCInfo.getStackSize(); 3026 3027 // Issue CALLSEQ_START 3028 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3030 .addImm(NumBytes).addImm(0); 3031 3032 // Process the args. 3033 for (CCValAssign &VA : ArgLocs) { 3034 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3035 MVT ArgVT = OutVTs[VA.getValNo()]; 3036 3037 Register ArgReg = getRegForValue(ArgVal); 3038 if (!ArgReg) 3039 return false; 3040 3041 // Handle arg promotion: SExt, ZExt, AExt. 3042 switch (VA.getLocInfo()) { 3043 case CCValAssign::Full: 3044 break; 3045 case CCValAssign::SExt: { 3046 MVT DestVT = VA.getLocVT(); 3047 MVT SrcVT = ArgVT; 3048 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3049 if (!ArgReg) 3050 return false; 3051 break; 3052 } 3053 case CCValAssign::AExt: 3054 // Intentional fall-through. 3055 case CCValAssign::ZExt: { 3056 MVT DestVT = VA.getLocVT(); 3057 MVT SrcVT = ArgVT; 3058 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3059 if (!ArgReg) 3060 return false; 3061 break; 3062 } 3063 default: 3064 llvm_unreachable("Unknown arg promotion!"); 3065 } 3066 3067 // Now copy/store arg to correct locations. 3068 if (VA.isRegLoc() && !VA.needsCustom()) { 3069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3070 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3071 CLI.OutRegs.push_back(VA.getLocReg()); 3072 } else if (VA.needsCustom()) { 3073 // FIXME: Handle custom args. 3074 return false; 3075 } else { 3076 assert(VA.isMemLoc() && "Assuming store on stack."); 3077 3078 // Don't emit stores for undef values. 3079 if (isa<UndefValue>(ArgVal)) 3080 continue; 3081 3082 // Need to store on the stack. 3083 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3084 3085 unsigned BEAlign = 0; 3086 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3087 BEAlign = 8 - ArgSize; 3088 3089 Address Addr; 3090 Addr.setKind(Address::RegBase); 3091 Addr.setReg(AArch64::SP); 3092 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3093 3094 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3095 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3096 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3097 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3098 3099 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3100 return false; 3101 } 3102 } 3103 return true; 3104 } 3105 3106 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3107 CallingConv::ID CC = CLI.CallConv; 3108 3109 // Issue CALLSEQ_END 3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3112 .addImm(NumBytes).addImm(0); 3113 3114 // Now the return values. 
3115 SmallVector<CCValAssign, 16> RVLocs; 3116 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3117 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3118 3119 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3120 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3121 CCValAssign &VA = RVLocs[i]; 3122 MVT CopyVT = VA.getValVT(); 3123 unsigned CopyReg = ResultReg + i; 3124 3125 // TODO: Handle big-endian results 3126 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3127 return false; 3128 3129 // Copy result out of their specified physreg. 3130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3131 CopyReg) 3132 .addReg(VA.getLocReg()); 3133 CLI.InRegs.push_back(VA.getLocReg()); 3134 } 3135 3136 CLI.ResultReg = ResultReg; 3137 CLI.NumResultRegs = RVLocs.size(); 3138 3139 return true; 3140 } 3141 3142 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3143 CallingConv::ID CC = CLI.CallConv; 3144 bool IsTailCall = CLI.IsTailCall; 3145 bool IsVarArg = CLI.IsVarArg; 3146 const Value *Callee = CLI.Callee; 3147 MCSymbol *Symbol = CLI.Symbol; 3148 3149 if (!Callee && !Symbol) 3150 return false; 3151 3152 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3153 // a bti instruction following the call. 3154 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3155 !Subtarget->noBTIAtReturnTwice() && 3156 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3157 return false; 3158 3159 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3160 if (CLI.CB && CLI.CB->isIndirectCall() && 3161 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3162 return false; 3163 3164 // Allow SelectionDAG isel to handle tail calls. 3165 if (IsTailCall) 3166 return false; 3167 3168 // FIXME: we could and should support this, but for now correctness at -O0 is 3169 // more important. 3170 if (Subtarget->isTargetILP32()) 3171 return false; 3172 3173 CodeModel::Model CM = TM.getCodeModel(); 3174 // Only support the small-addressing and large code models. 3175 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3176 return false; 3177 3178 // FIXME: Add large code model support for ELF. 3179 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3180 return false; 3181 3182 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind 3183 // attribute. Check "RtLibUseGOT" instead. 3184 if (MF->getFunction().getParent()->getRtLibUseGOT()) 3185 return false; 3186 3187 // Let SDISel handle vararg functions. 3188 if (IsVarArg) 3189 return false; 3190 3191 if (Subtarget->isWindowsArm64EC()) 3192 return false; 3193 3194 for (auto Flag : CLI.OutFlags) 3195 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3196 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3197 return false; 3198 3199 // Set up the argument vectors. 3200 SmallVector<MVT, 16> OutVTs; 3201 OutVTs.reserve(CLI.OutVals.size()); 3202 3203 for (auto *Val : CLI.OutVals) { 3204 MVT VT; 3205 if (!isTypeLegal(Val->getType(), VT) && 3206 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3207 return false; 3208 3209 // We don't handle vector parameters yet. 
3210 if (VT.isVector() || VT.getSizeInBits() > 64) 3211 return false; 3212 3213 OutVTs.push_back(VT); 3214 } 3215 3216 Address Addr; 3217 if (Callee && !computeCallAddress(Callee, Addr)) 3218 return false; 3219 3220 // The weak function target may be zero; in that case we must use indirect 3221 // addressing via a stub on windows as it may be out of range for a 3222 // PC-relative jump. 3223 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3224 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3225 return false; 3226 3227 // Handle the arguments now that we've gotten them. 3228 unsigned NumBytes; 3229 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3230 return false; 3231 3232 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3233 if (RegInfo->isAnyArgRegReserved(*MF)) 3234 RegInfo->emitReservedArgRegCallError(*MF); 3235 3236 // Issue the call. 3237 MachineInstrBuilder MIB; 3238 if (Subtarget->useSmallAddressing()) { 3239 const MCInstrDesc &II = 3240 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3241 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3242 if (Symbol) 3243 MIB.addSym(Symbol, 0); 3244 else if (Addr.getGlobalValue()) 3245 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3246 else if (Addr.getReg()) { 3247 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3248 MIB.addReg(Reg); 3249 } else 3250 return false; 3251 } else { 3252 unsigned CallReg = 0; 3253 if (Symbol) { 3254 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3255 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3256 ADRPReg) 3257 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3258 3259 CallReg = createResultReg(&AArch64::GPR64RegClass); 3260 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3261 TII.get(AArch64::LDRXui), CallReg) 3262 .addReg(ADRPReg) 3263 .addSym(Symbol, 3264 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3265 } else if (Addr.getGlobalValue()) 3266 CallReg = materializeGV(Addr.getGlobalValue()); 3267 else if (Addr.getReg()) 3268 CallReg = Addr.getReg(); 3269 3270 if (!CallReg) 3271 return false; 3272 3273 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3274 CallReg = constrainOperandRegClass(II, CallReg, 0); 3275 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3276 } 3277 3278 // Add implicit physical register uses to the call. 3279 for (auto Reg : CLI.OutRegs) 3280 MIB.addReg(Reg, RegState::Implicit); 3281 3282 // Add a register mask with the call-preserved registers. 3283 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3284 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3285 3286 CLI.Call = MIB; 3287 3288 // Finish off the call including any return values. 3289 return finishCall(CLI, NumBytes); 3290 } 3291 3292 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3293 if (Alignment) 3294 return Len / Alignment->value() <= 4; 3295 else 3296 return Len < 32; 3297 } 3298 3299 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3300 uint64_t Len, MaybeAlign Alignment) { 3301 // Make sure we don't bloat code by inlining very large memcpy's. 
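// "Small" is defined by isMemCpySmall above as at most four register-sized
// chunks for the given alignment; e.g. a 16-byte copy with 8-byte alignment
// is expanded to two 64-bit loads and stores rather than a libcall.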
3302 if (!isMemCpySmall(Len, Alignment)) 3303 return false; 3304 3305 int64_t UnscaledOffset = 0; 3306 Address OrigDest = Dest; 3307 Address OrigSrc = Src; 3308 3309 while (Len) { 3310 MVT VT; 3311 if (!Alignment || *Alignment >= 8) { 3312 if (Len >= 8) 3313 VT = MVT::i64; 3314 else if (Len >= 4) 3315 VT = MVT::i32; 3316 else if (Len >= 2) 3317 VT = MVT::i16; 3318 else { 3319 VT = MVT::i8; 3320 } 3321 } else { 3322 assert(Alignment && "Alignment is set in this branch"); 3323 // Bound based on alignment. 3324 if (Len >= 4 && *Alignment == 4) 3325 VT = MVT::i32; 3326 else if (Len >= 2 && *Alignment == 2) 3327 VT = MVT::i16; 3328 else { 3329 VT = MVT::i8; 3330 } 3331 } 3332 3333 unsigned ResultReg = emitLoad(VT, VT, Src); 3334 if (!ResultReg) 3335 return false; 3336 3337 if (!emitStore(VT, ResultReg, Dest)) 3338 return false; 3339 3340 int64_t Size = VT.getSizeInBits() / 8; 3341 Len -= Size; 3342 UnscaledOffset += Size; 3343 3344 // We need to recompute the unscaled offset for each iteration. 3345 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3346 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3347 } 3348 3349 return true; 3350 } 3351 3352 /// Check if it is possible to fold the condition from the XALU intrinsic 3353 /// into the user. The condition code will only be updated on success. 3354 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3355 const Instruction *I, 3356 const Value *Cond) { 3357 if (!isa<ExtractValueInst>(Cond)) 3358 return false; 3359 3360 const auto *EV = cast<ExtractValueInst>(Cond); 3361 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3362 return false; 3363 3364 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3365 MVT RetVT; 3366 const Function *Callee = II->getCalledFunction(); 3367 Type *RetTy = 3368 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3369 if (!isTypeLegal(RetTy, RetVT)) 3370 return false; 3371 3372 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3373 return false; 3374 3375 const Value *LHS = II->getArgOperand(0); 3376 const Value *RHS = II->getArgOperand(1); 3377 3378 // Canonicalize immediate to the RHS. 3379 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3380 std::swap(LHS, RHS); 3381 3382 // Simplify multiplies. 3383 Intrinsic::ID IID = II->getIntrinsicID(); 3384 switch (IID) { 3385 default: 3386 break; 3387 case Intrinsic::smul_with_overflow: 3388 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3389 if (C->getValue() == 2) 3390 IID = Intrinsic::sadd_with_overflow; 3391 break; 3392 case Intrinsic::umul_with_overflow: 3393 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3394 if (C->getValue() == 2) 3395 IID = Intrinsic::uadd_with_overflow; 3396 break; 3397 } 3398 3399 AArch64CC::CondCode TmpCC; 3400 switch (IID) { 3401 default: 3402 return false; 3403 case Intrinsic::sadd_with_overflow: 3404 case Intrinsic::ssub_with_overflow: 3405 TmpCC = AArch64CC::VS; 3406 break; 3407 case Intrinsic::uadd_with_overflow: 3408 TmpCC = AArch64CC::HS; 3409 break; 3410 case Intrinsic::usub_with_overflow: 3411 TmpCC = AArch64CC::LO; 3412 break; 3413 case Intrinsic::smul_with_overflow: 3414 case Intrinsic::umul_with_overflow: 3415 TmpCC = AArch64CC::NE; 3416 break; 3417 } 3418 3419 // Check if both instructions are in the same basic block. 
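// Only fold within a single basic block: the flags set when the intrinsic is
// selected must still be live at the user, which the scan below enforces by
// allowing nothing but extractvalues of the intrinsic in between.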
3420 if (!isValueAvailable(II)) 3421 return false; 3422 3423 // Make sure nothing is in the way 3424 BasicBlock::const_iterator Start(I); 3425 BasicBlock::const_iterator End(II); 3426 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3427 // We only expect extractvalue instructions between the intrinsic and the 3428 // instruction to be selected. 3429 if (!isa<ExtractValueInst>(Itr)) 3430 return false; 3431 3432 // Check that the extractvalue operand comes from the intrinsic. 3433 const auto *EVI = cast<ExtractValueInst>(Itr); 3434 if (EVI->getAggregateOperand() != II) 3435 return false; 3436 } 3437 3438 CC = TmpCC; 3439 return true; 3440 } 3441 3442 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3443 // FIXME: Handle more intrinsics. 3444 switch (II->getIntrinsicID()) { 3445 default: return false; 3446 case Intrinsic::frameaddress: { 3447 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3448 MFI.setFrameAddressIsTaken(true); 3449 3450 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3451 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3452 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3454 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3455 // Recursively load frame address 3456 // ldr x0, [fp] 3457 // ldr x0, [x0] 3458 // ldr x0, [x0] 3459 // ... 3460 unsigned DestReg; 3461 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3462 while (Depth--) { 3463 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3464 SrcReg, 0); 3465 assert(DestReg && "Unexpected LDR instruction emission failure."); 3466 SrcReg = DestReg; 3467 } 3468 3469 updateValueMap(II, SrcReg); 3470 return true; 3471 } 3472 case Intrinsic::sponentry: { 3473 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3474 3475 // SP = FP + Fixed Object + 16 3476 int FI = MFI.CreateFixedObject(4, 0, false); 3477 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3479 TII.get(AArch64::ADDXri), ResultReg) 3480 .addFrameIndex(FI) 3481 .addImm(0) 3482 .addImm(0); 3483 3484 updateValueMap(II, ResultReg); 3485 return true; 3486 } 3487 case Intrinsic::memcpy: 3488 case Intrinsic::memmove: { 3489 const auto *MTI = cast<MemTransferInst>(II); 3490 // Don't handle volatile. 3491 if (MTI->isVolatile()) 3492 return false; 3493 3494 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3495 // we would emit dead code because we don't currently handle memmoves. 3496 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3497 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3498 // Small memcpy's are common enough that we want to do them without a call 3499 // if possible. 
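// The inlined copy must honour the weaker of the two sides, so the effective
// alignment computed below is the minimum of the destination and source
// alignments (with an unknown alignment treated as 1).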
3500 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3501 MaybeAlign Alignment; 3502 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3503 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3504 MTI->getSourceAlign().valueOrOne()); 3505 if (isMemCpySmall(Len, Alignment)) { 3506 Address Dest, Src; 3507 if (!computeAddress(MTI->getRawDest(), Dest) || 3508 !computeAddress(MTI->getRawSource(), Src)) 3509 return false; 3510 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3511 return true; 3512 } 3513 } 3514 3515 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3516 return false; 3517 3518 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3519 // Fast instruction selection doesn't support the special 3520 // address spaces. 3521 return false; 3522 3523 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3524 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3525 } 3526 case Intrinsic::memset: { 3527 const MemSetInst *MSI = cast<MemSetInst>(II); 3528 // Don't handle volatile. 3529 if (MSI->isVolatile()) 3530 return false; 3531 3532 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3533 return false; 3534 3535 if (MSI->getDestAddressSpace() > 255) 3536 // Fast instruction selection doesn't support the special 3537 // address spaces. 3538 return false; 3539 3540 return lowerCallTo(II, "memset", II->arg_size() - 1); 3541 } 3542 case Intrinsic::sin: 3543 case Intrinsic::cos: 3544 case Intrinsic::tan: 3545 case Intrinsic::pow: { 3546 MVT RetVT; 3547 if (!isTypeLegal(II->getType(), RetVT)) 3548 return false; 3549 3550 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3551 return false; 3552 3553 static const RTLIB::Libcall LibCallTable[4][2] = { 3554 {RTLIB::SIN_F32, RTLIB::SIN_F64}, 3555 {RTLIB::COS_F32, RTLIB::COS_F64}, 3556 {RTLIB::TAN_F32, RTLIB::TAN_F64}, 3557 {RTLIB::POW_F32, RTLIB::POW_F64}}; 3558 RTLIB::Libcall LC; 3559 bool Is64Bit = RetVT == MVT::f64; 3560 switch (II->getIntrinsicID()) { 3561 default: 3562 llvm_unreachable("Unexpected intrinsic."); 3563 case Intrinsic::sin: 3564 LC = LibCallTable[0][Is64Bit]; 3565 break; 3566 case Intrinsic::cos: 3567 LC = LibCallTable[1][Is64Bit]; 3568 break; 3569 case Intrinsic::tan: 3570 LC = LibCallTable[2][Is64Bit]; 3571 break; 3572 case Intrinsic::pow: 3573 LC = LibCallTable[3][Is64Bit]; 3574 break; 3575 } 3576 3577 ArgListTy Args; 3578 Args.reserve(II->arg_size()); 3579 3580 // Populate the argument list. 
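    // All operands of the intrinsic are forwarded to the libcall unchanged;
    // the callee name and calling convention come from the RTLIB entry chosen
    // above (e.g. sinf for f32, sin for f64).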
3581 for (auto &Arg : II->args()) { 3582 ArgListEntry Entry; 3583 Entry.Val = Arg; 3584 Entry.Ty = Arg->getType(); 3585 Args.push_back(Entry); 3586 } 3587 3588 CallLoweringInfo CLI; 3589 MCContext &Ctx = MF->getContext(); 3590 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3591 TLI.getLibcallName(LC), std::move(Args)); 3592 if (!lowerCallTo(CLI)) 3593 return false; 3594 updateValueMap(II, CLI.ResultReg); 3595 return true; 3596 } 3597 case Intrinsic::fabs: { 3598 MVT VT; 3599 if (!isTypeLegal(II->getType(), VT)) 3600 return false; 3601 3602 unsigned Opc; 3603 switch (VT.SimpleTy) { 3604 default: 3605 return false; 3606 case MVT::f32: 3607 Opc = AArch64::FABSSr; 3608 break; 3609 case MVT::f64: 3610 Opc = AArch64::FABSDr; 3611 break; 3612 } 3613 Register SrcReg = getRegForValue(II->getOperand(0)); 3614 if (!SrcReg) 3615 return false; 3616 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3618 .addReg(SrcReg); 3619 updateValueMap(II, ResultReg); 3620 return true; 3621 } 3622 case Intrinsic::trap: 3623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3624 .addImm(1); 3625 return true; 3626 case Intrinsic::debugtrap: 3627 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3628 .addImm(0xF000); 3629 return true; 3630 3631 case Intrinsic::sqrt: { 3632 Type *RetTy = II->getCalledFunction()->getReturnType(); 3633 3634 MVT VT; 3635 if (!isTypeLegal(RetTy, VT)) 3636 return false; 3637 3638 Register Op0Reg = getRegForValue(II->getOperand(0)); 3639 if (!Op0Reg) 3640 return false; 3641 3642 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3643 if (!ResultReg) 3644 return false; 3645 3646 updateValueMap(II, ResultReg); 3647 return true; 3648 } 3649 case Intrinsic::sadd_with_overflow: 3650 case Intrinsic::uadd_with_overflow: 3651 case Intrinsic::ssub_with_overflow: 3652 case Intrinsic::usub_with_overflow: 3653 case Intrinsic::smul_with_overflow: 3654 case Intrinsic::umul_with_overflow: { 3655 // This implements the basic lowering of the xalu with overflow intrinsics. 3656 const Function *Callee = II->getCalledFunction(); 3657 auto *Ty = cast<StructType>(Callee->getReturnType()); 3658 Type *RetTy = Ty->getTypeAtIndex(0U); 3659 3660 MVT VT; 3661 if (!isTypeLegal(RetTy, VT)) 3662 return false; 3663 3664 if (VT != MVT::i32 && VT != MVT::i64) 3665 return false; 3666 3667 const Value *LHS = II->getArgOperand(0); 3668 const Value *RHS = II->getArgOperand(1); 3669 // Canonicalize immediate to the RHS. 3670 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3671 std::swap(LHS, RHS); 3672 3673 // Simplify multiplies. 
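    // A multiply by 2 overflows exactly when adding the value to itself does,
    // so {s,u}mul.with.overflow(x, 2) is handled below as
    // {s,u}add.with.overflow(x, x), reusing the cheaper flag-setting ADDS
    // lowering.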
3674 Intrinsic::ID IID = II->getIntrinsicID(); 3675 switch (IID) { 3676 default: 3677 break; 3678 case Intrinsic::smul_with_overflow: 3679 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3680 if (C->getValue() == 2) { 3681 IID = Intrinsic::sadd_with_overflow; 3682 RHS = LHS; 3683 } 3684 break; 3685 case Intrinsic::umul_with_overflow: 3686 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3687 if (C->getValue() == 2) { 3688 IID = Intrinsic::uadd_with_overflow; 3689 RHS = LHS; 3690 } 3691 break; 3692 } 3693 3694 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3695 AArch64CC::CondCode CC = AArch64CC::Invalid; 3696 switch (IID) { 3697 default: llvm_unreachable("Unexpected intrinsic!"); 3698 case Intrinsic::sadd_with_overflow: 3699 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3700 CC = AArch64CC::VS; 3701 break; 3702 case Intrinsic::uadd_with_overflow: 3703 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3704 CC = AArch64CC::HS; 3705 break; 3706 case Intrinsic::ssub_with_overflow: 3707 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3708 CC = AArch64CC::VS; 3709 break; 3710 case Intrinsic::usub_with_overflow: 3711 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3712 CC = AArch64CC::LO; 3713 break; 3714 case Intrinsic::smul_with_overflow: { 3715 CC = AArch64CC::NE; 3716 Register LHSReg = getRegForValue(LHS); 3717 if (!LHSReg) 3718 return false; 3719 3720 Register RHSReg = getRegForValue(RHS); 3721 if (!RHSReg) 3722 return false; 3723 3724 if (VT == MVT::i32) { 3725 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3726 Register MulSubReg = 3727 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3728 // cmp xreg, wreg, sxtw 3729 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3730 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3731 /*WantResult=*/false); 3732 MulReg = MulSubReg; 3733 } else { 3734 assert(VT == MVT::i64 && "Unexpected value type."); 3735 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3736 // reused in the next instruction. 3737 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3738 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3739 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3740 /*WantResult=*/false); 3741 } 3742 break; 3743 } 3744 case Intrinsic::umul_with_overflow: { 3745 CC = AArch64CC::NE; 3746 Register LHSReg = getRegForValue(LHS); 3747 if (!LHSReg) 3748 return false; 3749 3750 Register RHSReg = getRegForValue(RHS); 3751 if (!RHSReg) 3752 return false; 3753 3754 if (VT == MVT::i32) { 3755 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3756 // tst xreg, #0xffffffff00000000 3757 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3758 TII.get(AArch64::ANDSXri), AArch64::XZR) 3759 .addReg(MulReg) 3760 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3761 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3762 } else { 3763 assert(VT == MVT::i64 && "Unexpected value type."); 3764 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3765 // reused in the next instruction. 
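        // The sequence emitted for the 64-bit case is roughly:
        //   mul   x0, xLHS, xRHS
        //   umulh x1, xLHS, xRHS
        //   cmp   xzr, x1          // overflow iff the high 64 bits are non-zero
        // (register names are illustrative).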
3766 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3767 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3768 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3769 } 3770 break; 3771 } 3772 } 3773 3774 if (MulReg) { 3775 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3777 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3778 } 3779 3780 if (!ResultReg1) 3781 return false; 3782 3783 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3784 AArch64::WZR, AArch64::WZR, 3785 getInvertedCondCode(CC)); 3786 (void)ResultReg2; 3787 assert((ResultReg1 + 1) == ResultReg2 && 3788 "Nonconsecutive result registers."); 3789 updateValueMap(II, ResultReg1, 2); 3790 return true; 3791 } 3792 case Intrinsic::aarch64_crc32b: 3793 case Intrinsic::aarch64_crc32h: 3794 case Intrinsic::aarch64_crc32w: 3795 case Intrinsic::aarch64_crc32x: 3796 case Intrinsic::aarch64_crc32cb: 3797 case Intrinsic::aarch64_crc32ch: 3798 case Intrinsic::aarch64_crc32cw: 3799 case Intrinsic::aarch64_crc32cx: { 3800 if (!Subtarget->hasCRC()) 3801 return false; 3802 3803 unsigned Opc; 3804 switch (II->getIntrinsicID()) { 3805 default: 3806 llvm_unreachable("Unexpected intrinsic!"); 3807 case Intrinsic::aarch64_crc32b: 3808 Opc = AArch64::CRC32Brr; 3809 break; 3810 case Intrinsic::aarch64_crc32h: 3811 Opc = AArch64::CRC32Hrr; 3812 break; 3813 case Intrinsic::aarch64_crc32w: 3814 Opc = AArch64::CRC32Wrr; 3815 break; 3816 case Intrinsic::aarch64_crc32x: 3817 Opc = AArch64::CRC32Xrr; 3818 break; 3819 case Intrinsic::aarch64_crc32cb: 3820 Opc = AArch64::CRC32CBrr; 3821 break; 3822 case Intrinsic::aarch64_crc32ch: 3823 Opc = AArch64::CRC32CHrr; 3824 break; 3825 case Intrinsic::aarch64_crc32cw: 3826 Opc = AArch64::CRC32CWrr; 3827 break; 3828 case Intrinsic::aarch64_crc32cx: 3829 Opc = AArch64::CRC32CXrr; 3830 break; 3831 } 3832 3833 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3834 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3835 if (!LHSReg || !RHSReg) 3836 return false; 3837 3838 Register ResultReg = 3839 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3840 updateValueMap(II, ResultReg); 3841 return true; 3842 } 3843 } 3844 return false; 3845 } 3846 3847 bool AArch64FastISel::selectRet(const Instruction *I) { 3848 const ReturnInst *Ret = cast<ReturnInst>(I); 3849 const Function &F = *I->getParent()->getParent(); 3850 3851 if (!FuncInfo.CanLowerReturn) 3852 return false; 3853 3854 if (F.isVarArg()) 3855 return false; 3856 3857 if (TLI.supportSwiftError() && 3858 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3859 return false; 3860 3861 if (TLI.supportSplitCSR(FuncInfo.MF)) 3862 return false; 3863 3864 // Build a list of return value registers. 3865 SmallVector<unsigned, 4> RetRegs; 3866 3867 if (Ret->getNumOperands() > 0) { 3868 CallingConv::ID CC = F.getCallingConv(); 3869 SmallVector<ISD::OutputArg, 4> Outs; 3870 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3871 3872 // Analyze operands of the call, assigning locations to each operand. 3873 SmallVector<CCValAssign, 16> ValLocs; 3874 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3875 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS); 3876 3877 // Only handle a single return value for now. 3878 if (ValLocs.size() != 1) 3879 return false; 3880 3881 CCValAssign &VA = ValLocs[0]; 3882 const Value *RV = Ret->getOperand(0); 3883 3884 // Don't bother handling odd stuff for now. 
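    // I.e. only values that are returned unchanged (Full) or through a plain
    // bit-convert (BCvt); anything the calling convention would extend, split
    // or pass indirectly is left to SelectionDAG.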
3885 if ((VA.getLocInfo() != CCValAssign::Full) && 3886 (VA.getLocInfo() != CCValAssign::BCvt)) 3887 return false; 3888 3889 // Only handle register returns for now. 3890 if (!VA.isRegLoc()) 3891 return false; 3892 3893 Register Reg = getRegForValue(RV); 3894 if (Reg == 0) 3895 return false; 3896 3897 unsigned SrcReg = Reg + VA.getValNo(); 3898 Register DestReg = VA.getLocReg(); 3899 // Avoid a cross-class copy. This is very unlikely. 3900 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3901 return false; 3902 3903 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3904 if (!RVEVT.isSimple()) 3905 return false; 3906 3907 // Vectors (of > 1 lane) in big endian need tricky handling. 3908 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3909 !Subtarget->isLittleEndian()) 3910 return false; 3911 3912 MVT RVVT = RVEVT.getSimpleVT(); 3913 if (RVVT == MVT::f128) 3914 return false; 3915 3916 MVT DestVT = VA.getValVT(); 3917 // Special handling for extended integers. 3918 if (RVVT != DestVT) { 3919 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3920 return false; 3921 3922 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3923 return false; 3924 3925 bool IsZExt = Outs[0].Flags.isZExt(); 3926 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3927 if (SrcReg == 0) 3928 return false; 3929 } 3930 3931 // "Callee" (i.e. value producer) zero extends pointers at function 3932 // boundary. 3933 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3934 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3935 3936 // Make the copy. 3937 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3938 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3939 3940 // Add register to return instruction. 3941 RetRegs.push_back(VA.getLocReg()); 3942 } 3943 3944 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3945 TII.get(AArch64::RET_ReallyLR)); 3946 for (unsigned RetReg : RetRegs) 3947 MIB.addReg(RetReg, RegState::Implicit); 3948 return true; 3949 } 3950 3951 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3952 Type *DestTy = I->getType(); 3953 Value *Op = I->getOperand(0); 3954 Type *SrcTy = Op->getType(); 3955 3956 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3957 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3958 if (!SrcEVT.isSimple()) 3959 return false; 3960 if (!DestEVT.isSimple()) 3961 return false; 3962 3963 MVT SrcVT = SrcEVT.getSimpleVT(); 3964 MVT DestVT = DestEVT.getSimpleVT(); 3965 3966 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3967 SrcVT != MVT::i8) 3968 return false; 3969 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3970 DestVT != MVT::i1) 3971 return false; 3972 3973 Register SrcReg = getRegForValue(Op); 3974 if (!SrcReg) 3975 return false; 3976 3977 // If we're truncating from i64 to a smaller non-legal type then generate an 3978 // AND. Otherwise, we know the high bits are undefined and a truncate only 3979 // generates a COPY. We cannot also mark the source register as the result 3980 // register, because this can incorrectly transfer the kill flag onto the 3981 // source register. 3982 unsigned ResultReg; 3983 if (SrcVT == MVT::i64) { 3984 uint64_t Mask = 0; 3985 switch (DestVT.SimpleTy) { 3986 default: 3987 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3988 return false; 3989 case MVT::i1: 3990 Mask = 0x1; 3991 break; 3992 case MVT::i8: 3993 Mask = 0xff; 3994 break; 3995 case MVT::i16: 3996 Mask = 0xffff; 3997 break; 3998 } 3999 // Issue an extract_subreg to get the lower 32-bits. 4000 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 4001 AArch64::sub_32); 4002 // Create the AND instruction which performs the actual truncation. 4003 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 4004 assert(ResultReg && "Unexpected AND instruction emission failure."); 4005 } else { 4006 ResultReg = createResultReg(&AArch64::GPR32RegClass); 4007 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4008 TII.get(TargetOpcode::COPY), ResultReg) 4009 .addReg(SrcReg); 4010 } 4011 4012 updateValueMap(I, ResultReg); 4013 return true; 4014 } 4015 4016 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 4017 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 4018 DestVT == MVT::i64) && 4019 "Unexpected value type."); 4020 // Handle i8 and i16 as i32. 4021 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4022 DestVT = MVT::i32; 4023 4024 if (IsZExt) { 4025 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 4026 assert(ResultReg && "Unexpected AND instruction emission failure."); 4027 if (DestVT == MVT::i64) { 4028 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4029 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4030 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4032 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4033 .addImm(0) 4034 .addReg(ResultReg) 4035 .addImm(AArch64::sub_32); 4036 ResultReg = Reg64; 4037 } 4038 return ResultReg; 4039 } else { 4040 if (DestVT == MVT::i64) { 4041 // FIXME: We're SExt i1 to i64. 4042 return 0; 4043 } 4044 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4045 0, 0); 4046 } 4047 } 4048 4049 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4050 unsigned Opc, ZReg; 4051 switch (RetVT.SimpleTy) { 4052 default: return 0; 4053 case MVT::i8: 4054 case MVT::i16: 4055 case MVT::i32: 4056 RetVT = MVT::i32; 4057 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4058 case MVT::i64: 4059 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4060 } 4061 4062 const TargetRegisterClass *RC = 4063 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4064 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4065 } 4066 4067 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4068 if (RetVT != MVT::i64) 4069 return 0; 4070 4071 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4072 Op0, Op1, AArch64::XZR); 4073 } 4074 4075 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4076 if (RetVT != MVT::i64) 4077 return 0; 4078 4079 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4080 Op0, Op1, AArch64::XZR); 4081 } 4082 4083 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4084 unsigned Op1Reg) { 4085 unsigned Opc = 0; 4086 bool NeedTrunc = false; 4087 uint64_t Mask = 0; 4088 switch (RetVT.SimpleTy) { 4089 default: return 0; 4090 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4091 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4092 case MVT::i32: Opc = AArch64::LSLVWr; break; 4093 case MVT::i64: Opc = AArch64::LSLVXr; break; 4094 } 4095 4096 const TargetRegisterClass *RC = 4097 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4098 if (NeedTrunc) 4099 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4100 4101 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4102 if (NeedTrunc) 4103 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4104 return ResultReg; 4105 } 4106 4107 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4108 uint64_t Shift, bool IsZExt) { 4109 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4110 "Unexpected source/return type pair."); 4111 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4112 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4113 "Unexpected source value type."); 4114 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4115 RetVT == MVT::i64) && "Unexpected return value type."); 4116 4117 bool Is64Bit = (RetVT == MVT::i64); 4118 unsigned RegSize = Is64Bit ? 64 : 32; 4119 unsigned DstBits = RetVT.getSizeInBits(); 4120 unsigned SrcBits = SrcVT.getSizeInBits(); 4121 const TargetRegisterClass *RC = 4122 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4123 4124 // Just emit a copy for "zero" shifts. 4125 if (Shift == 0) { 4126 if (RetVT == SrcVT) { 4127 Register ResultReg = createResultReg(RC); 4128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4129 TII.get(TargetOpcode::COPY), ResultReg) 4130 .addReg(Op0); 4131 return ResultReg; 4132 } else 4133 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4134 } 4135 4136 // Don't deal with undefined shifts. 4137 if (Shift >= DstBits) 4138 return 0; 4139 4140 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4141 // {S|U}BFM Wd, Wn, #r, #s 4142 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4143 4144 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4145 // %2 = shl i16 %1, 4 4146 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4147 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4148 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4149 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4150 4151 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4152 // %2 = shl i16 %1, 8 4153 // Wd<32+7-24,32-24> = Wn<7:0> 4154 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4155 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4156 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4157 4158 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4159 // %2 = shl i16 %1, 12 4160 // Wd<32+3-20,32-20> = Wn<3:0> 4161 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4162 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4163 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4164 4165 unsigned ImmR = RegSize - Shift; 4166 // Limit the width to the length of the source type. 4167 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4168 static const unsigned OpcTable[2][2] = { 4169 {AArch64::SBFMWri, AArch64::SBFMXri}, 4170 {AArch64::UBFMWri, AArch64::UBFMXri} 4171 }; 4172 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4173 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4174 Register TmpReg = MRI.createVirtualRegister(RC); 4175 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4176 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4177 .addImm(0) 4178 .addReg(Op0) 4179 .addImm(AArch64::sub_32); 4180 Op0 = TmpReg; 4181 } 4182 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4183 } 4184 4185 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4186 unsigned Op1Reg) { 4187 unsigned Opc = 0; 4188 bool NeedTrunc = false; 4189 uint64_t Mask = 0; 4190 switch (RetVT.SimpleTy) { 4191 default: return 0; 4192 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4193 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4194 case MVT::i32: Opc = AArch64::LSRVWr; break; 4195 case MVT::i64: Opc = AArch64::LSRVXr; break; 4196 } 4197 4198 const TargetRegisterClass *RC = 4199 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4200 if (NeedTrunc) { 4201 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4202 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4203 } 4204 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4205 if (NeedTrunc) 4206 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4207 return ResultReg; 4208 } 4209 4210 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4211 uint64_t Shift, bool IsZExt) { 4212 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4213 "Unexpected source/return type pair."); 4214 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4215 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4216 "Unexpected source value type."); 4217 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4218 RetVT == MVT::i64) && "Unexpected return value type."); 4219 4220 bool Is64Bit = (RetVT == MVT::i64); 4221 unsigned RegSize = Is64Bit ? 64 : 32; 4222 unsigned DstBits = RetVT.getSizeInBits(); 4223 unsigned SrcBits = SrcVT.getSizeInBits(); 4224 const TargetRegisterClass *RC = 4225 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4226 4227 // Just emit a copy for "zero" shifts. 
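  // (If the source still has to be widened to RetVT, a zero shift degenerates
  // into just that extension.)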
4228 if (Shift == 0) { 4229 if (RetVT == SrcVT) { 4230 Register ResultReg = createResultReg(RC); 4231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4232 TII.get(TargetOpcode::COPY), ResultReg) 4233 .addReg(Op0); 4234 return ResultReg; 4235 } else 4236 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4237 } 4238 4239 // Don't deal with undefined shifts. 4240 if (Shift >= DstBits) 4241 return 0; 4242 4243 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4244 // {S|U}BFM Wd, Wn, #r, #s 4245 // Wd<s-r:0> = Wn<s:r> when r <= s 4246 4247 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4248 // %2 = lshr i16 %1, 4 4249 // Wd<7-4:0> = Wn<7:4> 4250 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4251 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4252 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4253 4254 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4255 // %2 = lshr i16 %1, 8 4256 // Wd<7-7,0> = Wn<7:7> 4257 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4258 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4260 4261 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4262 // %2 = lshr i16 %1, 12 4263 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4264 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4265 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4266 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4267 4268 if (Shift >= SrcBits && IsZExt) 4269 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4270 4271 // It is not possible to fold a sign-extend into the LShr instruction. In this 4272 // case emit a sign-extend. 4273 if (!IsZExt) { 4274 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4275 if (!Op0) 4276 return 0; 4277 SrcVT = RetVT; 4278 SrcBits = SrcVT.getSizeInBits(); 4279 IsZExt = true; 4280 } 4281 4282 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4283 unsigned ImmS = SrcBits - 1; 4284 static const unsigned OpcTable[2][2] = { 4285 {AArch64::SBFMWri, AArch64::SBFMXri}, 4286 {AArch64::UBFMWri, AArch64::UBFMXri} 4287 }; 4288 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4289 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4290 Register TmpReg = MRI.createVirtualRegister(RC); 4291 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4292 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4293 .addImm(0) 4294 .addReg(Op0) 4295 .addImm(AArch64::sub_32); 4296 Op0 = TmpReg; 4297 } 4298 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4299 } 4300 4301 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4302 unsigned Op1Reg) { 4303 unsigned Opc = 0; 4304 bool NeedTrunc = false; 4305 uint64_t Mask = 0; 4306 switch (RetVT.SimpleTy) { 4307 default: return 0; 4308 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4309 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4310 case MVT::i32: Opc = AArch64::ASRVWr; break; 4311 case MVT::i64: Opc = AArch64::ASRVXr; break; 4312 } 4313 4314 const TargetRegisterClass *RC = 4315 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4316 if (NeedTrunc) { 4317 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4318 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4319 } 4320 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4321 if (NeedTrunc) 4322 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4323 return ResultReg; 4324 } 4325 4326 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4327 uint64_t Shift, bool IsZExt) { 4328 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4329 "Unexpected source/return type pair."); 4330 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4331 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4332 "Unexpected source value type."); 4333 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4334 RetVT == MVT::i64) && "Unexpected return value type."); 4335 4336 bool Is64Bit = (RetVT == MVT::i64); 4337 unsigned RegSize = Is64Bit ? 64 : 32; 4338 unsigned DstBits = RetVT.getSizeInBits(); 4339 unsigned SrcBits = SrcVT.getSizeInBits(); 4340 const TargetRegisterClass *RC = 4341 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4342 4343 // Just emit a copy for "zero" shifts. 4344 if (Shift == 0) { 4345 if (RetVT == SrcVT) { 4346 Register ResultReg = createResultReg(RC); 4347 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4348 TII.get(TargetOpcode::COPY), ResultReg) 4349 .addReg(Op0); 4350 return ResultReg; 4351 } else 4352 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4353 } 4354 4355 // Don't deal with undefined shifts. 4356 if (Shift >= DstBits) 4357 return 0; 4358 4359 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4360 // {S|U}BFM Wd, Wn, #r, #s 4361 // Wd<s-r:0> = Wn<s:r> when r <= s 4362 4363 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4364 // %2 = ashr i16 %1, 4 4365 // Wd<7-4:0> = Wn<7:4> 4366 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4367 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4368 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4369 4370 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4371 // %2 = ashr i16 %1, 8 4372 // Wd<7-7,0> = Wn<7:7> 4373 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4374 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4376 4377 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4378 // %2 = ashr i16 %1, 12 4379 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4380 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4381 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4382 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4383 4384 if (Shift >= SrcBits && IsZExt) 4385 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4386 4387 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4388 unsigned ImmS = SrcBits - 1; 4389 static const unsigned OpcTable[2][2] = { 4390 {AArch64::SBFMWri, AArch64::SBFMXri}, 4391 {AArch64::UBFMWri, AArch64::UBFMXri} 4392 }; 4393 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4394 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4395 Register TmpReg = MRI.createVirtualRegister(RC); 4396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4397 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4398 .addImm(0) 4399 .addReg(Op0) 4400 .addImm(AArch64::sub_32); 4401 Op0 = TmpReg; 4402 } 4403 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4404 } 4405 4406 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4407 bool IsZExt) { 4408 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4409 4410 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4411 // DestVT are odd things, so test to make sure that they are both types we can 4412 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4413 // bail out to SelectionDAG. 4414 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4415 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4416 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4417 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4418 return 0; 4419 4420 unsigned Opc; 4421 unsigned Imm = 0; 4422 4423 switch (SrcVT.SimpleTy) { 4424 default: 4425 return 0; 4426 case MVT::i1: 4427 return emiti1Ext(SrcReg, DestVT, IsZExt); 4428 case MVT::i8: 4429 if (DestVT == MVT::i64) 4430 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4431 else 4432 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4433 Imm = 7; 4434 break; 4435 case MVT::i16: 4436 if (DestVT == MVT::i64) 4437 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4438 else 4439 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4440 Imm = 15; 4441 break; 4442 case MVT::i32: 4443 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4444 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4445 Imm = 31; 4446 break; 4447 } 4448 4449 // Handle i8 and i16 as i32. 4450 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4451 DestVT = MVT::i32; 4452 else if (DestVT == MVT::i64) { 4453 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4455 TII.get(AArch64::SUBREG_TO_REG), Src64) 4456 .addImm(0) 4457 .addReg(SrcReg) 4458 .addImm(AArch64::sub_32); 4459 SrcReg = Src64; 4460 } 4461 4462 const TargetRegisterClass *RC = 4463 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4464 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4465 } 4466 4467 static bool isZExtLoad(const MachineInstr *LI) { 4468 switch (LI->getOpcode()) { 4469 default: 4470 return false; 4471 case AArch64::LDURBBi: 4472 case AArch64::LDURHHi: 4473 case AArch64::LDURWi: 4474 case AArch64::LDRBBui: 4475 case AArch64::LDRHHui: 4476 case AArch64::LDRWui: 4477 case AArch64::LDRBBroX: 4478 case AArch64::LDRHHroX: 4479 case AArch64::LDRWroX: 4480 case AArch64::LDRBBroW: 4481 case AArch64::LDRHHroW: 4482 case AArch64::LDRWroW: 4483 return true; 4484 } 4485 } 4486 4487 static bool isSExtLoad(const MachineInstr *LI) { 4488 switch (LI->getOpcode()) { 4489 default: 4490 return false; 4491 case AArch64::LDURSBWi: 4492 case AArch64::LDURSHWi: 4493 case AArch64::LDURSBXi: 4494 case AArch64::LDURSHXi: 4495 case AArch64::LDURSWi: 4496 case AArch64::LDRSBWui: 4497 case AArch64::LDRSHWui: 4498 case AArch64::LDRSBXui: 4499 case AArch64::LDRSHXui: 4500 case AArch64::LDRSWui: 4501 case AArch64::LDRSBWroX: 4502 case AArch64::LDRSHWroX: 4503 case AArch64::LDRSBXroX: 4504 case AArch64::LDRSHXroX: 4505 case AArch64::LDRSWroX: 4506 case AArch64::LDRSBWroW: 4507 case AArch64::LDRSHWroW: 4508 case AArch64::LDRSBXroW: 4509 case AArch64::LDRSHXroW: 4510 case AArch64::LDRSWroW: 4511 return true; 4512 } 4513 } 4514 4515 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4516 MVT SrcVT) { 4517 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4518 if (!LI || !LI->hasOneUse()) 4519 return false; 4520 4521 // Check if the load instruction has already been selected. 
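  // i8/i16/i32 loads are emitted as extending loads, so if a load of the right
  // flavour (zero- vs. sign-extending) already exists the extension is free;
  // at most the 32-bit result still has to be widened to 64 bits below.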
4522 Register Reg = lookUpRegForValue(LI); 4523 if (!Reg) 4524 return false; 4525 4526 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4527 if (!MI) 4528 return false; 4529 4530 // Check if the correct load instruction has been emitted - SelectionDAG might 4531 // have emitted a zero-extending load, but we need a sign-extending load. 4532 bool IsZExt = isa<ZExtInst>(I); 4533 const auto *LoadMI = MI; 4534 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4535 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4536 Register LoadReg = MI->getOperand(1).getReg(); 4537 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4538 assert(LoadMI && "Expected valid instruction"); 4539 } 4540 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4541 return false; 4542 4543 // Nothing to be done. 4544 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4545 updateValueMap(I, Reg); 4546 return true; 4547 } 4548 4549 if (IsZExt) { 4550 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4551 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4552 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4553 .addImm(0) 4554 .addReg(Reg, getKillRegState(true)) 4555 .addImm(AArch64::sub_32); 4556 Reg = Reg64; 4557 } else { 4558 assert((MI->getOpcode() == TargetOpcode::COPY && 4559 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4560 "Expected copy instruction"); 4561 Reg = MI->getOperand(1).getReg(); 4562 MachineBasicBlock::iterator I(MI); 4563 removeDeadCode(I, std::next(I)); 4564 } 4565 updateValueMap(I, Reg); 4566 return true; 4567 } 4568 4569 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4570 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4571 "Unexpected integer extend instruction."); 4572 MVT RetVT; 4573 MVT SrcVT; 4574 if (!isTypeSupported(I->getType(), RetVT)) 4575 return false; 4576 4577 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4578 return false; 4579 4580 // Try to optimize already sign-/zero-extended values from load instructions. 4581 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4582 return true; 4583 4584 Register SrcReg = getRegForValue(I->getOperand(0)); 4585 if (!SrcReg) 4586 return false; 4587 4588 // Try to optimize already sign-/zero-extended values from function arguments. 4589 bool IsZExt = isa<ZExtInst>(I); 4590 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4591 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4592 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4593 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4594 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4595 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4596 .addImm(0) 4597 .addReg(SrcReg) 4598 .addImm(AArch64::sub_32); 4599 SrcReg = ResultReg; 4600 } 4601 4602 updateValueMap(I, SrcReg); 4603 return true; 4604 } 4605 } 4606 4607 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4608 if (!ResultReg) 4609 return false; 4610 4611 updateValueMap(I, ResultReg); 4612 return true; 4613 } 4614 4615 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4616 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4617 if (!DestEVT.isSimple()) 4618 return false; 4619 4620 MVT DestVT = DestEVT.getSimpleVT(); 4621 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4622 return false; 4623 4624 unsigned DivOpc; 4625 bool Is64bit = (DestVT == MVT::i64); 4626 switch (ISDOpcode) { 4627 default: 4628 return false; 4629 case ISD::SREM: 4630 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4631 break; 4632 case ISD::UREM: 4633 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4634 break; 4635 } 4636 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4637 Register Src0Reg = getRegForValue(I->getOperand(0)); 4638 if (!Src0Reg) 4639 return false; 4640 4641 Register Src1Reg = getRegForValue(I->getOperand(1)); 4642 if (!Src1Reg) 4643 return false; 4644 4645 const TargetRegisterClass *RC = 4646 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4647 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4648 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4649 // The remainder is computed as numerator - (quotient * denominator) using the 4650 // MSUB instruction. 4651 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4652 updateValueMap(I, ResultReg); 4653 return true; 4654 } 4655 4656 bool AArch64FastISel::selectMul(const Instruction *I) { 4657 MVT VT; 4658 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4659 return false; 4660 4661 if (VT.isVector()) 4662 return selectBinaryOp(I, ISD::MUL); 4663 4664 const Value *Src0 = I->getOperand(0); 4665 const Value *Src1 = I->getOperand(1); 4666 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4667 if (C->getValue().isPowerOf2()) 4668 std::swap(Src0, Src1); 4669 4670 // Try to simplify to a shift instruction. 4671 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4672 if (C->getValue().isPowerOf2()) { 4673 uint64_t ShiftVal = C->getValue().logBase2(); 4674 MVT SrcVT = VT; 4675 bool IsZExt = true; 4676 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4677 if (!isIntExtFree(ZExt)) { 4678 MVT VT; 4679 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4680 SrcVT = VT; 4681 IsZExt = true; 4682 Src0 = ZExt->getOperand(0); 4683 } 4684 } 4685 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4686 if (!isIntExtFree(SExt)) { 4687 MVT VT; 4688 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4689 SrcVT = VT; 4690 IsZExt = false; 4691 Src0 = SExt->getOperand(0); 4692 } 4693 } 4694 } 4695 4696 Register Src0Reg = getRegForValue(Src0); 4697 if (!Src0Reg) 4698 return false; 4699 4700 unsigned ResultReg = 4701 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4702 4703 if (ResultReg) { 4704 updateValueMap(I, ResultReg); 4705 return true; 4706 } 4707 } 4708 4709 Register Src0Reg = getRegForValue(I->getOperand(0)); 4710 if (!Src0Reg) 4711 return false; 4712 4713 Register Src1Reg = getRegForValue(I->getOperand(1)); 4714 if (!Src1Reg) 4715 return false; 4716 4717 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4718 4719 if (!ResultReg) 4720 return false; 4721 4722 updateValueMap(I, ResultReg); 4723 return true; 4724 } 4725 4726 bool AArch64FastISel::selectShift(const Instruction *I) { 4727 MVT RetVT; 4728 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4729 return false; 4730 4731 if (RetVT.isVector()) 4732 return selectOperator(I, I->getOpcode()); 4733 4734 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4735 unsigned ResultReg = 0; 4736 uint64_t ShiftVal = C->getZExtValue(); 4737 MVT SrcVT = RetVT; 4738 bool IsZExt = I->getOpcode() != Instruction::AShr; 4739 const Value *Op0 = I->getOperand(0); 4740 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4741 if (!isIntExtFree(ZExt)) { 4742 MVT TmpVT; 4743 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4744 SrcVT = TmpVT; 4745 IsZExt = true; 
4746 Op0 = ZExt->getOperand(0); 4747 } 4748 } 4749 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4750 if (!isIntExtFree(SExt)) { 4751 MVT TmpVT; 4752 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4753 SrcVT = TmpVT; 4754 IsZExt = false; 4755 Op0 = SExt->getOperand(0); 4756 } 4757 } 4758 } 4759 4760 Register Op0Reg = getRegForValue(Op0); 4761 if (!Op0Reg) 4762 return false; 4763 4764 switch (I->getOpcode()) { 4765 default: llvm_unreachable("Unexpected instruction."); 4766 case Instruction::Shl: 4767 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4768 break; 4769 case Instruction::AShr: 4770 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4771 break; 4772 case Instruction::LShr: 4773 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4774 break; 4775 } 4776 if (!ResultReg) 4777 return false; 4778 4779 updateValueMap(I, ResultReg); 4780 return true; 4781 } 4782 4783 Register Op0Reg = getRegForValue(I->getOperand(0)); 4784 if (!Op0Reg) 4785 return false; 4786 4787 Register Op1Reg = getRegForValue(I->getOperand(1)); 4788 if (!Op1Reg) 4789 return false; 4790 4791 unsigned ResultReg = 0; 4792 switch (I->getOpcode()) { 4793 default: llvm_unreachable("Unexpected instruction."); 4794 case Instruction::Shl: 4795 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4796 break; 4797 case Instruction::AShr: 4798 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4799 break; 4800 case Instruction::LShr: 4801 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4802 break; 4803 } 4804 4805 if (!ResultReg) 4806 return false; 4807 4808 updateValueMap(I, ResultReg); 4809 return true; 4810 } 4811 4812 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4813 MVT RetVT, SrcVT; 4814 4815 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4816 return false; 4817 if (!isTypeLegal(I->getType(), RetVT)) 4818 return false; 4819 4820 unsigned Opc; 4821 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4822 Opc = AArch64::FMOVWSr; 4823 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4824 Opc = AArch64::FMOVXDr; 4825 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4826 Opc = AArch64::FMOVSWr; 4827 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4828 Opc = AArch64::FMOVDXr; 4829 else 4830 return false; 4831 4832 const TargetRegisterClass *RC = nullptr; 4833 switch (RetVT.SimpleTy) { 4834 default: llvm_unreachable("Unexpected value type."); 4835 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4836 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4837 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4838 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4839 } 4840 Register Op0Reg = getRegForValue(I->getOperand(0)); 4841 if (!Op0Reg) 4842 return false; 4843 4844 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4845 if (!ResultReg) 4846 return false; 4847 4848 updateValueMap(I, ResultReg); 4849 return true; 4850 } 4851 4852 bool AArch64FastISel::selectFRem(const Instruction *I) { 4853 MVT RetVT; 4854 if (!isTypeLegal(I->getType(), RetVT)) 4855 return false; 4856 4857 RTLIB::Libcall LC; 4858 switch (RetVT.SimpleTy) { 4859 default: 4860 return false; 4861 case MVT::f32: 4862 LC = RTLIB::REM_F32; 4863 break; 4864 case MVT::f64: 4865 LC = RTLIB::REM_F64; 4866 break; 4867 } 4868 4869 ArgListTy Args; 4870 Args.reserve(I->getNumOperands()); 4871 4872 // Populate the argument list. 
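  // There is no AArch64 instruction for frem, so it is lowered to a libcall to
  // the fmodf/fmod routine selected above.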
4873 for (auto &Arg : I->operands()) { 4874 ArgListEntry Entry; 4875 Entry.Val = Arg; 4876 Entry.Ty = Arg->getType(); 4877 Args.push_back(Entry); 4878 } 4879 4880 CallLoweringInfo CLI; 4881 MCContext &Ctx = MF->getContext(); 4882 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4883 TLI.getLibcallName(LC), std::move(Args)); 4884 if (!lowerCallTo(CLI)) 4885 return false; 4886 updateValueMap(I, CLI.ResultReg); 4887 return true; 4888 } 4889 4890 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4891 MVT VT; 4892 if (!isTypeLegal(I->getType(), VT)) 4893 return false; 4894 4895 if (!isa<ConstantInt>(I->getOperand(1))) 4896 return selectBinaryOp(I, ISD::SDIV); 4897 4898 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4899 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4900 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4901 return selectBinaryOp(I, ISD::SDIV); 4902 4903 unsigned Lg2 = C.countr_zero(); 4904 Register Src0Reg = getRegForValue(I->getOperand(0)); 4905 if (!Src0Reg) 4906 return false; 4907 4908 if (cast<BinaryOperator>(I)->isExact()) { 4909 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4910 if (!ResultReg) 4911 return false; 4912 updateValueMap(I, ResultReg); 4913 return true; 4914 } 4915 4916 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4917 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4918 if (!AddReg) 4919 return false; 4920 4921 // (Src0 < 0) ? Pow2 - 1 : 0; 4922 if (!emitICmp_ri(VT, Src0Reg, 0)) 4923 return false; 4924 4925 unsigned SelectOpc; 4926 const TargetRegisterClass *RC; 4927 if (VT == MVT::i64) { 4928 SelectOpc = AArch64::CSELXr; 4929 RC = &AArch64::GPR64RegClass; 4930 } else { 4931 SelectOpc = AArch64::CSELWr; 4932 RC = &AArch64::GPR32RegClass; 4933 } 4934 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4935 AArch64CC::LT); 4936 if (!SelectReg) 4937 return false; 4938 4939 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4940 // negate the result. 4941 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4942 unsigned ResultReg; 4943 if (C.isNegative()) 4944 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4945 AArch64_AM::ASR, Lg2); 4946 else 4947 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4948 4949 if (!ResultReg) 4950 return false; 4951 4952 updateValueMap(I, ResultReg); 4953 return true; 4954 } 4955 4956 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4957 /// have to duplicate it for AArch64, because otherwise we would fail during the 4958 /// sign-extend emission. 4959 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4960 Register IdxN = getRegForValue(Idx); 4961 if (IdxN == 0) 4962 // Unhandled operand. Halt "fast" selection and bail. 4963 return 0; 4964 4965 // If the index is smaller or larger than intptr_t, truncate or extend it. 4966 MVT PtrVT = TLI.getPointerTy(DL); 4967 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4968 if (IdxVT.bitsLT(PtrVT)) { 4969 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4970 } else if (IdxVT.bitsGT(PtrVT)) 4971 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4972 return IdxN; 4973 } 4974 4975 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4976 /// duplicate it for AArch64, because otherwise we would bail out even for 4977 /// simple cases. 
This is because the standard fastEmit functions don't cover 4978 /// MUL at all and ADD is lowered very inefficiently. 4979 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4980 if (Subtarget->isTargetILP32()) 4981 return false; 4982 4983 Register N = getRegForValue(I->getOperand(0)); 4984 if (!N) 4985 return false; 4986 4987 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4988 // into a single N = N + TotalOffset. 4989 uint64_t TotalOffs = 0; 4990 MVT VT = TLI.getPointerTy(DL); 4991 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); 4992 GTI != E; ++GTI) { 4993 const Value *Idx = GTI.getOperand(); 4994 if (auto *StTy = GTI.getStructTypeOrNull()) { 4995 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4996 // N = N + Offset 4997 if (Field) 4998 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4999 } else { 5000 // If this is a constant subscript, handle it quickly. 5001 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 5002 if (CI->isZero()) 5003 continue; 5004 // N = N + Offset 5005 TotalOffs += GTI.getSequentialElementStride(DL) * 5006 cast<ConstantInt>(CI)->getSExtValue(); 5007 continue; 5008 } 5009 if (TotalOffs) { 5010 N = emitAdd_ri_(VT, N, TotalOffs); 5011 if (!N) 5012 return false; 5013 TotalOffs = 0; 5014 } 5015 5016 // N = N + Idx * ElementSize; 5017 uint64_t ElementSize = GTI.getSequentialElementStride(DL); 5018 unsigned IdxN = getRegForGEPIndex(Idx); 5019 if (!IdxN) 5020 return false; 5021 5022 if (ElementSize != 1) { 5023 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 5024 if (!C) 5025 return false; 5026 IdxN = emitMul_rr(VT, IdxN, C); 5027 if (!IdxN) 5028 return false; 5029 } 5030 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); 5031 if (!N) 5032 return false; 5033 } 5034 } 5035 if (TotalOffs) { 5036 N = emitAdd_ri_(VT, N, TotalOffs); 5037 if (!N) 5038 return false; 5039 } 5040 updateValueMap(I, N); 5041 return true; 5042 } 5043 5044 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 5045 assert(TM.getOptLevel() == CodeGenOptLevel::None && 5046 "cmpxchg survived AtomicExpand at optlevel > -O0"); 5047 5048 auto *RetPairTy = cast<StructType>(I->getType()); 5049 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5050 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5051 "cmpxchg has a non-i1 status result"); 5052 5053 MVT VT; 5054 if (!isTypeLegal(RetTy, VT)) 5055 return false; 5056 5057 const TargetRegisterClass *ResRC; 5058 unsigned Opc, CmpOpc; 5059 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5060 // extractvalue selection doesn't support that.
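  // The emitted sequence is roughly:
  //   CMP_SWAP_32/64 old, scratch, addr, desired, new
  //       (a pseudo that is later expanded into an exclusive load/store loop)
  //   cmp   old, desired
  //   csinc success, wzr, wzr, ne   // success = 1 iff old == desired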
5061 if (VT == MVT::i32) { 5062 Opc = AArch64::CMP_SWAP_32; 5063 CmpOpc = AArch64::SUBSWrs; 5064 ResRC = &AArch64::GPR32RegClass; 5065 } else if (VT == MVT::i64) { 5066 Opc = AArch64::CMP_SWAP_64; 5067 CmpOpc = AArch64::SUBSXrs; 5068 ResRC = &AArch64::GPR64RegClass; 5069 } else { 5070 return false; 5071 } 5072 5073 const MCInstrDesc &II = TII.get(Opc); 5074 5075 const Register AddrReg = constrainOperandRegClass( 5076 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5077 const Register DesiredReg = constrainOperandRegClass( 5078 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5079 const Register NewReg = constrainOperandRegClass( 5080 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5081 5082 const Register ResultReg1 = createResultReg(ResRC); 5083 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5084 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5085 5086 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5088 .addDef(ResultReg1) 5089 .addDef(ScratchReg) 5090 .addUse(AddrReg) 5091 .addUse(DesiredReg) 5092 .addUse(NewReg); 5093 5094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5095 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5096 .addUse(ResultReg1) 5097 .addUse(DesiredReg) 5098 .addImm(0); 5099 5100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5101 .addDef(ResultReg2) 5102 .addUse(AArch64::WZR) 5103 .addUse(AArch64::WZR) 5104 .addImm(AArch64CC::NE); 5105 5106 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5107 updateValueMap(I, ResultReg1, 2); 5108 return true; 5109 } 5110 5111 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5112 if (TLI.fallBackToDAGISel(*I)) 5113 return false; 5114 switch (I->getOpcode()) { 5115 default: 5116 break; 5117 case Instruction::Add: 5118 case Instruction::Sub: 5119 return selectAddSub(I); 5120 case Instruction::Mul: 5121 return selectMul(I); 5122 case Instruction::SDiv: 5123 return selectSDiv(I); 5124 case Instruction::SRem: 5125 if (!selectBinaryOp(I, ISD::SREM)) 5126 return selectRem(I, ISD::SREM); 5127 return true; 5128 case Instruction::URem: 5129 if (!selectBinaryOp(I, ISD::UREM)) 5130 return selectRem(I, ISD::UREM); 5131 return true; 5132 case Instruction::Shl: 5133 case Instruction::LShr: 5134 case Instruction::AShr: 5135 return selectShift(I); 5136 case Instruction::And: 5137 case Instruction::Or: 5138 case Instruction::Xor: 5139 return selectLogicalOp(I); 5140 case Instruction::Br: 5141 return selectBranch(I); 5142 case Instruction::IndirectBr: 5143 return selectIndirectBr(I); 5144 case Instruction::BitCast: 5145 if (!FastISel::selectBitCast(I)) 5146 return selectBitCast(I); 5147 return true; 5148 case Instruction::FPToSI: 5149 if (!selectCast(I, ISD::FP_TO_SINT)) 5150 return selectFPToInt(I, /*Signed=*/true); 5151 return true; 5152 case Instruction::FPToUI: 5153 return selectFPToInt(I, /*Signed=*/false); 5154 case Instruction::ZExt: 5155 case Instruction::SExt: 5156 return selectIntExt(I); 5157 case Instruction::Trunc: 5158 if (!selectCast(I, ISD::TRUNCATE)) 5159 return selectTrunc(I); 5160 return true; 5161 case Instruction::FPExt: 5162 return selectFPExt(I); 5163 case Instruction::FPTrunc: 5164 return selectFPTrunc(I); 5165 case Instruction::SIToFP: 5166 if (!selectCast(I, ISD::SINT_TO_FP)) 5167 return selectIntToFP(I, /*Signed=*/true); 5168 return true; 5169 case Instruction::UIToFP: 
5170 return selectIntToFP(I, /*Signed=*/false); 5171 case Instruction::Load: 5172 return selectLoad(I); 5173 case Instruction::Store: 5174 return selectStore(I); 5175 case Instruction::FCmp: 5176 case Instruction::ICmp: 5177 return selectCmp(I); 5178 case Instruction::Select: 5179 return selectSelect(I); 5180 case Instruction::Ret: 5181 return selectRet(I); 5182 case Instruction::FRem: 5183 return selectFRem(I); 5184 case Instruction::GetElementPtr: 5185 return selectGetElementPtr(I); 5186 case Instruction::AtomicCmpXchg: 5187 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5188 } 5189 5190 // Fall back to target-independent instruction selection. 5191 return selectOperator(I, I->getOpcode()); 5192 } 5193 5194 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5195 const TargetLibraryInfo *LibInfo) { 5196 5197 SMEAttrs CallerAttrs(*FuncInfo.Fn); 5198 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() || 5199 CallerAttrs.hasStreamingInterfaceOrBody() || 5200 CallerAttrs.hasStreamingCompatibleInterface() || 5201 CallerAttrs.hasAgnosticZAInterface()) 5202 return nullptr; 5203 return new AArch64FastISel(FuncInfo, LibInfo); 5204 } 5205