1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/APSInt.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/SelectionDAG.h" 25 #include "llvm/CodeGen/SelectionDAGISel.h" 26 #include "llvm/CodeGen/TargetLowering.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/IntrinsicsARM.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Target/TargetOptions.h" 36 #include <optional> 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "arm-isel" 41 #define PASS_NAME "ARM Instruction Selection" 42 43 static cl::opt<bool> 44 DisableShifterOp("disable-shifter-op", cl::Hidden, 45 cl::desc("Disable isel of shifter-op"), 46 cl::init(false)); 47 48 //===--------------------------------------------------------------------===// 49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 50 /// instructions for SelectionDAG operations. 51 /// 52 namespace { 53 54 class ARMDAGToDAGISel : public SelectionDAGISel { 55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 56 /// make the right decision when generating code for different targets. 57 const ARMSubtarget *Subtarget; 58 59 public: 60 ARMDAGToDAGISel() = delete; 61 62 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel) 63 : SelectionDAGISel(tm, OptLevel) {} 64 65 bool runOnMachineFunction(MachineFunction &MF) override { 66 // Reset the subtarget each time through. 67 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 68 SelectionDAGISel::runOnMachineFunction(MF); 69 return true; 70 } 71 72 void PreprocessISelDAG() override; 73 74 /// getI32Imm - Return a target constant of type i32 with the specified 75 /// value. 76 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 77 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 78 } 79 80 void Select(SDNode *N) override; 81 82 /// Return true as some complex patterns, like those that call 83 /// canExtractShiftFromMul can modify the DAG inplace. 84 bool ComplexPatternFuncMutatesDAG() const override { return true; } 85 86 bool hasNoVMLxHazardUse(SDNode *N) const; 87 bool isShifterOpProfitable(const SDValue &Shift, 88 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 89 bool SelectRegShifterOperand(SDValue N, SDValue &A, 90 SDValue &B, SDValue &C, 91 bool CheckProfitability = true); 92 bool SelectImmShifterOperand(SDValue N, SDValue &A, 93 SDValue &B, bool CheckProfitability = true); 94 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, 95 SDValue &C) { 96 // Don't apply the profitability check 97 return SelectRegShifterOperand(N, A, B, C, false); 98 } 99 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { 100 // Don't apply the profitability check 101 return SelectImmShifterOperand(N, A, B, false); 102 } 103 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { 104 if (!N.hasOneUse()) 105 return false; 106 return SelectImmShifterOperand(N, A, B, false); 107 } 108 109 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 110 111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 113 114 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 115 SDValue &Offset, SDValue &Opc); 116 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 117 SDValue &Offset, SDValue &Opc); 118 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 119 SDValue &Offset, SDValue &Opc); 120 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 121 bool SelectAddrMode3(SDValue N, SDValue &Base, 122 SDValue &Offset, SDValue &Opc); 123 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 124 SDValue &Offset, SDValue &Opc); 125 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 126 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 127 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 128 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 129 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 130 131 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 132 133 // Thumb Addressing Modes: 134 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 135 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 136 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 137 SDValue &OffImm); 138 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 139 SDValue &OffImm); 140 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 141 SDValue &OffImm); 142 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 143 SDValue &OffImm); 144 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 145 template <unsigned Shift> 146 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 147 148 // Thumb 2 Addressing Modes: 149 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 150 template <unsigned Shift> 151 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); 152 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 153 SDValue &OffImm); 154 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 155 SDValue &OffImm); 156 template <unsigned Shift> 157 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 158 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 159 unsigned Shift); 160 template <unsigned Shift> 161 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 162 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 163 SDValue &OffReg, SDValue &ShImm); 164 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 165 166 template<int Min, int Max> 167 bool SelectImmediateInRange(SDValue N, SDValue &OffImm); 168 169 inline bool is_so_imm(unsigned Imm) const { 170 return ARM_AM::getSOImmVal(Imm) != -1; 171 } 172 173 inline bool is_so_imm_not(unsigned Imm) const { 174 return ARM_AM::getSOImmVal(~Imm) != -1; 175 } 176 177 inline bool is_t2_so_imm(unsigned Imm) const { 178 return ARM_AM::getT2SOImmVal(Imm) != -1; 179 } 180 181 inline bool is_t2_so_imm_not(unsigned Imm) const { 182 return ARM_AM::getT2SOImmVal(~Imm) != -1; 183 } 184 185 // Include the pieces autogenerated from the target description. 186 #include "ARMGenDAGISel.inc" 187 188 private: 189 void transferMemOperands(SDNode *Src, SDNode *Dst); 190 191 /// Indexed (pre/post inc/dec) load matching code for ARM. 192 bool tryARMIndexedLoad(SDNode *N); 193 bool tryT1IndexedLoad(SDNode *N); 194 bool tryT2IndexedLoad(SDNode *N); 195 bool tryMVEIndexedLoad(SDNode *N); 196 bool tryFMULFixed(SDNode *N, SDLoc dl); 197 bool tryFP_TO_INT(SDNode *N, SDLoc dl); 198 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul, 199 bool IsUnsigned, 200 bool FixedToFloat); 201 202 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 203 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 204 /// loads of D registers and even subregs and odd subregs of Q registers. 205 /// For NumVecs <= 2, QOpcodes1 is not used. 206 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 207 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 208 const uint16_t *QOpcodes1); 209 210 /// SelectVST - Select NEON store intrinsics. NumVecs should 211 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 212 /// stores of D registers and even subregs and odd subregs of Q registers. 213 /// For NumVecs <= 2, QOpcodes1 is not used. 214 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 215 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 216 const uint16_t *QOpcodes1); 217 218 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 219 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 220 /// load/store of D registers and Q registers. 221 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 222 unsigned NumVecs, const uint16_t *DOpcodes, 223 const uint16_t *QOpcodes); 224 225 /// Helper functions for setting up clusters of MVE predication operands. 226 template <typename SDValueVector> 227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 228 SDValue PredicateMask); 229 template <typename SDValueVector> 230 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 231 SDValue PredicateMask, SDValue Inactive); 232 233 template <typename SDValueVector> 234 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); 235 template <typename SDValueVector> 236 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); 237 238 /// SelectMVE_WB - Select MVE writeback load/store intrinsics. 239 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); 240 241 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. 242 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, 243 bool HasSaturationOperand); 244 245 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. 246 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 247 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); 248 249 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between 250 /// vector lanes. 251 void SelectMVE_VSHLC(SDNode *N, bool Predicated); 252 253 /// Select long MVE vector reductions with two vector operands 254 /// Stride is the number of vector element widths the instruction can operate 255 /// on: 256 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] 257 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] 258 /// Stride is used when addressing the OpcodesS array which contains multiple 259 /// opcodes for each element width. 260 /// TySize is the index into the list of element types listed above 261 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 262 const uint16_t *OpcodesS, const uint16_t *OpcodesU, 263 size_t Stride, size_t TySize); 264 265 /// Select a 64-bit MVE vector reduction with two vector operands 266 /// arm_mve_vmlldava_[predicated] 267 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 268 const uint16_t *OpcodesU); 269 /// Select a 72-bit MVE vector rounding reduction with two vector operands 270 /// int_arm_mve_vrmlldavha[_predicated] 271 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 272 const uint16_t *OpcodesU); 273 274 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs 275 /// should be 2 or 4. The opcode array specifies the instructions 276 /// used for 8, 16 and 32-bit lane sizes respectively, and each 277 /// pointer points to a set of NumVecs sub-opcodes used for the 278 /// different stages (e.g. VLD20 versus VLD21) of each load family. 279 void SelectMVE_VLD(SDNode *N, unsigned NumVecs, 280 const uint16_t *const *Opcodes, bool HasWriteback); 281 282 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an 283 /// array of 3 elements for the 8, 16 and 32-bit lane sizes. 284 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 285 bool Wrapping, bool Predicated); 286 287 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D, 288 /// CX1DA, CX2D, CX2DA, CX3, CX3DA). 289 /// \arg \c NumExtraOps number of extra operands besides the coprocossor, 290 /// the accumulator and the immediate operand, i.e. 0 291 /// for CX1*, 1 for CX2*, 2 for CX3* 292 /// \arg \c HasAccum whether the instruction has an accumulator operand 293 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, 294 bool HasAccum); 295 296 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 297 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 298 /// for loading D registers. 299 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 300 unsigned NumVecs, const uint16_t *DOpcodes, 301 const uint16_t *QOpcodes0 = nullptr, 302 const uint16_t *QOpcodes1 = nullptr); 303 304 /// Try to select SBFX/UBFX instructions for ARM. 305 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 306 307 bool tryInsertVectorElt(SDNode *N); 308 309 // Select special operations if node forms integer ABS pattern 310 bool tryABSOp(SDNode *N); 311 312 bool tryReadRegister(SDNode *N); 313 bool tryWriteRegister(SDNode *N); 314 315 bool tryInlineAsm(SDNode *N); 316 317 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 318 319 void SelectCMP_SWAP(SDNode *N); 320 321 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 322 /// inline asm expressions. 323 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 324 InlineAsm::ConstraintCode ConstraintID, 325 std::vector<SDValue> &OutOps) override; 326 327 // Form pairs of consecutive R, S, D, or Q registers. 328 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 329 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 330 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 331 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 332 333 // Form sequences of 4 consecutive S, D, or Q registers. 334 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 335 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 336 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 337 338 // Get the alignment operand for a NEON VLD or VST instruction. 339 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 340 bool is64BitVector); 341 342 /// Checks if N is a multiplication by a constant where we can extract out a 343 /// power of two from the constant so that it can be used in a shift, but only 344 /// if it simplifies the materialization of the constant. Returns true if it 345 /// is, and assigns to PowerOfTwo the power of two that should be extracted 346 /// out and to NewMulConst the new constant to be multiplied by. 347 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 348 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 349 350 /// Replace N with M in CurDAG, in a way that also ensures that M gets 351 /// selected when N would have been selected. 352 void replaceDAGValue(const SDValue &N, SDValue M); 353 }; 354 355 class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy { 356 public: 357 static char ID; 358 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel) 359 : SelectionDAGISelLegacy( 360 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {} 361 }; 362 } 363 364 char ARMDAGToDAGISelLegacy::ID = 0; 365 366 INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) 367 368 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 369 /// operand. If so Imm will receive the 32-bit value. 370 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 371 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 372 Imm = N->getAsZExtVal(); 373 return true; 374 } 375 return false; 376 } 377 378 // isInt32Immediate - This method tests to see if a constant operand. 379 // If so Imm will receive the 32 bit value. 380 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 381 return isInt32Immediate(N.getNode(), Imm); 382 } 383 384 // isOpcWithIntImmediate - This method tests to see if the node is a specific 385 // opcode and that it has a immediate integer right operand. 386 // If so Imm will receive the 32 bit value. 387 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 388 return N->getOpcode() == Opc && 389 isInt32Immediate(N->getOperand(1).getNode(), Imm); 390 } 391 392 /// Check whether a particular node is a constant value representable as 393 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 394 /// 395 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 396 static bool isScaledConstantInRange(SDValue Node, int Scale, 397 int RangeMin, int RangeMax, 398 int &ScaledConstant) { 399 assert(Scale > 0 && "Invalid scale!"); 400 401 // Check that this is a constant. 402 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 403 if (!C) 404 return false; 405 406 ScaledConstant = (int) C->getZExtValue(); 407 if ((ScaledConstant % Scale) != 0) 408 return false; 409 410 ScaledConstant /= Scale; 411 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 412 } 413 414 void ARMDAGToDAGISel::PreprocessISelDAG() { 415 if (!Subtarget->hasV6T2Ops()) 416 return; 417 418 bool isThumb2 = Subtarget->isThumb(); 419 // We use make_early_inc_range to avoid invalidation issues. 420 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) { 421 if (N.getOpcode() != ISD::ADD) 422 continue; 423 424 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 425 // leading zeros, followed by consecutive set bits, followed by 1 or 2 426 // trailing zeros, e.g. 1020. 427 // Transform the expression to 428 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 429 // of trailing zeros of c2. The left shift would be folded as an shifter 430 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 431 // node (UBFX). 432 433 SDValue N0 = N.getOperand(0); 434 SDValue N1 = N.getOperand(1); 435 unsigned And_imm = 0; 436 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 437 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 438 std::swap(N0, N1); 439 } 440 if (!And_imm) 441 continue; 442 443 // Check if the AND mask is an immediate of the form: 000.....1111111100 444 unsigned TZ = llvm::countr_zero(And_imm); 445 if (TZ != 1 && TZ != 2) 446 // Be conservative here. Shifter operands aren't always free. e.g. On 447 // Swift, left shifter operand of 1 / 2 for free but others are not. 448 // e.g. 449 // ubfx r3, r1, #16, #8 450 // ldr.w r3, [r0, r3, lsl #2] 451 // vs. 452 // mov.w r9, #1020 453 // and.w r2, r9, r1, lsr #14 454 // ldr r2, [r0, r2] 455 continue; 456 And_imm >>= TZ; 457 if (And_imm & (And_imm + 1)) 458 continue; 459 460 // Look for (and (srl X, c1), c2). 461 SDValue Srl = N1.getOperand(0); 462 unsigned Srl_imm = 0; 463 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 464 (Srl_imm <= 2)) 465 continue; 466 467 // Make sure first operand is not a shifter operand which would prevent 468 // folding of the left shift. 469 SDValue CPTmp0; 470 SDValue CPTmp1; 471 SDValue CPTmp2; 472 if (isThumb2) { 473 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 474 continue; 475 } else { 476 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 477 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 478 continue; 479 } 480 481 // Now make the transformation. 482 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 483 Srl.getOperand(0), 484 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 485 MVT::i32)); 486 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 487 Srl, 488 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 489 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 490 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 491 CurDAG->UpdateNodeOperands(&N, N0, N1); 492 } 493 } 494 495 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 496 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 497 /// least on current ARM implementations) which should be avoidded. 498 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 499 if (OptLevel == CodeGenOptLevel::None) 500 return true; 501 502 if (!Subtarget->hasVMLxHazards()) 503 return true; 504 505 if (!N->hasOneUse()) 506 return false; 507 508 SDNode *User = *N->user_begin(); 509 if (User->getOpcode() == ISD::CopyToReg) 510 return true; 511 if (User->isMachineOpcode()) { 512 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 513 CurDAG->getSubtarget().getInstrInfo()); 514 515 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); 516 if (MCID.mayStore()) 517 return true; 518 unsigned Opcode = MCID.getOpcode(); 519 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 520 return true; 521 // vmlx feeding into another vmlx. We actually want to unfold 522 // the use later in the MLxExpansion pass. e.g. 523 // vmla 524 // vmla (stall 8 cycles) 525 // 526 // vmul (5 cycles) 527 // vadd (5 cycles) 528 // vmla 529 // This adds up to about 18 - 19 cycles. 530 // 531 // vmla 532 // vmul (stall 4 cycles) 533 // vadd adds up to about 14 cycles. 534 return TII->isFpMLxInstruction(Opcode); 535 } 536 537 return false; 538 } 539 540 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 541 ARM_AM::ShiftOpc ShOpcVal, 542 unsigned ShAmt) { 543 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 544 return true; 545 if (Shift.hasOneUse()) 546 return true; 547 // R << 2 is free. 548 return ShOpcVal == ARM_AM::lsl && 549 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 550 } 551 552 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 553 unsigned MaxShift, 554 unsigned &PowerOfTwo, 555 SDValue &NewMulConst) const { 556 assert(N.getOpcode() == ISD::MUL); 557 assert(MaxShift > 0); 558 559 // If the multiply is used in more than one place then changing the constant 560 // will make other uses incorrect, so don't. 561 if (!N.hasOneUse()) return false; 562 // Check if the multiply is by a constant 563 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 564 if (!MulConst) return false; 565 // If the constant is used in more than one place then modifying it will mean 566 // we need to materialize two constants instead of one, which is a bad idea. 567 if (!MulConst->hasOneUse()) return false; 568 unsigned MulConstVal = MulConst->getZExtValue(); 569 if (MulConstVal == 0) return false; 570 571 // Find the largest power of 2 that MulConstVal is a multiple of 572 PowerOfTwo = MaxShift; 573 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 574 --PowerOfTwo; 575 if (PowerOfTwo == 0) return false; 576 } 577 578 // Only optimise if the new cost is better 579 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 580 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 581 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 582 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 583 return NewCost < OldCost; 584 } 585 586 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 587 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 588 ReplaceUses(N, M); 589 } 590 591 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 592 SDValue &BaseReg, 593 SDValue &Opc, 594 bool CheckProfitability) { 595 if (DisableShifterOp) 596 return false; 597 598 // If N is a multiply-by-constant and it's profitable to extract a shift and 599 // use it in a shifted operand do so. 600 if (N.getOpcode() == ISD::MUL) { 601 unsigned PowerOfTwo = 0; 602 SDValue NewMulConst; 603 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 604 HandleSDNode Handle(N); 605 SDLoc Loc(N); 606 replaceDAGValue(N.getOperand(1), NewMulConst); 607 BaseReg = Handle.getValue(); 608 Opc = CurDAG->getTargetConstant( 609 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 610 return true; 611 } 612 } 613 614 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 615 616 // Don't match base register only case. That is matched to a separate 617 // lower complexity pattern with explicit register operand. 618 if (ShOpcVal == ARM_AM::no_shift) return false; 619 620 BaseReg = N.getOperand(0); 621 unsigned ShImmVal = 0; 622 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 623 if (!RHS) return false; 624 ShImmVal = RHS->getZExtValue() & 31; 625 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 626 SDLoc(N), MVT::i32); 627 return true; 628 } 629 630 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 631 SDValue &BaseReg, 632 SDValue &ShReg, 633 SDValue &Opc, 634 bool CheckProfitability) { 635 if (DisableShifterOp) 636 return false; 637 638 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 639 640 // Don't match base register only case. That is matched to a separate 641 // lower complexity pattern with explicit register operand. 642 if (ShOpcVal == ARM_AM::no_shift) return false; 643 644 BaseReg = N.getOperand(0); 645 unsigned ShImmVal = 0; 646 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 647 if (RHS) return false; 648 649 ShReg = N.getOperand(1); 650 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 651 return false; 652 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 653 SDLoc(N), MVT::i32); 654 return true; 655 } 656 657 // Determine whether an ISD::OR's operands are suitable to turn the operation 658 // into an addition, which often has more compact encodings. 659 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 660 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 661 Out = N; 662 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 663 } 664 665 666 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 667 SDValue &Base, 668 SDValue &OffImm) { 669 // Match simple R + imm12 operands. 670 671 // Base only. 672 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 673 !CurDAG->isBaseWithConstantOffset(N)) { 674 if (N.getOpcode() == ISD::FrameIndex) { 675 // Match frame index. 676 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 677 Base = CurDAG->getTargetFrameIndex( 678 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 679 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 680 return true; 681 } 682 683 if (N.getOpcode() == ARMISD::Wrapper && 684 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 685 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 686 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 687 Base = N.getOperand(0); 688 } else 689 Base = N; 690 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 691 return true; 692 } 693 694 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 695 int RHSC = (int)RHS->getSExtValue(); 696 if (N.getOpcode() == ISD::SUB) 697 RHSC = -RHSC; 698 699 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 700 Base = N.getOperand(0); 701 if (Base.getOpcode() == ISD::FrameIndex) { 702 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 703 Base = CurDAG->getTargetFrameIndex( 704 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 705 } 706 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32); 707 return true; 708 } 709 } 710 711 // Base only. 712 Base = N; 713 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 714 return true; 715 } 716 717 718 719 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 720 SDValue &Opc) { 721 if (N.getOpcode() == ISD::MUL && 722 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 723 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 724 // X * [3,5,9] -> X + X * [2,4,8] etc. 725 int RHSC = (int)RHS->getZExtValue(); 726 if (RHSC & 1) { 727 RHSC = RHSC & ~1; 728 ARM_AM::AddrOpc AddSub = ARM_AM::add; 729 if (RHSC < 0) { 730 AddSub = ARM_AM::sub; 731 RHSC = - RHSC; 732 } 733 if (isPowerOf2_32(RHSC)) { 734 unsigned ShAmt = Log2_32(RHSC); 735 Base = Offset = N.getOperand(0); 736 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 737 ARM_AM::lsl), 738 SDLoc(N), MVT::i32); 739 return true; 740 } 741 } 742 } 743 } 744 745 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 746 // ISD::OR that is equivalent to an ISD::ADD. 747 !CurDAG->isBaseWithConstantOffset(N)) 748 return false; 749 750 // Leave simple R +/- imm12 operands for LDRi12 751 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 752 int RHSC; 753 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 754 -0x1000+1, 0x1000, RHSC)) // 12 bits. 755 return false; 756 } 757 758 // Otherwise this is R +/- [possibly shifted] R. 759 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 760 ARM_AM::ShiftOpc ShOpcVal = 761 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 762 unsigned ShAmt = 0; 763 764 Base = N.getOperand(0); 765 Offset = N.getOperand(1); 766 767 if (ShOpcVal != ARM_AM::no_shift) { 768 // Check to see if the RHS of the shift is a constant, if not, we can't fold 769 // it. 770 if (ConstantSDNode *Sh = 771 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 772 ShAmt = Sh->getZExtValue(); 773 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 774 Offset = N.getOperand(1).getOperand(0); 775 else { 776 ShAmt = 0; 777 ShOpcVal = ARM_AM::no_shift; 778 } 779 } else { 780 ShOpcVal = ARM_AM::no_shift; 781 } 782 } 783 784 // Try matching (R shl C) + (R). 785 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 786 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 787 N.getOperand(0).hasOneUse())) { 788 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 789 if (ShOpcVal != ARM_AM::no_shift) { 790 // Check to see if the RHS of the shift is a constant, if not, we can't 791 // fold it. 792 if (ConstantSDNode *Sh = 793 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 794 ShAmt = Sh->getZExtValue(); 795 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 796 Offset = N.getOperand(0).getOperand(0); 797 Base = N.getOperand(1); 798 } else { 799 ShAmt = 0; 800 ShOpcVal = ARM_AM::no_shift; 801 } 802 } else { 803 ShOpcVal = ARM_AM::no_shift; 804 } 805 } 806 } 807 808 // If Offset is a multiply-by-constant and it's profitable to extract a shift 809 // and use it in a shifted operand do so. 810 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 811 unsigned PowerOfTwo = 0; 812 SDValue NewMulConst; 813 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 814 HandleSDNode Handle(Offset); 815 replaceDAGValue(Offset.getOperand(1), NewMulConst); 816 Offset = Handle.getValue(); 817 ShAmt = PowerOfTwo; 818 ShOpcVal = ARM_AM::lsl; 819 } 820 } 821 822 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 823 SDLoc(N), MVT::i32); 824 return true; 825 } 826 827 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 828 SDValue &Offset, SDValue &Opc) { 829 unsigned Opcode = Op->getOpcode(); 830 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 831 ? cast<LoadSDNode>(Op)->getAddressingMode() 832 : cast<StoreSDNode>(Op)->getAddressingMode(); 833 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 834 ? ARM_AM::add : ARM_AM::sub; 835 int Val; 836 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 837 return false; 838 839 Offset = N; 840 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 841 unsigned ShAmt = 0; 842 if (ShOpcVal != ARM_AM::no_shift) { 843 // Check to see if the RHS of the shift is a constant, if not, we can't fold 844 // it. 845 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 846 ShAmt = Sh->getZExtValue(); 847 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 848 Offset = N.getOperand(0); 849 else { 850 ShAmt = 0; 851 ShOpcVal = ARM_AM::no_shift; 852 } 853 } else { 854 ShOpcVal = ARM_AM::no_shift; 855 } 856 } 857 858 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 859 SDLoc(N), MVT::i32); 860 return true; 861 } 862 863 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 864 SDValue &Offset, SDValue &Opc) { 865 unsigned Opcode = Op->getOpcode(); 866 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 867 ? cast<LoadSDNode>(Op)->getAddressingMode() 868 : cast<StoreSDNode>(Op)->getAddressingMode(); 869 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 870 ? ARM_AM::add : ARM_AM::sub; 871 int Val; 872 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 873 if (AddSub == ARM_AM::sub) Val *= -1; 874 Offset = CurDAG->getRegister(0, MVT::i32); 875 Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32); 876 return true; 877 } 878 879 return false; 880 } 881 882 883 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 884 SDValue &Offset, SDValue &Opc) { 885 unsigned Opcode = Op->getOpcode(); 886 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 887 ? cast<LoadSDNode>(Op)->getAddressingMode() 888 : cast<StoreSDNode>(Op)->getAddressingMode(); 889 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 890 ? ARM_AM::add : ARM_AM::sub; 891 int Val; 892 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 893 Offset = CurDAG->getRegister(0, MVT::i32); 894 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 895 ARM_AM::no_shift), 896 SDLoc(Op), MVT::i32); 897 return true; 898 } 899 900 return false; 901 } 902 903 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 904 Base = N; 905 return true; 906 } 907 908 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 909 SDValue &Base, SDValue &Offset, 910 SDValue &Opc) { 911 if (N.getOpcode() == ISD::SUB) { 912 // X - C is canonicalize to X + -C, no need to handle it here. 913 Base = N.getOperand(0); 914 Offset = N.getOperand(1); 915 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 916 MVT::i32); 917 return true; 918 } 919 920 if (!CurDAG->isBaseWithConstantOffset(N)) { 921 Base = N; 922 if (N.getOpcode() == ISD::FrameIndex) { 923 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 924 Base = CurDAG->getTargetFrameIndex( 925 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 926 } 927 Offset = CurDAG->getRegister(0, MVT::i32); 928 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 929 MVT::i32); 930 return true; 931 } 932 933 // If the RHS is +/- imm8, fold into addr mode. 934 int RHSC; 935 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 936 -256 + 1, 256, RHSC)) { // 8 bits. 937 Base = N.getOperand(0); 938 if (Base.getOpcode() == ISD::FrameIndex) { 939 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 940 Base = CurDAG->getTargetFrameIndex( 941 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 942 } 943 Offset = CurDAG->getRegister(0, MVT::i32); 944 945 ARM_AM::AddrOpc AddSub = ARM_AM::add; 946 if (RHSC < 0) { 947 AddSub = ARM_AM::sub; 948 RHSC = -RHSC; 949 } 950 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 951 MVT::i32); 952 return true; 953 } 954 955 Base = N.getOperand(0); 956 Offset = N.getOperand(1); 957 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 958 MVT::i32); 959 return true; 960 } 961 962 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 963 SDValue &Offset, SDValue &Opc) { 964 unsigned Opcode = Op->getOpcode(); 965 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 966 ? cast<LoadSDNode>(Op)->getAddressingMode() 967 : cast<StoreSDNode>(Op)->getAddressingMode(); 968 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 969 ? ARM_AM::add : ARM_AM::sub; 970 int Val; 971 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 972 Offset = CurDAG->getRegister(0, MVT::i32); 973 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 974 MVT::i32); 975 return true; 976 } 977 978 Offset = N; 979 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 980 MVT::i32); 981 return true; 982 } 983 984 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 985 bool FP16) { 986 if (!CurDAG->isBaseWithConstantOffset(N)) { 987 Base = N; 988 if (N.getOpcode() == ISD::FrameIndex) { 989 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 990 Base = CurDAG->getTargetFrameIndex( 991 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 992 } else if (N.getOpcode() == ARMISD::Wrapper && 993 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 994 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 995 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 996 Base = N.getOperand(0); 997 } 998 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 999 SDLoc(N), MVT::i32); 1000 return true; 1001 } 1002 1003 // If the RHS is +/- imm8, fold into addr mode. 1004 int RHSC; 1005 const int Scale = FP16 ? 2 : 4; 1006 1007 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 1008 Base = N.getOperand(0); 1009 if (Base.getOpcode() == ISD::FrameIndex) { 1010 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1011 Base = CurDAG->getTargetFrameIndex( 1012 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1013 } 1014 1015 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1016 if (RHSC < 0) { 1017 AddSub = ARM_AM::sub; 1018 RHSC = -RHSC; 1019 } 1020 1021 if (FP16) 1022 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1023 SDLoc(N), MVT::i32); 1024 else 1025 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1026 SDLoc(N), MVT::i32); 1027 1028 return true; 1029 } 1030 1031 Base = N; 1032 1033 if (FP16) 1034 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1035 SDLoc(N), MVT::i32); 1036 else 1037 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1038 SDLoc(N), MVT::i32); 1039 1040 return true; 1041 } 1042 1043 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1044 SDValue &Base, SDValue &Offset) { 1045 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1046 } 1047 1048 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1049 SDValue &Base, SDValue &Offset) { 1050 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1051 } 1052 1053 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1054 SDValue &Align) { 1055 Addr = N; 1056 1057 unsigned Alignment = 0; 1058 1059 MemSDNode *MemN = cast<MemSDNode>(Parent); 1060 1061 if (isa<LSBaseSDNode>(MemN) || 1062 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1063 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1064 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1065 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1066 // The maximum alignment is equal to the memory size being referenced. 1067 llvm::Align MMOAlign = MemN->getAlign(); 1068 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1069 if (MMOAlign.value() >= MemSize && MemSize > 1) 1070 Alignment = MemSize; 1071 } else { 1072 // All other uses of addrmode6 are for intrinsics. For now just record 1073 // the raw alignment value; it will be refined later based on the legal 1074 // alignment operands for the intrinsic. 1075 Alignment = MemN->getAlign().value(); 1076 } 1077 1078 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1079 return true; 1080 } 1081 1082 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1083 SDValue &Offset) { 1084 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1085 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1086 if (AM != ISD::POST_INC) 1087 return false; 1088 Offset = N; 1089 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1090 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1091 Offset = CurDAG->getRegister(0, MVT::i32); 1092 } 1093 return true; 1094 } 1095 1096 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1097 SDValue &Offset, SDValue &Label) { 1098 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1099 Offset = N.getOperand(0); 1100 SDValue N1 = N.getOperand(1); 1101 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32); 1102 return true; 1103 } 1104 1105 return false; 1106 } 1107 1108 1109 //===----------------------------------------------------------------------===// 1110 // Thumb Addressing Modes 1111 //===----------------------------------------------------------------------===// 1112 1113 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1114 // Negative numbers are difficult to materialise in thumb1. If we are 1115 // selecting the add of a negative, instead try to select ri with a zero 1116 // offset, so create the add node directly which will become a sub. 1117 if (N.getOpcode() != ISD::ADD) 1118 return false; 1119 1120 // Look for an imm which is not legal for ld/st, but is legal for sub. 1121 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1122 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1123 1124 return false; 1125 } 1126 1127 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1128 SDValue &Offset) { 1129 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1130 if (!isNullConstant(N)) 1131 return false; 1132 1133 Base = Offset = N; 1134 return true; 1135 } 1136 1137 Base = N.getOperand(0); 1138 Offset = N.getOperand(1); 1139 return true; 1140 } 1141 1142 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1143 SDValue &Offset) { 1144 if (shouldUseZeroOffsetLdSt(N)) 1145 return false; // Select ri instead 1146 return SelectThumbAddrModeRRSext(N, Base, Offset); 1147 } 1148 1149 bool 1150 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1151 SDValue &Base, SDValue &OffImm) { 1152 if (shouldUseZeroOffsetLdSt(N)) { 1153 Base = N; 1154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1155 return true; 1156 } 1157 1158 if (!CurDAG->isBaseWithConstantOffset(N)) { 1159 if (N.getOpcode() == ISD::ADD) { 1160 return false; // We want to select register offset instead 1161 } else if (N.getOpcode() == ARMISD::Wrapper && 1162 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1163 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1164 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1165 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1166 Base = N.getOperand(0); 1167 } else { 1168 Base = N; 1169 } 1170 1171 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1172 return true; 1173 } 1174 1175 // If the RHS is + imm5 * scale, fold into addr mode. 1176 int RHSC; 1177 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1178 Base = N.getOperand(0); 1179 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32); 1180 return true; 1181 } 1182 1183 // Offset is too large, so use register offset instead. 1184 return false; 1185 } 1186 1187 bool 1188 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1189 SDValue &OffImm) { 1190 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1191 } 1192 1193 bool 1194 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1195 SDValue &OffImm) { 1196 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1197 } 1198 1199 bool 1200 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1201 SDValue &OffImm) { 1202 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1203 } 1204 1205 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1206 SDValue &Base, SDValue &OffImm) { 1207 if (N.getOpcode() == ISD::FrameIndex) { 1208 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1209 // Only multiples of 4 are allowed for the offset, so the frame object 1210 // alignment must be at least 4. 1211 MachineFrameInfo &MFI = MF->getFrameInfo(); 1212 if (MFI.getObjectAlign(FI) < Align(4)) 1213 MFI.setObjectAlignment(FI, Align(4)); 1214 Base = CurDAG->getTargetFrameIndex( 1215 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1216 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1217 return true; 1218 } 1219 1220 if (!CurDAG->isBaseWithConstantOffset(N)) 1221 return false; 1222 1223 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1224 // If the RHS is + imm8 * scale, fold into addr mode. 1225 int RHSC; 1226 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1227 Base = N.getOperand(0); 1228 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1229 // Make sure the offset is inside the object, or we might fail to 1230 // allocate an emergency spill slot. (An out-of-range access is UB, but 1231 // it could show up anyway.) 1232 MachineFrameInfo &MFI = MF->getFrameInfo(); 1233 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1234 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1235 // indexed by the LHS must be 4-byte aligned. 1236 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1237 MFI.setObjectAlignment(FI, Align(4)); 1238 if (MFI.getObjectAlign(FI) >= Align(4)) { 1239 Base = CurDAG->getTargetFrameIndex( 1240 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1241 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32); 1242 return true; 1243 } 1244 } 1245 } 1246 } 1247 1248 return false; 1249 } 1250 1251 template <unsigned Shift> 1252 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1253 SDValue &OffImm) { 1254 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1255 int RHSC; 1256 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1257 RHSC)) { 1258 Base = N.getOperand(0); 1259 if (N.getOpcode() == ISD::SUB) 1260 RHSC = -RHSC; 1261 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N), 1262 MVT::i32); 1263 return true; 1264 } 1265 } 1266 1267 // Base only. 1268 Base = N; 1269 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1270 return true; 1271 } 1272 1273 1274 //===----------------------------------------------------------------------===// 1275 // Thumb 2 Addressing Modes 1276 //===----------------------------------------------------------------------===// 1277 1278 1279 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1280 SDValue &Base, SDValue &OffImm) { 1281 // Match simple R + imm12 operands. 1282 1283 // Base only. 1284 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1285 !CurDAG->isBaseWithConstantOffset(N)) { 1286 if (N.getOpcode() == ISD::FrameIndex) { 1287 // Match frame index. 1288 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1289 Base = CurDAG->getTargetFrameIndex( 1290 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1291 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1292 return true; 1293 } 1294 1295 if (N.getOpcode() == ARMISD::Wrapper && 1296 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1297 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1298 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1299 Base = N.getOperand(0); 1300 if (Base.getOpcode() == ISD::TargetConstantPool) 1301 return false; // We want to select t2LDRpci instead. 1302 } else 1303 Base = N; 1304 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1305 return true; 1306 } 1307 1308 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1309 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1310 // Let t2LDRi8 handle (R - imm8). 1311 return false; 1312 1313 int RHSC = (int)RHS->getZExtValue(); 1314 if (N.getOpcode() == ISD::SUB) 1315 RHSC = -RHSC; 1316 1317 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1318 Base = N.getOperand(0); 1319 if (Base.getOpcode() == ISD::FrameIndex) { 1320 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1321 Base = CurDAG->getTargetFrameIndex( 1322 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1323 } 1324 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32); 1325 return true; 1326 } 1327 } 1328 1329 // Base only. 1330 Base = N; 1331 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1332 return true; 1333 } 1334 1335 template <unsigned Shift> 1336 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1337 SDValue &OffImm) { 1338 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1339 int RHSC; 1340 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1341 Base = N.getOperand(0); 1342 if (Base.getOpcode() == ISD::FrameIndex) { 1343 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1344 Base = CurDAG->getTargetFrameIndex( 1345 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1346 } 1347 1348 if (N.getOpcode() == ISD::SUB) 1349 RHSC = -RHSC; 1350 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N), 1351 MVT::i32); 1352 return true; 1353 } 1354 } 1355 1356 // Base only. 1357 Base = N; 1358 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1359 return true; 1360 } 1361 1362 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1363 SDValue &Base, SDValue &OffImm) { 1364 // Match simple R - imm8 operands. 1365 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1366 !CurDAG->isBaseWithConstantOffset(N)) 1367 return false; 1368 1369 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1370 int RHSC = (int)RHS->getSExtValue(); 1371 if (N.getOpcode() == ISD::SUB) 1372 RHSC = -RHSC; 1373 1374 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1375 Base = N.getOperand(0); 1376 if (Base.getOpcode() == ISD::FrameIndex) { 1377 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1378 Base = CurDAG->getTargetFrameIndex( 1379 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1380 } 1381 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32); 1382 return true; 1383 } 1384 } 1385 1386 return false; 1387 } 1388 1389 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1390 SDValue &OffImm){ 1391 unsigned Opcode = Op->getOpcode(); 1392 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1393 ? cast<LoadSDNode>(Op)->getAddressingMode() 1394 : cast<StoreSDNode>(Op)->getAddressingMode(); 1395 int RHSC; 1396 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1397 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1398 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32) 1399 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1400 return true; 1401 } 1402 1403 return false; 1404 } 1405 1406 template <unsigned Shift> 1407 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1408 SDValue &OffImm) { 1409 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1410 int RHSC; 1411 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1412 RHSC)) { 1413 Base = N.getOperand(0); 1414 if (Base.getOpcode() == ISD::FrameIndex) { 1415 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1416 Base = CurDAG->getTargetFrameIndex( 1417 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1418 } 1419 1420 if (N.getOpcode() == ISD::SUB) 1421 RHSC = -RHSC; 1422 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N), 1423 MVT::i32); 1424 return true; 1425 } 1426 } 1427 1428 // Base only. 1429 Base = N; 1430 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1431 return true; 1432 } 1433 1434 template <unsigned Shift> 1435 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1436 SDValue &OffImm) { 1437 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1438 } 1439 1440 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1441 SDValue &OffImm, 1442 unsigned Shift) { 1443 unsigned Opcode = Op->getOpcode(); 1444 ISD::MemIndexedMode AM; 1445 switch (Opcode) { 1446 case ISD::LOAD: 1447 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1448 break; 1449 case ISD::STORE: 1450 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1451 break; 1452 case ISD::MLOAD: 1453 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1454 break; 1455 case ISD::MSTORE: 1456 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1457 break; 1458 default: 1459 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1460 } 1461 1462 int RHSC; 1463 // 7 bit constant, shifted by Shift. 1464 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1465 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1466 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), 1467 SDLoc(N), MVT::i32) 1468 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift), 1469 SDLoc(N), MVT::i32); 1470 return true; 1471 } 1472 return false; 1473 } 1474 1475 template <int Min, int Max> 1476 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1477 int Val; 1478 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1479 OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32); 1480 return true; 1481 } 1482 return false; 1483 } 1484 1485 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1486 SDValue &Base, 1487 SDValue &OffReg, SDValue &ShImm) { 1488 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1489 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1490 return false; 1491 1492 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1493 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1494 int RHSC = (int)RHS->getZExtValue(); 1495 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1496 return false; 1497 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1498 return false; 1499 } 1500 1501 // Look for (R + R) or (R + (R << [1,2,3])). 1502 unsigned ShAmt = 0; 1503 Base = N.getOperand(0); 1504 OffReg = N.getOperand(1); 1505 1506 // Swap if it is ((R << c) + R). 1507 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1508 if (ShOpcVal != ARM_AM::lsl) { 1509 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1510 if (ShOpcVal == ARM_AM::lsl) 1511 std::swap(Base, OffReg); 1512 } 1513 1514 if (ShOpcVal == ARM_AM::lsl) { 1515 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1516 // it. 1517 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1518 ShAmt = Sh->getZExtValue(); 1519 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1520 OffReg = OffReg.getOperand(0); 1521 else { 1522 ShAmt = 0; 1523 } 1524 } 1525 } 1526 1527 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1528 // and use it in a shifted operand do so. 1529 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1530 unsigned PowerOfTwo = 0; 1531 SDValue NewMulConst; 1532 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1533 HandleSDNode Handle(OffReg); 1534 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1535 OffReg = Handle.getValue(); 1536 ShAmt = PowerOfTwo; 1537 } 1538 } 1539 1540 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1541 1542 return true; 1543 } 1544 1545 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1546 SDValue &OffImm) { 1547 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1548 // instructions. 1549 Base = N; 1550 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1551 1552 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1553 return true; 1554 1555 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1556 if (!RHS) 1557 return true; 1558 1559 uint32_t RHSC = (int)RHS->getZExtValue(); 1560 if (RHSC > 1020 || RHSC % 4 != 0) 1561 return true; 1562 1563 Base = N.getOperand(0); 1564 if (Base.getOpcode() == ISD::FrameIndex) { 1565 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1566 Base = CurDAG->getTargetFrameIndex( 1567 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1568 } 1569 1570 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1571 return true; 1572 } 1573 1574 //===--------------------------------------------------------------------===// 1575 1576 /// getAL - Returns a ARMCC::AL immediate node. 1577 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1578 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1579 } 1580 1581 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1582 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1583 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1584 } 1585 1586 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1587 LoadSDNode *LD = cast<LoadSDNode>(N); 1588 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1589 if (AM == ISD::UNINDEXED) 1590 return false; 1591 1592 EVT LoadedVT = LD->getMemoryVT(); 1593 SDValue Offset, AMOpc; 1594 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1595 unsigned Opcode = 0; 1596 bool Match = false; 1597 if (LoadedVT == MVT::i32 && isPre && 1598 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1599 Opcode = ARM::LDR_PRE_IMM; 1600 Match = true; 1601 } else if (LoadedVT == MVT::i32 && !isPre && 1602 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1603 Opcode = ARM::LDR_POST_IMM; 1604 Match = true; 1605 } else if (LoadedVT == MVT::i32 && 1606 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1607 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1608 Match = true; 1609 1610 } else if (LoadedVT == MVT::i16 && 1611 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1612 Match = true; 1613 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1614 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1615 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1616 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1617 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1618 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1619 Match = true; 1620 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; 1621 } 1622 } else { 1623 if (isPre && 1624 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1625 Match = true; 1626 Opcode = ARM::LDRB_PRE_IMM; 1627 } else if (!isPre && 1628 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1629 Match = true; 1630 Opcode = ARM::LDRB_POST_IMM; 1631 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1632 Match = true; 1633 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1634 } 1635 } 1636 } 1637 1638 if (Match) { 1639 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1640 SDValue Chain = LD->getChain(); 1641 SDValue Base = LD->getBasePtr(); 1642 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1643 CurDAG->getRegister(0, MVT::i32), Chain }; 1644 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1645 MVT::Other, Ops); 1646 transferMemOperands(N, New); 1647 ReplaceNode(N, New); 1648 return true; 1649 } else { 1650 SDValue Chain = LD->getChain(); 1651 SDValue Base = LD->getBasePtr(); 1652 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1653 CurDAG->getRegister(0, MVT::i32), Chain }; 1654 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1655 MVT::Other, Ops); 1656 transferMemOperands(N, New); 1657 ReplaceNode(N, New); 1658 return true; 1659 } 1660 } 1661 1662 return false; 1663 } 1664 1665 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1666 LoadSDNode *LD = cast<LoadSDNode>(N); 1667 EVT LoadedVT = LD->getMemoryVT(); 1668 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1669 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1670 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1671 return false; 1672 1673 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1674 if (!COffs || COffs->getZExtValue() != 4) 1675 return false; 1676 1677 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 1678 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1679 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1680 // ISel. 1681 SDValue Chain = LD->getChain(); 1682 SDValue Base = LD->getBasePtr(); 1683 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1684 CurDAG->getRegister(0, MVT::i32), Chain }; 1685 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1686 MVT::i32, MVT::Other, Ops); 1687 transferMemOperands(N, New); 1688 ReplaceNode(N, New); 1689 return true; 1690 } 1691 1692 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1693 LoadSDNode *LD = cast<LoadSDNode>(N); 1694 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1695 if (AM == ISD::UNINDEXED) 1696 return false; 1697 1698 EVT LoadedVT = LD->getMemoryVT(); 1699 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1700 SDValue Offset; 1701 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1702 unsigned Opcode = 0; 1703 bool Match = false; 1704 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1705 switch (LoadedVT.getSimpleVT().SimpleTy) { 1706 case MVT::i32: 1707 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1708 break; 1709 case MVT::i16: 1710 if (isSExtLd) 1711 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1712 else 1713 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1714 break; 1715 case MVT::i8: 1716 case MVT::i1: 1717 if (isSExtLd) 1718 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1719 else 1720 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1721 break; 1722 default: 1723 return false; 1724 } 1725 Match = true; 1726 } 1727 1728 if (Match) { 1729 SDValue Chain = LD->getChain(); 1730 SDValue Base = LD->getBasePtr(); 1731 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1732 CurDAG->getRegister(0, MVT::i32), Chain }; 1733 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1734 MVT::Other, Ops); 1735 transferMemOperands(N, New); 1736 ReplaceNode(N, New); 1737 return true; 1738 } 1739 1740 return false; 1741 } 1742 1743 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { 1744 EVT LoadedVT; 1745 unsigned Opcode = 0; 1746 bool isSExtLd, isPre; 1747 Align Alignment; 1748 ARMVCC::VPTCodes Pred; 1749 SDValue PredReg; 1750 SDValue Chain, Base, Offset; 1751 1752 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1753 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1754 if (AM == ISD::UNINDEXED) 1755 return false; 1756 LoadedVT = LD->getMemoryVT(); 1757 if (!LoadedVT.isVector()) 1758 return false; 1759 1760 Chain = LD->getChain(); 1761 Base = LD->getBasePtr(); 1762 Offset = LD->getOffset(); 1763 Alignment = LD->getAlign(); 1764 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1765 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1766 Pred = ARMVCC::None; 1767 PredReg = CurDAG->getRegister(0, MVT::i32); 1768 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { 1769 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1770 if (AM == ISD::UNINDEXED) 1771 return false; 1772 LoadedVT = LD->getMemoryVT(); 1773 if (!LoadedVT.isVector()) 1774 return false; 1775 1776 Chain = LD->getChain(); 1777 Base = LD->getBasePtr(); 1778 Offset = LD->getOffset(); 1779 Alignment = LD->getAlign(); 1780 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1781 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1782 Pred = ARMVCC::Then; 1783 PredReg = LD->getMask(); 1784 } else 1785 llvm_unreachable("Expected a Load or a Masked Load!"); 1786 1787 // We allow LE non-masked loads to change the type (for example use a vldrb.8 1788 // as opposed to a vldrw.32). This can allow extra addressing modes or 1789 // alignments for what is otherwise an equivalent instruction. 1790 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N); 1791 1792 SDValue NewOffset; 1793 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 && 1794 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { 1795 if (isSExtLd) 1796 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; 1797 else 1798 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; 1799 } else if (LoadedVT == MVT::v8i8 && 1800 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { 1801 if (isSExtLd) 1802 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; 1803 else 1804 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; 1805 } else if (LoadedVT == MVT::v4i8 && 1806 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { 1807 if (isSExtLd) 1808 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; 1809 else 1810 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; 1811 } else if (Alignment >= Align(4) && 1812 (CanChangeType || LoadedVT == MVT::v4i32 || 1813 LoadedVT == MVT::v4f32) && 1814 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) 1815 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; 1816 else if (Alignment >= Align(2) && 1817 (CanChangeType || LoadedVT == MVT::v8i16 || 1818 LoadedVT == MVT::v8f16) && 1819 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) 1820 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; 1821 else if ((CanChangeType || LoadedVT == MVT::v16i8) && 1822 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) 1823 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; 1824 else 1825 return false; 1826 1827 SDValue Ops[] = {Base, 1828 NewOffset, 1829 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), 1830 PredReg, 1831 CurDAG->getRegister(0, MVT::i32), // tp_reg 1832 Chain}; 1833 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, 1834 N->getValueType(0), MVT::Other, Ops); 1835 transferMemOperands(N, New); 1836 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 1837 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 1838 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 1839 CurDAG->RemoveDeadNode(N); 1840 return true; 1841 } 1842 1843 /// Form a GPRPair pseudo register from a pair of GPR regs. 1844 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1845 SDLoc dl(V0.getNode()); 1846 SDValue RegClass = 1847 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1848 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1849 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1850 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1851 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1852 } 1853 1854 /// Form a D register from a pair of S registers. 1855 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1856 SDLoc dl(V0.getNode()); 1857 SDValue RegClass = 1858 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1859 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1860 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1861 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1862 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1863 } 1864 1865 /// Form a quad register from a pair of D registers. 1866 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1867 SDLoc dl(V0.getNode()); 1868 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1869 MVT::i32); 1870 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1871 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1872 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1873 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1874 } 1875 1876 /// Form 4 consecutive D registers from a pair of Q registers. 1877 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1878 SDLoc dl(V0.getNode()); 1879 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1880 MVT::i32); 1881 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1882 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1883 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1884 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1885 } 1886 1887 /// Form 4 consecutive S registers. 1888 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1889 SDValue V2, SDValue V3) { 1890 SDLoc dl(V0.getNode()); 1891 SDValue RegClass = 1892 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1893 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1894 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1895 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1896 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1897 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1898 V2, SubReg2, V3, SubReg3 }; 1899 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1900 } 1901 1902 /// Form 4 consecutive D registers. 1903 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1904 SDValue V2, SDValue V3) { 1905 SDLoc dl(V0.getNode()); 1906 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1907 MVT::i32); 1908 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1909 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1910 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1911 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1912 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1913 V2, SubReg2, V3, SubReg3 }; 1914 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1915 } 1916 1917 /// Form 4 consecutive Q registers. 1918 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1919 SDValue V2, SDValue V3) { 1920 SDLoc dl(V0.getNode()); 1921 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1922 MVT::i32); 1923 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1924 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1925 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1926 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1927 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1928 V2, SubReg2, V3, SubReg3 }; 1929 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1930 } 1931 1932 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1933 /// of a NEON VLD or VST instruction. The supported values depend on the 1934 /// number of registers being loaded. 1935 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1936 unsigned NumVecs, bool is64BitVector) { 1937 unsigned NumRegs = NumVecs; 1938 if (!is64BitVector && NumVecs < 3) 1939 NumRegs *= 2; 1940 1941 unsigned Alignment = Align->getAsZExtVal(); 1942 if (Alignment >= 32 && NumRegs == 4) 1943 Alignment = 32; 1944 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1945 Alignment = 16; 1946 else if (Alignment >= 8) 1947 Alignment = 8; 1948 else 1949 Alignment = 0; 1950 1951 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1952 } 1953 1954 static bool isVLDfixed(unsigned Opc) 1955 { 1956 switch (Opc) { 1957 default: return false; 1958 case ARM::VLD1d8wb_fixed : return true; 1959 case ARM::VLD1d16wb_fixed : return true; 1960 case ARM::VLD1d64Qwb_fixed : return true; 1961 case ARM::VLD1d32wb_fixed : return true; 1962 case ARM::VLD1d64wb_fixed : return true; 1963 case ARM::VLD1d8TPseudoWB_fixed : return true; 1964 case ARM::VLD1d16TPseudoWB_fixed : return true; 1965 case ARM::VLD1d32TPseudoWB_fixed : return true; 1966 case ARM::VLD1d64TPseudoWB_fixed : return true; 1967 case ARM::VLD1d8QPseudoWB_fixed : return true; 1968 case ARM::VLD1d16QPseudoWB_fixed : return true; 1969 case ARM::VLD1d32QPseudoWB_fixed : return true; 1970 case ARM::VLD1d64QPseudoWB_fixed : return true; 1971 case ARM::VLD1q8wb_fixed : return true; 1972 case ARM::VLD1q16wb_fixed : return true; 1973 case ARM::VLD1q32wb_fixed : return true; 1974 case ARM::VLD1q64wb_fixed : return true; 1975 case ARM::VLD1DUPd8wb_fixed : return true; 1976 case ARM::VLD1DUPd16wb_fixed : return true; 1977 case ARM::VLD1DUPd32wb_fixed : return true; 1978 case ARM::VLD1DUPq8wb_fixed : return true; 1979 case ARM::VLD1DUPq16wb_fixed : return true; 1980 case ARM::VLD1DUPq32wb_fixed : return true; 1981 case ARM::VLD2d8wb_fixed : return true; 1982 case ARM::VLD2d16wb_fixed : return true; 1983 case ARM::VLD2d32wb_fixed : return true; 1984 case ARM::VLD2q8PseudoWB_fixed : return true; 1985 case ARM::VLD2q16PseudoWB_fixed : return true; 1986 case ARM::VLD2q32PseudoWB_fixed : return true; 1987 case ARM::VLD2DUPd8wb_fixed : return true; 1988 case ARM::VLD2DUPd16wb_fixed : return true; 1989 case ARM::VLD2DUPd32wb_fixed : return true; 1990 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 1991 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 1992 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 1993 } 1994 } 1995 1996 static bool isVSTfixed(unsigned Opc) 1997 { 1998 switch (Opc) { 1999 default: return false; 2000 case ARM::VST1d8wb_fixed : return true; 2001 case ARM::VST1d16wb_fixed : return true; 2002 case ARM::VST1d32wb_fixed : return true; 2003 case ARM::VST1d64wb_fixed : return true; 2004 case ARM::VST1q8wb_fixed : return true; 2005 case ARM::VST1q16wb_fixed : return true; 2006 case ARM::VST1q32wb_fixed : return true; 2007 case ARM::VST1q64wb_fixed : return true; 2008 case ARM::VST1d8TPseudoWB_fixed : return true; 2009 case ARM::VST1d16TPseudoWB_fixed : return true; 2010 case ARM::VST1d32TPseudoWB_fixed : return true; 2011 case ARM::VST1d64TPseudoWB_fixed : return true; 2012 case ARM::VST1d8QPseudoWB_fixed : return true; 2013 case ARM::VST1d16QPseudoWB_fixed : return true; 2014 case ARM::VST1d32QPseudoWB_fixed : return true; 2015 case ARM::VST1d64QPseudoWB_fixed : return true; 2016 case ARM::VST2d8wb_fixed : return true; 2017 case ARM::VST2d16wb_fixed : return true; 2018 case ARM::VST2d32wb_fixed : return true; 2019 case ARM::VST2q8PseudoWB_fixed : return true; 2020 case ARM::VST2q16PseudoWB_fixed : return true; 2021 case ARM::VST2q32PseudoWB_fixed : return true; 2022 } 2023 } 2024 2025 // Get the register stride update opcode of a VLD/VST instruction that 2026 // is otherwise equivalent to the given fixed stride updating instruction. 2027 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 2028 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 2029 && "Incorrect fixed stride updating instruction."); 2030 switch (Opc) { 2031 default: break; 2032 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 2033 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 2034 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 2035 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2036 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2037 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2038 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2039 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2040 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2041 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2042 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register; 2043 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register; 2044 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register; 2045 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2046 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register; 2047 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register; 2048 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register; 2049 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2050 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2051 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2052 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2053 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2054 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2055 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2056 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register; 2057 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register; 2058 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register; 2059 2060 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2061 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2062 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2063 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2064 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2065 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2066 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2067 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2068 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register; 2069 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; 2070 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; 2071 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 2072 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; 2073 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; 2074 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; 2075 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2076 2077 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2078 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2079 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2080 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2081 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2082 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2083 2084 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2085 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2086 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2087 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2088 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2089 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2090 2091 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2092 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2093 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2094 } 2095 return Opc; // If not one we handle, return it unchanged. 2096 } 2097 2098 /// Returns true if the given increment is a Constant known to be equal to the 2099 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2100 /// be used. 2101 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2102 auto C = dyn_cast<ConstantSDNode>(Inc); 2103 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2104 } 2105 2106 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2107 const uint16_t *DOpcodes, 2108 const uint16_t *QOpcodes0, 2109 const uint16_t *QOpcodes1) { 2110 assert(Subtarget->hasNEON()); 2111 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2112 SDLoc dl(N); 2113 2114 SDValue MemAddr, Align; 2115 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2116 // nodes are not intrinsics. 2117 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2118 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2119 return; 2120 2121 SDValue Chain = N->getOperand(0); 2122 EVT VT = N->getValueType(0); 2123 bool is64BitVector = VT.is64BitVector(); 2124 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2125 2126 unsigned OpcodeIndex; 2127 switch (VT.getSimpleVT().SimpleTy) { 2128 default: llvm_unreachable("unhandled vld type"); 2129 // Double-register operations: 2130 case MVT::v8i8: OpcodeIndex = 0; break; 2131 case MVT::v4f16: 2132 case MVT::v4bf16: 2133 case MVT::v4i16: OpcodeIndex = 1; break; 2134 case MVT::v2f32: 2135 case MVT::v2i32: OpcodeIndex = 2; break; 2136 case MVT::v1i64: OpcodeIndex = 3; break; 2137 // Quad-register operations: 2138 case MVT::v16i8: OpcodeIndex = 0; break; 2139 case MVT::v8f16: 2140 case MVT::v8bf16: 2141 case MVT::v8i16: OpcodeIndex = 1; break; 2142 case MVT::v4f32: 2143 case MVT::v4i32: OpcodeIndex = 2; break; 2144 case MVT::v2f64: 2145 case MVT::v2i64: OpcodeIndex = 3; break; 2146 } 2147 2148 EVT ResTy; 2149 if (NumVecs == 1) 2150 ResTy = VT; 2151 else { 2152 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2153 if (!is64BitVector) 2154 ResTyElts *= 2; 2155 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2156 } 2157 std::vector<EVT> ResTys; 2158 ResTys.push_back(ResTy); 2159 if (isUpdating) 2160 ResTys.push_back(MVT::i32); 2161 ResTys.push_back(MVT::Other); 2162 2163 SDValue Pred = getAL(CurDAG, dl); 2164 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2165 SDNode *VLd; 2166 SmallVector<SDValue, 7> Ops; 2167 2168 // Double registers and VLD1/VLD2 quad registers are directly supported. 2169 if (is64BitVector || NumVecs <= 2) { 2170 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2171 QOpcodes0[OpcodeIndex]); 2172 Ops.push_back(MemAddr); 2173 Ops.push_back(Align); 2174 if (isUpdating) { 2175 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2176 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2177 if (!IsImmUpdate) { 2178 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2179 // check for the opcode rather than the number of vector elements. 2180 if (isVLDfixed(Opc)) 2181 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2182 Ops.push_back(Inc); 2183 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2184 // the operands if not such an opcode. 2185 } else if (!isVLDfixed(Opc)) 2186 Ops.push_back(Reg0); 2187 } 2188 Ops.push_back(Pred); 2189 Ops.push_back(Reg0); 2190 Ops.push_back(Chain); 2191 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2192 2193 } else { 2194 // Otherwise, quad registers are loaded with two separate instructions, 2195 // where one loads the even registers and the other loads the odd registers. 2196 EVT AddrTy = MemAddr.getValueType(); 2197 2198 // Load the even subregs. This is always an updating load, so that it 2199 // provides the address to the second load for the odd subregs. 2200 SDValue ImplDef = 2201 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2202 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2203 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2204 ResTy, AddrTy, MVT::Other, OpsA); 2205 Chain = SDValue(VLdA, 2); 2206 2207 // Load the odd subregs. 2208 Ops.push_back(SDValue(VLdA, 1)); 2209 Ops.push_back(Align); 2210 if (isUpdating) { 2211 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2212 assert(isa<ConstantSDNode>(Inc.getNode()) && 2213 "only constant post-increment update allowed for VLD3/4"); 2214 (void)Inc; 2215 Ops.push_back(Reg0); 2216 } 2217 Ops.push_back(SDValue(VLdA, 0)); 2218 Ops.push_back(Pred); 2219 Ops.push_back(Reg0); 2220 Ops.push_back(Chain); 2221 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2222 } 2223 2224 // Transfer memoperands. 2225 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2226 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2227 2228 if (NumVecs == 1) { 2229 ReplaceNode(N, VLd); 2230 return; 2231 } 2232 2233 // Extract out the subregisters. 2234 SDValue SuperReg = SDValue(VLd, 0); 2235 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2236 ARM::qsub_3 == ARM::qsub_0 + 3, 2237 "Unexpected subreg numbering"); 2238 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 2239 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2240 ReplaceUses(SDValue(N, Vec), 2241 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2242 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2243 if (isUpdating) 2244 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2245 CurDAG->RemoveDeadNode(N); 2246 } 2247 2248 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2249 const uint16_t *DOpcodes, 2250 const uint16_t *QOpcodes0, 2251 const uint16_t *QOpcodes1) { 2252 assert(Subtarget->hasNEON()); 2253 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2254 SDLoc dl(N); 2255 2256 SDValue MemAddr, Align; 2257 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2258 // nodes are not intrinsics. 2259 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2260 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2261 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2262 return; 2263 2264 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2265 2266 SDValue Chain = N->getOperand(0); 2267 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2268 bool is64BitVector = VT.is64BitVector(); 2269 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2270 2271 unsigned OpcodeIndex; 2272 switch (VT.getSimpleVT().SimpleTy) { 2273 default: llvm_unreachable("unhandled vst type"); 2274 // Double-register operations: 2275 case MVT::v8i8: OpcodeIndex = 0; break; 2276 case MVT::v4f16: 2277 case MVT::v4bf16: 2278 case MVT::v4i16: OpcodeIndex = 1; break; 2279 case MVT::v2f32: 2280 case MVT::v2i32: OpcodeIndex = 2; break; 2281 case MVT::v1i64: OpcodeIndex = 3; break; 2282 // Quad-register operations: 2283 case MVT::v16i8: OpcodeIndex = 0; break; 2284 case MVT::v8f16: 2285 case MVT::v8bf16: 2286 case MVT::v8i16: OpcodeIndex = 1; break; 2287 case MVT::v4f32: 2288 case MVT::v4i32: OpcodeIndex = 2; break; 2289 case MVT::v2f64: 2290 case MVT::v2i64: OpcodeIndex = 3; break; 2291 } 2292 2293 std::vector<EVT> ResTys; 2294 if (isUpdating) 2295 ResTys.push_back(MVT::i32); 2296 ResTys.push_back(MVT::Other); 2297 2298 SDValue Pred = getAL(CurDAG, dl); 2299 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2300 SmallVector<SDValue, 7> Ops; 2301 2302 // Double registers and VST1/VST2 quad registers are directly supported. 2303 if (is64BitVector || NumVecs <= 2) { 2304 SDValue SrcReg; 2305 if (NumVecs == 1) { 2306 SrcReg = N->getOperand(Vec0Idx); 2307 } else if (is64BitVector) { 2308 // Form a REG_SEQUENCE to force register allocation. 2309 SDValue V0 = N->getOperand(Vec0Idx + 0); 2310 SDValue V1 = N->getOperand(Vec0Idx + 1); 2311 if (NumVecs == 2) 2312 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2313 else { 2314 SDValue V2 = N->getOperand(Vec0Idx + 2); 2315 // If it's a vst3, form a quad D-register and leave the last part as 2316 // an undef. 2317 SDValue V3 = (NumVecs == 3) 2318 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2319 : N->getOperand(Vec0Idx + 3); 2320 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2321 } 2322 } else { 2323 // Form a QQ register. 2324 SDValue Q0 = N->getOperand(Vec0Idx); 2325 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2326 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2327 } 2328 2329 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2330 QOpcodes0[OpcodeIndex]); 2331 Ops.push_back(MemAddr); 2332 Ops.push_back(Align); 2333 if (isUpdating) { 2334 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2335 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2336 if (!IsImmUpdate) { 2337 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2338 // check for the opcode rather than the number of vector elements. 2339 if (isVSTfixed(Opc)) 2340 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2341 Ops.push_back(Inc); 2342 } 2343 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2344 // the operands if not such an opcode. 2345 else if (!isVSTfixed(Opc)) 2346 Ops.push_back(Reg0); 2347 } 2348 Ops.push_back(SrcReg); 2349 Ops.push_back(Pred); 2350 Ops.push_back(Reg0); 2351 Ops.push_back(Chain); 2352 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2353 2354 // Transfer memoperands. 2355 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2356 2357 ReplaceNode(N, VSt); 2358 return; 2359 } 2360 2361 // Otherwise, quad registers are stored with two separate instructions, 2362 // where one stores the even registers and the other stores the odd registers. 2363 2364 // Form the QQQQ REG_SEQUENCE. 2365 SDValue V0 = N->getOperand(Vec0Idx + 0); 2366 SDValue V1 = N->getOperand(Vec0Idx + 1); 2367 SDValue V2 = N->getOperand(Vec0Idx + 2); 2368 SDValue V3 = (NumVecs == 3) 2369 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2370 : N->getOperand(Vec0Idx + 3); 2371 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2372 2373 // Store the even D registers. This is always an updating store, so that it 2374 // provides the address to the second store for the odd subregs. 2375 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2376 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2377 MemAddr.getValueType(), 2378 MVT::Other, OpsA); 2379 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2380 Chain = SDValue(VStA, 1); 2381 2382 // Store the odd D registers. 2383 Ops.push_back(SDValue(VStA, 0)); 2384 Ops.push_back(Align); 2385 if (isUpdating) { 2386 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2387 assert(isa<ConstantSDNode>(Inc.getNode()) && 2388 "only constant post-increment update allowed for VST3/4"); 2389 (void)Inc; 2390 Ops.push_back(Reg0); 2391 } 2392 Ops.push_back(RegSeq); 2393 Ops.push_back(Pred); 2394 Ops.push_back(Reg0); 2395 Ops.push_back(Chain); 2396 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2397 Ops); 2398 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2399 ReplaceNode(N, VStB); 2400 } 2401 2402 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2403 unsigned NumVecs, 2404 const uint16_t *DOpcodes, 2405 const uint16_t *QOpcodes) { 2406 assert(Subtarget->hasNEON()); 2407 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2408 SDLoc dl(N); 2409 2410 SDValue MemAddr, Align; 2411 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2412 // nodes are not intrinsics. 2413 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2414 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2415 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2416 return; 2417 2418 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2419 2420 SDValue Chain = N->getOperand(0); 2421 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs); 2422 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2423 bool is64BitVector = VT.is64BitVector(); 2424 2425 unsigned Alignment = 0; 2426 if (NumVecs != 3) { 2427 Alignment = Align->getAsZExtVal(); 2428 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2429 if (Alignment > NumBytes) 2430 Alignment = NumBytes; 2431 if (Alignment < 8 && Alignment < NumBytes) 2432 Alignment = 0; 2433 // Alignment must be a power of two; make sure of that. 2434 Alignment = (Alignment & -Alignment); 2435 if (Alignment == 1) 2436 Alignment = 0; 2437 } 2438 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2439 2440 unsigned OpcodeIndex; 2441 switch (VT.getSimpleVT().SimpleTy) { 2442 default: llvm_unreachable("unhandled vld/vst lane type"); 2443 // Double-register operations: 2444 case MVT::v8i8: OpcodeIndex = 0; break; 2445 case MVT::v4f16: 2446 case MVT::v4bf16: 2447 case MVT::v4i16: OpcodeIndex = 1; break; 2448 case MVT::v2f32: 2449 case MVT::v2i32: OpcodeIndex = 2; break; 2450 // Quad-register operations: 2451 case MVT::v8f16: 2452 case MVT::v8bf16: 2453 case MVT::v8i16: OpcodeIndex = 0; break; 2454 case MVT::v4f32: 2455 case MVT::v4i32: OpcodeIndex = 1; break; 2456 } 2457 2458 std::vector<EVT> ResTys; 2459 if (IsLoad) { 2460 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2461 if (!is64BitVector) 2462 ResTyElts *= 2; 2463 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2464 MVT::i64, ResTyElts)); 2465 } 2466 if (isUpdating) 2467 ResTys.push_back(MVT::i32); 2468 ResTys.push_back(MVT::Other); 2469 2470 SDValue Pred = getAL(CurDAG, dl); 2471 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2472 2473 SmallVector<SDValue, 8> Ops; 2474 Ops.push_back(MemAddr); 2475 Ops.push_back(Align); 2476 if (isUpdating) { 2477 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2478 bool IsImmUpdate = 2479 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2480 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2481 } 2482 2483 SDValue SuperReg; 2484 SDValue V0 = N->getOperand(Vec0Idx + 0); 2485 SDValue V1 = N->getOperand(Vec0Idx + 1); 2486 if (NumVecs == 2) { 2487 if (is64BitVector) 2488 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2489 else 2490 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2491 } else { 2492 SDValue V2 = N->getOperand(Vec0Idx + 2); 2493 SDValue V3 = (NumVecs == 3) 2494 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2495 : N->getOperand(Vec0Idx + 3); 2496 if (is64BitVector) 2497 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2498 else 2499 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2500 } 2501 Ops.push_back(SuperReg); 2502 Ops.push_back(getI32Imm(Lane, dl)); 2503 Ops.push_back(Pred); 2504 Ops.push_back(Reg0); 2505 Ops.push_back(Chain); 2506 2507 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2508 QOpcodes[OpcodeIndex]); 2509 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2510 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2511 if (!IsLoad) { 2512 ReplaceNode(N, VLdLn); 2513 return; 2514 } 2515 2516 // Extract the subregisters. 2517 SuperReg = SDValue(VLdLn, 0); 2518 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2519 ARM::qsub_3 == ARM::qsub_0 + 3, 2520 "Unexpected subreg numbering"); 2521 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2522 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2523 ReplaceUses(SDValue(N, Vec), 2524 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2525 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2526 if (isUpdating) 2527 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2528 CurDAG->RemoveDeadNode(N); 2529 } 2530 2531 template <typename SDValueVector> 2532 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2533 SDValue PredicateMask) { 2534 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2535 Ops.push_back(PredicateMask); 2536 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2537 } 2538 2539 template <typename SDValueVector> 2540 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2541 SDValue PredicateMask, 2542 SDValue Inactive) { 2543 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2544 Ops.push_back(PredicateMask); 2545 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2546 Ops.push_back(Inactive); 2547 } 2548 2549 template <typename SDValueVector> 2550 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2551 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2552 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2553 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2554 } 2555 2556 template <typename SDValueVector> 2557 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2558 EVT InactiveTy) { 2559 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2560 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2561 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2562 Ops.push_back(SDValue( 2563 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2564 } 2565 2566 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2567 bool Predicated) { 2568 SDLoc Loc(N); 2569 SmallVector<SDValue, 8> Ops; 2570 2571 uint16_t Opcode; 2572 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2573 case 32: 2574 Opcode = Opcodes[0]; 2575 break; 2576 case 64: 2577 Opcode = Opcodes[1]; 2578 break; 2579 default: 2580 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2581 } 2582 2583 Ops.push_back(N->getOperand(2)); // vector of base addresses 2584 2585 int32_t ImmValue = N->getConstantOperandVal(3); 2586 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2587 2588 if (Predicated) 2589 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2590 else 2591 AddEmptyMVEPredicateToOps(Ops, Loc); 2592 2593 Ops.push_back(N->getOperand(0)); // chain 2594 2595 SmallVector<EVT, 8> VTs; 2596 VTs.push_back(N->getValueType(1)); 2597 VTs.push_back(N->getValueType(0)); 2598 VTs.push_back(N->getValueType(2)); 2599 2600 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2601 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2602 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2603 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2604 transferMemOperands(N, New); 2605 CurDAG->RemoveDeadNode(N); 2606 } 2607 2608 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2609 bool Immediate, 2610 bool HasSaturationOperand) { 2611 SDLoc Loc(N); 2612 SmallVector<SDValue, 8> Ops; 2613 2614 // Two 32-bit halves of the value to be shifted 2615 Ops.push_back(N->getOperand(1)); 2616 Ops.push_back(N->getOperand(2)); 2617 2618 // The shift count 2619 if (Immediate) { 2620 int32_t ImmValue = N->getConstantOperandVal(3); 2621 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2622 } else { 2623 Ops.push_back(N->getOperand(3)); 2624 } 2625 2626 // The immediate saturation operand, if any 2627 if (HasSaturationOperand) { 2628 int32_t SatOp = N->getConstantOperandVal(4); 2629 int SatBit = (SatOp == 64 ? 0 : 1); 2630 Ops.push_back(getI32Imm(SatBit, Loc)); 2631 } 2632 2633 // MVE scalar shifts are IT-predicable, so include the standard 2634 // predicate arguments. 2635 Ops.push_back(getAL(CurDAG, Loc)); 2636 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2637 2638 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2639 } 2640 2641 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2642 uint16_t OpcodeWithNoCarry, 2643 bool Add, bool Predicated) { 2644 SDLoc Loc(N); 2645 SmallVector<SDValue, 8> Ops; 2646 uint16_t Opcode; 2647 2648 unsigned FirstInputOp = Predicated ? 2 : 1; 2649 2650 // Two input vectors and the input carry flag 2651 Ops.push_back(N->getOperand(FirstInputOp)); 2652 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2653 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2654 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2655 uint32_t CarryMask = 1 << 29; 2656 uint32_t CarryExpected = Add ? 0 : CarryMask; 2657 if (CarryInConstant && 2658 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2659 Opcode = OpcodeWithNoCarry; 2660 } else { 2661 Ops.push_back(CarryIn); 2662 Opcode = OpcodeWithCarry; 2663 } 2664 2665 if (Predicated) 2666 AddMVEPredicateToOps(Ops, Loc, 2667 N->getOperand(FirstInputOp + 3), // predicate 2668 N->getOperand(FirstInputOp - 1)); // inactive 2669 else 2670 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2671 2672 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2673 } 2674 2675 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2676 SDLoc Loc(N); 2677 SmallVector<SDValue, 8> Ops; 2678 2679 // One vector input, followed by a 32-bit word of bits to shift in 2680 // and then an immediate shift count 2681 Ops.push_back(N->getOperand(1)); 2682 Ops.push_back(N->getOperand(2)); 2683 int32_t ImmValue = N->getConstantOperandVal(3); 2684 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2685 2686 if (Predicated) 2687 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2688 else 2689 AddEmptyMVEPredicateToOps(Ops, Loc); 2690 2691 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops)); 2692 } 2693 2694 static bool SDValueToConstBool(SDValue SDVal) { 2695 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2696 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2697 uint64_t Value = SDValConstant->getZExtValue(); 2698 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2699 return Value; 2700 } 2701 2702 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2703 const uint16_t *OpcodesS, 2704 const uint16_t *OpcodesU, 2705 size_t Stride, size_t TySize) { 2706 assert(TySize < Stride && "Invalid TySize"); 2707 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2708 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2709 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2710 if (IsUnsigned) { 2711 assert(!IsSub && 2712 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2713 assert(!IsExchange && 2714 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2715 } 2716 2717 auto OpIsZero = [N](size_t OpNo) { 2718 return isNullConstant(N->getOperand(OpNo)); 2719 }; 2720 2721 // If the input accumulator value is not zero, select an instruction with 2722 // accumulator, otherwise select an instruction without accumulator 2723 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2724 2725 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2726 if (IsSub) 2727 Opcodes += 4 * Stride; 2728 if (IsExchange) 2729 Opcodes += 2 * Stride; 2730 if (IsAccum) 2731 Opcodes += Stride; 2732 uint16_t Opcode = Opcodes[TySize]; 2733 2734 SDLoc Loc(N); 2735 SmallVector<SDValue, 8> Ops; 2736 // Push the accumulator operands, if they are used 2737 if (IsAccum) { 2738 Ops.push_back(N->getOperand(4)); 2739 Ops.push_back(N->getOperand(5)); 2740 } 2741 // Push the two vector operands 2742 Ops.push_back(N->getOperand(6)); 2743 Ops.push_back(N->getOperand(7)); 2744 2745 if (Predicated) 2746 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2747 else 2748 AddEmptyMVEPredicateToOps(Ops, Loc); 2749 2750 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2751 } 2752 2753 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2754 const uint16_t *OpcodesS, 2755 const uint16_t *OpcodesU) { 2756 EVT VecTy = N->getOperand(6).getValueType(); 2757 size_t SizeIndex; 2758 switch (VecTy.getVectorElementType().getSizeInBits()) { 2759 case 16: 2760 SizeIndex = 0; 2761 break; 2762 case 32: 2763 SizeIndex = 1; 2764 break; 2765 default: 2766 llvm_unreachable("bad vector element size"); 2767 } 2768 2769 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2770 } 2771 2772 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2773 const uint16_t *OpcodesS, 2774 const uint16_t *OpcodesU) { 2775 assert( 2776 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2777 32 && 2778 "bad vector element size"); 2779 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2780 } 2781 2782 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2783 const uint16_t *const *Opcodes, 2784 bool HasWriteback) { 2785 EVT VT = N->getValueType(0); 2786 SDLoc Loc(N); 2787 2788 const uint16_t *OurOpcodes; 2789 switch (VT.getVectorElementType().getSizeInBits()) { 2790 case 8: 2791 OurOpcodes = Opcodes[0]; 2792 break; 2793 case 16: 2794 OurOpcodes = Opcodes[1]; 2795 break; 2796 case 32: 2797 OurOpcodes = Opcodes[2]; 2798 break; 2799 default: 2800 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2801 } 2802 2803 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2804 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2805 unsigned PtrOperand = HasWriteback ? 1 : 2; 2806 2807 auto Data = SDValue( 2808 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2809 SDValue Chain = N->getOperand(0); 2810 // Add a MVE_VLDn instruction for each Vec, except the last 2811 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2812 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2813 auto LoadInst = 2814 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2815 Data = SDValue(LoadInst, 0); 2816 Chain = SDValue(LoadInst, 1); 2817 transferMemOperands(N, LoadInst); 2818 } 2819 // The last may need a writeback on it 2820 if (HasWriteback) 2821 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2822 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2823 auto LoadInst = 2824 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2825 transferMemOperands(N, LoadInst); 2826 2827 unsigned i; 2828 for (i = 0; i < NumVecs; i++) 2829 ReplaceUses(SDValue(N, i), 2830 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2831 SDValue(LoadInst, 0))); 2832 if (HasWriteback) 2833 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2834 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1)); 2835 CurDAG->RemoveDeadNode(N); 2836 } 2837 2838 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2839 bool Wrapping, bool Predicated) { 2840 EVT VT = N->getValueType(0); 2841 SDLoc Loc(N); 2842 2843 uint16_t Opcode; 2844 switch (VT.getScalarSizeInBits()) { 2845 case 8: 2846 Opcode = Opcodes[0]; 2847 break; 2848 case 16: 2849 Opcode = Opcodes[1]; 2850 break; 2851 case 32: 2852 Opcode = Opcodes[2]; 2853 break; 2854 default: 2855 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2856 } 2857 2858 SmallVector<SDValue, 8> Ops; 2859 unsigned OpIdx = 1; 2860 2861 SDValue Inactive; 2862 if (Predicated) 2863 Inactive = N->getOperand(OpIdx++); 2864 2865 Ops.push_back(N->getOperand(OpIdx++)); // base 2866 if (Wrapping) 2867 Ops.push_back(N->getOperand(OpIdx++)); // limit 2868 2869 SDValue ImmOp = N->getOperand(OpIdx++); // step 2870 int ImmValue = ImmOp->getAsZExtVal(); 2871 Ops.push_back(getI32Imm(ImmValue, Loc)); 2872 2873 if (Predicated) 2874 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2875 else 2876 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2877 2878 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2879 } 2880 2881 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2882 size_t NumExtraOps, bool HasAccum) { 2883 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2884 SDLoc Loc(N); 2885 SmallVector<SDValue, 8> Ops; 2886 2887 unsigned OpIdx = 1; 2888 2889 // Convert and append the immediate operand designating the coprocessor. 2890 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2891 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal(); 2892 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2893 2894 // For accumulating variants copy the low and high order parts of the 2895 // accumulator into a register pair and add it to the operand vector. 2896 if (HasAccum) { 2897 SDValue AccLo = N->getOperand(OpIdx++); 2898 SDValue AccHi = N->getOperand(OpIdx++); 2899 if (IsBigEndian) 2900 std::swap(AccLo, AccHi); 2901 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2902 } 2903 2904 // Copy extra operands as-is. 2905 for (size_t I = 0; I < NumExtraOps; I++) 2906 Ops.push_back(N->getOperand(OpIdx++)); 2907 2908 // Convert and append the immediate operand 2909 SDValue Imm = N->getOperand(OpIdx); 2910 uint32_t ImmVal = Imm->getAsZExtVal(); 2911 Ops.push_back(getI32Imm(ImmVal, Loc)); 2912 2913 // Accumulating variants are IT-predicable, add predicate operands. 2914 if (HasAccum) { 2915 SDValue Pred = getAL(CurDAG, Loc); 2916 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2917 Ops.push_back(Pred); 2918 Ops.push_back(PredReg); 2919 } 2920 2921 // Create the CDE intruction 2922 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2923 SDValue ResultPair = SDValue(InstrNode, 0); 2924 2925 // The original intrinsic had two outputs, and the output of the dual-register 2926 // CDE instruction is a register pair. We need to extract the two subregisters 2927 // and replace all uses of the original outputs with the extracted 2928 // subregisters. 2929 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2930 if (IsBigEndian) 2931 std::swap(SubRegs[0], SubRegs[1]); 2932 2933 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2934 if (SDValue(N, ResIdx).use_empty()) 2935 continue; 2936 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2937 MVT::i32, ResultPair); 2938 ReplaceUses(SDValue(N, ResIdx), SubReg); 2939 } 2940 2941 CurDAG->RemoveDeadNode(N); 2942 } 2943 2944 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2945 bool isUpdating, unsigned NumVecs, 2946 const uint16_t *DOpcodes, 2947 const uint16_t *QOpcodes0, 2948 const uint16_t *QOpcodes1) { 2949 assert(Subtarget->hasNEON()); 2950 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2951 SDLoc dl(N); 2952 2953 SDValue MemAddr, Align; 2954 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2955 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2956 return; 2957 2958 SDValue Chain = N->getOperand(0); 2959 EVT VT = N->getValueType(0); 2960 bool is64BitVector = VT.is64BitVector(); 2961 2962 unsigned Alignment = 0; 2963 if (NumVecs != 3) { 2964 Alignment = Align->getAsZExtVal(); 2965 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2966 if (Alignment > NumBytes) 2967 Alignment = NumBytes; 2968 if (Alignment < 8 && Alignment < NumBytes) 2969 Alignment = 0; 2970 // Alignment must be a power of two; make sure of that. 2971 Alignment = (Alignment & -Alignment); 2972 if (Alignment == 1) 2973 Alignment = 0; 2974 } 2975 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2976 2977 unsigned OpcodeIndex; 2978 switch (VT.getSimpleVT().SimpleTy) { 2979 default: llvm_unreachable("unhandled vld-dup type"); 2980 case MVT::v8i8: 2981 case MVT::v16i8: OpcodeIndex = 0; break; 2982 case MVT::v4i16: 2983 case MVT::v8i16: 2984 case MVT::v4f16: 2985 case MVT::v8f16: 2986 case MVT::v4bf16: 2987 case MVT::v8bf16: 2988 OpcodeIndex = 1; break; 2989 case MVT::v2f32: 2990 case MVT::v2i32: 2991 case MVT::v4f32: 2992 case MVT::v4i32: OpcodeIndex = 2; break; 2993 case MVT::v1f64: 2994 case MVT::v1i64: OpcodeIndex = 3; break; 2995 } 2996 2997 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2998 if (!is64BitVector) 2999 ResTyElts *= 2; 3000 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 3001 3002 std::vector<EVT> ResTys; 3003 ResTys.push_back(ResTy); 3004 if (isUpdating) 3005 ResTys.push_back(MVT::i32); 3006 ResTys.push_back(MVT::Other); 3007 3008 SDValue Pred = getAL(CurDAG, dl); 3009 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3010 3011 SmallVector<SDValue, 6> Ops; 3012 Ops.push_back(MemAddr); 3013 Ops.push_back(Align); 3014 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] 3015 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex] 3016 : QOpcodes1[OpcodeIndex]; 3017 if (isUpdating) { 3018 SDValue Inc = N->getOperand(2); 3019 bool IsImmUpdate = 3020 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 3021 if (IsImmUpdate) { 3022 if (!isVLDfixed(Opc)) 3023 Ops.push_back(Reg0); 3024 } else { 3025 if (isVLDfixed(Opc)) 3026 Opc = getVLDSTRegisterUpdateOpcode(Opc); 3027 Ops.push_back(Inc); 3028 } 3029 } 3030 if (is64BitVector || NumVecs == 1) { 3031 // Double registers and VLD1 quad registers are directly supported. 3032 } else { 3033 SDValue ImplDef = SDValue( 3034 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 3035 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain}; 3036 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy, 3037 MVT::Other, OpsA); 3038 Ops.push_back(SDValue(VLdA, 0)); 3039 Chain = SDValue(VLdA, 1); 3040 } 3041 3042 Ops.push_back(Pred); 3043 Ops.push_back(Reg0); 3044 Ops.push_back(Chain); 3045 3046 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 3047 3048 // Transfer memoperands. 3049 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3050 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3051 3052 // Extract the subregisters. 3053 if (NumVecs == 1) { 3054 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3055 } else { 3056 SDValue SuperReg = SDValue(VLdDup, 0); 3057 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3058 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 3059 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3060 ReplaceUses(SDValue(N, Vec), 3061 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3062 } 3063 } 3064 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3065 if (isUpdating) 3066 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3067 CurDAG->RemoveDeadNode(N); 3068 } 3069 3070 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { 3071 if (!Subtarget->hasMVEIntegerOps()) 3072 return false; 3073 3074 SDLoc dl(N); 3075 3076 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and 3077 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent 3078 // inserts of the correct type: 3079 SDValue Ins1 = SDValue(N, 0); 3080 SDValue Ins2 = N->getOperand(0); 3081 EVT VT = Ins1.getValueType(); 3082 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() || 3083 !isa<ConstantSDNode>(Ins1.getOperand(2)) || 3084 !isa<ConstantSDNode>(Ins2.getOperand(2)) || 3085 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT)) 3086 return false; 3087 3088 unsigned Lane1 = Ins1.getConstantOperandVal(2); 3089 unsigned Lane2 = Ins2.getConstantOperandVal(2); 3090 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1) 3091 return false; 3092 3093 // If the inserted values will be able to use T/B already, leave it to the 3094 // existing tablegen patterns. For example VCVTT/VCVTB. 3095 SDValue Val1 = Ins1.getOperand(1); 3096 SDValue Val2 = Ins2.getOperand(1); 3097 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND) 3098 return false; 3099 3100 // Check if the inserted values are both extracts. 3101 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3102 Val1.getOpcode() == ARMISD::VGETLANEu) && 3103 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3104 Val2.getOpcode() == ARMISD::VGETLANEu) && 3105 isa<ConstantSDNode>(Val1.getOperand(1)) && 3106 isa<ConstantSDNode>(Val2.getOperand(1)) && 3107 (Val1.getOperand(0).getValueType() == MVT::v8f16 || 3108 Val1.getOperand(0).getValueType() == MVT::v8i16) && 3109 (Val2.getOperand(0).getValueType() == MVT::v8f16 || 3110 Val2.getOperand(0).getValueType() == MVT::v8i16)) { 3111 unsigned ExtractLane1 = Val1.getConstantOperandVal(1); 3112 unsigned ExtractLane2 = Val2.getConstantOperandVal(1); 3113 3114 // If the two extracted lanes are from the same place and adjacent, this 3115 // simplifies into a f32 lane move. 3116 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 && 3117 ExtractLane1 == ExtractLane2 + 1) { 3118 SDValue NewExt = CurDAG->getTargetExtractSubreg( 3119 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0)); 3120 SDValue NewIns = CurDAG->getTargetInsertSubreg( 3121 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), 3122 NewExt); 3123 ReplaceUses(Ins1, NewIns); 3124 return true; 3125 } 3126 3127 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for 3128 // extracting odd lanes. 3129 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) { 3130 SDValue Inp1 = CurDAG->getTargetExtractSubreg( 3131 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); 3132 SDValue Inp2 = CurDAG->getTargetExtractSubreg( 3133 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0)); 3134 if (ExtractLane1 % 2 != 0) 3135 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0); 3136 if (ExtractLane2 % 2 != 0) 3137 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0); 3138 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1); 3139 SDValue NewIns = 3140 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3141 Ins2.getOperand(0), SDValue(VINS, 0)); 3142 ReplaceUses(Ins1, NewIns); 3143 return true; 3144 } 3145 } 3146 3147 // The inserted values are not extracted - if they are f16 then insert them 3148 // directly using a VINS. 3149 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3150 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3151 SDValue NewIns = 3152 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3153 Ins2.getOperand(0), SDValue(VINS, 0)); 3154 ReplaceUses(Ins1, NewIns); 3155 return true; 3156 } 3157 3158 return false; 3159 } 3160 3161 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3162 SDNode *FMul, 3163 bool IsUnsigned, 3164 bool FixedToFloat) { 3165 auto Type = N->getValueType(0); 3166 unsigned ScalarBits = Type.getScalarSizeInBits(); 3167 if (ScalarBits > 32) 3168 return false; 3169 3170 SDNodeFlags FMulFlags = FMul->getFlags(); 3171 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3172 // allowed in 16 bit unsigned floats 3173 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3174 return false; 3175 3176 SDValue ImmNode = FMul->getOperand(1); 3177 SDValue VecVal = FMul->getOperand(0); 3178 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3179 VecVal->getOpcode() == ISD::SINT_TO_FP) 3180 VecVal = VecVal->getOperand(0); 3181 3182 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3183 return false; 3184 3185 if (ImmNode.getOpcode() == ISD::BITCAST) { 3186 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3187 return false; 3188 ImmNode = ImmNode.getOperand(0); 3189 } 3190 3191 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3192 return false; 3193 3194 APFloat ImmAPF(0.0f); 3195 switch (ImmNode.getOpcode()) { 3196 case ARMISD::VMOVIMM: 3197 case ARMISD::VDUP: { 3198 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3199 return false; 3200 unsigned Imm = ImmNode.getConstantOperandVal(0); 3201 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3202 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3203 ImmAPF = 3204 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3205 APInt(ScalarBits, Imm)); 3206 break; 3207 } 3208 case ARMISD::VMOVFPIMM: { 3209 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3210 break; 3211 } 3212 default: 3213 return false; 3214 } 3215 3216 // Where n is the number of fractional bits, multiplying by 2^n will convert 3217 // from float to fixed and multiplying by 2^-n will convert from fixed to 3218 // float. Taking log2 of the factor (after taking the inverse in the case of 3219 // float to fixed) will give n. 3220 APFloat ToConvert = ImmAPF; 3221 if (FixedToFloat) { 3222 if (!ImmAPF.getExactInverse(&ToConvert)) 3223 return false; 3224 } 3225 APSInt Converted(64, false); 3226 bool IsExact; 3227 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3228 &IsExact); 3229 if (!IsExact || !Converted.isPowerOf2()) 3230 return false; 3231 3232 unsigned FracBits = Converted.logBase2(); 3233 if (FracBits > ScalarBits) 3234 return false; 3235 3236 SmallVector<SDValue, 3> Ops{ 3237 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3238 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3239 3240 unsigned int Opcode; 3241 switch (ScalarBits) { 3242 case 16: 3243 if (FixedToFloat) 3244 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3245 else 3246 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3247 break; 3248 case 32: 3249 if (FixedToFloat) 3250 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3251 else 3252 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3253 break; 3254 default: 3255 llvm_unreachable("unexpected number of scalar bits"); 3256 break; 3257 } 3258 3259 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3260 return true; 3261 } 3262 3263 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3264 // Transform a floating-point to fixed-point conversion to a VCVT 3265 if (!Subtarget->hasMVEFloatOps()) 3266 return false; 3267 EVT Type = N->getValueType(0); 3268 if (!Type.isVector()) 3269 return false; 3270 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3271 3272 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || 3273 N->getOpcode() == ISD::FP_TO_UINT_SAT; 3274 SDNode *Node = N->getOperand(0).getNode(); 3275 3276 // floating-point to fixed-point with one fractional bit gets turned into an 3277 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3278 if (Node->getOpcode() == ISD::FADD) { 3279 if (Node->getOperand(0) != Node->getOperand(1)) 3280 return false; 3281 SDNodeFlags Flags = Node->getFlags(); 3282 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3283 // allowed in 16 bit unsigned floats 3284 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3285 return false; 3286 3287 unsigned Opcode; 3288 switch (ScalarBits) { 3289 case 16: 3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3291 break; 3292 case 32: 3293 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3294 break; 3295 } 3296 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3297 CurDAG->getConstant(1, dl, MVT::i32)}; 3298 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3299 3300 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3301 return true; 3302 } 3303 3304 if (Node->getOpcode() != ISD::FMUL) 3305 return false; 3306 3307 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3308 } 3309 3310 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3311 // Transform a fixed-point to floating-point conversion to a VCVT 3312 if (!Subtarget->hasMVEFloatOps()) 3313 return false; 3314 auto Type = N->getValueType(0); 3315 if (!Type.isVector()) 3316 return false; 3317 3318 auto LHS = N->getOperand(0); 3319 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3320 return false; 3321 3322 return transformFixedFloatingPointConversion( 3323 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3324 } 3325 3326 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3327 if (!Subtarget->hasV6T2Ops()) 3328 return false; 3329 3330 unsigned Opc = isSigned 3331 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3332 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3333 SDLoc dl(N); 3334 3335 // For unsigned extracts, check for a shift right and mask 3336 unsigned And_imm = 0; 3337 if (N->getOpcode() == ISD::AND) { 3338 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3339 3340 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3341 if (And_imm & (And_imm + 1)) 3342 return false; 3343 3344 unsigned Srl_imm = 0; 3345 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3346 Srl_imm)) { 3347 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3348 3349 // Mask off the unnecessary bits of the AND immediate; normally 3350 // DAGCombine will do this, but that might not happen if 3351 // targetShrinkDemandedConstant chooses a different immediate. 3352 And_imm &= -1U >> Srl_imm; 3353 3354 // Note: The width operand is encoded as width-1. 3355 unsigned Width = llvm::countr_one(And_imm) - 1; 3356 unsigned LSB = Srl_imm; 3357 3358 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3359 3360 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3361 // It's cheaper to use a right shift to extract the top bits. 3362 if (Subtarget->isThumb()) { 3363 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3364 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3365 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3366 getAL(CurDAG, dl), Reg0, Reg0 }; 3367 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3368 return true; 3369 } 3370 3371 // ARM models shift instructions as MOVsi with shifter operand. 3372 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3373 SDValue ShOpc = 3374 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3375 MVT::i32); 3376 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3377 getAL(CurDAG, dl), Reg0, Reg0 }; 3378 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3379 return true; 3380 } 3381 3382 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3383 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3384 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3385 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3386 getAL(CurDAG, dl), Reg0 }; 3387 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3388 return true; 3389 } 3390 } 3391 return false; 3392 } 3393 3394 // Otherwise, we're looking for a shift of a shift 3395 unsigned Shl_imm = 0; 3396 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3397 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3398 unsigned Srl_imm = 0; 3399 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3400 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3401 // Note: The width operand is encoded as width-1. 3402 unsigned Width = 32 - Srl_imm - 1; 3403 int LSB = Srl_imm - Shl_imm; 3404 if (LSB < 0) 3405 return false; 3406 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3407 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3408 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3409 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3410 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3411 getAL(CurDAG, dl), Reg0 }; 3412 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3413 return true; 3414 } 3415 } 3416 3417 // Or we are looking for a shift of an and, with a mask operand 3418 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3419 isShiftedMask_32(And_imm)) { 3420 unsigned Srl_imm = 0; 3421 unsigned LSB = llvm::countr_zero(And_imm); 3422 // Shift must be the same as the ands lsb 3423 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3424 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3425 unsigned MSB = llvm::Log2_32(And_imm); 3426 // Note: The width operand is encoded as width-1. 3427 unsigned Width = MSB - LSB; 3428 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3429 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3430 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3431 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3432 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3433 getAL(CurDAG, dl), Reg0 }; 3434 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3435 return true; 3436 } 3437 } 3438 3439 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3440 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3441 unsigned LSB = 0; 3442 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3443 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3444 return false; 3445 3446 if (LSB + Width > 32) 3447 return false; 3448 3449 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3450 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3451 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3452 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3453 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3454 getAL(CurDAG, dl), Reg0 }; 3455 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3456 return true; 3457 } 3458 3459 return false; 3460 } 3461 3462 /// Target-specific DAG combining for ISD::SUB. 3463 /// Target-independent combining lowers SELECT_CC nodes of the form 3464 /// select_cc setg[ge] X, 0, X, -X 3465 /// select_cc setgt X, -1, X, -X 3466 /// select_cc setl[te] X, 0, -X, X 3467 /// select_cc setlt X, 1, -X, X 3468 /// which represent Integer ABS into: 3469 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 3470 /// ARM instruction selection detects the latter and matches it to 3471 /// ARM::ABS or ARM::t2ABS machine node. 3472 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3473 SDValue SUBSrc0 = N->getOperand(0); 3474 SDValue SUBSrc1 = N->getOperand(1); 3475 EVT VT = N->getValueType(0); 3476 3477 if (Subtarget->isThumb1Only()) 3478 return false; 3479 3480 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) 3481 return false; 3482 3483 SDValue XORSrc0 = SUBSrc0.getOperand(0); 3484 SDValue XORSrc1 = SUBSrc0.getOperand(1); 3485 SDValue SRASrc0 = SUBSrc1.getOperand(0); 3486 SDValue SRASrc1 = SUBSrc1.getOperand(1); 3487 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3488 EVT XType = SRASrc0.getValueType(); 3489 unsigned Size = XType.getSizeInBits() - 1; 3490 3491 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && 3492 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { 3493 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3494 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); 3495 return true; 3496 } 3497 3498 return false; 3499 } 3500 3501 /// We've got special pseudo-instructions for these 3502 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3503 unsigned Opcode; 3504 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3505 if (MemTy == MVT::i8) 3506 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; 3507 else if (MemTy == MVT::i16) 3508 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; 3509 else if (MemTy == MVT::i32) 3510 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3511 else 3512 llvm_unreachable("Unknown AtomicCmpSwap type"); 3513 3514 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3515 N->getOperand(0)}; 3516 SDNode *CmpSwap = CurDAG->getMachineNode( 3517 Opcode, SDLoc(N), 3518 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3519 3520 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3521 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3522 3523 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3524 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3525 CurDAG->RemoveDeadNode(N); 3526 } 3527 3528 static std::optional<std::pair<unsigned, unsigned>> 3529 getContiguousRangeOfSetBits(const APInt &A) { 3530 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1; 3531 unsigned LastOne = A.countr_zero(); 3532 if (A.popcount() != (FirstOne - LastOne + 1)) 3533 return std::nullopt; 3534 return std::make_pair(FirstOne, LastOne); 3535 } 3536 3537 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3538 assert(N->getOpcode() == ARMISD::CMPZ); 3539 SwitchEQNEToPLMI = false; 3540 3541 if (!Subtarget->isThumb()) 3542 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3543 // LSR don't exist as standalone instructions - they need the barrel shifter. 3544 return; 3545 3546 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3547 SDValue And = N->getOperand(0); 3548 if (!And->hasOneUse()) 3549 return; 3550 3551 SDValue Zero = N->getOperand(1); 3552 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND) 3553 return; 3554 SDValue X = And.getOperand(0); 3555 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3556 3557 if (!C) 3558 return; 3559 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3560 if (!Range) 3561 return; 3562 3563 // There are several ways to lower this: 3564 SDNode *NewN; 3565 SDLoc dl(N); 3566 3567 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3568 if (Subtarget->isThumb2()) { 3569 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3570 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3571 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3572 CurDAG->getRegister(0, MVT::i32) }; 3573 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3574 } else { 3575 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3576 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3577 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3578 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3579 } 3580 }; 3581 3582 if (Range->second == 0) { 3583 // 1. Mask includes the LSB -> Simply shift the top N bits off 3584 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3585 ReplaceNode(And.getNode(), NewN); 3586 } else if (Range->first == 31) { 3587 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3588 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3589 ReplaceNode(And.getNode(), NewN); 3590 } else if (Range->first == Range->second) { 3591 // 3. Only one bit is set. We can shift this into the sign bit and use a 3592 // PL/MI comparison. This is not safe if CMPZ has multiple uses because 3593 // only one of them (the one currently being selected) will be switched 3594 // to use the new condition code. 3595 if (!N->hasOneUse()) 3596 return; 3597 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3598 ReplaceNode(And.getNode(), NewN); 3599 3600 SwitchEQNEToPLMI = true; 3601 } else if (!Subtarget->hasV6T2Ops()) { 3602 // 4. Do a double shift to clear bottom and top bits, but only in 3603 // thumb-1 mode as in thumb-2 we can use UBFX. 3604 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3605 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3606 Range->second + (31 - Range->first)); 3607 ReplaceNode(And.getNode(), NewN); 3608 } 3609 } 3610 3611 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], 3612 unsigned Opc128[3]) { 3613 assert((VT.is64BitVector() || VT.is128BitVector()) && 3614 "Unexpected vector shuffle length"); 3615 switch (VT.getScalarSizeInBits()) { 3616 default: 3617 llvm_unreachable("Unexpected vector shuffle element size"); 3618 case 8: 3619 return VT.is64BitVector() ? Opc64[0] : Opc128[0]; 3620 case 16: 3621 return VT.is64BitVector() ? Opc64[1] : Opc128[1]; 3622 case 32: 3623 return VT.is64BitVector() ? Opc64[2] : Opc128[2]; 3624 } 3625 } 3626 3627 void ARMDAGToDAGISel::Select(SDNode *N) { 3628 SDLoc dl(N); 3629 3630 if (N->isMachineOpcode()) { 3631 N->setNodeId(-1); 3632 return; // Already selected. 3633 } 3634 3635 switch (N->getOpcode()) { 3636 default: break; 3637 case ISD::STORE: { 3638 // For Thumb1, match an sp-relative store in C++. This is a little 3639 // unfortunate, but I don't think I can make the chain check work 3640 // otherwise. (The chain of the store has to be the same as the chain 3641 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3642 // a direct reference to "SP".) 3643 // 3644 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3645 // a different addressing mode from other four-byte stores. 3646 // 3647 // This pattern usually comes up with call arguments. 3648 StoreSDNode *ST = cast<StoreSDNode>(N); 3649 SDValue Ptr = ST->getBasePtr(); 3650 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3651 int RHSC = 0; 3652 if (Ptr.getOpcode() == ISD::ADD && 3653 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3654 Ptr = Ptr.getOperand(0); 3655 3656 if (Ptr.getOpcode() == ISD::CopyFromReg && 3657 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3658 Ptr.getOperand(0) == ST->getChain()) { 3659 SDValue Ops[] = {ST->getValue(), 3660 CurDAG->getRegister(ARM::SP, MVT::i32), 3661 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3662 getAL(CurDAG, dl), 3663 CurDAG->getRegister(0, MVT::i32), 3664 ST->getChain()}; 3665 MachineSDNode *ResNode = 3666 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3667 MachineMemOperand *MemOp = ST->getMemOperand(); 3668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3669 ReplaceNode(N, ResNode); 3670 return; 3671 } 3672 } 3673 break; 3674 } 3675 case ISD::WRITE_REGISTER: 3676 if (tryWriteRegister(N)) 3677 return; 3678 break; 3679 case ISD::READ_REGISTER: 3680 if (tryReadRegister(N)) 3681 return; 3682 break; 3683 case ISD::INLINEASM: 3684 case ISD::INLINEASM_BR: 3685 if (tryInlineAsm(N)) 3686 return; 3687 break; 3688 case ISD::SUB: 3689 // Select special operations if SUB node forms integer ABS pattern 3690 if (tryABSOp(N)) 3691 return; 3692 // Other cases are autogenerated. 3693 break; 3694 case ISD::Constant: { 3695 unsigned Val = N->getAsZExtVal(); 3696 // If we can't materialize the constant we need to use a literal pool 3697 if (ConstantMaterializationCost(Val, Subtarget) > 2 && 3698 !Subtarget->genExecuteOnly()) { 3699 SDValue CPIdx = CurDAG->getTargetConstantPool( 3700 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3701 TLI->getPointerTy(CurDAG->getDataLayout())); 3702 3703 SDNode *ResNode; 3704 if (Subtarget->isThumb()) { 3705 SDValue Ops[] = { 3706 CPIdx, 3707 getAL(CurDAG, dl), 3708 CurDAG->getRegister(0, MVT::i32), 3709 CurDAG->getEntryNode() 3710 }; 3711 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3712 Ops); 3713 } else { 3714 SDValue Ops[] = { 3715 CPIdx, 3716 CurDAG->getTargetConstant(0, dl, MVT::i32), 3717 getAL(CurDAG, dl), 3718 CurDAG->getRegister(0, MVT::i32), 3719 CurDAG->getEntryNode() 3720 }; 3721 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3722 Ops); 3723 } 3724 // Annotate the Node with memory operand information so that MachineInstr 3725 // queries work properly. This e.g. gives the register allocation the 3726 // required information for rematerialization. 3727 MachineFunction& MF = CurDAG->getMachineFunction(); 3728 MachineMemOperand *MemOp = 3729 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3730 MachineMemOperand::MOLoad, 4, Align(4)); 3731 3732 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3733 3734 ReplaceNode(N, ResNode); 3735 return; 3736 } 3737 3738 // Other cases are autogenerated. 3739 break; 3740 } 3741 case ISD::FrameIndex: { 3742 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 3743 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3744 SDValue TFI = CurDAG->getTargetFrameIndex( 3745 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3746 if (Subtarget->isThumb1Only()) { 3747 // Set the alignment of the frame object to 4, to avoid having to generate 3748 // more than one ADD 3749 MachineFrameInfo &MFI = MF->getFrameInfo(); 3750 if (MFI.getObjectAlign(FI) < Align(4)) 3751 MFI.setObjectAlignment(FI, Align(4)); 3752 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3753 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3754 return; 3755 } else { 3756 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3757 ARM::t2ADDri : ARM::ADDri); 3758 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3759 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3760 CurDAG->getRegister(0, MVT::i32) }; 3761 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3762 return; 3763 } 3764 } 3765 case ISD::INSERT_VECTOR_ELT: { 3766 if (tryInsertVectorElt(N)) 3767 return; 3768 break; 3769 } 3770 case ISD::SRL: 3771 if (tryV6T2BitfieldExtractOp(N, false)) 3772 return; 3773 break; 3774 case ISD::SIGN_EXTEND_INREG: 3775 case ISD::SRA: 3776 if (tryV6T2BitfieldExtractOp(N, true)) 3777 return; 3778 break; 3779 case ISD::FP_TO_UINT: 3780 case ISD::FP_TO_SINT: 3781 case ISD::FP_TO_UINT_SAT: 3782 case ISD::FP_TO_SINT_SAT: 3783 if (tryFP_TO_INT(N, dl)) 3784 return; 3785 break; 3786 case ISD::FMUL: 3787 if (tryFMULFixed(N, dl)) 3788 return; 3789 break; 3790 case ISD::MUL: 3791 if (Subtarget->isThumb1Only()) 3792 break; 3793 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3794 unsigned RHSV = C->getZExtValue(); 3795 if (!RHSV) break; 3796 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 3797 unsigned ShImm = Log2_32(RHSV-1); 3798 if (ShImm >= 32) 3799 break; 3800 SDValue V = N->getOperand(0); 3801 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3802 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3803 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3804 if (Subtarget->isThumb()) { 3805 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3806 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3807 return; 3808 } else { 3809 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3810 Reg0 }; 3811 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3812 return; 3813 } 3814 } 3815 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3816 unsigned ShImm = Log2_32(RHSV+1); 3817 if (ShImm >= 32) 3818 break; 3819 SDValue V = N->getOperand(0); 3820 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3821 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3822 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3823 if (Subtarget->isThumb()) { 3824 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3825 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3826 return; 3827 } else { 3828 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3829 Reg0 }; 3830 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3831 return; 3832 } 3833 } 3834 } 3835 break; 3836 case ISD::AND: { 3837 // Check for unsigned bitfield extract 3838 if (tryV6T2BitfieldExtractOp(N, false)) 3839 return; 3840 3841 // If an immediate is used in an AND node, it is possible that the immediate 3842 // can be more optimally materialized when negated. If this is the case we 3843 // can negate the immediate and use a BIC instead. 3844 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3845 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3846 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3847 3848 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 3849 // immediate can be negated and fit in the immediate operand of 3850 // a t2BIC, don't do any manual transform here as this can be 3851 // handled by the generic ISel machinery. 3852 bool PreferImmediateEncoding = 3853 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3854 if (!PreferImmediateEncoding && 3855 ConstantMaterializationCost(Imm, Subtarget) > 3856 ConstantMaterializationCost(~Imm, Subtarget)) { 3857 // The current immediate costs more to materialize than a negated 3858 // immediate, so negate the immediate and use a BIC. 3859 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32); 3860 // If the new constant didn't exist before, reposition it in the topological 3861 // ordering so it is just before N. Otherwise, don't touch its location. 3862 if (NewImm->getNodeId() == -1) 3863 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3864 3865 if (!Subtarget->hasThumb2()) { 3866 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3867 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3868 CurDAG->getRegister(0, MVT::i32)}; 3869 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3870 return; 3871 } else { 3872 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3873 CurDAG->getRegister(0, MVT::i32), 3874 CurDAG->getRegister(0, MVT::i32)}; 3875 ReplaceNode(N, 3876 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3877 return; 3878 } 3879 } 3880 } 3881 3882 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3883 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3884 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3885 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3886 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3887 EVT VT = N->getValueType(0); 3888 if (VT != MVT::i32) 3889 break; 3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3891 ? ARM::t2MOVTi16 3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3893 if (!Opc) 3894 break; 3895 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3896 N1C = dyn_cast<ConstantSDNode>(N1); 3897 if (!N1C) 3898 break; 3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3900 SDValue N2 = N0.getOperand(1); 3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3902 if (!N2C) 3903 break; 3904 unsigned N1CVal = N1C->getZExtValue(); 3905 unsigned N2CVal = N2C->getZExtValue(); 3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3907 (N1CVal & 0xffffU) == 0xffffU && 3908 (N2CVal & 0xffffU) == 0x0U) { 3909 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3910 dl, MVT::i32); 3911 SDValue Ops[] = { N0.getOperand(0), Imm16, 3912 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3914 return; 3915 } 3916 } 3917 3918 break; 3919 } 3920 case ARMISD::UMAAL: { 3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3923 N->getOperand(2), N->getOperand(3), 3924 getAL(CurDAG, dl), 3925 CurDAG->getRegister(0, MVT::i32) }; 3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3927 return; 3928 } 3929 case ARMISD::UMLAL:{ 3930 if (Subtarget->isThumb()) { 3931 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3932 N->getOperand(3), getAL(CurDAG, dl), 3933 CurDAG->getRegister(0, MVT::i32)}; 3934 ReplaceNode( 3935 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3936 return; 3937 }else{ 3938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3939 N->getOperand(3), getAL(CurDAG, dl), 3940 CurDAG->getRegister(0, MVT::i32), 3941 CurDAG->getRegister(0, MVT::i32) }; 3942 ReplaceNode(N, CurDAG->getMachineNode( 3943 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl, 3944 MVT::i32, MVT::i32, Ops)); 3945 return; 3946 } 3947 } 3948 case ARMISD::SMLAL:{ 3949 if (Subtarget->isThumb()) { 3950 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3951 N->getOperand(3), getAL(CurDAG, dl), 3952 CurDAG->getRegister(0, MVT::i32)}; 3953 ReplaceNode( 3954 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3955 return; 3956 }else{ 3957 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3958 N->getOperand(3), getAL(CurDAG, dl), 3959 CurDAG->getRegister(0, MVT::i32), 3960 CurDAG->getRegister(0, MVT::i32) }; 3961 ReplaceNode(N, CurDAG->getMachineNode( 3962 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3963 MVT::i32, MVT::i32, Ops)); 3964 return; 3965 } 3966 } 3967 case ARMISD::SUBE: { 3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3969 break; 3970 // Look for a pattern to match SMMLS 3971 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3972 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3973 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3974 !SDValue(N, 1).use_empty()) 3975 break; 3976 3977 if (Subtarget->isThumb()) 3978 assert(Subtarget->hasThumb2() && 3979 "This pattern should not be generated for Thumb"); 3980 3981 SDValue SmulLoHi = N->getOperand(1); 3982 SDValue Subc = N->getOperand(2); 3983 SDValue Zero = Subc.getOperand(0); 3984 3985 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || 3986 N->getOperand(1) != SmulLoHi.getValue(1) || 3987 N->getOperand(2) != Subc.getValue(1)) 3988 break; 3989 3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 3991 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3992 N->getOperand(0), getAL(CurDAG, dl), 3993 CurDAG->getRegister(0, MVT::i32) }; 3994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3995 return; 3996 } 3997 case ISD::LOAD: { 3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3999 return; 4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 4001 if (tryT2IndexedLoad(N)) 4002 return; 4003 } else if (Subtarget->isThumb()) { 4004 if (tryT1IndexedLoad(N)) 4005 return; 4006 } else if (tryARMIndexedLoad(N)) 4007 return; 4008 // Other cases are autogenerated. 4009 break; 4010 } 4011 case ISD::MLOAD: 4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4013 return; 4014 // Other cases are autogenerated. 4015 break; 4016 case ARMISD::WLSSETUP: { 4017 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4018 N->getOperand(0)); 4019 ReplaceUses(N, New); 4020 CurDAG->RemoveDeadNode(N); 4021 return; 4022 } 4023 case ARMISD::WLS: { 4024 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4025 N->getOperand(1), N->getOperand(2), 4026 N->getOperand(0)); 4027 ReplaceUses(N, New); 4028 CurDAG->RemoveDeadNode(N); 4029 return; 4030 } 4031 case ARMISD::LE: { 4032 SDValue Ops[] = { N->getOperand(1), 4033 N->getOperand(2), 4034 N->getOperand(0) }; 4035 unsigned Opc = ARM::t2LoopEnd; 4036 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4037 ReplaceUses(N, New); 4038 CurDAG->RemoveDeadNode(N); 4039 return; 4040 } 4041 case ARMISD::LDRD: { 4042 if (Subtarget->isThumb2()) 4043 break; // TableGen handles isel in this case. 4044 SDValue Base, RegOffset, ImmOffset; 4045 const SDValue &Chain = N->getOperand(0); 4046 const SDValue &Addr = N->getOperand(1); 4047 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4048 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4049 // The register-offset variant of LDRD mandates that the register 4050 // allocated to RegOffset is not reused in any of the remaining operands. 4051 // This restriction is currently not enforced. Therefore emitting this 4052 // variant is explicitly avoided. 4053 Base = Addr; 4054 RegOffset = CurDAG->getRegister(0, MVT::i32); 4055 } 4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4057 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4058 {MVT::Untyped, MVT::Other}, Ops); 4059 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4060 SDValue(New, 0)); 4061 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4062 SDValue(New, 0)); 4063 transferMemOperands(N, New); 4064 ReplaceUses(SDValue(N, 0), Lo); 4065 ReplaceUses(SDValue(N, 1), Hi); 4066 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4067 CurDAG->RemoveDeadNode(N); 4068 return; 4069 } 4070 case ARMISD::STRD: { 4071 if (Subtarget->isThumb2()) 4072 break; // TableGen handles isel in this case. 4073 SDValue Base, RegOffset, ImmOffset; 4074 const SDValue &Chain = N->getOperand(0); 4075 const SDValue &Addr = N->getOperand(3); 4076 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4077 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4078 // The register-offset variant of STRD mandates that the register 4079 // allocated to RegOffset is not reused in any of the remaining operands. 4080 // This restriction is currently not enforced. Therefore emitting this 4081 // variant is explicitly avoided. 4082 Base = Addr; 4083 RegOffset = CurDAG->getRegister(0, MVT::i32); 4084 } 4085 SDNode *RegPair = 4086 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4088 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4089 transferMemOperands(N, New); 4090 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4091 CurDAG->RemoveDeadNode(N); 4092 return; 4093 } 4094 case ARMISD::LOOP_DEC: { 4095 SDValue Ops[] = { N->getOperand(1), 4096 N->getOperand(2), 4097 N->getOperand(0) }; 4098 SDNode *Dec = 4099 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4100 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4101 ReplaceUses(N, Dec); 4102 CurDAG->RemoveDeadNode(N); 4103 return; 4104 } 4105 case ARMISD::BRCOND: { 4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4108 // Pattern complexity = 6 cost = 1 size = 0 4109 4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4112 // Pattern complexity = 6 cost = 1 size = 0 4113 4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4116 // Pattern complexity = 6 cost = 1 size = 0 4117 4118 unsigned Opc = Subtarget->isThumb() ? 4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4120 SDValue Chain = N->getOperand(0); 4121 SDValue N1 = N->getOperand(1); 4122 SDValue N2 = N->getOperand(2); 4123 SDValue Flags = N->getOperand(3); 4124 assert(N1.getOpcode() == ISD::BasicBlock); 4125 assert(N2.getOpcode() == ISD::Constant); 4126 4127 unsigned CC = (unsigned)N2->getAsZExtVal(); 4128 4129 if (Flags.getOpcode() == ARMISD::CMPZ) { 4130 if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4131 SDValue Int = Flags.getOperand(0); 4132 uint64_t ID = Int->getConstantOperandVal(1); 4133 4134 // Handle low-overhead loops. 4135 if (ID == Intrinsic::loop_decrement_reg) { 4136 SDValue Elements = Int.getOperand(2); 4137 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3), 4138 dl, MVT::i32); 4139 4140 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4141 SDNode *LoopDec = 4142 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4143 CurDAG->getVTList(MVT::i32, MVT::Other), 4144 Args); 4145 ReplaceUses(Int.getNode(), LoopDec); 4146 4147 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4148 SDNode *LoopEnd = 4149 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4150 4151 ReplaceUses(N, LoopEnd); 4152 CurDAG->RemoveDeadNode(N); 4153 CurDAG->RemoveDeadNode(Flags.getNode()); 4154 CurDAG->RemoveDeadNode(Int.getNode()); 4155 return; 4156 } 4157 } 4158 4159 bool SwitchEQNEToPLMI; 4160 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI); 4161 Flags = N->getOperand(3); 4162 4163 if (SwitchEQNEToPLMI) { 4164 switch ((ARMCC::CondCodes)CC) { 4165 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4166 case ARMCC::NE: 4167 CC = (unsigned)ARMCC::MI; 4168 break; 4169 case ARMCC::EQ: 4170 CC = (unsigned)ARMCC::PL; 4171 break; 4172 } 4173 } 4174 } 4175 4176 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4177 Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue()); 4178 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain, 4179 Chain.getValue(1)}; 4180 CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops); 4181 return; 4182 } 4183 4184 case ARMISD::CMPZ: { 4185 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4186 // This allows us to avoid materializing the expensive negative constant. 4187 // The CMPZ #0 is useless and will be peepholed away but we need to keep 4188 // it for its flags output. 4189 SDValue X = N->getOperand(0); 4190 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4191 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4192 int64_t Addend = -C->getSExtValue(); 4193 4194 SDNode *Add = nullptr; 4195 // ADDS can be better than CMN if the immediate fits in a 4196 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4197 // Outside that range we can just use a CMN which is 32-bit but has a 4198 // 12-bit immediate range. 4199 if (Addend < 1<<8) { 4200 if (Subtarget->isThumb2()) { 4201 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4202 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4203 CurDAG->getRegister(0, MVT::i32) }; 4204 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4205 } else { 4206 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 4207 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4208 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4209 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4210 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4211 } 4212 } 4213 if (Add) { 4214 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4215 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2); 4216 } 4217 } 4218 // Other cases are autogenerated. 4219 break; 4220 } 4221 4222 case ARMISD::CMOV: { 4223 SDValue Flags = N->getOperand(3); 4224 4225 if (Flags.getOpcode() == ARMISD::CMPZ) { 4226 bool SwitchEQNEToPLMI; 4227 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI); 4228 4229 if (SwitchEQNEToPLMI) { 4230 SDValue ARMcc = N->getOperand(2); 4231 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); 4232 4233 switch (CC) { 4234 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4235 case ARMCC::NE: 4236 CC = ARMCC::MI; 4237 break; 4238 case ARMCC::EQ: 4239 CC = ARMCC::PL; 4240 break; 4241 } 4242 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4243 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4244 N->getOperand(3)}; 4245 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4246 } 4247 } 4248 // Other cases are autogenerated. 4249 break; 4250 } 4251 case ARMISD::VZIP: { 4252 EVT VT = N->getValueType(0); 4253 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4254 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32}; 4255 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32}; 4256 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4257 SDValue Pred = getAL(CurDAG, dl); 4258 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4259 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4260 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4261 return; 4262 } 4263 case ARMISD::VUZP: { 4264 EVT VT = N->getValueType(0); 4265 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4266 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32}; 4267 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32}; 4268 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4269 SDValue Pred = getAL(CurDAG, dl); 4270 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4271 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4272 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4273 return; 4274 } 4275 case ARMISD::VTRN: { 4276 EVT VT = N->getValueType(0); 4277 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32}; 4278 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32}; 4279 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4280 SDValue Pred = getAL(CurDAG, dl); 4281 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4282 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4283 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4284 return; 4285 } 4286 case ARMISD::BUILD_VECTOR: { 4287 EVT VecVT = N->getValueType(0); 4288 EVT EltVT = VecVT.getVectorElementType(); 4289 unsigned NumElts = VecVT.getVectorNumElements(); 4290 if (EltVT == MVT::f64) { 4291 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4292 ReplaceNode( 4293 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4294 return; 4295 } 4296 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4297 if (NumElts == 2) { 4298 ReplaceNode( 4299 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4300 return; 4301 } 4302 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4303 ReplaceNode(N, 4304 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4305 N->getOperand(2), N->getOperand(3))); 4306 return; 4307 } 4308 4309 case ARMISD::VLD1DUP: { 4310 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4311 ARM::VLD1DUPd32 }; 4312 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4313 ARM::VLD1DUPq32 }; 4314 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4315 return; 4316 } 4317 4318 case ARMISD::VLD2DUP: { 4319 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4320 ARM::VLD2DUPd32 }; 4321 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4322 return; 4323 } 4324 4325 case ARMISD::VLD3DUP: { 4326 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4327 ARM::VLD3DUPd16Pseudo, 4328 ARM::VLD3DUPd32Pseudo }; 4329 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4330 return; 4331 } 4332 4333 case ARMISD::VLD4DUP: { 4334 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4335 ARM::VLD4DUPd16Pseudo, 4336 ARM::VLD4DUPd32Pseudo }; 4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4338 return; 4339 } 4340 4341 case ARMISD::VLD1DUP_UPD: { 4342 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4343 ARM::VLD1DUPd16wb_fixed, 4344 ARM::VLD1DUPd32wb_fixed }; 4345 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4346 ARM::VLD1DUPq16wb_fixed, 4347 ARM::VLD1DUPq32wb_fixed }; 4348 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4349 return; 4350 } 4351 4352 case ARMISD::VLD2DUP_UPD: { 4353 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4354 ARM::VLD2DUPd16wb_fixed, 4355 ARM::VLD2DUPd32wb_fixed, 4356 ARM::VLD1q64wb_fixed }; 4357 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4358 ARM::VLD2DUPq16EvenPseudo, 4359 ARM::VLD2DUPq32EvenPseudo }; 4360 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4361 ARM::VLD2DUPq16OddPseudoWB_fixed, 4362 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4363 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4364 return; 4365 } 4366 4367 case ARMISD::VLD3DUP_UPD: { 4368 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4369 ARM::VLD3DUPd16Pseudo_UPD, 4370 ARM::VLD3DUPd32Pseudo_UPD, 4371 ARM::VLD1d64TPseudoWB_fixed }; 4372 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4373 ARM::VLD3DUPq16EvenPseudo, 4374 ARM::VLD3DUPq32EvenPseudo }; 4375 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4376 ARM::VLD3DUPq16OddPseudo_UPD, 4377 ARM::VLD3DUPq32OddPseudo_UPD }; 4378 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4379 return; 4380 } 4381 4382 case ARMISD::VLD4DUP_UPD: { 4383 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4384 ARM::VLD4DUPd16Pseudo_UPD, 4385 ARM::VLD4DUPd32Pseudo_UPD, 4386 ARM::VLD1d64QPseudoWB_fixed }; 4387 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4388 ARM::VLD4DUPq16EvenPseudo, 4389 ARM::VLD4DUPq32EvenPseudo }; 4390 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4391 ARM::VLD4DUPq16OddPseudo_UPD, 4392 ARM::VLD4DUPq32OddPseudo_UPD }; 4393 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4394 return; 4395 } 4396 4397 case ARMISD::VLD1_UPD: { 4398 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4399 ARM::VLD1d16wb_fixed, 4400 ARM::VLD1d32wb_fixed, 4401 ARM::VLD1d64wb_fixed }; 4402 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4403 ARM::VLD1q16wb_fixed, 4404 ARM::VLD1q32wb_fixed, 4405 ARM::VLD1q64wb_fixed }; 4406 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4407 return; 4408 } 4409 4410 case ARMISD::VLD2_UPD: { 4411 if (Subtarget->hasNEON()) { 4412 static const uint16_t DOpcodes[] = { 4413 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4414 ARM::VLD1q64wb_fixed}; 4415 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4416 ARM::VLD2q16PseudoWB_fixed, 4417 ARM::VLD2q32PseudoWB_fixed}; 4418 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4419 } else { 4420 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4421 ARM::MVE_VLD21_8_wb}; 4422 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4423 ARM::MVE_VLD21_16_wb}; 4424 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4425 ARM::MVE_VLD21_32_wb}; 4426 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4427 SelectMVE_VLD(N, 2, Opcodes, true); 4428 } 4429 return; 4430 } 4431 4432 case ARMISD::VLD3_UPD: { 4433 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4434 ARM::VLD3d16Pseudo_UPD, 4435 ARM::VLD3d32Pseudo_UPD, 4436 ARM::VLD1d64TPseudoWB_fixed}; 4437 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4438 ARM::VLD3q16Pseudo_UPD, 4439 ARM::VLD3q32Pseudo_UPD }; 4440 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4441 ARM::VLD3q16oddPseudo_UPD, 4442 ARM::VLD3q32oddPseudo_UPD }; 4443 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4444 return; 4445 } 4446 4447 case ARMISD::VLD4_UPD: { 4448 if (Subtarget->hasNEON()) { 4449 static const uint16_t DOpcodes[] = { 4450 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4451 ARM::VLD1d64QPseudoWB_fixed}; 4452 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4453 ARM::VLD4q16Pseudo_UPD, 4454 ARM::VLD4q32Pseudo_UPD}; 4455 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4456 ARM::VLD4q16oddPseudo_UPD, 4457 ARM::VLD4q32oddPseudo_UPD}; 4458 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4459 } else { 4460 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4461 ARM::MVE_VLD42_8, 4462 ARM::MVE_VLD43_8_wb}; 4463 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4464 ARM::MVE_VLD42_16, 4465 ARM::MVE_VLD43_16_wb}; 4466 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4467 ARM::MVE_VLD42_32, 4468 ARM::MVE_VLD43_32_wb}; 4469 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4470 SelectMVE_VLD(N, 4, Opcodes, true); 4471 } 4472 return; 4473 } 4474 4475 case ARMISD::VLD1x2_UPD: { 4476 if (Subtarget->hasNEON()) { 4477 static const uint16_t DOpcodes[] = { 4478 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4479 ARM::VLD1q64wb_fixed}; 4480 static const uint16_t QOpcodes[] = { 4481 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4482 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4483 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4484 return; 4485 } 4486 break; 4487 } 4488 4489 case ARMISD::VLD1x3_UPD: { 4490 if (Subtarget->hasNEON()) { 4491 static const uint16_t DOpcodes[] = { 4492 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4493 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4494 static const uint16_t QOpcodes0[] = { 4495 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4496 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4497 static const uint16_t QOpcodes1[] = { 4498 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4499 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4500 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4501 return; 4502 } 4503 break; 4504 } 4505 4506 case ARMISD::VLD1x4_UPD: { 4507 if (Subtarget->hasNEON()) { 4508 static const uint16_t DOpcodes[] = { 4509 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4510 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4511 static const uint16_t QOpcodes0[] = { 4512 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4513 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4514 static const uint16_t QOpcodes1[] = { 4515 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4516 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4517 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4518 return; 4519 } 4520 break; 4521 } 4522 4523 case ARMISD::VLD2LN_UPD: { 4524 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4525 ARM::VLD2LNd16Pseudo_UPD, 4526 ARM::VLD2LNd32Pseudo_UPD }; 4527 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4528 ARM::VLD2LNq32Pseudo_UPD }; 4529 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4530 return; 4531 } 4532 4533 case ARMISD::VLD3LN_UPD: { 4534 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4535 ARM::VLD3LNd16Pseudo_UPD, 4536 ARM::VLD3LNd32Pseudo_UPD }; 4537 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4538 ARM::VLD3LNq32Pseudo_UPD }; 4539 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4540 return; 4541 } 4542 4543 case ARMISD::VLD4LN_UPD: { 4544 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4545 ARM::VLD4LNd16Pseudo_UPD, 4546 ARM::VLD4LNd32Pseudo_UPD }; 4547 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4548 ARM::VLD4LNq32Pseudo_UPD }; 4549 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4550 return; 4551 } 4552 4553 case ARMISD::VST1_UPD: { 4554 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4555 ARM::VST1d16wb_fixed, 4556 ARM::VST1d32wb_fixed, 4557 ARM::VST1d64wb_fixed }; 4558 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4559 ARM::VST1q16wb_fixed, 4560 ARM::VST1q32wb_fixed, 4561 ARM::VST1q64wb_fixed }; 4562 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4563 return; 4564 } 4565 4566 case ARMISD::VST2_UPD: { 4567 if (Subtarget->hasNEON()) { 4568 static const uint16_t DOpcodes[] = { 4569 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4570 ARM::VST1q64wb_fixed}; 4571 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4572 ARM::VST2q16PseudoWB_fixed, 4573 ARM::VST2q32PseudoWB_fixed}; 4574 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4575 return; 4576 } 4577 break; 4578 } 4579 4580 case ARMISD::VST3_UPD: { 4581 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4582 ARM::VST3d16Pseudo_UPD, 4583 ARM::VST3d32Pseudo_UPD, 4584 ARM::VST1d64TPseudoWB_fixed}; 4585 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4586 ARM::VST3q16Pseudo_UPD, 4587 ARM::VST3q32Pseudo_UPD }; 4588 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4589 ARM::VST3q16oddPseudo_UPD, 4590 ARM::VST3q32oddPseudo_UPD }; 4591 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4592 return; 4593 } 4594 4595 case ARMISD::VST4_UPD: { 4596 if (Subtarget->hasNEON()) { 4597 static const uint16_t DOpcodes[] = { 4598 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4599 ARM::VST1d64QPseudoWB_fixed}; 4600 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4601 ARM::VST4q16Pseudo_UPD, 4602 ARM::VST4q32Pseudo_UPD}; 4603 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4604 ARM::VST4q16oddPseudo_UPD, 4605 ARM::VST4q32oddPseudo_UPD}; 4606 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4607 return; 4608 } 4609 break; 4610 } 4611 4612 case ARMISD::VST1x2_UPD: { 4613 if (Subtarget->hasNEON()) { 4614 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4615 ARM::VST1q16wb_fixed, 4616 ARM::VST1q32wb_fixed, 4617 ARM::VST1q64wb_fixed}; 4618 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4619 ARM::VST1d16QPseudoWB_fixed, 4620 ARM::VST1d32QPseudoWB_fixed, 4621 ARM::VST1d64QPseudoWB_fixed }; 4622 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4623 return; 4624 } 4625 break; 4626 } 4627 4628 case ARMISD::VST1x3_UPD: { 4629 if (Subtarget->hasNEON()) { 4630 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4631 ARM::VST1d16TPseudoWB_fixed, 4632 ARM::VST1d32TPseudoWB_fixed, 4633 ARM::VST1d64TPseudoWB_fixed }; 4634 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4635 ARM::VST1q16LowTPseudo_UPD, 4636 ARM::VST1q32LowTPseudo_UPD, 4637 ARM::VST1q64LowTPseudo_UPD }; 4638 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4639 ARM::VST1q16HighTPseudo_UPD, 4640 ARM::VST1q32HighTPseudo_UPD, 4641 ARM::VST1q64HighTPseudo_UPD }; 4642 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4643 return; 4644 } 4645 break; 4646 } 4647 4648 case ARMISD::VST1x4_UPD: { 4649 if (Subtarget->hasNEON()) { 4650 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4651 ARM::VST1d16QPseudoWB_fixed, 4652 ARM::VST1d32QPseudoWB_fixed, 4653 ARM::VST1d64QPseudoWB_fixed }; 4654 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4655 ARM::VST1q16LowQPseudo_UPD, 4656 ARM::VST1q32LowQPseudo_UPD, 4657 ARM::VST1q64LowQPseudo_UPD }; 4658 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4659 ARM::VST1q16HighQPseudo_UPD, 4660 ARM::VST1q32HighQPseudo_UPD, 4661 ARM::VST1q64HighQPseudo_UPD }; 4662 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4663 return; 4664 } 4665 break; 4666 } 4667 case ARMISD::VST2LN_UPD: { 4668 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4669 ARM::VST2LNd16Pseudo_UPD, 4670 ARM::VST2LNd32Pseudo_UPD }; 4671 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4672 ARM::VST2LNq32Pseudo_UPD }; 4673 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4674 return; 4675 } 4676 4677 case ARMISD::VST3LN_UPD: { 4678 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4679 ARM::VST3LNd16Pseudo_UPD, 4680 ARM::VST3LNd32Pseudo_UPD }; 4681 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4682 ARM::VST3LNq32Pseudo_UPD }; 4683 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4684 return; 4685 } 4686 4687 case ARMISD::VST4LN_UPD: { 4688 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4689 ARM::VST4LNd16Pseudo_UPD, 4690 ARM::VST4LNd32Pseudo_UPD }; 4691 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4692 ARM::VST4LNq32Pseudo_UPD }; 4693 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4694 return; 4695 } 4696 4697 case ISD::INTRINSIC_VOID: 4698 case ISD::INTRINSIC_W_CHAIN: { 4699 unsigned IntNo = N->getConstantOperandVal(1); 4700 switch (IntNo) { 4701 default: 4702 break; 4703 4704 case Intrinsic::arm_mrrc: 4705 case Intrinsic::arm_mrrc2: { 4706 SDLoc dl(N); 4707 SDValue Chain = N->getOperand(0); 4708 unsigned Opc; 4709 4710 if (Subtarget->isThumb()) 4711 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4712 else 4713 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4714 4715 SmallVector<SDValue, 5> Ops; 4716 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */ 4717 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */ 4718 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */ 4719 4720 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4721 // instruction will always be '1111' but it is possible in assembly language to specify 4722 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4723 if (Opc != ARM::MRRC2) { 4724 Ops.push_back(getAL(CurDAG, dl)); 4725 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4726 } 4727 4728 Ops.push_back(Chain); 4729 4730 // Writes to two registers. 4731 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4732 4733 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4734 return; 4735 } 4736 case Intrinsic::arm_ldaexd: 4737 case Intrinsic::arm_ldrexd: { 4738 SDLoc dl(N); 4739 SDValue Chain = N->getOperand(0); 4740 SDValue MemAddr = N->getOperand(2); 4741 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4742 4743 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4744 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4745 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 4746 4747 // arm_ldrexd returns a i64 value in {i32, i32} 4748 std::vector<EVT> ResTys; 4749 if (isThumb) { 4750 ResTys.push_back(MVT::i32); 4751 ResTys.push_back(MVT::i32); 4752 } else 4753 ResTys.push_back(MVT::Untyped); 4754 ResTys.push_back(MVT::Other); 4755 4756 // Place arguments in the right order. 4757 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4758 CurDAG->getRegister(0, MVT::i32), Chain}; 4759 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4760 // Transfer memoperands. 4761 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4762 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4763 4764 // Remap uses. 4765 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4766 if (!SDValue(N, 0).use_empty()) { 4767 SDValue Result; 4768 if (isThumb) 4769 Result = SDValue(Ld, 0); 4770 else { 4771 SDValue SubRegIdx = 4772 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4773 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4774 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4775 Result = SDValue(ResNode,0); 4776 } 4777 ReplaceUses(SDValue(N, 0), Result); 4778 } 4779 if (!SDValue(N, 1).use_empty()) { 4780 SDValue Result; 4781 if (isThumb) 4782 Result = SDValue(Ld, 1); 4783 else { 4784 SDValue SubRegIdx = 4785 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4786 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4787 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4788 Result = SDValue(ResNode,0); 4789 } 4790 ReplaceUses(SDValue(N, 1), Result); 4791 } 4792 ReplaceUses(SDValue(N, 2), OutChain); 4793 CurDAG->RemoveDeadNode(N); 4794 return; 4795 } 4796 case Intrinsic::arm_stlexd: 4797 case Intrinsic::arm_strexd: { 4798 SDLoc dl(N); 4799 SDValue Chain = N->getOperand(0); 4800 SDValue Val0 = N->getOperand(2); 4801 SDValue Val1 = N->getOperand(3); 4802 SDValue MemAddr = N->getOperand(4); 4803 4804 // Store exclusive double return a i32 value which is the return status 4805 // of the issued store. 4806 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4807 4808 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4809 // Place arguments in the right order. 4810 SmallVector<SDValue, 7> Ops; 4811 if (isThumb) { 4812 Ops.push_back(Val0); 4813 Ops.push_back(Val1); 4814 } else 4815 // arm_strexd uses GPRPair. 4816 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4817 Ops.push_back(MemAddr); 4818 Ops.push_back(getAL(CurDAG, dl)); 4819 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4820 Ops.push_back(Chain); 4821 4822 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4823 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4824 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4825 4826 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4827 // Transfer memoperands. 4828 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4829 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4830 4831 ReplaceNode(N, St); 4832 return; 4833 } 4834 4835 case Intrinsic::arm_neon_vld1: { 4836 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4837 ARM::VLD1d32, ARM::VLD1d64 }; 4838 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4839 ARM::VLD1q32, ARM::VLD1q64}; 4840 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4841 return; 4842 } 4843 4844 case Intrinsic::arm_neon_vld1x2: { 4845 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4846 ARM::VLD1q32, ARM::VLD1q64 }; 4847 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4848 ARM::VLD1d16QPseudo, 4849 ARM::VLD1d32QPseudo, 4850 ARM::VLD1d64QPseudo }; 4851 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4852 return; 4853 } 4854 4855 case Intrinsic::arm_neon_vld1x3: { 4856 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4857 ARM::VLD1d16TPseudo, 4858 ARM::VLD1d32TPseudo, 4859 ARM::VLD1d64TPseudo }; 4860 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 4861 ARM::VLD1q16LowTPseudo_UPD, 4862 ARM::VLD1q32LowTPseudo_UPD, 4863 ARM::VLD1q64LowTPseudo_UPD }; 4864 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4865 ARM::VLD1q16HighTPseudo, 4866 ARM::VLD1q32HighTPseudo, 4867 ARM::VLD1q64HighTPseudo }; 4868 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4869 return; 4870 } 4871 4872 case Intrinsic::arm_neon_vld1x4: { 4873 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4874 ARM::VLD1d16QPseudo, 4875 ARM::VLD1d32QPseudo, 4876 ARM::VLD1d64QPseudo }; 4877 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4878 ARM::VLD1q16LowQPseudo_UPD, 4879 ARM::VLD1q32LowQPseudo_UPD, 4880 ARM::VLD1q64LowQPseudo_UPD }; 4881 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4882 ARM::VLD1q16HighQPseudo, 4883 ARM::VLD1q32HighQPseudo, 4884 ARM::VLD1q64HighQPseudo }; 4885 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4886 return; 4887 } 4888 4889 case Intrinsic::arm_neon_vld2: { 4890 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4891 ARM::VLD2d32, ARM::VLD1q64 }; 4892 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4893 ARM::VLD2q32Pseudo }; 4894 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4895 return; 4896 } 4897 4898 case Intrinsic::arm_neon_vld3: { 4899 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4900 ARM::VLD3d16Pseudo, 4901 ARM::VLD3d32Pseudo, 4902 ARM::VLD1d64TPseudo }; 4903 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4904 ARM::VLD3q16Pseudo_UPD, 4905 ARM::VLD3q32Pseudo_UPD }; 4906 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4907 ARM::VLD3q16oddPseudo, 4908 ARM::VLD3q32oddPseudo }; 4909 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4910 return; 4911 } 4912 4913 case Intrinsic::arm_neon_vld4: { 4914 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4915 ARM::VLD4d16Pseudo, 4916 ARM::VLD4d32Pseudo, 4917 ARM::VLD1d64QPseudo }; 4918 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4919 ARM::VLD4q16Pseudo_UPD, 4920 ARM::VLD4q32Pseudo_UPD }; 4921 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4922 ARM::VLD4q16oddPseudo, 4923 ARM::VLD4q32oddPseudo }; 4924 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4925 return; 4926 } 4927 4928 case Intrinsic::arm_neon_vld2dup: { 4929 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4930 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4931 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4932 ARM::VLD2DUPq16EvenPseudo, 4933 ARM::VLD2DUPq32EvenPseudo }; 4934 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4935 ARM::VLD2DUPq16OddPseudo, 4936 ARM::VLD2DUPq32OddPseudo }; 4937 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4938 DOpcodes, QOpcodes0, QOpcodes1); 4939 return; 4940 } 4941 4942 case Intrinsic::arm_neon_vld3dup: { 4943 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4944 ARM::VLD3DUPd16Pseudo, 4945 ARM::VLD3DUPd32Pseudo, 4946 ARM::VLD1d64TPseudo }; 4947 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4948 ARM::VLD3DUPq16EvenPseudo, 4949 ARM::VLD3DUPq32EvenPseudo }; 4950 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4951 ARM::VLD3DUPq16OddPseudo, 4952 ARM::VLD3DUPq32OddPseudo }; 4953 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4954 DOpcodes, QOpcodes0, QOpcodes1); 4955 return; 4956 } 4957 4958 case Intrinsic::arm_neon_vld4dup: { 4959 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4960 ARM::VLD4DUPd16Pseudo, 4961 ARM::VLD4DUPd32Pseudo, 4962 ARM::VLD1d64QPseudo }; 4963 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4964 ARM::VLD4DUPq16EvenPseudo, 4965 ARM::VLD4DUPq32EvenPseudo }; 4966 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4967 ARM::VLD4DUPq16OddPseudo, 4968 ARM::VLD4DUPq32OddPseudo }; 4969 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4970 DOpcodes, QOpcodes0, QOpcodes1); 4971 return; 4972 } 4973 4974 case Intrinsic::arm_neon_vld2lane: { 4975 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4976 ARM::VLD2LNd16Pseudo, 4977 ARM::VLD2LNd32Pseudo }; 4978 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4979 ARM::VLD2LNq32Pseudo }; 4980 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4981 return; 4982 } 4983 4984 case Intrinsic::arm_neon_vld3lane: { 4985 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4986 ARM::VLD3LNd16Pseudo, 4987 ARM::VLD3LNd32Pseudo }; 4988 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4989 ARM::VLD3LNq32Pseudo }; 4990 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4991 return; 4992 } 4993 4994 case Intrinsic::arm_neon_vld4lane: { 4995 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4996 ARM::VLD4LNd16Pseudo, 4997 ARM::VLD4LNd32Pseudo }; 4998 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4999 ARM::VLD4LNq32Pseudo }; 5000 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 5001 return; 5002 } 5003 5004 case Intrinsic::arm_neon_vst1: { 5005 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 5006 ARM::VST1d32, ARM::VST1d64 }; 5007 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5008 ARM::VST1q32, ARM::VST1q64 }; 5009 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 5010 return; 5011 } 5012 5013 case Intrinsic::arm_neon_vst1x2: { 5014 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 5015 ARM::VST1q32, ARM::VST1q64 }; 5016 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 5017 ARM::VST1d16QPseudo, 5018 ARM::VST1d32QPseudo, 5019 ARM::VST1d64QPseudo }; 5020 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5021 return; 5022 } 5023 5024 case Intrinsic::arm_neon_vst1x3: { 5025 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 5026 ARM::VST1d16TPseudo, 5027 ARM::VST1d32TPseudo, 5028 ARM::VST1d64TPseudo }; 5029 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 5030 ARM::VST1q16LowTPseudo_UPD, 5031 ARM::VST1q32LowTPseudo_UPD, 5032 ARM::VST1q64LowTPseudo_UPD }; 5033 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 5034 ARM::VST1q16HighTPseudo, 5035 ARM::VST1q32HighTPseudo, 5036 ARM::VST1q64HighTPseudo }; 5037 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5038 return; 5039 } 5040 5041 case Intrinsic::arm_neon_vst1x4: { 5042 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 5043 ARM::VST1d16QPseudo, 5044 ARM::VST1d32QPseudo, 5045 ARM::VST1d64QPseudo }; 5046 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 5047 ARM::VST1q16LowQPseudo_UPD, 5048 ARM::VST1q32LowQPseudo_UPD, 5049 ARM::VST1q64LowQPseudo_UPD }; 5050 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 5051 ARM::VST1q16HighQPseudo, 5052 ARM::VST1q32HighQPseudo, 5053 ARM::VST1q64HighQPseudo }; 5054 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5055 return; 5056 } 5057 5058 case Intrinsic::arm_neon_vst2: { 5059 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 5060 ARM::VST2d32, ARM::VST1q64 }; 5061 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 5062 ARM::VST2q32Pseudo }; 5063 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 5064 return; 5065 } 5066 5067 case Intrinsic::arm_neon_vst3: { 5068 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 5069 ARM::VST3d16Pseudo, 5070 ARM::VST3d32Pseudo, 5071 ARM::VST1d64TPseudo }; 5072 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 5073 ARM::VST3q16Pseudo_UPD, 5074 ARM::VST3q32Pseudo_UPD }; 5075 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 5076 ARM::VST3q16oddPseudo, 5077 ARM::VST3q32oddPseudo }; 5078 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 5079 return; 5080 } 5081 5082 case Intrinsic::arm_neon_vst4: { 5083 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 5084 ARM::VST4d16Pseudo, 5085 ARM::VST4d32Pseudo, 5086 ARM::VST1d64QPseudo }; 5087 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 5088 ARM::VST4q16Pseudo_UPD, 5089 ARM::VST4q32Pseudo_UPD }; 5090 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 5091 ARM::VST4q16oddPseudo, 5092 ARM::VST4q32oddPseudo }; 5093 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 5094 return; 5095 } 5096 5097 case Intrinsic::arm_neon_vst2lane: { 5098 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 5099 ARM::VST2LNd16Pseudo, 5100 ARM::VST2LNd32Pseudo }; 5101 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 5102 ARM::VST2LNq32Pseudo }; 5103 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 5104 return; 5105 } 5106 5107 case Intrinsic::arm_neon_vst3lane: { 5108 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 5109 ARM::VST3LNd16Pseudo, 5110 ARM::VST3LNd32Pseudo }; 5111 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 5112 ARM::VST3LNq32Pseudo }; 5113 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 5114 return; 5115 } 5116 5117 case Intrinsic::arm_neon_vst4lane: { 5118 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 5119 ARM::VST4LNd16Pseudo, 5120 ARM::VST4LNd32Pseudo }; 5121 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 5122 ARM::VST4LNq32Pseudo }; 5123 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 5124 return; 5125 } 5126 5127 case Intrinsic::arm_mve_vldr_gather_base_wb: 5128 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 5129 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 5130 ARM::MVE_VLDRDU64_qi_pre}; 5131 SelectMVE_WB(N, Opcodes, 5132 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 5133 return; 5134 } 5135 5136 case Intrinsic::arm_mve_vld2q: { 5137 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 5138 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 5139 ARM::MVE_VLD21_16}; 5140 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 5141 ARM::MVE_VLD21_32}; 5142 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5143 SelectMVE_VLD(N, 2, Opcodes, false); 5144 return; 5145 } 5146 5147 case Intrinsic::arm_mve_vld4q: { 5148 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 5149 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 5150 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 5151 ARM::MVE_VLD42_16, 5152 ARM::MVE_VLD43_16}; 5153 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 5154 ARM::MVE_VLD42_32, 5155 ARM::MVE_VLD43_32}; 5156 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 5157 SelectMVE_VLD(N, 4, Opcodes, false); 5158 return; 5159 } 5160 } 5161 break; 5162 } 5163 5164 case ISD::INTRINSIC_WO_CHAIN: { 5165 unsigned IntNo = N->getConstantOperandVal(0); 5166 switch (IntNo) { 5167 default: 5168 break; 5169 5170 // Scalar f32 -> bf16 5171 case Intrinsic::arm_neon_vcvtbfp2bf: { 5172 SDLoc dl(N); 5173 const SDValue &Src = N->getOperand(1); 5174 llvm::EVT DestTy = N->getValueType(0); 5175 SDValue Pred = getAL(CurDAG, dl); 5176 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5177 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 5178 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 5179 return; 5180 } 5181 5182 // Vector v4f32 -> v4bf16 5183 case Intrinsic::arm_neon_vcvtfp2bf: { 5184 SDLoc dl(N); 5185 const SDValue &Src = N->getOperand(1); 5186 SDValue Pred = getAL(CurDAG, dl); 5187 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 5188 SDValue Ops[] = { Src, Pred, Reg0 }; 5189 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 5190 return; 5191 } 5192 5193 case Intrinsic::arm_mve_urshrl: 5194 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 5195 return; 5196 case Intrinsic::arm_mve_uqshll: 5197 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 5198 return; 5199 case Intrinsic::arm_mve_srshrl: 5200 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 5201 return; 5202 case Intrinsic::arm_mve_sqshll: 5203 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false); 5204 return; 5205 case Intrinsic::arm_mve_uqrshll: 5206 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 5207 return; 5208 case Intrinsic::arm_mve_sqrshrl: 5209 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 5210 return; 5211 5212 case Intrinsic::arm_mve_vadc: 5213 case Intrinsic::arm_mve_vadc_predicated: 5214 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 5215 IntNo == Intrinsic::arm_mve_vadc_predicated); 5216 return; 5217 case Intrinsic::arm_mve_vsbc: 5218 case Intrinsic::arm_mve_vsbc_predicated: 5219 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false, 5220 IntNo == Intrinsic::arm_mve_vsbc_predicated); 5221 return; 5222 case Intrinsic::arm_mve_vshlc: 5223 case Intrinsic::arm_mve_vshlc_predicated: 5224 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 5225 return; 5226 5227 case Intrinsic::arm_mve_vmlldava: 5228 case Intrinsic::arm_mve_vmlldava_predicated: { 5229 static const uint16_t OpcodesU[] = { 5230 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 5231 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 5232 }; 5233 static const uint16_t OpcodesS[] = { 5234 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 5235 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 5236 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 5237 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 5238 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5239 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5240 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5241 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5242 }; 5243 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5244 OpcodesS, OpcodesU); 5245 return; 5246 } 5247 5248 case Intrinsic::arm_mve_vrmlldavha: 5249 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5250 static const uint16_t OpcodesU[] = { 5251 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5252 }; 5253 static const uint16_t OpcodesS[] = { 5254 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5255 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5256 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5257 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5258 }; 5259 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5260 OpcodesS, OpcodesU); 5261 return; 5262 } 5263 5264 case Intrinsic::arm_mve_vidup: 5265 case Intrinsic::arm_mve_vidup_predicated: { 5266 static const uint16_t Opcodes[] = { 5267 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5268 }; 5269 SelectMVE_VxDUP(N, Opcodes, false, 5270 IntNo == Intrinsic::arm_mve_vidup_predicated); 5271 return; 5272 } 5273 5274 case Intrinsic::arm_mve_vddup: 5275 case Intrinsic::arm_mve_vddup_predicated: { 5276 static const uint16_t Opcodes[] = { 5277 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5278 }; 5279 SelectMVE_VxDUP(N, Opcodes, false, 5280 IntNo == Intrinsic::arm_mve_vddup_predicated); 5281 return; 5282 } 5283 5284 case Intrinsic::arm_mve_viwdup: 5285 case Intrinsic::arm_mve_viwdup_predicated: { 5286 static const uint16_t Opcodes[] = { 5287 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5288 }; 5289 SelectMVE_VxDUP(N, Opcodes, true, 5290 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5291 return; 5292 } 5293 5294 case Intrinsic::arm_mve_vdwdup: 5295 case Intrinsic::arm_mve_vdwdup_predicated: { 5296 static const uint16_t Opcodes[] = { 5297 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5298 }; 5299 SelectMVE_VxDUP(N, Opcodes, true, 5300 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5301 return; 5302 } 5303 5304 case Intrinsic::arm_cde_cx1d: 5305 case Intrinsic::arm_cde_cx1da: 5306 case Intrinsic::arm_cde_cx2d: 5307 case Intrinsic::arm_cde_cx2da: 5308 case Intrinsic::arm_cde_cx3d: 5309 case Intrinsic::arm_cde_cx3da: { 5310 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5311 IntNo == Intrinsic::arm_cde_cx2da || 5312 IntNo == Intrinsic::arm_cde_cx3da; 5313 size_t NumExtraOps; 5314 uint16_t Opcode; 5315 switch (IntNo) { 5316 case Intrinsic::arm_cde_cx1d: 5317 case Intrinsic::arm_cde_cx1da: 5318 NumExtraOps = 0; 5319 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5320 break; 5321 case Intrinsic::arm_cde_cx2d: 5322 case Intrinsic::arm_cde_cx2da: 5323 NumExtraOps = 1; 5324 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5325 break; 5326 case Intrinsic::arm_cde_cx3d: 5327 case Intrinsic::arm_cde_cx3da: 5328 NumExtraOps = 2; 5329 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D; 5330 break; 5331 default: 5332 llvm_unreachable("Unexpected opcode"); 5333 } 5334 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 5335 return; 5336 } 5337 } 5338 break; 5339 } 5340 5341 case ISD::ATOMIC_CMP_SWAP: 5342 SelectCMP_SWAP(N); 5343 return; 5344 } 5345 5346 SelectCode(N); 5347 } 5348 5349 // Inspect a register string of the form 5350 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 5351 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 5352 // and obtain the integer operands from them, adding these operands to the 5353 // provided vector. 5354 static void getIntOperandsFromRegisterString(StringRef RegString, 5355 SelectionDAG *CurDAG, 5356 const SDLoc &DL, 5357 std::vector<SDValue> &Ops) { 5358 SmallVector<StringRef, 5> Fields; 5359 RegString.split(Fields, ':'); 5360 5361 if (Fields.size() > 1) { 5362 bool AllIntFields = true; 5363 5364 for (StringRef Field : Fields) { 5365 // Need to trim out leading 'cp' characters and get the integer field. 5366 unsigned IntField; 5367 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 5368 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 5369 } 5370 5371 assert(AllIntFields && 5372 "Unexpected non-integer value in special register string."); 5373 (void)AllIntFields; 5374 } 5375 } 5376 5377 // Maps a Banked Register string to its mask value. The mask value returned is 5378 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 5379 // mask operand, which expresses which register is to be used, e.g. r8, and in 5380 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5381 // was invalid. 5382 static inline int getBankedRegisterMask(StringRef RegString) { 5383 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 5384 if (!TheReg) 5385 return -1; 5386 return TheReg->Encoding; 5387 } 5388 5389 // The flags here are common to those allowed for apsr in the A class cores and 5390 // those allowed for the special registers in the M class cores. Returns a 5391 // value representing which flags were present, -1 if invalid. 5392 static inline int getMClassFlagsMask(StringRef Flags) { 5393 return StringSwitch<int>(Flags) 5394 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 5395 // correct when flags are not permitted 5396 .Case("g", 0x1) 5397 .Case("nzcvq", 0x2) 5398 .Case("nzcvqg", 0x3) 5399 .Default(-1); 5400 } 5401 5402 // Maps MClass special registers string to its value for use in the 5403 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 5404 // Returns -1 to signify that the string was invalid. 5405 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 5406 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 5407 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 5408 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 5409 return -1; 5410 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 5411 } 5412 5413 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 5414 // The mask operand contains the special register (R Bit) in bit 4, whether 5415 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 5416 // bits 3-0 contains the fields to be accessed in the special register, set by 5417 // the flags provided with the register. 5418 int Mask = 0; 5419 if (Reg == "apsr") { 5420 // The flags permitted for apsr are the same flags that are allowed in 5421 // M class registers. We get the flag value and then shift the flags into 5422 // the correct place to combine with the mask. 5423 Mask = getMClassFlagsMask(Flags); 5424 if (Mask == -1) 5425 return -1; 5426 return Mask << 2; 5427 } 5428 5429 if (Reg != "cpsr" && Reg != "spsr") { 5430 return -1; 5431 } 5432 5433 // This is the same as if the flags were "fc" 5434 if (Flags.empty() || Flags == "all") 5435 return Mask | 0x9; 5436 5437 // Inspect the supplied flags string and set the bits in the mask for 5438 // the relevant and valid flags allowed for cpsr and spsr. 5439 for (char Flag : Flags) { 5440 int FlagVal; 5441 switch (Flag) { 5442 case 'c': 5443 FlagVal = 0x1; 5444 break; 5445 case 'x': 5446 FlagVal = 0x2; 5447 break; 5448 case 's': 5449 FlagVal = 0x4; 5450 break; 5451 case 'f': 5452 FlagVal = 0x8; 5453 break; 5454 default: 5455 FlagVal = 0; 5456 } 5457 5458 // This avoids allowing strings where the same flag bit appears twice. 5459 if (!FlagVal || (Mask & FlagVal)) 5460 return -1; 5461 Mask |= FlagVal; 5462 } 5463 5464 // If the register is spsr then we need to set the R bit. 5465 if (Reg == "spsr") 5466 Mask |= 0x10; 5467 5468 return Mask; 5469 } 5470 5471 // Lower the read_register intrinsic to ARM specific DAG nodes 5472 // using the supplied metadata string to select the instruction node to use 5473 // and the registers/masks to construct as operands for the node. 5474 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5475 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5476 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5477 bool IsThumb2 = Subtarget->isThumb2(); 5478 SDLoc DL(N); 5479 5480 std::vector<SDValue> Ops; 5481 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5482 5483 if (!Ops.empty()) { 5484 // If the special register string was constructed of fields (as defined 5485 // in the ACLE) then need to lower to MRC node (32 bit) or 5486 // MRRC node(64 bit), we can make the distinction based on the number of 5487 // operands we have. 5488 unsigned Opcode; 5489 SmallVector<EVT, 3> ResTypes; 5490 if (Ops.size() == 5){ 5491 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5492 ResTypes.append({ MVT::i32, MVT::Other }); 5493 } else { 5494 assert(Ops.size() == 3 && 5495 "Invalid number of fields in special register string."); 5496 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5497 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5498 } 5499 5500 Ops.push_back(getAL(CurDAG, DL)); 5501 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5502 Ops.push_back(N->getOperand(0)); 5503 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5504 return true; 5505 } 5506 5507 std::string SpecialReg = RegString->getString().lower(); 5508 5509 int BankedReg = getBankedRegisterMask(SpecialReg); 5510 if (BankedReg != -1) { 5511 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5512 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5513 N->getOperand(0) }; 5514 ReplaceNode( 5515 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5516 DL, MVT::i32, MVT::Other, Ops)); 5517 return true; 5518 } 5519 5520 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5521 // corresponding to the register that is being read from. So we switch on the 5522 // string to find which opcode we need to use. 5523 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5524 .Case("fpscr", ARM::VMRS) 5525 .Case("fpexc", ARM::VMRS_FPEXC) 5526 .Case("fpsid", ARM::VMRS_FPSID) 5527 .Case("mvfr0", ARM::VMRS_MVFR0) 5528 .Case("mvfr1", ARM::VMRS_MVFR1) 5529 .Case("mvfr2", ARM::VMRS_MVFR2) 5530 .Case("fpinst", ARM::VMRS_FPINST) 5531 .Case("fpinst2", ARM::VMRS_FPINST2) 5532 .Default(0); 5533 5534 // If an opcode was found then we can lower the read to a VFP instruction. 5535 if (Opcode) { 5536 if (!Subtarget->hasVFP2Base()) 5537 return false; 5538 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5539 return false; 5540 5541 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5542 N->getOperand(0) }; 5543 ReplaceNode(N, 5544 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5545 return true; 5546 } 5547 5548 // If the target is M Class then need to validate that the register string 5549 // is an acceptable value, so check that a mask can be constructed from the 5550 // string. 5551 if (Subtarget->isMClass()) { 5552 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5553 if (SYSmValue == -1) 5554 return false; 5555 5556 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5557 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5558 N->getOperand(0) }; 5559 ReplaceNode( 5560 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5561 return true; 5562 } 5563 5564 // Here we know the target is not M Class so we need to check if it is one 5565 // of the remaining possible values which are apsr, cpsr or spsr. 5566 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5567 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5568 N->getOperand(0) }; 5569 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 5570 DL, MVT::i32, MVT::Other, Ops)); 5571 return true; 5572 } 5573 5574 if (SpecialReg == "spsr") { 5575 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5576 N->getOperand(0) }; 5577 ReplaceNode( 5578 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5579 MVT::i32, MVT::Other, Ops)); 5580 return true; 5581 } 5582 5583 return false; 5584 } 5585 5586 // Lower the write_register intrinsic to ARM specific DAG nodes 5587 // using the supplied metadata string to select the instruction node to use 5588 // and the registers/masks to use in the nodes 5589 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5590 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1)); 5591 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0)); 5592 bool IsThumb2 = Subtarget->isThumb2(); 5593 SDLoc DL(N); 5594 5595 std::vector<SDValue> Ops; 5596 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5597 5598 if (!Ops.empty()) { 5599 // If the special register string was constructed of fields (as defined 5600 // in the ACLE) then need to lower to MCR node (32 bit) or 5601 // MCRR node(64 bit), we can make the distinction based on the number of 5602 // operands we have. 5603 unsigned Opcode; 5604 if (Ops.size() == 5) { 5605 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5606 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5607 } else { 5608 assert(Ops.size() == 3 && 5609 "Invalid number of fields in special register string."); 5610 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR; 5611 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5612 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5613 } 5614 5615 Ops.push_back(getAL(CurDAG, DL)); 5616 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5617 Ops.push_back(N->getOperand(0)); 5618 5619 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5620 return true; 5621 } 5622 5623 std::string SpecialReg = RegString->getString().lower(); 5624 int BankedReg = getBankedRegisterMask(SpecialReg); 5625 if (BankedReg != -1) { 5626 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5627 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5628 N->getOperand(0) }; 5629 ReplaceNode( 5630 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5631 DL, MVT::Other, Ops)); 5632 return true; 5633 } 5634 5635 // The VFP registers are written to by creating SelectionDAG nodes with 5636 // opcodes corresponding to the register that is being written. So we switch 5637 // on the string to find which opcode we need to use. 5638 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5639 .Case("fpscr", ARM::VMSR) 5640 .Case("fpexc", ARM::VMSR_FPEXC) 5641 .Case("fpsid", ARM::VMSR_FPSID) 5642 .Case("fpinst", ARM::VMSR_FPINST) 5643 .Case("fpinst2", ARM::VMSR_FPINST2) 5644 .Default(0); 5645 5646 if (Opcode) { 5647 if (!Subtarget->hasVFP2Base()) 5648 return false; 5649 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5650 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5651 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5652 return true; 5653 } 5654 5655 std::pair<StringRef, StringRef> Fields; 5656 Fields = StringRef(SpecialReg).rsplit('_'); 5657 std::string Reg = Fields.first.str(); 5658 StringRef Flags = Fields.second; 5659 5660 // If the target was M Class then need to validate the special register value 5661 // and retrieve the mask for use in the instruction node. 5662 if (Subtarget->isMClass()) { 5663 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5664 if (SYSmValue == -1) 5665 return false; 5666 5667 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5668 N->getOperand(2), getAL(CurDAG, DL), 5669 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5670 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5671 return true; 5672 } 5673 5674 // We then check to see if a valid mask can be constructed for one of the 5675 // register string values permitted for the A and R class cores. These values 5676 // are apsr, spsr and cpsr; these are also valid on older cores. 5677 int Mask = getARClassRegisterMask(Reg, Flags); 5678 if (Mask != -1) { 5679 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5680 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5681 N->getOperand(0) }; 5682 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5683 DL, MVT::Other, Ops)); 5684 return true; 5685 } 5686 5687 return false; 5688 } 5689 5690 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5691 std::vector<SDValue> AsmNodeOperands; 5692 InlineAsm::Flag Flag; 5693 bool Changed = false; 5694 unsigned NumOps = N->getNumOperands(); 5695 5696 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5697 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5698 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5699 // respectively. Since there is no constraint to explicitly specify a 5700 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5701 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5702 // them into a GPRPair. 5703 5704 SDLoc dl(N); 5705 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(); 5706 5707 SmallVector<bool, 8> OpChanged; 5708 // Glue node will be appended late. 5709 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5710 SDValue op = N->getOperand(i); 5711 AsmNodeOperands.push_back(op); 5712 5713 if (i < InlineAsm::Op_FirstOperand) 5714 continue; 5715 5716 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) 5717 Flag = InlineAsm::Flag(C->getZExtValue()); 5718 else 5719 continue; 5720 5721 // Immediate operands to inline asm in the SelectionDAG are modeled with 5722 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and 5723 // the second is a constant with the value of the immediate. If we get here 5724 // and we have a Kind::Imm, skip the next operand, and continue. 5725 if (Flag.isImmKind()) { 5726 SDValue op = N->getOperand(++i); 5727 AsmNodeOperands.push_back(op); 5728 continue; 5729 } 5730 5731 const unsigned NumRegs = Flag.getNumOperandRegisters(); 5732 if (NumRegs) 5733 OpChanged.push_back(false); 5734 5735 unsigned DefIdx = 0; 5736 bool IsTiedToChangedOp = false; 5737 // If it's a use that is tied with a previous def, it has no 5738 // reg class constraint. 5739 if (Changed && Flag.isUseOperandTiedToDef(DefIdx)) 5740 IsTiedToChangedOp = OpChanged[DefIdx]; 5741 5742 // Memory operands to inline asm in the SelectionDAG are modeled with two 5743 // operands: a constant of value InlineAsm::Kind::Mem followed by the input 5744 // operand. If we get here and we have a Kind::Mem, skip the next operand 5745 // (so it doesn't get misinterpreted), and continue. We do this here because 5746 // it's important to update the OpChanged array correctly before moving on. 5747 if (Flag.isMemKind()) { 5748 SDValue op = N->getOperand(++i); 5749 AsmNodeOperands.push_back(op); 5750 continue; 5751 } 5752 5753 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() && 5754 !Flag.isRegDefEarlyClobberKind()) 5755 continue; 5756 5757 unsigned RC; 5758 const bool HasRC = Flag.hasRegClassConstraint(RC); 5759 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5760 || NumRegs != 2) 5761 continue; 5762 5763 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5764 SDValue V0 = N->getOperand(i+1); 5765 SDValue V1 = N->getOperand(i+2); 5766 Register Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5767 Register Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5768 SDValue PairedReg; 5769 MachineRegisterInfo &MRI = MF->getRegInfo(); 5770 5771 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { 5772 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5773 // the original GPRs. 5774 5775 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5776 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5777 SDValue Chain = SDValue(N,0); 5778 5779 SDNode *GU = N->getGluedUser(); 5780 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5781 Chain.getValue(1)); 5782 5783 // Extract values from a GPRPair reg and copy to the original GPR reg. 5784 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5785 RegCopy); 5786 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5787 RegCopy); 5788 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5789 RegCopy.getValue(1)); 5790 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5791 5792 // Update the original glue user. 5793 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5794 Ops.push_back(T1.getValue(1)); 5795 CurDAG->UpdateNodeOperands(GU, Ops); 5796 } else { 5797 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a 5798 // GPRPair and then pass the GPRPair to the inline asm. 5799 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5800 5801 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5802 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5803 Chain.getValue(1)); 5804 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5805 T0.getValue(1)); 5806 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5807 5808 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5809 // i32 VRs of inline asm with it. 5810 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5811 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5812 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5813 5814 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5815 Glue = Chain.getValue(1); 5816 } 5817 5818 Changed = true; 5819 5820 if(PairedReg.getNode()) { 5821 OpChanged[OpChanged.size() -1 ] = true; 5822 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/); 5823 if (IsTiedToChangedOp) 5824 Flag.setMatchingOp(DefIdx); 5825 else 5826 Flag.setRegClass(ARM::GPRPairRegClassID); 5827 // Replace the current flag. 5828 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5829 Flag, dl, MVT::i32); 5830 // Add the new register node and skip the original two GPRs. 5831 AsmNodeOperands.push_back(PairedReg); 5832 // Skip the next two GPRs. 5833 i += 2; 5834 } 5835 } 5836 5837 if (Glue.getNode()) 5838 AsmNodeOperands.push_back(Glue); 5839 if (!Changed) 5840 return false; 5841 5842 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5843 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5844 New->setNodeId(-1); 5845 ReplaceNode(N, New.getNode()); 5846 return true; 5847 } 5848 5849 bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand( 5850 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 5851 std::vector<SDValue> &OutOps) { 5852 switch(ConstraintID) { 5853 default: 5854 llvm_unreachable("Unexpected asm memory constraint"); 5855 case InlineAsm::ConstraintCode::m: 5856 case InlineAsm::ConstraintCode::o: 5857 case InlineAsm::ConstraintCode::Q: 5858 case InlineAsm::ConstraintCode::Um: 5859 case InlineAsm::ConstraintCode::Un: 5860 case InlineAsm::ConstraintCode::Uq: 5861 case InlineAsm::ConstraintCode::Us: 5862 case InlineAsm::ConstraintCode::Ut: 5863 case InlineAsm::ConstraintCode::Uv: 5864 case InlineAsm::ConstraintCode::Uy: 5865 // Require the address to be in a register. That is safe for all ARM 5866 // variants and it is hard to do anything much smarter without knowing 5867 // how the operand is used. 5868 OutOps.push_back(Op); 5869 return false; 5870 } 5871 return true; 5872 } 5873 5874 /// createARMISelDag - This pass converts a legalized DAG into a 5875 /// ARM-specific DAG, ready for instruction scheduling. 5876 /// 5877 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5878 CodeGenOptLevel OptLevel) { 5879 return new ARMDAGToDAGISelLegacy(TM, OptLevel); 5880 } 5881