//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (such as those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
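  /// The generated matcher must therefore not assume the DAG is unchanged
  /// across complex-pattern calls.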
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is
  /// an array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
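  /// (Signed/unsigned bit-field extract; available from ARMv6T2 onwards.)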
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but
  /// only if it simplifies the materialization of the constant. Returns true
  /// if it is, and assigns to PowerOfTwo the power of two that should be
  /// extracted out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode with an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-field
    // extraction node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free; e.g. on
      // Swift, left-shift amounts of 1 or 2 are free but others are not:
      //   ubfx  r3, r1, #16, #8
      //   ldr.w r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w r9, #1020
      //   and.w r2, r9, r1, lsr #14
      //   ldr   r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
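    // e.g. with c2 = 1020 (0b1111111100) we have tz = 2, so
    //   (add X1, (and (srl X2, c1), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, c1+2), 255), 2))
    // where the and/srl pair selects to UBFX and the shl folds into the add.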
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse())
    return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst)
    return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse())
    return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0)
    return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0)
      return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift)
    return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift)
    return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Constant shift amounts are handled by SelectImmShifterOperand instead.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS)
    return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
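// This is valid when the operands have no common set bits, since a disjoint
// OR computes the same value as an ADD.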
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                         /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub =
      N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc =
        CurDAG->getSignedConstant(Val, SDLoc(Op), MVT::i32, /*isTarget*/ true);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
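  // e.g. with Scale == 4 the foldable offsets are 0, 4, ..., 124 (imm5 * 4).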
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm =
        CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32, /*isTarget=*/true);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                             /*isTarget=*/true);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
                                         MVT::i32, /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                         /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32,
                                    /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
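  // Only a strictly negative offset in [-255, -1] is accepted here;
  // non-negative offsets are left for the imm12 form above.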
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                         /*isTarget=*/true);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedConstant(RHSC, SDLoc(N), MVT::i32,
                                             /*isTarget=*/true)
                 : CurDAG->getSignedConstant(-RHSC, SDLoc(N), MVT::i32,
                                             /*isTarget=*/true);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
                                         MVT::i32, /*isTarget=*/true);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7-bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedConstant(RHSC * (1 << Shift), SDLoc(N),
                                             MVT::i32, /*isTarget=*/true)
                 : CurDAG->getSignedConstant(-RHSC * (1 << Shift), SDLoc(N),
                                             MVT::i32, /*isTarget=*/true);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm =
        CurDAG->getSignedConstant(Val, SDLoc(N), MVT::i32, /*isTarget=*/true);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
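  // Only a zero offset or a positive multiple of 4 up to 1020 can be folded
  // (encoded below as RHSC/4); any other addend leaves the base unchanged
  // with a zero offset.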
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = Align->getAsZExtVal();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

static bool isVLDfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed: return true;
  case ARM::VLD1d16wb_fixed: return true;
  case ARM::VLD1d64Qwb_fixed: return true;
  case ARM::VLD1d32wb_fixed: return true;
  case ARM::VLD1d64wb_fixed: return true;
  case ARM::VLD1d8TPseudoWB_fixed: return true;
  case ARM::VLD1d16TPseudoWB_fixed: return true;
  case ARM::VLD1d32TPseudoWB_fixed: return true;
  case ARM::VLD1d64TPseudoWB_fixed: return true;
  case ARM::VLD1d8QPseudoWB_fixed: return true;
  case ARM::VLD1d16QPseudoWB_fixed: return true;
  case ARM::VLD1d32QPseudoWB_fixed: return true;
  case ARM::VLD1d64QPseudoWB_fixed: return true;
  case ARM::VLD1q8wb_fixed: return true;
  case ARM::VLD1q16wb_fixed: return true;
  case ARM::VLD1q32wb_fixed: return true;
  case ARM::VLD1q64wb_fixed: return true;
  case ARM::VLD1DUPd8wb_fixed: return true;
  case ARM::VLD1DUPd16wb_fixed: return true;
  case ARM::VLD1DUPd32wb_fixed: return true;
  case ARM::VLD1DUPq8wb_fixed: return true;
  case ARM::VLD1DUPq16wb_fixed: return true;
  case ARM::VLD1DUPq32wb_fixed: return true;
  case ARM::VLD2d8wb_fixed: return true;
  case ARM::VLD2d16wb_fixed: return true;
  case ARM::VLD2d32wb_fixed: return true;
  case ARM::VLD2q8PseudoWB_fixed: return true;
  case ARM::VLD2q16PseudoWB_fixed: return true;
  case ARM::VLD2q32PseudoWB_fixed: return true;
  case ARM::VLD2DUPd8wb_fixed: return true;
  case ARM::VLD2DUPd16wb_fixed: return true;
  case ARM::VLD2DUPd32wb_fixed: return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed: return true;
  case ARM::VST1d16wb_fixed: return true;
  case ARM::VST1d32wb_fixed: return true;
  case ARM::VST1d64wb_fixed: return true;
  case ARM::VST1q8wb_fixed: return true;
  case ARM::VST1q16wb_fixed: return true;
  case ARM::VST1q32wb_fixed: return true;
  case ARM::VST1q64wb_fixed: return true;
  case ARM::VST1d8TPseudoWB_fixed: return true;
  case ARM::VST1d16TPseudoWB_fixed: return true;
  case ARM::VST1d32TPseudoWB_fixed: return true;
  case ARM::VST1d64TPseudoWB_fixed: return true;
  case ARM::VST1d8QPseudoWB_fixed: return true;
  case ARM::VST1d16QPseudoWB_fixed: return true;
  case ARM::VST1d32QPseudoWB_fixed: return true;
  case ARM::VST1d64QPseudoWB_fixed: return true;
  case ARM::VST2d8wb_fixed: return true;
  case ARM::VST2d16wb_fixed: return true;
  case ARM::VST2d32wb_fixed: return true;
  case ARM::VST2q8PseudoWB_fixed: return true;
  case ARM::VST2q16PseudoWB_fixed: return true;
  case ARM::VST2q32PseudoWB_fixed: return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc)) &&
         "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed: return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed: return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed: return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed: return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed: return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed: return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                  : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
            ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
            : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                  : QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
        : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex]
                                : QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
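  // The lane load produced a single wide super-register; below, each of the
  // NumVecs original results is peeled off with a dsub_N/qsub_N
  // EXTRACT_SUBREG, and the chain (plus the writeback value, when updating)
  // is remapped onto the new node's remaining results.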
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

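  // The operand list assembled below mirrors the MVE long scalar shifts
  // (ASRL, LSLL, UQSHLL and friends): the two GPR halves of the 64-bit
  // value, the shift amount (immediate or register), an optional
  // saturation-select immediate, and finally the standard predicate
  // operands.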
  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = N->getConstantOperandVal(3);
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = N->getConstantOperandVal(4);
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(N->getOperand(OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator; otherwise select an instruction without accumulator.
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCoproc->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
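  // (These are the intrinsic's remaining register inputs, e.g. n for cx2d
  // and n/m for cx3d; the caller passes their count in as NumExtraOps.)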
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT,
                                                 SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
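  // (VCVTB.F16.F32 and VCVTT.F16.F32 write only the bottom or top half of an
  // S register, so an f32->f16 FP_ROUND feeding a lane insert is already
  // matched well by those patterns; rewriting it here would only defeat that
  // fold.)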
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into an f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Otherwise, for v8i16, use a pattern of an extract and an insert, with
    // an optional VMOVX for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
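  // (VINS.F16 copies the bottom f16 of its source into the top f16 of its
  // destination, so the even-lane value Val2 keeps the bottom half and Val1
  // supplies the top half of the resulting f32 lane.)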
3166 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3167 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3168 SDValue NewIns = 3169 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3170 Ins2.getOperand(0), SDValue(VINS, 0)); 3171 ReplaceUses(Ins1, NewIns); 3172 return true; 3173 } 3174 3175 return false; 3176 } 3177 3178 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3179 SDNode *FMul, 3180 bool IsUnsigned, 3181 bool FixedToFloat) { 3182 auto Type = N->getValueType(0); 3183 unsigned ScalarBits = Type.getScalarSizeInBits(); 3184 if (ScalarBits > 32) 3185 return false; 3186 3187 SDNodeFlags FMulFlags = FMul->getFlags(); 3188 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3189 // allowed in 16 bit unsigned floats 3190 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3191 return false; 3192 3193 SDValue ImmNode = FMul->getOperand(1); 3194 SDValue VecVal = FMul->getOperand(0); 3195 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3196 VecVal->getOpcode() == ISD::SINT_TO_FP) 3197 VecVal = VecVal->getOperand(0); 3198 3199 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3200 return false; 3201 3202 if (ImmNode.getOpcode() == ISD::BITCAST) { 3203 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3204 return false; 3205 ImmNode = ImmNode.getOperand(0); 3206 } 3207 3208 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3209 return false; 3210 3211 APFloat ImmAPF(0.0f); 3212 switch (ImmNode.getOpcode()) { 3213 case ARMISD::VMOVIMM: 3214 case ARMISD::VDUP: { 3215 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3216 return false; 3217 unsigned Imm = ImmNode.getConstantOperandVal(0); 3218 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3219 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3220 ImmAPF = 3221 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3222 APInt(ScalarBits, Imm)); 3223 break; 3224 } 3225 case ARMISD::VMOVFPIMM: { 3226 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3227 break; 3228 } 3229 default: 3230 return false; 3231 } 3232 3233 // Where n is the number of fractional bits, multiplying by 2^n will convert 3234 // from float to fixed and multiplying by 2^-n will convert from fixed to 3235 // float. Taking log2 of the factor (after taking the inverse in the case of 3236 // float to fixed) will give n. 3237 APFloat ToConvert = ImmAPF; 3238 if (FixedToFloat) { 3239 if (!ImmAPF.getExactInverse(&ToConvert)) 3240 return false; 3241 } 3242 APSInt Converted(64, false); 3243 bool IsExact; 3244 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3245 &IsExact); 3246 if (!IsExact || !Converted.isPowerOf2()) 3247 return false; 3248 3249 unsigned FracBits = Converted.logBase2(); 3250 if (FracBits > ScalarBits) 3251 return false; 3252 3253 SmallVector<SDValue, 3> Ops{ 3254 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3255 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3256 3257 unsigned int Opcode; 3258 switch (ScalarBits) { 3259 case 16: 3260 if (FixedToFloat) 3261 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3262 else 3263 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3264 break; 3265 case 32: 3266 if (FixedToFloat) 3267 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3268 else 3269 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3270 break; 3271 default: 3272 llvm_unreachable("unexpected number of scalar bits"); 3273 break; 3274 } 3275 3276 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); 3277 return true; 3278 } 3279 3280 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { 3281 // Transform a floating-point to fixed-point conversion to a VCVT 3282 if (!Subtarget->hasMVEFloatOps()) 3283 return false; 3284 EVT Type = N->getValueType(0); 3285 if (!Type.isVector()) 3286 return false; 3287 unsigned int ScalarBits = Type.getScalarSizeInBits(); 3288 3289 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || 3290 N->getOpcode() == ISD::FP_TO_UINT_SAT; 3291 SDNode *Node = N->getOperand(0).getNode(); 3292 3293 // floating-point to fixed-point with one fractional bit gets turned into an 3294 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) 3295 if (Node->getOpcode() == ISD::FADD) { 3296 if (Node->getOperand(0) != Node->getOperand(1)) 3297 return false; 3298 SDNodeFlags Flags = Node->getFlags(); 3299 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3300 // allowed in 16 bit unsigned floats 3301 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) 3302 return false; 3303 3304 unsigned Opcode; 3305 switch (ScalarBits) { 3306 case 16: 3307 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3308 break; 3309 case 32: 3310 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; 3311 break; 3312 } 3313 SmallVector<SDValue, 3> Ops{Node->getOperand(0), 3314 CurDAG->getConstant(1, dl, MVT::i32)}; 3315 AddEmptyMVEPredicateToOps(Ops, dl, Type); 3316 3317 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); 3318 return true; 3319 } 3320 3321 if (Node->getOpcode() != ISD::FMUL) 3322 return false; 3323 3324 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); 3325 } 3326 3327 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { 3328 // Transform a fixed-point to floating-point conversion to a VCVT 3329 if (!Subtarget->hasMVEFloatOps()) 3330 return false; 3331 auto Type = N->getValueType(0); 3332 if (!Type.isVector()) 3333 return false; 3334 3335 auto LHS = N->getOperand(0); 3336 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) 3337 return false; 3338 3339 return transformFixedFloatingPointConversion( 3340 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); 3341 } 3342 3343 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3344 if (!Subtarget->hasV6T2Ops()) 3345 return false; 3346 3347 unsigned Opc = isSigned 3348 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3349 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 3350 SDLoc dl(N); 3351 3352 // For unsigned extracts, check for a shift right and mask 3353 unsigned And_imm = 0; 3354 if (N->getOpcode() == ISD::AND) { 3355 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3356 3357 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3358 if (And_imm & (And_imm + 1)) 3359 return false; 3360 3361 unsigned Srl_imm = 0; 3362 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3363 Srl_imm)) { 3364 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3365 3366 // Mask off the unnecessary bits of the AND immediate; normally 3367 // DAGCombine will do this, but that might not happen if 3368 // targetShrinkDemandedConstant chooses a different immediate. 
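// For example, (and (srl x, 3), 0xff) reaches this point with Srl_imm == 3
// and And_imm == 0xff: after the shift only the low 29 bits of x can be set,
// so any mask bits above that are dead and are cleared below before the
// field width is measured; the node then selects to a UBFX with lsb 3 and an
// 8-bit field.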
3369 And_imm &= -1U >> Srl_imm; 3370 3371 // Note: The width operand is encoded as width-1. 3372 unsigned Width = llvm::countr_one(And_imm) - 1; 3373 unsigned LSB = Srl_imm; 3374 3375 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3376 3377 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3378 // It's cheaper to use a right shift to extract the top bits. 3379 if (Subtarget->isThumb()) { 3380 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3381 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3382 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3383 getAL(CurDAG, dl), Reg0, Reg0 }; 3384 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3385 return true; 3386 } 3387 3388 // ARM models shift instructions as MOVsi with shifter operand. 3389 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3390 SDValue ShOpc = 3391 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3392 MVT::i32); 3393 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3394 getAL(CurDAG, dl), Reg0, Reg0 }; 3395 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3396 return true; 3397 } 3398 3399 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3400 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3401 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3402 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3403 getAL(CurDAG, dl), Reg0 }; 3404 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3405 return true; 3406 } 3407 } 3408 return false; 3409 } 3410 3411 // Otherwise, we're looking for a shift of a shift. 3412 unsigned Shl_imm = 0; 3413 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3414 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3415 unsigned Srl_imm = 0; 3416 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3417 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3418 // Note: The width operand is encoded as width-1. 3419 unsigned Width = 32 - Srl_imm - 1; 3420 int LSB = Srl_imm - Shl_imm; 3421 if (LSB < 0) 3422 return false; 3423 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3424 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3425 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3426 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3427 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3428 getAL(CurDAG, dl), Reg0 }; 3429 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3430 return true; 3431 } 3432 } 3433 3434 // Or we are looking for a shift of an AND, with a mask operand. 3435 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3436 isShiftedMask_32(And_imm)) { 3437 unsigned Srl_imm = 0; 3438 unsigned LSB = llvm::countr_zero(And_imm); 3439 // The shift amount must be the same as the AND mask's LSB. 3440 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3441 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3442 unsigned MSB = llvm::Log2_32(And_imm); 3443 // Note: The width operand is encoded as width-1.
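// For example, (srl (and x, 0x0ff0), 4) has LSB == 4 and MSB == 11, so the
// field is 8 bits wide, Width below holds the encoded value 7, and the
// whole expression becomes a single UBFX extracting bits [11:4].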
3444 unsigned Width = MSB - LSB; 3445 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3446 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3447 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3448 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3449 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3450 getAL(CurDAG, dl), Reg0 }; 3451 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3452 return true; 3453 } 3454 } 3455 3456 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3457 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3458 unsigned LSB = 0; 3459 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3460 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3461 return false; 3462 3463 if (LSB + Width > 32) 3464 return false; 3465 3466 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3467 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3468 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3469 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3470 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3471 getAL(CurDAG, dl), Reg0 }; 3472 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3473 return true; 3474 } 3475 3476 return false; 3477 } 3478 3479 /// Target-specific DAG combining for ISD::SUB. 3480 /// Target-independent combining lowers SELECT_CC nodes of the form 3481 /// select_cc setg[ge] X, 0, X, -X 3482 /// select_cc setgt X, -1, X, -X 3483 /// select_cc setl[te] X, 0, -X, X 3484 /// select_cc setlt X, 1, -X, X 3485 /// which represent Integer ABS into: 3486 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 3487 /// ARM instruction selection detects the latter and matches it to 3488 /// ARM::ABS or ARM::t2ABS machine node. 3489 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3490 SDValue SUBSrc0 = N->getOperand(0); 3491 SDValue SUBSrc1 = N->getOperand(1); 3492 EVT VT = N->getValueType(0); 3493 3494 if (Subtarget->isThumb1Only()) 3495 return false; 3496 3497 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) 3498 return false; 3499 3500 SDValue XORSrc0 = SUBSrc0.getOperand(0); 3501 SDValue XORSrc1 = SUBSrc0.getOperand(1); 3502 SDValue SRASrc0 = SUBSrc1.getOperand(0); 3503 SDValue SRASrc1 = SUBSrc1.getOperand(1); 3504 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3505 EVT XType = SRASrc0.getValueType(); 3506 unsigned Size = XType.getSizeInBits() - 1; 3507 3508 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && 3509 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { 3510 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3511 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); 3512 return true; 3513 } 3514 3515 return false; 3516 } 3517 3518 /// We've got special pseudo-instructions for these 3519 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3520 unsigned Opcode; 3521 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3522 if (MemTy == MVT::i8) 3523 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; 3524 else if (MemTy == MVT::i16) 3525 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; 3526 else if (MemTy == MVT::i32) 3527 Opcode = Subtarget->isThumb() ? 
ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3528 else 3529 llvm_unreachable("Unknown AtomicCmpSwap type"); 3530 3531 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3532 N->getOperand(0)}; 3533 SDNode *CmpSwap = CurDAG->getMachineNode( 3534 Opcode, SDLoc(N), 3535 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3536 3537 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3538 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3539 3540 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3541 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3542 CurDAG->RemoveDeadNode(N); 3543 } 3544 3545 static std::optional<std::pair<unsigned, unsigned>> 3546 getContiguousRangeOfSetBits(const APInt &A) { 3547 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1; 3548 unsigned LastOne = A.countr_zero(); 3549 if (A.popcount() != (FirstOne - LastOne + 1)) 3550 return std::nullopt; 3551 return std::make_pair(FirstOne, LastOne); 3552 } 3553 3554 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3555 assert(N->getOpcode() == ARMISD::CMPZ); 3556 SwitchEQNEToPLMI = false; 3557 3558 if (!Subtarget->isThumb()) 3559 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3560 // LSR don't exist as standalone instructions - they need the barrel shifter. 3561 return; 3562 3563 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3564 SDValue And = N->getOperand(0); 3565 if (!And->hasOneUse()) 3566 return; 3567 3568 SDValue Zero = N->getOperand(1); 3569 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND) 3570 return; 3571 SDValue X = And.getOperand(0); 3572 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3573 3574 if (!C) 3575 return; 3576 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3577 if (!Range) 3578 return; 3579 3580 // There are several ways to lower this: 3581 SDNode *NewN; 3582 SDLoc dl(N); 3583 3584 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3585 if (Subtarget->isThumb2()) { 3586 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3587 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3588 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3589 CurDAG->getRegister(0, MVT::i32) }; 3590 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3591 } else { 3592 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3593 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3594 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3595 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3596 } 3597 }; 3598 3599 if (Range->second == 0) { 3600 // 1. Mask includes the LSB -> Simply shift the top N bits off 3601 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3602 ReplaceNode(And.getNode(), NewN); 3603 } else if (Range->first == 31) { 3604 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3605 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3606 ReplaceNode(And.getNode(), NewN); 3607 } else if (Range->first == Range->second) { 3608 // 3. Only one bit is set. We can shift this into the sign bit and use a 3609 // PL/MI comparison. 3610 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3611 ReplaceNode(And.getNode(), NewN); 3612 3613 SwitchEQNEToPLMI = true; 3614 } else if (!Subtarget->hasV6T2Ops()) { 3615 // 4. Do a double shift to clear bottom and top bits, but only in 3616 // thumb-1 mode as in thumb-2 we can use UBFX. 
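// For example, with a mask of 0x3c (bits 5..2): first == 5 and second == 2,
// so LSLS by 31 - 5 == 26 moves bit 5 into the sign bit and drops the bits
// above the mask, and LSRS by 2 + 26 == 28 then drops the bits below it.
// The final shift sets Z exactly when the masked bits are all zero, which
// is the condition the original CMPZ was computing.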
3617 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3618 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3619 Range->second + (31 - Range->first)); 3620 ReplaceNode(And.getNode(), NewN); 3621 } 3622 } 3623 3624 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], 3625 unsigned Opc128[3]) { 3626 assert((VT.is64BitVector() || VT.is128BitVector()) && 3627 "Unexpected vector shuffle length"); 3628 switch (VT.getScalarSizeInBits()) { 3629 default: 3630 llvm_unreachable("Unexpected vector shuffle element size"); 3631 case 8: 3632 return VT.is64BitVector() ? Opc64[0] : Opc128[0]; 3633 case 16: 3634 return VT.is64BitVector() ? Opc64[1] : Opc128[1]; 3635 case 32: 3636 return VT.is64BitVector() ? Opc64[2] : Opc128[2]; 3637 } 3638 } 3639 3640 void ARMDAGToDAGISel::Select(SDNode *N) { 3641 SDLoc dl(N); 3642 3643 if (N->isMachineOpcode()) { 3644 N->setNodeId(-1); 3645 return; // Already selected. 3646 } 3647 3648 switch (N->getOpcode()) { 3649 default: break; 3650 case ISD::STORE: { 3651 // For Thumb1, match an sp-relative store in C++. This is a little 3652 // unfortunate, but I don't think I can make the chain check work 3653 // otherwise. (The chain of the store has to be the same as the chain 3654 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3655 // a direct reference to "SP".) 3656 // 3657 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3658 // a different addressing mode from other four-byte stores. 3659 // 3660 // This pattern usually comes up with call arguments. 3661 StoreSDNode *ST = cast<StoreSDNode>(N); 3662 SDValue Ptr = ST->getBasePtr(); 3663 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3664 int RHSC = 0; 3665 if (Ptr.getOpcode() == ISD::ADD && 3666 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3667 Ptr = Ptr.getOperand(0); 3668 3669 if (Ptr.getOpcode() == ISD::CopyFromReg && 3670 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3671 Ptr.getOperand(0) == ST->getChain()) { 3672 SDValue Ops[] = {ST->getValue(), 3673 CurDAG->getRegister(ARM::SP, MVT::i32), 3674 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3675 getAL(CurDAG, dl), 3676 CurDAG->getRegister(0, MVT::i32), 3677 ST->getChain()}; 3678 MachineSDNode *ResNode = 3679 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3680 MachineMemOperand *MemOp = ST->getMemOperand(); 3681 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3682 ReplaceNode(N, ResNode); 3683 return; 3684 } 3685 } 3686 break; 3687 } 3688 case ISD::WRITE_REGISTER: 3689 if (tryWriteRegister(N)) 3690 return; 3691 break; 3692 case ISD::READ_REGISTER: 3693 if (tryReadRegister(N)) 3694 return; 3695 break; 3696 case ISD::INLINEASM: 3697 case ISD::INLINEASM_BR: 3698 if (tryInlineAsm(N)) 3699 return; 3700 break; 3701 case ISD::SUB: 3702 // Select special operations if SUB node forms integer ABS pattern 3703 if (tryABSOp(N)) 3704 return; 3705 // Other cases are autogenerated. 
3706 break; 3707 case ISD::Constant: { 3708 unsigned Val = N->getAsZExtVal(); 3709 // If we can't materialize the constant we need to use a literal pool 3710 if (ConstantMaterializationCost(Val, Subtarget) > 2 && 3711 !Subtarget->genExecuteOnly()) { 3712 SDValue CPIdx = CurDAG->getTargetConstantPool( 3713 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3714 TLI->getPointerTy(CurDAG->getDataLayout())); 3715 3716 SDNode *ResNode; 3717 if (Subtarget->isThumb()) { 3718 SDValue Ops[] = { 3719 CPIdx, 3720 getAL(CurDAG, dl), 3721 CurDAG->getRegister(0, MVT::i32), 3722 CurDAG->getEntryNode() 3723 }; 3724 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3725 Ops); 3726 } else { 3727 SDValue Ops[] = { 3728 CPIdx, 3729 CurDAG->getTargetConstant(0, dl, MVT::i32), 3730 getAL(CurDAG, dl), 3731 CurDAG->getRegister(0, MVT::i32), 3732 CurDAG->getEntryNode() 3733 }; 3734 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3735 Ops); 3736 } 3737 // Annotate the Node with memory operand information so that MachineInstr 3738 // queries work properly. This e.g. gives the register allocation the 3739 // required information for rematerialization. 3740 MachineFunction& MF = CurDAG->getMachineFunction(); 3741 MachineMemOperand *MemOp = 3742 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3743 MachineMemOperand::MOLoad, 4, Align(4)); 3744 3745 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3746 3747 ReplaceNode(N, ResNode); 3748 return; 3749 } 3750 3751 // Other cases are autogenerated. 3752 break; 3753 } 3754 case ISD::FrameIndex: { 3755 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 3756 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3757 SDValue TFI = CurDAG->getTargetFrameIndex( 3758 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3759 if (Subtarget->isThumb1Only()) { 3760 // Set the alignment of the frame object to 4, to avoid having to generate 3761 // more than one ADD 3762 MachineFrameInfo &MFI = MF->getFrameInfo(); 3763 if (MFI.getObjectAlign(FI) < Align(4)) 3764 MFI.setObjectAlignment(FI, Align(4)); 3765 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3766 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3767 return; 3768 } else { 3769 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3770 ARM::t2ADDri : ARM::ADDri); 3771 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3772 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3773 CurDAG->getRegister(0, MVT::i32) }; 3774 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3775 return; 3776 } 3777 } 3778 case ISD::INSERT_VECTOR_ELT: { 3779 if (tryInsertVectorElt(N)) 3780 return; 3781 break; 3782 } 3783 case ISD::SRL: 3784 if (tryV6T2BitfieldExtractOp(N, false)) 3785 return; 3786 break; 3787 case ISD::SIGN_EXTEND_INREG: 3788 case ISD::SRA: 3789 if (tryV6T2BitfieldExtractOp(N, true)) 3790 return; 3791 break; 3792 case ISD::FP_TO_UINT: 3793 case ISD::FP_TO_SINT: 3794 case ISD::FP_TO_UINT_SAT: 3795 case ISD::FP_TO_SINT_SAT: 3796 if (tryFP_TO_INT(N, dl)) 3797 return; 3798 break; 3799 case ISD::FMUL: 3800 if (tryFMULFixed(N, dl)) 3801 return; 3802 break; 3803 case ISD::MUL: 3804 if (Subtarget->isThumb1Only()) 3805 break; 3806 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3807 unsigned RHSV = C->getZExtValue(); 3808 if (!RHSV) break; 3809 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
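// e.g. x * 9 == x + (x << 3), so a multiply by 2^n+1 becomes an ADD with an
// LSL shifter operand; the 2^n-1 case below similarly becomes an RSB, since
// e.g. x * 7 == (x << 3) - x.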
3810 unsigned ShImm = Log2_32(RHSV-1); 3811 if (ShImm >= 32) 3812 break; 3813 SDValue V = N->getOperand(0); 3814 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3815 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3816 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3817 if (Subtarget->isThumb()) { 3818 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3819 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3820 return; 3821 } else { 3822 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3823 Reg0 }; 3824 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3825 return; 3826 } 3827 } 3828 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3829 unsigned ShImm = Log2_32(RHSV+1); 3830 if (ShImm >= 32) 3831 break; 3832 SDValue V = N->getOperand(0); 3833 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3834 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3835 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3836 if (Subtarget->isThumb()) { 3837 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3838 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3839 return; 3840 } else { 3841 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3842 Reg0 }; 3843 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3844 return; 3845 } 3846 } 3847 } 3848 break; 3849 case ISD::AND: { 3850 // Check for unsigned bitfield extract 3851 if (tryV6T2BitfieldExtractOp(N, false)) 3852 return; 3853 3854 // If an immediate is used in an AND node, it is possible that the immediate 3855 // can be more optimally materialized when negated. If this is the case we 3856 // can negate the immediate and use a BIC instead. 3857 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3858 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3859 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3860 3861 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 3862 // immediate can be negated and fit in the immediate operand of 3863 // a t2BIC, don't do any manual transform here as this can be 3864 // handled by the generic ISel machinery. 3865 bool PreferImmediateEncoding = 3866 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3867 if (!PreferImmediateEncoding && 3868 ConstantMaterializationCost(Imm, Subtarget) > 3869 ConstantMaterializationCost(~Imm, Subtarget)) { 3870 // The current immediate costs more to materialize than a negated 3871 // immediate, so negate the immediate and use a BIC. 3872 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32); 3873 // If the new constant didn't exist before, reposition it in the topological 3874 // ordering so it is just before N. Otherwise, don't touch its location. 3875 if (NewImm->getNodeId() == -1) 3876 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3877 3878 if (!Subtarget->hasThumb2()) { 3879 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3880 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3881 CurDAG->getRegister(0, MVT::i32)}; 3882 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3883 return; 3884 } else { 3885 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3886 CurDAG->getRegister(0, MVT::i32), 3887 CurDAG->getRegister(0, MVT::i32)}; 3888 ReplaceNode(N, 3889 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3890 return; 3891 } 3892 } 3893 } 3894 3895 // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the lower 3896 // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That is, the
top 16 bits are 3897 // entirely contributed by c2 and the lower 16 bits are entirely contributed 3898 // by x. That's equal to (or (and x, 0xffff), (and c2, 0xffff0000)). 3899 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)". 3900 EVT VT = N->getValueType(0); 3901 if (VT != MVT::i32) 3902 break; 3903 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3904 ? ARM::t2MOVTi16 3905 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3906 if (!Opc) 3907 break; 3908 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3909 N1C = dyn_cast<ConstantSDNode>(N1); 3910 if (!N1C) 3911 break; 3912 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3913 SDValue N2 = N0.getOperand(1); 3914 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3915 if (!N2C) 3916 break; 3917 unsigned N1CVal = N1C->getZExtValue(); 3918 unsigned N2CVal = N2C->getZExtValue(); 3919 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3920 (N1CVal & 0xffffU) == 0xffffU && 3921 (N2CVal & 0xffffU) == 0x0U) { 3922 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3923 dl, MVT::i32); 3924 SDValue Ops[] = { N0.getOperand(0), Imm16, 3925 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3927 return; 3928 } 3929 } 3930 3931 break; 3932 } 3933 case ARMISD::UMAAL: { 3934 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3935 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3936 N->getOperand(2), N->getOperand(3), 3937 getAL(CurDAG, dl), 3938 CurDAG->getRegister(0, MVT::i32) }; 3939 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3940 return; 3941 } 3942 case ARMISD::UMLAL: { 3943 if (Subtarget->isThumb()) { 3944 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3945 N->getOperand(3), getAL(CurDAG, dl), 3946 CurDAG->getRegister(0, MVT::i32)}; 3947 ReplaceNode( 3948 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3949 return; 3950 } else { 3951 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3952 N->getOperand(3), getAL(CurDAG, dl), 3953 CurDAG->getRegister(0, MVT::i32), 3954 CurDAG->getRegister(0, MVT::i32) }; 3955 ReplaceNode(N, CurDAG->getMachineNode( 3956 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl, 3957 MVT::i32, MVT::i32, Ops)); 3958 return; 3959 } 3960 } 3961 case ARMISD::SMLAL: { 3962 if (Subtarget->isThumb()) { 3963 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3964 N->getOperand(3), getAL(CurDAG, dl), 3965 CurDAG->getRegister(0, MVT::i32)}; 3966 ReplaceNode( 3967 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3968 return; 3969 } else { 3970 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3971 N->getOperand(3), getAL(CurDAG, dl), 3972 CurDAG->getRegister(0, MVT::i32), 3973 CurDAG->getRegister(0, MVT::i32) }; 3974 ReplaceNode(N, CurDAG->getMachineNode( 3975 Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5, dl, 3976 MVT::i32, MVT::i32, Ops)); 3977 return; 3978 } 3979 } 3980 case ARMISD::SUBE: { 3981 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3982 break; 3983 // Look for a pattern to match SMMLS 3984 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3985 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3986 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3987 !SDValue(N, 1).use_empty()) 3988 break; 3989 3990 if (Subtarget->isThumb()) 3991 assert(Subtarget->hasThumb2() && 3992 "This pattern should not be generated for Thumb"); 3993 3994 SDValue SmulLoHi = N->getOperand(1); 3995 SDValue Subc = N->getOperand(2); 3996 SDValue Zero = Subc.getOperand(0); 3997 3998 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || 3999 N->getOperand(1) != SmulLoHi.getValue(1) || 4000 N->getOperand(2) != Subc.getValue(1)) 4001 break; 4002 4003 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 4004 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 4005 N->getOperand(0), getAL(CurDAG, dl), 4006 CurDAG->getRegister(0, MVT::i32) }; 4007 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 4008 return; 4009 } 4010 case ISD::LOAD: { 4011 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4012 return; 4013 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 4014 if (tryT2IndexedLoad(N)) 4015 return; 4016 } else if (Subtarget->isThumb()) { 4017 if (tryT1IndexedLoad(N)) 4018 return; 4019 } else if (tryARMIndexedLoad(N)) 4020 return; 4021 // Other cases are autogenerated. 4022 break; 4023 } 4024 case ISD::MLOAD: 4025 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4026 return; 4027 // Other cases are autogenerated. 4028 break; 4029 case ARMISD::WLSSETUP: { 4030 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4031 N->getOperand(0)); 4032 ReplaceUses(N, New); 4033 CurDAG->RemoveDeadNode(N); 4034 return; 4035 } 4036 case ARMISD::WLS: { 4037 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4038 N->getOperand(1), N->getOperand(2), 4039 N->getOperand(0)); 4040 ReplaceUses(N, New); 4041 CurDAG->RemoveDeadNode(N); 4042 return; 4043 } 4044 case ARMISD::LE: { 4045 SDValue Ops[] = { N->getOperand(1), 4046 N->getOperand(2), 4047 N->getOperand(0) }; 4048 unsigned Opc = ARM::t2LoopEnd; 4049 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4050 ReplaceUses(N, New); 4051 CurDAG->RemoveDeadNode(N); 4052 return; 4053 } 4054 case ARMISD::LDRD: { 4055 if (Subtarget->isThumb2()) 4056 break; // TableGen handles isel in this case. 4057 SDValue Base, RegOffset, ImmOffset; 4058 const SDValue &Chain = N->getOperand(0); 4059 const SDValue &Addr = N->getOperand(1); 4060 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4061 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4062 // The register-offset variant of LDRD mandates that the register 4063 // allocated to RegOffset is not reused in any of the remaining operands. 4064 // This restriction is currently not enforced. Therefore emitting this 4065 // variant is explicitly avoided. 
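// Fall back to using the whole address as the base with no register offset.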
4066 Base = Addr; 4067 RegOffset = CurDAG->getRegister(0, MVT::i32); 4068 } 4069 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4070 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4071 {MVT::Untyped, MVT::Other}, Ops); 4072 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4073 SDValue(New, 0)); 4074 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4075 SDValue(New, 0)); 4076 transferMemOperands(N, New); 4077 ReplaceUses(SDValue(N, 0), Lo); 4078 ReplaceUses(SDValue(N, 1), Hi); 4079 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4080 CurDAG->RemoveDeadNode(N); 4081 return; 4082 } 4083 case ARMISD::STRD: { 4084 if (Subtarget->isThumb2()) 4085 break; // TableGen handles isel in this case. 4086 SDValue Base, RegOffset, ImmOffset; 4087 const SDValue &Chain = N->getOperand(0); 4088 const SDValue &Addr = N->getOperand(3); 4089 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4090 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4091 // The register-offset variant of STRD mandates that the register 4092 // allocated to RegOffset is not reused in any of the remaining operands. 4093 // This restriction is currently not enforced. Therefore emitting this 4094 // variant is explicitly avoided. 4095 Base = Addr; 4096 RegOffset = CurDAG->getRegister(0, MVT::i32); 4097 } 4098 SDNode *RegPair = 4099 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4100 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4101 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4102 transferMemOperands(N, New); 4103 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4104 CurDAG->RemoveDeadNode(N); 4105 return; 4106 } 4107 case ARMISD::LOOP_DEC: { 4108 SDValue Ops[] = { N->getOperand(1), 4109 N->getOperand(2), 4110 N->getOperand(0) }; 4111 SDNode *Dec = 4112 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4113 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4114 ReplaceUses(N, Dec); 4115 CurDAG->RemoveDeadNode(N); 4116 return; 4117 } 4118 case ARMISD::BRCOND: { 4119 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4120 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4121 // Pattern complexity = 6 cost = 1 size = 0 4122 4123 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4124 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4125 // Pattern complexity = 6 cost = 1 size = 0 4126 4127 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4128 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4129 // Pattern complexity = 6 cost = 1 size = 0 4130 4131 unsigned Opc = Subtarget->isThumb() ? 4132 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4133 SDValue Chain = N->getOperand(0); 4134 SDValue N1 = N->getOperand(1); 4135 SDValue N2 = N->getOperand(2); 4136 SDValue N3 = N->getOperand(3); 4137 SDValue InGlue = N->getOperand(4); 4138 assert(N1.getOpcode() == ISD::BasicBlock); 4139 assert(N2.getOpcode() == ISD::Constant); 4140 assert(N3.getOpcode() == ISD::Register); 4141 4142 unsigned CC = (unsigned)N2->getAsZExtVal(); 4143 4144 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4145 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4146 SDValue Int = InGlue.getOperand(0); 4147 uint64_t ID = Int->getConstantOperandVal(1); 4148 4149 // Handle low-overhead loops. 
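// A loop_decrement_reg intrinsic feeding this conditional branch is
// re-emitted here as a t2LoopDec / t2LoopEnd pair. These are pseudos that
// later passes are expected to either combine into a real low-overhead loop
// (DLS/LE) or expand back into an ordinary decrement and branch.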
4150 if (ID == Intrinsic::loop_decrement_reg) { 4151 SDValue Elements = Int.getOperand(2); 4152 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3), 4153 dl, MVT::i32); 4154 4155 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4156 SDNode *LoopDec = 4157 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4158 CurDAG->getVTList(MVT::i32, MVT::Other), 4159 Args); 4160 ReplaceUses(Int.getNode(), LoopDec); 4161 4162 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4163 SDNode *LoopEnd = 4164 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4165 4166 ReplaceUses(N, LoopEnd); 4167 CurDAG->RemoveDeadNode(N); 4168 CurDAG->RemoveDeadNode(InGlue.getNode()); 4169 CurDAG->RemoveDeadNode(Int.getNode()); 4170 return; 4171 } 4172 } 4173 4174 bool SwitchEQNEToPLMI; 4175 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4176 InGlue = N->getOperand(4); 4177 4178 if (SwitchEQNEToPLMI) { 4179 switch ((ARMCC::CondCodes)CC) { 4180 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4181 case ARMCC::NE: 4182 CC = (unsigned)ARMCC::MI; 4183 break; 4184 case ARMCC::EQ: 4185 CC = (unsigned)ARMCC::PL; 4186 break; 4187 } 4188 } 4189 } 4190 4191 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4192 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue }; 4193 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4194 MVT::Glue, Ops); 4195 Chain = SDValue(ResNode, 0); 4196 if (N->getNumValues() == 2) { 4197 InGlue = SDValue(ResNode, 1); 4198 ReplaceUses(SDValue(N, 1), InGlue); 4199 } 4200 ReplaceUses(SDValue(N, 0), 4201 SDValue(Chain.getNode(), Chain.getResNo())); 4202 CurDAG->RemoveDeadNode(N); 4203 return; 4204 } 4205 4206 case ARMISD::CMPZ: { 4207 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4208 // This allows us to avoid materializing the expensive negative constant. 4209 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4210 // for its glue output. 4211 SDValue X = N->getOperand(0); 4212 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4213 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4214 int64_t Addend = -C->getSExtValue(); 4215 4216 SDNode *Add = nullptr; 4217 // ADDS can be better than CMN if the immediate fits in a 4218 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4219 // Outside that range we can just use a CMN which is 32-bit but has a 4220 // 12-bit immediate range. 4221 if (Addend < 1<<8) { 4222 if (Subtarget->isThumb2()) { 4223 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4224 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4225 CurDAG->getRegister(0, MVT::i32) }; 4226 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4227 } else { 4228 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 4229 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4230 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4231 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4232 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4233 } 4234 } 4235 if (Add) { 4236 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4237 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4238 } 4239 } 4240 // Other cases are autogenerated. 
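// (A concrete instance of the rewrite above: CMPZ r0, #-42 would need the
// constant -42 materialized, whereas ADDS with #42 computes r0 - (-42) ==
// r0 + 42 directly, so the subsequent compare-with-zero sees the same
// value.)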
4241 break; 4242 } 4243 4244 case ARMISD::CMOV: { 4245 SDValue InGlue = N->getOperand(4); 4246 4247 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4248 bool SwitchEQNEToPLMI; 4249 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4250 4251 if (SwitchEQNEToPLMI) { 4252 SDValue ARMcc = N->getOperand(2); 4253 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); 4254 4255 switch (CC) { 4256 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4257 case ARMCC::NE: 4258 CC = ARMCC::MI; 4259 break; 4260 case ARMCC::EQ: 4261 CC = ARMCC::PL; 4262 break; 4263 } 4264 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4265 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4266 N->getOperand(3), N->getOperand(4)}; 4267 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4268 } 4269 4270 } 4271 // Other cases are autogenerated. 4272 break; 4273 } 4274 case ARMISD::VZIP: { 4275 EVT VT = N->getValueType(0); 4276 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4277 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32}; 4278 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32}; 4279 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4280 SDValue Pred = getAL(CurDAG, dl); 4281 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4282 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4283 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4284 return; 4285 } 4286 case ARMISD::VUZP: { 4287 EVT VT = N->getValueType(0); 4288 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4289 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32}; 4290 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32}; 4291 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4292 SDValue Pred = getAL(CurDAG, dl); 4293 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4294 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4295 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4296 return; 4297 } 4298 case ARMISD::VTRN: { 4299 EVT VT = N->getValueType(0); 4300 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32}; 4301 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32}; 4302 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4303 SDValue Pred = getAL(CurDAG, dl); 4304 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4305 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4306 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4307 return; 4308 } 4309 case ARMISD::BUILD_VECTOR: { 4310 EVT VecVT = N->getValueType(0); 4311 EVT EltVT = VecVT.getVectorElementType(); 4312 unsigned NumElts = VecVT.getVectorNumElements(); 4313 if (EltVT == MVT::f64) { 4314 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4315 ReplaceNode( 4316 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4317 return; 4318 } 4319 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4320 if (NumElts == 2) { 4321 ReplaceNode( 4322 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4323 return; 4324 } 4325 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4326 ReplaceNode(N, 4327 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4328 N->getOperand(2), N->getOperand(3))); 4329 return; 4330 } 4331 4332 case ARMISD::VLD1DUP: { 4333 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4334 ARM::VLD1DUPd32 }; 4335 static const uint16_t 
QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4336 ARM::VLD1DUPq32 }; 4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4338 return; 4339 } 4340 4341 case ARMISD::VLD2DUP: { 4342 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4343 ARM::VLD2DUPd32 }; 4344 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4345 return; 4346 } 4347 4348 case ARMISD::VLD3DUP: { 4349 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4350 ARM::VLD3DUPd16Pseudo, 4351 ARM::VLD3DUPd32Pseudo }; 4352 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4353 return; 4354 } 4355 4356 case ARMISD::VLD4DUP: { 4357 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4358 ARM::VLD4DUPd16Pseudo, 4359 ARM::VLD4DUPd32Pseudo }; 4360 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4361 return; 4362 } 4363 4364 case ARMISD::VLD1DUP_UPD: { 4365 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4366 ARM::VLD1DUPd16wb_fixed, 4367 ARM::VLD1DUPd32wb_fixed }; 4368 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4369 ARM::VLD1DUPq16wb_fixed, 4370 ARM::VLD1DUPq32wb_fixed }; 4371 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4372 return; 4373 } 4374 4375 case ARMISD::VLD2DUP_UPD: { 4376 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4377 ARM::VLD2DUPd16wb_fixed, 4378 ARM::VLD2DUPd32wb_fixed, 4379 ARM::VLD1q64wb_fixed }; 4380 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4381 ARM::VLD2DUPq16EvenPseudo, 4382 ARM::VLD2DUPq32EvenPseudo }; 4383 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4384 ARM::VLD2DUPq16OddPseudoWB_fixed, 4385 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4386 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4387 return; 4388 } 4389 4390 case ARMISD::VLD3DUP_UPD: { 4391 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4392 ARM::VLD3DUPd16Pseudo_UPD, 4393 ARM::VLD3DUPd32Pseudo_UPD, 4394 ARM::VLD1d64TPseudoWB_fixed }; 4395 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4396 ARM::VLD3DUPq16EvenPseudo, 4397 ARM::VLD3DUPq32EvenPseudo }; 4398 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4399 ARM::VLD3DUPq16OddPseudo_UPD, 4400 ARM::VLD3DUPq32OddPseudo_UPD }; 4401 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4402 return; 4403 } 4404 4405 case ARMISD::VLD4DUP_UPD: { 4406 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4407 ARM::VLD4DUPd16Pseudo_UPD, 4408 ARM::VLD4DUPd32Pseudo_UPD, 4409 ARM::VLD1d64QPseudoWB_fixed }; 4410 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4411 ARM::VLD4DUPq16EvenPseudo, 4412 ARM::VLD4DUPq32EvenPseudo }; 4413 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4414 ARM::VLD4DUPq16OddPseudo_UPD, 4415 ARM::VLD4DUPq32OddPseudo_UPD }; 4416 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4417 return; 4418 } 4419 4420 case ARMISD::VLD1_UPD: { 4421 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4422 ARM::VLD1d16wb_fixed, 4423 ARM::VLD1d32wb_fixed, 4424 ARM::VLD1d64wb_fixed }; 4425 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4426 ARM::VLD1q16wb_fixed, 4427 ARM::VLD1q32wb_fixed, 4428 ARM::VLD1q64wb_fixed }; 4429 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4430 return; 4431 } 4432 4433 case ARMISD::VLD2_UPD: { 4434 if (Subtarget->hasNEON()) { 4435 static const uint16_t DOpcodes[] 
= { 4436 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4437 ARM::VLD1q64wb_fixed}; 4438 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4439 ARM::VLD2q16PseudoWB_fixed, 4440 ARM::VLD2q32PseudoWB_fixed}; 4441 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4442 } else { 4443 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4444 ARM::MVE_VLD21_8_wb}; 4445 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4446 ARM::MVE_VLD21_16_wb}; 4447 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4448 ARM::MVE_VLD21_32_wb}; 4449 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4450 SelectMVE_VLD(N, 2, Opcodes, true); 4451 } 4452 return; 4453 } 4454 4455 case ARMISD::VLD3_UPD: { 4456 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4457 ARM::VLD3d16Pseudo_UPD, 4458 ARM::VLD3d32Pseudo_UPD, 4459 ARM::VLD1d64TPseudoWB_fixed}; 4460 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4461 ARM::VLD3q16Pseudo_UPD, 4462 ARM::VLD3q32Pseudo_UPD }; 4463 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4464 ARM::VLD3q16oddPseudo_UPD, 4465 ARM::VLD3q32oddPseudo_UPD }; 4466 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4467 return; 4468 } 4469 4470 case ARMISD::VLD4_UPD: { 4471 if (Subtarget->hasNEON()) { 4472 static const uint16_t DOpcodes[] = { 4473 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4474 ARM::VLD1d64QPseudoWB_fixed}; 4475 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4476 ARM::VLD4q16Pseudo_UPD, 4477 ARM::VLD4q32Pseudo_UPD}; 4478 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4479 ARM::VLD4q16oddPseudo_UPD, 4480 ARM::VLD4q32oddPseudo_UPD}; 4481 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4482 } else { 4483 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4484 ARM::MVE_VLD42_8, 4485 ARM::MVE_VLD43_8_wb}; 4486 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4487 ARM::MVE_VLD42_16, 4488 ARM::MVE_VLD43_16_wb}; 4489 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4490 ARM::MVE_VLD42_32, 4491 ARM::MVE_VLD43_32_wb}; 4492 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4493 SelectMVE_VLD(N, 4, Opcodes, true); 4494 } 4495 return; 4496 } 4497 4498 case ARMISD::VLD1x2_UPD: { 4499 if (Subtarget->hasNEON()) { 4500 static const uint16_t DOpcodes[] = { 4501 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4502 ARM::VLD1q64wb_fixed}; 4503 static const uint16_t QOpcodes[] = { 4504 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4505 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4506 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4507 return; 4508 } 4509 break; 4510 } 4511 4512 case ARMISD::VLD1x3_UPD: { 4513 if (Subtarget->hasNEON()) { 4514 static const uint16_t DOpcodes[] = { 4515 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4516 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4517 static const uint16_t QOpcodes0[] = { 4518 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4519 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4520 static const uint16_t QOpcodes1[] = { 4521 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4522 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4523 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4524 return; 4525 } 4526 break; 4527 } 4528 4529 case ARMISD::VLD1x4_UPD: { 4530 if 
(Subtarget->hasNEON()) { 4531 static const uint16_t DOpcodes[] = { 4532 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4533 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4534 static const uint16_t QOpcodes0[] = { 4535 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4536 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4537 static const uint16_t QOpcodes1[] = { 4538 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4539 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4540 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4541 return; 4542 } 4543 break; 4544 } 4545 4546 case ARMISD::VLD2LN_UPD: { 4547 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4548 ARM::VLD2LNd16Pseudo_UPD, 4549 ARM::VLD2LNd32Pseudo_UPD }; 4550 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4551 ARM::VLD2LNq32Pseudo_UPD }; 4552 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4553 return; 4554 } 4555 4556 case ARMISD::VLD3LN_UPD: { 4557 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4558 ARM::VLD3LNd16Pseudo_UPD, 4559 ARM::VLD3LNd32Pseudo_UPD }; 4560 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4561 ARM::VLD3LNq32Pseudo_UPD }; 4562 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4563 return; 4564 } 4565 4566 case ARMISD::VLD4LN_UPD: { 4567 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4568 ARM::VLD4LNd16Pseudo_UPD, 4569 ARM::VLD4LNd32Pseudo_UPD }; 4570 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4571 ARM::VLD4LNq32Pseudo_UPD }; 4572 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4573 return; 4574 } 4575 4576 case ARMISD::VST1_UPD: { 4577 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4578 ARM::VST1d16wb_fixed, 4579 ARM::VST1d32wb_fixed, 4580 ARM::VST1d64wb_fixed }; 4581 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4582 ARM::VST1q16wb_fixed, 4583 ARM::VST1q32wb_fixed, 4584 ARM::VST1q64wb_fixed }; 4585 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4586 return; 4587 } 4588 4589 case ARMISD::VST2_UPD: { 4590 if (Subtarget->hasNEON()) { 4591 static const uint16_t DOpcodes[] = { 4592 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4593 ARM::VST1q64wb_fixed}; 4594 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4595 ARM::VST2q16PseudoWB_fixed, 4596 ARM::VST2q32PseudoWB_fixed}; 4597 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4598 return; 4599 } 4600 break; 4601 } 4602 4603 case ARMISD::VST3_UPD: { 4604 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4605 ARM::VST3d16Pseudo_UPD, 4606 ARM::VST3d32Pseudo_UPD, 4607 ARM::VST1d64TPseudoWB_fixed}; 4608 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4609 ARM::VST3q16Pseudo_UPD, 4610 ARM::VST3q32Pseudo_UPD }; 4611 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4612 ARM::VST3q16oddPseudo_UPD, 4613 ARM::VST3q32oddPseudo_UPD }; 4614 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4615 return; 4616 } 4617 4618 case ARMISD::VST4_UPD: { 4619 if (Subtarget->hasNEON()) { 4620 static const uint16_t DOpcodes[] = { 4621 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4622 ARM::VST1d64QPseudoWB_fixed}; 4623 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4624 ARM::VST4q16Pseudo_UPD, 4625 ARM::VST4q32Pseudo_UPD}; 4626 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4627 ARM::VST4q16oddPseudo_UPD, 4628 ARM::VST4q32oddPseudo_UPD}; 4629 SelectVST(N, true, 4, 
DOpcodes, QOpcodes0, QOpcodes1); 4630 return; 4631 } 4632 break; 4633 } 4634 4635 case ARMISD::VST1x2_UPD: { 4636 if (Subtarget->hasNEON()) { 4637 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4638 ARM::VST1q16wb_fixed, 4639 ARM::VST1q32wb_fixed, 4640 ARM::VST1q64wb_fixed}; 4641 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4642 ARM::VST1d16QPseudoWB_fixed, 4643 ARM::VST1d32QPseudoWB_fixed, 4644 ARM::VST1d64QPseudoWB_fixed }; 4645 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4646 return; 4647 } 4648 break; 4649 } 4650 4651 case ARMISD::VST1x3_UPD: { 4652 if (Subtarget->hasNEON()) { 4653 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4654 ARM::VST1d16TPseudoWB_fixed, 4655 ARM::VST1d32TPseudoWB_fixed, 4656 ARM::VST1d64TPseudoWB_fixed }; 4657 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4658 ARM::VST1q16LowTPseudo_UPD, 4659 ARM::VST1q32LowTPseudo_UPD, 4660 ARM::VST1q64LowTPseudo_UPD }; 4661 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD, 4662 ARM::VST1q16HighTPseudo_UPD, 4663 ARM::VST1q32HighTPseudo_UPD, 4664 ARM::VST1q64HighTPseudo_UPD }; 4665 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4666 return; 4667 } 4668 break; 4669 } 4670 4671 case ARMISD::VST1x4_UPD: { 4672 if (Subtarget->hasNEON()) { 4673 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4674 ARM::VST1d16QPseudoWB_fixed, 4675 ARM::VST1d32QPseudoWB_fixed, 4676 ARM::VST1d64QPseudoWB_fixed }; 4677 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4678 ARM::VST1q16LowQPseudo_UPD, 4679 ARM::VST1q32LowQPseudo_UPD, 4680 ARM::VST1q64LowQPseudo_UPD }; 4681 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4682 ARM::VST1q16HighQPseudo_UPD, 4683 ARM::VST1q32HighQPseudo_UPD, 4684 ARM::VST1q64HighQPseudo_UPD }; 4685 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4686 return; 4687 } 4688 break; 4689 } 4690 case ARMISD::VST2LN_UPD: { 4691 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4692 ARM::VST2LNd16Pseudo_UPD, 4693 ARM::VST2LNd32Pseudo_UPD }; 4694 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4695 ARM::VST2LNq32Pseudo_UPD }; 4696 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4697 return; 4698 } 4699 4700 case ARMISD::VST3LN_UPD: { 4701 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4702 ARM::VST3LNd16Pseudo_UPD, 4703 ARM::VST3LNd32Pseudo_UPD }; 4704 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4705 ARM::VST3LNq32Pseudo_UPD }; 4706 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4707 return; 4708 } 4709 4710 case ARMISD::VST4LN_UPD: { 4711 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4712 ARM::VST4LNd16Pseudo_UPD, 4713 ARM::VST4LNd32Pseudo_UPD }; 4714 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4715 ARM::VST4LNq32Pseudo_UPD }; 4716 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4717 return; 4718 } 4719 4720 case ISD::INTRINSIC_VOID: 4721 case ISD::INTRINSIC_W_CHAIN: { 4722 unsigned IntNo = N->getConstantOperandVal(1); 4723 switch (IntNo) { 4724 default: 4725 break; 4726 4727 case Intrinsic::arm_mrrc: 4728 case Intrinsic::arm_mrrc2: { 4729 SDLoc dl(N); 4730 SDValue Chain = N->getOperand(0); 4731 unsigned Opc; 4732 4733 if (Subtarget->isThumb()) 4734 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4735 else 4736 Opc = (IntNo == Intrinsic::arm_mrrc ? 

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits
      // of the encoded instruction are always '1111'. It is possible in
      // assembly language to specify AL as a predicate to mrrc2, but it makes
      // no difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
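      // (In Thumb mode, t2STREXD takes the two i32 halves as separate
      // operands; in ARM mode, STREXD takes a single GPRPair, which is built
      // below with createGPRPairNode.)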
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64 bit), obtain the integer operands from
// its fields, and add these operands to the provided vector.
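// For example, the 32-bit form "cp15:0:c13:c0:3" would yield the five
// operands {15, 0, 13, 0, 3} (an illustrative string; any name matching the
// formats above is handled the same way).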
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
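// For example, "primask" would be expected to yield SYSm value 0x10 and
// "control" 0x14 on M-profile cores; the authoritative encodings live in the
// tablegen'd system register tables, not here.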
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set by
  // the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class, so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;
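
  // e.g. "spsr_fc" splits into Reg == "spsr" and Flags == "fc"; on A and R
  // class cores, getARClassRegisterMask below turns this into a mask with the
  // R bit set and the f and c field bits enabled.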
  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISelLegacy(TM, OptLevel);
}