//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                 EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands,
  /// arm_mve_vmlldava_[predicated].
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands,
  /// int_arm_mve_vrmlldavha[_predicated].
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                           const uint16_t *OpcodesS, const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if the node forms an integer ABS pattern.
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but
  /// only if it simplifies the materialization of the constant. Returns true
  /// if it is, and assigns to PowerOfTwo the power of two that should be
  /// extracted out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
}

char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1+tz), (c2>>tz)), tz)) where tz is the
    // number of trailing zeros of c2. The left shift would be folded as a
    // shifter operand of 'add' and the 'and' and 'srl' would become a
    // bit-extraction node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free, e.g. on
      // Swift a left shift of 1 or 2 as a shifter operand is free, but others
      // are not. For example:
      //   ubfx  r3, r1, #16, #8
      //   ldr.w r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w r9, #1020
      //   and.w r2, r9, r1, lsr #14
      //   ldr   r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure the first operand is not a shifter operand, which would
    // prevent folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
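    // As an illustrative example (values chosen here, not taken from the
    // code above): with c1 == 16 and c2 == 1020 (so TZ == 2 and
    // c2 >> TZ == 255), the DAG
    //   (add X1, (and (srl X2, 16), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 18), 255), 2))
    // where the and+srl pair can now select as UBFX and the shl folds into
    // the add as a shifter operand.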
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA /
/// MLS node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards
/// (at least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse())
    return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst)
    return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse())
    return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0)
    return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0)
      return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand, do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift)
    return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift)
    return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate-shift form, so bail out
  // here if the shift amount is not a register.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS)
    return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
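// For example (an illustrative case): in (or (shl x, 8), 0xff) the two
// operands can never have a set bit in common, so the or computes the same
// value as an add and may use the more compact add-based patterns.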
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub
                                                     : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
                               ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly, which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
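  // For example (illustrative values): with Scale == 4 this accepts byte
  // offsets 0, 4, ..., 124, so (add r0, #16) yields Base == r0 and
  // OffImm == 4, since OffImm holds the offset divided by Scale.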
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
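  // For example (illustrative values): both (sub r1, #16) and (add r1, #-16)
  // match with OffImm == -16, while non-negative offsets are rejected below
  // and left for the imm12 form.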
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
                               ? cast<LoadSDNode>(Op)->getAddressingMode()
                               : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7-bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
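  // For example (illustrative values): (add r0, #8) yields Base == r0 and
  // OffImm == 2, since the immediate is encoded as the byte offset divided by
  // 4; any unencodable offset falls back to Base == N with a zero OffImm.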
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load
  // to look, however, so we use a pseudo here and switch it for a tLDMIA_UPD
  // after ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a
  // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
  // or alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ?
ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; 1823 else if (Alignment >= Align(2) && 1824 (CanChangeType || LoadedVT == MVT::v8i16 || 1825 LoadedVT == MVT::v8f16) && 1826 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) 1827 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; 1828 else if ((CanChangeType || LoadedVT == MVT::v16i8) && 1829 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) 1830 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; 1831 else 1832 return false; 1833 1834 SDValue Ops[] = {Base, 1835 NewOffset, 1836 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), 1837 PredReg, 1838 CurDAG->getRegister(0, MVT::i32), // tp_reg 1839 Chain}; 1840 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, 1841 N->getValueType(0), MVT::Other, Ops); 1842 transferMemOperands(N, New); 1843 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 1844 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 1845 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 1846 CurDAG->RemoveDeadNode(N); 1847 return true; 1848 } 1849 1850 /// Form a GPRPair pseudo register from a pair of GPR regs. 1851 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1852 SDLoc dl(V0.getNode()); 1853 SDValue RegClass = 1854 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1855 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1856 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1857 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1858 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1859 } 1860 1861 /// Form a D register from a pair of S registers. 1862 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1863 SDLoc dl(V0.getNode()); 1864 SDValue RegClass = 1865 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1866 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1867 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1868 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1869 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1870 } 1871 1872 /// Form a quad register from a pair of D registers. 1873 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1874 SDLoc dl(V0.getNode()); 1875 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1876 MVT::i32); 1877 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1878 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1879 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1880 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1881 } 1882 1883 /// Form 4 consecutive D registers from a pair of Q registers. 1884 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1885 SDLoc dl(V0.getNode()); 1886 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1887 MVT::i32); 1888 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1889 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1890 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1891 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1892 } 1893 1894 /// Form 4 consecutive S registers. 
1895 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1896 SDValue V2, SDValue V3) { 1897 SDLoc dl(V0.getNode()); 1898 SDValue RegClass = 1899 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1900 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1901 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1902 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1903 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1904 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1905 V2, SubReg2, V3, SubReg3 }; 1906 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1907 } 1908 1909 /// Form 4 consecutive D registers. 1910 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1911 SDValue V2, SDValue V3) { 1912 SDLoc dl(V0.getNode()); 1913 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1914 MVT::i32); 1915 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1916 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1917 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1918 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1919 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1920 V2, SubReg2, V3, SubReg3 }; 1921 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1922 } 1923 1924 /// Form 4 consecutive Q registers. 1925 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1926 SDValue V2, SDValue V3) { 1927 SDLoc dl(V0.getNode()); 1928 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1929 MVT::i32); 1930 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1931 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1932 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1933 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1934 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1935 V2, SubReg2, V3, SubReg3 }; 1936 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1937 } 1938 1939 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1940 /// of a NEON VLD or VST instruction. The supported values depend on the 1941 /// number of registers being loaded. 
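/// For example, a 32-byte-aligned vld4 of four D registers (NumRegs == 4)
/// keeps an alignment operand of 32, while a vld1 of a single Q register
/// (NumRegs == 2) is clamped to 16 even if the address is 32-byte aligned.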
1942 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1943 unsigned NumVecs, bool is64BitVector) { 1944 unsigned NumRegs = NumVecs; 1945 if (!is64BitVector && NumVecs < 3) 1946 NumRegs *= 2; 1947 1948 unsigned Alignment = Align->getAsZExtVal(); 1949 if (Alignment >= 32 && NumRegs == 4) 1950 Alignment = 32; 1951 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1952 Alignment = 16; 1953 else if (Alignment >= 8) 1954 Alignment = 8; 1955 else 1956 Alignment = 0; 1957 1958 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1959 } 1960 1961 static bool isVLDfixed(unsigned Opc) 1962 { 1963 switch (Opc) { 1964 default: return false; 1965 case ARM::VLD1d8wb_fixed : return true; 1966 case ARM::VLD1d16wb_fixed : return true; 1967 case ARM::VLD1d64Qwb_fixed : return true; 1968 case ARM::VLD1d32wb_fixed : return true; 1969 case ARM::VLD1d64wb_fixed : return true; 1970 case ARM::VLD1d8TPseudoWB_fixed : return true; 1971 case ARM::VLD1d16TPseudoWB_fixed : return true; 1972 case ARM::VLD1d32TPseudoWB_fixed : return true; 1973 case ARM::VLD1d64TPseudoWB_fixed : return true; 1974 case ARM::VLD1d8QPseudoWB_fixed : return true; 1975 case ARM::VLD1d16QPseudoWB_fixed : return true; 1976 case ARM::VLD1d32QPseudoWB_fixed : return true; 1977 case ARM::VLD1d64QPseudoWB_fixed : return true; 1978 case ARM::VLD1q8wb_fixed : return true; 1979 case ARM::VLD1q16wb_fixed : return true; 1980 case ARM::VLD1q32wb_fixed : return true; 1981 case ARM::VLD1q64wb_fixed : return true; 1982 case ARM::VLD1DUPd8wb_fixed : return true; 1983 case ARM::VLD1DUPd16wb_fixed : return true; 1984 case ARM::VLD1DUPd32wb_fixed : return true; 1985 case ARM::VLD1DUPq8wb_fixed : return true; 1986 case ARM::VLD1DUPq16wb_fixed : return true; 1987 case ARM::VLD1DUPq32wb_fixed : return true; 1988 case ARM::VLD2d8wb_fixed : return true; 1989 case ARM::VLD2d16wb_fixed : return true; 1990 case ARM::VLD2d32wb_fixed : return true; 1991 case ARM::VLD2q8PseudoWB_fixed : return true; 1992 case ARM::VLD2q16PseudoWB_fixed : return true; 1993 case ARM::VLD2q32PseudoWB_fixed : return true; 1994 case ARM::VLD2DUPd8wb_fixed : return true; 1995 case ARM::VLD2DUPd16wb_fixed : return true; 1996 case ARM::VLD2DUPd32wb_fixed : return true; 1997 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true; 1998 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true; 1999 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true; 2000 } 2001 } 2002 2003 static bool isVSTfixed(unsigned Opc) 2004 { 2005 switch (Opc) { 2006 default: return false; 2007 case ARM::VST1d8wb_fixed : return true; 2008 case ARM::VST1d16wb_fixed : return true; 2009 case ARM::VST1d32wb_fixed : return true; 2010 case ARM::VST1d64wb_fixed : return true; 2011 case ARM::VST1q8wb_fixed : return true; 2012 case ARM::VST1q16wb_fixed : return true; 2013 case ARM::VST1q32wb_fixed : return true; 2014 case ARM::VST1q64wb_fixed : return true; 2015 case ARM::VST1d8TPseudoWB_fixed : return true; 2016 case ARM::VST1d16TPseudoWB_fixed : return true; 2017 case ARM::VST1d32TPseudoWB_fixed : return true; 2018 case ARM::VST1d64TPseudoWB_fixed : return true; 2019 case ARM::VST1d8QPseudoWB_fixed : return true; 2020 case ARM::VST1d16QPseudoWB_fixed : return true; 2021 case ARM::VST1d32QPseudoWB_fixed : return true; 2022 case ARM::VST1d64QPseudoWB_fixed : return true; 2023 case ARM::VST2d8wb_fixed : return true; 2024 case ARM::VST2d16wb_fixed : return true; 2025 case ARM::VST2d32wb_fixed : return true; 2026 case ARM::VST2q8PseudoWB_fixed : return true; 2027 case ARM::VST2q16PseudoWB_fixed : return true; 
2028 case ARM::VST2q32PseudoWB_fixed : return true; 2029 } 2030 } 2031 2032 // Get the register stride update opcode of a VLD/VST instruction that 2033 // is otherwise equivalent to the given fixed stride updating instruction. 2034 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 2035 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 2036 && "Incorrect fixed stride updating instruction."); 2037 switch (Opc) { 2038 default: break; 2039 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 2040 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 2041 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 2042 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2043 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2044 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2045 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2046 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2047 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2048 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2049 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register; 2050 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register; 2051 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register; 2052 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2053 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register; 2054 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register; 2055 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register; 2056 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2057 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2058 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2059 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2060 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2061 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2062 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2063 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register; 2064 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register; 2065 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register; 2066 2067 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2068 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2069 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2070 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2071 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2072 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2073 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2074 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2075 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register; 2076 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; 2077 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; 2078 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 2079 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; 2080 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; 2081 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; 2082 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2083 2084 case ARM::VLD2d8wb_fixed: 
return ARM::VLD2d8wb_register; 2085 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2086 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2087 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2088 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2089 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2090 2091 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2092 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2093 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2094 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2095 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2096 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2097 2098 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2099 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2100 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2101 } 2102 return Opc; // If not one we handle, return it unchanged. 2103 } 2104 2105 /// Returns true if the given increment is a Constant known to be equal to the 2106 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2107 /// be used. 2108 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2109 auto C = dyn_cast<ConstantSDNode>(Inc); 2110 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2111 } 2112 2113 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2114 const uint16_t *DOpcodes, 2115 const uint16_t *QOpcodes0, 2116 const uint16_t *QOpcodes1) { 2117 assert(Subtarget->hasNEON()); 2118 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2119 SDLoc dl(N); 2120 2121 SDValue MemAddr, Align; 2122 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2123 // nodes are not intrinsics. 2124 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2125 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2126 return; 2127 2128 SDValue Chain = N->getOperand(0); 2129 EVT VT = N->getValueType(0); 2130 bool is64BitVector = VT.is64BitVector(); 2131 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2132 2133 unsigned OpcodeIndex; 2134 switch (VT.getSimpleVT().SimpleTy) { 2135 default: llvm_unreachable("unhandled vld type"); 2136 // Double-register operations: 2137 case MVT::v8i8: OpcodeIndex = 0; break; 2138 case MVT::v4f16: 2139 case MVT::v4bf16: 2140 case MVT::v4i16: OpcodeIndex = 1; break; 2141 case MVT::v2f32: 2142 case MVT::v2i32: OpcodeIndex = 2; break; 2143 case MVT::v1i64: OpcodeIndex = 3; break; 2144 // Quad-register operations: 2145 case MVT::v16i8: OpcodeIndex = 0; break; 2146 case MVT::v8f16: 2147 case MVT::v8bf16: 2148 case MVT::v8i16: OpcodeIndex = 1; break; 2149 case MVT::v4f32: 2150 case MVT::v4i32: OpcodeIndex = 2; break; 2151 case MVT::v2f64: 2152 case MVT::v2i64: OpcodeIndex = 3; break; 2153 } 2154 2155 EVT ResTy; 2156 if (NumVecs == 1) 2157 ResTy = VT; 2158 else { 2159 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2160 if (!is64BitVector) 2161 ResTyElts *= 2; 2162 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2163 } 2164 std::vector<EVT> ResTys; 2165 ResTys.push_back(ResTy); 2166 if (isUpdating) 2167 ResTys.push_back(MVT::i32); 2168 ResTys.push_back(MVT::Other); 2169 2170 SDValue Pred = getAL(CurDAG, dl); 2171 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2172 SDNode *VLd; 2173 SmallVector<SDValue, 7> Ops; 2174 2175 // Double registers and VLD1/VLD2 quad registers are directly supported. 2176 if (is64BitVector || NumVecs <= 2) { 2177 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2178 QOpcodes0[OpcodeIndex]); 2179 Ops.push_back(MemAddr); 2180 Ops.push_back(Align); 2181 if (isUpdating) { 2182 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2183 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2184 if (!IsImmUpdate) { 2185 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2186 // check for the opcode rather than the number of vector elements. 2187 if (isVLDfixed(Opc)) 2188 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2189 Ops.push_back(Inc); 2190 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2191 // the operands if not such an opcode. 2192 } else if (!isVLDfixed(Opc)) 2193 Ops.push_back(Reg0); 2194 } 2195 Ops.push_back(Pred); 2196 Ops.push_back(Reg0); 2197 Ops.push_back(Chain); 2198 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2199 2200 } else { 2201 // Otherwise, quad registers are loaded with two separate instructions, 2202 // where one loads the even registers and the other loads the odd registers. 2203 EVT AddrTy = MemAddr.getValueType(); 2204 2205 // Load the even subregs. This is always an updating load, so that it 2206 // provides the address to the second load for the odd subregs. 2207 SDValue ImplDef = 2208 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2209 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2210 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2211 ResTy, AddrTy, MVT::Other, OpsA); 2212 Chain = SDValue(VLdA, 2); 2213 2214 // Load the odd subregs. 2215 Ops.push_back(SDValue(VLdA, 1)); 2216 Ops.push_back(Align); 2217 if (isUpdating) { 2218 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2219 assert(isa<ConstantSDNode>(Inc.getNode()) && 2220 "only constant post-increment update allowed for VLD3/4"); 2221 (void)Inc; 2222 Ops.push_back(Reg0); 2223 } 2224 Ops.push_back(SDValue(VLdA, 0)); 2225 Ops.push_back(Pred); 2226 Ops.push_back(Reg0); 2227 Ops.push_back(Chain); 2228 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2229 } 2230 2231 // Transfer memoperands. 2232 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2233 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2234 2235 if (NumVecs == 1) { 2236 ReplaceNode(N, VLd); 2237 return; 2238 } 2239 2240 // Extract out the subregisters. 2241 SDValue SuperReg = SDValue(VLd, 0); 2242 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2243 ARM::qsub_3 == ARM::qsub_0 + 3, 2244 "Unexpected subreg numbering"); 2245 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2246 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2247 ReplaceUses(SDValue(N, Vec), 2248 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2249 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2250 if (isUpdating) 2251 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2252 CurDAG->RemoveDeadNode(N); 2253 } 2254 2255 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2256 const uint16_t *DOpcodes, 2257 const uint16_t *QOpcodes0, 2258 const uint16_t *QOpcodes1) { 2259 assert(Subtarget->hasNEON()); 2260 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2261 SDLoc dl(N); 2262 2263 SDValue MemAddr, Align; 2264 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2265 // nodes are not intrinsics. 2266 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2267 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2268 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2269 return; 2270 2271 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2272 2273 SDValue Chain = N->getOperand(0); 2274 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2275 bool is64BitVector = VT.is64BitVector(); 2276 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2277 2278 unsigned OpcodeIndex; 2279 switch (VT.getSimpleVT().SimpleTy) { 2280 default: llvm_unreachable("unhandled vst type"); 2281 // Double-register operations: 2282 case MVT::v8i8: OpcodeIndex = 0; break; 2283 case MVT::v4f16: 2284 case MVT::v4bf16: 2285 case MVT::v4i16: OpcodeIndex = 1; break; 2286 case MVT::v2f32: 2287 case MVT::v2i32: OpcodeIndex = 2; break; 2288 case MVT::v1i64: OpcodeIndex = 3; break; 2289 // Quad-register operations: 2290 case MVT::v16i8: OpcodeIndex = 0; break; 2291 case MVT::v8f16: 2292 case MVT::v8bf16: 2293 case MVT::v8i16: OpcodeIndex = 1; break; 2294 case MVT::v4f32: 2295 case MVT::v4i32: OpcodeIndex = 2; break; 2296 case MVT::v2f64: 2297 case MVT::v2i64: OpcodeIndex = 3; break; 2298 } 2299 2300 std::vector<EVT> ResTys; 2301 if (isUpdating) 2302 ResTys.push_back(MVT::i32); 2303 ResTys.push_back(MVT::Other); 2304 2305 SDValue Pred = getAL(CurDAG, dl); 2306 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2307 SmallVector<SDValue, 7> Ops; 2308 2309 // Double registers and VST1/VST2 quad registers are directly supported. 2310 if (is64BitVector || NumVecs <= 2) { 2311 SDValue SrcReg; 2312 if (NumVecs == 1) { 2313 SrcReg = N->getOperand(Vec0Idx); 2314 } else if (is64BitVector) { 2315 // Form a REG_SEQUENCE to force register allocation. 2316 SDValue V0 = N->getOperand(Vec0Idx + 0); 2317 SDValue V1 = N->getOperand(Vec0Idx + 1); 2318 if (NumVecs == 2) 2319 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2320 else { 2321 SDValue V2 = N->getOperand(Vec0Idx + 2); 2322 // If it's a vst3, form a quad D-register and leave the last part as 2323 // an undef. 2324 SDValue V3 = (NumVecs == 3) 2325 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2326 : N->getOperand(Vec0Idx + 3); 2327 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2328 } 2329 } else { 2330 // Form a QQ register. 2331 SDValue Q0 = N->getOperand(Vec0Idx); 2332 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2333 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2334 } 2335 2336 unsigned Opc = (is64BitVector ? 
DOpcodes[OpcodeIndex] : 2337 QOpcodes0[OpcodeIndex]); 2338 Ops.push_back(MemAddr); 2339 Ops.push_back(Align); 2340 if (isUpdating) { 2341 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2342 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2343 if (!IsImmUpdate) { 2344 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2345 // check for the opcode rather than the number of vector elements. 2346 if (isVSTfixed(Opc)) 2347 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2348 Ops.push_back(Inc); 2349 } 2350 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2351 // the operands if not such an opcode. 2352 else if (!isVSTfixed(Opc)) 2353 Ops.push_back(Reg0); 2354 } 2355 Ops.push_back(SrcReg); 2356 Ops.push_back(Pred); 2357 Ops.push_back(Reg0); 2358 Ops.push_back(Chain); 2359 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2360 2361 // Transfer memoperands. 2362 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2363 2364 ReplaceNode(N, VSt); 2365 return; 2366 } 2367 2368 // Otherwise, quad registers are stored with two separate instructions, 2369 // where one stores the even registers and the other stores the odd registers. 2370 2371 // Form the QQQQ REG_SEQUENCE. 2372 SDValue V0 = N->getOperand(Vec0Idx + 0); 2373 SDValue V1 = N->getOperand(Vec0Idx + 1); 2374 SDValue V2 = N->getOperand(Vec0Idx + 2); 2375 SDValue V3 = (NumVecs == 3) 2376 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2377 : N->getOperand(Vec0Idx + 3); 2378 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2379 2380 // Store the even D registers. This is always an updating store, so that it 2381 // provides the address to the second store for the odd subregs. 2382 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2383 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2384 MemAddr.getValueType(), 2385 MVT::Other, OpsA); 2386 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2387 Chain = SDValue(VStA, 1); 2388 2389 // Store the odd D registers. 2390 Ops.push_back(SDValue(VStA, 0)); 2391 Ops.push_back(Align); 2392 if (isUpdating) { 2393 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2394 assert(isa<ConstantSDNode>(Inc.getNode()) && 2395 "only constant post-increment update allowed for VST3/4"); 2396 (void)Inc; 2397 Ops.push_back(Reg0); 2398 } 2399 Ops.push_back(RegSeq); 2400 Ops.push_back(Pred); 2401 Ops.push_back(Reg0); 2402 Ops.push_back(Chain); 2403 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2404 Ops); 2405 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2406 ReplaceNode(N, VStB); 2407 } 2408 2409 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2410 unsigned NumVecs, 2411 const uint16_t *DOpcodes, 2412 const uint16_t *QOpcodes) { 2413 assert(Subtarget->hasNEON()); 2414 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2415 SDLoc dl(N); 2416 2417 SDValue MemAddr, Align; 2418 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2419 // nodes are not intrinsics. 2420 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2421 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 2422 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2423 return; 2424 2425 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2426 2427 SDValue Chain = N->getOperand(0); 2428 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs); 2429 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2430 bool is64BitVector = VT.is64BitVector(); 2431 2432 unsigned Alignment = 0; 2433 if (NumVecs != 3) { 2434 Alignment = Align->getAsZExtVal(); 2435 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2436 if (Alignment > NumBytes) 2437 Alignment = NumBytes; 2438 if (Alignment < 8 && Alignment < NumBytes) 2439 Alignment = 0; 2440 // Alignment must be a power of two; make sure of that. 2441 Alignment = (Alignment & -Alignment); 2442 if (Alignment == 1) 2443 Alignment = 0; 2444 } 2445 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2446 2447 unsigned OpcodeIndex; 2448 switch (VT.getSimpleVT().SimpleTy) { 2449 default: llvm_unreachable("unhandled vld/vst lane type"); 2450 // Double-register operations: 2451 case MVT::v8i8: OpcodeIndex = 0; break; 2452 case MVT::v4f16: 2453 case MVT::v4bf16: 2454 case MVT::v4i16: OpcodeIndex = 1; break; 2455 case MVT::v2f32: 2456 case MVT::v2i32: OpcodeIndex = 2; break; 2457 // Quad-register operations: 2458 case MVT::v8f16: 2459 case MVT::v8bf16: 2460 case MVT::v8i16: OpcodeIndex = 0; break; 2461 case MVT::v4f32: 2462 case MVT::v4i32: OpcodeIndex = 1; break; 2463 } 2464 2465 std::vector<EVT> ResTys; 2466 if (IsLoad) { 2467 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2468 if (!is64BitVector) 2469 ResTyElts *= 2; 2470 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2471 MVT::i64, ResTyElts)); 2472 } 2473 if (isUpdating) 2474 ResTys.push_back(MVT::i32); 2475 ResTys.push_back(MVT::Other); 2476 2477 SDValue Pred = getAL(CurDAG, dl); 2478 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2479 2480 SmallVector<SDValue, 8> Ops; 2481 Ops.push_back(MemAddr); 2482 Ops.push_back(Align); 2483 if (isUpdating) { 2484 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2485 bool IsImmUpdate = 2486 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2487 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2488 } 2489 2490 SDValue SuperReg; 2491 SDValue V0 = N->getOperand(Vec0Idx + 0); 2492 SDValue V1 = N->getOperand(Vec0Idx + 1); 2493 if (NumVecs == 2) { 2494 if (is64BitVector) 2495 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2496 else 2497 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2498 } else { 2499 SDValue V2 = N->getOperand(Vec0Idx + 2); 2500 SDValue V3 = (NumVecs == 3) 2501 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2502 : N->getOperand(Vec0Idx + 3); 2503 if (is64BitVector) 2504 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2505 else 2506 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2507 } 2508 Ops.push_back(SuperReg); 2509 Ops.push_back(getI32Imm(Lane, dl)); 2510 Ops.push_back(Pred); 2511 Ops.push_back(Reg0); 2512 Ops.push_back(Chain); 2513 2514 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2515 QOpcodes[OpcodeIndex]); 2516 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2517 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2518 if (!IsLoad) { 2519 ReplaceNode(N, VLdLn); 2520 return; 2521 } 2522 2523 // Extract the subregisters. 
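  // For a vld2 lane load of two D registers, for instance, the instruction
  // defines a single Q-sized super-register; dsub_0 and dsub_1 are peeled off
  // below to rebuild the two original result vectors.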
2524 SuperReg = SDValue(VLdLn, 0); 2525 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2526 ARM::qsub_3 == ARM::qsub_0 + 3, 2527 "Unexpected subreg numbering"); 2528 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2529 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2530 ReplaceUses(SDValue(N, Vec), 2531 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2532 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2533 if (isUpdating) 2534 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2535 CurDAG->RemoveDeadNode(N); 2536 } 2537 2538 template <typename SDValueVector> 2539 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2540 SDValue PredicateMask) { 2541 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2542 Ops.push_back(PredicateMask); 2543 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2544 } 2545 2546 template <typename SDValueVector> 2547 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2548 SDValue PredicateMask, 2549 SDValue Inactive) { 2550 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2551 Ops.push_back(PredicateMask); 2552 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2553 Ops.push_back(Inactive); 2554 } 2555 2556 template <typename SDValueVector> 2557 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2558 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2559 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2560 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2561 } 2562 2563 template <typename SDValueVector> 2564 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2565 EVT InactiveTy) { 2566 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2567 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2568 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg 2569 Ops.push_back(SDValue( 2570 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2571 } 2572 2573 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2574 bool Predicated) { 2575 SDLoc Loc(N); 2576 SmallVector<SDValue, 8> Ops; 2577 2578 uint16_t Opcode; 2579 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2580 case 32: 2581 Opcode = Opcodes[0]; 2582 break; 2583 case 64: 2584 Opcode = Opcodes[1]; 2585 break; 2586 default: 2587 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2588 } 2589 2590 Ops.push_back(N->getOperand(2)); // vector of base addresses 2591 2592 int32_t ImmValue = N->getConstantOperandVal(3); 2593 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2594 2595 if (Predicated) 2596 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2597 else 2598 AddEmptyMVEPredicateToOps(Ops, Loc); 2599 2600 Ops.push_back(N->getOperand(0)); // chain 2601 2602 SmallVector<EVT, 8> VTs; 2603 VTs.push_back(N->getValueType(1)); 2604 VTs.push_back(N->getValueType(0)); 2605 VTs.push_back(N->getValueType(2)); 2606 2607 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2608 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2609 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2610 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2611 transferMemOperands(N, New); 2612 CurDAG->RemoveDeadNode(N); 2613 } 2614 2615 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 2616 bool Immediate, 2617 bool HasSaturationOperand) { 2618 SDLoc Loc(N); 2619 SmallVector<SDValue, 8> Ops; 2620 2621 
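  // The 64-bit value being shifted is modelled as two i32 operands (a low and
  // a high GPR), matching instructions such as MVE_ASRLi that read and write
  // an RdaLo/RdaHi register pair.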
// Two 32-bit halves of the value to be shifted 2622 Ops.push_back(N->getOperand(1)); 2623 Ops.push_back(N->getOperand(2)); 2624 2625 // The shift count 2626 if (Immediate) { 2627 int32_t ImmValue = N->getConstantOperandVal(3); 2628 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2629 } else { 2630 Ops.push_back(N->getOperand(3)); 2631 } 2632 2633 // The immediate saturation operand, if any 2634 if (HasSaturationOperand) { 2635 int32_t SatOp = N->getConstantOperandVal(4); 2636 int SatBit = (SatOp == 64 ? 0 : 1); 2637 Ops.push_back(getI32Imm(SatBit, Loc)); 2638 } 2639 2640 // MVE scalar shifts are IT-predicable, so include the standard 2641 // predicate arguments. 2642 Ops.push_back(getAL(CurDAG, Loc)); 2643 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2644 2645 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2646 } 2647 2648 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2649 uint16_t OpcodeWithNoCarry, 2650 bool Add, bool Predicated) { 2651 SDLoc Loc(N); 2652 SmallVector<SDValue, 8> Ops; 2653 uint16_t Opcode; 2654 2655 unsigned FirstInputOp = Predicated ? 2 : 1; 2656 2657 // Two input vectors and the input carry flag 2658 Ops.push_back(N->getOperand(FirstInputOp)); 2659 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2660 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2661 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2662 uint32_t CarryMask = 1 << 29; 2663 uint32_t CarryExpected = Add ? 0 : CarryMask; 2664 if (CarryInConstant && 2665 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2666 Opcode = OpcodeWithNoCarry; 2667 } else { 2668 Ops.push_back(CarryIn); 2669 Opcode = OpcodeWithCarry; 2670 } 2671 2672 if (Predicated) 2673 AddMVEPredicateToOps(Ops, Loc, 2674 N->getOperand(FirstInputOp + 3), // predicate 2675 N->getOperand(FirstInputOp - 1)); // inactive 2676 else 2677 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2678 2679 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2680 } 2681 2682 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2683 SDLoc Loc(N); 2684 SmallVector<SDValue, 8> Ops; 2685 2686 // One vector input, followed by a 32-bit word of bits to shift in 2687 // and then an immediate shift count 2688 Ops.push_back(N->getOperand(1)); 2689 Ops.push_back(N->getOperand(2)); 2690 int32_t ImmValue = N->getConstantOperandVal(3); 2691 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2692 2693 if (Predicated) 2694 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2695 else 2696 AddEmptyMVEPredicateToOps(Ops, Loc); 2697 2698 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops)); 2699 } 2700 2701 static bool SDValueToConstBool(SDValue SDVal) { 2702 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2703 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2704 uint64_t Value = SDValConstant->getZExtValue(); 2705 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2706 return Value; 2707 } 2708 2709 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2710 const uint16_t *OpcodesS, 2711 const uint16_t *OpcodesU, 2712 size_t Stride, size_t TySize) { 2713 assert(TySize < Stride && "Invalid TySize"); 2714 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2715 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2716 bool IsExchange = SDValueToConstBool(N->getOperand(3)); 2717 if (IsUnsigned) { 2718 assert(!IsSub && 2719 "Unsigned 
versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2720 assert(!IsExchange && 2721 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2722 } 2723 2724 auto OpIsZero = [N](size_t OpNo) { 2725 return isNullConstant(N->getOperand(OpNo)); 2726 }; 2727 2728 // If the input accumulator value is not zero, select an instruction with 2729 // accumulator, otherwise select an instruction without accumulator 2730 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2731 2732 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2733 if (IsSub) 2734 Opcodes += 4 * Stride; 2735 if (IsExchange) 2736 Opcodes += 2 * Stride; 2737 if (IsAccum) 2738 Opcodes += Stride; 2739 uint16_t Opcode = Opcodes[TySize]; 2740 2741 SDLoc Loc(N); 2742 SmallVector<SDValue, 8> Ops; 2743 // Push the accumulator operands, if they are used 2744 if (IsAccum) { 2745 Ops.push_back(N->getOperand(4)); 2746 Ops.push_back(N->getOperand(5)); 2747 } 2748 // Push the two vector operands 2749 Ops.push_back(N->getOperand(6)); 2750 Ops.push_back(N->getOperand(7)); 2751 2752 if (Predicated) 2753 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2754 else 2755 AddEmptyMVEPredicateToOps(Ops, Loc); 2756 2757 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops)); 2758 } 2759 2760 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2761 const uint16_t *OpcodesS, 2762 const uint16_t *OpcodesU) { 2763 EVT VecTy = N->getOperand(6).getValueType(); 2764 size_t SizeIndex; 2765 switch (VecTy.getVectorElementType().getSizeInBits()) { 2766 case 16: 2767 SizeIndex = 0; 2768 break; 2769 case 32: 2770 SizeIndex = 1; 2771 break; 2772 default: 2773 llvm_unreachable("bad vector element size"); 2774 } 2775 2776 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2777 } 2778 2779 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2780 const uint16_t *OpcodesS, 2781 const uint16_t *OpcodesU) { 2782 assert( 2783 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2784 32 && 2785 "bad vector element size"); 2786 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2787 } 2788 2789 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2790 const uint16_t *const *Opcodes, 2791 bool HasWriteback) { 2792 EVT VT = N->getValueType(0); 2793 SDLoc Loc(N); 2794 2795 const uint16_t *OurOpcodes; 2796 switch (VT.getVectorElementType().getSizeInBits()) { 2797 case 8: 2798 OurOpcodes = Opcodes[0]; 2799 break; 2800 case 16: 2801 OurOpcodes = Opcodes[1]; 2802 break; 2803 case 32: 2804 OurOpcodes = Opcodes[2]; 2805 break; 2806 default: 2807 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2808 } 2809 2810 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2811 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2812 unsigned PtrOperand = HasWriteback ? 
1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCoproc->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants, copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable; add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction.
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ?
4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
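  // At this point the two inserts write an adjacent even/odd lane pair (for
  // example lanes 2 and 3 of a v8f16, which together occupy one s-register of
  // the Q register), so the pair can be moved around as a single 32-bit value.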
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into an f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
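  // VINS.F16 copies the low half of its source into the high half of its
  // destination while leaving the destination's low half intact, so a single
  // instruction merges Val2 (even lane) and Val1 (odd lane) into the 32-bit
  // pair that is then inserted as an f32 lane.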
3156 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { 3157 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3158 SDValue NewIns = 3159 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3160 Ins2.getOperand(0), SDValue(VINS, 0)); 3161 ReplaceUses(Ins1, NewIns); 3162 return true; 3163 } 3164 3165 return false; 3166 } 3167 3168 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, 3169 SDNode *FMul, 3170 bool IsUnsigned, 3171 bool FixedToFloat) { 3172 auto Type = N->getValueType(0); 3173 unsigned ScalarBits = Type.getScalarSizeInBits(); 3174 if (ScalarBits > 32) 3175 return false; 3176 3177 SDNodeFlags FMulFlags = FMul->getFlags(); 3178 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is 3179 // allowed in 16 bit unsigned floats 3180 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) 3181 return false; 3182 3183 SDValue ImmNode = FMul->getOperand(1); 3184 SDValue VecVal = FMul->getOperand(0); 3185 if (VecVal->getOpcode() == ISD::UINT_TO_FP || 3186 VecVal->getOpcode() == ISD::SINT_TO_FP) 3187 VecVal = VecVal->getOperand(0); 3188 3189 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) 3190 return false; 3191 3192 if (ImmNode.getOpcode() == ISD::BITCAST) { 3193 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3194 return false; 3195 ImmNode = ImmNode.getOperand(0); 3196 } 3197 3198 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) 3199 return false; 3200 3201 APFloat ImmAPF(0.0f); 3202 switch (ImmNode.getOpcode()) { 3203 case ARMISD::VMOVIMM: 3204 case ARMISD::VDUP: { 3205 if (!isa<ConstantSDNode>(ImmNode.getOperand(0))) 3206 return false; 3207 unsigned Imm = ImmNode.getConstantOperandVal(0); 3208 if (ImmNode.getOpcode() == ARMISD::VMOVIMM) 3209 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); 3210 ImmAPF = 3211 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), 3212 APInt(ScalarBits, Imm)); 3213 break; 3214 } 3215 case ARMISD::VMOVFPIMM: { 3216 ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); 3217 break; 3218 } 3219 default: 3220 return false; 3221 } 3222 3223 // Where n is the number of fractional bits, multiplying by 2^n will convert 3224 // from float to fixed and multiplying by 2^-n will convert from fixed to 3225 // float. Taking log2 of the factor (after taking the inverse in the case of 3226 // float to fixed) will give n. 3227 APFloat ToConvert = ImmAPF; 3228 if (FixedToFloat) { 3229 if (!ImmAPF.getExactInverse(&ToConvert)) 3230 return false; 3231 } 3232 APSInt Converted(64, false); 3233 bool IsExact; 3234 ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, 3235 &IsExact); 3236 if (!IsExact || !Converted.isPowerOf2()) 3237 return false; 3238 3239 unsigned FracBits = Converted.logBase2(); 3240 if (FracBits > ScalarBits) 3241 return false; 3242 3243 SmallVector<SDValue, 3> Ops{ 3244 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; 3245 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); 3246 3247 unsigned int Opcode; 3248 switch (ScalarBits) { 3249 case 16: 3250 if (FixedToFloat) 3251 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; 3252 else 3253 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; 3254 break; 3255 case 32: 3256 if (FixedToFloat) 3257 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; 3258 else 3259 Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}

bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
  // Transform a floating-point to fixed-point conversion to a VCVT.
  if (!Subtarget->hasMVEFloatOps())
    return false;
  EVT Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  unsigned int ScalarBits = Type.getScalarSizeInBits();

  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
                    N->getOpcode() == ISD::FP_TO_UINT_SAT;
  SDNode *Node = N->getOperand(0).getNode();

  // A floating-point to fixed-point conversion with one fractional bit gets
  // turned into an FP_TO_[U|S]INT(FADD (x, x)) rather than an
  // FP_TO_[U|S]INT(FMUL (x, y)).
  if (Node->getOpcode() == ISD::FADD) {
    if (Node->getOperand(0) != Node->getOperand(1))
      return false;
    SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16-bit unsigned floats.
    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
      return false;

    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      // No other element sizes are expected here; mirror the switch in
      // transformFixedFloatingPointConversion so Opcode cannot be used
      // uninitialized.
      llvm_unreachable("unexpected number of scalar bits");
      break;
    }
    SmallVector<SDValue, 3> Ops{Node->getOperand(0),
                                CurDAG->getConstant(1, dl, MVT::i32)};
    AddEmptyMVEPredicateToOps(Ops, dl, Type);

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
    return true;
  }

  if (Node->getOpcode() != ISD::FMUL)
    return false;

  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
}

bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
  // Transform a fixed-point to floating-point conversion to a VCVT.
  if (!Subtarget->hasMVEFloatOps())
    return false;
  auto Type = N->getValueType(0);
  if (!Type.isVector())
    return false;

  auto LHS = N->getOperand(0);
  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
    return false;

  return transformFixedFloatingPointConversion(
      N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
                     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
                     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask.
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0.
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
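        // Worked example (illustrative): for (and (srl x, 3), 0x1F) we have
        // Srl_imm == 3 and And_imm == 0x1F, giving LSB == 3 and Width == 4
        // (the encoded width-1 of a 5-bit field), i.e. "ubfx x, #3, #5".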
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
              CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                        MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Alternatively, we may be looking at a shift of an AND with a shifted-mask
  // immediate.
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(And_imm);
    // The shift amount must equal the AND mask's least significant set bit.
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(And_imm);
      // Note: The width operand is encoded as width-1.
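      // e.g. (srl (and x, 0x0FF0), 4): LSB == 4, MSB == 11, so Width == 7
      // (the encoded width-1 of an 8-bit field), i.e. "ubfx x, #4, #8".
      // (Illustrative values.)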
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X, 0, X, -X
///   select_cc setgt X, -1, X, -X
///   select_cc setl[te] X, 0, -X, X
///   select_cc setlt X, 1, -X, X
/// (all of which represent an integer ABS) into:
///   Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter form and matches it to an
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue SUBSrc0 = N->getOperand(0);
  SDValue SUBSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue XORSrc0 = SUBSrc0.getOperand(0);
  SDValue XORSrc1 = SUBSrc0.getOperand(1);
  SDValue SRASrc0 = SUBSrc1.getOperand(0);
  SDValue SRASrc1 = SUBSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
    return true;
  }

  return false;
}

/// We have dedicated pseudo-instructions for these atomic compare-and-swap
/// operations; select one based on the width of the memory access.
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = Subtarget->isThumb() ?
ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; 3518 else 3519 llvm_unreachable("Unknown AtomicCmpSwap type"); 3520 3521 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3522 N->getOperand(0)}; 3523 SDNode *CmpSwap = CurDAG->getMachineNode( 3524 Opcode, SDLoc(N), 3525 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3526 3527 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3528 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3529 3530 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3531 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3532 CurDAG->RemoveDeadNode(N); 3533 } 3534 3535 static std::optional<std::pair<unsigned, unsigned>> 3536 getContiguousRangeOfSetBits(const APInt &A) { 3537 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1; 3538 unsigned LastOne = A.countr_zero(); 3539 if (A.popcount() != (FirstOne - LastOne + 1)) 3540 return std::nullopt; 3541 return std::make_pair(FirstOne, LastOne); 3542 } 3543 3544 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3545 assert(N->getOpcode() == ARMISD::CMPZ); 3546 SwitchEQNEToPLMI = false; 3547 3548 if (!Subtarget->isThumb()) 3549 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3550 // LSR don't exist as standalone instructions - they need the barrel shifter. 3551 return; 3552 3553 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3554 SDValue And = N->getOperand(0); 3555 if (!And->hasOneUse()) 3556 return; 3557 3558 SDValue Zero = N->getOperand(1); 3559 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND) 3560 return; 3561 SDValue X = And.getOperand(0); 3562 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3563 3564 if (!C) 3565 return; 3566 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3567 if (!Range) 3568 return; 3569 3570 // There are several ways to lower this: 3571 SDNode *NewN; 3572 SDLoc dl(N); 3573 3574 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3575 if (Subtarget->isThumb2()) { 3576 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3577 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3578 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3579 CurDAG->getRegister(0, MVT::i32) }; 3580 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3581 } else { 3582 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3583 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3584 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3585 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3586 } 3587 }; 3588 3589 if (Range->second == 0) { 3590 // 1. Mask includes the LSB -> Simply shift the top N bits off 3591 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3592 ReplaceNode(And.getNode(), NewN); 3593 } else if (Range->first == 31) { 3594 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3595 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3596 ReplaceNode(And.getNode(), NewN); 3597 } else if (Range->first == Range->second) { 3598 // 3. Only one bit is set. We can shift this into the sign bit and use a 3599 // PL/MI comparison. 3600 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3601 ReplaceNode(And.getNode(), NewN); 3602 3603 SwitchEQNEToPLMI = true; 3604 } else if (!Subtarget->hasV6T2Ops()) { 3605 // 4. Do a double shift to clear bottom and top bits, but only in 3606 // thumb-1 mode as in thumb-2 we can use UBFX. 
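    // e.g. for C == 0x00FFFF00 (Range == {23, 8}): LSLS by 8 shifts out the
    // top eight bits, then LSRS by 16 (8 + (31 - 23)) drops the bottom eight,
    // so Z is set iff (X & 0x00FFFF00) == 0. (Illustrative values.)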
3607 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3608 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3609 Range->second + (31 - Range->first)); 3610 ReplaceNode(And.getNode(), NewN); 3611 } 3612 } 3613 3614 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], 3615 unsigned Opc128[3]) { 3616 assert((VT.is64BitVector() || VT.is128BitVector()) && 3617 "Unexpected vector shuffle length"); 3618 switch (VT.getScalarSizeInBits()) { 3619 default: 3620 llvm_unreachable("Unexpected vector shuffle element size"); 3621 case 8: 3622 return VT.is64BitVector() ? Opc64[0] : Opc128[0]; 3623 case 16: 3624 return VT.is64BitVector() ? Opc64[1] : Opc128[1]; 3625 case 32: 3626 return VT.is64BitVector() ? Opc64[2] : Opc128[2]; 3627 } 3628 } 3629 3630 void ARMDAGToDAGISel::Select(SDNode *N) { 3631 SDLoc dl(N); 3632 3633 if (N->isMachineOpcode()) { 3634 N->setNodeId(-1); 3635 return; // Already selected. 3636 } 3637 3638 switch (N->getOpcode()) { 3639 default: break; 3640 case ISD::STORE: { 3641 // For Thumb1, match an sp-relative store in C++. This is a little 3642 // unfortunate, but I don't think I can make the chain check work 3643 // otherwise. (The chain of the store has to be the same as the chain 3644 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3645 // a direct reference to "SP".) 3646 // 3647 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3648 // a different addressing mode from other four-byte stores. 3649 // 3650 // This pattern usually comes up with call arguments. 3651 StoreSDNode *ST = cast<StoreSDNode>(N); 3652 SDValue Ptr = ST->getBasePtr(); 3653 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3654 int RHSC = 0; 3655 if (Ptr.getOpcode() == ISD::ADD && 3656 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3657 Ptr = Ptr.getOperand(0); 3658 3659 if (Ptr.getOpcode() == ISD::CopyFromReg && 3660 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3661 Ptr.getOperand(0) == ST->getChain()) { 3662 SDValue Ops[] = {ST->getValue(), 3663 CurDAG->getRegister(ARM::SP, MVT::i32), 3664 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3665 getAL(CurDAG, dl), 3666 CurDAG->getRegister(0, MVT::i32), 3667 ST->getChain()}; 3668 MachineSDNode *ResNode = 3669 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3670 MachineMemOperand *MemOp = ST->getMemOperand(); 3671 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3672 ReplaceNode(N, ResNode); 3673 return; 3674 } 3675 } 3676 break; 3677 } 3678 case ISD::WRITE_REGISTER: 3679 if (tryWriteRegister(N)) 3680 return; 3681 break; 3682 case ISD::READ_REGISTER: 3683 if (tryReadRegister(N)) 3684 return; 3685 break; 3686 case ISD::INLINEASM: 3687 case ISD::INLINEASM_BR: 3688 if (tryInlineAsm(N)) 3689 return; 3690 break; 3691 case ISD::SUB: 3692 // Select special operations if SUB node forms integer ABS pattern 3693 if (tryABSOp(N)) 3694 return; 3695 // Other cases are autogenerated. 
3696 break; 3697 case ISD::Constant: { 3698 unsigned Val = N->getAsZExtVal(); 3699 // If we can't materialize the constant we need to use a literal pool 3700 if (ConstantMaterializationCost(Val, Subtarget) > 2 && 3701 !Subtarget->genExecuteOnly()) { 3702 SDValue CPIdx = CurDAG->getTargetConstantPool( 3703 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3704 TLI->getPointerTy(CurDAG->getDataLayout())); 3705 3706 SDNode *ResNode; 3707 if (Subtarget->isThumb()) { 3708 SDValue Ops[] = { 3709 CPIdx, 3710 getAL(CurDAG, dl), 3711 CurDAG->getRegister(0, MVT::i32), 3712 CurDAG->getEntryNode() 3713 }; 3714 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3715 Ops); 3716 } else { 3717 SDValue Ops[] = { 3718 CPIdx, 3719 CurDAG->getTargetConstant(0, dl, MVT::i32), 3720 getAL(CurDAG, dl), 3721 CurDAG->getRegister(0, MVT::i32), 3722 CurDAG->getEntryNode() 3723 }; 3724 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3725 Ops); 3726 } 3727 // Annotate the Node with memory operand information so that MachineInstr 3728 // queries work properly. This e.g. gives the register allocation the 3729 // required information for rematerialization. 3730 MachineFunction& MF = CurDAG->getMachineFunction(); 3731 MachineMemOperand *MemOp = 3732 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3733 MachineMemOperand::MOLoad, 4, Align(4)); 3734 3735 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3736 3737 ReplaceNode(N, ResNode); 3738 return; 3739 } 3740 3741 // Other cases are autogenerated. 3742 break; 3743 } 3744 case ISD::FrameIndex: { 3745 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 3746 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3747 SDValue TFI = CurDAG->getTargetFrameIndex( 3748 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3749 if (Subtarget->isThumb1Only()) { 3750 // Set the alignment of the frame object to 4, to avoid having to generate 3751 // more than one ADD 3752 MachineFrameInfo &MFI = MF->getFrameInfo(); 3753 if (MFI.getObjectAlign(FI) < Align(4)) 3754 MFI.setObjectAlignment(FI, Align(4)); 3755 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3756 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3757 return; 3758 } else { 3759 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3760 ARM::t2ADDri : ARM::ADDri); 3761 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3762 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3763 CurDAG->getRegister(0, MVT::i32) }; 3764 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3765 return; 3766 } 3767 } 3768 case ISD::INSERT_VECTOR_ELT: { 3769 if (tryInsertVectorElt(N)) 3770 return; 3771 break; 3772 } 3773 case ISD::SRL: 3774 if (tryV6T2BitfieldExtractOp(N, false)) 3775 return; 3776 break; 3777 case ISD::SIGN_EXTEND_INREG: 3778 case ISD::SRA: 3779 if (tryV6T2BitfieldExtractOp(N, true)) 3780 return; 3781 break; 3782 case ISD::FP_TO_UINT: 3783 case ISD::FP_TO_SINT: 3784 case ISD::FP_TO_UINT_SAT: 3785 case ISD::FP_TO_SINT_SAT: 3786 if (tryFP_TO_INT(N, dl)) 3787 return; 3788 break; 3789 case ISD::FMUL: 3790 if (tryFMULFixed(N, dl)) 3791 return; 3792 break; 3793 case ISD::MUL: 3794 if (Subtarget->isThumb1Only()) 3795 break; 3796 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3797 unsigned RHSV = C->getZExtValue(); 3798 if (!RHSV) break; 3799 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
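        // Strength-reduce the multiply to a shift-and-add (illustrative):
        //   x * 9 == x + (x << 3)  ->  "add r0, r0, r0, lsl #3",
        // and in the 2^n-1 case below,
        //   x * 7 == (x << 3) - x  ->  "rsb r0, r0, r0, lsl #3".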
3800 unsigned ShImm = Log2_32(RHSV-1); 3801 if (ShImm >= 32) 3802 break; 3803 SDValue V = N->getOperand(0); 3804 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3805 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3806 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3807 if (Subtarget->isThumb()) { 3808 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3809 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3810 return; 3811 } else { 3812 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3813 Reg0 }; 3814 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3815 return; 3816 } 3817 } 3818 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3819 unsigned ShImm = Log2_32(RHSV+1); 3820 if (ShImm >= 32) 3821 break; 3822 SDValue V = N->getOperand(0); 3823 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3824 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3825 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3826 if (Subtarget->isThumb()) { 3827 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3828 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3829 return; 3830 } else { 3831 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3832 Reg0 }; 3833 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3834 return; 3835 } 3836 } 3837 } 3838 break; 3839 case ISD::AND: { 3840 // Check for unsigned bitfield extract 3841 if (tryV6T2BitfieldExtractOp(N, false)) 3842 return; 3843 3844 // If an immediate is used in an AND node, it is possible that the immediate 3845 // can be more optimally materialized when negated. If this is the case we 3846 // can negate the immediate and use a BIC instead. 3847 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3848 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3849 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3850 3851 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 3852 // immediate can be negated and fit in the immediate operand of 3853 // a t2BIC, don't do any manual transform here as this can be 3854 // handled by the generic ISel machinery. 3855 bool PreferImmediateEncoding = 3856 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3857 if (!PreferImmediateEncoding && 3858 ConstantMaterializationCost(Imm, Subtarget) > 3859 ConstantMaterializationCost(~Imm, Subtarget)) { 3860 // The current immediate costs more to materialize than a negated 3861 // immediate, so negate the immediate and use a BIC. 3862 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32); 3863 // If the new constant didn't exist before, reposition it in the topological 3864 // ordering so it is just before N. Otherwise, don't touch its location. 3865 if (NewImm->getNodeId() == -1) 3866 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3867 3868 if (!Subtarget->hasThumb2()) { 3869 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3870 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3871 CurDAG->getRegister(0, MVT::i32)}; 3872 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3873 return; 3874 } else { 3875 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3876 CurDAG->getRegister(0, MVT::i32), 3877 CurDAG->getRegister(0, MVT::i32)}; 3878 ReplaceNode(N, 3879 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3880 return; 3881 } 3882 } 3883 } 3884 3885 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3886 // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16 bits
    // are entirely contributed by c2 and the lower 16 bits are entirely
    // contributed by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5, dl, 3966 MVT::i32, MVT::i32, Ops)); 3967 return; 3968 } 3969 } 3970 case ARMISD::SUBE: { 3971 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3972 break; 3973 // Look for a pattern to match SMMLS 3974 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3975 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3976 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3977 !SDValue(N, 1).use_empty()) 3978 break; 3979 3980 if (Subtarget->isThumb()) 3981 assert(Subtarget->hasThumb2() && 3982 "This pattern should not be generated for Thumb"); 3983 3984 SDValue SmulLoHi = N->getOperand(1); 3985 SDValue Subc = N->getOperand(2); 3986 SDValue Zero = Subc.getOperand(0); 3987 3988 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || 3989 N->getOperand(1) != SmulLoHi.getValue(1) || 3990 N->getOperand(2) != Subc.getValue(1)) 3991 break; 3992 3993 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS; 3994 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3995 N->getOperand(0), getAL(CurDAG, dl), 3996 CurDAG->getRegister(0, MVT::i32) }; 3997 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3998 return; 3999 } 4000 case ISD::LOAD: { 4001 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4002 return; 4003 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 4004 if (tryT2IndexedLoad(N)) 4005 return; 4006 } else if (Subtarget->isThumb()) { 4007 if (tryT1IndexedLoad(N)) 4008 return; 4009 } else if (tryARMIndexedLoad(N)) 4010 return; 4011 // Other cases are autogenerated. 4012 break; 4013 } 4014 case ISD::MLOAD: 4015 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 4016 return; 4017 // Other cases are autogenerated. 4018 break; 4019 case ARMISD::WLSSETUP: { 4020 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 4021 N->getOperand(0)); 4022 ReplaceUses(N, New); 4023 CurDAG->RemoveDeadNode(N); 4024 return; 4025 } 4026 case ARMISD::WLS: { 4027 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 4028 N->getOperand(1), N->getOperand(2), 4029 N->getOperand(0)); 4030 ReplaceUses(N, New); 4031 CurDAG->RemoveDeadNode(N); 4032 return; 4033 } 4034 case ARMISD::LE: { 4035 SDValue Ops[] = { N->getOperand(1), 4036 N->getOperand(2), 4037 N->getOperand(0) }; 4038 unsigned Opc = ARM::t2LoopEnd; 4039 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 4040 ReplaceUses(N, New); 4041 CurDAG->RemoveDeadNode(N); 4042 return; 4043 } 4044 case ARMISD::LDRD: { 4045 if (Subtarget->isThumb2()) 4046 break; // TableGen handles isel in this case. 4047 SDValue Base, RegOffset, ImmOffset; 4048 const SDValue &Chain = N->getOperand(0); 4049 const SDValue &Addr = N->getOperand(1); 4050 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4051 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4052 // The register-offset variant of LDRD mandates that the register 4053 // allocated to RegOffset is not reused in any of the remaining operands. 4054 // This restriction is currently not enforced. Therefore emitting this 4055 // variant is explicitly avoided. 
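      // Instead, fall back to the plain base-register form: keep the whole
      // address in Base and pass reg0 ("no register") as the offset, so only
      // the immediate-offset flavour of the dual load is ever emitted here.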
4056 Base = Addr; 4057 RegOffset = CurDAG->getRegister(0, MVT::i32); 4058 } 4059 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 4060 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 4061 {MVT::Untyped, MVT::Other}, Ops); 4062 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4063 SDValue(New, 0)); 4064 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4065 SDValue(New, 0)); 4066 transferMemOperands(N, New); 4067 ReplaceUses(SDValue(N, 0), Lo); 4068 ReplaceUses(SDValue(N, 1), Hi); 4069 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 4070 CurDAG->RemoveDeadNode(N); 4071 return; 4072 } 4073 case ARMISD::STRD: { 4074 if (Subtarget->isThumb2()) 4075 break; // TableGen handles isel in this case. 4076 SDValue Base, RegOffset, ImmOffset; 4077 const SDValue &Chain = N->getOperand(0); 4078 const SDValue &Addr = N->getOperand(3); 4079 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 4080 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 4081 // The register-offset variant of STRD mandates that the register 4082 // allocated to RegOffset is not reused in any of the remaining operands. 4083 // This restriction is currently not enforced. Therefore emitting this 4084 // variant is explicitly avoided. 4085 Base = Addr; 4086 RegOffset = CurDAG->getRegister(0, MVT::i32); 4087 } 4088 SDNode *RegPair = 4089 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 4090 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 4091 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 4092 transferMemOperands(N, New); 4093 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 4094 CurDAG->RemoveDeadNode(N); 4095 return; 4096 } 4097 case ARMISD::LOOP_DEC: { 4098 SDValue Ops[] = { N->getOperand(1), 4099 N->getOperand(2), 4100 N->getOperand(0) }; 4101 SDNode *Dec = 4102 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4103 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 4104 ReplaceUses(N, Dec); 4105 CurDAG->RemoveDeadNode(N); 4106 return; 4107 } 4108 case ARMISD::BRCOND: { 4109 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4110 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4111 // Pattern complexity = 6 cost = 1 size = 0 4112 4113 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4114 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 4115 // Pattern complexity = 6 cost = 1 size = 0 4116 4117 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 4118 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 4119 // Pattern complexity = 6 cost = 1 size = 0 4120 4121 unsigned Opc = Subtarget->isThumb() ? 4122 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 4123 SDValue Chain = N->getOperand(0); 4124 SDValue N1 = N->getOperand(1); 4125 SDValue N2 = N->getOperand(2); 4126 SDValue N3 = N->getOperand(3); 4127 SDValue InGlue = N->getOperand(4); 4128 assert(N1.getOpcode() == ISD::BasicBlock); 4129 assert(N2.getOpcode() == ISD::Constant); 4130 assert(N3.getOpcode() == ISD::Register); 4131 4132 unsigned CC = (unsigned)N2->getAsZExtVal(); 4133 4134 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4135 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 4136 SDValue Int = InGlue.getOperand(0); 4137 uint64_t ID = Int->getConstantOperandVal(1); 4138 4139 // Handle low-overhead loops. 
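        // Sketch of the rewrite performed below (illustrative):
        //   brcond (cmpz (int_loop_decrement_reg elts, size), 0), bb
        //     -> t2LoopEnd (t2LoopDec elts, size), bb
        // The ARM low-overhead-loops pass can later turn this pair into an
        // LE-based hardware loop.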
4140 if (ID == Intrinsic::loop_decrement_reg) { 4141 SDValue Elements = Int.getOperand(2); 4142 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3), 4143 dl, MVT::i32); 4144 4145 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 4146 SDNode *LoopDec = 4147 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 4148 CurDAG->getVTList(MVT::i32, MVT::Other), 4149 Args); 4150 ReplaceUses(Int.getNode(), LoopDec); 4151 4152 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 4153 SDNode *LoopEnd = 4154 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 4155 4156 ReplaceUses(N, LoopEnd); 4157 CurDAG->RemoveDeadNode(N); 4158 CurDAG->RemoveDeadNode(InGlue.getNode()); 4159 CurDAG->RemoveDeadNode(Int.getNode()); 4160 return; 4161 } 4162 } 4163 4164 bool SwitchEQNEToPLMI; 4165 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4166 InGlue = N->getOperand(4); 4167 4168 if (SwitchEQNEToPLMI) { 4169 switch ((ARMCC::CondCodes)CC) { 4170 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4171 case ARMCC::NE: 4172 CC = (unsigned)ARMCC::MI; 4173 break; 4174 case ARMCC::EQ: 4175 CC = (unsigned)ARMCC::PL; 4176 break; 4177 } 4178 } 4179 } 4180 4181 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 4182 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue }; 4183 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 4184 MVT::Glue, Ops); 4185 Chain = SDValue(ResNode, 0); 4186 if (N->getNumValues() == 2) { 4187 InGlue = SDValue(ResNode, 1); 4188 ReplaceUses(SDValue(N, 1), InGlue); 4189 } 4190 ReplaceUses(SDValue(N, 0), 4191 SDValue(Chain.getNode(), Chain.getResNo())); 4192 CurDAG->RemoveDeadNode(N); 4193 return; 4194 } 4195 4196 case ARMISD::CMPZ: { 4197 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 4198 // This allows us to avoid materializing the expensive negative constant. 4199 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 4200 // for its glue output. 4201 SDValue X = N->getOperand(0); 4202 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 4203 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 4204 int64_t Addend = -C->getSExtValue(); 4205 4206 SDNode *Add = nullptr; 4207 // ADDS can be better than CMN if the immediate fits in a 4208 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 4209 // Outside that range we can just use a CMN which is 32-bit but has a 4210 // 12-bit immediate range. 4211 if (Addend < 1<<8) { 4212 if (Subtarget->isThumb2()) { 4213 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4214 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 4215 CurDAG->getRegister(0, MVT::i32) }; 4216 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 4217 } else { 4218 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; 4219 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4220 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4221 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4222 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4223 } 4224 } 4225 if (Add) { 4226 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4227 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4228 } 4229 } 4230 // Other cases are autogenerated. 
4231 break; 4232 } 4233 4234 case ARMISD::CMOV: { 4235 SDValue InGlue = N->getOperand(4); 4236 4237 if (InGlue.getOpcode() == ARMISD::CMPZ) { 4238 bool SwitchEQNEToPLMI; 4239 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); 4240 4241 if (SwitchEQNEToPLMI) { 4242 SDValue ARMcc = N->getOperand(2); 4243 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); 4244 4245 switch (CC) { 4246 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4247 case ARMCC::NE: 4248 CC = ARMCC::MI; 4249 break; 4250 case ARMCC::EQ: 4251 CC = ARMCC::PL; 4252 break; 4253 } 4254 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4255 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4256 N->getOperand(3), N->getOperand(4)}; 4257 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4258 } 4259 4260 } 4261 // Other cases are autogenerated. 4262 break; 4263 } 4264 case ARMISD::VZIP: { 4265 EVT VT = N->getValueType(0); 4266 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4267 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32}; 4268 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32}; 4269 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4270 SDValue Pred = getAL(CurDAG, dl); 4271 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4272 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4273 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4274 return; 4275 } 4276 case ARMISD::VUZP: { 4277 EVT VT = N->getValueType(0); 4278 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4279 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32}; 4280 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32}; 4281 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4282 SDValue Pred = getAL(CurDAG, dl); 4283 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4284 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4285 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4286 return; 4287 } 4288 case ARMISD::VTRN: { 4289 EVT VT = N->getValueType(0); 4290 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32}; 4291 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32}; 4292 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128); 4293 SDValue Pred = getAL(CurDAG, dl); 4294 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4295 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg}; 4296 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4297 return; 4298 } 4299 case ARMISD::BUILD_VECTOR: { 4300 EVT VecVT = N->getValueType(0); 4301 EVT EltVT = VecVT.getVectorElementType(); 4302 unsigned NumElts = VecVT.getVectorNumElements(); 4303 if (EltVT == MVT::f64) { 4304 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4305 ReplaceNode( 4306 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4307 return; 4308 } 4309 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4310 if (NumElts == 2) { 4311 ReplaceNode( 4312 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4313 return; 4314 } 4315 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4316 ReplaceNode(N, 4317 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4318 N->getOperand(2), N->getOperand(3))); 4319 return; 4320 } 4321 4322 case ARMISD::VLD1DUP: { 4323 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4324 ARM::VLD1DUPd32 }; 4325 static const uint16_t 
QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4326 ARM::VLD1DUPq32 }; 4327 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4328 return; 4329 } 4330 4331 case ARMISD::VLD2DUP: { 4332 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4333 ARM::VLD2DUPd32 }; 4334 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4335 return; 4336 } 4337 4338 case ARMISD::VLD3DUP: { 4339 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4340 ARM::VLD3DUPd16Pseudo, 4341 ARM::VLD3DUPd32Pseudo }; 4342 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4343 return; 4344 } 4345 4346 case ARMISD::VLD4DUP: { 4347 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4348 ARM::VLD4DUPd16Pseudo, 4349 ARM::VLD4DUPd32Pseudo }; 4350 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4351 return; 4352 } 4353 4354 case ARMISD::VLD1DUP_UPD: { 4355 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4356 ARM::VLD1DUPd16wb_fixed, 4357 ARM::VLD1DUPd32wb_fixed }; 4358 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4359 ARM::VLD1DUPq16wb_fixed, 4360 ARM::VLD1DUPq32wb_fixed }; 4361 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4362 return; 4363 } 4364 4365 case ARMISD::VLD2DUP_UPD: { 4366 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed, 4367 ARM::VLD2DUPd16wb_fixed, 4368 ARM::VLD2DUPd32wb_fixed, 4369 ARM::VLD1q64wb_fixed }; 4370 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4371 ARM::VLD2DUPq16EvenPseudo, 4372 ARM::VLD2DUPq32EvenPseudo }; 4373 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed, 4374 ARM::VLD2DUPq16OddPseudoWB_fixed, 4375 ARM::VLD2DUPq32OddPseudoWB_fixed }; 4376 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1); 4377 return; 4378 } 4379 4380 case ARMISD::VLD3DUP_UPD: { 4381 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4382 ARM::VLD3DUPd16Pseudo_UPD, 4383 ARM::VLD3DUPd32Pseudo_UPD, 4384 ARM::VLD1d64TPseudoWB_fixed }; 4385 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4386 ARM::VLD3DUPq16EvenPseudo, 4387 ARM::VLD3DUPq32EvenPseudo }; 4388 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD, 4389 ARM::VLD3DUPq16OddPseudo_UPD, 4390 ARM::VLD3DUPq32OddPseudo_UPD }; 4391 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4392 return; 4393 } 4394 4395 case ARMISD::VLD4DUP_UPD: { 4396 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4397 ARM::VLD4DUPd16Pseudo_UPD, 4398 ARM::VLD4DUPd32Pseudo_UPD, 4399 ARM::VLD1d64QPseudoWB_fixed }; 4400 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4401 ARM::VLD4DUPq16EvenPseudo, 4402 ARM::VLD4DUPq32EvenPseudo }; 4403 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD, 4404 ARM::VLD4DUPq16OddPseudo_UPD, 4405 ARM::VLD4DUPq32OddPseudo_UPD }; 4406 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4407 return; 4408 } 4409 4410 case ARMISD::VLD1_UPD: { 4411 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4412 ARM::VLD1d16wb_fixed, 4413 ARM::VLD1d32wb_fixed, 4414 ARM::VLD1d64wb_fixed }; 4415 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4416 ARM::VLD1q16wb_fixed, 4417 ARM::VLD1q32wb_fixed, 4418 ARM::VLD1q64wb_fixed }; 4419 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4420 return; 4421 } 4422 4423 case ARMISD::VLD2_UPD: { 4424 if (Subtarget->hasNEON()) { 4425 static const uint16_t DOpcodes[] 
= { 4426 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4427 ARM::VLD1q64wb_fixed}; 4428 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4429 ARM::VLD2q16PseudoWB_fixed, 4430 ARM::VLD2q32PseudoWB_fixed}; 4431 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4432 } else { 4433 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4434 ARM::MVE_VLD21_8_wb}; 4435 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4436 ARM::MVE_VLD21_16_wb}; 4437 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4438 ARM::MVE_VLD21_32_wb}; 4439 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4440 SelectMVE_VLD(N, 2, Opcodes, true); 4441 } 4442 return; 4443 } 4444 4445 case ARMISD::VLD3_UPD: { 4446 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4447 ARM::VLD3d16Pseudo_UPD, 4448 ARM::VLD3d32Pseudo_UPD, 4449 ARM::VLD1d64TPseudoWB_fixed}; 4450 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4451 ARM::VLD3q16Pseudo_UPD, 4452 ARM::VLD3q32Pseudo_UPD }; 4453 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4454 ARM::VLD3q16oddPseudo_UPD, 4455 ARM::VLD3q32oddPseudo_UPD }; 4456 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4457 return; 4458 } 4459 4460 case ARMISD::VLD4_UPD: { 4461 if (Subtarget->hasNEON()) { 4462 static const uint16_t DOpcodes[] = { 4463 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4464 ARM::VLD1d64QPseudoWB_fixed}; 4465 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4466 ARM::VLD4q16Pseudo_UPD, 4467 ARM::VLD4q32Pseudo_UPD}; 4468 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4469 ARM::VLD4q16oddPseudo_UPD, 4470 ARM::VLD4q32oddPseudo_UPD}; 4471 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4472 } else { 4473 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4474 ARM::MVE_VLD42_8, 4475 ARM::MVE_VLD43_8_wb}; 4476 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4477 ARM::MVE_VLD42_16, 4478 ARM::MVE_VLD43_16_wb}; 4479 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4480 ARM::MVE_VLD42_32, 4481 ARM::MVE_VLD43_32_wb}; 4482 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4483 SelectMVE_VLD(N, 4, Opcodes, true); 4484 } 4485 return; 4486 } 4487 4488 case ARMISD::VLD1x2_UPD: { 4489 if (Subtarget->hasNEON()) { 4490 static const uint16_t DOpcodes[] = { 4491 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, 4492 ARM::VLD1q64wb_fixed}; 4493 static const uint16_t QOpcodes[] = { 4494 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4495 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4496 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4497 return; 4498 } 4499 break; 4500 } 4501 4502 case ARMISD::VLD1x3_UPD: { 4503 if (Subtarget->hasNEON()) { 4504 static const uint16_t DOpcodes[] = { 4505 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed, 4506 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed}; 4507 static const uint16_t QOpcodes0[] = { 4508 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD, 4509 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD}; 4510 static const uint16_t QOpcodes1[] = { 4511 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD, 4512 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD}; 4513 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4514 return; 4515 } 4516 break; 4517 } 4518 4519 case ARMISD::VLD1x4_UPD: { 4520 if 
(Subtarget->hasNEON()) { 4521 static const uint16_t DOpcodes[] = { 4522 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed, 4523 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed}; 4524 static const uint16_t QOpcodes0[] = { 4525 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD, 4526 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD}; 4527 static const uint16_t QOpcodes1[] = { 4528 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD, 4529 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD}; 4530 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4531 return; 4532 } 4533 break; 4534 } 4535 4536 case ARMISD::VLD2LN_UPD: { 4537 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4538 ARM::VLD2LNd16Pseudo_UPD, 4539 ARM::VLD2LNd32Pseudo_UPD }; 4540 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4541 ARM::VLD2LNq32Pseudo_UPD }; 4542 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4543 return; 4544 } 4545 4546 case ARMISD::VLD3LN_UPD: { 4547 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4548 ARM::VLD3LNd16Pseudo_UPD, 4549 ARM::VLD3LNd32Pseudo_UPD }; 4550 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4551 ARM::VLD3LNq32Pseudo_UPD }; 4552 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4553 return; 4554 } 4555 4556 case ARMISD::VLD4LN_UPD: { 4557 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4558 ARM::VLD4LNd16Pseudo_UPD, 4559 ARM::VLD4LNd32Pseudo_UPD }; 4560 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4561 ARM::VLD4LNq32Pseudo_UPD }; 4562 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4563 return; 4564 } 4565 4566 case ARMISD::VST1_UPD: { 4567 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4568 ARM::VST1d16wb_fixed, 4569 ARM::VST1d32wb_fixed, 4570 ARM::VST1d64wb_fixed }; 4571 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4572 ARM::VST1q16wb_fixed, 4573 ARM::VST1q32wb_fixed, 4574 ARM::VST1q64wb_fixed }; 4575 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4576 return; 4577 } 4578 4579 case ARMISD::VST2_UPD: { 4580 if (Subtarget->hasNEON()) { 4581 static const uint16_t DOpcodes[] = { 4582 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4583 ARM::VST1q64wb_fixed}; 4584 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4585 ARM::VST2q16PseudoWB_fixed, 4586 ARM::VST2q32PseudoWB_fixed}; 4587 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4588 return; 4589 } 4590 break; 4591 } 4592 4593 case ARMISD::VST3_UPD: { 4594 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 4595 ARM::VST3d16Pseudo_UPD, 4596 ARM::VST3d32Pseudo_UPD, 4597 ARM::VST1d64TPseudoWB_fixed}; 4598 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4599 ARM::VST3q16Pseudo_UPD, 4600 ARM::VST3q32Pseudo_UPD }; 4601 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4602 ARM::VST3q16oddPseudo_UPD, 4603 ARM::VST3q32oddPseudo_UPD }; 4604 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4605 return; 4606 } 4607 4608 case ARMISD::VST4_UPD: { 4609 if (Subtarget->hasNEON()) { 4610 static const uint16_t DOpcodes[] = { 4611 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4612 ARM::VST1d64QPseudoWB_fixed}; 4613 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4614 ARM::VST4q16Pseudo_UPD, 4615 ARM::VST4q32Pseudo_UPD}; 4616 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4617 ARM::VST4q16oddPseudo_UPD, 4618 ARM::VST4q32oddPseudo_UPD}; 4619 SelectVST(N, true, 4, 
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits
      // of the encoded instruction are always '1111'. It is possible in
      // assembly language to specify AL as a predicate to mrrc2, but it makes
      // no difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store-exclusive double returns an i32 value which is the return
      // status of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
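      // (Illustrative note: in ARM mode, STREXD/STLEXD require an even/odd
      // register pair, modeled here as a single GPRPair operand; the Thumb-2
      // forms take the two source registers as separate operands.)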
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), inspect the fields of the string
// and obtain the integer operands from them, adding these operands to the
// provided vector.
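// For example (illustrative), the string "cp15:0:c13:c0:3" would append the
// target constants {15, 0, 13, 0, 3} to Ops.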
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps MClass special registers string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin() + 2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin() + 2, WriteValue, WriteValue + 2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
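  // For example (illustrative): on an M-class core, a write_register of the
  // string "control" would select a t2MSR_M node whose SYSm operand is 0x14,
  // assuming getMClassRegisterMask accepts the register for this subtarget.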
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
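  // For example (illustrative), ARM-mode inline asm such as
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val64) : "r"(Ptr));
  // depends on %0/%H0 naming the low/high registers of one such pair.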
  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm,
    // and the second is a constant with the value of the immediate. If we
    // get here and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the
    // input operand. If we get here and we have a Kind::Mem, skip the next
    // operand (so it doesn't get misinterpreted), and continue. We do this
    // here because it's important to update the OpChanged array correctly
    // before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) ||
        NumRegs != 2)
      continue;

    assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i + 1);
    SDValue V1 = N->getOperand(i + 2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
                                CurDAG->getVTList(MVT::Other, MVT::Glue),
                                AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISelLegacy(TM, OptLevel);
}