1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/CodeGen/TargetLowering.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/LLVMContext.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Target/TargetOptions.h" 36 37 using namespace llvm; 38 39 #define DEBUG_TYPE "arm-isel" 40 41 static cl::opt<bool> 42 DisableShifterOp("disable-shifter-op", cl::Hidden, 43 cl::desc("Disable isel of shifter-op"), 44 cl::init(false)); 45 46 //===--------------------------------------------------------------------===// 47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 48 /// instructions for SelectionDAG operations. 
49 /// 50 namespace { 51 52 class ARMDAGToDAGISel : public SelectionDAGISel { 53 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 54 /// make the right decision when generating code for different targets. 55 const ARMSubtarget *Subtarget; 56 57 public: 58 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 59 : SelectionDAGISel(tm, OptLevel) {} 60 61 bool runOnMachineFunction(MachineFunction &MF) override { 62 // Reset the subtarget each time through. 63 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 64 SelectionDAGISel::runOnMachineFunction(MF); 65 return true; 66 } 67 68 StringRef getPassName() const override { return "ARM Instruction Selection"; } 69 70 void PreprocessISelDAG() override; 71 72 /// getI32Imm - Return a target constant of type i32 with the specified 73 /// value. 74 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 75 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 76 } 77 78 void Select(SDNode *N) override; 79 80 bool hasNoVMLxHazardUse(SDNode *N) const; 81 bool isShifterOpProfitable(const SDValue &Shift, 82 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 83 bool SelectRegShifterOperand(SDValue N, SDValue &A, 84 SDValue &B, SDValue &C, 85 bool CheckProfitability = true); 86 bool SelectImmShifterOperand(SDValue N, SDValue &A, 87 SDValue &B, bool CheckProfitability = true); 88 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, 89 SDValue &B, SDValue &C) { 90 // Don't apply the profitability check 91 return SelectRegShifterOperand(N, A, B, C, false); 92 } 93 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, 94 SDValue &B) { 95 // Don't apply the profitability check 96 return SelectImmShifterOperand(N, A, B, false); 97 } 98 99 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 100 101 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 102 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 103 104 bool 
SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 105 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 106 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 107 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 108 return true; 109 } 110 111 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 112 SDValue &Offset, SDValue &Opc); 113 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 114 SDValue &Offset, SDValue &Opc); 115 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 116 SDValue &Offset, SDValue &Opc); 117 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 118 bool SelectAddrMode3(SDValue N, SDValue &Base, 119 SDValue &Offset, SDValue &Opc); 120 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 121 SDValue &Offset, SDValue &Opc); 122 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 123 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 124 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 125 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 126 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 127 128 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 129 130 // Thumb Addressing Modes: 131 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 132 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 133 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 134 SDValue &OffImm); 135 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 136 SDValue &OffImm); 137 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 138 SDValue &OffImm); 139 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 140 SDValue &OffImm); 141 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 142 template <unsigned Shift> 143 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 144 145 // Thumb 2 Addressing Modes: 146 bool 
SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 147 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 148 SDValue &OffImm); 149 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 150 SDValue &OffImm); 151 template <unsigned Shift> 152 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 153 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 154 unsigned Shift); 155 template <unsigned Shift> 156 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 157 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 158 SDValue &OffReg, SDValue &ShImm); 159 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 160 161 inline bool is_so_imm(unsigned Imm) const { 162 return ARM_AM::getSOImmVal(Imm) != -1; 163 } 164 165 inline bool is_so_imm_not(unsigned Imm) const { 166 return ARM_AM::getSOImmVal(~Imm) != -1; 167 } 168 169 inline bool is_t2_so_imm(unsigned Imm) const { 170 return ARM_AM::getT2SOImmVal(Imm) != -1; 171 } 172 173 inline bool is_t2_so_imm_not(unsigned Imm) const { 174 return ARM_AM::getT2SOImmVal(~Imm) != -1; 175 } 176 177 // Include the pieces autogenerated from the target description. 178 #include "ARMGenDAGISel.inc" 179 180 private: 181 void transferMemOperands(SDNode *Src, SDNode *Dst); 182 183 /// Indexed (pre/post inc/dec) load matching code for ARM. 184 bool tryARMIndexedLoad(SDNode *N); 185 bool tryT1IndexedLoad(SDNode *N); 186 bool tryT2IndexedLoad(SDNode *N); 187 bool tryMVEIndexedLoad(SDNode *N); 188 189 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 190 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 191 /// loads of D registers and even subregs and odd subregs of Q registers. 192 /// For NumVecs <= 2, QOpcodes1 is not used. 
193 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 194 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 195 const uint16_t *QOpcodes1); 196 197 /// SelectVST - Select NEON store intrinsics. NumVecs should 198 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 199 /// stores of D registers and even subregs and odd subregs of Q registers. 200 /// For NumVecs <= 2, QOpcodes1 is not used. 201 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 202 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 203 const uint16_t *QOpcodes1); 204 205 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 206 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 207 /// load/store of D registers and Q registers. 208 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 209 unsigned NumVecs, const uint16_t *DOpcodes, 210 const uint16_t *QOpcodes); 211 212 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 213 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 214 /// for loading D registers. 215 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 216 unsigned NumVecs, const uint16_t *DOpcodes, 217 const uint16_t *QOpcodes0 = nullptr, 218 const uint16_t *QOpcodes1 = nullptr); 219 220 /// Try to select SBFX/UBFX instructions for ARM. 221 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 222 223 // Select special operations if node forms integer ABS pattern 224 bool tryABSOp(SDNode *N); 225 226 bool tryReadRegister(SDNode *N); 227 bool tryWriteRegister(SDNode *N); 228 229 bool tryInlineAsm(SDNode *N); 230 231 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 232 233 void SelectCMP_SWAP(SDNode *N); 234 235 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 236 /// inline asm expressions. 
237 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 238 std::vector<SDValue> &OutOps) override; 239 240 // Form pairs of consecutive R, S, D, or Q registers. 241 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 242 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 243 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 244 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 245 246 // Form sequences of 4 consecutive S, D, or Q registers. 247 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 248 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 249 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 250 251 // Get the alignment operand for a NEON VLD or VST instruction. 252 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 253 bool is64BitVector); 254 255 /// Checks if N is a multiplication by a constant where we can extract out a 256 /// power of two from the constant so that it can be used in a shift, but only 257 /// if it simplifies the materialization of the constant. Returns true if it 258 /// is, and assigns to PowerOfTwo the power of two that should be extracted 259 /// out and to NewMulConst the new constant to be multiplied by. 260 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 261 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 262 263 /// Replace N with M in CurDAG, in a way that also ensures that M gets 264 /// selected when N would have been selected. 265 void replaceDAGValue(const SDValue &N, SDValue M); 266 }; 267 } 268 269 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 270 /// operand. If so Imm will receive the 32-bit value. 
271 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 272 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 273 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 274 return true; 275 } 276 return false; 277 } 278 279 // isInt32Immediate - This method tests to see if a constant operand. 280 // If so Imm will receive the 32 bit value. 281 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 282 return isInt32Immediate(N.getNode(), Imm); 283 } 284 285 // isOpcWithIntImmediate - This method tests to see if the node is a specific 286 // opcode and that it has a immediate integer right operand. 287 // If so Imm will receive the 32 bit value. 288 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 289 return N->getOpcode() == Opc && 290 isInt32Immediate(N->getOperand(1).getNode(), Imm); 291 } 292 293 /// Check whether a particular node is a constant value representable as 294 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 295 /// 296 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 297 static bool isScaledConstantInRange(SDValue Node, int Scale, 298 int RangeMin, int RangeMax, 299 int &ScaledConstant) { 300 assert(Scale > 0 && "Invalid scale!"); 301 302 // Check that this is a constant. 303 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 304 if (!C) 305 return false; 306 307 ScaledConstant = (int) C->getZExtValue(); 308 if ((ScaledConstant % Scale) != 0) 309 return false; 310 311 ScaledConstant /= Scale; 312 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 313 } 314 315 void ARMDAGToDAGISel::PreprocessISelDAG() { 316 if (!Subtarget->hasV6T2Ops()) 317 return; 318 319 bool isThumb2 = Subtarget->isThumb(); 320 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 321 E = CurDAG->allnodes_end(); I != E; ) { 322 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 
323 324 if (N->getOpcode() != ISD::ADD) 325 continue; 326 327 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 328 // leading zeros, followed by consecutive set bits, followed by 1 or 2 329 // trailing zeros, e.g. 1020. 330 // Transform the expression to 331 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 332 // of trailing zeros of c2. The left shift would be folded as an shifter 333 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 334 // node (UBFX). 335 336 SDValue N0 = N->getOperand(0); 337 SDValue N1 = N->getOperand(1); 338 unsigned And_imm = 0; 339 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 340 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 341 std::swap(N0, N1); 342 } 343 if (!And_imm) 344 continue; 345 346 // Check if the AND mask is an immediate of the form: 000.....1111111100 347 unsigned TZ = countTrailingZeros(And_imm); 348 if (TZ != 1 && TZ != 2) 349 // Be conservative here. Shifter operands aren't always free. e.g. On 350 // Swift, left shifter operand of 1 / 2 for free but others are not. 351 // e.g. 352 // ubfx r3, r1, #16, #8 353 // ldr.w r3, [r0, r3, lsl #2] 354 // vs. 355 // mov.w r9, #1020 356 // and.w r2, r9, r1, lsr #14 357 // ldr r2, [r0, r2] 358 continue; 359 And_imm >>= TZ; 360 if (And_imm & (And_imm + 1)) 361 continue; 362 363 // Look for (and (srl X, c1), c2). 364 SDValue Srl = N1.getOperand(0); 365 unsigned Srl_imm = 0; 366 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 367 (Srl_imm <= 2)) 368 continue; 369 370 // Make sure first operand is not a shifter operand which would prevent 371 // folding of the left shift. 
372 SDValue CPTmp0; 373 SDValue CPTmp1; 374 SDValue CPTmp2; 375 if (isThumb2) { 376 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 377 continue; 378 } else { 379 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 380 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 381 continue; 382 } 383 384 // Now make the transformation. 385 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 386 Srl.getOperand(0), 387 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 388 MVT::i32)); 389 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 390 Srl, 391 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 392 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 393 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 394 CurDAG->UpdateNodeOperands(N, N0, N1); 395 } 396 } 397 398 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 399 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 400 /// least on current ARM implementations) which should be avoidded. 401 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 402 if (OptLevel == CodeGenOpt::None) 403 return true; 404 405 if (!Subtarget->hasVMLxHazards()) 406 return true; 407 408 if (!N->hasOneUse()) 409 return false; 410 411 SDNode *Use = *N->use_begin(); 412 if (Use->getOpcode() == ISD::CopyToReg) 413 return true; 414 if (Use->isMachineOpcode()) { 415 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 416 CurDAG->getSubtarget().getInstrInfo()); 417 418 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 419 if (MCID.mayStore()) 420 return true; 421 unsigned Opcode = MCID.getOpcode(); 422 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 423 return true; 424 // vmlx feeding into another vmlx. We actually want to unfold 425 // the use later in the MLxExpansion pass. e.g. 426 // vmla 427 // vmla (stall 8 cycles) 428 // 429 // vmul (5 cycles) 430 // vadd (5 cycles) 431 // vmla 432 // This adds up to about 18 - 19 cycles. 
433 // 434 // vmla 435 // vmul (stall 4 cycles) 436 // vadd adds up to about 14 cycles. 437 return TII->isFpMLxInstruction(Opcode); 438 } 439 440 return false; 441 } 442 443 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 444 ARM_AM::ShiftOpc ShOpcVal, 445 unsigned ShAmt) { 446 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 447 return true; 448 if (Shift.hasOneUse()) 449 return true; 450 // R << 2 is free. 451 return ShOpcVal == ARM_AM::lsl && 452 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 453 } 454 455 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 456 unsigned MaxShift, 457 unsigned &PowerOfTwo, 458 SDValue &NewMulConst) const { 459 assert(N.getOpcode() == ISD::MUL); 460 assert(MaxShift > 0); 461 462 // If the multiply is used in more than one place then changing the constant 463 // will make other uses incorrect, so don't. 464 if (!N.hasOneUse()) return false; 465 // Check if the multiply is by a constant 466 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 467 if (!MulConst) return false; 468 // If the constant is used in more than one place then modifying it will mean 469 // we need to materialize two constants instead of one, which is a bad idea. 
470 if (!MulConst->hasOneUse()) return false; 471 unsigned MulConstVal = MulConst->getZExtValue(); 472 if (MulConstVal == 0) return false; 473 474 // Find the largest power of 2 that MulConstVal is a multiple of 475 PowerOfTwo = MaxShift; 476 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 477 --PowerOfTwo; 478 if (PowerOfTwo == 0) return false; 479 } 480 481 // Only optimise if the new cost is better 482 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 483 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 484 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 485 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 486 return NewCost < OldCost; 487 } 488 489 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 490 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 491 ReplaceUses(N, M); 492 } 493 494 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 495 SDValue &BaseReg, 496 SDValue &Opc, 497 bool CheckProfitability) { 498 if (DisableShifterOp) 499 return false; 500 501 // If N is a multiply-by-constant and it's profitable to extract a shift and 502 // use it in a shifted operand do so. 503 if (N.getOpcode() == ISD::MUL) { 504 unsigned PowerOfTwo = 0; 505 SDValue NewMulConst; 506 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 507 HandleSDNode Handle(N); 508 SDLoc Loc(N); 509 replaceDAGValue(N.getOperand(1), NewMulConst); 510 BaseReg = Handle.getValue(); 511 Opc = CurDAG->getTargetConstant( 512 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 513 return true; 514 } 515 } 516 517 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 518 519 // Don't match base register only case. That is matched to a separate 520 // lower complexity pattern with explicit register operand. 
521 if (ShOpcVal == ARM_AM::no_shift) return false; 522 523 BaseReg = N.getOperand(0); 524 unsigned ShImmVal = 0; 525 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 526 if (!RHS) return false; 527 ShImmVal = RHS->getZExtValue() & 31; 528 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 529 SDLoc(N), MVT::i32); 530 return true; 531 } 532 533 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 534 SDValue &BaseReg, 535 SDValue &ShReg, 536 SDValue &Opc, 537 bool CheckProfitability) { 538 if (DisableShifterOp) 539 return false; 540 541 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 542 543 // Don't match base register only case. That is matched to a separate 544 // lower complexity pattern with explicit register operand. 545 if (ShOpcVal == ARM_AM::no_shift) return false; 546 547 BaseReg = N.getOperand(0); 548 unsigned ShImmVal = 0; 549 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 550 if (RHS) return false; 551 552 ShReg = N.getOperand(1); 553 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 554 return false; 555 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 556 SDLoc(N), MVT::i32); 557 return true; 558 } 559 560 // Determine whether an ISD::OR's operands are suitable to turn the operation 561 // into an addition, which often has more compact encodings. 562 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 563 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 564 Out = N; 565 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 566 } 567 568 569 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 570 SDValue &Base, 571 SDValue &OffImm) { 572 // Match simple R + imm12 operands. 573 574 // Base only. 575 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 576 !CurDAG->isBaseWithConstantOffset(N)) { 577 if (N.getOpcode() == ISD::FrameIndex) { 578 // Match frame index. 
579 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 580 Base = CurDAG->getTargetFrameIndex( 581 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 582 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 583 return true; 584 } 585 586 if (N.getOpcode() == ARMISD::Wrapper && 587 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 588 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 589 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 590 Base = N.getOperand(0); 591 } else 592 Base = N; 593 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 594 return true; 595 } 596 597 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 598 int RHSC = (int)RHS->getSExtValue(); 599 if (N.getOpcode() == ISD::SUB) 600 RHSC = -RHSC; 601 602 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 603 Base = N.getOperand(0); 604 if (Base.getOpcode() == ISD::FrameIndex) { 605 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 606 Base = CurDAG->getTargetFrameIndex( 607 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 608 } 609 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 610 return true; 611 } 612 } 613 614 // Base only. 615 Base = N; 616 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 617 return true; 618 } 619 620 621 622 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 623 SDValue &Opc) { 624 if (N.getOpcode() == ISD::MUL && 625 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 626 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 627 // X * [3,5,9] -> X + X * [2,4,8] etc. 
628 int RHSC = (int)RHS->getZExtValue(); 629 if (RHSC & 1) { 630 RHSC = RHSC & ~1; 631 ARM_AM::AddrOpc AddSub = ARM_AM::add; 632 if (RHSC < 0) { 633 AddSub = ARM_AM::sub; 634 RHSC = - RHSC; 635 } 636 if (isPowerOf2_32(RHSC)) { 637 unsigned ShAmt = Log2_32(RHSC); 638 Base = Offset = N.getOperand(0); 639 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 640 ARM_AM::lsl), 641 SDLoc(N), MVT::i32); 642 return true; 643 } 644 } 645 } 646 } 647 648 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 649 // ISD::OR that is equivalent to an ISD::ADD. 650 !CurDAG->isBaseWithConstantOffset(N)) 651 return false; 652 653 // Leave simple R +/- imm12 operands for LDRi12 654 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 655 int RHSC; 656 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 657 -0x1000+1, 0x1000, RHSC)) // 12 bits. 658 return false; 659 } 660 661 // Otherwise this is R +/- [possibly shifted] R. 662 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 663 ARM_AM::ShiftOpc ShOpcVal = 664 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 665 unsigned ShAmt = 0; 666 667 Base = N.getOperand(0); 668 Offset = N.getOperand(1); 669 670 if (ShOpcVal != ARM_AM::no_shift) { 671 // Check to see if the RHS of the shift is a constant, if not, we can't fold 672 // it. 673 if (ConstantSDNode *Sh = 674 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 675 ShAmt = Sh->getZExtValue(); 676 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 677 Offset = N.getOperand(1).getOperand(0); 678 else { 679 ShAmt = 0; 680 ShOpcVal = ARM_AM::no_shift; 681 } 682 } else { 683 ShOpcVal = ARM_AM::no_shift; 684 } 685 } 686 687 // Try matching (R shl C) + (R). 
688 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 689 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 690 N.getOperand(0).hasOneUse())) { 691 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 692 if (ShOpcVal != ARM_AM::no_shift) { 693 // Check to see if the RHS of the shift is a constant, if not, we can't 694 // fold it. 695 if (ConstantSDNode *Sh = 696 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 697 ShAmt = Sh->getZExtValue(); 698 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 699 Offset = N.getOperand(0).getOperand(0); 700 Base = N.getOperand(1); 701 } else { 702 ShAmt = 0; 703 ShOpcVal = ARM_AM::no_shift; 704 } 705 } else { 706 ShOpcVal = ARM_AM::no_shift; 707 } 708 } 709 } 710 711 // If Offset is a multiply-by-constant and it's profitable to extract a shift 712 // and use it in a shifted operand do so. 713 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 714 unsigned PowerOfTwo = 0; 715 SDValue NewMulConst; 716 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 717 HandleSDNode Handle(Offset); 718 replaceDAGValue(Offset.getOperand(1), NewMulConst); 719 Offset = Handle.getValue(); 720 ShAmt = PowerOfTwo; 721 ShOpcVal = ARM_AM::lsl; 722 } 723 } 724 725 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 726 SDLoc(N), MVT::i32); 727 return true; 728 } 729 730 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 731 SDValue &Offset, SDValue &Opc) { 732 unsigned Opcode = Op->getOpcode(); 733 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 734 ? cast<LoadSDNode>(Op)->getAddressingMode() 735 : cast<StoreSDNode>(Op)->getAddressingMode(); 736 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 737 ? 
ARM_AM::add : ARM_AM::sub; 738 int Val; 739 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 740 return false; 741 742 Offset = N; 743 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 744 unsigned ShAmt = 0; 745 if (ShOpcVal != ARM_AM::no_shift) { 746 // Check to see if the RHS of the shift is a constant, if not, we can't fold 747 // it. 748 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 749 ShAmt = Sh->getZExtValue(); 750 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 751 Offset = N.getOperand(0); 752 else { 753 ShAmt = 0; 754 ShOpcVal = ARM_AM::no_shift; 755 } 756 } else { 757 ShOpcVal = ARM_AM::no_shift; 758 } 759 } 760 761 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 762 SDLoc(N), MVT::i32); 763 return true; 764 } 765 766 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 767 SDValue &Offset, SDValue &Opc) { 768 unsigned Opcode = Op->getOpcode(); 769 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 770 ? cast<LoadSDNode>(Op)->getAddressingMode() 771 : cast<StoreSDNode>(Op)->getAddressingMode(); 772 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 773 ? ARM_AM::add : ARM_AM::sub; 774 int Val; 775 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 776 if (AddSub == ARM_AM::sub) Val *= -1; 777 Offset = CurDAG->getRegister(0, MVT::i32); 778 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 779 return true; 780 } 781 782 return false; 783 } 784 785 786 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 787 SDValue &Offset, SDValue &Opc) { 788 unsigned Opcode = Op->getOpcode(); 789 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 790 ? cast<LoadSDNode>(Op)->getAddressingMode() 791 : cast<StoreSDNode>(Op)->getAddressingMode(); 792 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 793 ? 
ARM_AM::add : ARM_AM::sub; 794 int Val; 795 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 796 Offset = CurDAG->getRegister(0, MVT::i32); 797 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 798 ARM_AM::no_shift), 799 SDLoc(Op), MVT::i32); 800 return true; 801 } 802 803 return false; 804 } 805 806 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 807 Base = N; 808 return true; 809 } 810 811 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 812 SDValue &Base, SDValue &Offset, 813 SDValue &Opc) { 814 if (N.getOpcode() == ISD::SUB) { 815 // X - C is canonicalize to X + -C, no need to handle it here. 816 Base = N.getOperand(0); 817 Offset = N.getOperand(1); 818 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 819 MVT::i32); 820 return true; 821 } 822 823 if (!CurDAG->isBaseWithConstantOffset(N)) { 824 Base = N; 825 if (N.getOpcode() == ISD::FrameIndex) { 826 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 827 Base = CurDAG->getTargetFrameIndex( 828 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 829 } 830 Offset = CurDAG->getRegister(0, MVT::i32); 831 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 832 MVT::i32); 833 return true; 834 } 835 836 // If the RHS is +/- imm8, fold into addr mode. 837 int RHSC; 838 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 839 -256 + 1, 256, RHSC)) { // 8 bits. 
840 Base = N.getOperand(0); 841 if (Base.getOpcode() == ISD::FrameIndex) { 842 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 843 Base = CurDAG->getTargetFrameIndex( 844 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 845 } 846 Offset = CurDAG->getRegister(0, MVT::i32); 847 848 ARM_AM::AddrOpc AddSub = ARM_AM::add; 849 if (RHSC < 0) { 850 AddSub = ARM_AM::sub; 851 RHSC = -RHSC; 852 } 853 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 854 MVT::i32); 855 return true; 856 } 857 858 Base = N.getOperand(0); 859 Offset = N.getOperand(1); 860 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 861 MVT::i32); 862 return true; 863 } 864 865 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 866 SDValue &Offset, SDValue &Opc) { 867 unsigned Opcode = Op->getOpcode(); 868 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 869 ? cast<LoadSDNode>(Op)->getAddressingMode() 870 : cast<StoreSDNode>(Op)->getAddressingMode(); 871 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 872 ? ARM_AM::add : ARM_AM::sub; 873 int Val; 874 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 
875 Offset = CurDAG->getRegister(0, MVT::i32); 876 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 877 MVT::i32); 878 return true; 879 } 880 881 Offset = N; 882 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 883 MVT::i32); 884 return true; 885 } 886 887 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 888 bool FP16) { 889 if (!CurDAG->isBaseWithConstantOffset(N)) { 890 Base = N; 891 if (N.getOpcode() == ISD::FrameIndex) { 892 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 893 Base = CurDAG->getTargetFrameIndex( 894 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 895 } else if (N.getOpcode() == ARMISD::Wrapper && 896 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 897 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 898 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 899 Base = N.getOperand(0); 900 } 901 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 902 SDLoc(N), MVT::i32); 903 return true; 904 } 905 906 // If the RHS is +/- imm8, fold into addr mode. 907 int RHSC; 908 const int Scale = FP16 ? 
2 : 4; 909 910 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 911 Base = N.getOperand(0); 912 if (Base.getOpcode() == ISD::FrameIndex) { 913 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 914 Base = CurDAG->getTargetFrameIndex( 915 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 916 } 917 918 ARM_AM::AddrOpc AddSub = ARM_AM::add; 919 if (RHSC < 0) { 920 AddSub = ARM_AM::sub; 921 RHSC = -RHSC; 922 } 923 924 if (FP16) 925 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 926 SDLoc(N), MVT::i32); 927 else 928 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 929 SDLoc(N), MVT::i32); 930 931 return true; 932 } 933 934 Base = N; 935 936 if (FP16) 937 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 938 SDLoc(N), MVT::i32); 939 else 940 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 941 SDLoc(N), MVT::i32); 942 943 return true; 944 } 945 946 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 947 SDValue &Base, SDValue &Offset) { 948 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 949 } 950 951 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 952 SDValue &Base, SDValue &Offset) { 953 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 954 } 955 956 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 957 SDValue &Align) { 958 Addr = N; 959 960 unsigned Alignment = 0; 961 962 MemSDNode *MemN = cast<MemSDNode>(Parent); 963 964 if (isa<LSBaseSDNode>(MemN) || 965 ((MemN->getOpcode() == ARMISD::VST1_UPD || 966 MemN->getOpcode() == ARMISD::VLD1_UPD) && 967 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 968 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 969 // The maximum alignment is equal to the memory size being referenced. 
970 unsigned MMOAlign = MemN->getAlignment(); 971 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 972 if (MMOAlign >= MemSize && MemSize > 1) 973 Alignment = MemSize; 974 } else { 975 // All other uses of addrmode6 are for intrinsics. For now just record 976 // the raw alignment value; it will be refined later based on the legal 977 // alignment operands for the intrinsic. 978 Alignment = MemN->getAlignment(); 979 } 980 981 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 982 return true; 983 } 984 985 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 986 SDValue &Offset) { 987 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 988 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 989 if (AM != ISD::POST_INC) 990 return false; 991 Offset = N; 992 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 993 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 994 Offset = CurDAG->getRegister(0, MVT::i32); 995 } 996 return true; 997 } 998 999 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1000 SDValue &Offset, SDValue &Label) { 1001 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1002 Offset = N.getOperand(0); 1003 SDValue N1 = N.getOperand(1); 1004 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1005 SDLoc(N), MVT::i32); 1006 return true; 1007 } 1008 1009 return false; 1010 } 1011 1012 1013 //===----------------------------------------------------------------------===// 1014 // Thumb Addressing Modes 1015 //===----------------------------------------------------------------------===// 1016 1017 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1018 // Negative numbers are difficult to materialise in thumb1. If we are 1019 // selecting the add of a negative, instead try to select ri with a zero 1020 // offset, so create the add node directly which will become a sub. 
1021 if (N.getOpcode() != ISD::ADD) 1022 return false; 1023 1024 // Look for an imm which is not legal for ld/st, but is legal for sub. 1025 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1026 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1027 1028 return false; 1029 } 1030 1031 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1032 SDValue &Offset) { 1033 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1034 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1035 if (!NC || !NC->isNullValue()) 1036 return false; 1037 1038 Base = Offset = N; 1039 return true; 1040 } 1041 1042 Base = N.getOperand(0); 1043 Offset = N.getOperand(1); 1044 return true; 1045 } 1046 1047 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1048 SDValue &Offset) { 1049 if (shouldUseZeroOffsetLdSt(N)) 1050 return false; // Select ri instead 1051 return SelectThumbAddrModeRRSext(N, Base, Offset); 1052 } 1053 1054 bool 1055 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1056 SDValue &Base, SDValue &OffImm) { 1057 if (shouldUseZeroOffsetLdSt(N)) { 1058 Base = N; 1059 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1060 return true; 1061 } 1062 1063 if (!CurDAG->isBaseWithConstantOffset(N)) { 1064 if (N.getOpcode() == ISD::ADD) { 1065 return false; // We want to select register offset instead 1066 } else if (N.getOpcode() == ARMISD::Wrapper && 1067 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1068 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1069 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1070 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1071 Base = N.getOperand(0); 1072 } else { 1073 Base = N; 1074 } 1075 1076 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1077 return true; 1078 } 1079 1080 // If the RHS is + imm5 * scale, fold into addr mode. 
1081 int RHSC; 1082 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1083 Base = N.getOperand(0); 1084 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1085 return true; 1086 } 1087 1088 // Offset is too large, so use register offset instead. 1089 return false; 1090 } 1091 1092 bool 1093 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1094 SDValue &OffImm) { 1095 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1096 } 1097 1098 bool 1099 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1100 SDValue &OffImm) { 1101 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1102 } 1103 1104 bool 1105 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1106 SDValue &OffImm) { 1107 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1108 } 1109 1110 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1111 SDValue &Base, SDValue &OffImm) { 1112 if (N.getOpcode() == ISD::FrameIndex) { 1113 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1114 // Only multiples of 4 are allowed for the offset, so the frame object 1115 // alignment must be at least 4. 1116 MachineFrameInfo &MFI = MF->getFrameInfo(); 1117 if (MFI.getObjectAlignment(FI) < 4) 1118 MFI.setObjectAlignment(FI, 4); 1119 Base = CurDAG->getTargetFrameIndex( 1120 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1121 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1122 return true; 1123 } 1124 1125 if (!CurDAG->isBaseWithConstantOffset(N)) 1126 return false; 1127 1128 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1129 // If the RHS is + imm8 * scale, fold into addr mode. 1130 int RHSC; 1131 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1132 Base = N.getOperand(0); 1133 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1134 // Make sure the offset is inside the object, or we might fail to 1135 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1136 // it could show up anyway.) 1137 MachineFrameInfo &MFI = MF->getFrameInfo(); 1138 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1139 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1140 // indexed by the LHS must be 4-byte aligned. 1141 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4) 1142 MFI.setObjectAlignment(FI, 4); 1143 if (MFI.getObjectAlignment(FI) >= 4) { 1144 Base = CurDAG->getTargetFrameIndex( 1145 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1146 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1147 return true; 1148 } 1149 } 1150 } 1151 } 1152 1153 return false; 1154 } 1155 1156 template <unsigned Shift> 1157 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1158 SDValue &OffImm) { 1159 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1160 int RHSC; 1161 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1162 RHSC)) { 1163 Base = N.getOperand(0); 1164 if (N.getOpcode() == ISD::SUB) 1165 RHSC = -RHSC; 1166 OffImm = 1167 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1168 return true; 1169 } 1170 } 1171 1172 // Base only. 1173 Base = N; 1174 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1175 return true; 1176 } 1177 1178 1179 //===----------------------------------------------------------------------===// 1180 // Thumb 2 Addressing Modes 1181 //===----------------------------------------------------------------------===// 1182 1183 1184 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1185 SDValue &Base, SDValue &OffImm) { 1186 // Match simple R + imm12 operands. 1187 1188 // Base only. 1189 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1190 !CurDAG->isBaseWithConstantOffset(N)) { 1191 if (N.getOpcode() == ISD::FrameIndex) { 1192 // Match frame index. 
1193 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1194 Base = CurDAG->getTargetFrameIndex( 1195 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1196 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1197 return true; 1198 } 1199 1200 if (N.getOpcode() == ARMISD::Wrapper && 1201 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1202 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1203 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1204 Base = N.getOperand(0); 1205 if (Base.getOpcode() == ISD::TargetConstantPool) 1206 return false; // We want to select t2LDRpci instead. 1207 } else 1208 Base = N; 1209 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1210 return true; 1211 } 1212 1213 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1214 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1215 // Let t2LDRi8 handle (R - imm8). 1216 return false; 1217 1218 int RHSC = (int)RHS->getZExtValue(); 1219 if (N.getOpcode() == ISD::SUB) 1220 RHSC = -RHSC; 1221 1222 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1223 Base = N.getOperand(0); 1224 if (Base.getOpcode() == ISD::FrameIndex) { 1225 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1226 Base = CurDAG->getTargetFrameIndex( 1227 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1228 } 1229 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1230 return true; 1231 } 1232 } 1233 1234 // Base only. 1235 Base = N; 1236 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1237 return true; 1238 } 1239 1240 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1241 SDValue &Base, SDValue &OffImm) { 1242 // Match simple R - imm8 operands. 
1243 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1244 !CurDAG->isBaseWithConstantOffset(N)) 1245 return false; 1246 1247 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1248 int RHSC = (int)RHS->getSExtValue(); 1249 if (N.getOpcode() == ISD::SUB) 1250 RHSC = -RHSC; 1251 1252 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1253 Base = N.getOperand(0); 1254 if (Base.getOpcode() == ISD::FrameIndex) { 1255 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1256 Base = CurDAG->getTargetFrameIndex( 1257 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1258 } 1259 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1260 return true; 1261 } 1262 } 1263 1264 return false; 1265 } 1266 1267 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1268 SDValue &OffImm){ 1269 unsigned Opcode = Op->getOpcode(); 1270 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1271 ? cast<LoadSDNode>(Op)->getAddressingMode() 1272 : cast<StoreSDNode>(Op)->getAddressingMode(); 1273 int RHSC; 1274 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1275 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1276 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1277 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1278 return true; 1279 } 1280 1281 return false; 1282 } 1283 1284 template <unsigned Shift> 1285 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1286 SDValue &OffImm) { 1287 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1288 int RHSC; 1289 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1290 RHSC)) { 1291 Base = N.getOperand(0); 1292 if (Base.getOpcode() == ISD::FrameIndex) { 1293 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1294 Base = CurDAG->getTargetFrameIndex( 1295 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1296 } 1297 1298 if (N.getOpcode() == ISD::SUB) 1299 RHSC = -RHSC; 1300 OffImm = 1301 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1302 return true; 1303 } 1304 } 1305 1306 // Base only. 1307 Base = N; 1308 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1309 return true; 1310 } 1311 1312 template <unsigned Shift> 1313 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1314 SDValue &OffImm) { 1315 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1316 } 1317 1318 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1319 SDValue &OffImm, 1320 unsigned Shift) { 1321 unsigned Opcode = Op->getOpcode(); 1322 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1323 ? cast<LoadSDNode>(Op)->getAddressingMode() 1324 : cast<StoreSDNode>(Op)->getAddressingMode(); 1325 int RHSC; 1326 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. 1327 OffImm = 1328 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1329 ? 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1330 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1331 MVT::i32); 1332 return true; 1333 } 1334 return false; 1335 } 1336 1337 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1338 SDValue &Base, 1339 SDValue &OffReg, SDValue &ShImm) { 1340 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1341 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1342 return false; 1343 1344 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1345 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1346 int RHSC = (int)RHS->getZExtValue(); 1347 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1348 return false; 1349 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1350 return false; 1351 } 1352 1353 // Look for (R + R) or (R + (R << [1,2,3])). 1354 unsigned ShAmt = 0; 1355 Base = N.getOperand(0); 1356 OffReg = N.getOperand(1); 1357 1358 // Swap if it is ((R << c) + R). 1359 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1360 if (ShOpcVal != ARM_AM::lsl) { 1361 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1362 if (ShOpcVal == ARM_AM::lsl) 1363 std::swap(Base, OffReg); 1364 } 1365 1366 if (ShOpcVal == ARM_AM::lsl) { 1367 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1368 // it. 1369 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1370 ShAmt = Sh->getZExtValue(); 1371 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1372 OffReg = OffReg.getOperand(0); 1373 else { 1374 ShAmt = 0; 1375 } 1376 } 1377 } 1378 1379 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1380 // and use it in a shifted operand do so. 
1381 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1382 unsigned PowerOfTwo = 0; 1383 SDValue NewMulConst; 1384 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1385 HandleSDNode Handle(OffReg); 1386 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1387 OffReg = Handle.getValue(); 1388 ShAmt = PowerOfTwo; 1389 } 1390 } 1391 1392 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1393 1394 return true; 1395 } 1396 1397 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1398 SDValue &OffImm) { 1399 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1400 // instructions. 1401 Base = N; 1402 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1403 1404 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1405 return true; 1406 1407 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1408 if (!RHS) 1409 return true; 1410 1411 uint32_t RHSC = (int)RHS->getZExtValue(); 1412 if (RHSC > 1020 || RHSC % 4 != 0) 1413 return true; 1414 1415 Base = N.getOperand(0); 1416 if (Base.getOpcode() == ISD::FrameIndex) { 1417 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1418 Base = CurDAG->getTargetFrameIndex( 1419 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1420 } 1421 1422 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1423 return true; 1424 } 1425 1426 //===--------------------------------------------------------------------===// 1427 1428 /// getAL - Returns a ARMCC::AL immediate node. 
1429 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1430 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1431 } 1432 1433 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1434 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1435 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1436 } 1437 1438 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1439 LoadSDNode *LD = cast<LoadSDNode>(N); 1440 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1441 if (AM == ISD::UNINDEXED) 1442 return false; 1443 1444 EVT LoadedVT = LD->getMemoryVT(); 1445 SDValue Offset, AMOpc; 1446 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1447 unsigned Opcode = 0; 1448 bool Match = false; 1449 if (LoadedVT == MVT::i32 && isPre && 1450 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1451 Opcode = ARM::LDR_PRE_IMM; 1452 Match = true; 1453 } else if (LoadedVT == MVT::i32 && !isPre && 1454 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1455 Opcode = ARM::LDR_POST_IMM; 1456 Match = true; 1457 } else if (LoadedVT == MVT::i32 && 1458 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1459 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1460 Match = true; 1461 1462 } else if (LoadedVT == MVT::i16 && 1463 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1464 Match = true; 1465 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1466 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1467 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1468 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1469 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1470 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1471 Match = true; 1472 Opcode = isPre ? 
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1473 } 1474 } else { 1475 if (isPre && 1476 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1477 Match = true; 1478 Opcode = ARM::LDRB_PRE_IMM; 1479 } else if (!isPre && 1480 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1481 Match = true; 1482 Opcode = ARM::LDRB_POST_IMM; 1483 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1484 Match = true; 1485 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1486 } 1487 } 1488 } 1489 1490 if (Match) { 1491 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1492 SDValue Chain = LD->getChain(); 1493 SDValue Base = LD->getBasePtr(); 1494 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1495 CurDAG->getRegister(0, MVT::i32), Chain }; 1496 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1497 MVT::Other, Ops); 1498 transferMemOperands(N, New); 1499 ReplaceNode(N, New); 1500 return true; 1501 } else { 1502 SDValue Chain = LD->getChain(); 1503 SDValue Base = LD->getBasePtr(); 1504 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1505 CurDAG->getRegister(0, MVT::i32), Chain }; 1506 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1507 MVT::Other, Ops); 1508 transferMemOperands(N, New); 1509 ReplaceNode(N, New); 1510 return true; 1511 } 1512 } 1513 1514 return false; 1515 } 1516 1517 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1518 LoadSDNode *LD = cast<LoadSDNode>(N); 1519 EVT LoadedVT = LD->getMemoryVT(); 1520 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1521 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1522 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1523 return false; 1524 1525 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1526 if (!COffs || COffs->getZExtValue() != 4) 1527 return false; 1528 1529 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 
1530 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1531 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1532 // ISel. 1533 SDValue Chain = LD->getChain(); 1534 SDValue Base = LD->getBasePtr(); 1535 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1536 CurDAG->getRegister(0, MVT::i32), Chain }; 1537 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1538 MVT::i32, MVT::Other, Ops); 1539 transferMemOperands(N, New); 1540 ReplaceNode(N, New); 1541 return true; 1542 } 1543 1544 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1545 LoadSDNode *LD = cast<LoadSDNode>(N); 1546 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1547 if (AM == ISD::UNINDEXED) 1548 return false; 1549 1550 EVT LoadedVT = LD->getMemoryVT(); 1551 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1552 SDValue Offset; 1553 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1554 unsigned Opcode = 0; 1555 bool Match = false; 1556 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1557 switch (LoadedVT.getSimpleVT().SimpleTy) { 1558 case MVT::i32: 1559 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1560 break; 1561 case MVT::i16: 1562 if (isSExtLd) 1563 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1564 else 1565 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1566 break; 1567 case MVT::i8: 1568 case MVT::i1: 1569 if (isSExtLd) 1570 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1571 else 1572 Opcode = isPre ? 
ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1573 break; 1574 default: 1575 return false; 1576 } 1577 Match = true; 1578 } 1579 1580 if (Match) { 1581 SDValue Chain = LD->getChain(); 1582 SDValue Base = LD->getBasePtr(); 1583 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), 1584 CurDAG->getRegister(0, MVT::i32), Chain }; 1585 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1586 MVT::Other, Ops); 1587 transferMemOperands(N, New); 1588 ReplaceNode(N, New); 1589 return true; 1590 } 1591 1592 return false; 1593 } 1594 1595 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { 1596 LoadSDNode *LD = cast<LoadSDNode>(N); 1597 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1598 if (AM == ISD::UNINDEXED) 1599 return false; 1600 EVT LoadedVT = LD->getMemoryVT(); 1601 if (!LoadedVT.isVector()) 1602 return false; 1603 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1604 SDValue Offset; 1605 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1606 unsigned Opcode = 0; 1607 unsigned Align = LD->getAlignment(); 1608 bool IsLE = Subtarget->isLittle(); 1609 1610 if (Align >= 2 && LoadedVT == MVT::v4i16 && 1611 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { 1612 if (isSExtLd) 1613 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; 1614 else 1615 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; 1616 } else if (LoadedVT == MVT::v8i8 && 1617 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { 1618 if (isSExtLd) 1619 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; 1620 else 1621 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; 1622 } else if (LoadedVT == MVT::v4i8 && 1623 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { 1624 if (isSExtLd) 1625 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; 1626 else 1627 Opcode = isPre ? 
ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; 1628 } else if (Align >= 4 && 1629 (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && 1630 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) 1631 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; 1632 else if (Align >= 2 && 1633 (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && 1634 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) 1635 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; 1636 else if ((IsLE || LoadedVT == MVT::v16i8) && 1637 SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) 1638 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; 1639 else 1640 return false; 1641 1642 SDValue Chain = LD->getChain(); 1643 SDValue Base = LD->getBasePtr(); 1644 SDValue Ops[] = {Base, Offset, 1645 CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), 1646 CurDAG->getRegister(0, MVT::i32), Chain}; 1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), 1648 MVT::i32, MVT::Other, Ops); 1649 transferMemOperands(N, New); 1650 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 1651 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 1652 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 1653 CurDAG->RemoveDeadNode(N); 1654 return true; 1655 } 1656 1657 /// Form a GPRPair pseudo register from a pair of GPR regs. 1658 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1659 SDLoc dl(V0.getNode()); 1660 SDValue RegClass = 1661 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); 1662 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 1663 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 1664 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1665 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1666 } 1667 1668 /// Form a D register from a pair of S registers. 
1669 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1670 SDLoc dl(V0.getNode()); 1671 SDValue RegClass = 1672 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32); 1673 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1674 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1675 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1676 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1677 } 1678 1679 /// Form a quad register from a pair of D registers. 1680 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1681 SDLoc dl(V0.getNode()); 1682 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1683 MVT::i32); 1684 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1685 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1686 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1687 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1688 } 1689 1690 /// Form 4 consecutive D registers from a pair of Q registers. 1691 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1692 SDLoc dl(V0.getNode()); 1693 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1694 MVT::i32); 1695 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1696 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1697 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1698 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1699 } 1700 1701 /// Form 4 consecutive S registers. 
1702 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1703 SDValue V2, SDValue V3) { 1704 SDLoc dl(V0.getNode()); 1705 SDValue RegClass = 1706 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1707 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1708 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1709 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1710 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1711 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1712 V2, SubReg2, V3, SubReg3 }; 1713 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1714 } 1715 1716 /// Form 4 consecutive D registers. 1717 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1718 SDValue V2, SDValue V3) { 1719 SDLoc dl(V0.getNode()); 1720 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1721 MVT::i32); 1722 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1723 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1724 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1725 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1726 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1727 V2, SubReg2, V3, SubReg3 }; 1728 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1729 } 1730 1731 /// Form 4 consecutive Q registers. 
1732 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1733 SDValue V2, SDValue V3) { 1734 SDLoc dl(V0.getNode()); 1735 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1736 MVT::i32); 1737 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1738 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1739 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1740 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1741 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1742 V2, SubReg2, V3, SubReg3 }; 1743 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1744 } 1745 1746 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1747 /// of a NEON VLD or VST instruction. The supported values depend on the 1748 /// number of registers being loaded. 1749 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1750 unsigned NumVecs, bool is64BitVector) { 1751 unsigned NumRegs = NumVecs; 1752 if (!is64BitVector && NumVecs < 3) 1753 NumRegs *= 2; 1754 1755 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1756 if (Alignment >= 32 && NumRegs == 4) 1757 Alignment = 32; 1758 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1759 Alignment = 16; 1760 else if (Alignment >= 8) 1761 Alignment = 8; 1762 else 1763 Alignment = 0; 1764 1765 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1766 } 1767 1768 static bool isVLDfixed(unsigned Opc) 1769 { 1770 switch (Opc) { 1771 default: return false; 1772 case ARM::VLD1d8wb_fixed : return true; 1773 case ARM::VLD1d16wb_fixed : return true; 1774 case ARM::VLD1d64Qwb_fixed : return true; 1775 case ARM::VLD1d32wb_fixed : return true; 1776 case ARM::VLD1d64wb_fixed : return true; 1777 case ARM::VLD1d64TPseudoWB_fixed : return true; 1778 case ARM::VLD1d64QPseudoWB_fixed : return true; 1779 case ARM::VLD1q8wb_fixed : 
return true; 1780 case ARM::VLD1q16wb_fixed : return true; 1781 case ARM::VLD1q32wb_fixed : return true; 1782 case ARM::VLD1q64wb_fixed : return true; 1783 case ARM::VLD1DUPd8wb_fixed : return true; 1784 case ARM::VLD1DUPd16wb_fixed : return true; 1785 case ARM::VLD1DUPd32wb_fixed : return true; 1786 case ARM::VLD1DUPq8wb_fixed : return true; 1787 case ARM::VLD1DUPq16wb_fixed : return true; 1788 case ARM::VLD1DUPq32wb_fixed : return true; 1789 case ARM::VLD2d8wb_fixed : return true; 1790 case ARM::VLD2d16wb_fixed : return true; 1791 case ARM::VLD2d32wb_fixed : return true; 1792 case ARM::VLD2q8PseudoWB_fixed : return true; 1793 case ARM::VLD2q16PseudoWB_fixed : return true; 1794 case ARM::VLD2q32PseudoWB_fixed : return true; 1795 case ARM::VLD2DUPd8wb_fixed : return true; 1796 case ARM::VLD2DUPd16wb_fixed : return true; 1797 case ARM::VLD2DUPd32wb_fixed : return true; 1798 } 1799 } 1800 1801 static bool isVSTfixed(unsigned Opc) 1802 { 1803 switch (Opc) { 1804 default: return false; 1805 case ARM::VST1d8wb_fixed : return true; 1806 case ARM::VST1d16wb_fixed : return true; 1807 case ARM::VST1d32wb_fixed : return true; 1808 case ARM::VST1d64wb_fixed : return true; 1809 case ARM::VST1q8wb_fixed : return true; 1810 case ARM::VST1q16wb_fixed : return true; 1811 case ARM::VST1q32wb_fixed : return true; 1812 case ARM::VST1q64wb_fixed : return true; 1813 case ARM::VST1d64TPseudoWB_fixed : return true; 1814 case ARM::VST1d64QPseudoWB_fixed : return true; 1815 case ARM::VST2d8wb_fixed : return true; 1816 case ARM::VST2d16wb_fixed : return true; 1817 case ARM::VST2d32wb_fixed : return true; 1818 case ARM::VST2q8PseudoWB_fixed : return true; 1819 case ARM::VST2q16PseudoWB_fixed : return true; 1820 case ARM::VST2q32PseudoWB_fixed : return true; 1821 } 1822 } 1823 1824 // Get the register stride update opcode of a VLD/VST instruction that 1825 // is otherwise equivalent to the given fixed stride updating instruction. 
1826 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1827 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 1828 && "Incorrect fixed stride updating instruction."); 1829 switch (Opc) { 1830 default: break; 1831 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1832 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1833 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1834 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1835 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1836 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1837 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1838 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1839 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 1840 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 1841 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 1842 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 1843 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 1844 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 1845 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 1846 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 1847 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 1848 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 1849 1850 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1851 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1852 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1853 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1854 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1855 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1856 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1857 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1858 case ARM::VST1d64TPseudoWB_fixed: return 
ARM::VST1d64TPseudoWB_register; 1859 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 1860 1861 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1862 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1863 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1864 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1865 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1866 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1867 1868 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1869 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1870 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1871 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1872 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1873 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1874 1875 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1876 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1877 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1878 } 1879 return Opc; // If not one we handle, return it unchanged. 1880 } 1881 1882 /// Returns true if the given increment is a Constant known to be equal to the 1883 /// access size performed by a NEON load/store. This means the "[rN]!" form can 1884 /// be used. 
1885 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 1886 auto C = dyn_cast<ConstantSDNode>(Inc); 1887 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 1888 } 1889 1890 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1891 const uint16_t *DOpcodes, 1892 const uint16_t *QOpcodes0, 1893 const uint16_t *QOpcodes1) { 1894 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1895 SDLoc dl(N); 1896 1897 SDValue MemAddr, Align; 1898 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 1899 // nodes are not intrinsics. 1900 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 1901 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1902 return; 1903 1904 SDValue Chain = N->getOperand(0); 1905 EVT VT = N->getValueType(0); 1906 bool is64BitVector = VT.is64BitVector(); 1907 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1908 1909 unsigned OpcodeIndex; 1910 switch (VT.getSimpleVT().SimpleTy) { 1911 default: llvm_unreachable("unhandled vld type"); 1912 // Double-register operations: 1913 case MVT::v8i8: OpcodeIndex = 0; break; 1914 case MVT::v4f16: 1915 case MVT::v4i16: OpcodeIndex = 1; break; 1916 case MVT::v2f32: 1917 case MVT::v2i32: OpcodeIndex = 2; break; 1918 case MVT::v1i64: OpcodeIndex = 3; break; 1919 // Quad-register operations: 1920 case MVT::v16i8: OpcodeIndex = 0; break; 1921 case MVT::v8f16: 1922 case MVT::v8i16: OpcodeIndex = 1; break; 1923 case MVT::v4f32: 1924 case MVT::v4i32: OpcodeIndex = 2; break; 1925 case MVT::v2f64: 1926 case MVT::v2i64: OpcodeIndex = 3; break; 1927 } 1928 1929 EVT ResTy; 1930 if (NumVecs == 1) 1931 ResTy = VT; 1932 else { 1933 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 1934 if (!is64BitVector) 1935 ResTyElts *= 2; 1936 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1937 } 1938 std::vector<EVT> ResTys; 1939 ResTys.push_back(ResTy); 1940 if (isUpdating) 1941 ResTys.push_back(MVT::i32); 1942 ResTys.push_back(MVT::Other); 1943 1944 SDValue Pred = getAL(CurDAG, dl); 1945 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1946 SDNode *VLd; 1947 SmallVector<SDValue, 7> Ops; 1948 1949 // Double registers and VLD1/VLD2 quad registers are directly supported. 1950 if (is64BitVector || NumVecs <= 2) { 1951 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1952 QOpcodes0[OpcodeIndex]); 1953 Ops.push_back(MemAddr); 1954 Ops.push_back(Align); 1955 if (isUpdating) { 1956 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1957 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 1958 if (!IsImmUpdate) { 1959 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1960 // check for the opcode rather than the number of vector elements. 1961 if (isVLDfixed(Opc)) 1962 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1963 Ops.push_back(Inc); 1964 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 1965 // the operands if not such an opcode. 1966 } else if (!isVLDfixed(Opc)) 1967 Ops.push_back(Reg0); 1968 } 1969 Ops.push_back(Pred); 1970 Ops.push_back(Reg0); 1971 Ops.push_back(Chain); 1972 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1973 1974 } else { 1975 // Otherwise, quad registers are loaded with two separate instructions, 1976 // where one loads the even registers and the other loads the odd registers. 1977 EVT AddrTy = MemAddr.getValueType(); 1978 1979 // Load the even subregs. This is always an updating load, so that it 1980 // provides the address to the second load for the odd subregs. 
1981 SDValue ImplDef = 1982 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1983 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1984 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1985 ResTy, AddrTy, MVT::Other, OpsA); 1986 Chain = SDValue(VLdA, 2); 1987 1988 // Load the odd subregs. 1989 Ops.push_back(SDValue(VLdA, 1)); 1990 Ops.push_back(Align); 1991 if (isUpdating) { 1992 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1993 assert(isa<ConstantSDNode>(Inc.getNode()) && 1994 "only constant post-increment update allowed for VLD3/4"); 1995 (void)Inc; 1996 Ops.push_back(Reg0); 1997 } 1998 Ops.push_back(SDValue(VLdA, 0)); 1999 Ops.push_back(Pred); 2000 Ops.push_back(Reg0); 2001 Ops.push_back(Chain); 2002 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2003 } 2004 2005 // Transfer memoperands. 2006 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2007 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2008 2009 if (NumVecs == 1) { 2010 ReplaceNode(N, VLd); 2011 return; 2012 } 2013 2014 // Extract out the subregisters. 2015 SDValue SuperReg = SDValue(VLd, 0); 2016 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2017 ARM::qsub_3 == ARM::qsub_0 + 3, 2018 "Unexpected subreg numbering"); 2019 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2020 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2021 ReplaceUses(SDValue(N, Vec), 2022 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2023 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2024 if (isUpdating) 2025 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2026 CurDAG->RemoveDeadNode(N); 2027 } 2028 2029 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2030 const uint16_t *DOpcodes, 2031 const uint16_t *QOpcodes0, 2032 const uint16_t *QOpcodes1) { 2033 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2034 SDLoc dl(N); 2035 2036 SDValue MemAddr, Align; 2037 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2038 // nodes are not intrinsics. 2039 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2040 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2041 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2042 return; 2043 2044 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2045 2046 SDValue Chain = N->getOperand(0); 2047 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2048 bool is64BitVector = VT.is64BitVector(); 2049 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2050 2051 unsigned OpcodeIndex; 2052 switch (VT.getSimpleVT().SimpleTy) { 2053 default: llvm_unreachable("unhandled vst type"); 2054 // Double-register operations: 2055 case MVT::v8i8: OpcodeIndex = 0; break; 2056 case MVT::v4f16: 2057 case MVT::v4i16: OpcodeIndex = 1; break; 2058 case MVT::v2f32: 2059 case MVT::v2i32: OpcodeIndex = 2; break; 2060 case MVT::v1i64: OpcodeIndex = 3; break; 2061 // Quad-register operations: 2062 case MVT::v16i8: OpcodeIndex = 0; break; 2063 case MVT::v8f16: 2064 case MVT::v8i16: OpcodeIndex = 1; break; 2065 case MVT::v4f32: 2066 case MVT::v4i32: OpcodeIndex = 2; break; 2067 case MVT::v2f64: 2068 case MVT::v2i64: OpcodeIndex = 3; break; 2069 } 2070 2071 std::vector<EVT> ResTys; 2072 if 
(isUpdating) 2073 ResTys.push_back(MVT::i32); 2074 ResTys.push_back(MVT::Other); 2075 2076 SDValue Pred = getAL(CurDAG, dl); 2077 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2078 SmallVector<SDValue, 7> Ops; 2079 2080 // Double registers and VST1/VST2 quad registers are directly supported. 2081 if (is64BitVector || NumVecs <= 2) { 2082 SDValue SrcReg; 2083 if (NumVecs == 1) { 2084 SrcReg = N->getOperand(Vec0Idx); 2085 } else if (is64BitVector) { 2086 // Form a REG_SEQUENCE to force register allocation. 2087 SDValue V0 = N->getOperand(Vec0Idx + 0); 2088 SDValue V1 = N->getOperand(Vec0Idx + 1); 2089 if (NumVecs == 2) 2090 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2091 else { 2092 SDValue V2 = N->getOperand(Vec0Idx + 2); 2093 // If it's a vst3, form a quad D-register and leave the last part as 2094 // an undef. 2095 SDValue V3 = (NumVecs == 3) 2096 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2097 : N->getOperand(Vec0Idx + 3); 2098 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2099 } 2100 } else { 2101 // Form a QQ register. 2102 SDValue Q0 = N->getOperand(Vec0Idx); 2103 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2104 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2105 } 2106 2107 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2108 QOpcodes0[OpcodeIndex]); 2109 Ops.push_back(MemAddr); 2110 Ops.push_back(Align); 2111 if (isUpdating) { 2112 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2113 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2114 if (!IsImmUpdate) { 2115 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2116 // check for the opcode rather than the number of vector elements. 2117 if (isVSTfixed(Opc)) 2118 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2119 Ops.push_back(Inc); 2120 } 2121 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2122 // the operands if not such an opcode. 
2123 else if (!isVSTfixed(Opc)) 2124 Ops.push_back(Reg0); 2125 } 2126 Ops.push_back(SrcReg); 2127 Ops.push_back(Pred); 2128 Ops.push_back(Reg0); 2129 Ops.push_back(Chain); 2130 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2131 2132 // Transfer memoperands. 2133 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2134 2135 ReplaceNode(N, VSt); 2136 return; 2137 } 2138 2139 // Otherwise, quad registers are stored with two separate instructions, 2140 // where one stores the even registers and the other stores the odd registers. 2141 2142 // Form the QQQQ REG_SEQUENCE. 2143 SDValue V0 = N->getOperand(Vec0Idx + 0); 2144 SDValue V1 = N->getOperand(Vec0Idx + 1); 2145 SDValue V2 = N->getOperand(Vec0Idx + 2); 2146 SDValue V3 = (NumVecs == 3) 2147 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2148 : N->getOperand(Vec0Idx + 3); 2149 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2150 2151 // Store the even D registers. This is always an updating store, so that it 2152 // provides the address to the second store for the odd subregs. 2153 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2154 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2155 MemAddr.getValueType(), 2156 MVT::Other, OpsA); 2157 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2158 Chain = SDValue(VStA, 1); 2159 2160 // Store the odd D registers. 
2161 Ops.push_back(SDValue(VStA, 0)); 2162 Ops.push_back(Align); 2163 if (isUpdating) { 2164 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2165 assert(isa<ConstantSDNode>(Inc.getNode()) && 2166 "only constant post-increment update allowed for VST3/4"); 2167 (void)Inc; 2168 Ops.push_back(Reg0); 2169 } 2170 Ops.push_back(RegSeq); 2171 Ops.push_back(Pred); 2172 Ops.push_back(Reg0); 2173 Ops.push_back(Chain); 2174 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2175 Ops); 2176 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2177 ReplaceNode(N, VStB); 2178 } 2179 2180 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2181 unsigned NumVecs, 2182 const uint16_t *DOpcodes, 2183 const uint16_t *QOpcodes) { 2184 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2185 SDLoc dl(N); 2186 2187 SDValue MemAddr, Align; 2188 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2189 // nodes are not intrinsics. 2190 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2191 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2192 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2193 return; 2194 2195 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2196 2197 SDValue Chain = N->getOperand(0); 2198 unsigned Lane = 2199 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2200 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2201 bool is64BitVector = VT.is64BitVector(); 2202 2203 unsigned Alignment = 0; 2204 if (NumVecs != 3) { 2205 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2206 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2207 if (Alignment > NumBytes) 2208 Alignment = NumBytes; 2209 if (Alignment < 8 && Alignment < NumBytes) 2210 Alignment = 0; 2211 // Alignment must be a power of two; make sure of that. 
2212 Alignment = (Alignment & -Alignment); 2213 if (Alignment == 1) 2214 Alignment = 0; 2215 } 2216 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2217 2218 unsigned OpcodeIndex; 2219 switch (VT.getSimpleVT().SimpleTy) { 2220 default: llvm_unreachable("unhandled vld/vst lane type"); 2221 // Double-register operations: 2222 case MVT::v8i8: OpcodeIndex = 0; break; 2223 case MVT::v4f16: 2224 case MVT::v4i16: OpcodeIndex = 1; break; 2225 case MVT::v2f32: 2226 case MVT::v2i32: OpcodeIndex = 2; break; 2227 // Quad-register operations: 2228 case MVT::v8f16: 2229 case MVT::v8i16: OpcodeIndex = 0; break; 2230 case MVT::v4f32: 2231 case MVT::v4i32: OpcodeIndex = 1; break; 2232 } 2233 2234 std::vector<EVT> ResTys; 2235 if (IsLoad) { 2236 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2237 if (!is64BitVector) 2238 ResTyElts *= 2; 2239 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2240 MVT::i64, ResTyElts)); 2241 } 2242 if (isUpdating) 2243 ResTys.push_back(MVT::i32); 2244 ResTys.push_back(MVT::Other); 2245 2246 SDValue Pred = getAL(CurDAG, dl); 2247 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2248 2249 SmallVector<SDValue, 8> Ops; 2250 Ops.push_back(MemAddr); 2251 Ops.push_back(Align); 2252 if (isUpdating) { 2253 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2254 bool IsImmUpdate = 2255 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2256 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2257 } 2258 2259 SDValue SuperReg; 2260 SDValue V0 = N->getOperand(Vec0Idx + 0); 2261 SDValue V1 = N->getOperand(Vec0Idx + 1); 2262 if (NumVecs == 2) { 2263 if (is64BitVector) 2264 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2265 else 2266 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2267 } else { 2268 SDValue V2 = N->getOperand(Vec0Idx + 2); 2269 SDValue V3 = (NumVecs == 3) 2270 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2271 : N->getOperand(Vec0Idx + 3); 2272 if (is64BitVector) 2273 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2274 else 2275 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2276 } 2277 Ops.push_back(SuperReg); 2278 Ops.push_back(getI32Imm(Lane, dl)); 2279 Ops.push_back(Pred); 2280 Ops.push_back(Reg0); 2281 Ops.push_back(Chain); 2282 2283 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2284 QOpcodes[OpcodeIndex]); 2285 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2286 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2287 if (!IsLoad) { 2288 ReplaceNode(N, VLdLn); 2289 return; 2290 } 2291 2292 // Extract the subregisters. 2293 SuperReg = SDValue(VLdLn, 0); 2294 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2295 ARM::qsub_3 == ARM::qsub_0 + 3, 2296 "Unexpected subreg numbering"); 2297 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2298 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2299 ReplaceUses(SDValue(N, Vec), 2300 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2301 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2302 if (isUpdating) 2303 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2304 CurDAG->RemoveDeadNode(N); 2305 } 2306 2307 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2308 bool isUpdating, unsigned NumVecs, 2309 const uint16_t *DOpcodes, 2310 const uint16_t *QOpcodes0, 2311 const uint16_t *QOpcodes1) { 2312 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2313 SDLoc dl(N); 2314 2315 SDValue MemAddr, Align; 2316 unsigned AddrOpIdx = IsIntrinsic ? 
2 : 1; 2317 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2318 return; 2319 2320 SDValue Chain = N->getOperand(0); 2321 EVT VT = N->getValueType(0); 2322 bool is64BitVector = VT.is64BitVector(); 2323 2324 unsigned Alignment = 0; 2325 if (NumVecs != 3) { 2326 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2327 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2328 if (Alignment > NumBytes) 2329 Alignment = NumBytes; 2330 if (Alignment < 8 && Alignment < NumBytes) 2331 Alignment = 0; 2332 // Alignment must be a power of two; make sure of that. 2333 Alignment = (Alignment & -Alignment); 2334 if (Alignment == 1) 2335 Alignment = 0; 2336 } 2337 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2338 2339 unsigned OpcodeIndex; 2340 switch (VT.getSimpleVT().SimpleTy) { 2341 default: llvm_unreachable("unhandled vld-dup type"); 2342 case MVT::v8i8: 2343 case MVT::v16i8: OpcodeIndex = 0; break; 2344 case MVT::v4i16: 2345 case MVT::v8i16: 2346 case MVT::v4f16: 2347 case MVT::v8f16: 2348 OpcodeIndex = 1; break; 2349 case MVT::v2f32: 2350 case MVT::v2i32: 2351 case MVT::v4f32: 2352 case MVT::v4i32: OpcodeIndex = 2; break; 2353 case MVT::v1f64: 2354 case MVT::v1i64: OpcodeIndex = 3; break; 2355 } 2356 2357 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2358 if (!is64BitVector) 2359 ResTyElts *= 2; 2360 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2361 2362 std::vector<EVT> ResTys; 2363 ResTys.push_back(ResTy); 2364 if (isUpdating) 2365 ResTys.push_back(MVT::i32); 2366 ResTys.push_back(MVT::Other); 2367 2368 SDValue Pred = getAL(CurDAG, dl); 2369 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2370 2371 SDNode *VLdDup; 2372 if (is64BitVector || NumVecs == 1) { 2373 SmallVector<SDValue, 6> Ops; 2374 Ops.push_back(MemAddr); 2375 Ops.push_back(Align); 2376 unsigned Opc = is64BitVector ? 
DOpcodes[OpcodeIndex] : 2377 QOpcodes0[OpcodeIndex]; 2378 if (isUpdating) { 2379 // fixed-stride update instructions don't have an explicit writeback 2380 // operand. It's implicit in the opcode itself. 2381 SDValue Inc = N->getOperand(2); 2382 bool IsImmUpdate = 2383 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2384 if (NumVecs <= 2 && !IsImmUpdate) 2385 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2386 if (!IsImmUpdate) 2387 Ops.push_back(Inc); 2388 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2389 else if (NumVecs > 2) 2390 Ops.push_back(Reg0); 2391 } 2392 Ops.push_back(Pred); 2393 Ops.push_back(Reg0); 2394 Ops.push_back(Chain); 2395 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2396 } else if (NumVecs == 2) { 2397 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; 2398 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2399 dl, ResTys, OpsA); 2400 2401 Chain = SDValue(VLdA, 1); 2402 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; 2403 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2404 } else { 2405 SDValue ImplDef = 2406 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2407 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; 2408 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 2409 dl, ResTys, OpsA); 2410 2411 SDValue SuperReg = SDValue(VLdA, 0); 2412 Chain = SDValue(VLdA, 1); 2413 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; 2414 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 2415 } 2416 2417 // Transfer memoperands. 2418 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2419 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 2420 2421 // Extract the subregisters. 
2422 if (NumVecs == 1) { 2423 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 2424 } else { 2425 SDValue SuperReg = SDValue(VLdDup, 0); 2426 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 2427 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2428 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 2429 ReplaceUses(SDValue(N, Vec), 2430 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2431 } 2432 } 2433 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2434 if (isUpdating) 2435 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2436 CurDAG->RemoveDeadNode(N); 2437 } 2438 2439 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 2440 if (!Subtarget->hasV6T2Ops()) 2441 return false; 2442 2443 unsigned Opc = isSigned 2444 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 2445 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2446 SDLoc dl(N); 2447 2448 // For unsigned extracts, check for a shift right and mask 2449 unsigned And_imm = 0; 2450 if (N->getOpcode() == ISD::AND) { 2451 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2452 2453 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2454 if (And_imm & (And_imm + 1)) 2455 return false; 2456 2457 unsigned Srl_imm = 0; 2458 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2459 Srl_imm)) { 2460 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2461 2462 // Mask off the unnecessary bits of the AND immediate; normally 2463 // DAGCombine will do this, but that might not happen if 2464 // targetShrinkDemandedConstant chooses a different immediate. 2465 And_imm &= -1U >> Srl_imm; 2466 2467 // Note: The width operand is encoded as width-1. 
2468 unsigned Width = countTrailingOnes(And_imm) - 1; 2469 unsigned LSB = Srl_imm; 2470 2471 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2472 2473 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2474 // It's cheaper to use a right shift to extract the top bits. 2475 if (Subtarget->isThumb()) { 2476 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2477 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2478 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2479 getAL(CurDAG, dl), Reg0, Reg0 }; 2480 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2481 return true; 2482 } 2483 2484 // ARM models shift instructions as MOVsi with shifter operand. 2485 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2486 SDValue ShOpc = 2487 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2488 MVT::i32); 2489 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2490 getAL(CurDAG, dl), Reg0, Reg0 }; 2491 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2492 return true; 2493 } 2494 2495 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2496 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2497 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2498 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2499 getAL(CurDAG, dl), Reg0 }; 2500 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2501 return true; 2502 } 2503 } 2504 return false; 2505 } 2506 2507 // Otherwise, we're looking for a shift of a shift 2508 unsigned Shl_imm = 0; 2509 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2510 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2511 unsigned Srl_imm = 0; 2512 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2513 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2514 // Note: The width operand is encoded as width-1. 
2515 unsigned Width = 32 - Srl_imm - 1; 2516 int LSB = Srl_imm - Shl_imm; 2517 if (LSB < 0) 2518 return false; 2519 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2520 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2521 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2522 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2523 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2524 getAL(CurDAG, dl), Reg0 }; 2525 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2526 return true; 2527 } 2528 } 2529 2530 // Or we are looking for a shift of an and, with a mask operand 2531 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 2532 isShiftedMask_32(And_imm)) { 2533 unsigned Srl_imm = 0; 2534 unsigned LSB = countTrailingZeros(And_imm); 2535 // Shift must be the same as the ands lsb 2536 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 2537 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2538 unsigned MSB = 31 - countLeadingZeros(And_imm); 2539 // Note: The width operand is encoded as width-1. 
2540 unsigned Width = MSB - LSB; 2541 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2542 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 2543 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2544 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 2545 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2546 getAL(CurDAG, dl), Reg0 }; 2547 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2548 return true; 2549 } 2550 } 2551 2552 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2553 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2554 unsigned LSB = 0; 2555 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2556 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2557 return false; 2558 2559 if (LSB + Width > 32) 2560 return false; 2561 2562 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2563 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 2564 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2565 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2566 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2567 getAL(CurDAG, dl), Reg0 }; 2568 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2569 return true; 2570 } 2571 2572 return false; 2573 } 2574 2575 /// Target-specific DAG combining for ISD::XOR. 2576 /// Target-independent combining lowers SELECT_CC nodes of the form 2577 /// select_cc setg[ge] X, 0, X, -X 2578 /// select_cc setgt X, -1, X, -X 2579 /// select_cc setl[te] X, 0, -X, X 2580 /// select_cc setlt X, 1, -X, X 2581 /// which represent Integer ABS into: 2582 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2583 /// ARM instruction selection detects the latter and matches it to 2584 /// ARM::ABS or ARM::t2ABS machine node. 
2585 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 2586 SDValue XORSrc0 = N->getOperand(0); 2587 SDValue XORSrc1 = N->getOperand(1); 2588 EVT VT = N->getValueType(0); 2589 2590 if (Subtarget->isThumb1Only()) 2591 return false; 2592 2593 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2594 return false; 2595 2596 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2597 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2598 SDValue SRASrc0 = XORSrc1.getOperand(0); 2599 SDValue SRASrc1 = XORSrc1.getOperand(1); 2600 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2601 EVT XType = SRASrc0.getValueType(); 2602 unsigned Size = XType.getSizeInBits() - 1; 2603 2604 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2605 XType.isInteger() && SRAConstant != nullptr && 2606 Size == SRAConstant->getZExtValue()) { 2607 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2608 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2609 return true; 2610 } 2611 2612 return false; 2613 } 2614 2615 /// We've got special pseudo-instructions for these 2616 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2617 unsigned Opcode; 2618 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2619 if (MemTy == MVT::i8) 2620 Opcode = ARM::CMP_SWAP_8; 2621 else if (MemTy == MVT::i16) 2622 Opcode = ARM::CMP_SWAP_16; 2623 else if (MemTy == MVT::i32) 2624 Opcode = ARM::CMP_SWAP_32; 2625 else 2626 llvm_unreachable("Unknown AtomicCmpSwap type"); 2627 2628 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2629 N->getOperand(0)}; 2630 SDNode *CmpSwap = CurDAG->getMachineNode( 2631 Opcode, SDLoc(N), 2632 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 2633 2634 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 2635 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 2636 2637 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2638 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2639 CurDAG->RemoveDeadNode(N); 2640 } 2641 2642 static 
Optional<std::pair<unsigned, unsigned>> 2643 getContiguousRangeOfSetBits(const APInt &A) { 2644 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 2645 unsigned LastOne = A.countTrailingZeros(); 2646 if (A.countPopulation() != (FirstOne - LastOne + 1)) 2647 return Optional<std::pair<unsigned,unsigned>>(); 2648 return std::make_pair(FirstOne, LastOne); 2649 } 2650 2651 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 2652 assert(N->getOpcode() == ARMISD::CMPZ); 2653 SwitchEQNEToPLMI = false; 2654 2655 if (!Subtarget->isThumb()) 2656 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 2657 // LSR don't exist as standalone instructions - they need the barrel shifter. 2658 return; 2659 2660 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 2661 SDValue And = N->getOperand(0); 2662 if (!And->hasOneUse()) 2663 return; 2664 2665 SDValue Zero = N->getOperand(1); 2666 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 2667 And->getOpcode() != ISD::AND) 2668 return; 2669 SDValue X = And.getOperand(0); 2670 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 2671 2672 if (!C) 2673 return; 2674 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 2675 if (!Range) 2676 return; 2677 2678 // There are several ways to lower this: 2679 SDNode *NewN; 2680 SDLoc dl(N); 2681 2682 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 2683 if (Subtarget->isThumb2()) { 2684 Opc = (Opc == ARM::tLSLri) ? 
ARM::t2LSLri : ARM::t2LSRri; 2685 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2686 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2687 CurDAG->getRegister(0, MVT::i32) }; 2688 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2689 } else { 2690 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 2691 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 2692 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 2693 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 2694 } 2695 }; 2696 2697 if (Range->second == 0) { 2698 // 1. Mask includes the LSB -> Simply shift the top N bits off 2699 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2700 ReplaceNode(And.getNode(), NewN); 2701 } else if (Range->first == 31) { 2702 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 2703 NewN = EmitShift(ARM::tLSRri, X, Range->second); 2704 ReplaceNode(And.getNode(), NewN); 2705 } else if (Range->first == Range->second) { 2706 // 3. Only one bit is set. We can shift this into the sign bit and use a 2707 // PL/MI comparison. 2708 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2709 ReplaceNode(And.getNode(), NewN); 2710 2711 SwitchEQNEToPLMI = true; 2712 } else if (!Subtarget->hasV6T2Ops()) { 2713 // 4. Do a double shift to clear bottom and top bits, but only in 2714 // thumb-1 mode as in thumb-2 we can use UBFX. 2715 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 2716 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 2717 Range->second + (31 - Range->first)); 2718 ReplaceNode(And.getNode(), NewN); 2719 } 2720 2721 } 2722 2723 void ARMDAGToDAGISel::Select(SDNode *N) { 2724 SDLoc dl(N); 2725 2726 if (N->isMachineOpcode()) { 2727 N->setNodeId(-1); 2728 return; // Already selected. 2729 } 2730 2731 switch (N->getOpcode()) { 2732 default: break; 2733 case ISD::STORE: { 2734 // For Thumb1, match an sp-relative store in C++. 
This is a little 2735 // unfortunate, but I don't think I can make the chain check work 2736 // otherwise. (The chain of the store has to be the same as the chain 2737 // of the CopyFromReg, or else we can't replace the CopyFromReg with 2738 // a direct reference to "SP".) 2739 // 2740 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 2741 // a different addressing mode from other four-byte stores. 2742 // 2743 // This pattern usually comes up with call arguments. 2744 StoreSDNode *ST = cast<StoreSDNode>(N); 2745 SDValue Ptr = ST->getBasePtr(); 2746 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 2747 int RHSC = 0; 2748 if (Ptr.getOpcode() == ISD::ADD && 2749 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 2750 Ptr = Ptr.getOperand(0); 2751 2752 if (Ptr.getOpcode() == ISD::CopyFromReg && 2753 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 2754 Ptr.getOperand(0) == ST->getChain()) { 2755 SDValue Ops[] = {ST->getValue(), 2756 CurDAG->getRegister(ARM::SP, MVT::i32), 2757 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 2758 getAL(CurDAG, dl), 2759 CurDAG->getRegister(0, MVT::i32), 2760 ST->getChain()}; 2761 MachineSDNode *ResNode = 2762 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 2763 MachineMemOperand *MemOp = ST->getMemOperand(); 2764 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2765 ReplaceNode(N, ResNode); 2766 return; 2767 } 2768 } 2769 break; 2770 } 2771 case ISD::WRITE_REGISTER: 2772 if (tryWriteRegister(N)) 2773 return; 2774 break; 2775 case ISD::READ_REGISTER: 2776 if (tryReadRegister(N)) 2777 return; 2778 break; 2779 case ISD::INLINEASM: 2780 case ISD::INLINEASM_BR: 2781 if (tryInlineAsm(N)) 2782 return; 2783 break; 2784 case ISD::XOR: 2785 // Select special operations if XOR node forms integer ABS pattern 2786 if (tryABSOp(N)) 2787 return; 2788 // Other cases are autogenerated. 
2789 break; 2790 case ISD::Constant: { 2791 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2792 // If we can't materialize the constant we need to use a literal pool 2793 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 2794 SDValue CPIdx = CurDAG->getTargetConstantPool( 2795 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2796 TLI->getPointerTy(CurDAG->getDataLayout())); 2797 2798 SDNode *ResNode; 2799 if (Subtarget->isThumb()) { 2800 SDValue Ops[] = { 2801 CPIdx, 2802 getAL(CurDAG, dl), 2803 CurDAG->getRegister(0, MVT::i32), 2804 CurDAG->getEntryNode() 2805 }; 2806 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2807 Ops); 2808 } else { 2809 SDValue Ops[] = { 2810 CPIdx, 2811 CurDAG->getTargetConstant(0, dl, MVT::i32), 2812 getAL(CurDAG, dl), 2813 CurDAG->getRegister(0, MVT::i32), 2814 CurDAG->getEntryNode() 2815 }; 2816 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2817 Ops); 2818 } 2819 // Annotate the Node with memory operand information so that MachineInstr 2820 // queries work properly. This e.g. gives the register allocation the 2821 // required information for rematerialization. 2822 MachineFunction& MF = CurDAG->getMachineFunction(); 2823 MachineMemOperand *MemOp = 2824 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 2825 MachineMemOperand::MOLoad, 4, 4); 2826 2827 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 2828 2829 ReplaceNode(N, ResNode); 2830 return; 2831 } 2832 2833 // Other cases are autogenerated. 2834 break; 2835 } 2836 case ISD::FrameIndex: { 2837 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
2838 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2839 SDValue TFI = CurDAG->getTargetFrameIndex( 2840 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2841 if (Subtarget->isThumb1Only()) { 2842 // Set the alignment of the frame object to 4, to avoid having to generate 2843 // more than one ADD 2844 MachineFrameInfo &MFI = MF->getFrameInfo(); 2845 if (MFI.getObjectAlignment(FI) < 4) 2846 MFI.setObjectAlignment(FI, 4); 2847 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2848 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2849 return; 2850 } else { 2851 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 2852 ARM::t2ADDri : ARM::ADDri); 2853 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2854 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2855 CurDAG->getRegister(0, MVT::i32) }; 2856 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2857 return; 2858 } 2859 } 2860 case ISD::SRL: 2861 if (tryV6T2BitfieldExtractOp(N, false)) 2862 return; 2863 break; 2864 case ISD::SIGN_EXTEND_INREG: 2865 case ISD::SRA: 2866 if (tryV6T2BitfieldExtractOp(N, true)) 2867 return; 2868 break; 2869 case ISD::MUL: 2870 if (Subtarget->isThumb1Only()) 2871 break; 2872 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2873 unsigned RHSV = C->getZExtValue(); 2874 if (!RHSV) break; 2875 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
2876 unsigned ShImm = Log2_32(RHSV-1); 2877 if (ShImm >= 32) 2878 break; 2879 SDValue V = N->getOperand(0); 2880 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2881 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2882 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2883 if (Subtarget->isThumb()) { 2884 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2885 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2886 return; 2887 } else { 2888 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2889 Reg0 }; 2890 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2891 return; 2892 } 2893 } 2894 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 2895 unsigned ShImm = Log2_32(RHSV+1); 2896 if (ShImm >= 32) 2897 break; 2898 SDValue V = N->getOperand(0); 2899 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2900 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2901 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2902 if (Subtarget->isThumb()) { 2903 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2904 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2905 return; 2906 } else { 2907 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2908 Reg0 }; 2909 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2910 return; 2911 } 2912 } 2913 } 2914 break; 2915 case ISD::AND: { 2916 // Check for unsigned bitfield extract 2917 if (tryV6T2BitfieldExtractOp(N, false)) 2918 return; 2919 2920 // If an immediate is used in an AND node, it is possible that the immediate 2921 // can be more optimally materialized when negated. If this is the case we 2922 // can negate the immediate and use a BIC instead. 2923 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2924 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2925 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2926 2927 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 2928 // immediate can be negated and fit in the immediate operand of 2929 // a t2BIC, don't do any manual transform here as this can be 2930 // handled by the generic ISel machinery. 2931 bool PreferImmediateEncoding = 2932 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2933 if (!PreferImmediateEncoding && 2934 ConstantMaterializationCost(Imm, Subtarget) > 2935 ConstantMaterializationCost(~Imm, Subtarget)) { 2936 // The current immediate costs more to materialize than a negated 2937 // immediate, so negate the immediate and use a BIC. 2938 SDValue NewImm = 2939 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2940 // If the new constant didn't exist before, reposition it in the topological 2941 // ordering so it is just before N. Otherwise, don't touch its location. 2942 if (NewImm->getNodeId() == -1) 2943 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2944 2945 if (!Subtarget->hasThumb2()) { 2946 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2947 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2948 CurDAG->getRegister(0, MVT::i32)}; 2949 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2950 return; 2951 } else { 2952 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2953 CurDAG->getRegister(0, MVT::i32), 2954 CurDAG->getRegister(0, MVT::i32)}; 2955 ReplaceNode(N, 2956 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 2957 return; 2958 } 2959 } 2960 } 2961 2962 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2963 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2964 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2965 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2966 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2967 EVT VT = N->getValueType(0); 2968 if (VT != MVT::i32) 2969 break; 2970 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2971 ? 
ARM::t2MOVTi16 2972 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 2973 if (!Opc) 2974 break; 2975 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2976 N1C = dyn_cast<ConstantSDNode>(N1); 2977 if (!N1C) 2978 break; 2979 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2980 SDValue N2 = N0.getOperand(1); 2981 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2982 if (!N2C) 2983 break; 2984 unsigned N1CVal = N1C->getZExtValue(); 2985 unsigned N2CVal = N2C->getZExtValue(); 2986 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2987 (N1CVal & 0xffffU) == 0xffffU && 2988 (N2CVal & 0xffffU) == 0x0U) { 2989 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2990 dl, MVT::i32); 2991 SDValue Ops[] = { N0.getOperand(0), Imm16, 2992 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2993 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2994 return; 2995 } 2996 } 2997 2998 break; 2999 } 3000 case ARMISD::UMAAL: { 3001 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3002 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3003 N->getOperand(2), N->getOperand(3), 3004 getAL(CurDAG, dl), 3005 CurDAG->getRegister(0, MVT::i32) }; 3006 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3007 return; 3008 } 3009 case ARMISD::UMLAL:{ 3010 if (Subtarget->isThumb()) { 3011 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3012 N->getOperand(3), getAL(CurDAG, dl), 3013 CurDAG->getRegister(0, MVT::i32)}; 3014 ReplaceNode( 3015 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3016 return; 3017 }else{ 3018 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3019 N->getOperand(3), getAL(CurDAG, dl), 3020 CurDAG->getRegister(0, MVT::i32), 3021 CurDAG->getRegister(0, MVT::i32) }; 3022 ReplaceNode(N, CurDAG->getMachineNode( 3023 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3024 MVT::i32, MVT::i32, Ops)); 3025 return; 3026 } 3027 } 3028 case ARMISD::SMLAL:{ 3029 if (Subtarget->isThumb()) { 3030 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3031 N->getOperand(3), getAL(CurDAG, dl), 3032 CurDAG->getRegister(0, MVT::i32)}; 3033 ReplaceNode( 3034 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3035 return; 3036 }else{ 3037 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3038 N->getOperand(3), getAL(CurDAG, dl), 3039 CurDAG->getRegister(0, MVT::i32), 3040 CurDAG->getRegister(0, MVT::i32) }; 3041 ReplaceNode(N, CurDAG->getMachineNode( 3042 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3043 MVT::i32, MVT::i32, Ops)); 3044 return; 3045 } 3046 } 3047 case ARMISD::SUBE: { 3048 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3049 break; 3050 // Look for a pattern to match SMMLS 3051 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3052 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3053 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3054 !SDValue(N, 1).use_empty()) 3055 break; 3056 3057 if (Subtarget->isThumb()) 3058 assert(Subtarget->hasThumb2() && 3059 "This pattern should not be generated for Thumb"); 3060 3061 SDValue SmulLoHi = N->getOperand(1); 3062 SDValue Subc = N->getOperand(2); 3063 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3064 3065 if (!Zero || Zero->getZExtValue() != 0 || 3066 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3067 N->getOperand(1) != SmulLoHi.getValue(1) || 3068 N->getOperand(2) != Subc.getValue(1)) 3069 break; 3070 3071 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3072 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3073 N->getOperand(0), getAL(CurDAG, dl), 3074 CurDAG->getRegister(0, MVT::i32) }; 3075 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3076 return; 3077 } 3078 case ISD::LOAD: { 3079 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3080 return; 3081 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3082 if (tryT2IndexedLoad(N)) 3083 return; 3084 } else if (Subtarget->isThumb()) { 3085 if (tryT1IndexedLoad(N)) 3086 return; 3087 } else if (tryARMIndexedLoad(N)) 3088 return; 3089 // Other cases are autogenerated. 3090 break; 3091 } 3092 case ARMISD::WLS: 3093 case ARMISD::LE: { 3094 SDValue Ops[] = { N->getOperand(1), 3095 N->getOperand(2), 3096 N->getOperand(0) }; 3097 unsigned Opc = N->getOpcode() == ARMISD::WLS ? 3098 ARM::t2WhileLoopStart : ARM::t2LoopEnd; 3099 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3100 ReplaceUses(N, New); 3101 CurDAG->RemoveDeadNode(N); 3102 return; 3103 } 3104 case ARMISD::LOOP_DEC: { 3105 SDValue Ops[] = { N->getOperand(1), 3106 N->getOperand(2), 3107 N->getOperand(0) }; 3108 SDNode *Dec = 3109 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3110 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3111 ReplaceUses(N, Dec); 3112 CurDAG->RemoveDeadNode(N); 3113 return; 3114 } 3115 case ARMISD::BRCOND: { 3116 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3117 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3118 // Pattern complexity = 6 cost = 1 size = 0 3119 3120 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3121 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3122 // Pattern complexity = 6 cost = 1 size = 0 3123 3124 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3125 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3126 // Pattern complexity = 6 cost = 1 size = 0 3127 3128 unsigned Opc = Subtarget->isThumb() ? 3129 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3130 SDValue Chain = N->getOperand(0); 3131 SDValue N1 = N->getOperand(1); 3132 SDValue N2 = N->getOperand(2); 3133 SDValue N3 = N->getOperand(3); 3134 SDValue InFlag = N->getOperand(4); 3135 assert(N1.getOpcode() == ISD::BasicBlock); 3136 assert(N2.getOpcode() == ISD::Constant); 3137 assert(N3.getOpcode() == ISD::Register); 3138 3139 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3140 3141 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3142 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3143 SDValue Int = InFlag.getOperand(0); 3144 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3145 3146 // Handle low-overhead loops. 3147 if (ID == Intrinsic::loop_decrement_reg) { 3148 SDValue Elements = Int.getOperand(2); 3149 SDValue Size = CurDAG->getTargetConstant( 3150 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3151 MVT::i32); 3152 3153 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3154 SDNode *LoopDec = 3155 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3156 CurDAG->getVTList(MVT::i32, MVT::Other), 3157 Args); 3158 ReplaceUses(Int.getNode(), LoopDec); 3159 3160 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3161 SDNode *LoopEnd = 3162 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3163 3164 ReplaceUses(N, LoopEnd); 3165 CurDAG->RemoveDeadNode(N); 3166 CurDAG->RemoveDeadNode(InFlag.getNode()); 3167 CurDAG->RemoveDeadNode(Int.getNode()); 3168 return; 3169 } 3170 } 3171 3172 bool SwitchEQNEToPLMI; 3173 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3174 InFlag = N->getOperand(4); 3175 3176 if (SwitchEQNEToPLMI) { 3177 switch ((ARMCC::CondCodes)CC) { 3178 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3179 case ARMCC::NE: 3180 CC = (unsigned)ARMCC::MI; 3181 break; 3182 case ARMCC::EQ: 3183 CC = (unsigned)ARMCC::PL; 3184 break; 3185 } 3186 } 3187 } 3188 3189 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3190 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3191 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3192 MVT::Glue, Ops); 3193 Chain = SDValue(ResNode, 0); 3194 if (N->getNumValues() == 2) { 3195 InFlag = SDValue(ResNode, 1); 3196 ReplaceUses(SDValue(N, 1), InFlag); 3197 } 3198 ReplaceUses(SDValue(N, 0), 3199 SDValue(Chain.getNode(), Chain.getResNo())); 3200 CurDAG->RemoveDeadNode(N); 3201 return; 3202 } 3203 3204 case ARMISD::CMPZ: { 3205 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3206 // This allows us to avoid materializing the expensive negative constant. 3207 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3208 // for its glue output. 3209 SDValue X = N->getOperand(0); 3210 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3211 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3212 int64_t Addend = -C->getSExtValue(); 3213 3214 SDNode *Add = nullptr; 3215 // ADDS can be better than CMN if the immediate fits in a 3216 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3217 // Outside that range we can just use a CMN which is 32-bit but has a 3218 // 12-bit immediate range. 3219 if (Addend < 1<<8) { 3220 if (Subtarget->isThumb2()) { 3221 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3222 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3223 CurDAG->getRegister(0, MVT::i32) }; 3224 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3225 } else { 3226 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 3227 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 3228 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3229 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3230 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3231 } 3232 } 3233 if (Add) { 3234 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 3235 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 3236 } 3237 } 3238 // Other cases are autogenerated. 3239 break; 3240 } 3241 3242 case ARMISD::CMOV: { 3243 SDValue InFlag = N->getOperand(4); 3244 3245 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3246 bool SwitchEQNEToPLMI; 3247 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3248 3249 if (SwitchEQNEToPLMI) { 3250 SDValue ARMcc = N->getOperand(2); 3251 ARMCC::CondCodes CC = 3252 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 3253 3254 switch (CC) { 3255 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3256 case ARMCC::NE: 3257 CC = ARMCC::MI; 3258 break; 3259 case ARMCC::EQ: 3260 CC = ARMCC::PL; 3261 break; 3262 } 3263 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 3264 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 3265 N->getOperand(3), N->getOperand(4)}; 3266 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 3267 } 3268 3269 } 3270 // Other cases are autogenerated. 3271 break; 3272 } 3273 3274 case ARMISD::VZIP: { 3275 unsigned Opc = 0; 3276 EVT VT = N->getValueType(0); 3277 switch (VT.getSimpleVT().SimpleTy) { 3278 default: return; 3279 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3280 case MVT::v4f16: 3281 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3282 case MVT::v2f32: 3283 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3284 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3285 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3286 case MVT::v8f16: 3287 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3288 case MVT::v4f32: 3289 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3290 } 3291 SDValue Pred = getAL(CurDAG, dl); 3292 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3293 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3294 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3295 return; 3296 } 3297 case ARMISD::VUZP: { 3298 unsigned Opc = 0; 3299 EVT VT = N->getValueType(0); 3300 switch (VT.getSimpleVT().SimpleTy) { 3301 default: return; 3302 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3303 case MVT::v4f16: 3304 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3305 case MVT::v2f32: 3306 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3307 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3308 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3309 case MVT::v8f16: 3310 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3311 case MVT::v4f32: 3312 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3313 } 3314 SDValue Pred = getAL(CurDAG, dl); 3315 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3316 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3317 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3318 return; 3319 } 3320 case ARMISD::VTRN: { 3321 unsigned Opc = 0; 3322 EVT VT = N->getValueType(0); 3323 switch (VT.getSimpleVT().SimpleTy) { 3324 default: return; 3325 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3326 case MVT::v4f16: 3327 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3328 case MVT::v2f32: 3329 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3330 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3331 case MVT::v8f16: 3332 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3333 case MVT::v4f32: 3334 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3335 } 3336 SDValue Pred = getAL(CurDAG, dl); 3337 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 3338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3339 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3340 return; 3341 } 3342 case ARMISD::BUILD_VECTOR: { 3343 EVT VecVT = N->getValueType(0); 3344 EVT EltVT = VecVT.getVectorElementType(); 3345 unsigned NumElts = VecVT.getVectorNumElements(); 3346 if (EltVT == MVT::f64) { 3347 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3348 ReplaceNode( 3349 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3350 return; 3351 } 3352 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3353 if (NumElts == 2) { 3354 ReplaceNode( 3355 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3356 return; 3357 } 3358 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3359 ReplaceNode(N, 3360 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3361 N->getOperand(2), N->getOperand(3))); 3362 return; 3363 } 3364 3365 case ARMISD::VLD1DUP: { 3366 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 3367 ARM::VLD1DUPd32 }; 3368 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 3369 ARM::VLD1DUPq32 }; 3370 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 3371 return; 3372 } 3373 3374 case ARMISD::VLD2DUP: { 3375 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3376 ARM::VLD2DUPd32 }; 3377 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 3378 return; 3379 } 3380 3381 case ARMISD::VLD3DUP: { 3382 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3383 ARM::VLD3DUPd16Pseudo, 3384 ARM::VLD3DUPd32Pseudo }; 3385 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 3386 return; 3387 } 3388 3389 case ARMISD::VLD4DUP: { 3390 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3391 ARM::VLD4DUPd16Pseudo, 3392 ARM::VLD4DUPd32Pseudo }; 3393 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 3394 return; 
3395 } 3396 3397 case ARMISD::VLD1DUP_UPD: { 3398 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 3399 ARM::VLD1DUPd16wb_fixed, 3400 ARM::VLD1DUPd32wb_fixed }; 3401 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 3402 ARM::VLD1DUPq16wb_fixed, 3403 ARM::VLD1DUPq32wb_fixed }; 3404 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 3405 return; 3406 } 3407 3408 case ARMISD::VLD2DUP_UPD: { 3409 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3410 ARM::VLD2DUPd16wb_fixed, 3411 ARM::VLD2DUPd32wb_fixed }; 3412 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 3413 return; 3414 } 3415 3416 case ARMISD::VLD3DUP_UPD: { 3417 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3418 ARM::VLD3DUPd16Pseudo_UPD, 3419 ARM::VLD3DUPd32Pseudo_UPD }; 3420 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 3421 return; 3422 } 3423 3424 case ARMISD::VLD4DUP_UPD: { 3425 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3426 ARM::VLD4DUPd16Pseudo_UPD, 3427 ARM::VLD4DUPd32Pseudo_UPD }; 3428 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 3429 return; 3430 } 3431 3432 case ARMISD::VLD1_UPD: { 3433 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3434 ARM::VLD1d16wb_fixed, 3435 ARM::VLD1d32wb_fixed, 3436 ARM::VLD1d64wb_fixed }; 3437 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3438 ARM::VLD1q16wb_fixed, 3439 ARM::VLD1q32wb_fixed, 3440 ARM::VLD1q64wb_fixed }; 3441 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3442 return; 3443 } 3444 3445 case ARMISD::VLD2_UPD: { 3446 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3447 ARM::VLD2d16wb_fixed, 3448 ARM::VLD2d32wb_fixed, 3449 ARM::VLD1q64wb_fixed}; 3450 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3451 ARM::VLD2q16PseudoWB_fixed, 3452 ARM::VLD2q32PseudoWB_fixed }; 3453 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3454 return; 3455 } 3456 3457 case ARMISD::VLD3_UPD: { 
3458 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3459 ARM::VLD3d16Pseudo_UPD, 3460 ARM::VLD3d32Pseudo_UPD, 3461 ARM::VLD1d64TPseudoWB_fixed}; 3462 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3463 ARM::VLD3q16Pseudo_UPD, 3464 ARM::VLD3q32Pseudo_UPD }; 3465 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3466 ARM::VLD3q16oddPseudo_UPD, 3467 ARM::VLD3q32oddPseudo_UPD }; 3468 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3469 return; 3470 } 3471 3472 case ARMISD::VLD4_UPD: { 3473 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3474 ARM::VLD4d16Pseudo_UPD, 3475 ARM::VLD4d32Pseudo_UPD, 3476 ARM::VLD1d64QPseudoWB_fixed}; 3477 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3478 ARM::VLD4q16Pseudo_UPD, 3479 ARM::VLD4q32Pseudo_UPD }; 3480 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3481 ARM::VLD4q16oddPseudo_UPD, 3482 ARM::VLD4q32oddPseudo_UPD }; 3483 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3484 return; 3485 } 3486 3487 case ARMISD::VLD2LN_UPD: { 3488 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3489 ARM::VLD2LNd16Pseudo_UPD, 3490 ARM::VLD2LNd32Pseudo_UPD }; 3491 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3492 ARM::VLD2LNq32Pseudo_UPD }; 3493 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3494 return; 3495 } 3496 3497 case ARMISD::VLD3LN_UPD: { 3498 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3499 ARM::VLD3LNd16Pseudo_UPD, 3500 ARM::VLD3LNd32Pseudo_UPD }; 3501 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3502 ARM::VLD3LNq32Pseudo_UPD }; 3503 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3504 return; 3505 } 3506 3507 case ARMISD::VLD4LN_UPD: { 3508 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3509 ARM::VLD4LNd16Pseudo_UPD, 3510 ARM::VLD4LNd32Pseudo_UPD }; 3511 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3512 ARM::VLD4LNq32Pseudo_UPD }; 
3513 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3514 return; 3515 } 3516 3517 case ARMISD::VST1_UPD: { 3518 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3519 ARM::VST1d16wb_fixed, 3520 ARM::VST1d32wb_fixed, 3521 ARM::VST1d64wb_fixed }; 3522 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3523 ARM::VST1q16wb_fixed, 3524 ARM::VST1q32wb_fixed, 3525 ARM::VST1q64wb_fixed }; 3526 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3527 return; 3528 } 3529 3530 case ARMISD::VST2_UPD: { 3531 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3532 ARM::VST2d16wb_fixed, 3533 ARM::VST2d32wb_fixed, 3534 ARM::VST1q64wb_fixed}; 3535 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3536 ARM::VST2q16PseudoWB_fixed, 3537 ARM::VST2q32PseudoWB_fixed }; 3538 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3539 return; 3540 } 3541 3542 case ARMISD::VST3_UPD: { 3543 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3544 ARM::VST3d16Pseudo_UPD, 3545 ARM::VST3d32Pseudo_UPD, 3546 ARM::VST1d64TPseudoWB_fixed}; 3547 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3548 ARM::VST3q16Pseudo_UPD, 3549 ARM::VST3q32Pseudo_UPD }; 3550 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3551 ARM::VST3q16oddPseudo_UPD, 3552 ARM::VST3q32oddPseudo_UPD }; 3553 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3554 return; 3555 } 3556 3557 case ARMISD::VST4_UPD: { 3558 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3559 ARM::VST4d16Pseudo_UPD, 3560 ARM::VST4d32Pseudo_UPD, 3561 ARM::VST1d64QPseudoWB_fixed}; 3562 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3563 ARM::VST4q16Pseudo_UPD, 3564 ARM::VST4q32Pseudo_UPD }; 3565 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3566 ARM::VST4q16oddPseudo_UPD, 3567 ARM::VST4q32oddPseudo_UPD }; 3568 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3569 return; 3570 } 3571 3572 case ARMISD::VST2LN_UPD: { 3573 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3574 ARM::VST2LNd16Pseudo_UPD, 3575 ARM::VST2LNd32Pseudo_UPD }; 3576 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3577 ARM::VST2LNq32Pseudo_UPD }; 3578 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3579 return; 3580 } 3581 3582 case ARMISD::VST3LN_UPD: { 3583 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3584 ARM::VST3LNd16Pseudo_UPD, 3585 ARM::VST3LNd32Pseudo_UPD }; 3586 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3587 ARM::VST3LNq32Pseudo_UPD }; 3588 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3589 return; 3590 } 3591 3592 case ARMISD::VST4LN_UPD: { 3593 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3594 ARM::VST4LNd16Pseudo_UPD, 3595 ARM::VST4LNd32Pseudo_UPD }; 3596 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3597 ARM::VST4LNq32Pseudo_UPD }; 3598 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3599 return; 3600 } 3601 3602 case ISD::INTRINSIC_VOID: 3603 case ISD::INTRINSIC_W_CHAIN: { 3604 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3605 switch (IntNo) { 3606 default: 3607 break; 3608 3609 case Intrinsic::arm_mrrc: 3610 case Intrinsic::arm_mrrc2: { 3611 SDLoc dl(N); 3612 SDValue Chain = N->getOperand(0); 3613 unsigned Opc; 3614 3615 if (Subtarget->isThumb()) 3616 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3617 else 3618 Opc = (IntNo == Intrinsic::arm_mrrc ? 
ARM::MRRC : ARM::MRRC2); 3619 3620 SmallVector<SDValue, 5> Ops; 3621 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3622 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3623 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3624 3625 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3626 // instruction will always be '1111' but it is possible in assembly language to specify 3627 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 3628 if (Opc != ARM::MRRC2) { 3629 Ops.push_back(getAL(CurDAG, dl)); 3630 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3631 } 3632 3633 Ops.push_back(Chain); 3634 3635 // Writes to two registers. 3636 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3637 3638 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3639 return; 3640 } 3641 case Intrinsic::arm_ldaexd: 3642 case Intrinsic::arm_ldrexd: { 3643 SDLoc dl(N); 3644 SDValue Chain = N->getOperand(0); 3645 SDValue MemAddr = N->getOperand(2); 3646 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3647 3648 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3649 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3650 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); 3651 3652 // arm_ldrexd returns a i64 value in {i32, i32} 3653 std::vector<EVT> ResTys; 3654 if (isThumb) { 3655 ResTys.push_back(MVT::i32); 3656 ResTys.push_back(MVT::i32); 3657 } else 3658 ResTys.push_back(MVT::Untyped); 3659 ResTys.push_back(MVT::Other); 3660 3661 // Place arguments in the right order. 3662 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3663 CurDAG->getRegister(0, MVT::i32), Chain}; 3664 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3665 // Transfer memoperands. 
3666 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3667 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3668 3669 // Remap uses. 3670 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 3671 if (!SDValue(N, 0).use_empty()) { 3672 SDValue Result; 3673 if (isThumb) 3674 Result = SDValue(Ld, 0); 3675 else { 3676 SDValue SubRegIdx = 3677 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3678 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3679 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3680 Result = SDValue(ResNode,0); 3681 } 3682 ReplaceUses(SDValue(N, 0), Result); 3683 } 3684 if (!SDValue(N, 1).use_empty()) { 3685 SDValue Result; 3686 if (isThumb) 3687 Result = SDValue(Ld, 1); 3688 else { 3689 SDValue SubRegIdx = 3690 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3691 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3692 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3693 Result = SDValue(ResNode,0); 3694 } 3695 ReplaceUses(SDValue(N, 1), Result); 3696 } 3697 ReplaceUses(SDValue(N, 2), OutChain); 3698 CurDAG->RemoveDeadNode(N); 3699 return; 3700 } 3701 case Intrinsic::arm_stlexd: 3702 case Intrinsic::arm_strexd: { 3703 SDLoc dl(N); 3704 SDValue Chain = N->getOperand(0); 3705 SDValue Val0 = N->getOperand(2); 3706 SDValue Val1 = N->getOperand(3); 3707 SDValue MemAddr = N->getOperand(4); 3708 3709 // Store exclusive double return a i32 value which is the return status 3710 // of the issued store. 3711 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3712 3713 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3714 // Place arguments in the right order. 3715 SmallVector<SDValue, 7> Ops; 3716 if (isThumb) { 3717 Ops.push_back(Val0); 3718 Ops.push_back(Val1); 3719 } else 3720 // arm_strexd uses GPRPair. 
3721 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3722 Ops.push_back(MemAddr); 3723 Ops.push_back(getAL(CurDAG, dl)); 3724 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3725 Ops.push_back(Chain); 3726 3727 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3728 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3729 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3730 3731 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3732 // Transfer memoperands. 3733 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3734 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3735 3736 ReplaceNode(N, St); 3737 return; 3738 } 3739 3740 case Intrinsic::arm_neon_vld1: { 3741 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3742 ARM::VLD1d32, ARM::VLD1d64 }; 3743 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3744 ARM::VLD1q32, ARM::VLD1q64}; 3745 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3746 return; 3747 } 3748 3749 case Intrinsic::arm_neon_vld1x2: { 3750 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3751 ARM::VLD1q32, ARM::VLD1q64 }; 3752 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 3753 ARM::VLD1d16QPseudo, 3754 ARM::VLD1d32QPseudo, 3755 ARM::VLD1d64QPseudo }; 3756 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3757 return; 3758 } 3759 3760 case Intrinsic::arm_neon_vld1x3: { 3761 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 3762 ARM::VLD1d16TPseudo, 3763 ARM::VLD1d32TPseudo, 3764 ARM::VLD1d64TPseudo }; 3765 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, 3766 ARM::VLD1q16LowTPseudo_UPD, 3767 ARM::VLD1q32LowTPseudo_UPD, 3768 ARM::VLD1q64LowTPseudo_UPD }; 3769 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 3770 ARM::VLD1q16HighTPseudo, 3771 ARM::VLD1q32HighTPseudo, 3772 ARM::VLD1q64HighTPseudo }; 3773 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3774 return; 3775 } 
3776 3777 case Intrinsic::arm_neon_vld1x4: { 3778 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 3779 ARM::VLD1d16QPseudo, 3780 ARM::VLD1d32QPseudo, 3781 ARM::VLD1d64QPseudo }; 3782 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 3783 ARM::VLD1q16LowQPseudo_UPD, 3784 ARM::VLD1q32LowQPseudo_UPD, 3785 ARM::VLD1q64LowQPseudo_UPD }; 3786 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 3787 ARM::VLD1q16HighQPseudo, 3788 ARM::VLD1q32HighQPseudo, 3789 ARM::VLD1q64HighQPseudo }; 3790 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3791 return; 3792 } 3793 3794 case Intrinsic::arm_neon_vld2: { 3795 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3796 ARM::VLD2d32, ARM::VLD1q64 }; 3797 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3798 ARM::VLD2q32Pseudo }; 3799 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3800 return; 3801 } 3802 3803 case Intrinsic::arm_neon_vld3: { 3804 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3805 ARM::VLD3d16Pseudo, 3806 ARM::VLD3d32Pseudo, 3807 ARM::VLD1d64TPseudo }; 3808 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3809 ARM::VLD3q16Pseudo_UPD, 3810 ARM::VLD3q32Pseudo_UPD }; 3811 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3812 ARM::VLD3q16oddPseudo, 3813 ARM::VLD3q32oddPseudo }; 3814 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3815 return; 3816 } 3817 3818 case Intrinsic::arm_neon_vld4: { 3819 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3820 ARM::VLD4d16Pseudo, 3821 ARM::VLD4d32Pseudo, 3822 ARM::VLD1d64QPseudo }; 3823 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3824 ARM::VLD4q16Pseudo_UPD, 3825 ARM::VLD4q32Pseudo_UPD }; 3826 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3827 ARM::VLD4q16oddPseudo, 3828 ARM::VLD4q32oddPseudo }; 3829 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3830 return; 3831 } 3832 3833 case Intrinsic::arm_neon_vld2dup: 
{ 3834 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3835 ARM::VLD2DUPd32, ARM::VLD1q64 }; 3836 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 3837 ARM::VLD2DUPq16EvenPseudo, 3838 ARM::VLD2DUPq32EvenPseudo }; 3839 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 3840 ARM::VLD2DUPq16OddPseudo, 3841 ARM::VLD2DUPq32OddPseudo }; 3842 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 3843 DOpcodes, QOpcodes0, QOpcodes1); 3844 return; 3845 } 3846 3847 case Intrinsic::arm_neon_vld3dup: { 3848 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 3849 ARM::VLD3DUPd16Pseudo, 3850 ARM::VLD3DUPd32Pseudo, 3851 ARM::VLD1d64TPseudo }; 3852 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 3853 ARM::VLD3DUPq16EvenPseudo, 3854 ARM::VLD3DUPq32EvenPseudo }; 3855 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 3856 ARM::VLD3DUPq16OddPseudo, 3857 ARM::VLD3DUPq32OddPseudo }; 3858 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 3859 DOpcodes, QOpcodes0, QOpcodes1); 3860 return; 3861 } 3862 3863 case Intrinsic::arm_neon_vld4dup: { 3864 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 3865 ARM::VLD4DUPd16Pseudo, 3866 ARM::VLD4DUPd32Pseudo, 3867 ARM::VLD1d64QPseudo }; 3868 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 3869 ARM::VLD4DUPq16EvenPseudo, 3870 ARM::VLD4DUPq32EvenPseudo }; 3871 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 3872 ARM::VLD4DUPq16OddPseudo, 3873 ARM::VLD4DUPq32OddPseudo }; 3874 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 3875 DOpcodes, QOpcodes0, QOpcodes1); 3876 return; 3877 } 3878 3879 case Intrinsic::arm_neon_vld2lane: { 3880 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3881 ARM::VLD2LNd16Pseudo, 3882 ARM::VLD2LNd32Pseudo }; 3883 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3884 ARM::VLD2LNq32Pseudo }; 3885 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3886 return; 3887 } 3888 3889 
case Intrinsic::arm_neon_vld3lane: { 3890 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3891 ARM::VLD3LNd16Pseudo, 3892 ARM::VLD3LNd32Pseudo }; 3893 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3894 ARM::VLD3LNq32Pseudo }; 3895 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3896 return; 3897 } 3898 3899 case Intrinsic::arm_neon_vld4lane: { 3900 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3901 ARM::VLD4LNd16Pseudo, 3902 ARM::VLD4LNd32Pseudo }; 3903 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3904 ARM::VLD4LNq32Pseudo }; 3905 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3906 return; 3907 } 3908 3909 case Intrinsic::arm_neon_vst1: { 3910 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3911 ARM::VST1d32, ARM::VST1d64 }; 3912 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3913 ARM::VST1q32, ARM::VST1q64 }; 3914 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3915 return; 3916 } 3917 3918 case Intrinsic::arm_neon_vst1x2: { 3919 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3920 ARM::VST1q32, ARM::VST1q64 }; 3921 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 3922 ARM::VST1d16QPseudo, 3923 ARM::VST1d32QPseudo, 3924 ARM::VST1d64QPseudo }; 3925 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3926 return; 3927 } 3928 3929 case Intrinsic::arm_neon_vst1x3: { 3930 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 3931 ARM::VST1d16TPseudo, 3932 ARM::VST1d32TPseudo, 3933 ARM::VST1d64TPseudo }; 3934 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 3935 ARM::VST1q16LowTPseudo_UPD, 3936 ARM::VST1q32LowTPseudo_UPD, 3937 ARM::VST1q64LowTPseudo_UPD }; 3938 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 3939 ARM::VST1q16HighTPseudo, 3940 ARM::VST1q32HighTPseudo, 3941 ARM::VST1q64HighTPseudo }; 3942 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3943 return; 3944 } 3945 3946 case 
Intrinsic::arm_neon_vst1x4: { 3947 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 3948 ARM::VST1d16QPseudo, 3949 ARM::VST1d32QPseudo, 3950 ARM::VST1d64QPseudo }; 3951 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 3952 ARM::VST1q16LowQPseudo_UPD, 3953 ARM::VST1q32LowQPseudo_UPD, 3954 ARM::VST1q64LowQPseudo_UPD }; 3955 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 3956 ARM::VST1q16HighQPseudo, 3957 ARM::VST1q32HighQPseudo, 3958 ARM::VST1q64HighQPseudo }; 3959 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3960 return; 3961 } 3962 3963 case Intrinsic::arm_neon_vst2: { 3964 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3965 ARM::VST2d32, ARM::VST1q64 }; 3966 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3967 ARM::VST2q32Pseudo }; 3968 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3969 return; 3970 } 3971 3972 case Intrinsic::arm_neon_vst3: { 3973 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3974 ARM::VST3d16Pseudo, 3975 ARM::VST3d32Pseudo, 3976 ARM::VST1d64TPseudo }; 3977 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3978 ARM::VST3q16Pseudo_UPD, 3979 ARM::VST3q32Pseudo_UPD }; 3980 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3981 ARM::VST3q16oddPseudo, 3982 ARM::VST3q32oddPseudo }; 3983 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3984 return; 3985 } 3986 3987 case Intrinsic::arm_neon_vst4: { 3988 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3989 ARM::VST4d16Pseudo, 3990 ARM::VST4d32Pseudo, 3991 ARM::VST1d64QPseudo }; 3992 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3993 ARM::VST4q16Pseudo_UPD, 3994 ARM::VST4q32Pseudo_UPD }; 3995 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3996 ARM::VST4q16oddPseudo, 3997 ARM::VST4q32oddPseudo }; 3998 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3999 return; 4000 } 4001 4002 case Intrinsic::arm_neon_vst2lane: { 4003 static 
const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4004 ARM::VST2LNd16Pseudo, 4005 ARM::VST2LNd32Pseudo }; 4006 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4007 ARM::VST2LNq32Pseudo }; 4008 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4009 return; 4010 } 4011 4012 case Intrinsic::arm_neon_vst3lane: { 4013 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4014 ARM::VST3LNd16Pseudo, 4015 ARM::VST3LNd32Pseudo }; 4016 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4017 ARM::VST3LNq32Pseudo }; 4018 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4019 return; 4020 } 4021 4022 case Intrinsic::arm_neon_vst4lane: { 4023 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4024 ARM::VST4LNd16Pseudo, 4025 ARM::VST4LNd32Pseudo }; 4026 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4027 ARM::VST4LNq32Pseudo }; 4028 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4029 return; 4030 } 4031 } 4032 break; 4033 } 4034 4035 case ISD::ATOMIC_CMP_SWAP: 4036 SelectCMP_SWAP(N); 4037 return; 4038 } 4039 4040 SelectCode(N); 4041 } 4042 4043 // Inspect a register string of the form 4044 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 4045 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 4046 // and obtain the integer operands from them, adding these operands to the 4047 // provided vector. 4048 static void getIntOperandsFromRegisterString(StringRef RegString, 4049 SelectionDAG *CurDAG, 4050 const SDLoc &DL, 4051 std::vector<SDValue> &Ops) { 4052 SmallVector<StringRef, 5> Fields; 4053 RegString.split(Fields, ':'); 4054 4055 if (Fields.size() > 1) { 4056 bool AllIntFields = true; 4057 4058 for (StringRef Field : Fields) { 4059 // Need to trim out leading 'cp' characters and get the integer field. 
4060 unsigned IntField; 4061 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 4062 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 4063 } 4064 4065 assert(AllIntFields && 4066 "Unexpected non-integer value in special register string."); 4067 } 4068 } 4069 4070 // Maps a Banked Register string to its mask value. The mask value returned is 4071 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 4072 // mask operand, which expresses which register is to be used, e.g. r8, and in 4073 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 4074 // was invalid. 4075 static inline int getBankedRegisterMask(StringRef RegString) { 4076 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 4077 if (!TheReg) 4078 return -1; 4079 return TheReg->Encoding; 4080 } 4081 4082 // The flags here are common to those allowed for apsr in the A class cores and 4083 // those allowed for the special registers in the M class cores. Returns a 4084 // value representing which flags were present, -1 if invalid. 4085 static inline int getMClassFlagsMask(StringRef Flags) { 4086 return StringSwitch<int>(Flags) 4087 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 4088 // correct when flags are not permitted 4089 .Case("g", 0x1) 4090 .Case("nzcvq", 0x2) 4091 .Case("nzcvqg", 0x3) 4092 .Default(-1); 4093 } 4094 4095 // Maps MClass special registers string to its value for use in the 4096 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 4097 // Returns -1 to signify that the string was invalid. 
4098 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 4099 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 4100 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 4101 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 4102 return -1; 4103 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 4104 } 4105 4106 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 4107 // The mask operand contains the special register (R Bit) in bit 4, whether 4108 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 4109 // bits 3-0 contains the fields to be accessed in the special register, set by 4110 // the flags provided with the register. 4111 int Mask = 0; 4112 if (Reg == "apsr") { 4113 // The flags permitted for apsr are the same flags that are allowed in 4114 // M class registers. We get the flag value and then shift the flags into 4115 // the correct place to combine with the mask. 4116 Mask = getMClassFlagsMask(Flags); 4117 if (Mask == -1) 4118 return -1; 4119 return Mask << 2; 4120 } 4121 4122 if (Reg != "cpsr" && Reg != "spsr") { 4123 return -1; 4124 } 4125 4126 // This is the same as if the flags were "fc" 4127 if (Flags.empty() || Flags == "all") 4128 return Mask | 0x9; 4129 4130 // Inspect the supplied flags string and set the bits in the mask for 4131 // the relevant and valid flags allowed for cpsr and spsr. 4132 for (char Flag : Flags) { 4133 int FlagVal; 4134 switch (Flag) { 4135 case 'c': 4136 FlagVal = 0x1; 4137 break; 4138 case 'x': 4139 FlagVal = 0x2; 4140 break; 4141 case 's': 4142 FlagVal = 0x4; 4143 break; 4144 case 'f': 4145 FlagVal = 0x8; 4146 break; 4147 default: 4148 FlagVal = 0; 4149 } 4150 4151 // This avoids allowing strings where the same flag bit appears twice. 4152 if (!FlagVal || (Mask & FlagVal)) 4153 return -1; 4154 Mask |= FlagVal; 4155 } 4156 4157 // If the register is spsr then we need to set the R bit. 
4158 if (Reg == "spsr") 4159 Mask |= 0x10; 4160 4161 return Mask; 4162 } 4163 4164 // Lower the read_register intrinsic to ARM specific DAG nodes 4165 // using the supplied metadata string to select the instruction node to use 4166 // and the registers/masks to construct as operands for the node. 4167 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 4168 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4169 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4170 bool IsThumb2 = Subtarget->isThumb2(); 4171 SDLoc DL(N); 4172 4173 std::vector<SDValue> Ops; 4174 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4175 4176 if (!Ops.empty()) { 4177 // If the special register string was constructed of fields (as defined 4178 // in the ACLE) then need to lower to MRC node (32 bit) or 4179 // MRRC node(64 bit), we can make the distinction based on the number of 4180 // operands we have. 4181 unsigned Opcode; 4182 SmallVector<EVT, 3> ResTypes; 4183 if (Ops.size() == 5){ 4184 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 4185 ResTypes.append({ MVT::i32, MVT::Other }); 4186 } else { 4187 assert(Ops.size() == 3 && 4188 "Invalid number of fields in special register string."); 4189 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 4190 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 4191 } 4192 4193 Ops.push_back(getAL(CurDAG, DL)); 4194 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4195 Ops.push_back(N->getOperand(0)); 4196 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 4197 return true; 4198 } 4199 4200 std::string SpecialReg = RegString->getString().lower(); 4201 4202 int BankedReg = getBankedRegisterMask(SpecialReg); 4203 if (BankedReg != -1) { 4204 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 4205 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4206 N->getOperand(0) }; 4207 ReplaceNode( 4208 N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSbanked : ARM::MRSbanked, 4209 DL, MVT::i32, MVT::Other, Ops)); 4210 return true; 4211 } 4212 4213 // The VFP registers are read by creating SelectionDAG nodes with opcodes 4214 // corresponding to the register that is being read from. So we switch on the 4215 // string to find which opcode we need to use. 4216 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4217 .Case("fpscr", ARM::VMRS) 4218 .Case("fpexc", ARM::VMRS_FPEXC) 4219 .Case("fpsid", ARM::VMRS_FPSID) 4220 .Case("mvfr0", ARM::VMRS_MVFR0) 4221 .Case("mvfr1", ARM::VMRS_MVFR1) 4222 .Case("mvfr2", ARM::VMRS_MVFR2) 4223 .Case("fpinst", ARM::VMRS_FPINST) 4224 .Case("fpinst2", ARM::VMRS_FPINST2) 4225 .Default(0); 4226 4227 // If an opcode was found then we can lower the read to a VFP instruction. 4228 if (Opcode) { 4229 if (!Subtarget->hasVFP2Base()) 4230 return false; 4231 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 4232 return false; 4233 4234 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4235 N->getOperand(0) }; 4236 ReplaceNode(N, 4237 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 4238 return true; 4239 } 4240 4241 // If the target is M Class then need to validate that the register string 4242 // is an acceptable value, so check that a mask can be constructed from the 4243 // string. 4244 if (Subtarget->isMClass()) { 4245 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4246 if (SYSmValue == -1) 4247 return false; 4248 4249 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4250 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4251 N->getOperand(0) }; 4252 ReplaceNode( 4253 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 4254 return true; 4255 } 4256 4257 // Here we know the target is not M Class so we need to check if it is one 4258 // of the remaining possible values which are apsr, cpsr or spsr. 
4259 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 4260 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4261 N->getOperand(0) }; 4262 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, 4263 DL, MVT::i32, MVT::Other, Ops)); 4264 return true; 4265 } 4266 4267 if (SpecialReg == "spsr") { 4268 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4269 N->getOperand(0) }; 4270 ReplaceNode( 4271 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 4272 MVT::i32, MVT::Other, Ops)); 4273 return true; 4274 } 4275 4276 return false; 4277 } 4278 4279 // Lower the write_register intrinsic to ARM specific DAG nodes 4280 // using the supplied metadata string to select the instruction node to use 4281 // and the registers/masks to use in the nodes 4282 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 4283 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4284 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4285 bool IsThumb2 = Subtarget->isThumb2(); 4286 SDLoc DL(N); 4287 4288 std::vector<SDValue> Ops; 4289 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4290 4291 if (!Ops.empty()) { 4292 // If the special register string was constructed of fields (as defined 4293 // in the ACLE) then need to lower to MCR node (32 bit) or 4294 // MCRR node(64 bit), we can make the distinction based on the number of 4295 // operands we have. 4296 unsigned Opcode; 4297 if (Ops.size() == 5) { 4298 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 4299 Ops.insert(Ops.begin()+2, N->getOperand(2)); 4300 } else { 4301 assert(Ops.size() == 3 && 4302 "Invalid number of fields in special register string."); 4303 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 4304 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 4305 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 4306 } 4307 4308 Ops.push_back(getAL(CurDAG, DL)); 4309 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4310 Ops.push_back(N->getOperand(0)); 4311 4312 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4313 return true; 4314 } 4315 4316 std::string SpecialReg = RegString->getString().lower(); 4317 int BankedReg = getBankedRegisterMask(SpecialReg); 4318 if (BankedReg != -1) { 4319 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 4320 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4321 N->getOperand(0) }; 4322 ReplaceNode( 4323 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 4324 DL, MVT::Other, Ops)); 4325 return true; 4326 } 4327 4328 // The VFP registers are written to by creating SelectionDAG nodes with 4329 // opcodes corresponding to the register that is being written. So we switch 4330 // on the string to find which opcode we need to use. 4331 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4332 .Case("fpscr", ARM::VMSR) 4333 .Case("fpexc", ARM::VMSR_FPEXC) 4334 .Case("fpsid", ARM::VMSR_FPSID) 4335 .Case("fpinst", ARM::VMSR_FPINST) 4336 .Case("fpinst2", ARM::VMSR_FPINST2) 4337 .Default(0); 4338 4339 if (Opcode) { 4340 if (!Subtarget->hasVFP2Base()) 4341 return false; 4342 Ops = { N->getOperand(2), getAL(CurDAG, DL), 4343 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4344 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4345 return true; 4346 } 4347 4348 std::pair<StringRef, StringRef> Fields; 4349 Fields = StringRef(SpecialReg).rsplit('_'); 4350 std::string Reg = Fields.first.str(); 4351 StringRef Flags = Fields.second; 4352 4353 // If the target was M Class then need to validate the special register value 4354 // and retrieve the mask for use in the instruction node. 
4355 if (Subtarget->isMClass()) { 4356 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 4357 if (SYSmValue == -1) 4358 return false; 4359 4360 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4361 N->getOperand(2), getAL(CurDAG, DL), 4362 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4363 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 4364 return true; 4365 } 4366 4367 // We then check to see if a valid mask can be constructed for one of the 4368 // register string values permitted for the A and R class cores. These values 4369 // are apsr, spsr and cpsr; these are also valid on older cores. 4370 int Mask = getARClassRegisterMask(Reg, Flags); 4371 if (Mask != -1) { 4372 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 4373 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4374 N->getOperand(0) }; 4375 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 4376 DL, MVT::Other, Ops)); 4377 return true; 4378 } 4379 4380 return false; 4381 } 4382 4383 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 4384 std::vector<SDValue> AsmNodeOperands; 4385 unsigned Flag, Kind; 4386 bool Changed = false; 4387 unsigned NumOps = N->getNumOperands(); 4388 4389 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 4390 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 4391 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 4392 // respectively. Since there is no constraint to explicitly specify a 4393 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 4394 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 4395 // them into a GPRPair. 4396 4397 SDLoc dl(N); 4398 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 4399 : SDValue(nullptr,0); 4400 4401 SmallVector<bool, 8> OpChanged; 4402 // Glue node will be appended late. 
4403 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 4404 SDValue op = N->getOperand(i); 4405 AsmNodeOperands.push_back(op); 4406 4407 if (i < InlineAsm::Op_FirstOperand) 4408 continue; 4409 4410 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 4411 Flag = C->getZExtValue(); 4412 Kind = InlineAsm::getKind(Flag); 4413 } 4414 else 4415 continue; 4416 4417 // Immediate operands to inline asm in the SelectionDAG are modeled with 4418 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 4419 // the second is a constant with the value of the immediate. If we get here 4420 // and we have a Kind_Imm, skip the next operand, and continue. 4421 if (Kind == InlineAsm::Kind_Imm) { 4422 SDValue op = N->getOperand(++i); 4423 AsmNodeOperands.push_back(op); 4424 continue; 4425 } 4426 4427 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 4428 if (NumRegs) 4429 OpChanged.push_back(false); 4430 4431 unsigned DefIdx = 0; 4432 bool IsTiedToChangedOp = false; 4433 // If it's a use that is tied with a previous def, it has no 4434 // reg class constraint. 4435 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 4436 IsTiedToChangedOp = OpChanged[DefIdx]; 4437 4438 // Memory operands to inline asm in the SelectionDAG are modeled with two 4439 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 4440 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 4441 // it doesn't get misinterpreted), and continue. We do this here because 4442 // it's important to update the OpChanged array correctly before moving on. 
4443 if (Kind == InlineAsm::Kind_Mem) { 4444 SDValue op = N->getOperand(++i); 4445 AsmNodeOperands.push_back(op); 4446 continue; 4447 } 4448 4449 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 4450 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 4451 continue; 4452 4453 unsigned RC; 4454 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 4455 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 4456 || NumRegs != 2) 4457 continue; 4458 4459 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 4460 SDValue V0 = N->getOperand(i+1); 4461 SDValue V1 = N->getOperand(i+2); 4462 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 4463 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 4464 SDValue PairedReg; 4465 MachineRegisterInfo &MRI = MF->getRegInfo(); 4466 4467 if (Kind == InlineAsm::Kind_RegDef || 4468 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 4469 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 4470 // the original GPRs. 4471 4472 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4473 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4474 SDValue Chain = SDValue(N,0); 4475 4476 SDNode *GU = N->getGluedUser(); 4477 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 4478 Chain.getValue(1)); 4479 4480 // Extract values from a GPRPair reg and copy to the original GPR reg. 4481 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4482 RegCopy); 4483 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4484 RegCopy); 4485 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 4486 RegCopy.getValue(1)); 4487 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 4488 4489 // Update the original glue user. 
4490 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4491 Ops.push_back(T1.getValue(1)); 4492 CurDAG->UpdateNodeOperands(GU, Ops); 4493 } 4494 else { 4495 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4496 // GPRPair and then pass the GPRPair to the inline asm. 4497 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4498 4499 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4500 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4501 Chain.getValue(1)); 4502 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4503 T0.getValue(1)); 4504 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4505 4506 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4507 // i32 VRs of inline asm with it. 4508 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4509 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4510 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4511 4512 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4513 Glue = Chain.getValue(1); 4514 } 4515 4516 Changed = true; 4517 4518 if(PairedReg.getNode()) { 4519 OpChanged[OpChanged.size() -1 ] = true; 4520 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4521 if (IsTiedToChangedOp) 4522 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4523 else 4524 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4525 // Replace the current flag. 4526 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4527 Flag, dl, MVT::i32); 4528 // Add the new register node and skip the original two GPRs. 4529 AsmNodeOperands.push_back(PairedReg); 4530 // Skip the next two GPRs. 
i += 2;
    }
  }

  // Append the glue value last, but only when one is actually set.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  // Changed is only set once a register pair has been rewritten above; if
  // that never happened, report that the node was left as it was.
  if (!Changed)
    return false;

  // Build a node with the same opcode from the rewritten operand list and
  // substitute it for N.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}


/// SelectInlineAsmMemoryOperand - Handle a memory operand of an inline asm
/// node for the given constraint.
///
/// \param Op            the operand value; it is forwarded unchanged.
/// \param ConstraintID  one of the InlineAsm::Constraint_* values listed in
///                      the switch below. Any other value reaches
///                      llvm_unreachable.
/// \param OutOps        receives the selected operand(s); exactly one value
///                      (Op itself) is appended for every accepted constraint.
/// \returns false for every accepted constraint.
/// NOTE(review): false presumably signals "handled" to the caller, following
/// the SelectionDAGISel hook convention - confirm against the base class.
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    LLVM_FALLTHROUGH;
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register.  That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  // Every path through the switch above either returns or is marked
  // unreachable, so this statement is not expected to execute.
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
/// Returns a newly allocated ARMDAGToDAGISel constructed from \p TM and
/// \p OptLevel; the raw pointer is handed to the caller.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}